resumate / tests /test_github.py
gperdrizet's picture
Added README text GitHub repo data retrieval
84cdb3a verified
"""
Unit tests for the github module.
"""
import unittest
from unittest.mock import patch, MagicMock
import requests
import base64
from functions import github
# pylint: disable=protected-access
class TestGetGitHubRepositories(unittest.TestCase):
    """Tests for get_github_repositories with its private helpers patched out."""

    def test_successful_repository_retrieval(self):
        """The processed repository list is returned when every step succeeds."""
        # Payload the patched fetch helper hands back.
        fetched = [
            {
                "name": "test-repo",
                "description": "Test repository",
                "language": "Python",
                "stargazers_count": 10,
                "forks_count": 5,
                "updated_at": "2024-01-01T00:00:00Z",
                "html_url": "https://github.com/user/test-repo",
                "topics": ["python", "test"],
                "fork": False
            }
        ]

        # Payload the patched processing helper hands back.
        expected = [
            {
                "name": "test-repo",
                "description": "Test repository",
                "language": "Python",
                "stars": 10,
                "forks": 5,
                "updated_at": "2024-01-01T00:00:00Z",
                "created_at": "2024-01-01T00:00:00Z",
                "html_url": "https://github.com/user/test-repo",
                "topics": ["python", "test"],
                "size": 100,
                "readme": "# Test Repository\n\nThis is a test README."
            }
        ]

        # File-system access is stubbed out so nothing is written to disk.
        with patch('functions.github._get_user_repositories',
                   return_value=fetched) as fetch_mock, \
                patch('functions.github._process_repository_data',
                      return_value=expected) as process_mock, \
                patch('pathlib.Path.mkdir'), \
                patch('builtins.open'), \
                patch('json.dump'):
            outcome = github.get_github_repositories("testuser")

        self.assertEqual(outcome, expected)
        fetch_mock.assert_called_once_with("testuser")
        process_mock.assert_called_once_with(fetched)

    def test_no_repositories_found(self):
        """None is returned when the fetch helper yields None."""
        with patch('functions.github._get_user_repositories',
                   return_value=None) as fetch_mock:
            outcome = github.get_github_repositories("emptyuser")

        self.assertIsNone(outcome)
        fetch_mock.assert_called_once_with("emptyuser")

    def test_exception_during_processing(self):
        """None is returned when the fetch helper raises."""
        with patch('functions.github._get_user_repositories',
                   side_effect=Exception("API error")) as fetch_mock:
            outcome = github.get_github_repositories("erroruser")

        self.assertIsNone(outcome)
        fetch_mock.assert_called_once_with("erroruser")

    def test_file_saving_error(self):
        """A failure while saving to disk must not prevent the result being returned."""
        with patch('functions.github._get_user_repositories',
                   return_value=[{"name": "test"}]), \
                patch('functions.github._process_repository_data',
                      return_value=[{"name": "test", "stars": 0}]), \
                patch('pathlib.Path.mkdir'), \
                patch('builtins.open', side_effect=Exception("File error")), \
                patch('logging.getLogger') as logger_factory:
            outcome = github.get_github_repositories("testuser")

        # The repositories still come back even though open() raised.
        self.assertEqual(outcome, [{"name": "test", "stars": 0}])

        # The failure is expected to be reported as a warning on the logger.
        logger_factory.return_value.warning.assert_called()
class TestGetUserRepositories(unittest.TestCase):
    """Test cases for the _get_user_repositories function.

    ``requests.get`` is patched in every test, so no real HTTP request is made.
    """

    @staticmethod
    def _make_response(status_code, payload=None):
        """Build a mocked HTTP response.

        Args:
            status_code: Value exposed as ``response.status_code``.
            payload: Optional value returned by ``response.json()``. When
                omitted, ``json`` is left as an unconfigured mock.

        Returns:
            A ``MagicMock`` standing in for a ``requests`` response.
        """
        response = MagicMock()
        response.status_code = status_code
        if payload is not None:
            response.json.return_value = payload
        return response

    @patch('requests.get')
    def test_successful_single_page(self, mock_get):
        """Test successful repository retrieval with single page."""
        mock_get.return_value = self._make_response(200, [
            {
                "name": "repo1",
                "description": "First repo",
                "language": "Python"
            },
            {
                "name": "repo2",
                "description": "Second repo",
                "language": "JavaScript"
            }
        ])

        result = github._get_user_repositories("testuser")

        self.assertEqual(len(result), 2)
        self.assertEqual(result[0]["name"], "repo1")
        self.assertEqual(result[1]["name"], "repo2")

        # Verify API call parameters
        mock_get.assert_called_once()
        args, kwargs = mock_get.call_args
        self.assertIn("https://api.github.com/users/testuser/repos", args[0])
        self.assertEqual(kwargs["params"]["type"], "public")
        self.assertEqual(kwargs["params"]["sort"], "updated")
        self.assertEqual(kwargs["headers"]["User-Agent"], "Resumate-App/1.0")

    @patch('requests.get')
    def test_successful_multiple_pages(self, mock_get):
        """Test successful repository retrieval with multiple pages."""
        # A full first page (100 entries) followed by a short second page
        # (50 entries); the short page is what ends pagination.
        mock_get.side_effect = [
            self._make_response(200, [{"name": f"repo{i}"} for i in range(100)]),
            self._make_response(200, [{"name": f"repo{i}"} for i in range(100, 150)]),
        ]

        result = github._get_user_repositories("testuser")

        self.assertEqual(len(result), 150)
        self.assertEqual(mock_get.call_count, 2)

    @patch('requests.get')
    def test_api_error_404(self, mock_get):
        """Test handling of 404 user not found error."""
        mock_get.return_value = self._make_response(404)

        result = github._get_user_repositories("nonexistentuser")

        self.assertIsNone(result)

    @patch('requests.get')
    def test_api_error_403(self, mock_get):
        """Test handling of 403 rate limit error."""
        mock_get.return_value = self._make_response(403)

        result = github._get_user_repositories("testuser")

        self.assertIsNone(result)

    @patch('requests.get')
    def test_network_error_no_repos(self, mock_get):
        """Test handling of network errors with no existing repos."""
        mock_get.side_effect = requests.RequestException("Connection error")

        result = github._get_user_repositories("testuser")

        self.assertIsNone(result)

    @patch('requests.get')
    def test_network_error_with_partial_repos(self, mock_get):
        """Test handling of network errors after getting some repos."""
        # The first page has to be full (100 entries). As
        # test_successful_multiple_pages shows, a shorter page ends pagination,
        # in which case the failing second request below would never be issued
        # and this test would not exercise the partial-results path at all.
        mock_get.side_effect = [
            self._make_response(200, [{"name": f"repo{i}"} for i in range(100)]),
            requests.RequestException("Connection error"),
        ]

        with patch('logging.getLogger'):
            result = github._get_user_repositories("testuser")

        # The second (failing) request must actually have been attempted.
        self.assertEqual(mock_get.call_count, 2)

        # Should return the partial data from the first successful call
        self.assertEqual(result, [{"name": f"repo{i}"} for i in range(100)])

    @patch('requests.get')
    def test_safety_limit_prevents_infinite_loop(self, mock_get):
        """Test that safety limit prevents infinite pagination."""
        # Every request returns a full page, so only the safety limit can stop
        # the pagination loop.
        mock_get.return_value = self._make_response(
            200, [{"name": f"repo{i}"} for i in range(100)]
        )

        result = github._get_user_repositories("testuser")

        # Should stop at page 10 (safety limit)
        self.assertEqual(mock_get.call_count, 10)
        self.assertEqual(len(result), 1000)  # 10 pages * 100 repos each
class TestProcessRepositoryData(unittest.TestCase):
    """Tests for _process_repository_data with README retrieval patched out."""

    @patch('functions.github.get_repository_readme')
    def test_basic_processing(self, readme_mock):
        """A fully populated raw repository is mapped onto the processed schema."""
        readme_mock.return_value = "# Test Repository\n\nThis is a test README."

        processed = github._process_repository_data([
            {
                "name": "test-repo",
                "description": "Test repository",
                "language": "Python",
                "stargazers_count": 10,
                "forks_count": 5,
                "updated_at": "2024-01-01T00:00:00Z",
                "created_at": "2024-01-01T00:00:00Z",
                "html_url": "https://github.com/user/test-repo",
                "topics": ["python", "test"],
                "size": 100,
                "fork": False
            }
        ])

        self.assertEqual(len(processed), 1)
        entry = processed[0]

        # Checked field by field (rather than comparing whole dicts) so that
        # additional keys in the processed entry do not fail the test.
        expected_fields = {
            "name": "test-repo",
            "description": "Test repository",
            "language": "Python",
            "stars": 10,
            "forks": 5,
            "updated_at": "2024-01-01T00:00:00Z",
            "created_at": "2024-01-01T00:00:00Z",
            "html_url": "https://github.com/user/test-repo",
            "topics": ["python", "test"],
            "size": 100,
            "readme": "# Test Repository\n\nThis is a test README.",
        }
        for field, value in expected_fields.items():
            self.assertEqual(entry[field], value)

        # The README must have been requested for the repository URL.
        readme_mock.assert_called_once_with("https://github.com/user/test-repo")

    @patch('functions.github.get_repository_readme')
    def test_fork_filtering(self, readme_mock):
        """The fork with zero stars is dropped; the original and the starred fork stay."""
        readme_mock.return_value = "# Repository README"

        candidates = [
            {
                "name": "original-repo",
                "fork": False,
                "stargazers_count": 5,
                "html_url": "https://github.com/user/original-repo"
            },
            {
                "name": "unmodified-fork",
                "fork": True,
                "stargazers_count": 0,
                "html_url": "https://github.com/user/unmodified-fork"
            },
            {
                "name": "modified-fork",
                "fork": True,
                "stargazers_count": 3,
                "html_url": "https://github.com/user/modified-fork"
            }
        ]

        kept = github._process_repository_data(candidates)

        self.assertEqual(len(kept), 2)
        kept_names = [entry["name"] for entry in kept]
        for wanted in ("original-repo", "modified-fork"):
            self.assertIn(wanted, kept_names)
        self.assertNotIn("unmodified-fork", kept_names)

        # One README lookup per repository that survived the filter.
        self.assertEqual(readme_mock.call_count, 2)

    @patch('functions.github.get_repository_readme')
    def test_missing_fields(self, readme_mock):
        """Absent optional fields fall back to empty/zero defaults."""
        readme_mock.return_value = ""

        # Only the name is supplied; every other field is missing.
        processed = github._process_repository_data([{"name": "minimal-repo"}])

        self.assertEqual(len(processed), 1)
        entry = processed[0]

        expected_fields = {
            "name": "minimal-repo",
            "description": "",
            "language": "",
            "stars": 0,
            "forks": 0,
            "updated_at": "",
            "created_at": "",
            "html_url": "",
            "topics": [],
            "size": 0,
            "readme": "",
        }
        for field, value in expected_fields.items():
            self.assertEqual(entry[field], value)

        # Without a URL there is nothing to fetch a README from.
        readme_mock.assert_not_called()

    @patch('functions.github.get_repository_readme')
    def test_processing_error_handling(self, readme_mock):
        """A None entry in the input currently surfaces as AttributeError."""
        readme_mock.return_value = "README content"

        # Two well-formed repositories around a None entry.
        candidates = [
            {
                "name": "good-repo",
                "stargazers_count": 5,
                "html_url": "https://github.com/user/good-repo"
            },
            None,
            {
                "name": "another-good-repo",
                "stargazers_count": 3,
                "html_url": "https://github.com/user/another-good-repo"
            }
        ]

        with patch('logging.getLogger') as logger_factory:
            _ = logger_factory.return_value

            # Known limitation pinned by this test: None entries are not
            # handled and the call raises instead of skipping the entry.
            with self.assertRaises(AttributeError):
                github._process_repository_data(candidates)

    @patch('functions.github.get_repository_readme')
    def test_empty_repository_list(self, readme_mock):
        """An empty input produces an empty output and no README lookups."""
        self.assertEqual(github._process_repository_data([]), [])
        readme_mock.assert_not_called()

    @patch('functions.github.get_repository_readme')
    def test_readme_retrieval_error_handling(self, readme_mock):
        """An empty README (the retrieval-failure value) is passed through unchanged."""
        readme_mock.return_value = ""

        processed = github._process_repository_data([
            {
                "name": "test-repo",
                "html_url": "https://github.com/user/test-repo",
                "stargazers_count": 5
            }
        ])

        self.assertEqual(len(processed), 1)
        self.assertEqual(processed[0]["readme"], "")
        readme_mock.assert_called_once_with("https://github.com/user/test-repo")

    def test_all_forks_filtered(self):
        """Input consisting solely of zero-star forks yields an empty list."""
        candidates = [
            {"name": fork_name, "fork": True, "stargazers_count": 0}
            for fork_name in ("fork1", "fork2")
        ]

        self.assertEqual(github._process_repository_data(candidates), [])
class TestGetRepositoryReadme(unittest.TestCase):
    """Test cases for the get_repository_readme function.

    ``requests.get`` is patched in every test, so no real HTTP request is made.
    """

    @staticmethod
    def _readme_response(readme_text):
        """Build a mocked 200 response carrying *readme_text* base64-encoded.

        Args:
            readme_text: README content to encode as UTF-8 and then base64.

        Returns:
            A ``MagicMock`` whose ``json()`` returns a dict with ``content``
            (the base64 text) and ``encoding`` set to ``"base64"``.
        """
        response = MagicMock()
        response.status_code = 200
        response.json.return_value = {
            "content": base64.b64encode(readme_text.encode('utf-8')).decode('ascii'),
            "encoding": "base64"
        }
        return response

    @patch('requests.get')
    def test_successful_readme_retrieval(self, mock_get):
        """Test successful README file retrieval."""
        readme_content = "# Test Repository\n\nThis is a test README file."
        mock_get.return_value = self._readme_response(readme_content)

        result = github.get_repository_readme("https://github.com/owner/repo")

        self.assertEqual(result, readme_content)
        mock_get.assert_called_once()
        args, kwargs = mock_get.call_args
        self.assertIn("https://api.github.com/repos/owner/repo/readme", args[0])
        self.assertEqual(kwargs["headers"]["User-Agent"], "Resumate-App/1.0")

    @patch('requests.get')
    def test_readme_not_found(self, mock_get):
        """Test handling when README file doesn't exist."""
        mock_response = MagicMock()
        mock_response.status_code = 404
        mock_get.return_value = mock_response

        result = github.get_repository_readme("https://github.com/owner/repo")

        self.assertEqual(result, "")

    @patch('requests.get')
    def test_api_error(self, mock_get):
        """Test handling of API errors."""
        mock_response = MagicMock()
        mock_response.status_code = 500
        mock_get.return_value = mock_response

        result = github.get_repository_readme("https://github.com/owner/repo")

        self.assertEqual(result, "")

    @patch('requests.get')
    def test_network_error(self, mock_get):
        """Test handling of network errors."""
        mock_get.side_effect = requests.RequestException("Connection error")

        result = github.get_repository_readme("https://github.com/owner/repo")

        self.assertEqual(result, "")

    def test_invalid_url_format(self):
        """Test handling of invalid URL formats."""
        invalid_urls = [
            "https://gitlab.com/owner/repo",
            "https://github.com/owner",
            "https://github.com/owner/repo/extra/path",
            "not-a-url",
            "",
            "https://github.com/"
        ]

        # Keep the test hermetic: if a URL were ever to slip past validation,
        # the request fails locally instead of reaching the real GitHub API.
        # A RequestException is mapped to "" (see test_network_error), so this
        # guard does not change the expected result.
        with patch('requests.get',
                   side_effect=requests.RequestException("Network disabled in tests")):
            for url in invalid_urls:
                with self.subTest(url=url):
                    result = github.get_repository_readme(url)
                    self.assertEqual(result, "")

    @patch('requests.get')
    def test_missing_content_field(self, mock_get):
        """Test handling when API response is missing content field."""
        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.json.return_value = {
            "encoding": "base64"
            # Missing "content" field
        }
        mock_get.return_value = mock_response

        result = github.get_repository_readme("https://github.com/owner/repo")

        self.assertEqual(result, "")

    @patch('requests.get')
    def test_invalid_base64_content(self, mock_get):
        """Test handling of invalid base64 content."""
        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.json.return_value = {
            "content": "invalid-base64-content!@#$",
            "encoding": "base64"
        }
        mock_get.return_value = mock_response

        result = github.get_repository_readme("https://github.com/owner/repo")

        self.assertEqual(result, "")

    @patch('requests.get')
    def test_unicode_readme_content(self, mock_get):
        """Test handling of README with Unicode characters."""
        readme_content = "# Test 🚀\n\nEmoji and unicode: 中文 русский"
        mock_get.return_value = self._readme_response(readme_content)

        result = github.get_repository_readme("https://github.com/owner/repo")

        self.assertEqual(result, readme_content)

    @patch('requests.get')
    def test_large_readme_content(self, mock_get):
        """Test handling of large README files."""
        # Create a large README content
        readme_content = "# Large README\n\n" + "This is a line of content.\n" * 1000
        mock_get.return_value = self._readme_response(readme_content)

        result = github.get_repository_readme("https://github.com/owner/repo")

        self.assertEqual(result, readme_content)
        self.assertGreater(len(result), 10000)  # Verify it's actually large

    @patch('requests.get')
    def test_url_with_trailing_slash(self, mock_get):
        """Test handling of URLs with trailing slash."""
        readme_content = "# Test README"
        mock_get.return_value = self._readme_response(readme_content)

        result = github.get_repository_readme("https://github.com/owner/repo/")

        self.assertEqual(result, readme_content)

        # Verify the API call used the correct URL without trailing slash
        args, _ = mock_get.call_args
        self.assertIn("https://api.github.com/repos/owner/repo/readme", args[0])
if __name__ == "__main__":
    # Run every test case in this module when the file is executed directly.
    unittest.main()