"""
github.py

Functions for retrieving information from GitHub profiles and repositories.
"""

# import re
import json
import logging
import base64
from typing import List, Dict
from pathlib import Path
from datetime import datetime

import requests

# pylint: disable=broad-exception-caught


def get_github_repositories(username: str) -> list:
    """
    Retrieve public repositories from a GitHub profile URL.

    Args:
        username (str): GitHub username (e.g., username)

    Returns:
        dict: List containing dictionaries of repository information

    Example:
        [
            {
                "name": "repo-name",
                "description": "Repository description",
                "language": "Python",
                "stars": 10,
                "forks": 2,
                "updated_at": "2024-01-01T00:00:00Z",
                "html_url": "https://github.com/user/repo",
                "topics": ["python", "api"],
                "readme": "# Project Title\n\nProject description..."
            }
        ]
    """

    logger = logging.getLogger(f'{__name__}.get_github_repositories')

    try:

        logger.info("Fetching repositories for GitHub user: %s", username)

        # Get repositories
        repositories = _get_user_repositories(username)

        if repositories:
            repositories = _process_repository_data(repositories)

            # Save results to JSON file
            try:
                github_repos_dir = Path(__file__).parent.parent / "data" / "github_repos"
                github_repos_dir.mkdir(parents=True, exist_ok=True)

                # Create timestamped filename
                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                output_file = github_repos_dir / f"github_repos_{timestamp}.json"

                with open(output_file, 'w', encoding='utf-8') as f:
                    json.dump(repositories, f, indent=2, ensure_ascii=False)

            except Exception as save_error:
                logger.warning("Failed to save GitHub repositories to file: %s", str(save_error))

    except Exception as e:
        logger.error("Error retrieving GitHub repositories: %s", str(e))

        return None

    return repositories


def _get_user_repositories(username: str) -> Dict:
    """
    Get user's public repositories from GitHub API.

    Args:
        username (str): GitHub username

    Returns:
        dict: API response with repositories
    """

    logger = logging.getLogger(f'{__name__}._get_user_repositories')

    try:
        # Get repositories with pagination
        all_repos = []
        page = 1
        per_page = 100  # Maximum allowed by GitHub API

        while True:

            url = f"https://api.github.com/users/{username}/repos"

            params = {
                "type": "public",
                "sort": "updated",
                "direction": "desc",
                "per_page": per_page,
                "page": page
            }

            headers = {
                "Accept": "application/vnd.github.v3+json",
                "User-Agent": "Resumate-App/1.0"
            }

            response = requests.get(url, headers=headers, params=params, timeout=10)

            if response.status_code != 200:
                logger.error("GitHub API error: %s", response.status_code)
                return None

            repos = response.json()

            if not repos:  # No more repositories
                break

            all_repos.extend(repos)

            # If we got less than per_page, we've reached the end
            if len(repos) < per_page:
                break

            page += 1

            # Safety limit to prevent infinite loops
            if page > 10:  # Max 1000 repos
                break

        return all_repos

    except requests.RequestException as e:
        logger.error("Network error fetching repositories: %s", str(e))

        # If we have some repos, return them
        if len(all_repos) > 0:
            logger.info("Returning partial repository data due to error")
            return all_repos

        else:
            logger.error("No repositories found and network error occurred")
            return None


def _process_repository_data(repos: List[Dict]) -> List[Dict]:
    """
    Process and clean repository data for easier consumption.

    Args:
        repos (List[Dict]): Raw repository data from GitHub API

    Returns:
        List[Dict]: Processed repository data
    """

    logger = logging.getLogger(f'{__name__}._process_repository_data')

    processed = []

    for repo in repos:

        # Skip forks unless they have significant modifications
        if repo.get("fork", False) and repo.get("stargazers_count", 0) == 0:
            continue

        try:
            processed_repo = {
                "name": repo.get("name", ""),
                "description": repo.get("description", ""),
                "language": repo.get("language", ""),
                "stars": repo.get("stargazers_count", 0),
                "forks": repo.get("forks_count", 0),
                "updated_at": repo.get("updated_at", ""),
                "created_at": repo.get("created_at", ""),
                "html_url": repo.get("html_url", ""),
                "topics": repo.get("topics", []),
                "size": repo.get("size", 0)
            }

            # Get README content for the repository
            repo_url = repo.get("html_url", "")

            if repo_url:
                readme_content = get_repository_readme(repo_url)
                processed_repo["readme"] = readme_content

            else:
                processed_repo["readme"] = ""

            processed.append(processed_repo)

        except Exception as e:
            logger.error("Error processing repository data: %s", str(e))
            continue

    return processed


def get_repository_readme(repo_url: str) -> str:
    """
    Get the fulltext content of a repository's README file.
    
    Args:
        repo_url (str): GitHub repository URL (e.g., "https://github.com/owner/repo")
    
    Returns:
        str: README file content as text, or empty string if not found/error
    
    Example:
        >>> readme_content = get_repository_readme("https://github.com/owner/repo")
        >>> print(readme_content[:100])
        # My Project
        
        This is a sample project that does...
    """

    logger = logging.getLogger(f'{__name__}.get_repository_readme')

    try:
        # Extract owner and repo name from URL
        if not repo_url.startswith("https://github.com/"):
            logger.error("Invalid GitHub URL format: %s", repo_url)
            return ""

        # Remove trailing slash and split
        repo_url = repo_url.rstrip("/")
        parts = repo_url.replace("https://github.com/", "").split("/")

        if len(parts) != 2:
            logger.error("Invalid GitHub URL format, expected owner/repo: %s", repo_url)
            return ""

        owner, repo = parts

        logger.info("Fetching README for repository: %s/%s", owner, repo)

        # GitHub API endpoint for README
        api_url = f"https://api.github.com/repos/{owner}/{repo}/readme"

        headers = {
            "Accept": "application/vnd.github.v3+json",
            "User-Agent": "Resumate-App/1.0"
        }

        response = requests.get(api_url, headers=headers, timeout=10)

        if response.status_code == 404:
            logger.info("No README file found for repository: %s/%s", owner, repo)
            return ""

        if response.status_code != 200:
            logger.error("GitHub API error fetching README: %s", response.status_code)
            return ""

        readme_data = response.json()

        # README content is base64 encoded
        if "content" not in readme_data:
            logger.error("README API response missing content field")
            return ""

        # Decode base64 content
        encoded_content = readme_data["content"]

        # Remove any whitespace/newlines from base64 string
        encoded_content = encoded_content.replace("\n", "").replace(" ", "")

        try:
            decoded_content = base64.b64decode(encoded_content).decode('utf-8')
            logger.info(
                "Successfully retrieved README content (%d characters)",
                len(decoded_content)
            )

            return decoded_content

        except Exception as decode_error:
            logger.error("Error decoding README content: %s", str(decode_error))
            return ""

    except requests.RequestException as e:
        logger.error("Network error fetching README: %s", str(e))
        return ""

    except Exception as e:
        logger.error("Error retrieving README: %s", str(e))
        return ""