"""GitHub repository importer for vcspull."""

from __future__ import annotations

import logging
import typing as t
import urllib.parse

from .base import (
    HTTPClient,
    ImportMode,
    ImportOptions,
    RemoteRepo,
    filter_repo,
    get_token_from_env,
)

# Module-level logger named after this module.
log = logging.getLogger(__name__)

# Root of the public GitHub REST API; used when no base URL is supplied.
GITHUB_API_URL = "https://api.github.com"
# Page size sent as ``per_page`` on every listing and search request.
DEFAULT_PER_PAGE = 100
# GitHub search API limits results to 1000; exceeding this causes HTTP 422.
SEARCH_MAX_RESULTS = 1000


class GitHubImporter:
    """Importer for GitHub repositories.

    Supports three modes:
    - USER: Fetch repositories for a user
    - ORG: Fetch repositories for an organization
    - SEARCH: Search for repositories by query

    Examples
    --------
    >>> importer = GitHubImporter()
    >>> importer.service_name
    'GitHub'
    """

    service_name: str = "GitHub"

    def __init__(
        self,
        token: str | None = None,
        base_url: str | None = None,
    ) -> None:
        """Initialize the GitHub importer.

        Parameters
        ----------
        token : str | None
            GitHub API token. If not provided, will try GITHUB_TOKEN env var.
        base_url : str | None
            Base URL for GitHub Enterprise. Defaults to api.github.com.
            ``/api/v3`` is appended unless the URL already contains ``/api/``
            or points at the public API host.

        Notes
        -----
        Authentication is optional for public repositories. For private
        repositories or higher rate limits, set ``GITHUB_TOKEN`` or ``GH_TOKEN``.

        Classic PAT: no scopes needed for public repos; ``repo`` scope for
        private. Fine-grained PAT: "Metadata: Read-only" for public; add
        "Contents: Read-only" for private repos.

        Create a token at https://github.com/settings/tokens.

        Examples
        --------
        >>> importer = GitHubImporter(token="fake")
        >>> importer.service_name
        'GitHub'
        """
        self._token = token or get_token_from_env("GITHUB_TOKEN", "GH_TOKEN")
        self._base_url = (base_url or GITHUB_API_URL).rstrip("/")

        # GitHub Enterprise needs /api/v3; public api.github.com does not,
        # even when the caller passes the public URL explicitly.
        api_url = self._resolve_api_url(self._base_url, GITHUB_API_URL)

        self._client = HTTPClient(
            api_url,
            token=self._token,
            auth_header="Authorization",
            auth_prefix="Bearer",
            user_agent="vcspull",
        )

    @staticmethod
    def _resolve_api_url(base_url: str, public_url: str) -> str:
        """Return the REST API root to use for ``base_url``.

        Parameters
        ----------
        base_url : str
            Base URL with any trailing slash already removed
        public_url : str
            Root of the public API, which never takes an ``/api/v3`` suffix

        Returns
        -------
        str
            ``base_url`` unchanged when it shares a host with ``public_url``
            or already contains ``/api/``; otherwise ``base_url`` with
            ``/api/v3`` appended

        Examples
        --------
        >>> GitHubImporter._resolve_api_url(
        ...     "https://ghe.example.com", "https://api.github.com"
        ... )
        'https://ghe.example.com/api/v3'
        >>> GitHubImporter._resolve_api_url(
        ...     "https://api.github.com", "https://api.github.com"
        ... )
        'https://api.github.com'
        """
        try:
            # ``hostname`` is lower-cased, so the comparison ignores case.
            same_host = (
                urllib.parse.urlsplit(base_url).hostname
                == urllib.parse.urlsplit(public_url).hostname
            )
        except ValueError:
            # Unparseable URL: fall back to the plain substring rule below.
            same_host = False

        if same_host or "/api/" in base_url:
            return base_url
        return f"{base_url}/api/v3"

    @property
    def is_authenticated(self) -> bool:
        """Check if the importer has authentication configured.

        Returns
        -------
        bool
            True if a token is configured

        Examples
        --------
        >>> GitHubImporter(token="fake").is_authenticated
        True
        """
        return self._token is not None

    def fetch_repos(self, options: ImportOptions) -> t.Iterator[RemoteRepo]:
        """Fetch repositories from GitHub.

        Dispatches on ``options.mode`` to the user, organization or search
        fetcher.

        Parameters
        ----------
        options : ImportOptions
            Import options

        Yields
        ------
        RemoteRepo
            Repository information

        Raises
        ------
        AuthenticationError
            When authentication fails
        RateLimitError
            When rate limit is exceeded
        NotFoundError
            When user/org is not found
        """
        # NOTE(review): a mode other than USER/ORG/SEARCH yields nothing and
        # raises no error -- confirm this is intended.
        if options.mode == ImportMode.USER:
            yield from self._fetch_user(options)
        elif options.mode == ImportMode.ORG:
            yield from self._fetch_org(options)
        elif options.mode == ImportMode.SEARCH:
            yield from self._fetch_search(options)

    def _fetch_user(self, options: ImportOptions) -> t.Iterator[RemoteRepo]:
        """Fetch repositories for a user.

        Parameters
        ----------
        options : ImportOptions
            Import options; ``options.target`` is the user name

        Yields
        ------
        RemoteRepo
            Repository information
        """
        # safe="" also escapes "/" so the target cannot add path segments.
        target = urllib.parse.quote(options.target, safe="")
        endpoint = f"/users/{target}/repos"
        yield from self._paginate_repos(endpoint, options)

    def _fetch_org(self, options: ImportOptions) -> t.Iterator[RemoteRepo]:
        """Fetch repositories for an organization.

        Parameters
        ----------
        options : ImportOptions
            Import options; ``options.target`` is the organization name

        Yields
        ------
        RemoteRepo
            Repository information
        """
        # safe="" also escapes "/" so the target cannot add path segments.
        target = urllib.parse.quote(options.target, safe="")
        endpoint = f"/orgs/{target}/repos"
        yield from self._paginate_repos(endpoint, options)

    def _fetch_search(self, options: ImportOptions) -> t.Iterator[RemoteRepo]:
        """Search for repositories.

        Builds a search query from ``options.target`` plus optional
        ``language:`` and ``stars:>=`` qualifiers, then pages through the
        results ordered by stars, descending.

        Parameters
        ----------
        options : ImportOptions
            Import options

        Yields
        ------
        RemoteRepo
            Repository information
        """
        query_parts = [options.target]

        if options.language:
            query_parts.append(f"language:{options.language}")

        if options.min_stars > 0:
            query_parts.append(f"stars:>={options.min_stars}")

        query = " ".join(query_parts)
        endpoint = "/search/repositories"
        page = 1
        count = 0
        total_available: int | None = None

        # NOTE(review): a limit of 0 skips this loop entirely; presumably the
        # caller maps "--limit 0" to a large number -- confirm.
        while count < options.limit:
            # Always use DEFAULT_PER_PAGE to maintain consistent pagination offset.
            # Changing per_page between pages causes offset misalignment and duplicates.
            params: dict[str, str | int] = {
                "q": query,
                "per_page": DEFAULT_PER_PAGE,
                "page": page,
                "sort": "stars",
                "order": "desc",
            }

            data, headers = self._client.get(
                endpoint,
                params=params,
                service_name=self.service_name,
            )

            self._log_rate_limit(headers)

            total_count = data.get("total_count", 0)
            if page == 1:
                # Remember the reported total for the truncation warning below.
                total_available = total_count
                if total_count > SEARCH_MAX_RESULTS:
                    log.warning(
                        "GitHub search returned %d total results but API limits "
                        "to %d; consider narrowing your query",
                        total_count,
                        SEARCH_MAX_RESULTS,
                    )

            items = data.get("items", [])
            if not items:
                break

            for item in items:
                if count >= options.limit:
                    break

                repo = self._parse_repo(item)
                # Only repositories that pass the filter count toward the limit.
                if filter_repo(repo, options):
                    yield repo
                    count += 1

            # Check if there are more pages
            if len(items) < DEFAULT_PER_PAGE:
                break

            # GitHub search API caps at 1000 results
            if page * DEFAULT_PER_PAGE >= SEARCH_MAX_RESULTS:
                break

            page += 1

        # Warn if results were truncated by --limit
        if (
            count >= options.limit
            and total_available is not None
            and total_available > count
        ):
            log.warning(
                "Showing %d of %d repositories (use --limit 0 to fetch all)",
                count,
                total_available,
            )

    def _paginate_repos(
        self,
        endpoint: str,
        options: ImportOptions,
    ) -> t.Iterator[RemoteRepo]:
        """Paginate through repository listing endpoints.

        Requests pages ordered by last update, descending, and stops when
        ``options.limit`` repositories have been yielded, a page comes back
        empty, or a page is shorter than ``DEFAULT_PER_PAGE``.

        Parameters
        ----------
        endpoint : str
            API endpoint
        options : ImportOptions
            Import options

        Yields
        ------
        RemoteRepo
            Repository information
        """
        page = 1
        count = 0
        # Set when the limit stops iteration while more results may remain.
        more_available = False

        while count < options.limit:
            # Always use DEFAULT_PER_PAGE to maintain consistent pagination offset.
            # Changing per_page between pages causes offset misalignment and duplicates.
            params: dict[str, str | int] = {
                "per_page": DEFAULT_PER_PAGE,
                "page": page,
                "sort": "updated",
                "direction": "desc",
            }

            data, headers = self._client.get(
                endpoint,
                params=params,
                service_name=self.service_name,
            )

            self._log_rate_limit(headers)

            if not data:
                break

            for idx, item in enumerate(data):
                if count >= options.limit:
                    # Remaining items on this page or a full page = more exist
                    more_available = (
                        idx < len(data) - 1 or len(data) == DEFAULT_PER_PAGE
                    )
                    break

                repo = self._parse_repo(item)
                # Only repositories that pass the filter count toward the limit.
                if filter_repo(repo, options):
                    yield repo
                    count += 1

            # Boundary: limit reached on the last item of a full page
            if count >= options.limit and len(data) == DEFAULT_PER_PAGE:
                more_available = True
                break

            # Check if there are more pages
            if len(data) < DEFAULT_PER_PAGE:
                break

            page += 1

        # Warn if results were truncated by --limit
        # GitHub user/org endpoints don't return total count
        if count >= options.limit and more_available:
            log.warning(
                "Showing %d repositories; more may be available "
                "(use --limit 0 to fetch all)",
                count,
            )

    def _parse_repo(self, data: dict[str, t.Any]) -> RemoteRepo:
        """Parse GitHub API response into RemoteRepo.

        Missing keys and explicit JSON ``null`` values both fall back to the
        same defaults, so string, integer and boolean fields never receive
        ``None``. ``description`` and ``language`` are passed through as-is.

        Parameters
        ----------
        data : dict
            GitHub API repository data

        Returns
        -------
        RemoteRepo
            Parsed repository information
        """
        # ``.get(key, default)`` only covers missing keys; ``or`` also covers
        # keys that are present with a null value.
        return RemoteRepo(
            name=data.get("name") or "",
            clone_url=data.get("clone_url") or "",
            ssh_url=data.get("ssh_url") or "",
            html_url=data.get("html_url") or "",
            description=data.get("description"),
            language=data.get("language"),
            topics=tuple(data.get("topics") or []),
            stars=data.get("stargazers_count") or 0,
            is_fork=bool(data.get("fork")),
            is_archived=bool(data.get("archived")),
            default_branch=data.get("default_branch") or "main",
            owner=(data.get("owner") or {}).get("login") or "",
        )

    def _log_rate_limit(self, headers: dict[str, str]) -> None:
        """Log rate limit information from response headers.

        Logs a warning when fewer than 10 requests remain and a debug message
        otherwise. Nothing is logged when either header is absent or the
        remaining count is not an integer.

        Parameters
        ----------
        headers : dict[str, str]
            Response headers
        """
        remaining = headers.get("x-ratelimit-remaining")
        limit = headers.get("x-ratelimit-limit")

        if remaining is not None and limit is not None:
            try:
                remaining_int = int(remaining)
            except (ValueError, TypeError):
                # Unparseable header value: skip logging rather than fail.
                return
            if remaining_int < 10:
                log.warning(
                    "GitHub API rate limit low: %s/%s remaining",
                    remaining,
                    limit,
                )
            else:
                log.debug(
                    "GitHub API rate limit: %s/%s remaining",
                    remaining,
                    limit,
                )
