# Source code for evo_gafs.core.evaluator

"""Fitness evaluation via cross-validation (the wrapper criterion)."""

from __future__ import annotations

import logging
import warnings

import numpy as np
from sklearn.base import BaseEstimator, clone
from sklearn.model_selection import cross_val_score

from evo_gafs.core.config import GAConfig

logger = logging.getLogger("evo_gafs")



# [docs]  (Sphinx viewcode link marker; not part of the module code)
class FitnessEvaluator:
    """Evaluate an individual (binary feature mask) with cross-validation.

    The evaluator is instantiated once per fit and registered with DEAP as the
    ``evaluate`` operator. It caches results per individual to avoid re-running
    cross-validation for genomes that have already been seen.

    Penalisation strategy
    ----------------------
    * Fewer than ``min_features`` active features -> fitness of zero.
    * An empty feature subset is always penalised, even when
      ``min_features`` is zero or negative: there is nothing to fit, and
      its compression of ``1.0`` must not be rewarded.
    * A cross-validation run that raises, or whose mean score is not finite
      (NaN / inf), contributes a CV score of ``0.0``.

    Fitness by mode
    ---------------
    * ``'single'``::

          fitness = alpha * cv_score + (1 - alpha) * compression

      where ``compression = 1 - n_selected / n_total``.
    * ``'multiobjective'``: returns ``(cv_score, compression)``, both maximised
      (DEAP/NSGA-II handles the Pareto front).

    Parameters
    ----------
    estimator : sklearn estimator
        Model used as the wrapper criterion. It is cloned for each evaluation.
    X : numpy.ndarray of shape (n_samples, n_features)
        Feature matrix.
    y : numpy.ndarray of shape (n_samples,)
        Target vector.
    scoring : str
        scikit-learn scoring string.
    cv : cross-validation splitter
        Splitter used for the score.
    config : GAConfig
        Configuration (provides ``mode``, ``alpha``, ``min_features``,
        ``n_jobs``).
    """

    def __init__(
        self,
        estimator: BaseEstimator,
        X: np.ndarray,
        y: np.ndarray,
        scoring: str,
        cv: object,
        config: GAConfig,
    ) -> None:
        self.estimator = estimator
        self.X = X
        self.y = y
        self.scoring = scoring
        self.cv = cv
        self.config = config
        # Total number of candidate features; denominator of the compression.
        self.n_features = X.shape[1]
        # Cache misses seen so far (see ``eval_count``).
        self._eval_count = 0
        # Genome (as a tuple of bits) -> fitness tuple already computed for it.
        self._cache: dict[tuple[int, ...], tuple[float, ...]] = {}

    def __call__(self, individual: list[int]) -> tuple[float, ...]:
        """Evaluate one individual and return its fitness tuple.

        Parameters
        ----------
        individual : list of int
            Binary mask; position ``i`` equal to ``1`` selects feature ``i``.

        Returns
        -------
        tuple of float
            ``(fitness,)`` in single-objective mode, ``(cv_score, compression)``
            in multi-objective mode. Penalised individuals get all zeros.
        """
        key = tuple(individual)
        cached = self._cache.get(key)
        if cached is not None:
            return cached

        self._eval_count += 1
        selected = [i for i, bit in enumerate(individual) if bit == 1]
        n_selected = len(selected)

        # An empty subset is never a valid solution, so enforce an effective
        # minimum of one feature regardless of the configured threshold. This
        # also keeps the compression division below away from n_features == 0.
        if n_selected < max(self.config.min_features, 1):
            penalty: tuple[float, ...] = (
                (0.0, 0.0) if self.config.mode == "multiobjective" else (0.0,)
            )
            self._cache[key] = penalty
            return penalty

        cv_score = self._cross_val_score(selected)
        compression = 1.0 - (n_selected / self.n_features)

        if self.config.mode == "multiobjective":
            result: tuple[float, ...] = (cv_score, compression)
        else:
            alpha = self.config.alpha
            result = (alpha * cv_score + (1.0 - alpha) * compression,)

        self._cache[key] = result
        return result

    def cv_score(self, selected: list[int]) -> float:
        """Public helper: raw (unweighted) CV score for a feature subset.

        Parameters
        ----------
        selected : list of int
            Column indices of ``X`` to keep.

        Returns
        -------
        float
            Mean cross-validation score, or ``0.0`` if the run failed or the
            mean was not finite.
        """
        return self._cross_val_score(selected)

    def _cross_val_score(self, selected: list[int]) -> float:
        """Return the mean CV score of a fresh estimator clone on ``selected``.

        Warnings are silenced for the duration of the run. Failing folds score
        ``0.0`` (``error_score``); any exception escaping ``cross_val_score``
        and any non-finite mean are likewise reported as ``0.0``.
        """
        X_sub = self.X[:, selected]
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            try:
                scores = cross_val_score(
                    clone(self.estimator),
                    X_sub,
                    self.y,
                    scoring=self.scoring,
                    cv=self.cv,
                    n_jobs=self.config.n_jobs,
                    error_score=0.0,
                )
                mean_score = float(np.mean(scores))
            except Exception as exc:  # pragma: no cover - defensive
                logger.warning("Cross-validation failed for a candidate: %s", exc)
                return 0.0

        # NaN compares false against everything, so a NaN fitness would make
        # selection/ranking erratic; treat it like any other failed run.
        if not np.isfinite(mean_score):
            logger.warning(
                "Cross-validation produced a non-finite score for a candidate; "
                "using 0.0"
            )
            return 0.0
        return mean_score

    @property
    def eval_count(self) -> int:
        """Number of (non-cached) fitness evaluations performed.

        Every cache miss is counted, including penalised individuals that
        never reach cross-validation.
        """
        return self._eval_count