# Source code for evo_gafs.core.evaluator
"""Fitness evaluation via cross-validation (the wrapper criterion)."""
from __future__ import annotations
import logging
import warnings
import numpy as np
from sklearn.base import BaseEstimator, clone
from sklearn.model_selection import cross_val_score
from evo_gafs.core.config import GAConfig
logger = logging.getLogger("evo_gafs")
# [docs]
class FitnessEvaluator:
    """Evaluate an individual (binary feature mask) with cross-validation.

    The evaluator is instantiated once per fit and registered with DEAP as the
    ``evaluate`` operator. It caches results per individual to avoid re-running
    cross-validation for genomes that have already been seen.

    Penalisation strategy
    ---------------------
    * No active feature at all -> fitness of zero on every objective,
      regardless of ``min_features``. Cross-validation is not attempted.
    * Fewer than ``min_features`` active features -> fitness of zero on every
      objective.

    Fitness by mode
    ---------------
    * ``'single'``::

          fitness = alpha * cv_score + (1 - alpha) * compression

      where ``compression = 1 - n_selected / n_total``.
    * ``'multiobjective'``: returns ``(cv_score, compression)``, both maximised
      (DEAP/NSGA-II handles the Pareto front).

    Any ``mode`` other than ``'multiobjective'`` is treated as single-objective.

    Parameters
    ----------
    estimator : sklearn estimator
        Model used as the wrapper criterion. It is cloned for each evaluation.
    X : numpy.ndarray of shape (n_samples, n_features)
        Feature matrix.
    y : numpy.ndarray of shape (n_samples,)
        Target vector.
    scoring : str
        scikit-learn scoring string.
    cv : cross-validation splitter
        Splitter used for the score.
    config : GAConfig
        Configuration (provides ``mode``, ``alpha``, ``min_features``,
        ``n_jobs``).
    """

    def __init__(
        self,
        estimator: BaseEstimator,
        X: np.ndarray,
        y: np.ndarray,
        scoring: str,
        cv: object,
        config: GAConfig,
    ) -> None:
        self.estimator = estimator
        self.X = X
        self.y = y
        self.scoring = scoring
        self.cv = cv
        self.config = config
        # Total number of candidate features (columns of X); denominator of
        # the compression term.
        self.n_features = X.shape[1]
        # Counts calls that missed the cache (penalised individuals included).
        self._eval_count = 0
        # Maps a genome (as a tuple of bits) to its fitness tuple.
        self._cache: dict[tuple[int, ...], tuple[float, ...]] = {}

    def __call__(self, individual: list[int]) -> tuple[float, ...]:
        """Evaluate one individual and return its fitness tuple.

        Parameters
        ----------
        individual : list of int
            Feature mask; position ``i`` equal to ``1`` selects column ``i``
            of ``X``.

        Returns
        -------
        tuple of float
            ``(fitness,)`` in single-objective mode, ``(cv_score, compression)``
            in multi-objective mode. Penalised individuals get zeros.
        """
        key = tuple(individual)
        if key in self._cache:
            return self._cache[key]

        self._eval_count += 1
        selected = [i for i, bit in enumerate(individual) if bit == 1]
        n_selected = len(selected)
        multiobjective = self.config.mode == "multiobjective"

        # An empty subset is always penalised, even when ``min_features`` is 0:
        # no model can be fitted on zero columns, and letting it through would
        # hand the individual the maximum compression credit for a failed
        # cross-validation. The guard also keeps the division below away from
        # a zero-column ``X``.
        if n_selected == 0 or n_selected < self.config.min_features:
            result: tuple[float, ...] = (0.0, 0.0) if multiobjective else (0.0,)
        else:
            score = self._cross_val_score(selected)
            compression = 1.0 - (n_selected / self.n_features)
            if multiobjective:
                result = (score, compression)
            else:
                alpha = self.config.alpha
                result = (alpha * score + (1.0 - alpha) * compression,)

        self._cache[key] = result
        return result

    def cv_score(self, selected: list[int]) -> float:
        """Public helper: raw (unweighted) CV score for a feature subset.

        Parameters
        ----------
        selected : list of int
            Column indices of ``X`` to keep.

        Returns
        -------
        float
            Mean cross-validation score, or ``0.0`` if cross-validation raised.
        """
        return self._cross_val_score(selected)

    def _cross_val_score(self, selected: list[int]) -> float:
        """Return the mean CV score of a fresh estimator clone on ``selected``.

        All warnings emitted during cross-validation are suppressed. If the
        call raises, the failure is logged and ``0.0`` is returned.
        """
        X_sub = self.X[:, selected]
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            try:
                # NOTE(review): ``error_score=0.0`` scores a failed fold as 0.0.
                # For negated-loss scorers (``neg_*``) that is presumably the
                # best attainable value -- confirm this is intended.
                scores = cross_val_score(
                    clone(self.estimator),
                    X_sub,
                    self.y,
                    scoring=self.scoring,
                    cv=self.cv,
                    n_jobs=self.config.n_jobs,
                    error_score=0.0,
                )
                return float(np.mean(scores))
            except Exception as exc:  # pragma: no cover - defensive
                logger.warning("Cross-validation failed for a candidate: %s", exc)
                return 0.0

    @property
    def eval_count(self) -> int:
        """Number of (non-cached) fitness evaluations performed."""
        return self._eval_count