Source code for evo_gafs.benchmark.runner

"""Multi-dataset benchmarking utility for :class:`GAFeatureSelector`."""

from __future__ import annotations

import copy
import time
import warnings
from typing import Union

import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, clone
from sklearn.model_selection import KFold, StratifiedKFold, cross_val_score
from sklearn.preprocessing import LabelEncoder

from evo_gafs.core.config import GAConfig
from evo_gafs.core.selector import GAFeatureSelector

ArrayLike = Union[np.ndarray, pd.DataFrame]


[docs] class BenchmarkRunner: """Run and compare GA feature selection across several datasets. For each registered dataset the runner records: * the model's cross-validated score using **all** features (baseline), * the score using the features **selected** by the GA, * the compression ratio, and * the wall-clock time. Examples -------- >>> runner = BenchmarkRunner() # doctest: +SKIP >>> runner.add_dataset("Iris", X, y, task_type="classification") # doctest: +SKIP >>> runner.run(DecisionTreeClassifier()) # doctest: +SKIP >>> runner.report() # doctest: +SKIP """ def __init__(self) -> None: self._datasets: list[dict] = [] self._results: list[dict] = []
[docs] def add_dataset( self, name: str, X: ArrayLike, y: ArrayLike, task_type: str = "auto", description: str = "", ) -> BenchmarkRunner: """Register a dataset for the benchmark. Returns ``self`` for chaining.""" self._datasets.append( {"name": name, "X": X, "y": y, "task_type": task_type, "description": description} ) return self
[docs] def run( self, estimator: BaseEstimator, config: GAConfig | None = None, scoring: str | None = None, verbose: bool = True, estimator_regression: BaseEstimator | None = None, ) -> list[dict]: """Run the benchmark over all registered datasets. Parameters ---------- estimator : sklearn estimator Model for classification datasets (and for all datasets when ``estimator_regression`` is not given). config : GAConfig, optional Configuration applied to every run. scoring : str, optional Scoring string; auto-selected per task when ``None``. verbose : bool, default=True Print a per-dataset progress report. estimator_regression : sklearn estimator, optional Alternative model for regression datasets. Returns ------- list of dict One result entry per dataset. """ config = config or GAConfig(verbose=False) self._results = [] for dataset in self._datasets: self._results.append( self._run_one(dataset, estimator, config, scoring, verbose, estimator_regression) ) return self._results
def _run_one( self, dataset: dict, estimator: BaseEstimator, config: GAConfig, scoring: str | None, verbose: bool, estimator_regression: BaseEstimator | None, ) -> dict: name, X, y = dataset["name"], dataset["X"], dataset["y"] X_array = X.to_numpy() if isinstance(X, pd.DataFrame) else np.asarray(X) y_array = np.asarray(y) if y_array.dtype == object: y_array = LabelEncoder().fit_transform(y_array) task = dataset["task_type"] if task == "auto": unique = np.unique(y_array) task = ( "classification" if len(unique) <= 20 and y_array.dtype.kind in "iub" else "regression" ) est = clone( estimator_regression if task == "regression" and estimator_regression is not None else estimator ) scoring_resolved = scoring or ("accuracy" if task == "classification" else "r2") if verbose: print( f"\n{'=' * 60}\n Dataset: {name}\n Shape: {X_array.shape}\n" f" Task: {task}\n{'=' * 60}" ) cv = self._build_cv(task, config, y_array) cv_score_full = self._score(est, X_array, y_array, scoring_resolved, cv, config) if verbose: print(f" Baseline CV score (all features): {cv_score_full:.4f}") start = time.time() selector = GAFeatureSelector( estimator=clone(est), config=copy.deepcopy(config), scoring=scoring_resolved, task_type=task, feature_names=list(X.columns) if isinstance(X, pd.DataFrame) else None, ) selector.fit(X, y) elapsed = time.time() - start result = selector.result_ X_selected = X_array[:, result.selected_indices] cv_score_selected = self._score(est, X_selected, y_array, scoring_resolved, cv, config) if verbose: print(f" GA CV score (selected features): {cv_score_selected:.4f}") print( f" Features: {X_array.shape[1]} -> {result.n_selected} " f"({result.compression_ratio:.1%} compression)" ) print(f" Time: {elapsed:.2f}s | Evaluations: {result.n_evaluations}") return { "name": name, "n_samples": X_array.shape[0], "n_features_original": X_array.shape[1], "n_features_selected": result.n_selected, "compression_ratio": result.compression_ratio, "cv_score_full": cv_score_full, "cv_score_selected": cv_score_selected, "score_delta": cv_score_selected - cv_score_full, "time_seconds": elapsed, "n_evaluations": result.n_evaluations, "scoring": scoring_resolved, "task_type": task, "selected_features": result.selected_feature_names, "result": result, "selector": selector, } @staticmethod def _build_cv(task: str, config: GAConfig, y: np.ndarray) -> object: if task == "classification": return StratifiedKFold( n_splits=config.cv_folds, shuffle=True, random_state=config.random_seed ) return KFold(n_splits=config.cv_folds, shuffle=True, random_state=config.random_seed) @staticmethod def _score(estimator, X, y, scoring, cv, config: GAConfig) -> float: with warnings.catch_warnings(): warnings.simplefilter("ignore") scores = cross_val_score( clone(estimator), X, y, scoring=scoring, cv=cv, n_jobs=config.n_jobs ) return float(np.mean(scores))
[docs] def report(self) -> pd.DataFrame: """Return (and print) a summary :class:`pandas.DataFrame` of the runs.""" if not self._results: print("No results. Call run() first.") return pd.DataFrame() rows = [ { "Dataset": r["name"], "Samples": r["n_samples"], "Feats. original": r["n_features_original"], "Feats. selected": r["n_features_selected"], "Compression": f"{r['compression_ratio']:.1%}", "CV (all)": f"{r['cv_score_full']:.4f}", "CV (selected)": f"{r['cv_score_selected']:.4f}", "Delta": f"{r['score_delta']:+.4f}", "Time (s)": f"{r['time_seconds']:.1f}", "Evals": r["n_evaluations"], "Task": r["task_type"], "Scoring": r["scoring"], } for r in self._results ] df = pd.DataFrame(rows) print("\n" + "=" * 80) print(" BENCHMARK REPORT - GA Feature Selection") print("=" * 80) print(df.to_string(index=False)) print("=" * 80) return df
@property def results(self) -> list[dict]: """The list of result entries from the last :meth:`run`.""" return self._results