survivalist.meta

 1from .ensemble_selection import (
 2    EnsembleSelection,
 3    EnsembleSelectionRegressor,
 4    MeanEstimator,
 5)
 6from .stacking import Stacking
 7
 8__all__ = [
 9    "EnsembleSelection",
10    "EnsembleSelectionRegressor",
11    "MeanEstimator",
12    "Stacking",
13]
class EnsembleSelection(survivalist.meta.ensemble_selection.BaseEnsembleSelection):
353class EnsembleSelection(BaseEnsembleSelection):
354    """Ensemble selection for survival analysis that accounts for a score and correlations between predictions.
355
356    The ensemble is pruned during training only according to the specified score (accuracy) and
357    additionally for prediction according to the correlation between predictions (diversity).
358
359    The hillclimbing is based on cross-validation to avoid having to create a separate validation set.
360
361    See [1]_, [2]_, [3]_ for further description.
362
363    Parameters
364    ----------
365    base_estimators : list
366        List of (name, estimator) tuples (implementing fit/predict) that are
367        part of the ensemble.
368
369    scorer : callable
370        Function with signature ``func(estimator, X_test, y_test, **test_predict_params)`` that evaluates the error
371        of the prediction on the test data. The function should return a scalar value.
372        *Larger* values of the score are assumed to be better.
373
374    n_estimators : float or int, optional, default: 0.2
375        If a float, the percentage of estimators in the ensemble to retain, if an int the
376        absolute number of estimators to retain.
377
378    min_score : float, optional, default: 0.66
379        Threshold for pruning estimators based on scoring metric. After `fit`, only estimators
380        with a score above `min_score` are retained.
381
382    min_correlation : float, optional, default: 0.6
383        Threshold for Pearson's correlation coefficient that determines when predictions of
384        two estimators are significantly correlated.
385
386    cv : int, a cv generator instance, or None, optional
387        The input specifying which cv generator to use. It can be an
388        integer, in which case it is the number of folds in a KFold,
389        None, in which case 3 fold is used, or another object, that
390        will then be used as a cv generator. The generator has to ensure
391        that each sample is only used once for testing.
392
393    n_jobs : int, optional, default: 1
394        Number of jobs to run in parallel.
395
396    verbose : integer
397        Controls the verbosity: the higher, the more messages.
398
399    Attributes
400    ----------
401    scores_ : ndarray, shape = (n_base_estimators,)
402        Array of scores (relative to best performing estimator)
403
404    fitted_models_ : ndarray
405        Selected models during training based on `scorer`.
406
407    n_features_in_ : int
408        Number of features seen during ``fit``.
409
410    feature_names_in_ : ndarray of shape (`n_features_in_`,)
411        Names of features seen during ``fit``. Defined only when `X`
412        has feature names that are all strings.
413
414    References
415    ----------
416
417    .. [1] Pölsterl, S., Gupta, P., Wang, L., Conjeti, S., Katouzian, A., and Navab, N.,
418         "Heterogeneous ensembles for predicting survival of metastatic, castrate-resistant prostate cancer patients".
419         F1000Research, vol. 5, no. 2676, 2016
420
421    .. [2] Caruana, R., Munson, A., Niculescu-Mizil, A.
422        "Getting the most out of ensemble selection". 6th IEEE International Conference on Data Mining, 828-833, 2006
423
424    .. [3] Rooney, N., Patterson, D., Anand, S., Tsymbal, A.
425        "Dynamic integration of regression models. International Workshop on Multiple Classifier Systems".
426        Lecture Notes in Computer Science, vol. 3181, 164-173, 2004
427    """
428
429    _parameter_constraints = {
430        **BaseEnsembleSelection._parameter_constraints,
431    }
432    _parameter_constraints.pop("meta_estimator")
433
434    def __init__(
435        self,
436        base_estimators,
437        *,
438        scorer=None,
439        n_estimators=0.2,
440        min_score=0.2,
441        correlation="pearson",
442        min_correlation=0.6,
443        cv=None,
444        n_jobs=1,
445        verbose=0,
446    ):
447        super().__init__(
448            meta_estimator=MeanRankEstimator(),
449            base_estimators=base_estimators,
450            scorer=scorer,
451            n_estimators=n_estimators,
452            min_score=min_score,
453            correlation=correlation,
454            min_correlation=min_correlation,
455            cv=cv,
456            n_jobs=n_jobs,
457            verbose=verbose,
458        )
459
460    def _fit(self, X, y, cv, **fit_params):
461        scores, base_ensemble = self._fit_and_score_ensemble(
462            X, y, cv, **fit_params)
463        self.fitted_models_, self.scores_ = self._prune_by_cv_score(
464            scores, base_ensemble)
465
466    def _prune_by_cv_score(self, scores, base_ensemble, model_names=None):
467        mean_scores = scores.mean(axis=1)
468        idx_good_models = np.flatnonzero(mean_scores >= self.min_score)
469        if len(idx_good_models) == 0:
470            raise ValueError(
471                "no base estimator exceeds min_score, try decreasing it")
472
473        total_score = mean_scores[idx_good_models]
474        max_score = total_score.max()
475        total_score /= max_score
476
477        fitted_models = self._create_cv_ensemble(
478            base_ensemble, idx_good_models, model_names)
479
480        return fitted_models, total_score
481
482    def _prune_by_correlation(self, X):
483        n_models = len(self.fitted_models_)
484
485        out = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(
486            delayed(_predict)(est, X, i) for i, est in enumerate(self.fitted_models_)
487        )
488
489        predictions = np.empty((X.shape[0], n_models), order="F")
490        for i, p in out:
491            predictions[:, i] = p
492
493        if n_models > self.n_estimators_:
494            final_scores = self._add_diversity_score(self.scores_, predictions)
495            sorted_idx = np.argsort(-final_scores, kind="mergesort")
496
497            selected_models = sorted_idx[: self.n_estimators_]
498            return predictions[:, selected_models]
499
500        return predictions
501
502    def _predict_estimators(self, X):
503        predictions = self._prune_by_correlation(X)
504        return predictions

Ensemble selection for survival analysis that accounts for a score and correlations between predictions.

During training, the ensemble is pruned only according to the specified score (accuracy). At prediction time, it is additionally pruned according to the correlation between the estimators' predictions (diversity).

The hillclimbing is based on cross-validation to avoid having to create a separate validation set.

See references 1, 2, and 3 below for further description.

Parameters

base_estimators : list List of (name, estimator) tuples (implementing fit/predict) that are part of the ensemble.

scorer : callable Function with signature func(estimator, X_test, y_test, **test_predict_params) that evaluates the quality of the prediction on the test data. The function should return a scalar value. Larger values of the score are assumed to be better.

n_estimators : float or int, optional, default: 0.2 If a float, the percentage of estimators in the ensemble to retain, if an int the absolute number of estimators to retain.

min_score : float, optional, default: 0.2 Threshold for pruning estimators based on scoring metric. After fit, only estimators with a score of at least min_score are retained.

min_correlation : float, optional, default: 0.6 Threshold for Pearson's correlation coefficient that determines when predictions of two estimators are significantly correlated.

cv : int, a cv generator instance, or None, optional The input specifying which cv generator to use. It can be an integer, in which case it is the number of folds in a KFold, None, in which case 3 fold is used, or another object, that will then be used as a cv generator. The generator has to ensure that each sample is only used once for testing.

n_jobs : int, optional, default: 1 Number of jobs to run in parallel.

verbose : integer Controls the verbosity: the higher, the more messages.

Attributes

scores_ : ndarray, shape = (n_base_estimators,) Array of scores (relative to best performing estimator)

fitted_models_ : ndarray Selected models during training based on scorer.

n_features_in_ : int Number of features seen during fit.

feature_names_in_ : ndarray of shape (n_features_in_,) Names of features seen during fit. Defined only when X has feature names that are all strings.

References


  1. Pölsterl, S., Gupta, P., Wang, L., Conjeti, S., Katouzian, A., and Navab, N., "Heterogeneous ensembles for predicting survival of metastatic, castrate-resistant prostate cancer patients". F1000Research, vol. 5, no. 2676, 2016 

  2. Caruana, R., Munson, A., Niculescu-Mizil, A. "Getting the most out of ensemble selection". 6th IEEE International Conference on Data Mining, 828-833, 2006 

  3. Rooney, N., Patterson, D., Anand, S., Tsymbal, A. "Dynamic integration of regression models". International Workshop on Multiple Classifier Systems. Lecture Notes in Computer Science, vol. 3181, 164-173, 2004 

class EnsembleSelectionRegressor(survivalist.meta.ensemble_selection.BaseEnsembleSelection):
507class EnsembleSelectionRegressor(BaseEnsembleSelection):
508    """Ensemble selection for regression that accounts for the accuracy and correlation of errors.
509
510    The ensemble is pruned during training according to estimators' accuracy and the correlation
511    between prediction errors per sample. The accuracy of the *i*-th estimator defined as
512    :math:`\\frac{ \\min_{i=1,\\ldots, n}(error_i) }{ error_i }`.
513    In addition to the accuracy, models are selected based on the correlation between residuals
514    of different models (diversity). The diversity of the *i*-th estimator is defined as
515    :math:`\\frac{n-count}{n}`, where *count* is the number of estimators for whom the correlation
516    of residuals exceeds `min_correlation`.
517
518    The hillclimbing is based on cross-validation to avoid having to create a separate validation set.
519
520    See [1]_, [2]_, [3]_ for further description.
521
522    Parameters
523    ----------
524    base_estimators : list
525        List of (name, estimator) tuples (implementing fit/predict) that are
526        part of the ensemble.
527
528    scorer : callable
529        Function with signature ``func(estimator, X_test, y_test, **test_predict_params)`` that evaluates the error
530        of the prediction on the test data. The function should return a scalar value.
531        *Smaller* values of the score are assumed to be better.
532
533    n_estimators : float or int, optional, default: 0.2
534        If a float, the percentage of estimators in the ensemble to retain, if an int the
535        absolute number of estimators to retain.
536
537    min_score : float, optional, default: 0.66
538        Threshold for pruning estimators based on scoring metric. After `fit`, only estimators
539        with a accuracy above `min_score` are retained.
540
541    min_correlation : float, optional, default: 0.6
542        Threshold for Pearson's correlation coefficient that determines when residuals of
543        two estimators are significantly correlated.
544
545    cv : int, a cv generator instance, or None, optional
546        The input specifying which cv generator to use. It can be an
547        integer, in which case it is the number of folds in a KFold,
548        None, in which case 3 fold is used, or another object, that
549        will then be used as a cv generator. The generator has to ensure
550        that each sample is only used once for testing.
551
552    n_jobs : int, optional, default: 1
553        Number of jobs to run in parallel.
554
555    verbose : int, optional, default: 0
556        Controls the verbosity: the higher, the more messages.
557
558    Attributes
559    ----------
560    scores_ : ndarray, shape = (n_base_estimators,)
561        Array of scores (relative to best performing estimator)
562
563    fitted_models_ : ndarray
564        Selected models during training based on `scorer`.
565
566    n_features_in_ : int
567        Number of features seen during ``fit``.
568
569    feature_names_in_ : ndarray of shape (`n_features_in_`,)
570        Names of features seen during ``fit``. Defined only when `X`
571        has feature names that are all strings.
572
573    References
574    ----------
575
576    .. [1] Pölsterl, S., Gupta, P., Wang, L., Conjeti, S., Katouzian, A., and Navab, N.,
577         "Heterogeneous ensembles for predicting survival of metastatic, castrate-resistant prostate cancer patients".
578         F1000Research, vol. 5, no. 2676, 2016
579
580    .. [2] Caruana, R., Munson, A., Niculescu-Mizil, A.
581        "Getting the most out of ensemble selection". 6th IEEE International Conference on Data Mining, 828-833, 2006
582
583    .. [3] Rooney, N., Patterson, D., Anand, S., Tsymbal, A.
584        "Dynamic integration of regression models. International Workshop on Multiple Classifier Systems".
585        Lecture Notes in Computer Science, vol. 3181, 164-173, 2004
586    """
587
588    _parameter_constraints = {
589        **BaseEnsembleSelection._parameter_constraints,
590    }
591    _parameter_constraints.pop("meta_estimator")
592
593    def __init__(
594        self,
595        base_estimators,
596        *,
597        scorer=None,
598        n_estimators=0.2,
599        min_score=0.66,
600        correlation="pearson",
601        min_correlation=0.6,
602        cv=None,
603        n_jobs=1,
604        verbose=0,
605    ):
606        super().__init__(
607            meta_estimator=MeanEstimator(),
608            base_estimators=base_estimators,
609            scorer=scorer,
610            n_estimators=n_estimators,
611            min_score=min_score,
612            correlation=correlation,
613            min_correlation=min_correlation,
614            cv=cv,
615            n_jobs=n_jobs,
616            verbose=verbose,
617        )
618
619    @property
620    def _predict_risk_score(self):
621        return False
622
623    def _fit(self, X, y, cv, **fit_params):
624        scores, base_ensemble = self._fit_and_score_ensemble(
625            X, y, cv, **fit_params)
626        fitted_models, scores = self._prune_by_cv_score(scores, base_ensemble)
627
628        if len(fitted_models) > self.n_estimators_:
629            fitted_models, scores = self._prune_by_correlation(
630                fitted_models, scores, X, y)
631
632        self.fitted_models_ = fitted_models
633        self.scores_ = scores
634
635    def _prune_by_cv_score(self, scores, base_ensemble, model_names=None):
636        mean_scores = scores.mean(axis=1)
637        mean_scores = mean_scores.min() / mean_scores
638
639        idx_good_models = np.flatnonzero(mean_scores >= self.min_score)
640        if len(idx_good_models) == 0:
641            raise ValueError(
642                "no base estimator exceeds min_score, try decreasing it")
643
644        fitted_models = self._create_cv_ensemble(
645            base_ensemble, idx_good_models, model_names)
646
647        return fitted_models, mean_scores[idx_good_models]
648
649    def _prune_by_correlation(self, fitted_models, scores, X, y):
650        n_models = len(fitted_models)
651
652        out = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(
653            delayed(_score_regressor)(est, X, y, i) for i, est in enumerate(fitted_models)
654        )
655
656        error = np.empty((X.shape[0], n_models), order="F")
657        for i, err in out:
658            error[:, i] = err
659
660        final_scores = self._add_diversity_score(scores, error)
661        sorted_idx = np.argsort(-final_scores, kind="mergesort")
662
663        selected_models = sorted_idx[: self.n_estimators_]
664
665        return fitted_models[selected_models], final_scores
666
667    def _predict_estimators(self, X):
668        n_models = len(self.fitted_models_)
669
670        out = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(
671            delayed(_predict)(est, X, i) for i, est in enumerate(self.fitted_models_)
672        )
673
674        predictions = np.empty((X.shape[0], n_models), order="F")
675        for i, p in out:
676            predictions[:, i] = p
677
678        return predictions

Ensemble selection for regression that accounts for the accuracy and correlation of errors.

The ensemble is pruned during training according to estimators' accuracy and the correlation between prediction errors per sample. The accuracy of the i-th estimator is defined as \( \frac{ \min_{j=1,\ldots, n}(error_j) }{ error_i } \). In addition to the accuracy, models are selected based on the correlation between residuals of different models (diversity). The diversity of the i-th estimator is defined as \( \frac{n-count}{n} \), where count is the number of estimators for whom the correlation of residuals exceeds min_correlation.

The hillclimbing is based on cross-validation to avoid having to create a separate validation set.

See references 1, 2, and 3 below for further description.

Parameters

base_estimators : list List of (name, estimator) tuples (implementing fit/predict) that are part of the ensemble.

scorer : callable Function with signature func(estimator, X_test, y_test, **test_predict_params) that evaluates the error of the prediction on the test data. The function should return a scalar value. Smaller values of the score are assumed to be better.

n_estimators : float or int, optional, default: 0.2 If a float, the percentage of estimators in the ensemble to retain, if an int the absolute number of estimators to retain.

min_score : float, optional, default: 0.66 Threshold for pruning estimators based on scoring metric. After fit, only estimators with an accuracy of at least min_score are retained.

min_correlation : float, optional, default: 0.6 Threshold for Pearson's correlation coefficient that determines when residuals of two estimators are significantly correlated.

cv : int, a cv generator instance, or None, optional The input specifying which cv generator to use. It can be an integer, in which case it is the number of folds in a KFold, None, in which case 3 fold is used, or another object, that will then be used as a cv generator. The generator has to ensure that each sample is only used once for testing.

n_jobs : int, optional, default: 1 Number of jobs to run in parallel.

verbose : int, optional, default: 0 Controls the verbosity: the higher, the more messages.

Attributes

scores_ : ndarray, shape = (n_base_estimators,) Array of scores (relative to best performing estimator)

fitted_models_ : ndarray Selected models during training based on scorer.

n_features_in_ : int Number of features seen during fit.

feature_names_in_ : ndarray of shape (n_features_in_,) Names of features seen during fit. Defined only when X has feature names that are all strings.

References


  1. Pölsterl, S., Gupta, P., Wang, L., Conjeti, S., Katouzian, A., and Navab, N., "Heterogeneous ensembles for predicting survival of metastatic, castrate-resistant prostate cancer patients". F1000Research, vol. 5, no. 2676, 2016 

  2. Caruana, R., Munson, A., Niculescu-Mizil, A. "Getting the most out of ensemble selection". 6th IEEE International Conference on Data Mining, 828-833, 2006 

  3. Rooney, N., Patterson, D., Anand, S., Tsymbal, A. "Dynamic integration of regression models". International Workshop on Multiple Classifier Systems. Lecture Notes in Computer Science, vol. 3181, 164-173, 2004 

class MeanEstimator(sklearn.base.BaseEstimator):
61class MeanEstimator(BaseEstimator):
62    def fit(self, X, y=None, **kwargs):  # pragma: no cover; # pylint: disable=unused-argument
63        return self
64
65    def predict(self, X):  # pylint: disable=no-self-use
66        return X.mean(axis=X.ndim - 1)

Base class for all estimators in scikit-learn.

Inheriting from this class provides default implementations of:

  • setting and getting parameters used by GridSearchCV and friends;
  • textual and HTML representation displayed in terminals and IDEs;
  • estimator serialization;
  • parameters validation;
  • data validation;
  • feature names validation.

Read more in the scikit-learn User Guide, section "Rolling your own estimator".

Notes

All estimators should specify all the parameters that can be set at the class level in their __init__ as explicit keyword arguments (no *args or **kwargs).

Examples

>>> import numpy as np
>>> from sklearn.base import BaseEstimator
>>> class MyEstimator(BaseEstimator):
...     def __init__(self, *, param=1):
...         self.param = param
...     def fit(self, X, y=None):
...         self.is_fitted_ = True
...         return self
...     def predict(self, X):
...         return np.full(shape=X.shape[0], fill_value=self.param)
>>> estimator = MyEstimator(param=2)
>>> estimator.get_params()
{'param': 2}
>>> X = np.array([[1, 2], [2, 3], [3, 4]])
>>> y = np.array([1, 0, 1])
>>> estimator.fit(X, y).predict(X)
array([2, 2, 2])
>>> estimator.set_params(param=3).fit(X, y).predict(X)
array([3, 3, 3])
def fit(self, X, y=None, **kwargs):
62    def fit(self, X, y=None, **kwargs):  # pragma: no cover; # pylint: disable=unused-argument
63        return self
def predict(self, X):
65    def predict(self, X):  # pylint: disable=no-self-use
66        return X.mean(axis=X.ndim - 1)
class Stacking(sklearn.base.MetaEstimatorMixin, survivalist.base.SurvivalAnalysisMixin, sklearn.utils.metaestimators._BaseComposition):
 36class Stacking(MetaEstimatorMixin, SurvivalAnalysisMixin, _BaseComposition):
 37    """Meta estimator that combines multiple base learners.
 38
 39    By default, base estimators' output corresponds to the array returned
 40    by `predict_proba`. If `predict_proba` is not available or `probabilities = False`,
 41    the output of `predict` is used.
 42
 43    Parameters
 44    ----------
 45    meta_estimator : instance of estimator
 46        The estimator that is used to combine the output of different
 47        base estimators.
 48
 49    base_estimators : list
 50        List of (name, estimator) tuples (implementing fit/predict) that are
 51        part of the ensemble.
 52
 53    probabilities : bool, optional, default: True
 54        Whether to allow using `predict_proba` method of base learners, if available.
 55
 56    Attributes
 57    ----------
 58    estimators_ : list of estimators
 59        The elements of the estimators parameter, having been fitted on the
 60        training data.
 61
 62    named_estimators_ : dict
 63        Attribute to access any fitted sub-estimators by name.
 64
 65    final_estimator_ : estimator
 66        The estimator which combines the output of `estimators_`.
 67
 68    n_features_in_ : int
 69        Number of features seen during ``fit``.
 70
 71    feature_names_in_ : ndarray of shape (`n_features_in_`,)
 72        Names of features seen during ``fit``. Defined only when `X`
 73        has feature names that are all strings.
 74
 75    unique_times_ : array of shape = (n_unique_times,)
 76        Unique time points.
 77    """
 78
 79    _parameter_constraints = {
 80        "meta_estimator": [HasMethods(["fit"])],
 81        "base_estimators": [list],
 82        "probabilities": ["boolean"],
 83    }
 84
 85    def __init__(self, meta_estimator, base_estimators, *, probabilities=True):
 86        self.meta_estimator = meta_estimator
 87        self.base_estimators = base_estimators
 88        self.probabilities = probabilities
 89
 90        self._extra_params = [
 91            "meta_estimator",
 92            "base_estimators",
 93            "probabilities",
 94        ]
 95
 96    def _validate_estimators(self):
 97        names, estimators = zip(*self.base_estimators)
 98        if len(set(names)) != len(self.base_estimators):
 99            raise ValueError(f"Names provided are not unique: {names}")
100
101        for t in estimators:
102            if not hasattr(t, "fit") or not (hasattr(t, "predict") or hasattr(t, "predict_proba")):
103                raise TypeError(
104                    "All base estimators should implement "
105                    "fit and predict/predict_proba"
106                    f" {t!s} (type {type(t)}) doesn't)"
107                )
108
109    def set_params(self, **params):
110        """
111        Set the parameters of an estimator from the ensemble.
112
113        Valid parameter keys can be listed with `get_params()`. Note that you
114        can directly set the parameters of the estimators contained in
115        `estimators`.
116
117        Parameters
118        ----------
119        **params : keyword arguments
120            Specific parameters using e.g.
121            `set_params(parameter_name=new_value)`. In addition, to setting the
122            parameters of the estimator, the individual estimator of the
123            estimators can also be set, or can be removed by setting them to
124            'drop'.
125
126        Returns
127        -------
128        self : object
129            Estimator instance.
130        """
131        super()._set_params("base_estimators", **params)
132        return self
133
134    def get_params(self, deep=True):
135        """
136        Get the parameters of an estimator from the ensemble.
137
138        Returns the parameters given in the constructor as well as the
139        estimators contained within the `estimators` parameter.
140
141        Parameters
142        ----------
143        deep : bool, default=True
144            Setting it to True gets the various estimators and the parameters
145            of the estimators as well.
146
147        Returns
148        -------
149        params : dict
150            Parameter and estimator names mapped to their values or parameter
151            names mapped to their values.
152        """
153        return super()._get_params("base_estimators", deep=deep)
154
155    def _split_fit_params(self, fit_params):
156        fit_params_steps = {step: {} for step, _ in self.base_estimators}
157        for pname, pval in fit_params.items():
158            step, param = pname.split("__", 1)
159            fit_params_steps[step][param] = pval
160        return fit_params_steps
161
162    def _fit_estimators(self, X, y=None, **fit_params):
163        if hasattr(self, "feature_names_in_"):
164            # Delete the attribute when the estimator is fitted on a new dataset
165            # that has no feature names.
166            delattr(self, "feature_names_in_")
167
168        fit_params_steps = self._split_fit_params(fit_params)
169        names = []
170        estimators = []
171        for name, estimator in self.base_estimators:
172            est = clone(estimator).fit(X, y, **fit_params_steps[name])
173
174            if hasattr(est, "n_features_in_"):
175                self.n_features_in_ = est.n_features_in_
176            if hasattr(est, "feature_names_in_"):
177                self.feature_names_in_ = est.feature_names_in_
178
179            estimators.append(est)
180            names.append(name)
181
182        self.named_estimators = dict(zip(names, estimators))
183        self.estimators_ = estimators
184
185    def _predict_estimators(self, X):
186        Xt = None
187        start = 0
188        for estimator in self.estimators_:
189            if self.probabilities and hasattr(estimator, "predict_proba"):
190                p = estimator.predict_proba(X)
191            else:
192                p = estimator.predict(X)
193
194            if p.ndim == 1:
195                p = p[:, np.newaxis]
196
197            if Xt is None:
198                # assume that prediction array has the same size for all base learners
199                n_classes = p.shape[1]
200                Xt = np.empty(
201                    (p.shape[0], n_classes * len(self.base_estimators)),
202                    order="F",
203                )
204            Xt[:, slice(start, start + n_classes)] = p
205            start += n_classes
206
207        return Xt
208
209    def __len__(self):
210        return len(self.base_estimators)
211
212    def fit(self, X, y=None, **fit_params):
213        """Fit base estimators.
214
215        Parameters
216        ----------
217        X : array-like, shape = (n_samples, n_features)
218            Training data.
219
220        y : array-like, optional
221            Target data if base estimators are supervised.
222
223        Returns
224        -------
225        self
226        """
227        self._validate_params()
228        self._validate_estimators()
229        self._fit_estimators(X, y, **fit_params)
230        Xt = self._predict_estimators(X)
231        self.final_estimator_ = self.meta_estimator.fit(Xt, y)
232
233        return self
234
235    @available_if(_meta_estimator_has("predict"))
236    def predict(self, X):
237        """Perform prediction.
238
239        Only available of the meta estimator has a predict method.
240
241        Parameters
242        ----------
243        X : array-like, shape = (n_samples, n_features)
244            Data with samples to predict.
245
246        Returns
247        -------
248        prediction : array, shape = (n_samples, n_dim)
249            Prediction of meta estimator that combines
250            predictions of base estimators. `n_dim` depends
251            on the return value of meta estimator's `predict`
252            method.
253        """
254        Xt = self._predict_estimators(X)
255        return self.final_estimator_.predict(Xt)
256
257    @available_if(_meta_estimator_has("predict_proba"))
258    def predict_proba(self, X):
259        """Perform prediction.
260
261        Only available if the meta estimator has a predict_proba method.
262
263        Parameters
264        ----------
265        X : array-like, shape = (n_samples, n_features)
266            Data with samples to predict.
267
268        Returns
269        -------
270        prediction : ndarray, shape = (n_samples, n_dim)
271            Prediction of meta estimator that combines
272            predictions of base estimators. `n_dim` depends
273            on the return value of meta estimator's `predict`
274            method.
275        """
276        Xt = self._predict_estimators(X)
277        return self.final_estimator_.predict_proba(Xt)
278
279    @available_if(_meta_estimator_has("predict_log_proba"))
280    def predict_log_proba(self, X):
281        """Perform prediction.
282
283        Only available if the meta estimator has a predict_log_proba method.
284
285        Parameters
286        ----------
287        X : array-like, shape = (n_samples, n_features)
288            Data with samples to predict.
289
290        Returns
291        -------
292        prediction : ndarray, shape = (n_samples, n_dim)
293            Prediction of meta estimator that combines
294            predictions of base estimators. `n_dim` depends
295            on the return value of meta estimator's `predict`
296            method.
297        """
298        Xt = self._predict_estimators(X)
299        return self.final_estimator_.predict_log_proba(Xt)
300
301    @property_available_if(_meta_estimator_has("unique_times_"))
302    def unique_times_(self):
303        return self.meta_estimator.unique_times_
304
305    @available_if(_meta_estimator_has("predict_cumulative_hazard_function"))
306    def predict_cumulative_hazard_function(self, X, return_array=False):
307        """Perform prediction.
308
309        Only available if the meta estimator has a predict_cumulative_hazard_function method.
310
311        Parameters
312        ----------
313        X : array-like, shape = (n_samples, n_features)
314            Data with samples to predict.
315
316        return_array : boolean, default: False
317            If set, return an array with the cumulative hazard rate
318            for each `self.unique_times_`, otherwise an array of
319            :class:`survivalist.functions.StepFunction`.
320
321        Returns
322        -------
323        cum_hazard : ndarray
324            If `return_array` is set, an array with the cumulative hazard rate
325            for each `self.unique_times_`, otherwise an array of length `n_samples`
326            of :class:`survivalist.functions.StepFunction` instances will be returned.
327        """
328        Xt = self._predict_estimators(X)
329        return self.final_estimator_.predict_cumulative_hazard_function(Xt, return_array)
330
331    @available_if(_meta_estimator_has("predict_survival_function"))
332    def predict_survival_function(self, X, return_array=False):
333        """Perform prediction.
334
335        Only available if the meta estimator has a predict_survival_function method.
336
337        Parameters
338        ----------
339        X : array-like, shape = (n_samples, n_features)
340            Data with samples to predict.
341
342        Returns
343        -------
344        survival : ndarray
345            If `return_array` is set, an array with the probability of
346            survival for each `self.unique_times_`, otherwise an array of
347            length `n_samples` of :class:`survivalist.functions.StepFunction`
348            instances will be returned.
349
350        return_array : boolean, default: False
351            If set, return an array with the probability
352            of survival for each `self.unique_times_`,
353            otherwise an array of :class:`survivalist.functions.StepFunction`.
354
355        """
356        Xt = self._predict_estimators(X)
357        return self.final_estimator_.predict_survival_function(Xt, return_array)

Meta estimator that combines multiple base learners.

By default, base estimators' output corresponds to the array returned by predict_proba. If predict_proba is not available or probabilities = False, the output of predict is used.

Parameters

meta_estimator : instance of estimator The estimator that is used to combine the output of different base estimators.

base_estimators : list List of (name, estimator) tuples (implementing fit/predict) that are part of the ensemble.

probabilities : bool, optional, default: True Whether to allow using predict_proba method of base learners, if available.

Attributes

estimators_ : list of estimators The elements of the estimators parameter, having been fitted on the training data.

named_estimators : dict Attribute to access any fitted sub-estimators by name. Note that fit stores this attribute under the name named_estimators, without a trailing underscore.

final_estimator_ : estimator The estimator which combines the output of estimators_.

n_features_in_ : int Number of features seen during fit.

feature_names_in_ : ndarray of shape (n_features_in_,) Names of features seen during fit. Defined only when X has feature names that are all strings.

unique_times_ : array of shape = (n_unique_times,) Unique time points.

def fit(self, X, y=None, **fit_params):
212    def fit(self, X, y=None, **fit_params):
213        """Fit base estimators.
214
215        Parameters
216        ----------
217        X : array-like, shape = (n_samples, n_features)
218            Training data.
219
220        y : array-like, optional
221            Target data if base estimators are supervised.
222
223        Returns
224        -------
225        self
226        """
227        self._validate_params()
228        self._validate_estimators()
229        self._fit_estimators(X, y, **fit_params)
230        Xt = self._predict_estimators(X)
231        self.final_estimator_ = self.meta_estimator.fit(Xt, y)
232
233        return self

Fit base estimators.

Parameters

X : array-like, shape = (n_samples, n_features) Training data.

y : array-like, optional Target data if base estimators are supervised.

Returns

self

@available_if(_meta_estimator_has('predict'))
def predict(self, X):
235    @available_if(_meta_estimator_has("predict"))
236    def predict(self, X):
237        """Perform prediction.
238
239        Only available of the meta estimator has a predict method.
240
241        Parameters
242        ----------
243        X : array-like, shape = (n_samples, n_features)
244            Data with samples to predict.
245
246        Returns
247        -------
248        prediction : array, shape = (n_samples, n_dim)
249            Prediction of meta estimator that combines
250            predictions of base estimators. `n_dim` depends
251            on the return value of meta estimator's `predict`
252            method.
253        """
254        Xt = self._predict_estimators(X)
255        return self.final_estimator_.predict(Xt)

Perform prediction.

Only available if the meta estimator has a predict method.

Parameters

X : array-like, shape = (n_samples, n_features) Data with samples to predict.

Returns

prediction : array, shape = (n_samples, n_dim) Prediction of meta estimator that combines predictions of base estimators. n_dim depends on the return value of meta estimator's predict method.

@available_if(_meta_estimator_has('predict_proba'))
def predict_proba(self, X):
257    @available_if(_meta_estimator_has("predict_proba"))
258    def predict_proba(self, X):
259        """Perform prediction.
260
261        Only available if the meta estimator has a predict_proba method.
262
263        Parameters
264        ----------
265        X : array-like, shape = (n_samples, n_features)
266            Data with samples to predict.
267
268        Returns
269        -------
270        prediction : ndarray, shape = (n_samples, n_dim)
271            Prediction of meta estimator that combines
272            predictions of base estimators. `n_dim` depends
273            on the return value of meta estimator's `predict`
274            method.
275        """
276        Xt = self._predict_estimators(X)
277        return self.final_estimator_.predict_proba(Xt)

Perform prediction.

Only available if the meta estimator has a predict_proba method.

Parameters

X : array-like, shape = (n_samples, n_features) Data with samples to predict.

Returns

prediction : ndarray, shape = (n_samples, n_dim) Prediction of meta estimator that combines predictions of base estimators. n_dim depends on the return value of meta estimator's predict_proba method.