survivalist.metrics

   1# This program is free software: you can redistribute it and/or modify
   2# it under the terms of the GNU General Public License as published by
   3# the Free Software Foundation, either version 3 of the License, or
   4# (at your option) any later version.
   5#
   6# This program is distributed in the hope that it will be useful,
   7# but WITHOUT ANY WARRANTY; without even the implied warranty of
   8# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   9# GNU General Public License for more details.
  10#
  11# You should have received a copy of the GNU General Public License
  12# along with this program.  If not, see <http://www.gnu.org/licenses/>.
  13import numpy as np
  14from sklearn.base import BaseEstimator
  15from sklearn.utils import check_array, check_consistent_length
  16from sklearn.utils.metaestimators import available_if
  17from sklearn.utils.validation import check_is_fitted
  18
  19from .exceptions import NoComparablePairException
  20from .nonparametric import (
  21    CensoringDistributionEstimator,
  22    SurvivalFunctionEstimator,
  23)
  24from .util import check_y_survival
  25
  26__all__ = [
  27    "as_concordance_index_ipcw_scorer",
  28    "as_cumulative_dynamic_auc_scorer",
  29    "as_integrated_brier_score_scorer",
  30    "brier_score",
  31    "concordance_index_censored",
  32    "concordance_index_ipcw",
  33    "cumulative_dynamic_auc",
  34    "integrated_brier_score",
  35]
  36
  37
  38def _check_estimate_1d(estimate, test_time):
  39    estimate = check_array(estimate, ensure_2d=False, input_name="estimate")
  40    if estimate.ndim != 1:
  41        raise ValueError(
  42            f"Expected 1D array, got {estimate.ndim}D array instead:\narray={estimate}.\n")
  43    check_consistent_length(test_time, estimate)
  44    return estimate
  45
  46
  47def _check_inputs(event_indicator, event_time, estimate):
  48    check_consistent_length(event_indicator, event_time, estimate)
  49    event_indicator = check_array(
  50        event_indicator, ensure_2d=False, input_name="event_indicator")
  51    event_time = check_array(
  52        event_time, ensure_2d=False, input_name="event_time")
  53    estimate = _check_estimate_1d(estimate, event_time)
  54
  55    if not np.issubdtype(event_indicator.dtype, np.bool_):
  56        raise ValueError(
  57            f"only boolean arrays are supported as class labels for survival analysis, got {event_indicator.dtype}"
  58        )
  59
  60    if len(event_time) < 2:
  61        raise ValueError("Need a minimum of two samples")
  62
  63    if not event_indicator.any():
  64        raise ValueError("All samples are censored")
  65
  66    return event_indicator, event_time, estimate
  67
  68
  69def _check_times(test_time, times):
  70    times = check_array(np.atleast_1d(
  71        times), ensure_2d=False, input_name="times")
  72    times = np.unique(times)
  73
  74    if times.max() >= test_time.max() or times.min() < test_time.min():
  75        raise ValueError(
  76            f"all times must be within follow-up time of test data: [{test_time.min()}; {test_time.max()}["
  77        )
  78
  79    return times
  80
  81
  82def _check_estimate_2d(estimate, test_time, time_points, estimator):
  83    estimate = check_array(
  84        estimate,
  85        ensure_2d=False,
  86        allow_nd=False,
  87        input_name="estimate",
  88        estimator=estimator,
  89    )
  90    time_points = _check_times(test_time, time_points)
  91    check_consistent_length(test_time, estimate)
  92
  93    if estimate.ndim == 2 and estimate.shape[1] != time_points.shape[0]:
  94        raise ValueError(
  95            f"expected estimate with {time_points.shape[0]} columns, but got {estimate.shape[1]}")
  96
  97    return estimate, time_points
  98
  99
 100def _iter_comparable(event_indicator, event_time, order):
 101    n_samples = len(event_time)
 102    tied_time = 0
 103    i = 0
 104    while i < n_samples - 1:
 105        time_i = event_time[order[i]]
 106        end = i + 1
 107        while end < n_samples and event_time[order[end]] == time_i:
 108            end += 1
 109
 110        # check for tied event times
 111        event_at_same_time = event_indicator[order[i:end]]
 112        censored_at_same_time = ~event_at_same_time
 113
 114        mask = np.zeros(n_samples, dtype=bool)
 115        mask[end:] = True
 116        # an event is comparable to censored samples at same time point
 117        mask[i:end] = censored_at_same_time
 118
 119        for j in range(i, end):
 120            if event_indicator[order[j]]:
 121                tied_time += censored_at_same_time.sum()
 122                yield (j, mask, tied_time)
 123        i = end
 124
 125
 126def _estimate_concordance_index(event_indicator, event_time, estimate, weights, tied_tol=1e-8):
 127    order = np.argsort(event_time)
 128
 129    tied_time = None
 130
 131    concordant = 0
 132    discordant = 0
 133    tied_risk = 0
 134    numerator = 0.0
 135    denominator = 0.0
 136    for ind, mask, tied_time in _iter_comparable(event_indicator, event_time, order):
 137        est_i = estimate[order[ind]]
 138        event_i = event_indicator[order[ind]]
 139        w_i = weights[order[ind]]
 140
 141        est = estimate[order[mask]]
 142
 143        assert event_i, f"got censored sample at index {order[ind]}, but expected uncensored"
 144
 145        ties = np.absolute(est - est_i) <= tied_tol
 146        n_ties = ties.sum()
 147        # an event should have a higher score
 148        con = est < est_i
 149        n_con = con[~ties].sum()
 150
 151        numerator += w_i * n_con + 0.5 * w_i * n_ties
 152        denominator += w_i * mask.sum()
 153
 154        tied_risk += n_ties
 155        concordant += n_con
 156        discordant += est.size - n_con - n_ties
 157
 158    if tied_time is None:
 159        raise NoComparablePairException(
 160            "Data has no comparable pairs, cannot estimate concordance index.")
 161
 162    cindex = numerator / denominator
 163    return cindex, concordant, discordant, tied_risk, tied_time
 164
 165
 166def concordance_index_censored(event_indicator, event_time, estimate, tied_tol=1e-8):
 167    """Concordance index for right-censored data
 168
 169    The concordance index is defined as the proportion of all comparable pairs
 170    in which the predictions and outcomes are concordant.
 171
 172    Two samples are comparable if (i) both of them experienced an event (at different times),
 173    or (ii) the one with a shorter observed survival time experienced an event, in which case
 174    the event-free subject "outlived" the other. A pair is not comparable if they experienced
 175    events at the same time.
 176
 177    Concordance intuitively means that two samples were ordered correctly by the model.
 178    More specifically, two samples are concordant, if the one with a higher estimated
 179    risk score has a shorter actual survival time.
 180    When predicted risks are identical for a pair, 0.5 rather than 1 is added to the count
 181    of concordant pairs.
 182
 183    See the :ref:`User Guide </user_guide/evaluating-survival-models.ipynb>`
 184    and [1]_ for further description.
 185
 186    Parameters
 187    ----------
 188    event_indicator : array-like, shape = (n_samples,)
 189        Boolean array denotes whether an event occurred
 190
 191    event_time : array-like, shape = (n_samples,)
 192        Array containing the time of an event or time of censoring
 193
 194    estimate : array-like, shape = (n_samples,)
 195        Estimated risk of experiencing an event
 196
 197    tied_tol : float, optional, default: 1e-8
 198        The tolerance value for considering ties.
 199        If the absolute difference between risk scores is smaller
 200        or equal than `tied_tol`, risk scores are considered tied.
 201
 202    Returns
 203    -------
 204    cindex : float
 205        Concordance index
 206
 207    concordant : int
 208        Number of concordant pairs
 209
 210    discordant : int
 211        Number of discordant pairs
 212
 213    tied_risk : int
 214        Number of pairs having tied estimated risks
 215
 216    tied_time : int
 217        Number of comparable pairs sharing the same time
 218
 219    See also
 220    --------
 221    concordance_index_ipcw
 222        Alternative estimator of the concordance index with less bias.
 223
 224    References
 225    ----------
 226    .. [1] Harrell, F.E., Califf, R.M., Pryor, D.B., Lee, K.L., Rosati, R.A,
 227           "Multivariable prognostic models: issues in developing models,
 228           evaluating assumptions and adequacy, and measuring and reducing errors",
 229           Statistics in Medicine, 15(4), 361-87, 1996.
 230    """
 231    event_indicator, event_time, estimate = _check_inputs(
 232        event_indicator, event_time, estimate)
 233
 234    w = np.ones_like(estimate)
 235
 236    return _estimate_concordance_index(event_indicator, event_time, estimate, w, tied_tol)
 237
 238
 239def concordance_index_ipcw(survival_train, survival_test, estimate, tau=None, tied_tol=1e-8):
 240    """Concordance index for right-censored data based on inverse probability of censoring weights.
 241
 242    This is an alternative to the estimator in :func:`concordance_index_censored`
 243    that does not depend on the distribution of censoring times in the test data.
 244    Therefore, the estimate is unbiased and consistent for a population concordance
 245    measure that is free of censoring.
 246
 247    It is based on inverse probability of censoring weights, thus requires
 248    access to survival times from the training data to estimate the censoring
 249    distribution. Note that this requires that survival times `survival_test`
 250    lie within the range of survival times `survival_train`. This can be
 251    achieved by specifying the truncation time `tau`.
 252    The resulting `cindex` tells how well the given prediction model works in
 253    predicting events that occur in the time range from 0 to `tau`.
 254
 255    The estimator uses the Kaplan-Meier estimator to estimate the
 256    censoring survivor function. Therefore, it is restricted to
 257    situations where the random censoring assumption holds and
 258    censoring is independent of the features.
 259
 260    See the :ref:`User Guide </user_guide/evaluating-survival-models.ipynb>`
 261    and [1]_ for further description.
 262
 263    Parameters
 264    ----------
 265    survival_train : structured array, shape = (n_train_samples,)
 266        Survival times for training data to estimate the censoring
 267        distribution from.
 268        A structured array containing the binary event indicator
 269        as first field, and time of event or time of censoring as
 270        second field.
 271
 272    survival_test : structured array, shape = (n_samples,)
 273        Survival times of test data.
 274        A structured array containing the binary event indicator
 275        as first field, and time of event or time of censoring as
 276        second field.
 277
 278    estimate : array-like, shape = (n_samples,)
 279        Estimated risk of experiencing an event of test data.
 280
 281    tau : float, optional
 282        Truncation time. The survival function for the underlying
 283        censoring time distribution :math:`D` needs to be positive
 284        at `tau`, i.e., `tau` should be chosen such that the
 285        probability of being censored after time `tau` is non-zero:
 286        :math:`P(D > \\tau) > 0`. If `None`, no truncation is performed.
 287
 288    tied_tol : float, optional, default: 1e-8
 289        The tolerance value for considering ties.
 290        If the absolute difference between risk scores is smaller
 291        or equal than `tied_tol`, risk scores are considered tied.
 292
 293    Returns
 294    -------
 295    cindex : float
 296        Concordance index
 297
 298    concordant : int
 299        Number of concordant pairs
 300
 301    discordant : int
 302        Number of discordant pairs
 303
 304    tied_risk : int
 305        Number of pairs having tied estimated risks
 306
 307    tied_time : int
 308        Number of comparable pairs sharing the same time
 309
 310    See also
 311    --------
 312    concordance_index_censored
 313        Simpler estimator of the concordance index.
 314
 315    as_concordance_index_ipcw_scorer
 316        Wrapper class that uses :func:`concordance_index_ipcw`
 317        in its ``score`` method instead of the default
 318        :func:`concordance_index_censored`.
 319
 320    References
 321    ----------
 322    .. [1] Uno, H., Cai, T., Pencina, M. J., D’Agostino, R. B., & Wei, L. J. (2011).
 323           "On the C-statistics for evaluating overall adequacy of risk prediction
 324           procedures with censored survival data".
 325           Statistics in Medicine, 30(10), 1105–1117.
 326    """
 327    test_event, test_time = check_y_survival(survival_test)
 328
 329    if tau is not None:
 330        mask = test_time < tau
 331        survival_test = survival_test[mask]
 332
 333    estimate = _check_estimate_1d(estimate, test_time)
 334
 335    cens = CensoringDistributionEstimator()
 336    cens.fit(survival_train)
 337    ipcw_test = cens.predict_ipcw(survival_test)
 338    if tau is None:
 339        ipcw = ipcw_test
 340    else:
 341        ipcw = np.empty(estimate.shape[0], dtype=ipcw_test.dtype)
 342        ipcw[mask] = ipcw_test
 343        ipcw[~mask] = 0
 344
 345    w = np.square(ipcw)
 346
 347    return _estimate_concordance_index(test_event, test_time, estimate, w, tied_tol)
 348
 349
 350def cumulative_dynamic_auc(survival_train, survival_test, estimate, times, tied_tol=1e-8):
 351    """Estimator of cumulative/dynamic AUC for right-censored time-to-event data.
 352
 353    The receiver operating characteristic (ROC) curve and the area under the
 354    ROC curve (AUC) can be extended to survival data by defining
 355    sensitivity (true positive rate) and specificity (true negative rate)
 356    as time-dependent measures. *Cumulative cases* are all individuals that
 357    experienced an event prior to or at time :math:`t` (:math:`t_i \\leq t`),
 358    whereas *dynamic controls* are those with :math:`t_i > t`.
 359    The associated cumulative/dynamic AUC quantifies how well a model can
 360    distinguish subjects who fail by a given time (:math:`t_i \\leq t`) from
 361    subjects who fail after this time (:math:`t_i > t`).
 362
 363    Given an estimator of the :math:`i`-th individual's risk score
 364    :math:`\\hat{f}(\\mathbf{x}_i)`, the cumulative/dynamic AUC at time
 365    :math:`t` is defined as
 366
 367    .. math::
 368
 369        \\widehat{\\mathrm{AUC}}(t) =
 370        \\frac{\\sum_{i=1}^n \\sum_{j=1}^n I(y_j > t) I(y_i \\leq t) \\omega_i
 371        I(\\hat{f}(\\mathbf{x}_j) \\leq \\hat{f}(\\mathbf{x}_i))}
 372        {(\\sum_{i=1}^n I(y_i > t)) (\\sum_{i=1}^n I(y_i \\leq t) \\omega_i)}
 373
 374    where :math:`\\omega_i` are inverse probability of censoring weights (IPCW).
 375
 376    To estimate IPCW, access to survival times from the training data is required
 377    to estimate the censoring distribution. Note that this requires that survival
 378    times `survival_test` lie within the range of survival times `survival_train`.
 379    This can be achieved by specifying `times` accordingly, e.g. by setting
 380    `times[-1]` slightly below the maximum expected follow-up time.
 381    IPCW are computed using the Kaplan-Meier estimator, which is
 382    restricted to situations where the random censoring assumption holds and
 383    censoring is independent of the features.
 384
 385    This function can also be used to evaluate models with time-dependent predictions
 386    :math:`\\hat{f}(\\mathbf{x}_i, t)`, such as :class:`survivalist.ensemble.RandomSurvivalForest`
 387    (see :ref:`User Guide </user_guide/evaluating-survival-models.ipynb#Using-Time-dependent-Risk-Scores>`).
 388    In this case, `estimate` must be a 2-d array where ``estimate[i, j]`` is the
 389    predicted risk score for the i-th instance at time point ``times[j]``.
 390
 391    Finally, the function also provides a single summary measure that refers to the mean
 392    of the :math:`\\mathrm{AUC}(t)` over the time range :math:`(\\tau_1, \\tau_2)`.
 393
 394    .. math::
 395
 396        \\overline{\\mathrm{AUC}}(\\tau_1, \\tau_2) =
 397        \\frac{1}{\\hat{S}(\\tau_1) - \\hat{S}(\\tau_2)}
 398        \\int_{\\tau_1}^{\\tau_2} \\widehat{\\mathrm{AUC}}(t)\\,d \\hat{S}(t)
 399
 400    where :math:`\\hat{S}(t)` is the Kaplan–Meier estimator of the survival function.
 401
 402    See the :ref:`User Guide </user_guide/evaluating-survival-models.ipynb#Time-dependent-Area-under-the-ROC>`,
 403    [1]_, [2]_, [3]_ for further description.
 404
 405    Parameters
 406    ----------
 407    survival_train : structured array, shape = (n_train_samples,)
 408        Survival times for training data to estimate the censoring
 409        distribution from.
 410        A structured array containing the binary event indicator
 411        as first field, and time of event or time of censoring as
 412        second field.
 413
 414    survival_test : structured array, shape = (n_samples,)
 415        Survival times of test data.
 416        A structured array containing the binary event indicator
 417        as first field, and time of event or time of censoring as
 418        second field.
 419
 420    estimate : array-like, shape = (n_samples,) or (n_samples, n_times)
 421        Estimated risk of experiencing an event of test data.
 422        If `estimate` is a 1-d array, the same risk score across all time
 423        points is used. If `estimate` is a 2-d array, the risk scores in the
 424        j-th column are used to evaluate the j-th time point.
 425
 426    times : array-like, shape = (n_times,)
 427        The time points for which the area under the
 428        time-dependent ROC curve is computed. Values must be
 429        within the range of follow-up times of the test data
 430        `survival_test`.
 431
 432    tied_tol : float, optional, default: 1e-8
 433        The tolerance value for considering ties.
 434        If the absolute difference between risk scores is smaller
 435        or equal than `tied_tol`, risk scores are considered tied.
 436
 437    Returns
 438    -------
 439    auc : array, shape = (n_times,)
 440        The cumulative/dynamic AUC estimates (evaluated at `times`).
 441    mean_auc : float
 442        Summary measure referring to the mean cumulative/dynamic AUC
 443        over the specified time range `(times[0], times[-1])`.
 444
 445    See also
 446    --------
 447    as_cumulative_dynamic_auc_scorer
 448        Wrapper class that uses :func:`cumulative_dynamic_auc`
 449        in its ``score`` method instead of the default
 450        :func:`concordance_index_censored`.
 451
 452    References
 453    ----------
 454    .. [1] H. Uno, T. Cai, L. Tian, and L. J. Wei,
 455           "Evaluating prediction rules for t-year survivors with censored regression models,"
 456           Journal of the American Statistical Association, vol. 102, pp. 527–537, 2007.
 457    .. [2] H. Hung and C. T. Chiang,
 458           "Estimation methods for time-dependent AUC models with survival data,"
 459           Canadian Journal of Statistics, vol. 38, no. 1, pp. 8–26, 2010.
 460    .. [3] J. Lambert and S. Chevret,
 461           "Summary measure of discrimination in survival models based on cumulative/dynamic time-dependent ROC curves,"
 462           Statistical Methods in Medical Research, 2014.
 463    """
 464    test_event, test_time = check_y_survival(survival_test)
 465    estimate, times = _check_estimate_2d(
 466        estimate, test_time, times, estimator="cumulative_dynamic_auc")
 467
 468    n_samples = estimate.shape[0]
 469    n_times = times.shape[0]
 470    if estimate.ndim == 1:
 471        estimate = np.broadcast_to(
 472            estimate[:, np.newaxis], (n_samples, n_times))
 473
 474    # fit and transform IPCW
 475    cens = CensoringDistributionEstimator()
 476    cens.fit(survival_train)
 477    ipcw = cens.predict_ipcw(survival_test)
 478
 479    # expand arrays to (n_samples, n_times) shape
 480    test_time = np.broadcast_to(test_time[:, np.newaxis], (n_samples, n_times))
 481    test_event = np.broadcast_to(
 482        test_event[:, np.newaxis], (n_samples, n_times))
 483    times_2d = np.broadcast_to(times, (n_samples, n_times))
 484    ipcw = np.broadcast_to(ipcw[:, np.newaxis], (n_samples, n_times))
 485
 486    # sort each time point (columns) by risk score (descending)
 487    o = np.argsort(-estimate, axis=0)
 488    test_time = np.take_along_axis(test_time, o, axis=0)
 489    test_event = np.take_along_axis(test_event, o, axis=0)
 490    estimate = np.take_along_axis(estimate, o, axis=0)
 491    ipcw = np.take_along_axis(ipcw, o, axis=0)
 492
 493    is_case = (test_time <= times_2d) & test_event
 494    is_control = test_time > times_2d
 495    n_controls = is_control.sum(axis=0)
 496
 497    # prepend row of infinity values
 498    estimate_diff = np.concatenate(
 499        (np.broadcast_to(np.inf, (1, n_times)), estimate))
 500    is_tied = np.absolute(np.diff(estimate_diff, axis=0)) <= tied_tol
 501
 502    cumsum_tp = np.cumsum(is_case * ipcw, axis=0)
 503    cumsum_fp = np.cumsum(is_control, axis=0)
 504    true_pos = cumsum_tp / cumsum_tp[-1]
 505    false_pos = cumsum_fp / n_controls
 506
 507    scores = np.empty(n_times, dtype=float)
 508    it = np.nditer((true_pos, false_pos, is_tied),
 509                   order="F", flags=["external_loop"])
 510    with it:
 511        for i, (tp, fp, mask) in enumerate(it):
 512            idx = np.flatnonzero(mask) - 1
 513            # only keep the last estimate for tied risk scores
 514            tp_no_ties = np.delete(tp, idx)
 515            fp_no_ties = np.delete(fp, idx)
 516            # Add an extra threshold position
 517            # to make sure that the curve starts at (0, 0)
 518            tp_no_ties = np.r_[0, tp_no_ties]
 519            fp_no_ties = np.r_[0, fp_no_ties]
 520            scores[i] = np.trapezoid(tp_no_ties, fp_no_ties)
 521
 522    if n_times == 1:
 523        mean_auc = scores[0]
 524    else:
 525        surv = SurvivalFunctionEstimator()
 526        surv.fit(survival_test)
 527        s_times = surv.predict_proba(times)
 528        # compute integral of AUC over survival function
 529        d = -np.diff(np.r_[1.0, s_times])
 530        integral = (scores * d).sum()
 531        mean_auc = integral / (1.0 - s_times[-1])
 532
 533    return scores, mean_auc
 534
 535
 536def brier_score(survival_train, survival_test, estimate, times):
 537    """Estimate the time-dependent Brier score for right censored data.
 538
 539    The time-dependent Brier score is the mean squared error at time point :math:`t`:
 540
 541    .. math::
 542
 543        \\mathrm{BS}^c(t) = \\frac{1}{n} \\sum_{i=1}^n I(y_i \\leq t \\land \\delta_i = 1)
 544        \\frac{(0 - \\hat{\\pi}(t | \\mathbf{x}_i))^2}{\\hat{G}(y_i)} + I(y_i > t)
 545        \\frac{(1 - \\hat{\\pi}(t | \\mathbf{x}_i))^2}{\\hat{G}(t)} ,
 546
 547    where :math:`\\hat{\\pi}(t | \\mathbf{x})` is the predicted probability of
 548    remaining event-free up to time point :math:`t` for a feature vector :math:`\\mathbf{x}`,
 549    and :math:`1/\\hat{G}(t)` is a inverse probability of censoring weight, estimated by
 550    the Kaplan-Meier estimator.
 551
 552    See the :ref:`User Guide </user_guide/evaluating-survival-models.ipynb#Time-dependent-Brier-Score>`
 553    and [1]_ for details.
 554
 555    Parameters
 556    ----------
 557    survival_train : structured array, shape = (n_train_samples,)
 558        Survival times for training data to estimate the censoring
 559        distribution from.
 560        A structured array containing the binary event indicator
 561        as first field, and time of event or time of censoring as
 562        second field.
 563
 564    survival_test : structured array, shape = (n_samples,)
 565        Survival times of test data.
 566        A structured array containing the binary event indicator
 567        as first field, and time of event or time of censoring as
 568        second field.
 569
 570    estimate : array-like, shape = (n_samples, n_times)
 571        Estimated probability of remaining event-free at time points
 572        specified by `times`. The value of ``estimate[i]`` must correspond to
 573        the estimated probability of remaining event-free up to the time point
 574        ``times[i]``. Typically, estimated probabilities are obtained via the
 575        survival function returned by an estimator's
 576        ``predict_survival_function`` method.
 577
 578    times : array-like, shape = (n_times,)
 579        The time points for which to estimate the Brier score.
 580        Values must be within the range of follow-up times of
 581        the test data `survival_test`.
 582
 583    Returns
 584    -------
 585    times : array, shape = (n_times,)
 586        Unique time points at which the brier scores was estimated.
 587
 588    brier_scores : array , shape = (n_times,)
 589        Values of the brier score.
 590
 591    Examples
 592    --------
 593    >>> from survivalist.datasets import load_gbsg2
 594    >>> from survivalist.linear_model import CoxPHSurvivalAnalysis
 595    >>> from survivalist.metrics import brier_score
 596    >>> from survivalist.preprocessing import OneHotEncoder
 597
 598    Load and prepare data.
 599
 600    >>> X, y = load_gbsg2()
 601    >>> X.loc[:, "tgrade"] = X.loc[:, "tgrade"].map(len).astype(int)
 602    >>> Xt = OneHotEncoder().fit_transform(X)
 603
 604    Fit a Cox model.
 605
 606    >>> est = CoxPHSurvivalAnalysis(ties="efron").fit(Xt, y)
 607
 608    Retrieve individual survival functions and get probability
 609    of remaining event free up to 5 years (=1825 days).
 610
 611    >>> survs = est.predict_survival_function(Xt)
 612    >>> preds = [fn(1825) for fn in survs]
 613
 614    Compute the Brier score at 5 years.
 615
 616    >>> times, score = brier_score(y, y, preds, 1825)
 617    >>> print(score)
 618    [0.20881843]
 619
 620    See also
 621    --------
 622    integrated_brier_score
 623        Computes the average Brier score over all time points.
 624
 625    References
 626    ----------
 627    .. [1] E. Graf, C. Schmoor, W. Sauerbrei, and M. Schumacher,
 628           "Assessment and comparison of prognostic classification schemes for survival data,"
 629           Statistics in Medicine, vol. 18, no. 17-18, pp. 2529–2545, 1999.
 630    """
 631    test_event, test_time = check_y_survival(survival_test)
 632    estimate, times = _check_estimate_2d(
 633        estimate, test_time, times, estimator="brier_score")
 634    if estimate.ndim == 1 and times.shape[0] == 1:
 635        estimate = estimate.reshape(-1, 1)
 636
 637    # fit IPCW estimator
 638    cens = CensoringDistributionEstimator().fit(survival_train)
 639    # calculate inverse probability of censoring weight at current time point t.
 640    prob_cens_t = cens.predict_proba(times)
 641    prob_cens_t[prob_cens_t == 0] = np.inf
 642    # calculate inverse probability of censoring weights at observed time point
 643    prob_cens_y = cens.predict_proba(test_time)
 644    prob_cens_y[prob_cens_y == 0] = np.inf
 645
 646    # Calculating the brier scores at each time point
 647    brier_scores = np.empty(times.shape[0], dtype=float)
 648    for i, t in enumerate(times):
 649        est = estimate[:, i]
 650        is_case = (test_time <= t) & test_event
 651        is_control = test_time > t
 652
 653        brier_scores[i] = np.mean(
 654            np.square(est) * is_case.astype(int) / prob_cens_y
 655            + np.square(1.0 - est) * is_control.astype(int) / prob_cens_t[i]
 656        )
 657
 658    return times, brier_scores
 659
 660
 661def integrated_brier_score(survival_train, survival_test, estimate, times):
 662    """The Integrated Brier Score (IBS) provides an overall calculation of
 663    the model performance at all available times :math:`t_1 \\leq t \\leq t_\\text{max}`.
 664
 665    The integrated time-dependent Brier score over the interval
 666    :math:`[t_1; t_\\text{max}]` is defined as
 667
 668    .. math::
 669
 670        \\mathrm{IBS} = \\int_{t_1}^{t_\\text{max}} \\mathrm{BS}^c(t) d w(t)
 671
 672    where the weighting function is :math:`w(t) = t / t_\\text{max}`.
 673    The integral is estimated via the trapezoidal rule.
 674
 675    See the :ref:`User Guide </user_guide/evaluating-survival-models.ipynb#Time-dependent-Brier-Score>`
 676    and [1]_ for further details.
 677
 678    Parameters
 679    ----------
 680    survival_train : structured array, shape = (n_train_samples,)
 681        Survival times for training data to estimate the censoring
 682        distribution from.
 683        A structured array containing the binary event indicator
 684        as first field, and time of event or time of censoring as
 685        second field.
 686
 687    survival_test : structured array, shape = (n_samples,)
 688        Survival times of test data.
 689        A structured array containing the binary event indicator
 690        as first field, and time of event or time of censoring as
 691        second field.
 692
 693    estimate : array-like, shape = (n_samples, n_times)
 694        Estimated probability of remaining event-free at time points
 695        specified by `times`. The value of ``estimate[i]`` must correspond to
 696        the estimated probability of remaining event-free up to the time point
 697        ``times[i]``. Typically, estimated probabilities are obtained via the
 698        survival function returned by an estimator's
 699        ``predict_survival_function`` method.
 700
 701    times : array-like, shape = (n_times,)
 702        The time points for which to estimate the Brier score.
 703        Values must be within the range of follow-up times of
 704        the test data `survival_test`.
 705
 706    Returns
 707    -------
 708    ibs : float
 709        The integrated Brier score.
 710
 711    Examples
 712    --------
 713    >>> import numpy as np
 714    >>> from survivalist.datasets import load_gbsg2
 715    >>> from survivalist.linear_model import CoxPHSurvivalAnalysis
 716    >>> from survivalist.metrics import integrated_brier_score
 717    >>> from survivalist.preprocessing import OneHotEncoder
 718
 719    Load and prepare data.
 720
 721    >>> X, y = load_gbsg2()
 722    >>> X.loc[:, "tgrade"] = X.loc[:, "tgrade"].map(len).astype(int)
 723    >>> Xt = OneHotEncoder().fit_transform(X)
 724
 725    Fit a Cox model.
 726
 727    >>> est = CoxPHSurvivalAnalysis(ties="efron").fit(Xt, y)
 728
 729    Retrieve individual survival functions and get probability
 730    of remaining event free from 1 year to 5 years (=1825 days).
 731
 732    >>> survs = est.predict_survival_function(Xt)
 733    >>> times = np.arange(365, 1826)
 734    >>> preds = np.asarray([[fn(t) for t in times] for fn in survs])
 735
 736    Compute the integrated Brier score from 1 to 5 years.
 737
 738    >>> score = integrated_brier_score(y, y, preds, times)
 739    >>> print(score)
 740    0.1815853064627424
 741
 742    See also
 743    --------
 744    brier_score
 745        Computes the Brier score at specified time points.
 746
 747    as_integrated_brier_score_scorer
 748        Wrapper class that uses :func:`integrated_brier_score`
 749        in its ``score`` method instead of the default
 750        :func:`concordance_index_censored`.
 751
 752    References
 753    ----------
 754    .. [1] E. Graf, C. Schmoor, W. Sauerbrei, and M. Schumacher,
 755           "Assessment and comparison of prognostic classification schemes for survival data,"
 756           Statistics in Medicine, vol. 18, no. 17-18, pp. 2529–2545, 1999.
 757    """
 758    # Computing the brier scores
 759    times, brier_scores = brier_score(
 760        survival_train, survival_test, estimate, times)
 761
 762    if times.shape[0] < 2:
 763        raise ValueError("At least two time points must be given")
 764
 765    # Computing the IBS
 766    ibs_value = np.trapezoid(brier_scores, times) / (times[-1] - times[0])
 767
 768    return ibs_value
 769
 770
 771def _estimator_has(attr):
 772    """Check that meta_estimator has `attr`.
 773
 774    Used together with `available_if`."""
 775
 776    def check(self):
 777        # raise original `AttributeError` if `attr` does not exist
 778        getattr(self.estimator_, attr)
 779        return True
 780
 781    return check
 782
 783
 784class _ScoreOverrideMixin:
 785    def __init__(
 786        self,
 787        estimator,
 788        predict_func,
 789        score_func,
 790        score_index,
 791        greater_is_better,
 792    ):
 793        if not hasattr(estimator, predict_func):
 794            raise AttributeError(
 795                f"{estimator!r} object has no attribute {predict_func!r}")
 796
 797        self.estimator = estimator
 798        self._predict_func = predict_func
 799        self._score_func = score_func
 800        self._score_index = score_index
 801        self._sign = 1 if greater_is_better else -1
 802
 803    def _get_score_params(self):
 804        """Return dict of parameters passed to ``score_func``."""
 805        params = self.get_params(deep=False)
 806        del params["estimator"]
 807        return params
 808
 809    def fit(self, X, y, **fit_params):
 810        self._train_y = np.array(y, copy=True)
 811        self.estimator_ = self.estimator.fit(X, y, **fit_params)
 812        return self
 813
 814    def _do_predict(self, X):
 815        predict_func = getattr(self.estimator_, self._predict_func)
 816        return predict_func(X)
 817
 818    def score(self, X, y):
 819        """Returns the score on the given data.
 820
 821        Parameters
 822        ----------
 823        X : array-like of shape (n_samples, n_features)
 824            Input data, where n_samples is the number of samples and
 825            n_features is the number of features.
 826
 827        y : array-like of shape (n_samples,)
 828            Target relative to X for classification or regression;
 829            None for unsupervised learning.
 830
 831        Returns
 832        -------
 833        score : float
 834        """
 835        estimate = self._do_predict(X)
 836        score = self._score_func(
 837            survival_train=self._train_y,
 838            survival_test=y,
 839            estimate=estimate,
 840            **self._get_score_params(),
 841        )
 842        if self._score_index is not None:
 843            score = score[self._score_index]
 844        return self._sign * score
 845
 846    @available_if(_estimator_has("predict"))
 847    def predict(self, X):
 848        """Call predict on the estimator.
 849
 850        Only available if estimator supports ``predict``.
 851
 852        Parameters
 853        ----------
 854        X : indexable, length n_samples
 855            Must fulfill the input assumptions of the
 856            underlying estimator.
 857        """
 858        check_is_fitted(self, "estimator_")
 859        return self.estimator_.predict(X)
 860
 861    @available_if(_estimator_has("predict_cumulative_hazard_function"))
 862    def predict_cumulative_hazard_function(self, X):
 863        """Call predict_cumulative_hazard_function on the estimator.
 864
 865        Only available if estimator supports ``predict_cumulative_hazard_function``.
 866
 867        Parameters
 868        ----------
 869        X : indexable, length n_samples
 870            Must fulfill the input assumptions of the
 871            underlying estimator.
 872        """
 873        check_is_fitted(self, "estimator_")
 874        return self.estimator_.predict_cumulative_hazard_function(X)
 875
 876    @available_if(_estimator_has("predict_survival_function"))
 877    def predict_survival_function(self, X):
 878        """Call predict_survival_function on the estimator.
 879
 880        Only available if estimator supports ``predict_survival_function``.
 881
 882        Parameters
 883        ----------
 884        X : indexable, length n_samples
 885            Must fulfill the input assumptions of the
 886            underlying estimator.
 887        """
 888        check_is_fitted(self, "estimator_")
 889        return self.estimator_.predict_survival_function(X)
 890
 891
 892class as_cumulative_dynamic_auc_scorer(_ScoreOverrideMixin, BaseEstimator):
 893    """Wraps an estimator to use :func:`cumulative_dynamic_auc` as ``score`` function.
 894
 895    See the :ref:`User Guide </user_guide/evaluating-survival-models.ipynb#Using-Metrics-in-Hyper-parameter-Search>`
 896    for using it for hyper-parameter optimization.
 897
 898    Parameters
 899    ----------
 900    estimator : object
 901        Instance of an estimator.
 902
 903    times : array-like, shape = (n_times,)
 904        The time points for which the area under the
 905        time-dependent ROC curve is computed. Values must be
 906        within the range of follow-up times of the test data
 907        `survival_test`.
 908
 909    tied_tol : float, optional, default: 1e-8
 910        The tolerance value for considering ties.
 911        If the absolute difference between risk scores is smaller
 912        or equal than `tied_tol`, risk scores are considered tied.
 913
 914    Attributes
 915    ----------
 916    estimator_ : estimator
 917        Estimator that was fit.
 918
 919    See also
 920    --------
 921    cumulative_dynamic_auc
 922    """
 923
 924    def __init__(self, estimator, times, tied_tol=1e-8):
 925        super().__init__(
 926            estimator=estimator,
 927            predict_func="predict",
 928            score_func=cumulative_dynamic_auc,
 929            score_index=1,
 930            greater_is_better=True,
 931        )
 932        self.times = times
 933        self.tied_tol = tied_tol
 934
 935
 936class as_concordance_index_ipcw_scorer(_ScoreOverrideMixin, BaseEstimator):
 937    """Wraps an estimator to use :func:`concordance_index_ipcw` as ``score`` function.
 938
 939    See the :ref:`User Guide </user_guide/evaluating-survival-models.ipynb#Using-Metrics-in-Hyper-parameter-Search>`
 940    for using it for hyper-parameter optimization.
 941
 942    Parameters
 943    ----------
 944    estimator : object
 945        Instance of an estimator.
 946
 947    tau : float, optional
 948        Truncation time. The survival function for the underlying
 949        censoring time distribution :math:`D` needs to be positive
 950        at `tau`, i.e., `tau` should be chosen such that the
 951        probability of being censored after time `tau` is non-zero:
 952        :math:`P(D > \\tau) > 0`. If `None`, no truncation is performed.
 953
 954    tied_tol : float, optional, default: 1e-8
 955        The tolerance value for considering ties.
 956        If the absolute difference between risk scores is smaller
 957        or equal than `tied_tol`, risk scores are considered tied.
 958
 959    Attributes
 960    ----------
 961    estimator_ : estimator
 962        Estimator that was fit.
 963
 964    See also
 965    --------
 966    concordance_index_ipcw
 967    """
 968
 969    def __init__(self, estimator, tau=None, tied_tol=1e-8):
 970        super().__init__(
 971            estimator=estimator,
 972            predict_func="predict",
 973            score_func=concordance_index_ipcw,
 974            score_index=0,
 975            greater_is_better=True,
 976        )
 977        self.tau = tau
 978        self.tied_tol = tied_tol
 979
 980
 981class as_integrated_brier_score_scorer(_ScoreOverrideMixin, BaseEstimator):
 982    """Wraps an estimator to use the negative of :func:`integrated_brier_score` as ``score`` function.
 983
 984    The estimator needs to be able to estimate survival functions via
 985    a ``predict_survival_function`` method.
 986
 987    See the :ref:`User Guide </user_guide/evaluating-survival-models.ipynb#Using-Metrics-in-Hyper-parameter-Search>`
 988    for using it for hyper-parameter optimization.
 989
 990    Parameters
 991    ----------
 992    estimator : object
 993        Instance of an estimator that provides ``predict_survival_function``.
 994
 995    times : array-like, shape = (n_times,)
 996        The time points for which to estimate the Brier score.
 997        Values must be within the range of follow-up times of
 998        the test data `survival_test`.
 999
1000    Attributes
1001    ----------
1002    estimator_ : estimator
1003        Estimator that was fit.
1004
1005    See also
1006    --------
1007    integrated_brier_score
1008    """
1009
1010    def __init__(self, estimator, times):
1011        super().__init__(
1012            estimator=estimator,
1013            predict_func="predict_survival_function",
1014            score_func=integrated_brier_score,
1015            score_index=None,
1016            greater_is_better=False,
1017        )
1018        self.times = times
1019
1020    def _do_predict(self, X):
1021        predict_func = getattr(self.estimator_, self._predict_func)
1022        surv_fns = predict_func(X)
1023        times = self.times
1024        estimates = np.empty((len(surv_fns), len(times)))
1025        for i, fn in enumerate(surv_fns):
1026            estimates[i, :] = fn(times)
1027        return estimates
class as_concordance_index_ipcw_scorer(_ScoreOverrideMixin, sklearn.base.BaseEstimator):
937class as_concordance_index_ipcw_scorer(_ScoreOverrideMixin, BaseEstimator):
938    """Wraps an estimator to use :func:`concordance_index_ipcw` as ``score`` function.
939
940    See the :ref:`User Guide </user_guide/evaluating-survival-models.ipynb#Using-Metrics-in-Hyper-parameter-Search>`
941    for using it for hyper-parameter optimization.
942
943    Parameters
944    ----------
945    estimator : object
946        Instance of an estimator.
947
948    tau : float, optional
949        Truncation time. The survival function for the underlying
950        censoring time distribution :math:`D` needs to be positive
951        at `tau`, i.e., `tau` should be chosen such that the
952        probability of being censored after time `tau` is non-zero:
953        :math:`P(D > \\tau) > 0`. If `None`, no truncation is performed.
954
955    tied_tol : float, optional, default: 1e-8
956        The tolerance value for considering ties.
957        If the absolute difference between risk scores is smaller
958        or equal than `tied_tol`, risk scores are considered tied.
959
960    Attributes
961    ----------
962    estimator_ : estimator
963        Estimator that was fit.
964
965    See also
966    --------
967    concordance_index_ipcw
968    """
969
970    def __init__(self, estimator, tau=None, tied_tol=1e-8):
971        super().__init__(
972            estimator=estimator,
973            predict_func="predict",
974            score_func=concordance_index_ipcw,
975            score_index=0,
976            greater_is_better=True,
977        )
978        self.tau = tau
979        self.tied_tol = tied_tol

Wraps an estimator to use concordance_index_ipcw() as score function.

See the :ref:User Guide </user_guide/evaluating-survival-models.ipynb#Using-Metrics-in-Hyper-parameter-Search> for using it for hyper-parameter optimization.

Parameters

estimator : object Instance of an estimator.

tau : float, optional Truncation time. The survival function for the underlying censoring time distribution \( D \) needs to be positive at tau, i.e., tau should be chosen such that the probability of being censored after time tau is non-zero: \( P(D > \tau) > 0 \). If None, no truncation is performed.

tied_tol : float, optional, default: 1e-8 The tolerance value for considering ties. If the absolute difference between risk scores is smaller or equal than tied_tol, risk scores are considered tied.

Attributes

estimator_ : estimator Estimator that was fit.

See also

concordance_index_ipcw

class as_cumulative_dynamic_auc_scorer(_ScoreOverrideMixin, sklearn.base.BaseEstimator):
893class as_cumulative_dynamic_auc_scorer(_ScoreOverrideMixin, BaseEstimator):
894    """Wraps an estimator to use :func:`cumulative_dynamic_auc` as ``score`` function.
895
896    See the :ref:`User Guide </user_guide/evaluating-survival-models.ipynb#Using-Metrics-in-Hyper-parameter-Search>`
897    for using it for hyper-parameter optimization.
898
899    Parameters
900    ----------
901    estimator : object
902        Instance of an estimator.
903
904    times : array-like, shape = (n_times,)
905        The time points for which the area under the
906        time-dependent ROC curve is computed. Values must be
907        within the range of follow-up times of the test data
908        `survival_test`.
909
910    tied_tol : float, optional, default: 1e-8
911        The tolerance value for considering ties.
912        If the absolute difference between risk scores is smaller
913        or equal than `tied_tol`, risk scores are considered tied.
914
915    Attributes
916    ----------
917    estimator_ : estimator
918        Estimator that was fit.
919
920    See also
921    --------
922    cumulative_dynamic_auc
923    """
924
925    def __init__(self, estimator, times, tied_tol=1e-8):
926        super().__init__(
927            estimator=estimator,
928            predict_func="predict",
929            score_func=cumulative_dynamic_auc,
930            score_index=1,
931            greater_is_better=True,
932        )
933        self.times = times
934        self.tied_tol = tied_tol

Wraps an estimator to use cumulative_dynamic_auc() as score function.

See the :ref:User Guide </user_guide/evaluating-survival-models.ipynb#Using-Metrics-in-Hyper-parameter-Search> for using it for hyper-parameter optimization.

Parameters

estimator : object Instance of an estimator.

times : array-like, shape = (n_times,) The time points for which the area under the time-dependent ROC curve is computed. Values must be within the range of follow-up times of the test data survival_test.

tied_tol : float, optional, default: 1e-8 The tolerance value for considering ties. If the absolute difference between risk scores is smaller or equal than tied_tol, risk scores are considered tied.

Attributes

estimator_ : estimator Estimator that was fit.

See also

cumulative_dynamic_auc

class as_integrated_brier_score_scorer(_ScoreOverrideMixin, sklearn.base.BaseEstimator):
 982class as_integrated_brier_score_scorer(_ScoreOverrideMixin, BaseEstimator):
 983    """Wraps an estimator to use the negative of :func:`integrated_brier_score` as ``score`` function.
 984
 985    The estimator needs to be able to estimate survival functions via
 986    a ``predict_survival_function`` method.
 987
 988    See the :ref:`User Guide </user_guide/evaluating-survival-models.ipynb#Using-Metrics-in-Hyper-parameter-Search>`
 989    for using it for hyper-parameter optimization.
 990
 991    Parameters
 992    ----------
 993    estimator : object
 994        Instance of an estimator that provides ``predict_survival_function``.
 995
 996    times : array-like, shape = (n_times,)
 997        The time points for which to estimate the Brier score.
 998        Values must be within the range of follow-up times of
 999        the test data `survival_test`.
1000
1001    Attributes
1002    ----------
1003    estimator_ : estimator
1004        Estimator that was fit.
1005
1006    See also
1007    --------
1008    integrated_brier_score
1009    """
1010
1011    def __init__(self, estimator, times):
1012        super().__init__(
1013            estimator=estimator,
1014            predict_func="predict_survival_function",
1015            score_func=integrated_brier_score,
1016            score_index=None,
1017            greater_is_better=False,
1018        )
1019        self.times = times
1020
1021    def _do_predict(self, X):
1022        predict_func = getattr(self.estimator_, self._predict_func)
1023        surv_fns = predict_func(X)
1024        times = self.times
1025        estimates = np.empty((len(surv_fns), len(times)))
1026        for i, fn in enumerate(surv_fns):
1027            estimates[i, :] = fn(times)
1028        return estimates

Wraps an estimator to use the negative of integrated_brier_score() as score function.

The estimator needs to be able to estimate survival functions via a predict_survival_function method.

See the :ref:User Guide </user_guide/evaluating-survival-models.ipynb#Using-Metrics-in-Hyper-parameter-Search> for using it for hyper-parameter optimization.

Parameters

estimator : object Instance of an estimator that provides predict_survival_function.

times : array-like, shape = (n_times,) The time points for which to estimate the Brier score. Values must be within the range of follow-up times of the test data survival_test.

Attributes

estimator_ : estimator Estimator that was fit.

See also

integrated_brier_score

def brier_score(survival_train, survival_test, estimate, times):
537def brier_score(survival_train, survival_test, estimate, times):
538    """Estimate the time-dependent Brier score for right censored data.
539
540    The time-dependent Brier score is the mean squared error at time point :math:`t`:
541
542    .. math::
543
544        \\mathrm{BS}^c(t) = \\frac{1}{n} \\sum_{i=1}^n I(y_i \\leq t \\land \\delta_i = 1)
545        \\frac{(0 - \\hat{\\pi}(t | \\mathbf{x}_i))^2}{\\hat{G}(y_i)} + I(y_i > t)
546        \\frac{(1 - \\hat{\\pi}(t | \\mathbf{x}_i))^2}{\\hat{G}(t)} ,
547
548    where :math:`\\hat{\\pi}(t | \\mathbf{x})` is the predicted probability of
549    remaining event-free up to time point :math:`t` for a feature vector :math:`\\mathbf{x}`,
550    and :math:`1/\\hat{G}(t)` is a inverse probability of censoring weight, estimated by
551    the Kaplan-Meier estimator.
552
553    See the :ref:`User Guide </user_guide/evaluating-survival-models.ipynb#Time-dependent-Brier-Score>`
554    and [1]_ for details.
555
556    Parameters
557    ----------
558    survival_train : structured array, shape = (n_train_samples,)
559        Survival times for training data to estimate the censoring
560        distribution from.
561        A structured array containing the binary event indicator
562        as first field, and time of event or time of censoring as
563        second field.
564
565    survival_test : structured array, shape = (n_samples,)
566        Survival times of test data.
567        A structured array containing the binary event indicator
568        as first field, and time of event or time of censoring as
569        second field.
570
571    estimate : array-like, shape = (n_samples, n_times)
572        Estimated probability of remaining event-free at time points
573        specified by `times`. The value of ``estimate[i]`` must correspond to
574        the estimated probability of remaining event-free up to the time point
575        ``times[i]``. Typically, estimated probabilities are obtained via the
576        survival function returned by an estimator's
577        ``predict_survival_function`` method.
578
579    times : array-like, shape = (n_times,)
580        The time points for which to estimate the Brier score.
581        Values must be within the range of follow-up times of
582        the test data `survival_test`.
583
584    Returns
585    -------
586    times : array, shape = (n_times,)
587        Unique time points at which the brier scores was estimated.
588
589    brier_scores : array , shape = (n_times,)
590        Values of the brier score.
591
592    Examples
593    --------
594    >>> from survivalist.datasets import load_gbsg2
595    >>> from survivalist.linear_model import CoxPHSurvivalAnalysis
596    >>> from survivalist.metrics import brier_score
597    >>> from survivalist.preprocessing import OneHotEncoder
598
599    Load and prepare data.
600
601    >>> X, y = load_gbsg2()
602    >>> X.loc[:, "tgrade"] = X.loc[:, "tgrade"].map(len).astype(int)
603    >>> Xt = OneHotEncoder().fit_transform(X)
604
605    Fit a Cox model.
606
607    >>> est = CoxPHSurvivalAnalysis(ties="efron").fit(Xt, y)
608
609    Retrieve individual survival functions and get probability
610    of remaining event free up to 5 years (=1825 days).
611
612    >>> survs = est.predict_survival_function(Xt)
613    >>> preds = [fn(1825) for fn in survs]
614
615    Compute the Brier score at 5 years.
616
617    >>> times, score = brier_score(y, y, preds, 1825)
618    >>> print(score)
619    [0.20881843]
620
621    See also
622    --------
623    integrated_brier_score
624        Computes the average Brier score over all time points.
625
626    References
627    ----------
628    .. [1] E. Graf, C. Schmoor, W. Sauerbrei, and M. Schumacher,
629           "Assessment and comparison of prognostic classification schemes for survival data,"
630           Statistics in Medicine, vol. 18, no. 17-18, pp. 2529–2545, 1999.
631    """
632    test_event, test_time = check_y_survival(survival_test)
633    estimate, times = _check_estimate_2d(
634        estimate, test_time, times, estimator="brier_score")
635    if estimate.ndim == 1 and times.shape[0] == 1:
636        estimate = estimate.reshape(-1, 1)
637
638    # fit IPCW estimator
639    cens = CensoringDistributionEstimator().fit(survival_train)
640    # calculate inverse probability of censoring weight at current time point t.
641    prob_cens_t = cens.predict_proba(times)
642    prob_cens_t[prob_cens_t == 0] = np.inf
643    # calculate inverse probability of censoring weights at observed time point
644    prob_cens_y = cens.predict_proba(test_time)
645    prob_cens_y[prob_cens_y == 0] = np.inf
646
647    # Calculating the brier scores at each time point
648    brier_scores = np.empty(times.shape[0], dtype=float)
649    for i, t in enumerate(times):
650        est = estimate[:, i]
651        is_case = (test_time <= t) & test_event
652        is_control = test_time > t
653
654        brier_scores[i] = np.mean(
655            np.square(est) * is_case.astype(int) / prob_cens_y
656            + np.square(1.0 - est) * is_control.astype(int) / prob_cens_t[i]
657        )
658
659    return times, brier_scores

Estimate the time-dependent Brier score for right censored data.

The time-dependent Brier score is the mean squared error at time point \( t \):

$$\mathrm{BS}^c(t) = \frac{1}{n} \sum_{i=1}^n I(y_i \leq t \land \delta_i = 1) \frac{(0 - \hat{\pi}(t | \mathbf{x}_i))^2}{\hat{G}(y_i)} + I(y_i > t) \frac{(1 - \hat{\pi}(t | \mathbf{x}_i))^2}{\hat{G}(t)} ,$$

where \( \hat{\pi}(t | \mathbf{x}) \) is the predicted probability of remaining event-free up to time point \( t \) for a feature vector \( \mathbf{x} \), and \( 1/\hat{G}(t) \) is a inverse probability of censoring weight, estimated by the Kaplan-Meier estimator.

See the :ref:User Guide </user_guide/evaluating-survival-models.ipynb#Time-dependent-Brier-Score> and 1 for details.

Parameters

survival_train : structured array, shape = (n_train_samples,) Survival times for training data to estimate the censoring distribution from. A structured array containing the binary event indicator as first field, and time of event or time of censoring as second field.

survival_test : structured array, shape = (n_samples,) Survival times of test data. A structured array containing the binary event indicator as first field, and time of event or time of censoring as second field.

estimate : array-like, shape = (n_samples, n_times) Estimated probability of remaining event-free at time points specified by times. The value of estimate[i] must correspond to the estimated probability of remaining event-free up to the time point times[i]. Typically, estimated probabilities are obtained via the survival function returned by an estimator's predict_survival_function method.

times : array-like, shape = (n_times,) The time points for which to estimate the Brier score. Values must be within the range of follow-up times of the test data survival_test.

Returns

times : array, shape = (n_times,) Unique time points at which the brier scores was estimated.

brier_scores : array , shape = (n_times,) Values of the brier score.

Examples

>>> from survivalist.datasets import load_gbsg2
>>> from survivalist.linear_model import CoxPHSurvivalAnalysis
>>> from survivalist.metrics import brier_score
>>> from survivalist.preprocessing import OneHotEncoder

Load and prepare data.

>>> X, y = load_gbsg2()
>>> X.loc[:, "tgrade"] = X.loc[:, "tgrade"].map(len).astype(int)
>>> Xt = OneHotEncoder().fit_transform(X)

Fit a Cox model.

>>> est = CoxPHSurvivalAnalysis(ties="efron").fit(Xt, y)

Retrieve individual survival functions and get probability of remaining event free up to 5 years (=1825 days).

>>> survs = est.predict_survival_function(Xt)
>>> preds = [fn(1825) for fn in survs]

Compute the Brier score at 5 years.

>>> times, score = brier_score(y, y, preds, 1825)
>>> print(score)
[0.20881843]

See also

integrated_brier_score Computes the average Brier score over all time points.

References


  1. E. Graf, C. Schmoor, W. Sauerbrei, and M. Schumacher, "Assessment and comparison of prognostic classification schemes for survival data," Statistics in Medicine, vol. 18, no. 17-18, pp. 2529–2545, 1999. 

def concordance_index_censored(event_indicator, event_time, estimate, tied_tol=1e-08):
167def concordance_index_censored(event_indicator, event_time, estimate, tied_tol=1e-8):
168    """Concordance index for right-censored data
169
170    The concordance index is defined as the proportion of all comparable pairs
171    in which the predictions and outcomes are concordant.
172
173    Two samples are comparable if (i) both of them experienced an event (at different times),
174    or (ii) the one with a shorter observed survival time experienced an event, in which case
175    the event-free subject "outlived" the other. A pair is not comparable if they experienced
176    events at the same time.
177
178    Concordance intuitively means that two samples were ordered correctly by the model.
179    More specifically, two samples are concordant, if the one with a higher estimated
180    risk score has a shorter actual survival time.
181    When predicted risks are identical for a pair, 0.5 rather than 1 is added to the count
182    of concordant pairs.
183
184    See the :ref:`User Guide </user_guide/evaluating-survival-models.ipynb>`
185    and [1]_ for further description.
186
187    Parameters
188    ----------
189    event_indicator : array-like, shape = (n_samples,)
190        Boolean array denotes whether an event occurred
191
192    event_time : array-like, shape = (n_samples,)
193        Array containing the time of an event or time of censoring
194
195    estimate : array-like, shape = (n_samples,)
196        Estimated risk of experiencing an event
197
198    tied_tol : float, optional, default: 1e-8
199        The tolerance value for considering ties.
200        If the absolute difference between risk scores is smaller
201        or equal than `tied_tol`, risk scores are considered tied.
202
203    Returns
204    -------
205    cindex : float
206        Concordance index
207
208    concordant : int
209        Number of concordant pairs
210
211    discordant : int
212        Number of discordant pairs
213
214    tied_risk : int
215        Number of pairs having tied estimated risks
216
217    tied_time : int
218        Number of comparable pairs sharing the same time
219
220    See also
221    --------
222    concordance_index_ipcw
223        Alternative estimator of the concordance index with less bias.
224
225    References
226    ----------
227    .. [1] Harrell, F.E., Califf, R.M., Pryor, D.B., Lee, K.L., Rosati, R.A,
228           "Multivariable prognostic models: issues in developing models,
229           evaluating assumptions and adequacy, and measuring and reducing errors",
230           Statistics in Medicine, 15(4), 361-87, 1996.
231    """
232    event_indicator, event_time, estimate = _check_inputs(
233        event_indicator, event_time, estimate)
234
235    w = np.ones_like(estimate)
236
237    return _estimate_concordance_index(event_indicator, event_time, estimate, w, tied_tol)

Concordance index for right-censored data

The concordance index is defined as the proportion of all comparable pairs in which the predictions and outcomes are concordant.

Two samples are comparable if (i) both of them experienced an event (at different times), or (ii) the one with a shorter observed survival time experienced an event, in which case the event-free subject "outlived" the other. A pair is not comparable if they experienced events at the same time.

Concordance intuitively means that two samples were ordered correctly by the model. More specifically, two samples are concordant, if the one with a higher estimated risk score has a shorter actual survival time. When predicted risks are identical for a pair, 0.5 rather than 1 is added to the count of concordant pairs.

See the :ref:User Guide </user_guide/evaluating-survival-models.ipynb> and 1 for further description.

Parameters

event_indicator : array-like, shape = (n_samples,) Boolean array denotes whether an event occurred

event_time : array-like, shape = (n_samples,) Array containing the time of an event or time of censoring

estimate : array-like, shape = (n_samples,) Estimated risk of experiencing an event

tied_tol : float, optional, default: 1e-8 The tolerance value for considering ties. If the absolute difference between risk scores is smaller or equal than tied_tol, risk scores are considered tied.

Returns

cindex : float Concordance index

concordant : int Number of concordant pairs

discordant : int Number of discordant pairs

tied_risk : int Number of pairs having tied estimated risks

tied_time : int Number of comparable pairs sharing the same time

See also

concordance_index_ipcw Alternative estimator of the concordance index with less bias.

References


  1. Harrell, F.E., Califf, R.M., Pryor, D.B., Lee, K.L., Rosati, R.A, "Multivariable prognostic models: issues in developing models, evaluating assumptions and adequacy, and measuring and reducing errors", Statistics in Medicine, 15(4), 361-87, 1996. 

def concordance_index_ipcw(survival_train, survival_test, estimate, tau=None, tied_tol=1e-08):
240def concordance_index_ipcw(survival_train, survival_test, estimate, tau=None, tied_tol=1e-8):
241    """Concordance index for right-censored data based on inverse probability of censoring weights.
242
243    This is an alternative to the estimator in :func:`concordance_index_censored`
244    that does not depend on the distribution of censoring times in the test data.
245    Therefore, the estimate is unbiased and consistent for a population concordance
246    measure that is free of censoring.
247
248    It is based on inverse probability of censoring weights, thus requires
249    access to survival times from the training data to estimate the censoring
250    distribution. Note that this requires that survival times `survival_test`
251    lie within the range of survival times `survival_train`. This can be
252    achieved by specifying the truncation time `tau`.
253    The resulting `cindex` tells how well the given prediction model works in
254    predicting events that occur in the time range from 0 to `tau`.
255
256    The estimator uses the Kaplan-Meier estimator to estimate the
257    censoring survivor function. Therefore, it is restricted to
258    situations where the random censoring assumption holds and
259    censoring is independent of the features.
260
261    See the :ref:`User Guide </user_guide/evaluating-survival-models.ipynb>`
262    and [1]_ for further description.
263
264    Parameters
265    ----------
266    survival_train : structured array, shape = (n_train_samples,)
267        Survival times for training data to estimate the censoring
268        distribution from.
269        A structured array containing the binary event indicator
270        as first field, and time of event or time of censoring as
271        second field.
272
273    survival_test : structured array, shape = (n_samples,)
274        Survival times of test data.
275        A structured array containing the binary event indicator
276        as first field, and time of event or time of censoring as
277        second field.
278
279    estimate : array-like, shape = (n_samples,)
280        Estimated risk of experiencing an event of test data.
281
282    tau : float, optional
283        Truncation time. The survival function for the underlying
284        censoring time distribution :math:`D` needs to be positive
285        at `tau`, i.e., `tau` should be chosen such that the
286        probability of being censored after time `tau` is non-zero:
287        :math:`P(D > \\tau) > 0`. If `None`, no truncation is performed.
288
289    tied_tol : float, optional, default: 1e-8
290        The tolerance value for considering ties.
291        If the absolute difference between risk scores is smaller
292        or equal than `tied_tol`, risk scores are considered tied.
293
294    Returns
295    -------
296    cindex : float
297        Concordance index
298
299    concordant : int
300        Number of concordant pairs
301
302    discordant : int
303        Number of discordant pairs
304
305    tied_risk : int
306        Number of pairs having tied estimated risks
307
308    tied_time : int
309        Number of comparable pairs sharing the same time
310
311    See also
312    --------
313    concordance_index_censored
314        Simpler estimator of the concordance index.
315
316    as_concordance_index_ipcw_scorer
317        Wrapper class that uses :func:`concordance_index_ipcw`
318        in its ``score`` method instead of the default
319        :func:`concordance_index_censored`.
320
321    References
322    ----------
323    .. [1] Uno, H., Cai, T., Pencina, M. J., D’Agostino, R. B., & Wei, L. J. (2011).
324           "On the C-statistics for evaluating overall adequacy of risk prediction
325           procedures with censored survival data".
326           Statistics in Medicine, 30(10), 1105–1117.
327    """
328    test_event, test_time = check_y_survival(survival_test)
329
330    if tau is not None:
331        mask = test_time < tau
332        survival_test = survival_test[mask]
333
334    estimate = _check_estimate_1d(estimate, test_time)
335
336    cens = CensoringDistributionEstimator()
337    cens.fit(survival_train)
338    ipcw_test = cens.predict_ipcw(survival_test)
339    if tau is None:
340        ipcw = ipcw_test
341    else:
342        ipcw = np.empty(estimate.shape[0], dtype=ipcw_test.dtype)
343        ipcw[mask] = ipcw_test
344        ipcw[~mask] = 0
345
346    w = np.square(ipcw)
347
348    return _estimate_concordance_index(test_event, test_time, estimate, w, tied_tol)

Concordance index for right-censored data based on inverse probability of censoring weights.

This is an alternative to the estimator in concordance_index_censored() that does not depend on the distribution of censoring times in the test data. Therefore, the estimate is unbiased and consistent for a population concordance measure that is free of censoring.

It is based on inverse probability of censoring weights, thus requires access to survival times from the training data to estimate the censoring distribution. Note that this requires that survival times survival_test lie within the range of survival times survival_train. This can be achieved by specifying the truncation time tau. The resulting cindex tells how well the given prediction model works in predicting events that occur in the time range from 0 to tau.

The estimator uses the Kaplan-Meier estimator to estimate the censoring survivor function. Therefore, it is restricted to situations where the random censoring assumption holds and censoring is independent of the features.

See the :ref:User Guide </user_guide/evaluating-survival-models.ipynb> and 1 for further description.

Parameters

survival_train : structured array, shape = (n_train_samples,) Survival times for training data to estimate the censoring distribution from. A structured array containing the binary event indicator as first field, and time of event or time of censoring as second field.

survival_test : structured array, shape = (n_samples,) Survival times of test data. A structured array containing the binary event indicator as first field, and time of event or time of censoring as second field.

estimate : array-like, shape = (n_samples,) Estimated risk of experiencing an event of test data.

tau : float, optional Truncation time. The survival function for the underlying censoring time distribution \( D \) needs to be positive at tau, i.e., tau should be chosen such that the probability of being censored after time tau is non-zero: \( P(D > \tau) > 0 \). If None, no truncation is performed.

tied_tol : float, optional, default: 1e-8 The tolerance value for considering ties. If the absolute difference between risk scores is smaller or equal than tied_tol, risk scores are considered tied.

Returns

cindex : float Concordance index

concordant : int Number of concordant pairs

discordant : int Number of discordant pairs

tied_risk : int Number of pairs having tied estimated risks

tied_time : int Number of comparable pairs sharing the same time

See also

concordance_index_censored Simpler estimator of the concordance index.

as_concordance_index_ipcw_scorer Wrapper class that uses concordance_index_ipcw() in its score method instead of the default concordance_index_censored().

References


  1. Uno, H., Cai, T., Pencina, M. J., D’Agostino, R. B., & Wei, L. J. (2011). "On the C-statistics for evaluating overall adequacy of risk prediction procedures with censored survival data". Statistics in Medicine, 30(10), 1105–1117. 

def cumulative_dynamic_auc(survival_train, survival_test, estimate, times, tied_tol=1e-08):
351def cumulative_dynamic_auc(survival_train, survival_test, estimate, times, tied_tol=1e-8):
352    """Estimator of cumulative/dynamic AUC for right-censored time-to-event data.
353
354    The receiver operating characteristic (ROC) curve and the area under the
355    ROC curve (AUC) can be extended to survival data by defining
356    sensitivity (true positive rate) and specificity (true negative rate)
357    as time-dependent measures. *Cumulative cases* are all individuals that
358    experienced an event prior to or at time :math:`t` (:math:`t_i \\leq t`),
359    whereas *dynamic controls* are those with :math:`t_i > t`.
360    The associated cumulative/dynamic AUC quantifies how well a model can
361    distinguish subjects who fail by a given time (:math:`t_i \\leq t`) from
362    subjects who fail after this time (:math:`t_i > t`).
363
364    Given an estimator of the :math:`i`-th individual's risk score
365    :math:`\\hat{f}(\\mathbf{x}_i)`, the cumulative/dynamic AUC at time
366    :math:`t` is defined as
367
368    .. math::
369
370        \\widehat{\\mathrm{AUC}}(t) =
371        \\frac{\\sum_{i=1}^n \\sum_{j=1}^n I(y_j > t) I(y_i \\leq t) \\omega_i
372        I(\\hat{f}(\\mathbf{x}_j) \\leq \\hat{f}(\\mathbf{x}_i))}
373        {(\\sum_{i=1}^n I(y_i > t)) (\\sum_{i=1}^n I(y_i \\leq t) \\omega_i)}
374
375    where :math:`\\omega_i` are inverse probability of censoring weights (IPCW).
376
377    To estimate IPCW, access to survival times from the training data is required
378    to estimate the censoring distribution. Note that this requires that survival
379    times `survival_test` lie within the range of survival times `survival_train`.
380    This can be achieved by specifying `times` accordingly, e.g. by setting
381    `times[-1]` slightly below the maximum expected follow-up time.
382    IPCW are computed using the Kaplan-Meier estimator, which is
383    restricted to situations where the random censoring assumption holds and
384    censoring is independent of the features.
385
386    This function can also be used to evaluate models with time-dependent predictions
387    :math:`\\hat{f}(\\mathbf{x}_i, t)`, such as :class:`survivalist.ensemble.RandomSurvivalForest`
388    (see :ref:`User Guide </user_guide/evaluating-survival-models.ipynb#Using-Time-dependent-Risk-Scores>`).
389    In this case, `estimate` must be a 2-d array where ``estimate[i, j]`` is the
390    predicted risk score for the i-th instance at time point ``times[j]``.
391
392    Finally, the function also provides a single summary measure that refers to the mean
393    of the :math:`\\mathrm{AUC}(t)` over the time range :math:`(\\tau_1, \\tau_2)`.
394
395    .. math::
396
397        \\overline{\\mathrm{AUC}}(\\tau_1, \\tau_2) =
398        \\frac{1}{\\hat{S}(\\tau_1) - \\hat{S}(\\tau_2)}
399        \\int_{\\tau_1}^{\\tau_2} \\widehat{\\mathrm{AUC}}(t)\\,d \\hat{S}(t)
400
401    where :math:`\\hat{S}(t)` is the Kaplan–Meier estimator of the survival function.
402
403    See the :ref:`User Guide </user_guide/evaluating-survival-models.ipynb#Time-dependent-Area-under-the-ROC>`,
404    [1]_, [2]_, [3]_ for further description.
405
406    Parameters
407    ----------
408    survival_train : structured array, shape = (n_train_samples,)
409        Survival times for training data to estimate the censoring
410        distribution from.
411        A structured array containing the binary event indicator
412        as first field, and time of event or time of censoring as
413        second field.
414
415    survival_test : structured array, shape = (n_samples,)
416        Survival times of test data.
417        A structured array containing the binary event indicator
418        as first field, and time of event or time of censoring as
419        second field.
420
421    estimate : array-like, shape = (n_samples,) or (n_samples, n_times)
422        Estimated risk of experiencing an event of test data.
423        If `estimate` is a 1-d array, the same risk score across all time
424        points is used. If `estimate` is a 2-d array, the risk scores in the
425        j-th column are used to evaluate the j-th time point.
426
427    times : array-like, shape = (n_times,)
428        The time points for which the area under the
429        time-dependent ROC curve is computed. Values must be
430        within the range of follow-up times of the test data
431        `survival_test`.
432
433    tied_tol : float, optional, default: 1e-8
434        The tolerance value for considering ties.
435        If the absolute difference between risk scores is smaller
436        or equal than `tied_tol`, risk scores are considered tied.
437
438    Returns
439    -------
440    auc : array, shape = (n_times,)
441        The cumulative/dynamic AUC estimates (evaluated at `times`).
442    mean_auc : float
443        Summary measure referring to the mean cumulative/dynamic AUC
444        over the specified time range `(times[0], times[-1])`.
445
446    See also
447    --------
448    as_cumulative_dynamic_auc_scorer
449        Wrapper class that uses :func:`cumulative_dynamic_auc`
450        in its ``score`` method instead of the default
451        :func:`concordance_index_censored`.
452
453    References
454    ----------
455    .. [1] H. Uno, T. Cai, L. Tian, and L. J. Wei,
456           "Evaluating prediction rules for t-year survivors with censored regression models,"
457           Journal of the American Statistical Association, vol. 102, pp. 527–537, 2007.
458    .. [2] H. Hung and C. T. Chiang,
459           "Estimation methods for time-dependent AUC models with survival data,"
460           Canadian Journal of Statistics, vol. 38, no. 1, pp. 8–26, 2010.
461    .. [3] J. Lambert and S. Chevret,
462           "Summary measure of discrimination in survival models based on cumulative/dynamic time-dependent ROC curves,"
463           Statistical Methods in Medical Research, 2014.
464    """
465    test_event, test_time = check_y_survival(survival_test)
466    estimate, times = _check_estimate_2d(
467        estimate, test_time, times, estimator="cumulative_dynamic_auc")
468
469    n_samples = estimate.shape[0]
470    n_times = times.shape[0]
471    if estimate.ndim == 1:
472        estimate = np.broadcast_to(
473            estimate[:, np.newaxis], (n_samples, n_times))
474
475    # fit and transform IPCW
476    cens = CensoringDistributionEstimator()
477    cens.fit(survival_train)
478    ipcw = cens.predict_ipcw(survival_test)
479
480    # expand arrays to (n_samples, n_times) shape
481    test_time = np.broadcast_to(test_time[:, np.newaxis], (n_samples, n_times))
482    test_event = np.broadcast_to(
483        test_event[:, np.newaxis], (n_samples, n_times))
484    times_2d = np.broadcast_to(times, (n_samples, n_times))
485    ipcw = np.broadcast_to(ipcw[:, np.newaxis], (n_samples, n_times))
486
487    # sort each time point (columns) by risk score (descending)
488    o = np.argsort(-estimate, axis=0)
489    test_time = np.take_along_axis(test_time, o, axis=0)
490    test_event = np.take_along_axis(test_event, o, axis=0)
491    estimate = np.take_along_axis(estimate, o, axis=0)
492    ipcw = np.take_along_axis(ipcw, o, axis=0)
493
494    is_case = (test_time <= times_2d) & test_event
495    is_control = test_time > times_2d
496    n_controls = is_control.sum(axis=0)
497
498    # prepend row of infinity values
499    estimate_diff = np.concatenate(
500        (np.broadcast_to(np.inf, (1, n_times)), estimate))
501    is_tied = np.absolute(np.diff(estimate_diff, axis=0)) <= tied_tol
502
503    cumsum_tp = np.cumsum(is_case * ipcw, axis=0)
504    cumsum_fp = np.cumsum(is_control, axis=0)
505    true_pos = cumsum_tp / cumsum_tp[-1]
506    false_pos = cumsum_fp / n_controls
507
508    scores = np.empty(n_times, dtype=float)
509    it = np.nditer((true_pos, false_pos, is_tied),
510                   order="F", flags=["external_loop"])
511    with it:
512        for i, (tp, fp, mask) in enumerate(it):
513            idx = np.flatnonzero(mask) - 1
514            # only keep the last estimate for tied risk scores
515            tp_no_ties = np.delete(tp, idx)
516            fp_no_ties = np.delete(fp, idx)
517            # Add an extra threshold position
518            # to make sure that the curve starts at (0, 0)
519            tp_no_ties = np.r_[0, tp_no_ties]
520            fp_no_ties = np.r_[0, fp_no_ties]
521            scores[i] = np.trapezoid(tp_no_ties, fp_no_ties)
522
523    if n_times == 1:
524        mean_auc = scores[0]
525    else:
526        surv = SurvivalFunctionEstimator()
527        surv.fit(survival_test)
528        s_times = surv.predict_proba(times)
529        # compute integral of AUC over survival function
530        d = -np.diff(np.r_[1.0, s_times])
531        integral = (scores * d).sum()
532        mean_auc = integral / (1.0 - s_times[-1])
533
534    return scores, mean_auc

Estimator of cumulative/dynamic AUC for right-censored time-to-event data.

The receiver operating characteristic (ROC) curve and the area under the ROC curve (AUC) can be extended to survival data by defining sensitivity (true positive rate) and specificity (true negative rate) as time-dependent measures. Cumulative cases are all individuals that experienced an event prior to or at time \( t \) (\( t_i \leq t \)), whereas dynamic controls are those with \( t_i > t \). The associated cumulative/dynamic AUC quantifies how well a model can distinguish subjects who fail by a given time (\( t_i \leq t \)) from subjects who fail after this time (\( t_i > t \)).

Given an estimator of the \( i \)-th individual's risk score \( \hat{f}(\mathbf{x}_i) \), the cumulative/dynamic AUC at time \( t \) is defined as

$$\widehat{\mathrm{AUC}}(t) = \frac{\sum_{i=1}^n \sum_{j=1}^n I(y_j > t) I(y_i \leq t) \omega_i I(\hat{f}(\mathbf{x}_j) \leq \hat{f}(\mathbf{x}_i))} {(\sum_{i=1}^n I(y_i > t)) (\sum_{i=1}^n I(y_i \leq t) \omega_i)}$$

where \( \omega_i \) are inverse probability of censoring weights (IPCW).

To estimate IPCW, access to survival times from the training data is required to estimate the censoring distribution. Note that this requires that survival times survival_test lie within the range of survival times survival_train. This can be achieved by specifying times accordingly, e.g. by setting times[-1] slightly below the maximum expected follow-up time. IPCW are computed using the Kaplan-Meier estimator, which is restricted to situations where the random censoring assumption holds and censoring is independent of the features.

This function can also be used to evaluate models with time-dependent predictions \( \hat{f}(\mathbf{x}_i, t) \), such as survivalist.ensemble.RandomSurvivalForest (see :ref:User Guide </user_guide/evaluating-survival-models.ipynb#Using-Time-dependent-Risk-Scores>). In this case, estimate must be a 2-d array where estimate[i, j] is the predicted risk score for the i-th instance at time point times[j].

Finally, the function also provides a single summary measure that refers to the mean of the \( \mathrm{AUC}(t) \) over the time range \( (\tau_1, \tau_2) \).

$$\overline{\mathrm{AUC}}(\tau_1, \tau_2) = \frac{1}{\hat{S}(\tau_1) - \hat{S}(\tau_2)} \int_{\tau_1}^{\tau_2} \widehat{\mathrm{AUC}}(t)\,d \hat{S}(t)$$

where \( \hat{S}(t) \) is the Kaplan–Meier estimator of the survival function.

See the :ref:User Guide </user_guide/evaluating-survival-models.ipynb#Time-dependent-Area-under-the-ROC>, 1, 2, 3 for further description.

Parameters

survival_train : structured array, shape = (n_train_samples,) Survival times for training data to estimate the censoring distribution from. A structured array containing the binary event indicator as first field, and time of event or time of censoring as second field.

survival_test : structured array, shape = (n_samples,) Survival times of test data. A structured array containing the binary event indicator as first field, and time of event or time of censoring as second field.

estimate : array-like, shape = (n_samples,) or (n_samples, n_times) Estimated risk of experiencing an event of test data. If estimate is a 1-d array, the same risk score across all time points is used. If estimate is a 2-d array, the risk scores in the j-th column are used to evaluate the j-th time point.

times : array-like, shape = (n_times,) The time points for which the area under the time-dependent ROC curve is computed. Values must be within the range of follow-up times of the test data survival_test.

tied_tol : float, optional, default: 1e-8 The tolerance value for considering ties. If the absolute difference between risk scores is smaller or equal than tied_tol, risk scores are considered tied.

Returns

auc : array, shape = (n_times,) The cumulative/dynamic AUC estimates (evaluated at times). mean_auc : float Summary measure referring to the mean cumulative/dynamic AUC over the specified time range (times[0], times[-1]).

See also

as_cumulative_dynamic_auc_scorer Wrapper class that uses cumulative_dynamic_auc() in its score method instead of the default concordance_index_censored().

References


  1. H. Uno, T. Cai, L. Tian, and L. J. Wei, "Evaluating prediction rules for t-year survivors with censored regression models," Journal of the American Statistical Association, vol. 102, pp. 527–537, 2007. 

  2. H. Hung and C. T. Chiang, "Estimation methods for time-dependent AUC models with survival data," Canadian Journal of Statistics, vol. 38, no. 1, pp. 8–26, 2010. 

  3. J. Lambert and S. Chevret, "Summary measure of discrimination in survival models based on cumulative/dynamic time-dependent ROC curves," Statistical Methods in Medical Research, 2014. 

def integrated_brier_score(survival_train, survival_test, estimate, times):
662def integrated_brier_score(survival_train, survival_test, estimate, times):
663    """The Integrated Brier Score (IBS) provides an overall calculation of
664    the model performance at all available times :math:`t_1 \\leq t \\leq t_\\text{max}`.
665
666    The integrated time-dependent Brier score over the interval
667    :math:`[t_1; t_\\text{max}]` is defined as
668
669    .. math::
670
671        \\mathrm{IBS} = \\int_{t_1}^{t_\\text{max}} \\mathrm{BS}^c(t) d w(t)
672
673    where the weighting function is :math:`w(t) = t / t_\\text{max}`.
674    The integral is estimated via the trapezoidal rule.
675
676    See the :ref:`User Guide </user_guide/evaluating-survival-models.ipynb#Time-dependent-Brier-Score>`
677    and [1]_ for further details.
678
679    Parameters
680    ----------
681    survival_train : structured array, shape = (n_train_samples,)
682        Survival times for training data to estimate the censoring
683        distribution from.
684        A structured array containing the binary event indicator
685        as first field, and time of event or time of censoring as
686        second field.
687
688    survival_test : structured array, shape = (n_samples,)
689        Survival times of test data.
690        A structured array containing the binary event indicator
691        as first field, and time of event or time of censoring as
692        second field.
693
694    estimate : array-like, shape = (n_samples, n_times)
695        Estimated probability of remaining event-free at time points
696        specified by `times`. The value of ``estimate[i]`` must correspond to
697        the estimated probability of remaining event-free up to the time point
698        ``times[i]``. Typically, estimated probabilities are obtained via the
699        survival function returned by an estimator's
700        ``predict_survival_function`` method.
701
702    times : array-like, shape = (n_times,)
703        The time points for which to estimate the Brier score.
704        Values must be within the range of follow-up times of
705        the test data `survival_test`.
706
707    Returns
708    -------
709    ibs : float
710        The integrated Brier score.
711
712    Examples
713    --------
714    >>> import numpy as np
715    >>> from survivalist.datasets import load_gbsg2
716    >>> from survivalist.linear_model import CoxPHSurvivalAnalysis
717    >>> from survivalist.metrics import integrated_brier_score
718    >>> from survivalist.preprocessing import OneHotEncoder
719
720    Load and prepare data.
721
722    >>> X, y = load_gbsg2()
723    >>> X.loc[:, "tgrade"] = X.loc[:, "tgrade"].map(len).astype(int)
724    >>> Xt = OneHotEncoder().fit_transform(X)
725
726    Fit a Cox model.
727
728    >>> est = CoxPHSurvivalAnalysis(ties="efron").fit(Xt, y)
729
730    Retrieve individual survival functions and get probability
731    of remaining event free from 1 year to 5 years (=1825 days).
732
733    >>> survs = est.predict_survival_function(Xt)
734    >>> times = np.arange(365, 1826)
735    >>> preds = np.asarray([[fn(t) for t in times] for fn in survs])
736
737    Compute the integrated Brier score from 1 to 5 years.
738
739    >>> score = integrated_brier_score(y, y, preds, times)
740    >>> print(score)
741    0.1815853064627424
742
743    See also
744    --------
745    brier_score
746        Computes the Brier score at specified time points.
747
748    as_integrated_brier_score_scorer
749        Wrapper class that uses :func:`integrated_brier_score`
750        in its ``score`` method instead of the default
751        :func:`concordance_index_censored`.
752
753    References
754    ----------
755    .. [1] E. Graf, C. Schmoor, W. Sauerbrei, and M. Schumacher,
756           "Assessment and comparison of prognostic classification schemes for survival data,"
757           Statistics in Medicine, vol. 18, no. 17-18, pp. 2529–2545, 1999.
758    """
759    # Computing the brier scores
760    times, brier_scores = brier_score(
761        survival_train, survival_test, estimate, times)
762
763    if times.shape[0] < 2:
764        raise ValueError("At least two time points must be given")
765
766    # Computing the IBS
767    ibs_value = np.trapezoid(brier_scores, times) / (times[-1] - times[0])
768
769    return ibs_value

The Integrated Brier Score (IBS) provides an overall calculation of the model performance at all available times \( t_1 \leq t \leq t_\text{max} \).

The integrated time-dependent Brier score over the interval \( [t_1; t_\text{max}] \) is defined as

$$\mathrm{IBS} = \int_{t_1}^{t_\text{max}} \mathrm{BS}^c(t) d w(t)$$

where the weighting function is \( w(t) = t / t_\text{max} \). The integral is estimated via the trapezoidal rule.

See the :ref:User Guide </user_guide/evaluating-survival-models.ipynb#Time-dependent-Brier-Score> and 1 for further details.

Parameters

survival_train : structured array, shape = (n_train_samples,) Survival times for training data to estimate the censoring distribution from. A structured array containing the binary event indicator as first field, and time of event or time of censoring as second field.

survival_test : structured array, shape = (n_samples,) Survival times of test data. A structured array containing the binary event indicator as first field, and time of event or time of censoring as second field.

estimate : array-like, shape = (n_samples, n_times) Estimated probability of remaining event-free at time points specified by times. The value of estimate[i] must correspond to the estimated probability of remaining event-free up to the time point times[i]. Typically, estimated probabilities are obtained via the survival function returned by an estimator's predict_survival_function method.

times : array-like, shape = (n_times,) The time points for which to estimate the Brier score. Values must be within the range of follow-up times of the test data survival_test.

Returns

ibs : float The integrated Brier score.

Examples

>>> import numpy as np
>>> from survivalist.datasets import load_gbsg2
>>> from survivalist.linear_model import CoxPHSurvivalAnalysis
>>> from survivalist.metrics import integrated_brier_score
>>> from survivalist.preprocessing import OneHotEncoder

Load and prepare data.

>>> X, y = load_gbsg2()
>>> X.loc[:, "tgrade"] = X.loc[:, "tgrade"].map(len).astype(int)
>>> Xt = OneHotEncoder().fit_transform(X)

Fit a Cox model.

>>> est = CoxPHSurvivalAnalysis(ties="efron").fit(Xt, y)

Retrieve individual survival functions and get probability of remaining event free from 1 year to 5 years (=1825 days).

>>> survs = est.predict_survival_function(Xt)
>>> times = np.arange(365, 1826)
>>> preds = np.asarray([[fn(t) for t in times] for fn in survs])

Compute the integrated Brier score from 1 to 5 years.

>>> score = integrated_brier_score(y, y, preds, times)
>>> print(score)
0.1815853064627424

See also

brier_score Computes the Brier score at specified time points.

as_integrated_brier_score_scorer Wrapper class that uses integrated_brier_score() in its score method instead of the default concordance_index_censored().

References


  1. E. Graf, C. Schmoor, W. Sauerbrei, and M. Schumacher, "Assessment and comparison of prognostic classification schemes for survival data," Statistics in Medicine, vol. 18, no. 17-18, pp. 2529–2545, 1999.