survivalist.metrics
1# This program is free software: you can redistribute it and/or modify 2# it under the terms of the GNU General Public License as published by 3# the Free Software Foundation, either version 3 of the License, or 4# (at your option) any later version. 5# 6# This program is distributed in the hope that it will be useful, 7# but WITHOUT ANY WARRANTY; without even the implied warranty of 8# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9# GNU General Public License for more details. 10# 11# You should have received a copy of the GNU General Public License 12# along with this program. If not, see <http://www.gnu.org/licenses/>. 13import numpy as np 14from sklearn.base import BaseEstimator 15from sklearn.utils import check_array, check_consistent_length 16from sklearn.utils.metaestimators import available_if 17from sklearn.utils.validation import check_is_fitted 18 19from .exceptions import NoComparablePairException 20from .nonparametric import ( 21 CensoringDistributionEstimator, 22 SurvivalFunctionEstimator, 23) 24from .util import check_y_survival 25 26__all__ = [ 27 "as_concordance_index_ipcw_scorer", 28 "as_cumulative_dynamic_auc_scorer", 29 "as_integrated_brier_score_scorer", 30 "brier_score", 31 "concordance_index_censored", 32 "concordance_index_ipcw", 33 "cumulative_dynamic_auc", 34 "integrated_brier_score", 35] 36 37 38def _check_estimate_1d(estimate, test_time): 39 estimate = check_array(estimate, ensure_2d=False, input_name="estimate") 40 if estimate.ndim != 1: 41 raise ValueError( 42 f"Expected 1D array, got {estimate.ndim}D array instead:\narray={estimate}.\n") 43 check_consistent_length(test_time, estimate) 44 return estimate 45 46 47def _check_inputs(event_indicator, event_time, estimate): 48 check_consistent_length(event_indicator, event_time, estimate) 49 event_indicator = check_array( 50 event_indicator, ensure_2d=False, input_name="event_indicator") 51 event_time = check_array( 52 event_time, ensure_2d=False, input_name="event_time") 53 estimate = _check_estimate_1d(estimate, event_time) 54 55 if not np.issubdtype(event_indicator.dtype, np.bool_): 56 raise ValueError( 57 f"only boolean arrays are supported as class labels for survival analysis, got {event_indicator.dtype}" 58 ) 59 60 if len(event_time) < 2: 61 raise ValueError("Need a minimum of two samples") 62 63 if not event_indicator.any(): 64 raise ValueError("All samples are censored") 65 66 return event_indicator, event_time, estimate 67 68 69def _check_times(test_time, times): 70 times = check_array(np.atleast_1d( 71 times), ensure_2d=False, input_name="times") 72 times = np.unique(times) 73 74 if times.max() >= test_time.max() or times.min() < test_time.min(): 75 raise ValueError( 76 f"all times must be within follow-up time of test data: [{test_time.min()}; {test_time.max()}[" 77 ) 78 79 return times 80 81 82def _check_estimate_2d(estimate, test_time, time_points, estimator): 83 estimate = check_array( 84 estimate, 85 ensure_2d=False, 86 allow_nd=False, 87 input_name="estimate", 88 estimator=estimator, 89 ) 90 time_points = _check_times(test_time, time_points) 91 check_consistent_length(test_time, estimate) 92 93 if estimate.ndim == 2 and estimate.shape[1] != time_points.shape[0]: 94 raise ValueError( 95 f"expected estimate with {time_points.shape[0]} columns, but got {estimate.shape[1]}") 96 97 return estimate, time_points 98 99 100def _iter_comparable(event_indicator, event_time, order): 101 n_samples = len(event_time) 102 tied_time = 0 103 i = 0 104 while i < n_samples - 1: 105 time_i = event_time[order[i]] 106 end = i + 1 107 while end < n_samples and event_time[order[end]] == time_i: 108 end += 1 109 110 # check for tied event times 111 event_at_same_time = event_indicator[order[i:end]] 112 censored_at_same_time = ~event_at_same_time 113 114 mask = np.zeros(n_samples, dtype=bool) 115 mask[end:] = True 116 # an event is comparable to censored samples at same time point 117 mask[i:end] = censored_at_same_time 118 119 for j in range(i, end): 120 if event_indicator[order[j]]: 121 tied_time += censored_at_same_time.sum() 122 yield (j, mask, tied_time) 123 i = end 124 125 126def _estimate_concordance_index(event_indicator, event_time, estimate, weights, tied_tol=1e-8): 127 order = np.argsort(event_time) 128 129 tied_time = None 130 131 concordant = 0 132 discordant = 0 133 tied_risk = 0 134 numerator = 0.0 135 denominator = 0.0 136 for ind, mask, tied_time in _iter_comparable(event_indicator, event_time, order): 137 est_i = estimate[order[ind]] 138 event_i = event_indicator[order[ind]] 139 w_i = weights[order[ind]] 140 141 est = estimate[order[mask]] 142 143 assert event_i, f"got censored sample at index {order[ind]}, but expected uncensored" 144 145 ties = np.absolute(est - est_i) <= tied_tol 146 n_ties = ties.sum() 147 # an event should have a higher score 148 con = est < est_i 149 n_con = con[~ties].sum() 150 151 numerator += w_i * n_con + 0.5 * w_i * n_ties 152 denominator += w_i * mask.sum() 153 154 tied_risk += n_ties 155 concordant += n_con 156 discordant += est.size - n_con - n_ties 157 158 if tied_time is None: 159 raise NoComparablePairException( 160 "Data has no comparable pairs, cannot estimate concordance index.") 161 162 cindex = numerator / denominator 163 return cindex, concordant, discordant, tied_risk, tied_time 164 165 166def concordance_index_censored(event_indicator, event_time, estimate, tied_tol=1e-8): 167 """Concordance index for right-censored data 168 169 The concordance index is defined as the proportion of all comparable pairs 170 in which the predictions and outcomes are concordant. 171 172 Two samples are comparable if (i) both of them experienced an event (at different times), 173 or (ii) the one with a shorter observed survival time experienced an event, in which case 174 the event-free subject "outlived" the other. A pair is not comparable if they experienced 175 events at the same time. 176 177 Concordance intuitively means that two samples were ordered correctly by the model. 178 More specifically, two samples are concordant, if the one with a higher estimated 179 risk score has a shorter actual survival time. 180 When predicted risks are identical for a pair, 0.5 rather than 1 is added to the count 181 of concordant pairs. 182 183 See the :ref:`User Guide </user_guide/evaluating-survival-models.ipynb>` 184 and [1]_ for further description. 185 186 Parameters 187 ---------- 188 event_indicator : array-like, shape = (n_samples,) 189 Boolean array denotes whether an event occurred 190 191 event_time : array-like, shape = (n_samples,) 192 Array containing the time of an event or time of censoring 193 194 estimate : array-like, shape = (n_samples,) 195 Estimated risk of experiencing an event 196 197 tied_tol : float, optional, default: 1e-8 198 The tolerance value for considering ties. 199 If the absolute difference between risk scores is smaller 200 or equal than `tied_tol`, risk scores are considered tied. 201 202 Returns 203 ------- 204 cindex : float 205 Concordance index 206 207 concordant : int 208 Number of concordant pairs 209 210 discordant : int 211 Number of discordant pairs 212 213 tied_risk : int 214 Number of pairs having tied estimated risks 215 216 tied_time : int 217 Number of comparable pairs sharing the same time 218 219 See also 220 -------- 221 concordance_index_ipcw 222 Alternative estimator of the concordance index with less bias. 223 224 References 225 ---------- 226 .. [1] Harrell, F.E., Califf, R.M., Pryor, D.B., Lee, K.L., Rosati, R.A, 227 "Multivariable prognostic models: issues in developing models, 228 evaluating assumptions and adequacy, and measuring and reducing errors", 229 Statistics in Medicine, 15(4), 361-87, 1996. 230 """ 231 event_indicator, event_time, estimate = _check_inputs( 232 event_indicator, event_time, estimate) 233 234 w = np.ones_like(estimate) 235 236 return _estimate_concordance_index(event_indicator, event_time, estimate, w, tied_tol) 237 238 239def concordance_index_ipcw(survival_train, survival_test, estimate, tau=None, tied_tol=1e-8): 240 """Concordance index for right-censored data based on inverse probability of censoring weights. 241 242 This is an alternative to the estimator in :func:`concordance_index_censored` 243 that does not depend on the distribution of censoring times in the test data. 244 Therefore, the estimate is unbiased and consistent for a population concordance 245 measure that is free of censoring. 246 247 It is based on inverse probability of censoring weights, thus requires 248 access to survival times from the training data to estimate the censoring 249 distribution. Note that this requires that survival times `survival_test` 250 lie within the range of survival times `survival_train`. This can be 251 achieved by specifying the truncation time `tau`. 252 The resulting `cindex` tells how well the given prediction model works in 253 predicting events that occur in the time range from 0 to `tau`. 254 255 The estimator uses the Kaplan-Meier estimator to estimate the 256 censoring survivor function. Therefore, it is restricted to 257 situations where the random censoring assumption holds and 258 censoring is independent of the features. 259 260 See the :ref:`User Guide </user_guide/evaluating-survival-models.ipynb>` 261 and [1]_ for further description. 262 263 Parameters 264 ---------- 265 survival_train : structured array, shape = (n_train_samples,) 266 Survival times for training data to estimate the censoring 267 distribution from. 268 A structured array containing the binary event indicator 269 as first field, and time of event or time of censoring as 270 second field. 271 272 survival_test : structured array, shape = (n_samples,) 273 Survival times of test data. 274 A structured array containing the binary event indicator 275 as first field, and time of event or time of censoring as 276 second field. 277 278 estimate : array-like, shape = (n_samples,) 279 Estimated risk of experiencing an event of test data. 280 281 tau : float, optional 282 Truncation time. The survival function for the underlying 283 censoring time distribution :math:`D` needs to be positive 284 at `tau`, i.e., `tau` should be chosen such that the 285 probability of being censored after time `tau` is non-zero: 286 :math:`P(D > \\tau) > 0`. If `None`, no truncation is performed. 287 288 tied_tol : float, optional, default: 1e-8 289 The tolerance value for considering ties. 290 If the absolute difference between risk scores is smaller 291 or equal than `tied_tol`, risk scores are considered tied. 292 293 Returns 294 ------- 295 cindex : float 296 Concordance index 297 298 concordant : int 299 Number of concordant pairs 300 301 discordant : int 302 Number of discordant pairs 303 304 tied_risk : int 305 Number of pairs having tied estimated risks 306 307 tied_time : int 308 Number of comparable pairs sharing the same time 309 310 See also 311 -------- 312 concordance_index_censored 313 Simpler estimator of the concordance index. 314 315 as_concordance_index_ipcw_scorer 316 Wrapper class that uses :func:`concordance_index_ipcw` 317 in its ``score`` method instead of the default 318 :func:`concordance_index_censored`. 319 320 References 321 ---------- 322 .. [1] Uno, H., Cai, T., Pencina, M. J., D’Agostino, R. B., & Wei, L. J. (2011). 323 "On the C-statistics for evaluating overall adequacy of risk prediction 324 procedures with censored survival data". 325 Statistics in Medicine, 30(10), 1105–1117. 326 """ 327 test_event, test_time = check_y_survival(survival_test) 328 329 if tau is not None: 330 mask = test_time < tau 331 survival_test = survival_test[mask] 332 333 estimate = _check_estimate_1d(estimate, test_time) 334 335 cens = CensoringDistributionEstimator() 336 cens.fit(survival_train) 337 ipcw_test = cens.predict_ipcw(survival_test) 338 if tau is None: 339 ipcw = ipcw_test 340 else: 341 ipcw = np.empty(estimate.shape[0], dtype=ipcw_test.dtype) 342 ipcw[mask] = ipcw_test 343 ipcw[~mask] = 0 344 345 w = np.square(ipcw) 346 347 return _estimate_concordance_index(test_event, test_time, estimate, w, tied_tol) 348 349 350def cumulative_dynamic_auc(survival_train, survival_test, estimate, times, tied_tol=1e-8): 351 """Estimator of cumulative/dynamic AUC for right-censored time-to-event data. 352 353 The receiver operating characteristic (ROC) curve and the area under the 354 ROC curve (AUC) can be extended to survival data by defining 355 sensitivity (true positive rate) and specificity (true negative rate) 356 as time-dependent measures. *Cumulative cases* are all individuals that 357 experienced an event prior to or at time :math:`t` (:math:`t_i \\leq t`), 358 whereas *dynamic controls* are those with :math:`t_i > t`. 359 The associated cumulative/dynamic AUC quantifies how well a model can 360 distinguish subjects who fail by a given time (:math:`t_i \\leq t`) from 361 subjects who fail after this time (:math:`t_i > t`). 362 363 Given an estimator of the :math:`i`-th individual's risk score 364 :math:`\\hat{f}(\\mathbf{x}_i)`, the cumulative/dynamic AUC at time 365 :math:`t` is defined as 366 367 .. math:: 368 369 \\widehat{\\mathrm{AUC}}(t) = 370 \\frac{\\sum_{i=1}^n \\sum_{j=1}^n I(y_j > t) I(y_i \\leq t) \\omega_i 371 I(\\hat{f}(\\mathbf{x}_j) \\leq \\hat{f}(\\mathbf{x}_i))} 372 {(\\sum_{i=1}^n I(y_i > t)) (\\sum_{i=1}^n I(y_i \\leq t) \\omega_i)} 373 374 where :math:`\\omega_i` are inverse probability of censoring weights (IPCW). 375 376 To estimate IPCW, access to survival times from the training data is required 377 to estimate the censoring distribution. Note that this requires that survival 378 times `survival_test` lie within the range of survival times `survival_train`. 379 This can be achieved by specifying `times` accordingly, e.g. by setting 380 `times[-1]` slightly below the maximum expected follow-up time. 381 IPCW are computed using the Kaplan-Meier estimator, which is 382 restricted to situations where the random censoring assumption holds and 383 censoring is independent of the features. 384 385 This function can also be used to evaluate models with time-dependent predictions 386 :math:`\\hat{f}(\\mathbf{x}_i, t)`, such as :class:`survivalist.ensemble.RandomSurvivalForest` 387 (see :ref:`User Guide </user_guide/evaluating-survival-models.ipynb#Using-Time-dependent-Risk-Scores>`). 388 In this case, `estimate` must be a 2-d array where ``estimate[i, j]`` is the 389 predicted risk score for the i-th instance at time point ``times[j]``. 390 391 Finally, the function also provides a single summary measure that refers to the mean 392 of the :math:`\\mathrm{AUC}(t)` over the time range :math:`(\\tau_1, \\tau_2)`. 393 394 .. math:: 395 396 \\overline{\\mathrm{AUC}}(\\tau_1, \\tau_2) = 397 \\frac{1}{\\hat{S}(\\tau_1) - \\hat{S}(\\tau_2)} 398 \\int_{\\tau_1}^{\\tau_2} \\widehat{\\mathrm{AUC}}(t)\\,d \\hat{S}(t) 399 400 where :math:`\\hat{S}(t)` is the Kaplan–Meier estimator of the survival function. 401 402 See the :ref:`User Guide </user_guide/evaluating-survival-models.ipynb#Time-dependent-Area-under-the-ROC>`, 403 [1]_, [2]_, [3]_ for further description. 404 405 Parameters 406 ---------- 407 survival_train : structured array, shape = (n_train_samples,) 408 Survival times for training data to estimate the censoring 409 distribution from. 410 A structured array containing the binary event indicator 411 as first field, and time of event or time of censoring as 412 second field. 413 414 survival_test : structured array, shape = (n_samples,) 415 Survival times of test data. 416 A structured array containing the binary event indicator 417 as first field, and time of event or time of censoring as 418 second field. 419 420 estimate : array-like, shape = (n_samples,) or (n_samples, n_times) 421 Estimated risk of experiencing an event of test data. 422 If `estimate` is a 1-d array, the same risk score across all time 423 points is used. If `estimate` is a 2-d array, the risk scores in the 424 j-th column are used to evaluate the j-th time point. 425 426 times : array-like, shape = (n_times,) 427 The time points for which the area under the 428 time-dependent ROC curve is computed. Values must be 429 within the range of follow-up times of the test data 430 `survival_test`. 431 432 tied_tol : float, optional, default: 1e-8 433 The tolerance value for considering ties. 434 If the absolute difference between risk scores is smaller 435 or equal than `tied_tol`, risk scores are considered tied. 436 437 Returns 438 ------- 439 auc : array, shape = (n_times,) 440 The cumulative/dynamic AUC estimates (evaluated at `times`). 441 mean_auc : float 442 Summary measure referring to the mean cumulative/dynamic AUC 443 over the specified time range `(times[0], times[-1])`. 444 445 See also 446 -------- 447 as_cumulative_dynamic_auc_scorer 448 Wrapper class that uses :func:`cumulative_dynamic_auc` 449 in its ``score`` method instead of the default 450 :func:`concordance_index_censored`. 451 452 References 453 ---------- 454 .. [1] H. Uno, T. Cai, L. Tian, and L. J. Wei, 455 "Evaluating prediction rules for t-year survivors with censored regression models," 456 Journal of the American Statistical Association, vol. 102, pp. 527–537, 2007. 457 .. [2] H. Hung and C. T. Chiang, 458 "Estimation methods for time-dependent AUC models with survival data," 459 Canadian Journal of Statistics, vol. 38, no. 1, pp. 8–26, 2010. 460 .. [3] J. Lambert and S. Chevret, 461 "Summary measure of discrimination in survival models based on cumulative/dynamic time-dependent ROC curves," 462 Statistical Methods in Medical Research, 2014. 463 """ 464 test_event, test_time = check_y_survival(survival_test) 465 estimate, times = _check_estimate_2d( 466 estimate, test_time, times, estimator="cumulative_dynamic_auc") 467 468 n_samples = estimate.shape[0] 469 n_times = times.shape[0] 470 if estimate.ndim == 1: 471 estimate = np.broadcast_to( 472 estimate[:, np.newaxis], (n_samples, n_times)) 473 474 # fit and transform IPCW 475 cens = CensoringDistributionEstimator() 476 cens.fit(survival_train) 477 ipcw = cens.predict_ipcw(survival_test) 478 479 # expand arrays to (n_samples, n_times) shape 480 test_time = np.broadcast_to(test_time[:, np.newaxis], (n_samples, n_times)) 481 test_event = np.broadcast_to( 482 test_event[:, np.newaxis], (n_samples, n_times)) 483 times_2d = np.broadcast_to(times, (n_samples, n_times)) 484 ipcw = np.broadcast_to(ipcw[:, np.newaxis], (n_samples, n_times)) 485 486 # sort each time point (columns) by risk score (descending) 487 o = np.argsort(-estimate, axis=0) 488 test_time = np.take_along_axis(test_time, o, axis=0) 489 test_event = np.take_along_axis(test_event, o, axis=0) 490 estimate = np.take_along_axis(estimate, o, axis=0) 491 ipcw = np.take_along_axis(ipcw, o, axis=0) 492 493 is_case = (test_time <= times_2d) & test_event 494 is_control = test_time > times_2d 495 n_controls = is_control.sum(axis=0) 496 497 # prepend row of infinity values 498 estimate_diff = np.concatenate( 499 (np.broadcast_to(np.inf, (1, n_times)), estimate)) 500 is_tied = np.absolute(np.diff(estimate_diff, axis=0)) <= tied_tol 501 502 cumsum_tp = np.cumsum(is_case * ipcw, axis=0) 503 cumsum_fp = np.cumsum(is_control, axis=0) 504 true_pos = cumsum_tp / cumsum_tp[-1] 505 false_pos = cumsum_fp / n_controls 506 507 scores = np.empty(n_times, dtype=float) 508 it = np.nditer((true_pos, false_pos, is_tied), 509 order="F", flags=["external_loop"]) 510 with it: 511 for i, (tp, fp, mask) in enumerate(it): 512 idx = np.flatnonzero(mask) - 1 513 # only keep the last estimate for tied risk scores 514 tp_no_ties = np.delete(tp, idx) 515 fp_no_ties = np.delete(fp, idx) 516 # Add an extra threshold position 517 # to make sure that the curve starts at (0, 0) 518 tp_no_ties = np.r_[0, tp_no_ties] 519 fp_no_ties = np.r_[0, fp_no_ties] 520 scores[i] = np.trapezoid(tp_no_ties, fp_no_ties) 521 522 if n_times == 1: 523 mean_auc = scores[0] 524 else: 525 surv = SurvivalFunctionEstimator() 526 surv.fit(survival_test) 527 s_times = surv.predict_proba(times) 528 # compute integral of AUC over survival function 529 d = -np.diff(np.r_[1.0, s_times]) 530 integral = (scores * d).sum() 531 mean_auc = integral / (1.0 - s_times[-1]) 532 533 return scores, mean_auc 534 535 536def brier_score(survival_train, survival_test, estimate, times): 537 """Estimate the time-dependent Brier score for right censored data. 538 539 The time-dependent Brier score is the mean squared error at time point :math:`t`: 540 541 .. math:: 542 543 \\mathrm{BS}^c(t) = \\frac{1}{n} \\sum_{i=1}^n I(y_i \\leq t \\land \\delta_i = 1) 544 \\frac{(0 - \\hat{\\pi}(t | \\mathbf{x}_i))^2}{\\hat{G}(y_i)} + I(y_i > t) 545 \\frac{(1 - \\hat{\\pi}(t | \\mathbf{x}_i))^2}{\\hat{G}(t)} , 546 547 where :math:`\\hat{\\pi}(t | \\mathbf{x})` is the predicted probability of 548 remaining event-free up to time point :math:`t` for a feature vector :math:`\\mathbf{x}`, 549 and :math:`1/\\hat{G}(t)` is a inverse probability of censoring weight, estimated by 550 the Kaplan-Meier estimator. 551 552 See the :ref:`User Guide </user_guide/evaluating-survival-models.ipynb#Time-dependent-Brier-Score>` 553 and [1]_ for details. 554 555 Parameters 556 ---------- 557 survival_train : structured array, shape = (n_train_samples,) 558 Survival times for training data to estimate the censoring 559 distribution from. 560 A structured array containing the binary event indicator 561 as first field, and time of event or time of censoring as 562 second field. 563 564 survival_test : structured array, shape = (n_samples,) 565 Survival times of test data. 566 A structured array containing the binary event indicator 567 as first field, and time of event or time of censoring as 568 second field. 569 570 estimate : array-like, shape = (n_samples, n_times) 571 Estimated probability of remaining event-free at time points 572 specified by `times`. The value of ``estimate[i]`` must correspond to 573 the estimated probability of remaining event-free up to the time point 574 ``times[i]``. Typically, estimated probabilities are obtained via the 575 survival function returned by an estimator's 576 ``predict_survival_function`` method. 577 578 times : array-like, shape = (n_times,) 579 The time points for which to estimate the Brier score. 580 Values must be within the range of follow-up times of 581 the test data `survival_test`. 582 583 Returns 584 ------- 585 times : array, shape = (n_times,) 586 Unique time points at which the brier scores was estimated. 587 588 brier_scores : array , shape = (n_times,) 589 Values of the brier score. 590 591 Examples 592 -------- 593 >>> from survivalist.datasets import load_gbsg2 594 >>> from survivalist.linear_model import CoxPHSurvivalAnalysis 595 >>> from survivalist.metrics import brier_score 596 >>> from survivalist.preprocessing import OneHotEncoder 597 598 Load and prepare data. 599 600 >>> X, y = load_gbsg2() 601 >>> X.loc[:, "tgrade"] = X.loc[:, "tgrade"].map(len).astype(int) 602 >>> Xt = OneHotEncoder().fit_transform(X) 603 604 Fit a Cox model. 605 606 >>> est = CoxPHSurvivalAnalysis(ties="efron").fit(Xt, y) 607 608 Retrieve individual survival functions and get probability 609 of remaining event free up to 5 years (=1825 days). 610 611 >>> survs = est.predict_survival_function(Xt) 612 >>> preds = [fn(1825) for fn in survs] 613 614 Compute the Brier score at 5 years. 615 616 >>> times, score = brier_score(y, y, preds, 1825) 617 >>> print(score) 618 [0.20881843] 619 620 See also 621 -------- 622 integrated_brier_score 623 Computes the average Brier score over all time points. 624 625 References 626 ---------- 627 .. [1] E. Graf, C. Schmoor, W. Sauerbrei, and M. Schumacher, 628 "Assessment and comparison of prognostic classification schemes for survival data," 629 Statistics in Medicine, vol. 18, no. 17-18, pp. 2529–2545, 1999. 630 """ 631 test_event, test_time = check_y_survival(survival_test) 632 estimate, times = _check_estimate_2d( 633 estimate, test_time, times, estimator="brier_score") 634 if estimate.ndim == 1 and times.shape[0] == 1: 635 estimate = estimate.reshape(-1, 1) 636 637 # fit IPCW estimator 638 cens = CensoringDistributionEstimator().fit(survival_train) 639 # calculate inverse probability of censoring weight at current time point t. 640 prob_cens_t = cens.predict_proba(times) 641 prob_cens_t[prob_cens_t == 0] = np.inf 642 # calculate inverse probability of censoring weights at observed time point 643 prob_cens_y = cens.predict_proba(test_time) 644 prob_cens_y[prob_cens_y == 0] = np.inf 645 646 # Calculating the brier scores at each time point 647 brier_scores = np.empty(times.shape[0], dtype=float) 648 for i, t in enumerate(times): 649 est = estimate[:, i] 650 is_case = (test_time <= t) & test_event 651 is_control = test_time > t 652 653 brier_scores[i] = np.mean( 654 np.square(est) * is_case.astype(int) / prob_cens_y 655 + np.square(1.0 - est) * is_control.astype(int) / prob_cens_t[i] 656 ) 657 658 return times, brier_scores 659 660 661def integrated_brier_score(survival_train, survival_test, estimate, times): 662 """The Integrated Brier Score (IBS) provides an overall calculation of 663 the model performance at all available times :math:`t_1 \\leq t \\leq t_\\text{max}`. 664 665 The integrated time-dependent Brier score over the interval 666 :math:`[t_1; t_\\text{max}]` is defined as 667 668 .. math:: 669 670 \\mathrm{IBS} = \\int_{t_1}^{t_\\text{max}} \\mathrm{BS}^c(t) d w(t) 671 672 where the weighting function is :math:`w(t) = t / t_\\text{max}`. 673 The integral is estimated via the trapezoidal rule. 674 675 See the :ref:`User Guide </user_guide/evaluating-survival-models.ipynb#Time-dependent-Brier-Score>` 676 and [1]_ for further details. 677 678 Parameters 679 ---------- 680 survival_train : structured array, shape = (n_train_samples,) 681 Survival times for training data to estimate the censoring 682 distribution from. 683 A structured array containing the binary event indicator 684 as first field, and time of event or time of censoring as 685 second field. 686 687 survival_test : structured array, shape = (n_samples,) 688 Survival times of test data. 689 A structured array containing the binary event indicator 690 as first field, and time of event or time of censoring as 691 second field. 692 693 estimate : array-like, shape = (n_samples, n_times) 694 Estimated probability of remaining event-free at time points 695 specified by `times`. The value of ``estimate[i]`` must correspond to 696 the estimated probability of remaining event-free up to the time point 697 ``times[i]``. Typically, estimated probabilities are obtained via the 698 survival function returned by an estimator's 699 ``predict_survival_function`` method. 700 701 times : array-like, shape = (n_times,) 702 The time points for which to estimate the Brier score. 703 Values must be within the range of follow-up times of 704 the test data `survival_test`. 705 706 Returns 707 ------- 708 ibs : float 709 The integrated Brier score. 710 711 Examples 712 -------- 713 >>> import numpy as np 714 >>> from survivalist.datasets import load_gbsg2 715 >>> from survivalist.linear_model import CoxPHSurvivalAnalysis 716 >>> from survivalist.metrics import integrated_brier_score 717 >>> from survivalist.preprocessing import OneHotEncoder 718 719 Load and prepare data. 720 721 >>> X, y = load_gbsg2() 722 >>> X.loc[:, "tgrade"] = X.loc[:, "tgrade"].map(len).astype(int) 723 >>> Xt = OneHotEncoder().fit_transform(X) 724 725 Fit a Cox model. 726 727 >>> est = CoxPHSurvivalAnalysis(ties="efron").fit(Xt, y) 728 729 Retrieve individual survival functions and get probability 730 of remaining event free from 1 year to 5 years (=1825 days). 731 732 >>> survs = est.predict_survival_function(Xt) 733 >>> times = np.arange(365, 1826) 734 >>> preds = np.asarray([[fn(t) for t in times] for fn in survs]) 735 736 Compute the integrated Brier score from 1 to 5 years. 737 738 >>> score = integrated_brier_score(y, y, preds, times) 739 >>> print(score) 740 0.1815853064627424 741 742 See also 743 -------- 744 brier_score 745 Computes the Brier score at specified time points. 746 747 as_integrated_brier_score_scorer 748 Wrapper class that uses :func:`integrated_brier_score` 749 in its ``score`` method instead of the default 750 :func:`concordance_index_censored`. 751 752 References 753 ---------- 754 .. [1] E. Graf, C. Schmoor, W. Sauerbrei, and M. Schumacher, 755 "Assessment and comparison of prognostic classification schemes for survival data," 756 Statistics in Medicine, vol. 18, no. 17-18, pp. 2529–2545, 1999. 757 """ 758 # Computing the brier scores 759 times, brier_scores = brier_score( 760 survival_train, survival_test, estimate, times) 761 762 if times.shape[0] < 2: 763 raise ValueError("At least two time points must be given") 764 765 # Computing the IBS 766 ibs_value = np.trapezoid(brier_scores, times) / (times[-1] - times[0]) 767 768 return ibs_value 769 770 771def _estimator_has(attr): 772 """Check that meta_estimator has `attr`. 773 774 Used together with `available_if`.""" 775 776 def check(self): 777 # raise original `AttributeError` if `attr` does not exist 778 getattr(self.estimator_, attr) 779 return True 780 781 return check 782 783 784class _ScoreOverrideMixin: 785 def __init__( 786 self, 787 estimator, 788 predict_func, 789 score_func, 790 score_index, 791 greater_is_better, 792 ): 793 if not hasattr(estimator, predict_func): 794 raise AttributeError( 795 f"{estimator!r} object has no attribute {predict_func!r}") 796 797 self.estimator = estimator 798 self._predict_func = predict_func 799 self._score_func = score_func 800 self._score_index = score_index 801 self._sign = 1 if greater_is_better else -1 802 803 def _get_score_params(self): 804 """Return dict of parameters passed to ``score_func``.""" 805 params = self.get_params(deep=False) 806 del params["estimator"] 807 return params 808 809 def fit(self, X, y, **fit_params): 810 self._train_y = np.array(y, copy=True) 811 self.estimator_ = self.estimator.fit(X, y, **fit_params) 812 return self 813 814 def _do_predict(self, X): 815 predict_func = getattr(self.estimator_, self._predict_func) 816 return predict_func(X) 817 818 def score(self, X, y): 819 """Returns the score on the given data. 820 821 Parameters 822 ---------- 823 X : array-like of shape (n_samples, n_features) 824 Input data, where n_samples is the number of samples and 825 n_features is the number of features. 826 827 y : array-like of shape (n_samples,) 828 Target relative to X for classification or regression; 829 None for unsupervised learning. 830 831 Returns 832 ------- 833 score : float 834 """ 835 estimate = self._do_predict(X) 836 score = self._score_func( 837 survival_train=self._train_y, 838 survival_test=y, 839 estimate=estimate, 840 **self._get_score_params(), 841 ) 842 if self._score_index is not None: 843 score = score[self._score_index] 844 return self._sign * score 845 846 @available_if(_estimator_has("predict")) 847 def predict(self, X): 848 """Call predict on the estimator. 849 850 Only available if estimator supports ``predict``. 851 852 Parameters 853 ---------- 854 X : indexable, length n_samples 855 Must fulfill the input assumptions of the 856 underlying estimator. 857 """ 858 check_is_fitted(self, "estimator_") 859 return self.estimator_.predict(X) 860 861 @available_if(_estimator_has("predict_cumulative_hazard_function")) 862 def predict_cumulative_hazard_function(self, X): 863 """Call predict_cumulative_hazard_function on the estimator. 864 865 Only available if estimator supports ``predict_cumulative_hazard_function``. 866 867 Parameters 868 ---------- 869 X : indexable, length n_samples 870 Must fulfill the input assumptions of the 871 underlying estimator. 872 """ 873 check_is_fitted(self, "estimator_") 874 return self.estimator_.predict_cumulative_hazard_function(X) 875 876 @available_if(_estimator_has("predict_survival_function")) 877 def predict_survival_function(self, X): 878 """Call predict_survival_function on the estimator. 879 880 Only available if estimator supports ``predict_survival_function``. 881 882 Parameters 883 ---------- 884 X : indexable, length n_samples 885 Must fulfill the input assumptions of the 886 underlying estimator. 887 """ 888 check_is_fitted(self, "estimator_") 889 return self.estimator_.predict_survival_function(X) 890 891 892class as_cumulative_dynamic_auc_scorer(_ScoreOverrideMixin, BaseEstimator): 893 """Wraps an estimator to use :func:`cumulative_dynamic_auc` as ``score`` function. 894 895 See the :ref:`User Guide </user_guide/evaluating-survival-models.ipynb#Using-Metrics-in-Hyper-parameter-Search>` 896 for using it for hyper-parameter optimization. 897 898 Parameters 899 ---------- 900 estimator : object 901 Instance of an estimator. 902 903 times : array-like, shape = (n_times,) 904 The time points for which the area under the 905 time-dependent ROC curve is computed. Values must be 906 within the range of follow-up times of the test data 907 `survival_test`. 908 909 tied_tol : float, optional, default: 1e-8 910 The tolerance value for considering ties. 911 If the absolute difference between risk scores is smaller 912 or equal than `tied_tol`, risk scores are considered tied. 913 914 Attributes 915 ---------- 916 estimator_ : estimator 917 Estimator that was fit. 918 919 See also 920 -------- 921 cumulative_dynamic_auc 922 """ 923 924 def __init__(self, estimator, times, tied_tol=1e-8): 925 super().__init__( 926 estimator=estimator, 927 predict_func="predict", 928 score_func=cumulative_dynamic_auc, 929 score_index=1, 930 greater_is_better=True, 931 ) 932 self.times = times 933 self.tied_tol = tied_tol 934 935 936class as_concordance_index_ipcw_scorer(_ScoreOverrideMixin, BaseEstimator): 937 """Wraps an estimator to use :func:`concordance_index_ipcw` as ``score`` function. 938 939 See the :ref:`User Guide </user_guide/evaluating-survival-models.ipynb#Using-Metrics-in-Hyper-parameter-Search>` 940 for using it for hyper-parameter optimization. 941 942 Parameters 943 ---------- 944 estimator : object 945 Instance of an estimator. 946 947 tau : float, optional 948 Truncation time. The survival function for the underlying 949 censoring time distribution :math:`D` needs to be positive 950 at `tau`, i.e., `tau` should be chosen such that the 951 probability of being censored after time `tau` is non-zero: 952 :math:`P(D > \\tau) > 0`. If `None`, no truncation is performed. 953 954 tied_tol : float, optional, default: 1e-8 955 The tolerance value for considering ties. 956 If the absolute difference between risk scores is smaller 957 or equal than `tied_tol`, risk scores are considered tied. 958 959 Attributes 960 ---------- 961 estimator_ : estimator 962 Estimator that was fit. 963 964 See also 965 -------- 966 concordance_index_ipcw 967 """ 968 969 def __init__(self, estimator, tau=None, tied_tol=1e-8): 970 super().__init__( 971 estimator=estimator, 972 predict_func="predict", 973 score_func=concordance_index_ipcw, 974 score_index=0, 975 greater_is_better=True, 976 ) 977 self.tau = tau 978 self.tied_tol = tied_tol 979 980 981class as_integrated_brier_score_scorer(_ScoreOverrideMixin, BaseEstimator): 982 """Wraps an estimator to use the negative of :func:`integrated_brier_score` as ``score`` function. 983 984 The estimator needs to be able to estimate survival functions via 985 a ``predict_survival_function`` method. 986 987 See the :ref:`User Guide </user_guide/evaluating-survival-models.ipynb#Using-Metrics-in-Hyper-parameter-Search>` 988 for using it for hyper-parameter optimization. 989 990 Parameters 991 ---------- 992 estimator : object 993 Instance of an estimator that provides ``predict_survival_function``. 994 995 times : array-like, shape = (n_times,) 996 The time points for which to estimate the Brier score. 997 Values must be within the range of follow-up times of 998 the test data `survival_test`. 999 1000 Attributes 1001 ---------- 1002 estimator_ : estimator 1003 Estimator that was fit. 1004 1005 See also 1006 -------- 1007 integrated_brier_score 1008 """ 1009 1010 def __init__(self, estimator, times): 1011 super().__init__( 1012 estimator=estimator, 1013 predict_func="predict_survival_function", 1014 score_func=integrated_brier_score, 1015 score_index=None, 1016 greater_is_better=False, 1017 ) 1018 self.times = times 1019 1020 def _do_predict(self, X): 1021 predict_func = getattr(self.estimator_, self._predict_func) 1022 surv_fns = predict_func(X) 1023 times = self.times 1024 estimates = np.empty((len(surv_fns), len(times))) 1025 for i, fn in enumerate(surv_fns): 1026 estimates[i, :] = fn(times) 1027 return estimates
937class as_concordance_index_ipcw_scorer(_ScoreOverrideMixin, BaseEstimator): 938 """Wraps an estimator to use :func:`concordance_index_ipcw` as ``score`` function. 939 940 See the :ref:`User Guide </user_guide/evaluating-survival-models.ipynb#Using-Metrics-in-Hyper-parameter-Search>` 941 for using it for hyper-parameter optimization. 942 943 Parameters 944 ---------- 945 estimator : object 946 Instance of an estimator. 947 948 tau : float, optional 949 Truncation time. The survival function for the underlying 950 censoring time distribution :math:`D` needs to be positive 951 at `tau`, i.e., `tau` should be chosen such that the 952 probability of being censored after time `tau` is non-zero: 953 :math:`P(D > \\tau) > 0`. If `None`, no truncation is performed. 954 955 tied_tol : float, optional, default: 1e-8 956 The tolerance value for considering ties. 957 If the absolute difference between risk scores is smaller 958 or equal than `tied_tol`, risk scores are considered tied. 959 960 Attributes 961 ---------- 962 estimator_ : estimator 963 Estimator that was fit. 964 965 See also 966 -------- 967 concordance_index_ipcw 968 """ 969 970 def __init__(self, estimator, tau=None, tied_tol=1e-8): 971 super().__init__( 972 estimator=estimator, 973 predict_func="predict", 974 score_func=concordance_index_ipcw, 975 score_index=0, 976 greater_is_better=True, 977 ) 978 self.tau = tau 979 self.tied_tol = tied_tol
Wraps an estimator to use concordance_index_ipcw() as score function.
See the :ref:User Guide </user_guide/evaluating-survival-models.ipynb#Using-Metrics-in-Hyper-parameter-Search>
for using it for hyper-parameter optimization.
Parameters
estimator : object Instance of an estimator.
tau : float, optional
Truncation time. The survival function for the underlying
censoring time distribution \( D \) needs to be positive
at tau, i.e., tau should be chosen such that the
probability of being censored after time tau is non-zero:
\( P(D > \tau) > 0 \). If None, no truncation is performed.
tied_tol : float, optional, default: 1e-8
The tolerance value for considering ties.
If the absolute difference between risk scores is smaller
or equal than tied_tol, risk scores are considered tied.
Attributes
estimator_ : estimator Estimator that was fit.
See also
concordance_index_ipcw
893class as_cumulative_dynamic_auc_scorer(_ScoreOverrideMixin, BaseEstimator): 894 """Wraps an estimator to use :func:`cumulative_dynamic_auc` as ``score`` function. 895 896 See the :ref:`User Guide </user_guide/evaluating-survival-models.ipynb#Using-Metrics-in-Hyper-parameter-Search>` 897 for using it for hyper-parameter optimization. 898 899 Parameters 900 ---------- 901 estimator : object 902 Instance of an estimator. 903 904 times : array-like, shape = (n_times,) 905 The time points for which the area under the 906 time-dependent ROC curve is computed. Values must be 907 within the range of follow-up times of the test data 908 `survival_test`. 909 910 tied_tol : float, optional, default: 1e-8 911 The tolerance value for considering ties. 912 If the absolute difference between risk scores is smaller 913 or equal than `tied_tol`, risk scores are considered tied. 914 915 Attributes 916 ---------- 917 estimator_ : estimator 918 Estimator that was fit. 919 920 See also 921 -------- 922 cumulative_dynamic_auc 923 """ 924 925 def __init__(self, estimator, times, tied_tol=1e-8): 926 super().__init__( 927 estimator=estimator, 928 predict_func="predict", 929 score_func=cumulative_dynamic_auc, 930 score_index=1, 931 greater_is_better=True, 932 ) 933 self.times = times 934 self.tied_tol = tied_tol
Wraps an estimator to use cumulative_dynamic_auc() as score function.
See the :ref:User Guide </user_guide/evaluating-survival-models.ipynb#Using-Metrics-in-Hyper-parameter-Search>
for using it for hyper-parameter optimization.
Parameters
estimator : object Instance of an estimator.
times : array-like, shape = (n_times,)
The time points for which the area under the
time-dependent ROC curve is computed. Values must be
within the range of follow-up times of the test data
survival_test.
tied_tol : float, optional, default: 1e-8
The tolerance value for considering ties.
If the absolute difference between risk scores is smaller
or equal than tied_tol, risk scores are considered tied.
Attributes
estimator_ : estimator Estimator that was fit.
See also
cumulative_dynamic_auc
982class as_integrated_brier_score_scorer(_ScoreOverrideMixin, BaseEstimator): 983 """Wraps an estimator to use the negative of :func:`integrated_brier_score` as ``score`` function. 984 985 The estimator needs to be able to estimate survival functions via 986 a ``predict_survival_function`` method. 987 988 See the :ref:`User Guide </user_guide/evaluating-survival-models.ipynb#Using-Metrics-in-Hyper-parameter-Search>` 989 for using it for hyper-parameter optimization. 990 991 Parameters 992 ---------- 993 estimator : object 994 Instance of an estimator that provides ``predict_survival_function``. 995 996 times : array-like, shape = (n_times,) 997 The time points for which to estimate the Brier score. 998 Values must be within the range of follow-up times of 999 the test data `survival_test`. 1000 1001 Attributes 1002 ---------- 1003 estimator_ : estimator 1004 Estimator that was fit. 1005 1006 See also 1007 -------- 1008 integrated_brier_score 1009 """ 1010 1011 def __init__(self, estimator, times): 1012 super().__init__( 1013 estimator=estimator, 1014 predict_func="predict_survival_function", 1015 score_func=integrated_brier_score, 1016 score_index=None, 1017 greater_is_better=False, 1018 ) 1019 self.times = times 1020 1021 def _do_predict(self, X): 1022 predict_func = getattr(self.estimator_, self._predict_func) 1023 surv_fns = predict_func(X) 1024 times = self.times 1025 estimates = np.empty((len(surv_fns), len(times))) 1026 for i, fn in enumerate(surv_fns): 1027 estimates[i, :] = fn(times) 1028 return estimates
Wraps an estimator to use the negative of integrated_brier_score() as score function.
The estimator needs to be able to estimate survival functions via
a predict_survival_function method.
See the :ref:User Guide </user_guide/evaluating-survival-models.ipynb#Using-Metrics-in-Hyper-parameter-Search>
for using it for hyper-parameter optimization.
Parameters
estimator : object
Instance of an estimator that provides predict_survival_function.
times : array-like, shape = (n_times,)
The time points for which to estimate the Brier score.
Values must be within the range of follow-up times of
the test data survival_test.
Attributes
estimator_ : estimator Estimator that was fit.
See also
integrated_brier_score
537def brier_score(survival_train, survival_test, estimate, times): 538 """Estimate the time-dependent Brier score for right censored data. 539 540 The time-dependent Brier score is the mean squared error at time point :math:`t`: 541 542 .. math:: 543 544 \\mathrm{BS}^c(t) = \\frac{1}{n} \\sum_{i=1}^n I(y_i \\leq t \\land \\delta_i = 1) 545 \\frac{(0 - \\hat{\\pi}(t | \\mathbf{x}_i))^2}{\\hat{G}(y_i)} + I(y_i > t) 546 \\frac{(1 - \\hat{\\pi}(t | \\mathbf{x}_i))^2}{\\hat{G}(t)} , 547 548 where :math:`\\hat{\\pi}(t | \\mathbf{x})` is the predicted probability of 549 remaining event-free up to time point :math:`t` for a feature vector :math:`\\mathbf{x}`, 550 and :math:`1/\\hat{G}(t)` is a inverse probability of censoring weight, estimated by 551 the Kaplan-Meier estimator. 552 553 See the :ref:`User Guide </user_guide/evaluating-survival-models.ipynb#Time-dependent-Brier-Score>` 554 and [1]_ for details. 555 556 Parameters 557 ---------- 558 survival_train : structured array, shape = (n_train_samples,) 559 Survival times for training data to estimate the censoring 560 distribution from. 561 A structured array containing the binary event indicator 562 as first field, and time of event or time of censoring as 563 second field. 564 565 survival_test : structured array, shape = (n_samples,) 566 Survival times of test data. 567 A structured array containing the binary event indicator 568 as first field, and time of event or time of censoring as 569 second field. 570 571 estimate : array-like, shape = (n_samples, n_times) 572 Estimated probability of remaining event-free at time points 573 specified by `times`. The value of ``estimate[i]`` must correspond to 574 the estimated probability of remaining event-free up to the time point 575 ``times[i]``. Typically, estimated probabilities are obtained via the 576 survival function returned by an estimator's 577 ``predict_survival_function`` method. 578 579 times : array-like, shape = (n_times,) 580 The time points for which to estimate the Brier score. 581 Values must be within the range of follow-up times of 582 the test data `survival_test`. 583 584 Returns 585 ------- 586 times : array, shape = (n_times,) 587 Unique time points at which the brier scores was estimated. 588 589 brier_scores : array , shape = (n_times,) 590 Values of the brier score. 591 592 Examples 593 -------- 594 >>> from survivalist.datasets import load_gbsg2 595 >>> from survivalist.linear_model import CoxPHSurvivalAnalysis 596 >>> from survivalist.metrics import brier_score 597 >>> from survivalist.preprocessing import OneHotEncoder 598 599 Load and prepare data. 600 601 >>> X, y = load_gbsg2() 602 >>> X.loc[:, "tgrade"] = X.loc[:, "tgrade"].map(len).astype(int) 603 >>> Xt = OneHotEncoder().fit_transform(X) 604 605 Fit a Cox model. 606 607 >>> est = CoxPHSurvivalAnalysis(ties="efron").fit(Xt, y) 608 609 Retrieve individual survival functions and get probability 610 of remaining event free up to 5 years (=1825 days). 611 612 >>> survs = est.predict_survival_function(Xt) 613 >>> preds = [fn(1825) for fn in survs] 614 615 Compute the Brier score at 5 years. 616 617 >>> times, score = brier_score(y, y, preds, 1825) 618 >>> print(score) 619 [0.20881843] 620 621 See also 622 -------- 623 integrated_brier_score 624 Computes the average Brier score over all time points. 625 626 References 627 ---------- 628 .. [1] E. Graf, C. Schmoor, W. Sauerbrei, and M. Schumacher, 629 "Assessment and comparison of prognostic classification schemes for survival data," 630 Statistics in Medicine, vol. 18, no. 17-18, pp. 2529–2545, 1999. 631 """ 632 test_event, test_time = check_y_survival(survival_test) 633 estimate, times = _check_estimate_2d( 634 estimate, test_time, times, estimator="brier_score") 635 if estimate.ndim == 1 and times.shape[0] == 1: 636 estimate = estimate.reshape(-1, 1) 637 638 # fit IPCW estimator 639 cens = CensoringDistributionEstimator().fit(survival_train) 640 # calculate inverse probability of censoring weight at current time point t. 641 prob_cens_t = cens.predict_proba(times) 642 prob_cens_t[prob_cens_t == 0] = np.inf 643 # calculate inverse probability of censoring weights at observed time point 644 prob_cens_y = cens.predict_proba(test_time) 645 prob_cens_y[prob_cens_y == 0] = np.inf 646 647 # Calculating the brier scores at each time point 648 brier_scores = np.empty(times.shape[0], dtype=float) 649 for i, t in enumerate(times): 650 est = estimate[:, i] 651 is_case = (test_time <= t) & test_event 652 is_control = test_time > t 653 654 brier_scores[i] = np.mean( 655 np.square(est) * is_case.astype(int) / prob_cens_y 656 + np.square(1.0 - est) * is_control.astype(int) / prob_cens_t[i] 657 ) 658 659 return times, brier_scores
Estimate the time-dependent Brier score for right censored data.
The time-dependent Brier score is the mean squared error at time point \( t \):
$$\mathrm{BS}^c(t) = \frac{1}{n} \sum_{i=1}^n I(y_i \leq t \land \delta_i = 1) \frac{(0 - \hat{\pi}(t | \mathbf{x}_i))^2}{\hat{G}(y_i)} + I(y_i > t) \frac{(1 - \hat{\pi}(t | \mathbf{x}_i))^2}{\hat{G}(t)} ,$$
where \( \hat{\pi}(t | \mathbf{x}) \) is the predicted probability of remaining event-free up to time point \( t \) for a feature vector \( \mathbf{x} \), and \( 1/\hat{G}(t) \) is a inverse probability of censoring weight, estimated by the Kaplan-Meier estimator.
See the :ref:User Guide </user_guide/evaluating-survival-models.ipynb#Time-dependent-Brier-Score>
and 1 for details.
Parameters
survival_train : structured array, shape = (n_train_samples,) Survival times for training data to estimate the censoring distribution from. A structured array containing the binary event indicator as first field, and time of event or time of censoring as second field.
survival_test : structured array, shape = (n_samples,) Survival times of test data. A structured array containing the binary event indicator as first field, and time of event or time of censoring as second field.
estimate : array-like, shape = (n_samples, n_times)
Estimated probability of remaining event-free at time points
specified by times. The value of estimate[i] must correspond to
the estimated probability of remaining event-free up to the time point
times[i]. Typically, estimated probabilities are obtained via the
survival function returned by an estimator's
predict_survival_function method.
times : array-like, shape = (n_times,)
The time points for which to estimate the Brier score.
Values must be within the range of follow-up times of
the test data survival_test.
Returns
times : array, shape = (n_times,) Unique time points at which the brier scores was estimated.
brier_scores : array , shape = (n_times,) Values of the brier score.
Examples
>>> from survivalist.datasets import load_gbsg2
>>> from survivalist.linear_model import CoxPHSurvivalAnalysis
>>> from survivalist.metrics import brier_score
>>> from survivalist.preprocessing import OneHotEncoder
Load and prepare data.
>>> X, y = load_gbsg2()
>>> X.loc[:, "tgrade"] = X.loc[:, "tgrade"].map(len).astype(int)
>>> Xt = OneHotEncoder().fit_transform(X)
Fit a Cox model.
>>> est = CoxPHSurvivalAnalysis(ties="efron").fit(Xt, y)
Retrieve individual survival functions and get probability of remaining event free up to 5 years (=1825 days).
>>> survs = est.predict_survival_function(Xt)
>>> preds = [fn(1825) for fn in survs]
Compute the Brier score at 5 years.
>>> times, score = brier_score(y, y, preds, 1825)
>>> print(score)
[0.20881843]
See also
integrated_brier_score Computes the average Brier score over all time points.
References
-
E. Graf, C. Schmoor, W. Sauerbrei, and M. Schumacher, "Assessment and comparison of prognostic classification schemes for survival data," Statistics in Medicine, vol. 18, no. 17-18, pp. 2529–2545, 1999. ↩
167def concordance_index_censored(event_indicator, event_time, estimate, tied_tol=1e-8): 168 """Concordance index for right-censored data 169 170 The concordance index is defined as the proportion of all comparable pairs 171 in which the predictions and outcomes are concordant. 172 173 Two samples are comparable if (i) both of them experienced an event (at different times), 174 or (ii) the one with a shorter observed survival time experienced an event, in which case 175 the event-free subject "outlived" the other. A pair is not comparable if they experienced 176 events at the same time. 177 178 Concordance intuitively means that two samples were ordered correctly by the model. 179 More specifically, two samples are concordant, if the one with a higher estimated 180 risk score has a shorter actual survival time. 181 When predicted risks are identical for a pair, 0.5 rather than 1 is added to the count 182 of concordant pairs. 183 184 See the :ref:`User Guide </user_guide/evaluating-survival-models.ipynb>` 185 and [1]_ for further description. 186 187 Parameters 188 ---------- 189 event_indicator : array-like, shape = (n_samples,) 190 Boolean array denotes whether an event occurred 191 192 event_time : array-like, shape = (n_samples,) 193 Array containing the time of an event or time of censoring 194 195 estimate : array-like, shape = (n_samples,) 196 Estimated risk of experiencing an event 197 198 tied_tol : float, optional, default: 1e-8 199 The tolerance value for considering ties. 200 If the absolute difference between risk scores is smaller 201 or equal than `tied_tol`, risk scores are considered tied. 202 203 Returns 204 ------- 205 cindex : float 206 Concordance index 207 208 concordant : int 209 Number of concordant pairs 210 211 discordant : int 212 Number of discordant pairs 213 214 tied_risk : int 215 Number of pairs having tied estimated risks 216 217 tied_time : int 218 Number of comparable pairs sharing the same time 219 220 See also 221 -------- 222 concordance_index_ipcw 223 Alternative estimator of the concordance index with less bias. 224 225 References 226 ---------- 227 .. [1] Harrell, F.E., Califf, R.M., Pryor, D.B., Lee, K.L., Rosati, R.A, 228 "Multivariable prognostic models: issues in developing models, 229 evaluating assumptions and adequacy, and measuring and reducing errors", 230 Statistics in Medicine, 15(4), 361-87, 1996. 231 """ 232 event_indicator, event_time, estimate = _check_inputs( 233 event_indicator, event_time, estimate) 234 235 w = np.ones_like(estimate) 236 237 return _estimate_concordance_index(event_indicator, event_time, estimate, w, tied_tol)
Concordance index for right-censored data
The concordance index is defined as the proportion of all comparable pairs in which the predictions and outcomes are concordant.
Two samples are comparable if (i) both of them experienced an event (at different times), or (ii) the one with a shorter observed survival time experienced an event, in which case the event-free subject "outlived" the other. A pair is not comparable if they experienced events at the same time.
Concordance intuitively means that two samples were ordered correctly by the model. More specifically, two samples are concordant, if the one with a higher estimated risk score has a shorter actual survival time. When predicted risks are identical for a pair, 0.5 rather than 1 is added to the count of concordant pairs.
See the :ref:User Guide </user_guide/evaluating-survival-models.ipynb>
and 1 for further description.
Parameters
event_indicator : array-like, shape = (n_samples,) Boolean array denotes whether an event occurred
event_time : array-like, shape = (n_samples,) Array containing the time of an event or time of censoring
estimate : array-like, shape = (n_samples,) Estimated risk of experiencing an event
tied_tol : float, optional, default: 1e-8
The tolerance value for considering ties.
If the absolute difference between risk scores is smaller
or equal than tied_tol, risk scores are considered tied.
Returns
cindex : float Concordance index
concordant : int Number of concordant pairs
discordant : int Number of discordant pairs
tied_risk : int Number of pairs having tied estimated risks
tied_time : int Number of comparable pairs sharing the same time
See also
concordance_index_ipcw Alternative estimator of the concordance index with less bias.
References
-
Harrell, F.E., Califf, R.M., Pryor, D.B., Lee, K.L., Rosati, R.A, "Multivariable prognostic models: issues in developing models, evaluating assumptions and adequacy, and measuring and reducing errors", Statistics in Medicine, 15(4), 361-87, 1996. ↩
240def concordance_index_ipcw(survival_train, survival_test, estimate, tau=None, tied_tol=1e-8): 241 """Concordance index for right-censored data based on inverse probability of censoring weights. 242 243 This is an alternative to the estimator in :func:`concordance_index_censored` 244 that does not depend on the distribution of censoring times in the test data. 245 Therefore, the estimate is unbiased and consistent for a population concordance 246 measure that is free of censoring. 247 248 It is based on inverse probability of censoring weights, thus requires 249 access to survival times from the training data to estimate the censoring 250 distribution. Note that this requires that survival times `survival_test` 251 lie within the range of survival times `survival_train`. This can be 252 achieved by specifying the truncation time `tau`. 253 The resulting `cindex` tells how well the given prediction model works in 254 predicting events that occur in the time range from 0 to `tau`. 255 256 The estimator uses the Kaplan-Meier estimator to estimate the 257 censoring survivor function. Therefore, it is restricted to 258 situations where the random censoring assumption holds and 259 censoring is independent of the features. 260 261 See the :ref:`User Guide </user_guide/evaluating-survival-models.ipynb>` 262 and [1]_ for further description. 263 264 Parameters 265 ---------- 266 survival_train : structured array, shape = (n_train_samples,) 267 Survival times for training data to estimate the censoring 268 distribution from. 269 A structured array containing the binary event indicator 270 as first field, and time of event or time of censoring as 271 second field. 272 273 survival_test : structured array, shape = (n_samples,) 274 Survival times of test data. 275 A structured array containing the binary event indicator 276 as first field, and time of event or time of censoring as 277 second field. 278 279 estimate : array-like, shape = (n_samples,) 280 Estimated risk of experiencing an event of test data. 281 282 tau : float, optional 283 Truncation time. The survival function for the underlying 284 censoring time distribution :math:`D` needs to be positive 285 at `tau`, i.e., `tau` should be chosen such that the 286 probability of being censored after time `tau` is non-zero: 287 :math:`P(D > \\tau) > 0`. If `None`, no truncation is performed. 288 289 tied_tol : float, optional, default: 1e-8 290 The tolerance value for considering ties. 291 If the absolute difference between risk scores is smaller 292 or equal than `tied_tol`, risk scores are considered tied. 293 294 Returns 295 ------- 296 cindex : float 297 Concordance index 298 299 concordant : int 300 Number of concordant pairs 301 302 discordant : int 303 Number of discordant pairs 304 305 tied_risk : int 306 Number of pairs having tied estimated risks 307 308 tied_time : int 309 Number of comparable pairs sharing the same time 310 311 See also 312 -------- 313 concordance_index_censored 314 Simpler estimator of the concordance index. 315 316 as_concordance_index_ipcw_scorer 317 Wrapper class that uses :func:`concordance_index_ipcw` 318 in its ``score`` method instead of the default 319 :func:`concordance_index_censored`. 320 321 References 322 ---------- 323 .. [1] Uno, H., Cai, T., Pencina, M. J., D’Agostino, R. B., & Wei, L. J. (2011). 324 "On the C-statistics for evaluating overall adequacy of risk prediction 325 procedures with censored survival data". 326 Statistics in Medicine, 30(10), 1105–1117. 327 """ 328 test_event, test_time = check_y_survival(survival_test) 329 330 if tau is not None: 331 mask = test_time < tau 332 survival_test = survival_test[mask] 333 334 estimate = _check_estimate_1d(estimate, test_time) 335 336 cens = CensoringDistributionEstimator() 337 cens.fit(survival_train) 338 ipcw_test = cens.predict_ipcw(survival_test) 339 if tau is None: 340 ipcw = ipcw_test 341 else: 342 ipcw = np.empty(estimate.shape[0], dtype=ipcw_test.dtype) 343 ipcw[mask] = ipcw_test 344 ipcw[~mask] = 0 345 346 w = np.square(ipcw) 347 348 return _estimate_concordance_index(test_event, test_time, estimate, w, tied_tol)
Concordance index for right-censored data based on inverse probability of censoring weights.
This is an alternative to the estimator in concordance_index_censored()
that does not depend on the distribution of censoring times in the test data.
Therefore, the estimate is unbiased and consistent for a population concordance
measure that is free of censoring.
It is based on inverse probability of censoring weights, thus requires
access to survival times from the training data to estimate the censoring
distribution. Note that this requires that survival times survival_test
lie within the range of survival times survival_train. This can be
achieved by specifying the truncation time tau.
The resulting cindex tells how well the given prediction model works in
predicting events that occur in the time range from 0 to tau.
The estimator uses the Kaplan-Meier estimator to estimate the censoring survivor function. Therefore, it is restricted to situations where the random censoring assumption holds and censoring is independent of the features.
See the :ref:User Guide </user_guide/evaluating-survival-models.ipynb>
and 1 for further description.
Parameters
survival_train : structured array, shape = (n_train_samples,) Survival times for training data to estimate the censoring distribution from. A structured array containing the binary event indicator as first field, and time of event or time of censoring as second field.
survival_test : structured array, shape = (n_samples,) Survival times of test data. A structured array containing the binary event indicator as first field, and time of event or time of censoring as second field.
estimate : array-like, shape = (n_samples,) Estimated risk of experiencing an event of test data.
tau : float, optional
Truncation time. The survival function for the underlying
censoring time distribution \( D \) needs to be positive
at tau, i.e., tau should be chosen such that the
probability of being censored after time tau is non-zero:
\( P(D > \tau) > 0 \). If None, no truncation is performed.
tied_tol : float, optional, default: 1e-8
The tolerance value for considering ties.
If the absolute difference between risk scores is smaller
or equal than tied_tol, risk scores are considered tied.
Returns
cindex : float Concordance index
concordant : int Number of concordant pairs
discordant : int Number of discordant pairs
tied_risk : int Number of pairs having tied estimated risks
tied_time : int Number of comparable pairs sharing the same time
See also
concordance_index_censored Simpler estimator of the concordance index.
as_concordance_index_ipcw_scorer
Wrapper class that uses concordance_index_ipcw()
in its score method instead of the default
concordance_index_censored().
References
-
Uno, H., Cai, T., Pencina, M. J., D’Agostino, R. B., & Wei, L. J. (2011). "On the C-statistics for evaluating overall adequacy of risk prediction procedures with censored survival data". Statistics in Medicine, 30(10), 1105–1117. ↩
351def cumulative_dynamic_auc(survival_train, survival_test, estimate, times, tied_tol=1e-8): 352 """Estimator of cumulative/dynamic AUC for right-censored time-to-event data. 353 354 The receiver operating characteristic (ROC) curve and the area under the 355 ROC curve (AUC) can be extended to survival data by defining 356 sensitivity (true positive rate) and specificity (true negative rate) 357 as time-dependent measures. *Cumulative cases* are all individuals that 358 experienced an event prior to or at time :math:`t` (:math:`t_i \\leq t`), 359 whereas *dynamic controls* are those with :math:`t_i > t`. 360 The associated cumulative/dynamic AUC quantifies how well a model can 361 distinguish subjects who fail by a given time (:math:`t_i \\leq t`) from 362 subjects who fail after this time (:math:`t_i > t`). 363 364 Given an estimator of the :math:`i`-th individual's risk score 365 :math:`\\hat{f}(\\mathbf{x}_i)`, the cumulative/dynamic AUC at time 366 :math:`t` is defined as 367 368 .. math:: 369 370 \\widehat{\\mathrm{AUC}}(t) = 371 \\frac{\\sum_{i=1}^n \\sum_{j=1}^n I(y_j > t) I(y_i \\leq t) \\omega_i 372 I(\\hat{f}(\\mathbf{x}_j) \\leq \\hat{f}(\\mathbf{x}_i))} 373 {(\\sum_{i=1}^n I(y_i > t)) (\\sum_{i=1}^n I(y_i \\leq t) \\omega_i)} 374 375 where :math:`\\omega_i` are inverse probability of censoring weights (IPCW). 376 377 To estimate IPCW, access to survival times from the training data is required 378 to estimate the censoring distribution. Note that this requires that survival 379 times `survival_test` lie within the range of survival times `survival_train`. 380 This can be achieved by specifying `times` accordingly, e.g. by setting 381 `times[-1]` slightly below the maximum expected follow-up time. 382 IPCW are computed using the Kaplan-Meier estimator, which is 383 restricted to situations where the random censoring assumption holds and 384 censoring is independent of the features. 385 386 This function can also be used to evaluate models with time-dependent predictions 387 :math:`\\hat{f}(\\mathbf{x}_i, t)`, such as :class:`survivalist.ensemble.RandomSurvivalForest` 388 (see :ref:`User Guide </user_guide/evaluating-survival-models.ipynb#Using-Time-dependent-Risk-Scores>`). 389 In this case, `estimate` must be a 2-d array where ``estimate[i, j]`` is the 390 predicted risk score for the i-th instance at time point ``times[j]``. 391 392 Finally, the function also provides a single summary measure that refers to the mean 393 of the :math:`\\mathrm{AUC}(t)` over the time range :math:`(\\tau_1, \\tau_2)`. 394 395 .. math:: 396 397 \\overline{\\mathrm{AUC}}(\\tau_1, \\tau_2) = 398 \\frac{1}{\\hat{S}(\\tau_1) - \\hat{S}(\\tau_2)} 399 \\int_{\\tau_1}^{\\tau_2} \\widehat{\\mathrm{AUC}}(t)\\,d \\hat{S}(t) 400 401 where :math:`\\hat{S}(t)` is the Kaplan–Meier estimator of the survival function. 402 403 See the :ref:`User Guide </user_guide/evaluating-survival-models.ipynb#Time-dependent-Area-under-the-ROC>`, 404 [1]_, [2]_, [3]_ for further description. 405 406 Parameters 407 ---------- 408 survival_train : structured array, shape = (n_train_samples,) 409 Survival times for training data to estimate the censoring 410 distribution from. 411 A structured array containing the binary event indicator 412 as first field, and time of event or time of censoring as 413 second field. 414 415 survival_test : structured array, shape = (n_samples,) 416 Survival times of test data. 417 A structured array containing the binary event indicator 418 as first field, and time of event or time of censoring as 419 second field. 420 421 estimate : array-like, shape = (n_samples,) or (n_samples, n_times) 422 Estimated risk of experiencing an event of test data. 423 If `estimate` is a 1-d array, the same risk score across all time 424 points is used. If `estimate` is a 2-d array, the risk scores in the 425 j-th column are used to evaluate the j-th time point. 426 427 times : array-like, shape = (n_times,) 428 The time points for which the area under the 429 time-dependent ROC curve is computed. Values must be 430 within the range of follow-up times of the test data 431 `survival_test`. 432 433 tied_tol : float, optional, default: 1e-8 434 The tolerance value for considering ties. 435 If the absolute difference between risk scores is smaller 436 or equal than `tied_tol`, risk scores are considered tied. 437 438 Returns 439 ------- 440 auc : array, shape = (n_times,) 441 The cumulative/dynamic AUC estimates (evaluated at `times`). 442 mean_auc : float 443 Summary measure referring to the mean cumulative/dynamic AUC 444 over the specified time range `(times[0], times[-1])`. 445 446 See also 447 -------- 448 as_cumulative_dynamic_auc_scorer 449 Wrapper class that uses :func:`cumulative_dynamic_auc` 450 in its ``score`` method instead of the default 451 :func:`concordance_index_censored`. 452 453 References 454 ---------- 455 .. [1] H. Uno, T. Cai, L. Tian, and L. J. Wei, 456 "Evaluating prediction rules for t-year survivors with censored regression models," 457 Journal of the American Statistical Association, vol. 102, pp. 527–537, 2007. 458 .. [2] H. Hung and C. T. Chiang, 459 "Estimation methods for time-dependent AUC models with survival data," 460 Canadian Journal of Statistics, vol. 38, no. 1, pp. 8–26, 2010. 461 .. [3] J. Lambert and S. Chevret, 462 "Summary measure of discrimination in survival models based on cumulative/dynamic time-dependent ROC curves," 463 Statistical Methods in Medical Research, 2014. 464 """ 465 test_event, test_time = check_y_survival(survival_test) 466 estimate, times = _check_estimate_2d( 467 estimate, test_time, times, estimator="cumulative_dynamic_auc") 468 469 n_samples = estimate.shape[0] 470 n_times = times.shape[0] 471 if estimate.ndim == 1: 472 estimate = np.broadcast_to( 473 estimate[:, np.newaxis], (n_samples, n_times)) 474 475 # fit and transform IPCW 476 cens = CensoringDistributionEstimator() 477 cens.fit(survival_train) 478 ipcw = cens.predict_ipcw(survival_test) 479 480 # expand arrays to (n_samples, n_times) shape 481 test_time = np.broadcast_to(test_time[:, np.newaxis], (n_samples, n_times)) 482 test_event = np.broadcast_to( 483 test_event[:, np.newaxis], (n_samples, n_times)) 484 times_2d = np.broadcast_to(times, (n_samples, n_times)) 485 ipcw = np.broadcast_to(ipcw[:, np.newaxis], (n_samples, n_times)) 486 487 # sort each time point (columns) by risk score (descending) 488 o = np.argsort(-estimate, axis=0) 489 test_time = np.take_along_axis(test_time, o, axis=0) 490 test_event = np.take_along_axis(test_event, o, axis=0) 491 estimate = np.take_along_axis(estimate, o, axis=0) 492 ipcw = np.take_along_axis(ipcw, o, axis=0) 493 494 is_case = (test_time <= times_2d) & test_event 495 is_control = test_time > times_2d 496 n_controls = is_control.sum(axis=0) 497 498 # prepend row of infinity values 499 estimate_diff = np.concatenate( 500 (np.broadcast_to(np.inf, (1, n_times)), estimate)) 501 is_tied = np.absolute(np.diff(estimate_diff, axis=0)) <= tied_tol 502 503 cumsum_tp = np.cumsum(is_case * ipcw, axis=0) 504 cumsum_fp = np.cumsum(is_control, axis=0) 505 true_pos = cumsum_tp / cumsum_tp[-1] 506 false_pos = cumsum_fp / n_controls 507 508 scores = np.empty(n_times, dtype=float) 509 it = np.nditer((true_pos, false_pos, is_tied), 510 order="F", flags=["external_loop"]) 511 with it: 512 for i, (tp, fp, mask) in enumerate(it): 513 idx = np.flatnonzero(mask) - 1 514 # only keep the last estimate for tied risk scores 515 tp_no_ties = np.delete(tp, idx) 516 fp_no_ties = np.delete(fp, idx) 517 # Add an extra threshold position 518 # to make sure that the curve starts at (0, 0) 519 tp_no_ties = np.r_[0, tp_no_ties] 520 fp_no_ties = np.r_[0, fp_no_ties] 521 scores[i] = np.trapezoid(tp_no_ties, fp_no_ties) 522 523 if n_times == 1: 524 mean_auc = scores[0] 525 else: 526 surv = SurvivalFunctionEstimator() 527 surv.fit(survival_test) 528 s_times = surv.predict_proba(times) 529 # compute integral of AUC over survival function 530 d = -np.diff(np.r_[1.0, s_times]) 531 integral = (scores * d).sum() 532 mean_auc = integral / (1.0 - s_times[-1]) 533 534 return scores, mean_auc
Estimator of cumulative/dynamic AUC for right-censored time-to-event data.
The receiver operating characteristic (ROC) curve and the area under the ROC curve (AUC) can be extended to survival data by defining sensitivity (true positive rate) and specificity (true negative rate) as time-dependent measures. Cumulative cases are all individuals that experienced an event prior to or at time \( t \) (\( t_i \leq t \)), whereas dynamic controls are those with \( t_i > t \). The associated cumulative/dynamic AUC quantifies how well a model can distinguish subjects who fail by a given time (\( t_i \leq t \)) from subjects who fail after this time (\( t_i > t \)).
Given an estimator of the \( i \)-th individual's risk score \( \hat{f}(\mathbf{x}_i) \), the cumulative/dynamic AUC at time \( t \) is defined as
$$\widehat{\mathrm{AUC}}(t) = \frac{\sum_{i=1}^n \sum_{j=1}^n I(y_j > t) I(y_i \leq t) \omega_i I(\hat{f}(\mathbf{x}_j) \leq \hat{f}(\mathbf{x}_i))} {(\sum_{i=1}^n I(y_i > t)) (\sum_{i=1}^n I(y_i \leq t) \omega_i)}$$
where \( \omega_i \) are inverse probability of censoring weights (IPCW).
To estimate IPCW, access to survival times from the training data is required
to estimate the censoring distribution. Note that this requires that survival
times survival_test lie within the range of survival times survival_train.
This can be achieved by specifying times accordingly, e.g. by setting
times[-1] slightly below the maximum expected follow-up time.
IPCW are computed using the Kaplan-Meier estimator, which is
restricted to situations where the random censoring assumption holds and
censoring is independent of the features.
This function can also be used to evaluate models with time-dependent predictions
\( \hat{f}(\mathbf{x}_i, t) \), such as survivalist.ensemble.RandomSurvivalForest
(see :ref:User Guide </user_guide/evaluating-survival-models.ipynb#Using-Time-dependent-Risk-Scores>).
In this case, estimate must be a 2-d array where estimate[i, j] is the
predicted risk score for the i-th instance at time point times[j].
Finally, the function also provides a single summary measure that refers to the mean of the \( \mathrm{AUC}(t) \) over the time range \( (\tau_1, \tau_2) \).
$$\overline{\mathrm{AUC}}(\tau_1, \tau_2) = \frac{1}{\hat{S}(\tau_1) - \hat{S}(\tau_2)} \int_{\tau_1}^{\tau_2} \widehat{\mathrm{AUC}}(t)\,d \hat{S}(t)$$
where \( \hat{S}(t) \) is the Kaplan–Meier estimator of the survival function.
See the :ref:User Guide </user_guide/evaluating-survival-models.ipynb#Time-dependent-Area-under-the-ROC>,
1, 2, 3 for further description.
Parameters
survival_train : structured array, shape = (n_train_samples,) Survival times for training data to estimate the censoring distribution from. A structured array containing the binary event indicator as first field, and time of event or time of censoring as second field.
survival_test : structured array, shape = (n_samples,) Survival times of test data. A structured array containing the binary event indicator as first field, and time of event or time of censoring as second field.
estimate : array-like, shape = (n_samples,) or (n_samples, n_times)
Estimated risk of experiencing an event of test data.
If estimate is a 1-d array, the same risk score across all time
points is used. If estimate is a 2-d array, the risk scores in the
j-th column are used to evaluate the j-th time point.
times : array-like, shape = (n_times,)
The time points for which the area under the
time-dependent ROC curve is computed. Values must be
within the range of follow-up times of the test data
survival_test.
tied_tol : float, optional, default: 1e-8
The tolerance value for considering ties.
If the absolute difference between risk scores is smaller
or equal than tied_tol, risk scores are considered tied.
Returns
auc : array, shape = (n_times,)
The cumulative/dynamic AUC estimates (evaluated at times).
mean_auc : float
Summary measure referring to the mean cumulative/dynamic AUC
over the specified time range (times[0], times[-1]).
See also
as_cumulative_dynamic_auc_scorer
Wrapper class that uses cumulative_dynamic_auc()
in its score method instead of the default
concordance_index_censored().
References
-
H. Uno, T. Cai, L. Tian, and L. J. Wei, "Evaluating prediction rules for t-year survivors with censored regression models," Journal of the American Statistical Association, vol. 102, pp. 527–537, 2007. ↩
-
H. Hung and C. T. Chiang, "Estimation methods for time-dependent AUC models with survival data," Canadian Journal of Statistics, vol. 38, no. 1, pp. 8–26, 2010. ↩
-
J. Lambert and S. Chevret, "Summary measure of discrimination in survival models based on cumulative/dynamic time-dependent ROC curves," Statistical Methods in Medical Research, 2014. ↩
662def integrated_brier_score(survival_train, survival_test, estimate, times): 663 """The Integrated Brier Score (IBS) provides an overall calculation of 664 the model performance at all available times :math:`t_1 \\leq t \\leq t_\\text{max}`. 665 666 The integrated time-dependent Brier score over the interval 667 :math:`[t_1; t_\\text{max}]` is defined as 668 669 .. math:: 670 671 \\mathrm{IBS} = \\int_{t_1}^{t_\\text{max}} \\mathrm{BS}^c(t) d w(t) 672 673 where the weighting function is :math:`w(t) = t / t_\\text{max}`. 674 The integral is estimated via the trapezoidal rule. 675 676 See the :ref:`User Guide </user_guide/evaluating-survival-models.ipynb#Time-dependent-Brier-Score>` 677 and [1]_ for further details. 678 679 Parameters 680 ---------- 681 survival_train : structured array, shape = (n_train_samples,) 682 Survival times for training data to estimate the censoring 683 distribution from. 684 A structured array containing the binary event indicator 685 as first field, and time of event or time of censoring as 686 second field. 687 688 survival_test : structured array, shape = (n_samples,) 689 Survival times of test data. 690 A structured array containing the binary event indicator 691 as first field, and time of event or time of censoring as 692 second field. 693 694 estimate : array-like, shape = (n_samples, n_times) 695 Estimated probability of remaining event-free at time points 696 specified by `times`. The value of ``estimate[i]`` must correspond to 697 the estimated probability of remaining event-free up to the time point 698 ``times[i]``. Typically, estimated probabilities are obtained via the 699 survival function returned by an estimator's 700 ``predict_survival_function`` method. 701 702 times : array-like, shape = (n_times,) 703 The time points for which to estimate the Brier score. 704 Values must be within the range of follow-up times of 705 the test data `survival_test`. 706 707 Returns 708 ------- 709 ibs : float 710 The integrated Brier score. 711 712 Examples 713 -------- 714 >>> import numpy as np 715 >>> from survivalist.datasets import load_gbsg2 716 >>> from survivalist.linear_model import CoxPHSurvivalAnalysis 717 >>> from survivalist.metrics import integrated_brier_score 718 >>> from survivalist.preprocessing import OneHotEncoder 719 720 Load and prepare data. 721 722 >>> X, y = load_gbsg2() 723 >>> X.loc[:, "tgrade"] = X.loc[:, "tgrade"].map(len).astype(int) 724 >>> Xt = OneHotEncoder().fit_transform(X) 725 726 Fit a Cox model. 727 728 >>> est = CoxPHSurvivalAnalysis(ties="efron").fit(Xt, y) 729 730 Retrieve individual survival functions and get probability 731 of remaining event free from 1 year to 5 years (=1825 days). 732 733 >>> survs = est.predict_survival_function(Xt) 734 >>> times = np.arange(365, 1826) 735 >>> preds = np.asarray([[fn(t) for t in times] for fn in survs]) 736 737 Compute the integrated Brier score from 1 to 5 years. 738 739 >>> score = integrated_brier_score(y, y, preds, times) 740 >>> print(score) 741 0.1815853064627424 742 743 See also 744 -------- 745 brier_score 746 Computes the Brier score at specified time points. 747 748 as_integrated_brier_score_scorer 749 Wrapper class that uses :func:`integrated_brier_score` 750 in its ``score`` method instead of the default 751 :func:`concordance_index_censored`. 752 753 References 754 ---------- 755 .. [1] E. Graf, C. Schmoor, W. Sauerbrei, and M. Schumacher, 756 "Assessment and comparison of prognostic classification schemes for survival data," 757 Statistics in Medicine, vol. 18, no. 17-18, pp. 2529–2545, 1999. 758 """ 759 # Computing the brier scores 760 times, brier_scores = brier_score( 761 survival_train, survival_test, estimate, times) 762 763 if times.shape[0] < 2: 764 raise ValueError("At least two time points must be given") 765 766 # Computing the IBS 767 ibs_value = np.trapezoid(brier_scores, times) / (times[-1] - times[0]) 768 769 return ibs_value
The Integrated Brier Score (IBS) provides an overall calculation of the model performance at all available times \( t_1 \leq t \leq t_\text{max} \).
The integrated time-dependent Brier score over the interval \( [t_1; t_\text{max}] \) is defined as
$$\mathrm{IBS} = \int_{t_1}^{t_\text{max}} \mathrm{BS}^c(t) d w(t)$$
where the weighting function is \( w(t) = t / t_\text{max} \). The integral is estimated via the trapezoidal rule.
See the :ref:User Guide </user_guide/evaluating-survival-models.ipynb#Time-dependent-Brier-Score>
and 1 for further details.
Parameters
survival_train : structured array, shape = (n_train_samples,) Survival times for training data to estimate the censoring distribution from. A structured array containing the binary event indicator as first field, and time of event or time of censoring as second field.
survival_test : structured array, shape = (n_samples,) Survival times of test data. A structured array containing the binary event indicator as first field, and time of event or time of censoring as second field.
estimate : array-like, shape = (n_samples, n_times)
Estimated probability of remaining event-free at time points
specified by times. The value of estimate[i] must correspond to
the estimated probability of remaining event-free up to the time point
times[i]. Typically, estimated probabilities are obtained via the
survival function returned by an estimator's
predict_survival_function method.
times : array-like, shape = (n_times,)
The time points for which to estimate the Brier score.
Values must be within the range of follow-up times of
the test data survival_test.
Returns
ibs : float The integrated Brier score.
Examples
>>> import numpy as np
>>> from survivalist.datasets import load_gbsg2
>>> from survivalist.linear_model import CoxPHSurvivalAnalysis
>>> from survivalist.metrics import integrated_brier_score
>>> from survivalist.preprocessing import OneHotEncoder
Load and prepare data.
>>> X, y = load_gbsg2()
>>> X.loc[:, "tgrade"] = X.loc[:, "tgrade"].map(len).astype(int)
>>> Xt = OneHotEncoder().fit_transform(X)
Fit a Cox model.
>>> est = CoxPHSurvivalAnalysis(ties="efron").fit(Xt, y)
Retrieve individual survival functions and get probability of remaining event free from 1 year to 5 years (=1825 days).
>>> survs = est.predict_survival_function(Xt)
>>> times = np.arange(365, 1826)
>>> preds = np.asarray([[fn(t) for t in times] for fn in survs])
Compute the integrated Brier score from 1 to 5 years.
>>> score = integrated_brier_score(y, y, preds, times)
>>> print(score)
0.1815853064627424
See also
brier_score Computes the Brier score at specified time points.
as_integrated_brier_score_scorer
Wrapper class that uses integrated_brier_score()
in its score method instead of the default
concordance_index_censored().
References
-
E. Graf, C. Schmoor, W. Sauerbrei, and M. Schumacher, "Assessment and comparison of prognostic classification schemes for survival data," Statistics in Medicine, vol. 18, no. 17-18, pp. 2529–2545, 1999. ↩