survivalist.utils

1from .simulation import simulate_replications
2
3__all__ = ["simulate_replications"]
def simulate_replications(data, method='bootstrap', num_replications=10, n_obs=None, **kwargs):
 84def simulate_replications(data, method="bootstrap", num_replications=10, n_obs=None, **kwargs):
 85    """
 86    Create multiple replications of the input's distribution using a specified simulation method.
 87
 88    Parameters:
 89        data (array-like): Input vector of data.
 90        method (str): Method for simulation:
 91                      - 'bootstrap': Bootstrap resampling.
 92                      - 'kde': Kernel Density Estimation.
 93                      - 'normal': Parametric distribution fitting.
 94                      - 'ecdf': Empirical CDF-based sampling.
 95                      - 'permutation': Permutation resampling.
 96                      - 'smooth_bootstrap': Smoothed bootstrap with added noise.
 97        num_samples (int): Number of samples in each replication.
 98        num_replications (int): Number of replications to generate.
 99        n_obs (int): Number of observations to generate for each replication.
100        kwargs: Additional parameters for specific methods.
101
102    Returns:
103        pd.DataFrame: A DataFrame where each column represents a replication.
104    """
105
106    num_samples = len(data)
107
108    replications = []
109
110    for _ in range(num_replications):
111        simulated_data = simulate_distribution(
112            data, method=method, num_samples=num_samples, **kwargs)
113        replications.append(simulated_data)
114
115    # Combine replications into a DataFrame
116    replications_df = pd.DataFrame(replications).transpose()
117    replications_df.columns = [
118        f"Replication_{i+1}" for i in range(num_replications)]
119
120    # If n_obs is specified, sample n_obs from each replication
121    if n_obs is not None:
122        replications_df = replications_df.sample(
123            n=n_obs, replace=True, random_state=42).reset_index(drop=True)
124        return replications_df.values
125
126    return replications_df.values

Create multiple replications of the input's distribution using a specified simulation method.

Parameters: data (array-like): Input vector of data. method (str): Method for simulation: - 'bootstrap': Bootstrap resampling. - 'kde': Kernel Density Estimation. - 'normal': Parametric distribution fitting. - 'ecdf': Empirical CDF-based sampling. - 'permutation': Permutation resampling. - 'smooth_bootstrap': Smoothed bootstrap with added noise. num_replications (int): Number of replications to generate. n_obs (int, optional): If given, number of rows drawn with replacement (fixed seed 42) from the generated replications; if None, all rows are returned. kwargs: Additional parameters for specific methods. Note: num_samples (the number of samples in each replication) is not an argument of this function; it is always set to len(data).

Returns: numpy.ndarray: A 2-D array (the `.values` of an internal pd.DataFrame) where each column represents a replication.