nnetsauce

  1from .base.base import Base
  2from .base.baseRegressor import BaseRegressor
  3from .boosting.adaBoostClassifier import AdaBoostClassifier
  4from .custom.customClassifier import CustomClassifier
  5from .custom.customRegressor import CustomRegressor
  6from .custom.customBackpropRegressor import CustomBackPropRegressor
  7from .datasets import Downloader
  8from .deep.deepClassifier import DeepClassifier
  9from .deep.deepRegressor import DeepRegressor
 10from .deep.deepMTS import DeepMTS
 11from .elasticnet2.enet2 import ElasticNet2Regressor
 12from .glm.glmClassifier import GLMClassifier
 13from .glm.glmRegressor import GLMRegressor
 14from .kernel.kernel import KernelRidge
 15from .lazypredict.lazydeepClassifier import LazyDeepClassifier, LazyClassifier
 16from .lazypredict.lazydeepRegressor import LazyDeepRegressor, LazyRegressor
 17from .lazypredict.lazydeepClassifier import LazyDeepClassifier
 18from .lazypredict.lazydeepRegressor import LazyDeepRegressor
 19from .lazypredict.lazydeepMTS import LazyDeepMTS, LazyMTS
 20from .mts.mts import MTS
 21from .mts.mlarch import MLARCH
 22from .mts.classical import ClassicalMTS
 23from .mts.stackedmts import MTSStacker
 24from .mts.multioutputmts import MultiOutputMTS
 25from .mts.discretetokenmts import DiscreteTokenMTS
 26from .multitask.multitaskClassifier import MultitaskClassifier
 27from .multitask.simplemultitaskClassifier import SimpleMultitaskClassifier
 28from .neuralnet.neuralnetregression import NeuralNetRegressor
 29from .neuralnet.neuralnetclassification import NeuralNetClassifier
 30from .optimizers.optimizer import Optimizer
 31from .predictioninterval import PredictionInterval
 32from .predictionset import PredictionSet
 33from .quantile.quantileregression import QuantileRegressor
 34from .quantile.quantileclassification import QuantileClassifier
 35from .randombag.randomBagClassifier import RandomBagClassifier
 36from .randombag.randomBagRegressor import RandomBagRegressor
 37from .randomfourier.randomfourier import RandomFourierEstimator
 38from .rff.rffridge import (
 39    RandomFourierFeaturesRidge,
 40    RandomFourierFeaturesRidgeGCV,
 41)
 42from .ridge.ridge import RidgeRegressor
 43from .ridge2.ridge2Classifier import Ridge2Classifier
 44from .ridge2.ridge2Regressor import Ridge2Regressor
 45from .ridge2.ridge2MultitaskClassifier import Ridge2MultitaskClassifier
 46from .ridge2.ridge2MTSJAX import Ridge2Forecaster
 47from .ridge2.ridge2multioutputregressor import Ridge2MultiOutputRegressor
 48from .rvfl.bayesianrvflRegressor import BayesianRVFLRegressor
 49from .rvfl.bayesianrvfl2Regressor import BayesianRVFL2Regressor
 50from .sampling import SubSampler
 51from .updater import RegressorUpdater, ClassifierUpdater
 52from .votingregressor import MedianVotingRegressor
 53
 54__all__ = [
 55    "AdaBoostClassifier",
 56    "Base",
 57    "BaseRegressor",
 58    "BayesianRVFLRegressor",
 59    "BayesianRVFL2Regressor",
 60    "ClassicalMTS",
 61    "CustomClassifier",
 62    "CustomRegressor",
 63    "CustomBackPropRegressor",
 64    "DeepClassifier",
 65    "DeepRegressor",
 66    "DeepMTS",
 67    "DiscreteTokenMTS",
 68    "Downloader",
 69    "ElasticNet2Regressor",
 70    "GLMClassifier",
 71    "GLMRegressor",
 72    "KernelRidge",
 73    "LazyClassifier",
 74    "LazyRegressor",
 75    "LazyDeepClassifier",
 76    "LazyDeepRegressor",
 77    "LazyMTS",
 78    "LazyDeepMTS",
 79    "MLARCH",
 80    "MedianVotingRegressor",
 81    "MTS",
 82    "MTSStacker",
 83    "MultiOutputMTS",
 84    "MultitaskClassifier",
 85    "NeuralNetRegressor",
 86    "NeuralNetClassifier",
 87    "PredictionInterval",
 88    "PredictionSet",
 89    "SimpleMultitaskClassifier",
 90    "Optimizer",
 91    "QuantileRegressor",
 92    "QuantileClassifier",
 93    "RandomBagRegressor",
 94    "RandomBagClassifier",
 95    "RandomFourierEstimator",
 96    "RandomFourierFeaturesRidge",
 97    "RandomFourierFeaturesRidgeGCV",
 98    "RegressorUpdater",
 99    "ClassifierUpdater",
100    "RidgeRegressor",
101    "Ridge2Regressor",
102    "Ridge2MultiOutputRegressor",
103    "Ridge2Classifier",
104    "Ridge2MultitaskClassifier",
105    "Ridge2Forecaster",
106    "SubSampler",
107]
class AdaBoostClassifier(Boosting, ClassifierMixin):
    """AdaBoost Classification (SAMME) model class derived from class Boosting

    Parameters:

        obj: object
            any object containing a method fit (obj.fit()) and a method predict
            (obj.predict())

        n_estimators: int
            number of boosting iterations

        learning_rate: float
            learning rate of the boosting procedure

        n_hidden_features: int
            number of nodes in the hidden layer

        reg_lambda: float
            regularization parameter for weights

        reg_alpha: float
            controls compromise between l1 and l2 norm of weights

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
            'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not
            (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original predictors are included (True) in model's
            fitting or not (False)

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
                no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        col_sample: float
            percentage of covariates randomly chosen for training

        row_sample: float
            percentage of rows chosen for training, by stratified bootstrapping

        seed: int
            reproducibility seed for nodes_sim=='uniform'

        verbose: int
            0 for no output, 1 for a progress bar (default is 1)

        method: str
            type of Adaboost method, 'SAMME' (discrete) or 'SAMME.R' (real)

        backend: str
            "cpu" or "gpu" or "tpu"

    Attributes:

        alpha_: list
            AdaBoost coefficients alpha_m (rebuilt on every call to `fit`)

        base_learners_: dict
            a dictionary containing the base learners, keyed by boosting
            iteration (rebuilt on every call to `fit`)

    Examples:

    See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/adaboost_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/adaboost_classification.py)

    ```python
    import nnetsauce as ns
    import numpy as np
    from sklearn.datasets import load_breast_cancer
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import train_test_split
    from sklearn import metrics
    from time import time

    breast_cancer = load_breast_cancer()
    Z = breast_cancer.data
    t = breast_cancer.target
    np.random.seed(123)
    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)

    # SAMME.R
    clf = LogisticRegression(solver='liblinear', multi_class = 'ovr',
                            random_state=123)
    fit_obj = ns.AdaBoostClassifier(clf,
                                    n_hidden_features=int(11.22338867),
                                    direct_link=True,
                                    n_estimators=250, learning_rate=0.01126343,
                                    col_sample=0.72684326, row_sample=0.86429443,
                                    dropout=0.63078613, n_clusters=2,
                                    type_clust="gmm",
                                    verbose=1, seed = 123,
                                    method="SAMME.R")

    start = time()
    fit_obj.fit(X_train, y_train)
    print(f"Elapsed {time() - start}")

    start = time()
    print(fit_obj.score(X_test, y_test))
    print(f"Elapsed {time() - start}")

    preds = fit_obj.predict(X_test)

    print(metrics.classification_report(preds, y_test))

    ```

    """

    # construct the object -----

    def __init__(
        self,
        obj,
        n_estimators=10,
        learning_rate=0.1,
        n_hidden_features=1,
        reg_lambda=0,
        reg_alpha=0.5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=False,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        seed=123,
        verbose=1,
        method="SAMME",
        backend="cpu",
    ):
        self.type_fit = "classification"
        self.verbose = verbose
        self.method = method
        self.reg_lambda = reg_lambda
        self.reg_alpha = reg_alpha

        super().__init__(
            obj=obj,
            n_estimators=n_estimators,
            learning_rate=learning_rate,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )

        self.alpha_ = []
        self.base_learners_ = dict.fromkeys(range(n_estimators))

    def fit(self, X, y, sample_weight=None, **kwargs):
        """Fit Boosting model to training data (X, y).

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            sample_weight: array-like, shape = [n_samples], optional
                Initial observation weights; uniform weights (1 / n_samples)
                are used when None. The array passed in is never modified.

            **kwargs: additional parameters to be passed to
                    self.cook_training_set or self.obj.fit

        Returns:

             self: object
        """

        assert mx.is_factor(y), "y must contain only integers"

        assert self.method in (
            "SAMME",
            "SAMME.R",
        ), "`method` must be either 'SAMME' or 'SAMME.R'"

        assert (self.reg_lambda <= 1) & (
            self.reg_lambda >= 0
        ), "must have self.reg_lambda <= 1 &  self.reg_lambda >= 0"

        assert (self.reg_alpha <= 1) & (
            self.reg_alpha >= 0
        ), "must have self.reg_alpha <= 1 &  self.reg_alpha >= 0"

        # training
        n, _ = X.shape
        self.classes_ = np.unique(y)  # for compatibility with sklearn
        self.n_classes = len(self.classes_)
        self.n_classes_ = self.n_classes  # for compatibility with sklearn

        # Rebuild the fitted state from scratch: without this, a second call
        # to fit keeps appending to alpha_ while predict_proba still reads
        # the stale coefficients stored at the first positions.
        self.alpha_ = []
        self.base_learners_ = {}

        if sample_weight is None:
            w_m = np.repeat(1.0 / n, n)
        else:
            # Flat float copy: the SAMME.R update below works in place and
            # must neither touch the caller's array nor fail on int weights.
            w_m = np.array(sample_weight, dtype=float).ravel()

        # NOTE(review): cluster_encode and backend are not forwarded to the
        # base learner -- confirm whether that is intentional.
        base_learner = CustomClassifier(
            self.obj,
            n_hidden_features=self.n_hidden_features,
            activation_name=self.activation_name,
            a=self.a,
            nodes_sim=self.nodes_sim,
            bias=self.bias,
            dropout=self.dropout,
            direct_link=self.direct_link,
            n_clusters=self.n_clusters,
            type_clust=self.type_clust,
            type_scaling=self.type_scaling,
            col_sample=self.col_sample,
            row_sample=self.row_sample,
            seed=self.seed,
        )

        show_progress = self.verbose == 1
        if show_progress:
            pbar = Progbar(self.n_estimators)

        eps = 2.220446049250313e-16  # lower bound keeping the logs finite

        if self.method == "SAMME":
            err_bound = 1 - 1 / self.n_classes
            # NOTE(review): this leading 1.0 shifts the coefficients by one
            # position relative to base_learners_ (predict_proba pairs
            # learner m with alpha_[m]); kept as-is to preserve fitted
            # results -- confirm whether the offset is intended.
            self.alpha_.append(1.0)
            x_range_n = range(n)

            for m in range(self.n_estimators):
                preds = base_learner.fit(
                    X, y, sample_weight=w_m, **kwargs
                ).predict(X)

                self.base_learners_.update({m: deepcopy(base_learner)})

                # misclassification indicator for each observation
                cond = [y[i] != preds[i] for i in x_range_n]

                # weighted training error, written assuming sum(w_m) == 1
                err_m = max(
                    sum(miss * w for miss, w in zip(cond, w_m)),
                    eps,
                )

                if self.reg_lambda > 0:
                    # elastic-net style penalty on the observation weights
                    err_m += self.reg_lambda * (
                        (1 - self.reg_alpha) * 0.5 * sum(x**2 for x in w_m)
                        + self.reg_alpha * sum(abs(x) for x in w_m)
                    )

                err_m = min(err_m, err_bound)

                alpha_m = self.learning_rate * log(
                    (self.n_classes - 1) * (1 - err_m) / err_m
                )

                self.alpha_.append(alpha_m)

                # NOTE(review): the new weights depend only on the current
                # errors, not on the previous w_m -- confirm against the
                # intended SAMME update.
                w_m_temp = [exp(alpha_m * cond[i]) for i in x_range_n]

                sum_w_m = sum(w_m_temp)

                w_m = np.asarray([w / sum_w_m for w in w_m_temp])

                # a different seed for the next base learner
                base_learner.set_params(seed=self.seed + (m + 1) * 1000)

                if show_progress:
                    pbar.update(m)

        elif self.method == "SAMME.R":
            Y = mo.one_hot_encode2(y, self.n_classes)

            for m in range(self.n_estimators):
                probs = base_learner.fit(
                    X, y, sample_weight=w_m, **kwargs
                ).predict_proba(X)

                np.clip(a=probs, a_min=eps, a_max=1.0, out=probs)

                self.base_learners_.update({m: deepcopy(base_learner)})

                w_m *= np.exp(
                    -1.0
                    * self.learning_rate
                    * (1.0 - 1.0 / self.n_classes)
                    * xlogy(Y, probs).sum(axis=1)
                )

                w_m /= np.sum(w_m)

                # a different seed for the next base learner
                base_learner.set_params(seed=self.seed + (m + 1) * 1000)

                if show_progress:
                    pbar.update(m)

        if show_progress:
            pbar.update(self.n_estimators)

        self.n_estimators = len(self.base_learners_)

        return self

    def predict(self, X, **kwargs):
        """Predict test data X.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters to be passed to
                  self.cook_test_set

        Returns:

            model predictions: {array-like}
                for each row of X, the entry of `classes_` having the
                highest predicted probability
        """
        # Map the winning column back to its label; for labels 0..K-1 this
        # equals the plain argmax.
        return self.classes_[self.predict_proba(X, **kwargs).argmax(axis=1)]

    def predict_proba(self, X, **kwargs):
        """Predict probabilities for test data X.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters to be passed to
                  self.cook_test_set

        Returns:

            probability estimates for test data: {array-like}

        """

        if self.method == "SAMME":
            # alpha-weighted votes of the base learners' hard predictions
            ensemble_learner = np.zeros((X.shape[0], self.n_classes))

            for idx, base_learner in self.base_learners_.items():
                preds = base_learner.predict(X, **kwargs)

                ensemble_learner += self.alpha_[idx] * mo.one_hot_encode2(
                    preds, self.n_classes
                )

        else:  # "SAMME.R"
            # sum of the row-centered log-probabilities of the base learners
            ensemble_learner = 0

            for base_learner in self.base_learners_.values():
                probs = base_learner.predict_proba(X, **kwargs)

                np.clip(
                    a=probs, a_min=2.220446049250313e-16, a_max=1.0, out=probs
                )

                log_preds_proba = np.log(probs)

                ensemble_learner += (
                    log_preds_proba - log_preds_proba.mean(axis=1)[:, None]
                )

            ensemble_learner *= self.n_classes - 1

        # squash the scores with the logistic function, then make rows sum to 1
        expit_ensemble_learner = expit(ensemble_learner)

        sum_ensemble = expit_ensemble_learner.sum(axis=1)

        return expit_ensemble_learner / sum_ensemble[:, None]

    # This property is the only definition of _estimator_type: the class
    # attribute that used to precede it was shadowed by it and had no effect.
    @property
    def _estimator_type(self):
        return "classifier"

AdaBoost Classification (SAMME) model class derived from class Boosting

Parameters:

obj: object
    any object containing a method fit (obj.fit()) and a method predict
    (obj.predict())

n_estimators: int
    number of boosting iterations

learning_rate: float
    learning rate of the boosting procedure

n_hidden_features: int
    number of nodes in the hidden layer

reg_lambda: float
    regularization parameter for weights

reg_alpha: float
    controls compromise between l1 and l2 norm of weights

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

col_sample: float
    percentage of covariates randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

seed: int
    reproducibility seed for nodes_sim=='uniform'

verbose: int
    0 for no output, 1 for a progress bar (default is 1)

method: str
    type of Adaboost method, 'SAMME' (discrete) or 'SAMME.R' (real)

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

alpha_: list
    AdaBoost coefficients alpha_m

base_learners_: dict
    a dictionary containing the base learners

Examples:

See also https://github.com/Techtonique/nnetsauce/blob/master/examples/adaboost_classification.py

import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time

breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)

# SAMME.R
clf = LogisticRegression(solver='liblinear', multi_class = 'ovr',
                        random_state=123)
fit_obj = ns.AdaBoostClassifier(clf,
                                n_hidden_features=int(11.22338867),
                                direct_link=True,
                                n_estimators=250, learning_rate=0.01126343,
                                col_sample=0.72684326, row_sample=0.86429443,
                                dropout=0.63078613, n_clusters=2,
                                type_clust="gmm",
                                verbose=1, seed = 123,
                                method="SAMME.R")

start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")

start = time()
print(fit_obj.score(X_test, y_test))
print(f"Elapsed {time() - start}")

preds = fit_obj.predict(X_test)

print(metrics.classification_report(preds, y_test))
def fit(self, X, y, sample_weight=None, **kwargs):
    """Fit Boosting model to training data (X, y).

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        sample_weight: array-like, shape = [n_samples], optional
            Initial observation weights; uniform weights (1 / n_samples)
            are used when None. The array passed in is never modified.

        **kwargs: additional parameters to be passed to
                self.cook_training_set or self.obj.fit

    Returns:

         self: object
    """

    assert mx.is_factor(y), "y must contain only integers"

    assert self.method in (
        "SAMME",
        "SAMME.R",
    ), "`method` must be either 'SAMME' or 'SAMME.R'"

    assert (self.reg_lambda <= 1) & (
        self.reg_lambda >= 0
    ), "must have self.reg_lambda <= 1 &  self.reg_lambda >= 0"

    assert (self.reg_alpha <= 1) & (
        self.reg_alpha >= 0
    ), "must have self.reg_alpha <= 1 &  self.reg_alpha >= 0"

    # training
    n, _ = X.shape
    self.classes_ = np.unique(y)  # for compatibility with sklearn
    self.n_classes = len(self.classes_)
    self.n_classes_ = self.n_classes  # for compatibility with sklearn

    # Rebuild the fitted state from scratch: without this, a second call
    # to fit keeps appending to alpha_ while predict_proba still reads
    # the stale coefficients stored at the first positions.
    self.alpha_ = []
    self.base_learners_ = {}

    if sample_weight is None:
        w_m = np.repeat(1.0 / n, n)
    else:
        # Flat float copy: the SAMME.R update below works in place and
        # must neither touch the caller's array nor fail on int weights.
        w_m = np.array(sample_weight, dtype=float).ravel()

    # NOTE(review): cluster_encode and backend are not forwarded to the
    # base learner -- confirm whether that is intentional.
    base_learner = CustomClassifier(
        self.obj,
        n_hidden_features=self.n_hidden_features,
        activation_name=self.activation_name,
        a=self.a,
        nodes_sim=self.nodes_sim,
        bias=self.bias,
        dropout=self.dropout,
        direct_link=self.direct_link,
        n_clusters=self.n_clusters,
        type_clust=self.type_clust,
        type_scaling=self.type_scaling,
        col_sample=self.col_sample,
        row_sample=self.row_sample,
        seed=self.seed,
    )

    show_progress = self.verbose == 1
    if show_progress:
        pbar = Progbar(self.n_estimators)

    eps = 2.220446049250313e-16  # lower bound keeping the logs finite

    if self.method == "SAMME":
        err_bound = 1 - 1 / self.n_classes
        # NOTE(review): this leading 1.0 shifts the coefficients by one
        # position relative to base_learners_ (predict_proba pairs
        # learner m with alpha_[m]); kept as-is to preserve fitted
        # results -- confirm whether the offset is intended.
        self.alpha_.append(1.0)
        x_range_n = range(n)

        for m in range(self.n_estimators):
            preds = base_learner.fit(
                X, y, sample_weight=w_m, **kwargs
            ).predict(X)

            self.base_learners_.update({m: deepcopy(base_learner)})

            # misclassification indicator for each observation
            cond = [y[i] != preds[i] for i in x_range_n]

            # weighted training error, written assuming sum(w_m) == 1
            err_m = max(
                sum(miss * w for miss, w in zip(cond, w_m)),
                eps,
            )

            if self.reg_lambda > 0:
                # elastic-net style penalty on the observation weights
                err_m += self.reg_lambda * (
                    (1 - self.reg_alpha) * 0.5 * sum(x**2 for x in w_m)
                    + self.reg_alpha * sum(abs(x) for x in w_m)
                )

            err_m = min(err_m, err_bound)

            alpha_m = self.learning_rate * log(
                (self.n_classes - 1) * (1 - err_m) / err_m
            )

            self.alpha_.append(alpha_m)

            # NOTE(review): the new weights depend only on the current
            # errors, not on the previous w_m -- confirm against the
            # intended SAMME update.
            w_m_temp = [exp(alpha_m * cond[i]) for i in x_range_n]

            sum_w_m = sum(w_m_temp)

            w_m = np.asarray([w / sum_w_m for w in w_m_temp])

            # a different seed for the next base learner
            base_learner.set_params(seed=self.seed + (m + 1) * 1000)

            if show_progress:
                pbar.update(m)

    elif self.method == "SAMME.R":
        Y = mo.one_hot_encode2(y, self.n_classes)

        for m in range(self.n_estimators):
            probs = base_learner.fit(
                X, y, sample_weight=w_m, **kwargs
            ).predict_proba(X)

            np.clip(a=probs, a_min=eps, a_max=1.0, out=probs)

            self.base_learners_.update({m: deepcopy(base_learner)})

            w_m *= np.exp(
                -1.0
                * self.learning_rate
                * (1.0 - 1.0 / self.n_classes)
                * xlogy(Y, probs).sum(axis=1)
            )

            w_m /= np.sum(w_m)

            # a different seed for the next base learner
            base_learner.set_params(seed=self.seed + (m + 1) * 1000)

            if show_progress:
                pbar.update(m)

    if show_progress:
        pbar.update(self.n_estimators)

    self.n_estimators = len(self.base_learners_)

    return self

Fit Boosting model to training data (X, y).

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

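y: array-like, shape = [n_samples]
    Target values.

sample_weight: array-like, shape = [n_samples], optional
    Initial observation weights; uniform weights (1 / n_samples) are
    used when None.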

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

 self: object
def predict(self, X, **kwargs):
    """Predict test data X.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Test vectors, where n_samples is the number
            of samples and n_features is the number of features.

        **kwargs: additional parameters to be passed to
              self.cook_test_set

    Returns:

        model predictions: {array-like}
            for each row of X, the entry of `classes_` having the
            highest predicted probability
    """
    # Map the winning column back to its label; for labels 0..K-1 this
    # equals the plain argmax.
    return self.classes_[self.predict_proba(X, **kwargs).argmax(axis=1)]

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
      self.cook_test_set

Returns:

model predictions: {array-like}
def predict_proba(self, X, **kwargs):
    """Predict probabilities for test data X.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Test vectors, where n_samples is the number
            of samples and n_features is the number of features.

        **kwargs: additional parameters to be passed to
              self.cook_test_set

    Returns:

        probability estimates for test data: {array-like}

    """

    if self.method == "SAMME":
        # alpha-weighted votes of the base learners' hard predictions
        ensemble_learner = np.zeros((X.shape[0], self.n_classes))

        for idx, base_learner in self.base_learners_.items():
            preds = base_learner.predict(X, **kwargs)

            ensemble_learner += self.alpha_[idx] * mo.one_hot_encode2(
                preds, self.n_classes
            )

    else:  # "SAMME.R"
        # sum of the row-centered log-probabilities of the base learners
        ensemble_learner = 0

        for base_learner in self.base_learners_.values():
            probs = base_learner.predict_proba(X, **kwargs)

            np.clip(a=probs, a_min=2.220446049250313e-16, a_max=1.0, out=probs)

            log_preds_proba = np.log(probs)

            ensemble_learner += (
                log_preds_proba - log_preds_proba.mean(axis=1)[:, None]
            )

        ensemble_learner *= self.n_classes - 1

    # squash the scores with the logistic function, then make rows sum to 1
    expit_ensemble_learner = expit(ensemble_learner)

    sum_ensemble = expit_ensemble_learner.sum(axis=1)

    return expit_ensemble_learner / sum_ensemble[:, None]

Predict probabilities for test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
      self.cook_test_set

Returns:

probability estimates for test data: {array-like}
class Base(sklearn.base.BaseEstimator):
  48class Base(BaseEstimator):
  49    """Base model from which all the other classes inherit.
  50
  51    This class contains the most important data preprocessing/feature engineering methods.
  52
  53    Parameters:
  54
  55        n_hidden_features: int
  56            number of nodes in the hidden layer
  57
  58        activation_name: str
  59            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
  60
  61        a: float
  62            hyperparameter for 'prelu' or 'elu' activation function
  63
  64        nodes_sim: str
  65            type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
  66            'uniform'
  67
  68        bias: boolean
  69            indicates if the hidden layer contains a bias term (True) or
  70            not (False)
  71
  72        dropout: float
  73            regularization parameter; (random) percentage of nodes dropped out
  74            of the training
  75
  76        direct_link: boolean
  77            indicates if the original features are included (True) in model's
  78            fitting or not (False)
  79
  80        n_clusters: int
  81            number of clusters for type_clust='kmeans' or type_clust='gmm'
  82            clustering (could be 0: no clustering)
  83
  84        cluster_encode: bool
  85            defines how the variable containing clusters is treated (default is one-hot);
  86            if `False`, then labels are used, without one-hot encoding
  87
  88        type_clust: str
  89            type of clustering method: currently k-means ('kmeans') or Gaussian
  90            Mixture Model ('gmm')
  91
  92        type_scaling: a tuple of 3 strings
  93            scaling methods for inputs, hidden layer, and clustering respectively
  94            (and when relevant).
  95            Currently available: standardization ('std') or MinMax scaling ('minmax') or robust scaling ('robust') or  max absolute scaling ('maxabs')
  96
  97        col_sample: float
  98            percentage of features randomly chosen for training
  99
 100        row_sample: float
 101            percentage of rows chosen for training, by stratified bootstrapping
 102
 103        seed: int
 104            reproducibility seed for nodes_sim=='uniform', clustering and dropout
 105
 106        backend: str
 107            "cpu" or "gpu" or "tpu"
 108
 109    """
 110
 111    # construct the object -----
 112
 113    def __init__(
 114        self,
 115        n_hidden_features=5,
 116        activation_name="relu",
 117        a=0.01,
 118        nodes_sim="sobol",
 119        bias=True,
 120        dropout=0,
 121        direct_link=True,
 122        n_clusters=2,
 123        cluster_encode=True,
 124        type_clust="kmeans",
 125        type_scaling=("std", "std", "std"),
 126        col_sample=1,
 127        row_sample=1,
 128        seed=123,
 129        backend="cpu",
 130    ):
 131        if not JAX_AVAILABLE and backend != "cpu":
 132            raise RuntimeError(
 133                "JAX is required for this feature. Install with: pip install yourpackage[jax]"
 134            )
 135
 136        # input checks -----
 137
 138        sys_platform = platform.system()
 139
 140        if (sys_platform == "Windows") and (backend in ("gpu", "tpu")):
 141            warnings.warn(
 142                "No GPU/TPU computing on Windows yet, backend set to 'cpu'"
 143            )
 144            backend = "cpu"
 145
 146        assert activation_name in (
 147            "relu",
 148            "tanh",
 149            "sigmoid",
 150            "prelu",
 151            "elu",
 152        ), "'activation_name' must be in ('relu', 'tanh', 'sigmoid','prelu', 'elu')"
 153
 154        assert nodes_sim in (
 155            "sobol",
 156            "hammersley",
 157            "uniform",
 158            "halton",
 159        ), "'nodes_sim' must be in ('sobol', 'hammersley', 'uniform', 'halton')"
 160
 161        assert type_clust in (
 162            "kmeans",
 163            "gmm",
 164        ), "'type_clust' must be in ('kmeans', 'gmm')"
 165
 166        assert (len(type_scaling) == 3) & all(
 167            type_scaling[i] in ("minmax", "std", "robust", "maxabs")
 168            for i in range(len(type_scaling))
 169        ), "'type_scaling' must have length 3, and available scaling methods are 'minmax' scaling, standardization ('std'), robust scaling ('robust') and max absolute ('maxabs')"
 170
 171        assert (col_sample >= 0) & (
 172            col_sample <= 1
 173        ), "'col_sample' must be comprised between 0 and 1 (both included)"
 174
 175        assert backend in (
 176            "cpu",
 177            "gpu",
 178            "tpu",
 179        ), "must have 'backend' in ('cpu', 'gpu', 'tpu')"
 180
 181        self.n_hidden_features = n_hidden_features
 182        self.activation_name = activation_name
 183        self.a = a
 184        self.nodes_sim = nodes_sim
 185        self.bias = bias
 186        self.seed = seed
 187        self.backend = backend
 188        self.dropout = dropout
 189        self.direct_link = direct_link
 190        self.cluster_encode = cluster_encode
 191        self.type_clust = type_clust
 192        self.type_scaling = type_scaling
 193        self.col_sample = col_sample
 194        self.row_sample = row_sample
 195        self.n_clusters = n_clusters
 196        if isinstance(self, RegressorMixin):
 197            self.type_fit = "regression"
 198        elif isinstance(self, ClassifierMixin):
 199            self.type_fit = "classification"
 200        self.subsampler_ = None
 201        self.index_col_ = None
 202        self.index_row_ = True
 203        self.clustering_obj_ = None
 204        self.clustering_scaler_ = None
 205        self.nn_scaler_ = None
 206        self.scaler_ = None
 207        self.encoder_ = None
 208        self.W_ = None
 209        self.X_ = None
 210        self.y_ = None
 211        self.y_mean_ = None
 212        self.beta_ = None
 213
 214        # activation function -----
 215
 216        activation_options = {
 217            "relu": ac.relu if (self.backend == "cpu") else jnn.relu,
 218            "tanh": np.tanh if (self.backend == "cpu") else jnp.tanh,
 219            "sigmoid": (ac.sigmoid if (self.backend == "cpu") else jnn.sigmoid),
 220            "prelu": partial(ac.prelu, a=a),
 221            "elu": (
 222                partial(ac.elu, a=a)
 223                if (self.backend == "cpu")
 224                else partial(jnn.elu, a=a)
 225            ),
 226        }
 227
 228        self.activation_func = activation_options[activation_name]
 229
 230    # "preprocessing" methods to be inherited -----
 231
 232    def encode_clusters(self, X=None, predict=False, scaler=None, **kwargs):  #
 233        """Create new covariates with kmeans or GMM clustering
 234
 235        Parameters:
 236
 237            X: {array-like}, shape = [n_samples, n_features]
 238                Training vectors, where n_samples is the number
 239                of samples and n_features is the number of features.
 240
 241            predict: boolean
 242                is False on training set and True on test set
 243
 244            scaler: {object} of class StandardScaler, MinMaxScaler, RobustScaler or MaxAbsScaler
 245                if scaler has already been fitted on training data (online training), it can be passed here
 246
 247            **kwargs:
 248                additional parameters to be passed to the
 249                clustering method
 250
 251        Returns:
 252
 253            Clusters' matrix, one-hot encoded: {array-like}
 254
 255        """
 256
 257        np.random.seed(self.seed)
 258
 259        if X is None:
 260            X = self.X_
 261
 262        if isinstance(X, pd.DataFrame):
 263            X = copy.deepcopy(X.values.astype(float))
 264
 265        if len(X.shape) == 1:
 266            X = X.reshape(1, -1)
 267
 268        if predict is False:  # encode training set
 269            # scale input data before clustering
 270            self.clustering_scaler_, scaled_X = mo.scale_covariates(
 271                X, choice=self.type_scaling[2], scaler=self.clustering_scaler_
 272            )
 273
 274            self.clustering_obj_, X_clustered = mo.cluster_covariates(
 275                scaled_X,
 276                self.n_clusters,
 277                self.seed,
 278                type_clust=self.type_clust,
 279                **kwargs
 280            )
 281
 282            if self.cluster_encode:
 283                return mo.one_hot_encode(X_clustered, self.n_clusters).astype(
 284                    np.float16
 285                )
 286
 287            return X_clustered.astype(np.float16)
 288
 289        # if predict == True, encode test set
 290        X_clustered = self.clustering_obj_.predict(
 291            self.clustering_scaler_.transform(X)
 292        )
 293
 294        if self.cluster_encode == True:
 295            return mo.one_hot_encode(X_clustered, self.n_clusters).astype(
 296                np.float16
 297            )
 298
 299        return X_clustered.astype(np.float16)
 300
 301    def create_layer(self, scaled_X, W=None):
 302        """Create hidden layer.
 303
 304        Parameters:
 305
 306            scaled_X: {array-like}, shape = [n_samples, n_features]
 307                Training vectors, where n_samples is the number
 308                of samples and n_features is the number of features
 309
 310            W: {array-like}, shape = [n_features, hidden_features]
 311                if provided, constructs the hidden layer with W; otherwise computed internally
 312
 313        Returns:
 314
 315            Hidden layer matrix: {array-like}
 316
 317        """
 318
 319        n_features = scaled_X.shape[1]
 320
 321        # hash_sim = {
 322        #         "sobol": generate_sobol,
 323        #         "hammersley": generate_hammersley,
 324        #         "uniform": generate_uniform,
 325        #         "halton": generate_halton
 326        #     }
 327
 328        if self.bias is False:  # no bias term in the hidden layer
 329            if W is None:
 330                if self.nodes_sim == "sobol":
 331                    self.W_ = generate_sobol(
 332                        n_dims=n_features,
 333                        n_points=self.n_hidden_features,
 334                        seed=self.seed,
 335                    )
 336                elif self.nodes_sim == "hammersley":
 337                    self.W_ = generate_hammersley(
 338                        n_dims=n_features,
 339                        n_points=self.n_hidden_features,
 340                        seed=self.seed,
 341                    )
 342                elif self.nodes_sim == "uniform":
 343                    self.W_ = generate_uniform(
 344                        n_dims=n_features,
 345                        n_points=self.n_hidden_features,
 346                        seed=self.seed,
 347                    )
 348                else:
 349                    self.W_ = generate_halton(
 350                        n_dims=n_features,
 351                        n_points=self.n_hidden_features,
 352                        seed=self.seed,
 353                    )
 354
 355                assert (
 356                    scaled_X.shape[1] == self.W_.shape[0]
 357                ), "check dimensions of covariates X and matrix W"
 358
 359                return mo.dropout(
 360                    x=self.activation_func(
 361                        mo.safe_sparse_dot(
 362                            a=scaled_X, b=self.W_, backend=self.backend
 363                        )
 364                    ),
 365                    drop_prob=self.dropout,
 366                    seed=self.seed,
 367                )
 368
 369            # W is not none
 370            assert (
 371                scaled_X.shape[1] == W.shape[0]
 372            ), "check dimensions of covariates X and matrix W"
 373
 374            # self.W_ = W
 375            return mo.dropout(
 376                x=self.activation_func(
 377                    mo.safe_sparse_dot(a=scaled_X, b=W, backend=self.backend)
 378                ),
 379                drop_prob=self.dropout,
 380                seed=self.seed,
 381            )
 382
 383        # with bias term in the hidden layer
 384        if W is None:
 385            n_features_1 = n_features + 1
 386
 387            if self.nodes_sim == "sobol":
 388                self.W_ = generate_sobol(
 389                    n_dims=n_features_1,
 390                    n_points=self.n_hidden_features,
 391                    seed=self.seed,
 392                )
 393            elif self.nodes_sim == "hammersley":
 394                self.W_ = generate_hammersley(
 395                    n_dims=n_features_1,
 396                    n_points=self.n_hidden_features,
 397                    seed=self.seed,
 398                )
 399            elif self.nodes_sim == "uniform":
 400                self.W_ = generate_uniform(
 401                    n_dims=n_features_1,
 402                    n_points=self.n_hidden_features,
 403                    seed=self.seed,
 404                )
 405            else:
 406                self.W_ = generate_halton(
 407                    n_dims=n_features_1,
 408                    n_points=self.n_hidden_features,
 409                    seed=self.seed,
 410                )
 411
 412            # self.W_ = hash_sim[self.nodes_sim](
 413            #         n_dims=n_features_1,
 414            #         n_points=self.n_hidden_features,
 415            #         seed=self.seed,
 416            #     )
 417
 418            return mo.dropout(
 419                x=self.activation_func(
 420                    mo.safe_sparse_dot(
 421                        a=mo.cbind(
 422                            np.ones(scaled_X.shape[0]),
 423                            scaled_X,
 424                            backend=self.backend,
 425                        ),
 426                        b=self.W_,
 427                        backend=self.backend,
 428                    )
 429                ),
 430                drop_prob=self.dropout,
 431                seed=self.seed,
 432            )
 433
 434        # W is not None
 435        # self.W_ = W
 436        return mo.dropout(
 437            x=self.activation_func(
 438                mo.safe_sparse_dot(
 439                    a=mo.cbind(
 440                        np.ones(scaled_X.shape[0]),
 441                        scaled_X,
 442                        backend=self.backend,
 443                    ),
 444                    b=W,
 445                    backend=self.backend,
 446                )
 447            ),
 448            drop_prob=self.dropout,
 449            seed=self.seed,
 450        )
 451
 452    def _jax_create_layer(self, scaled_X, W=None):
 453        """JAX-compatible version of create_layer that exactly matches the original functionality."""
 454        key = jax.random.PRNGKey(self.seed)
 455        n_features = scaled_X.shape[1]
 456
 457        # Generate weights if not provided
 458        if W is None:
 459            if self.bias:
 460                n_features_1 = n_features + 1
 461                shape = (n_features_1, self.n_hidden_features)
 462            else:
 463                shape = (n_features, self.n_hidden_features)
 464
 465            # JAX-compatible weight generation matching original behavior
 466            if self.nodes_sim == "sobol":
 467                W_np = generate_sobol(
 468                    n_dims=n_features_1,
 469                    n_points=self.n_hidden_features,
 470                    seed=self.seed,
 471                )
 472                W = jnp.asarray(W_np)
 473            elif self.nodes_sim == "hammersley":
 474                W_np = generate_hammersley(
 475                    n_dims=n_features_1,
 476                    n_points=self.n_hidden_features,
 477                    seed=self.seed,
 478                )
 479                W = jnp.asarray(W_np)
 480            elif self.nodes_sim == "uniform":
 481                key, subkey = jax.random.split(key)
 482                W = jax.random.uniform(
 483                    subkey, shape=shape, minval=-1.0, maxval=1.0
 484                )
 485            else:  # halton
 486                W_np = generate_halton(
 487                    n_dims=n_features_1,
 488                    n_points=self.n_hidden_features,
 489                    seed=self.seed,
 490                )
 491                W = jnp.asarray(W_np)
 492
 493            self.W_ = np.array(W)  # Store as numpy for original methods
 494
 495        # Prepare input with bias if needed
 496        if self.bias:
 497            X_with_bias = jnp.hstack(
 498                [jnp.ones((scaled_X.shape[0], 1)), scaled_X]
 499            )
 500            print("X_with_bias shape:", X_with_bias.shape)
 501            print("W shape:", W.shape)
 502            linear_output = jnp.dot(X_with_bias, W)
 503        else:
 504            linear_output = jnp.dot(scaled_X, W)
 505
 506        # Apply activation function
 507        if self.activation_name == "relu":
 508            activated = jax.nn.relu(linear_output)
 509        elif self.activation_name == "tanh":
 510            activated = jnp.tanh(linear_output)
 511        elif self.activation_name == "sigmoid":
 512            activated = jax.nn.sigmoid(linear_output)
 513        else:  # leaky relu
 514            activated = jax.nn.leaky_relu(linear_output, negative_slope=self.a)
 515
 516        # Apply dropout
 517        if self.dropout > 0:
 518            key, subkey = jax.random.split(key)
 519            mask = jax.random.bernoulli(
 520                subkey, p=1 - self.dropout, shape=activated.shape
 521            )
 522            activated = jnp.where(mask, activated / (1 - self.dropout), 0)
 523
 524        return activated
 525
 526    def cook_training_set(self, y=None, X=None, W=None, **kwargs):
 527        """Create new hidden features for training set, with hidden layer, center the response.
 528
 529        Parameters:
 530
 531            y: array-like, shape = [n_samples]
 532                Target values
 533
 534            X: {array-like}, shape = [n_samples, n_features]
 535                Training vectors, where n_samples is the number
 536                of samples and n_features is the number of features
 537
 538            W: {array-like}, shape = [n_features, hidden_features]
 539                if provided, constructs the hidden layer via W
 540
 541        Returns:
 542
 543            (centered response, direct link + hidden layer matrix): {tuple}
 544
 545        """
 546
 547        # either X and y are stored or not
 548        # assert ((y is None) & (X is None)) | ((y is not None) & (X is not None))
 549        if self.n_hidden_features > 0:  # has a hidden layer
 550            assert (
 551                len(self.type_scaling) >= 2
 552            ), "must have len(self.type_scaling) >= 2 when self.n_hidden_features > 0"
 553
 554        if X is None:
 555            if self.col_sample == 1:
 556                input_X = self.X_
 557            else:
 558                n_features = self.X_.shape[1]
 559                new_n_features = int(np.ceil(n_features * self.col_sample))
 560                assert (
 561                    new_n_features >= 1
 562                ), "check class attribute 'col_sample' and the number of covariates provided for X"
 563                np.random.seed(self.seed)
 564                index_col = np.random.choice(
 565                    range(n_features), size=new_n_features, replace=False
 566                )
 567                self.index_col_ = index_col
 568                input_X = self.X_[:, self.index_col_]
 569
 570        else:  # X is not None # keep X vs self.X_
 571            if isinstance(X, pd.DataFrame):
 572                X = copy.deepcopy(X.values.astype(float))
 573
 574            if self.col_sample == 1:
 575                input_X = X
 576            else:
 577                n_features = X.shape[1]
 578                new_n_features = int(np.ceil(n_features * self.col_sample))
 579                assert (
 580                    new_n_features >= 1
 581                ), "check class attribute 'col_sample' and the number of covariates provided for X"
 582                np.random.seed(self.seed)
 583                index_col = np.random.choice(
 584                    range(n_features), size=new_n_features, replace=False
 585                )
 586                self.index_col_ = index_col
 587                input_X = X[:, self.index_col_]
 588
 589        if self.n_clusters <= 0:
 590            # data without any clustering: self.n_clusters is None -----
 591
 592            if self.n_hidden_features > 0:  # with hidden layer
 593                self.nn_scaler_, scaled_X = mo.scale_covariates(
 594                    input_X, choice=self.type_scaling[1], scaler=self.nn_scaler_
 595                )
 596                Phi_X = (
 597                    self.create_layer(scaled_X)
 598                    if W is None
 599                    else self.create_layer(scaled_X, W=W)
 600                )
 601                Z = (
 602                    mo.cbind(input_X, Phi_X, backend=self.backend)
 603                    if self.direct_link is True
 604                    else Phi_X
 605                )
 606                self.scaler_, scaled_Z = mo.scale_covariates(
 607                    Z, choice=self.type_scaling[0], scaler=self.scaler_
 608                )
 609            else:  # no hidden layer
 610                Z = input_X
 611                self.scaler_, scaled_Z = mo.scale_covariates(
 612                    Z, choice=self.type_scaling[0], scaler=self.scaler_
 613                )
 614
 615        else:
 616            # data with clustering: self.n_clusters is not None ----- # keep
 617
 618            augmented_X = mo.cbind(
 619                input_X,
 620                self.encode_clusters(input_X, **kwargs),
 621                backend=self.backend,
 622            )
 623
 624            if self.n_hidden_features > 0:  # with hidden layer
 625                self.nn_scaler_, scaled_X = mo.scale_covariates(
 626                    augmented_X,
 627                    choice=self.type_scaling[1],
 628                    scaler=self.nn_scaler_,
 629                )
 630                Phi_X = (
 631                    self.create_layer(scaled_X)
 632                    if W is None
 633                    else self.create_layer(scaled_X, W=W)
 634                )
 635                Z = (
 636                    mo.cbind(augmented_X, Phi_X, backend=self.backend)
 637                    if self.direct_link is True
 638                    else Phi_X
 639                )
 640                self.scaler_, scaled_Z = mo.scale_covariates(
 641                    Z, choice=self.type_scaling[0], scaler=self.scaler_
 642                )
 643            else:  # no hidden layer
 644                Z = augmented_X
 645                self.scaler_, scaled_Z = mo.scale_covariates(
 646                    Z, choice=self.type_scaling[0], scaler=self.scaler_
 647                )
 648
 649        # Returning model inputs -----
 650        if mx.is_factor(y) is False:  # regression
 651            # center y
 652            if y is None:
 653                self.y_mean_, centered_y = mo.center_response(self.y_)
 654            else:
 655                self.y_mean_, centered_y = mo.center_response(y)
 656
 657            # y is subsampled
 658            if self.row_sample < 1:
 659                n, p = Z.shape
 660
 661                self.subsampler_ = (
 662                    SubSampler(
 663                        y=self.y_, row_sample=self.row_sample, seed=self.seed
 664                    )
 665                    if y is None
 666                    else SubSampler(
 667                        y=y, row_sample=self.row_sample, seed=self.seed
 668                    )
 669                )
 670
 671                self.index_row_ = self.subsampler_.subsample()
 672
 673                n_row_sample = len(self.index_row_)
 674                # regression
 675                return (
 676                    centered_y[self.index_row_].reshape(n_row_sample),
 677                    self.scaler_.transform(
 678                        Z[self.index_row_, :].reshape(n_row_sample, p)
 679                    ),
 680                )
 681            # y is not subsampled
 682            # regression
 683            return (centered_y, self.scaler_.transform(Z))
 684
 685        # classification
 686        # y is subsampled
 687        if self.row_sample < 1:
 688            n, p = Z.shape
 689
 690            self.subsampler_ = (
 691                SubSampler(
 692                    y=self.y_, row_sample=self.row_sample, seed=self.seed
 693                )
 694                if y is None
 695                else SubSampler(y=y, row_sample=self.row_sample, seed=self.seed)
 696            )
 697
 698            self.index_row_ = self.subsampler_.subsample()
 699
 700            n_row_sample = len(self.index_row_)
 701            # classification
 702            return (
 703                y[self.index_row_].reshape(n_row_sample),
 704                self.scaler_.transform(
 705                    Z[self.index_row_, :].reshape(n_row_sample, p)
 706                ),
 707            )
 708        # y is not subsampled
 709        # classification
 710        return (y, self.scaler_.transform(Z))
 711
 712    def cook_test_set(self, X, **kwargs):
 713        """Transform data from test set, with hidden layer.
 714
 715        Parameters:
 716
 717            X: {array-like}, shape = [n_samples, n_features]
 718                Training vectors, where n_samples is the number
 719                of samples and n_features is the number of features
 720
 721            **kwargs: additional parameters to be passed to self.encode_cluster
 722
 723        Returns:
 724
 725            Transformed test set : {array-like}
 726        """
 727
 728        if isinstance(X, pd.DataFrame):
 729            X = copy.deepcopy(X.values.astype(float))
 730
 731        if len(X.shape) == 1:
 732            X = X.reshape(1, -1)
 733
 734        if (
 735            self.n_clusters == 0
 736        ):  # data without clustering: self.n_clusters is None -----
 737            if self.n_hidden_features > 0:
 738                # if hidden layer
 739                scaled_X = (
 740                    self.nn_scaler_.transform(X)
 741                    if (self.col_sample == 1)
 742                    else self.nn_scaler_.transform(X[:, self.index_col_])
 743                )
 744                Phi_X = self.create_layer(scaled_X, self.W_)
 745                if self.direct_link:
 746                    return self.scaler_.transform(
 747                        mo.cbind(scaled_X, Phi_X, backend=self.backend)
 748                    )
 749                # when self.direct_link == False
 750                return self.scaler_.transform(Phi_X)
 751            # if no hidden layer # self.n_hidden_features == 0
 752            return self.scaler_.transform(X)
 753
 754        # data with clustering: self.n_clusters > 0 -----
 755        if self.col_sample == 1:
 756            predicted_clusters = self.encode_clusters(
 757                X=X, predict=True, **kwargs
 758            )
 759            augmented_X = mo.cbind(X, predicted_clusters, backend=self.backend)
 760        else:
 761            predicted_clusters = self.encode_clusters(
 762                X=X[:, self.index_col_], predict=True, **kwargs
 763            )
 764            augmented_X = mo.cbind(
 765                X[:, self.index_col_], predicted_clusters, backend=self.backend
 766            )
 767
 768        if self.n_hidden_features > 0:  # if hidden layer
 769            scaled_X = self.nn_scaler_.transform(augmented_X)
 770            Phi_X = self.create_layer(scaled_X, self.W_)
 771            if self.direct_link:
 772                return self.scaler_.transform(
 773                    mo.cbind(augmented_X, Phi_X, backend=self.backend)
 774                )
 775            return self.scaler_.transform(Phi_X)
 776
 777        # if no hidden layer
 778        return self.scaler_.transform(augmented_X)
 779
 780    def cook_training_set_jax(self, y=None, X=None, W=None, **kwargs):
 781        """JAX-compatible version of cook_training_set that maintains side effects."""
 782        # Initialize random key
 783        key = jax.random.PRNGKey(self.seed)
 784
 785        # Convert inputs to JAX arrays
 786        X = jnp.asarray(X) if X is not None else jnp.asarray(self.X_)
 787        y = jnp.asarray(y) if y is not None else jnp.asarray(self.y_)
 788
 789        # Handle column sampling
 790        if self.col_sample < 1:
 791            n_features = X.shape[1]
 792            new_n_features = int(jnp.ceil(n_features * self.col_sample))
 793            assert new_n_features >= 1, "Invalid col_sample"
 794
 795            key, subkey = jax.random.split(key)
 796            index_col = jax.random.choice(
 797                subkey, n_features, shape=(new_n_features,), replace=False
 798            )
 799            self.index_col_ = np.array(
 800                index_col
 801            )  # Store as numpy for original methods
 802            input_X = X[:, index_col]
 803            n_features = (
 804                new_n_features  # Update n_features after column sampling
 805            )
 806        else:
 807            input_X = X
 808            n_features = X.shape[1]
 809
 810        augmented_X = input_X
 811
 812        # JAX-compatible scaling
 813        def jax_scale(data, mean=None, std=None):
 814            if mean is None:
 815                mean = jnp.mean(data, axis=0)
 816            if std is None:
 817                std = jnp.std(data, axis=0)
 818            return (data - mean) / (std + 1e-10), mean, std
 819
 820        # Hidden layer processing
 821        if self.n_hidden_features > 0:
 822            # Initialize weights if not provided
 823            if W is None:
 824                shape = (n_features, self.n_hidden_features)
 825
 826                # JAX-compatible weight generation
 827                if self.nodes_sim == "uniform":
 828                    key, subkey = jax.random.split(key)
 829                    W = jax.random.uniform(
 830                        subkey, shape=shape, minval=-1.0, maxval=1.0
 831                    ) * (1 / jnp.sqrt(n_features))
 832                else:
 833                    # For other sequences, use numpy generation then convert to JAX
 834                    if self.nodes_sim == "sobol":
 835                        W_np = generate_sobol(
 836                            n_dims=shape[0],
 837                            n_points=shape[1],
 838                            seed=self.seed,
 839                        )
 840                    elif self.nodes_sim == "hammersley":
 841                        W_np = generate_hammersley(
 842                            n_dims=shape[0],
 843                            n_points=shape[1],
 844                            seed=self.seed,
 845                        )
 846                    elif self.nodes_sim == "halton":
 847                        W_np = generate_halton(
 848                            n_dims=shape[0],
 849                            n_points=shape[1],
 850                            seed=self.seed,
 851                        )
 852                    else:  # default to uniform
 853                        key, subkey = jax.random.split(key)
 854                        W = jax.random.uniform(
 855                            subkey, shape=shape, minval=-1.0, maxval=1.0
 856                        ) * (1 / jnp.sqrt(n_features))
 857
 858                    if self.nodes_sim in ["sobol", "hammersley", "halton"]:
 859                        W = jnp.asarray(W_np) * (1 / jnp.sqrt(n_features))
 860
 861                self.W_ = np.array(W)  # Store as numpy for original methods
 862
 863            # Scale features
 864            scaled_X, self.nn_mean_, self.nn_std_ = jax_scale(
 865                augmented_X,
 866                getattr(self, "nn_mean_", None),
 867                getattr(self, "nn_std_", None),
 868            )
 869
 870            # Create hidden layer with proper bias handling
 871            linear_output = jnp.dot(scaled_X, W)
 872
 873            # Apply activation
 874            if self.activation_name == "relu":
 875                Phi_X = jax.nn.relu(linear_output)
 876            elif self.activation_name == "tanh":
 877                Phi_X = jnp.tanh(linear_output)
 878            elif self.activation_name == "sigmoid":
 879                Phi_X = jax.nn.sigmoid(linear_output)
 880            else:  # leaky relu
 881                Phi_X = jax.nn.leaky_relu(linear_output, negative_slope=self.a)
 882
 883            # Apply dropout
 884            if self.dropout > 0:
 885                key, subkey = jax.random.split(key)
 886                mask = jax.random.bernoulli(
 887                    subkey, p=1 - self.dropout, shape=Phi_X.shape
 888                )
 889                Phi_X = jnp.where(mask, Phi_X / (1 - self.dropout), 0)
 890
 891            Z = jnp.hstack([scaled_X, Phi_X]) if self.direct_link else Phi_X
 892        else:
 893            Z = augmented_X
 894
 895        # Final scaling
 896        scaled_Z, self.scale_mean_, self.scale_std_ = jax_scale(
 897            Z,
 898            getattr(self, "scale_mean_", None),
 899            getattr(self, "scale_std_", None),
 900        )
 901
 902        # Center response for regression
 903        if not hasattr(mx, "is_factor") or not mx.is_factor(
 904            y
 905        ):  # regression case
 906            self.y_mean_ = float(
 907                jnp.mean(y)
 908            )  # Convert to Python float for compatibility
 909            centered_y = y - self.y_mean_
 910        else:
 911            centered_y = y
 912
 913        # Handle row sampling
 914        if self.row_sample < 1:
 915            key, subkey = jax.random.split(key)
 916            n_samples = Z.shape[0]
 917            n_row_sample = int(jnp.ceil(n_samples * self.row_sample))
 918            index_row = jax.random.choice(
 919                subkey, n_samples, shape=(n_row_sample,), replace=False
 920            )
 921            self.index_row_ = np.array(
 922                index_row
 923            )  # Store as numpy for original methods
 924            return (centered_y[index_row], scaled_Z[index_row])
 925
 926        return (centered_y, scaled_Z)
 927
 928    def cook_test_set_jax(self, X, **kwargs):
 929        """JAX-compatible test set processing with matching dimension handling."""
 930        X = jnp.asarray(X)
 931
 932        if len(X.shape) == 1:
 933            X = X.reshape(1, -1)
 934
 935        # Handle column sampling
 936        input_X = (
 937            X if self.col_sample == 1 else X[:, jnp.asarray(self.index_col_)]
 938        )
 939
 940        augmented_X = input_X
 941
 942        # JAX-compatible scaling
 943        scaled_X = (augmented_X - self.nn_mean_) / (self.nn_std_ + 1e-10)
 944
 945        # Process hidden layer if needed
 946        if self.n_hidden_features > 0:
 947            Phi_X = self._jax_create_layer(scaled_X, jnp.asarray(self.W_))
 948            Z = jnp.hstack([scaled_X, Phi_X]) if self.direct_link else Phi_X
 949        else:
 950            Z = augmented_X
 951
 952        # Final scaling
 953        scaled_Z = (Z - self.scale_mean_) / (self.scale_std_ + 1e-10)
 954
 955        return scaled_Z
 956
 957    def _jax_create_layer(self, X, W):
 958        """JAX-compatible hidden layer creation."""
 959        # print("X", X.shape)
 960        # print("W", W.shape)
 961        # print("self.W_", self.W_.shape)
 962        linear_output = jnp.dot(X, W)
 963
 964        if self.activation_name == "relu":
 965            return jax.nn.relu(linear_output)
 966        elif self.activation_name == "tanh":
 967            return jnp.tanh(linear_output)
 968        elif self.activation_name == "sigmoid":
 969            return jax.nn.sigmoid(linear_output)
 970        else:  # leaky relu
 971            return jax.nn.leaky_relu(linear_output, negative_slope=self.a)
 972
 973    def cross_val_score(
 974        self,
 975        X,
 976        y,
 977        cv=5,
 978        scoring="accuracy",
 979        random_state=42,
 980        n_jobs=-1,
 981        epsilon=0.5,
 982        penalized=True,
 983        objective="abs",
 984        **kwargs
 985    ):
 986        """
 987        Penalized Cross-validation score for a model.
 988
 989        Parameters:
 990
 991            X: {array-like}, shape = [n_samples, n_features]
 992                Training vectors, where n_samples is the number
 993                of samples and n_features is the number of features
 994
 995            y: array-like, shape = [n_samples]
 996                Target values
 997
 998            X_test: {array-like}, shape = [n_samples, n_features]
 999                Test vectors, where n_samples is the number
1000                of samples and n_features is the number of features
1001
1002            y_test: array-like, shape = [n_samples]
1003                Target values
1004
1005            cv: int
1006                Number of folds
1007
1008            scoring: str
1009                Scoring metric
1010
1011            random_state: int
1012                Random state
1013
1014            n_jobs: int
1015                Number of jobs to run in parallel
1016
1017            epsilon: float
1018                Penalty parameter
1019
1020            penalized: bool
1021                Whether to obtain penalized cross-validation score or not
1022
1023            objective: str
1024                'abs': Minimize the absolute difference between cross-validation score and validation score
1025                'relative': Minimize the relative difference between cross-validation score and validation score
1026        Returns:
1027
1028            A namedtuple with the following fields:
1029                - cv_score: float
1030                    cross-validation score
1031                - val_score: float
1032                    validation score
1033                - penalized_score: float
1034                    penalized cross-validation score: cv_score / val_score + epsilon*(1/val_score + 1/cv_score)
1035                    If higher scoring metric is better, minimize the function result.
1036                    If lower scoring metric is better, maximize the function result.
1037        """
1038        if scoring == "accuracy":
1039            scoring_func = accuracy_score
1040        elif scoring == "balanced_accuracy":
1041            scoring_func = balanced_accuracy_score
1042        elif scoring == "f1":
1043            scoring_func = f1_score
1044        elif scoring == "roc_auc":
1045            scoring_func = roc_auc_score
1046        elif scoring == "r2":
1047            scoring_func = r2_score
1048        elif scoring == "mse":
1049            scoring_func = mean_squared_error
1050        elif scoring == "mae":
1051            scoring_func = mean_absolute_error
1052        elif scoring == "mape":
1053            scoring_func = mean_absolute_percentage_error
1054        elif scoring == "rmse":
1055
1056            def scoring_func(y_true, y_pred):
1057                return np.sqrt(mean_squared_error(y_true, y_pred))
1058
1059        X_train, X_val, y_train, y_val = train_test_split(
1060            X, y, test_size=0.2, random_state=random_state
1061        )
1062
1063        res = cross_val_score(
1064            self, X_train, y_train, cv=cv, scoring=scoring, n_jobs=n_jobs
1065        )  # cross-validation error
1066
1067        if penalized == False:
1068            return res
1069
1070        DescribeResult = namedtuple(
1071            "DescribeResult", ["cv_score", "val_score", "penalized_score"]
1072        )
1073
1074        numerator = res.mean()
1075
1076        # Evaluate on the (cv+1)-th fold
1077        preds_val = self.fit(X_train, y_train).predict(X_val)
1078        try:
1079            denominator = scoring(y_val, preds_val)  # validation error
1080        except Exception as e:
1081            denominator = scoring_func(y_val, preds_val)
1082
1083        # if higher is better
1084        if objective == "abs":
1085            penalized_score = np.abs(numerator - denominator) + epsilon * (
1086                1 / denominator + 1 / numerator
1087            )
1088        elif objective == "relative":
1089            ratio = numerator / denominator
1090            penalized_score = np.abs(ratio - 1) + epsilon * (
1091                1 / denominator + 1 / numerator
1092            )
1093
1094        return DescribeResult(
1095            cv_score=numerator,
1096            val_score=denominator,
1097            penalized_score=penalized_score,
1098        )

Base model from which all the other classes inherit.

This class contains the most important data preprocessing/feature engineering methods.

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or
    not (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original features are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for type_clust='kmeans' or type_clust='gmm'
    clustering (could be 0: no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot);
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std'), MinMax scaling ('minmax'), robust scaling ('robust') or max absolute scaling ('maxabs')

col_sample: float
    percentage of features randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

seed: int
    reproducibility seed for nodes_sim=='uniform', clustering and dropout

backend: str
    "cpu" or "gpu" or "tpu"
def encode_clusters(self, X=None, predict=False, scaler=None, **kwargs):
232    def encode_clusters(self, X=None, predict=False, scaler=None, **kwargs):  #
233        """Create new covariates with kmeans or GMM clustering
234
235        Parameters:
236
237            X: {array-like}, shape = [n_samples, n_features]
238                Training vectors, where n_samples is the number
239                of samples and n_features is the number of features.
240
241            predict: boolean
242                is False on training set and True on test set
243
244            scaler: {object} of class StandardScaler, MinMaxScaler, RobustScaler or MaxAbsScaler
245                if scaler has already been fitted on training data (online training), it can be passed here
246
247            **kwargs:
248                additional parameters to be passed to the
249                clustering method
250
251        Returns:
252
253            Clusters' matrix, one-hot encoded: {array-like}
254
255        """
256
257        np.random.seed(self.seed)
258
259        if X is None:
260            X = self.X_
261
262        if isinstance(X, pd.DataFrame):
263            X = copy.deepcopy(X.values.astype(float))
264
265        if len(X.shape) == 1:
266            X = X.reshape(1, -1)
267
268        if predict is False:  # encode training set
269            # scale input data before clustering
270            self.clustering_scaler_, scaled_X = mo.scale_covariates(
271                X, choice=self.type_scaling[2], scaler=self.clustering_scaler_
272            )
273
274            self.clustering_obj_, X_clustered = mo.cluster_covariates(
275                scaled_X,
276                self.n_clusters,
277                self.seed,
278                type_clust=self.type_clust,
279                **kwargs
280            )
281
282            if self.cluster_encode:
283                return mo.one_hot_encode(X_clustered, self.n_clusters).astype(
284                    np.float16
285                )
286
287            return X_clustered.astype(np.float16)
288
289        # if predict == True, encode test set
290        X_clustered = self.clustering_obj_.predict(
291            self.clustering_scaler_.transform(X)
292        )
293
294        if self.cluster_encode == True:
295            return mo.one_hot_encode(X_clustered, self.n_clusters).astype(
296                np.float16
297            )
298
299        return X_clustered.astype(np.float16)

Create new covariates with kmeans or GMM clustering

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

predict: boolean
    is False on training set and True on test set

scaler: {object} of class StandardScaler, MinMaxScaler, RobustScaler or MaxAbsScaler
    if scaler has already been fitted on training data (online training), it can be passed here

**kwargs:
    additional parameters to be passed to the
    clustering method

Returns:

Clusters' matrix, one-hot encoded (or raw cluster labels when `cluster_encode` is False): {array-like}
def create_layer(self, scaled_X, W=None):
301    def create_layer(self, scaled_X, W=None):
302        """Create hidden layer.
303
304        Parameters:
305
306            scaled_X: {array-like}, shape = [n_samples, n_features]
307                Training vectors, where n_samples is the number
308                of samples and n_features is the number of features
309
310            W: {array-like}, shape = [n_features, hidden_features]
311                if provided, constructs the hidden layer with W; otherwise computed internally
312
313        Returns:
314
315            Hidden layer matrix: {array-like}
316
317        """
318
319        n_features = scaled_X.shape[1]
320
321        # hash_sim = {
322        #         "sobol": generate_sobol,
323        #         "hammersley": generate_hammersley,
324        #         "uniform": generate_uniform,
325        #         "halton": generate_halton
326        #     }
327
328        if self.bias is False:  # no bias term in the hidden layer
329            if W is None:
330                if self.nodes_sim == "sobol":
331                    self.W_ = generate_sobol(
332                        n_dims=n_features,
333                        n_points=self.n_hidden_features,
334                        seed=self.seed,
335                    )
336                elif self.nodes_sim == "hammersley":
337                    self.W_ = generate_hammersley(
338                        n_dims=n_features,
339                        n_points=self.n_hidden_features,
340                        seed=self.seed,
341                    )
342                elif self.nodes_sim == "uniform":
343                    self.W_ = generate_uniform(
344                        n_dims=n_features,
345                        n_points=self.n_hidden_features,
346                        seed=self.seed,
347                    )
348                else:
349                    self.W_ = generate_halton(
350                        n_dims=n_features,
351                        n_points=self.n_hidden_features,
352                        seed=self.seed,
353                    )
354
355                assert (
356                    scaled_X.shape[1] == self.W_.shape[0]
357                ), "check dimensions of covariates X and matrix W"
358
359                return mo.dropout(
360                    x=self.activation_func(
361                        mo.safe_sparse_dot(
362                            a=scaled_X, b=self.W_, backend=self.backend
363                        )
364                    ),
365                    drop_prob=self.dropout,
366                    seed=self.seed,
367                )
368
369            # W is not none
370            assert (
371                scaled_X.shape[1] == W.shape[0]
372            ), "check dimensions of covariates X and matrix W"
373
374            # self.W_ = W
375            return mo.dropout(
376                x=self.activation_func(
377                    mo.safe_sparse_dot(a=scaled_X, b=W, backend=self.backend)
378                ),
379                drop_prob=self.dropout,
380                seed=self.seed,
381            )
382
383        # with bias term in the hidden layer
384        if W is None:
385            n_features_1 = n_features + 1
386
387            if self.nodes_sim == "sobol":
388                self.W_ = generate_sobol(
389                    n_dims=n_features_1,
390                    n_points=self.n_hidden_features,
391                    seed=self.seed,
392                )
393            elif self.nodes_sim == "hammersley":
394                self.W_ = generate_hammersley(
395                    n_dims=n_features_1,
396                    n_points=self.n_hidden_features,
397                    seed=self.seed,
398                )
399            elif self.nodes_sim == "uniform":
400                self.W_ = generate_uniform(
401                    n_dims=n_features_1,
402                    n_points=self.n_hidden_features,
403                    seed=self.seed,
404                )
405            else:
406                self.W_ = generate_halton(
407                    n_dims=n_features_1,
408                    n_points=self.n_hidden_features,
409                    seed=self.seed,
410                )
411
412            # self.W_ = hash_sim[self.nodes_sim](
413            #         n_dims=n_features_1,
414            #         n_points=self.n_hidden_features,
415            #         seed=self.seed,
416            #     )
417
418            return mo.dropout(
419                x=self.activation_func(
420                    mo.safe_sparse_dot(
421                        a=mo.cbind(
422                            np.ones(scaled_X.shape[0]),
423                            scaled_X,
424                            backend=self.backend,
425                        ),
426                        b=self.W_,
427                        backend=self.backend,
428                    )
429                ),
430                drop_prob=self.dropout,
431                seed=self.seed,
432            )
433
434        # W is not None
435        # self.W_ = W
436        return mo.dropout(
437            x=self.activation_func(
438                mo.safe_sparse_dot(
439                    a=mo.cbind(
440                        np.ones(scaled_X.shape[0]),
441                        scaled_X,
442                        backend=self.backend,
443                    ),
444                    b=W,
445                    backend=self.backend,
446                )
447            ),
448            drop_prob=self.dropout,
449            seed=self.seed,
450        )

Create hidden layer.

Parameters:

scaled_X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features

W: {array-like}, shape = [n_features, hidden_features]
    if provided, constructs the hidden layer with W; otherwise computed internally

Returns:

Hidden layer matrix: {array-like}
def cook_training_set(self, y=None, X=None, W=None, **kwargs):
526    def cook_training_set(self, y=None, X=None, W=None, **kwargs):
527        """Create new hidden features for training set, with hidden layer, center the response.
528
529        Parameters:
530
531            y: array-like, shape = [n_samples]
532                Target values
533
534            X: {array-like}, shape = [n_samples, n_features]
535                Training vectors, where n_samples is the number
536                of samples and n_features is the number of features
537
538            W: {array-like}, shape = [n_features, hidden_features]
539                if provided, constructs the hidden layer via W
540
541        Returns:
542
543            (centered response, direct link + hidden layer matrix): {tuple}
544
545        """
546
547        # either X and y are stored or not
548        # assert ((y is None) & (X is None)) | ((y is not None) & (X is not None))
549        if self.n_hidden_features > 0:  # has a hidden layer
550            assert (
551                len(self.type_scaling) >= 2
552            ), "must have len(self.type_scaling) >= 2 when self.n_hidden_features > 0"
553
554        if X is None:
555            if self.col_sample == 1:
556                input_X = self.X_
557            else:
558                n_features = self.X_.shape[1]
559                new_n_features = int(np.ceil(n_features * self.col_sample))
560                assert (
561                    new_n_features >= 1
562                ), "check class attribute 'col_sample' and the number of covariates provided for X"
563                np.random.seed(self.seed)
564                index_col = np.random.choice(
565                    range(n_features), size=new_n_features, replace=False
566                )
567                self.index_col_ = index_col
568                input_X = self.X_[:, self.index_col_]
569
570        else:  # X is not None # keep X vs self.X_
571            if isinstance(X, pd.DataFrame):
572                X = copy.deepcopy(X.values.astype(float))
573
574            if self.col_sample == 1:
575                input_X = X
576            else:
577                n_features = X.shape[1]
578                new_n_features = int(np.ceil(n_features * self.col_sample))
579                assert (
580                    new_n_features >= 1
581                ), "check class attribute 'col_sample' and the number of covariates provided for X"
582                np.random.seed(self.seed)
583                index_col = np.random.choice(
584                    range(n_features), size=new_n_features, replace=False
585                )
586                self.index_col_ = index_col
587                input_X = X[:, self.index_col_]
588
589        if self.n_clusters <= 0:
590            # data without any clustering: self.n_clusters is None -----
591
592            if self.n_hidden_features > 0:  # with hidden layer
593                self.nn_scaler_, scaled_X = mo.scale_covariates(
594                    input_X, choice=self.type_scaling[1], scaler=self.nn_scaler_
595                )
596                Phi_X = (
597                    self.create_layer(scaled_X)
598                    if W is None
599                    else self.create_layer(scaled_X, W=W)
600                )
601                Z = (
602                    mo.cbind(input_X, Phi_X, backend=self.backend)
603                    if self.direct_link is True
604                    else Phi_X
605                )
606                self.scaler_, scaled_Z = mo.scale_covariates(
607                    Z, choice=self.type_scaling[0], scaler=self.scaler_
608                )
609            else:  # no hidden layer
610                Z = input_X
611                self.scaler_, scaled_Z = mo.scale_covariates(
612                    Z, choice=self.type_scaling[0], scaler=self.scaler_
613                )
614
615        else:
616            # data with clustering: self.n_clusters is not None ----- # keep
617
618            augmented_X = mo.cbind(
619                input_X,
620                self.encode_clusters(input_X, **kwargs),
621                backend=self.backend,
622            )
623
624            if self.n_hidden_features > 0:  # with hidden layer
625                self.nn_scaler_, scaled_X = mo.scale_covariates(
626                    augmented_X,
627                    choice=self.type_scaling[1],
628                    scaler=self.nn_scaler_,
629                )
630                Phi_X = (
631                    self.create_layer(scaled_X)
632                    if W is None
633                    else self.create_layer(scaled_X, W=W)
634                )
635                Z = (
636                    mo.cbind(augmented_X, Phi_X, backend=self.backend)
637                    if self.direct_link is True
638                    else Phi_X
639                )
640                self.scaler_, scaled_Z = mo.scale_covariates(
641                    Z, choice=self.type_scaling[0], scaler=self.scaler_
642                )
643            else:  # no hidden layer
644                Z = augmented_X
645                self.scaler_, scaled_Z = mo.scale_covariates(
646                    Z, choice=self.type_scaling[0], scaler=self.scaler_
647                )
648
649        # Returning model inputs -----
650        if mx.is_factor(y) is False:  # regression
651            # center y
652            if y is None:
653                self.y_mean_, centered_y = mo.center_response(self.y_)
654            else:
655                self.y_mean_, centered_y = mo.center_response(y)
656
657            # y is subsampled
658            if self.row_sample < 1:
659                n, p = Z.shape
660
661                self.subsampler_ = (
662                    SubSampler(
663                        y=self.y_, row_sample=self.row_sample, seed=self.seed
664                    )
665                    if y is None
666                    else SubSampler(
667                        y=y, row_sample=self.row_sample, seed=self.seed
668                    )
669                )
670
671                self.index_row_ = self.subsampler_.subsample()
672
673                n_row_sample = len(self.index_row_)
674                # regression
675                return (
676                    centered_y[self.index_row_].reshape(n_row_sample),
677                    self.scaler_.transform(
678                        Z[self.index_row_, :].reshape(n_row_sample, p)
679                    ),
680                )
681            # y is not subsampled
682            # regression
683            return (centered_y, self.scaler_.transform(Z))
684
685        # classification
686        # y is subsampled
687        if self.row_sample < 1:
688            n, p = Z.shape
689
690            self.subsampler_ = (
691                SubSampler(
692                    y=self.y_, row_sample=self.row_sample, seed=self.seed
693                )
694                if y is None
695                else SubSampler(y=y, row_sample=self.row_sample, seed=self.seed)
696            )
697
698            self.index_row_ = self.subsampler_.subsample()
699
700            n_row_sample = len(self.index_row_)
701            # classification
702            return (
703                y[self.index_row_].reshape(n_row_sample),
704                self.scaler_.transform(
705                    Z[self.index_row_, :].reshape(n_row_sample, p)
706                ),
707            )
708        # y is not subsampled
709        # classification
710        return (y, self.scaler_.transform(Z))

Create new hidden features for training set, with hidden layer, center the response.

Parameters:

y: array-like, shape = [n_samples]
    Target values

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features

W: {array-like}, shape = [n_features, hidden_features]
    if provided, constructs the hidden layer via W

Returns:

(centered response, direct link + hidden layer matrix): {tuple}
def cook_test_set(self, X, **kwargs):
712    def cook_test_set(self, X, **kwargs):
713        """Transform data from test set, with hidden layer.
714
715        Parameters:
716
717            X: {array-like}, shape = [n_samples, n_features]
718                Training vectors, where n_samples is the number
719                of samples and n_features is the number of features
720
721            **kwargs: additional parameters to be passed to self.encode_cluster
722
723        Returns:
724
725            Transformed test set : {array-like}
726        """
727
728        if isinstance(X, pd.DataFrame):
729            X = copy.deepcopy(X.values.astype(float))
730
731        if len(X.shape) == 1:
732            X = X.reshape(1, -1)
733
734        if (
735            self.n_clusters == 0
736        ):  # data without clustering: self.n_clusters is None -----
737            if self.n_hidden_features > 0:
738                # if hidden layer
739                scaled_X = (
740                    self.nn_scaler_.transform(X)
741                    if (self.col_sample == 1)
742                    else self.nn_scaler_.transform(X[:, self.index_col_])
743                )
744                Phi_X = self.create_layer(scaled_X, self.W_)
745                if self.direct_link:
746                    return self.scaler_.transform(
747                        mo.cbind(scaled_X, Phi_X, backend=self.backend)
748                    )
749                # when self.direct_link == False
750                return self.scaler_.transform(Phi_X)
751            # if no hidden layer # self.n_hidden_features == 0
752            return self.scaler_.transform(X)
753
754        # data with clustering: self.n_clusters > 0 -----
755        if self.col_sample == 1:
756            predicted_clusters = self.encode_clusters(
757                X=X, predict=True, **kwargs
758            )
759            augmented_X = mo.cbind(X, predicted_clusters, backend=self.backend)
760        else:
761            predicted_clusters = self.encode_clusters(
762                X=X[:, self.index_col_], predict=True, **kwargs
763            )
764            augmented_X = mo.cbind(
765                X[:, self.index_col_], predicted_clusters, backend=self.backend
766            )
767
768        if self.n_hidden_features > 0:  # if hidden layer
769            scaled_X = self.nn_scaler_.transform(augmented_X)
770            Phi_X = self.create_layer(scaled_X, self.W_)
771            if self.direct_link:
772                return self.scaler_.transform(
773                    mo.cbind(augmented_X, Phi_X, backend=self.backend)
774                )
775            return self.scaler_.transform(Phi_X)
776
777        # if no hidden layer
778        return self.scaler_.transform(augmented_X)

Transform data from test set, with hidden layer.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features

**kwargs: additional parameters to be passed to self.encode_cluster

Returns:

Transformed test set : {array-like}
class BaseRegressor(nnetsauce.Base, sklearn.base.RegressorMixin):
 15class BaseRegressor(Base, RegressorMixin):
 16    """Random Vector Functional Link Network regression without shrinkage
 17
 18    Parameters:
 19
 20        n_hidden_features: int
 21            number of nodes in the hidden layer
 22
 23        activation_name: str
 24            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 25
 26        a: float
 27            hyperparameter for 'prelu' or 'elu' activation function
 28
 29        nodes_sim: str
 30            type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
 31            'uniform'
 32
 33        bias: boolean
 34            indicates if the hidden layer contains a bias term (True) or
 35            not (False)
 36
 37        dropout: float
 38            regularization parameter; (random) percentage of nodes dropped out
 39            of the training
 40
 41        direct_link: boolean
 42            indicates if the original features are included (True) in model's
 43            fitting or not (False)
 44
 45        n_clusters: int
 46            number of clusters for type_clust='kmeans' or type_clust='gmm'
 47            clustering (could be 0: no clustering)
 48
 49        cluster_encode: bool
 50            defines how the variable containing clusters is treated (default is one-hot);
 51            if `False`, then labels are used, without one-hot encoding
 52
 53        type_clust: str
 54            type of clustering method: currently k-means ('kmeans') or Gaussian
 55            Mixture Model ('gmm')
 56
 57        type_scaling: a tuple of 3 strings
 58            scaling methods for inputs, hidden layer, and clustering respectively
 59            (and when relevant).
 60            Currently available: standardization ('std') or MinMax scaling ('minmax')
 61
 62        col_sample: float
 63            percentage of features randomly chosen for training
 64
 65        row_sample: float
 66            percentage of rows chosen for training, by stratified bootstrapping
 67
 68        seed: int
 69            reproducibility seed for nodes_sim=='uniform', clustering and dropout
 70
 71        backend: str
 72            "cpu" or "gpu" or "tpu"
 73
 74    Attributes:
 75
 76        beta_: vector
 77            regression coefficients
 78
 79        GCV_: float
 80            Generalized Cross-Validation error
 81
 82    """
 83
 84    # construct the object -----
 85
 86    def __init__(
 87        self,
 88        n_hidden_features=5,
 89        activation_name="relu",
 90        a=0.01,
 91        nodes_sim="sobol",
 92        bias=True,
 93        dropout=0,
 94        direct_link=True,
 95        n_clusters=2,
 96        cluster_encode=True,
 97        type_clust="kmeans",
 98        type_scaling=("std", "std", "std"),
 99        col_sample=1,
100        row_sample=1,
101        seed=123,
102        backend="cpu",
103    ):
104        super().__init__(
105            n_hidden_features=n_hidden_features,
106            activation_name=activation_name,
107            a=a,
108            nodes_sim=nodes_sim,
109            bias=bias,
110            dropout=dropout,
111            direct_link=direct_link,
112            n_clusters=n_clusters,
113            cluster_encode=cluster_encode,
114            type_clust=type_clust,
115            type_scaling=type_scaling,
116            col_sample=col_sample,
117            row_sample=row_sample,
118            seed=seed,
119            backend=backend,
120        )
121
122    def fit(self, X, y, **kwargs):
123        """Fit BaseRegressor to training data (X, y)
124
125        Parameters:
126
127            X: {array-like}, shape = [n_samples, n_features]
128                Training vectors, where n_samples is the number
129                of samples and n_features is the number of features
130
131            y: array-like, shape = [n_samples]
132                Target values
133
134            **kwargs: additional parameters to be passed to self.cook_training_set
135
136        Returns:
137
138            self: object
139        """
140
141        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
142
143        fit_obj = lmf.beta_Sigma_hat(
144            X=scaled_Z, y=centered_y, backend=self.backend
145        )
146
147        self.beta_ = fit_obj["beta_hat"]
148
149        self.GCV_ = fit_obj["GCV"]
150
151        return self
152
153    def predict(self, X, **kwargs):
154        """Predict test data X.
155
156        Parameters:
157
158            X: {array-like}, shape = [n_samples, n_features]
159                Training vectors, where n_samples is the number
160                of samples and n_features is the number of features
161
162            **kwargs: additional parameters to be passed to self.cook_test_set
163
164        Returns:
165
166            model predictions: {array-like}
167        """
168
169        if len(X.shape) == 1:
170            n_features = X.shape[0]
171            new_X = mo.rbind(
172                X.reshape(1, n_features),
173                np.ones(n_features).reshape(1, n_features),
174            )
175
176            return (
177                self.y_mean_
178                + mo.safe_sparse_dot(
179                    a=self.cook_test_set(new_X, **kwargs),
180                    b=self.beta_,
181                    backend=self.backend,
182                )
183            )[0]
184
185        return self.y_mean_ + mo.safe_sparse_dot(
186            a=self.cook_test_set(X, **kwargs),
187            b=self.beta_,
188            backend=self.backend,
189        )

Random Vector Functional Link Network regression without shrinkage

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or
    not (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original features are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for type_clust='kmeans' or type_clust='gmm'
    clustering (could be 0: no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot);
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

col_sample: float
    percentage of features randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

seed: int
    reproducibility seed for nodes_sim=='uniform', clustering and dropout

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

beta_: vector
    regression coefficients

GCV_: float
    Generalized Cross-Validation error
def fit(self, X, y, **kwargs):
122    def fit(self, X, y, **kwargs):
123        """Fit BaseRegressor to training data (X, y)
124
125        Parameters:
126
127            X: {array-like}, shape = [n_samples, n_features]
128                Training vectors, where n_samples is the number
129                of samples and n_features is the number of features
130
131            y: array-like, shape = [n_samples]
132                Target values
133
134            **kwargs: additional parameters to be passed to self.cook_training_set
135
136        Returns:
137
138            self: object
139        """
140
141        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
142
143        fit_obj = lmf.beta_Sigma_hat(
144            X=scaled_Z, y=centered_y, backend=self.backend
145        )
146
147        self.beta_ = fit_obj["beta_hat"]
148
149        self.GCV_ = fit_obj["GCV"]
150
151        return self

Fit BaseRegressor to training data (X, y)

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features

y: array-like, shape = [n_samples]
    Target values

**kwargs: additional parameters to be passed to self.cook_training_set

Returns:

self: object
def predict(self, X, **kwargs):
153    def predict(self, X, **kwargs):
154        """Predict test data X.
155
156        Parameters:
157
158            X: {array-like}, shape = [n_samples, n_features]
159                Training vectors, where n_samples is the number
160                of samples and n_features is the number of features
161
162            **kwargs: additional parameters to be passed to self.cook_test_set
163
164        Returns:
165
166            model predictions: {array-like}
167        """
168
169        if len(X.shape) == 1:
170            n_features = X.shape[0]
171            new_X = mo.rbind(
172                X.reshape(1, n_features),
173                np.ones(n_features).reshape(1, n_features),
174            )
175
176            return (
177                self.y_mean_
178                + mo.safe_sparse_dot(
179                    a=self.cook_test_set(new_X, **kwargs),
180                    b=self.beta_,
181                    backend=self.backend,
182                )
183            )[0]
184
185        return self.y_mean_ + mo.safe_sparse_dot(
186            a=self.cook_test_set(X, **kwargs),
187            b=self.beta_,
188            backend=self.backend,
189        )

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features

**kwargs: additional parameters to be passed to self.cook_test_set

Returns:

model predictions: {array-like}
class BayesianRVFLRegressor(nnetsauce.Base, sklearn.base.RegressorMixin):
 15class BayesianRVFLRegressor(Base, RegressorMixin):
 16    """Bayesian Random Vector Functional Link Network regression with one prior
 17
 18    Parameters:
 19
 20        n_hidden_features: int
 21            number of nodes in the hidden layer
 22
 23        activation_name: str
 24            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 25
 26        a: float
 27            hyperparameter for 'prelu' or 'elu' activation function
 28
 29        nodes_sim: str
 30            type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'
 31
 32        bias: boolean
 33            indicates if the hidden layer contains a bias term (True) or not (False)
 34
 35        dropout: float
 36            regularization parameter; (random) percentage of nodes dropped out
 37            of the training
 38
 39        direct_link: boolean
 40            indicates if the original features are included (True) in model''s fitting or not (False)
 41
 42        n_clusters: int
 43            number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)
 44
 45        cluster_encode: bool
 46            defines how the variable containing clusters is treated (default is one-hot)
 47            if `False`, then labels are used, without one-hot encoding
 48
 49        type_clust: str
 50            type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')
 51
 52        type_scaling: a tuple of 3 strings
 53            scaling methods for inputs, hidden layer, and clustering respectively
 54            (and when relevant).
 55            Currently available: standardization ('std') or MinMax scaling ('minmax')
 56
 57        seed: int
 58            reproducibility seed for nodes_sim=='uniform'
 59
 60        s: float
 61            std. dev. of regression parameters in Bayesian Ridge Regression
 62
 63        sigma: float
 64            std. dev. of residuals in Bayesian Ridge Regression
 65
 66        return_std: boolean
 67            if True, uncertainty around predictions is evaluated
 68
 69        backend: str
 70            "cpu" or "gpu" or "tpu"
 71
 72    Attributes:
 73
 74        beta_: array-like
 75            regression''s coefficients
 76
 77        Sigma_: array-like
 78            covariance of the distribution of fitted parameters
 79
 80        GCV_: float
 81            Generalized cross-validation error
 82
 83        y_mean_: float
 84            average response
 85
 86    Examples:
 87
 88    ```python
 89    TBD
 90    ```
 91
 92    """
 93
 94    # construct the object -----
 95
 96    def __init__(
 97        self,
 98        n_hidden_features=5,
 99        activation_name="relu",
100        a=0.01,
101        nodes_sim="sobol",
102        bias=True,
103        dropout=0,
104        direct_link=True,
105        n_clusters=2,
106        cluster_encode=True,
107        type_clust="kmeans",
108        type_scaling=("std", "std", "std"),
109        seed=123,
110        s=0.1,
111        sigma=0.05,
112        return_std=True,
113        backend="cpu",
114    ):
115        super().__init__(
116            n_hidden_features=n_hidden_features,
117            activation_name=activation_name,
118            a=a,
119            nodes_sim=nodes_sim,
120            bias=bias,
121            dropout=dropout,
122            direct_link=direct_link,
123            n_clusters=n_clusters,
124            cluster_encode=cluster_encode,
125            type_clust=type_clust,
126            type_scaling=type_scaling,
127            seed=seed,
128            backend=backend,
129        )
130        self.s = s
131        self.sigma = sigma
132        self.beta_ = None
133        self.Sigma_ = None
134        self.GCV_ = None
135        self.return_std = return_std
136
137    def fit(self, X, y, **kwargs):
138        """Fit BayesianRVFLRegressor to training data (X, y).
139
140        Parameters:
141
142            X: {array-like}, shape = [n_samples, n_features]
143                Training vectors, where n_samples is the number
144                of samples and n_features is the number of features.
145
146            y: array-like, shape = [n_samples]
147                Target values.
148
149            **kwargs: additional parameters to be passed to
150                    self.cook_training_set
151
152        Returns:
153
154            self: object
155
156        """
157
158        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
159
160        fit_obj = lmf.beta_Sigma_hat_rvfl(
161            X=scaled_Z,
162            y=centered_y,
163            s=self.s,
164            sigma=self.sigma,
165            fit_intercept=False,
166            return_cov=self.return_std,
167            backend=self.backend,
168        )
169
170        self.beta_ = fit_obj["beta_hat"]
171
172        if self.return_std == True:
173            self.Sigma_ = fit_obj["Sigma_hat"]
174
175        self.GCV_ = fit_obj["GCV"]
176
177        return self
178
179    def predict(self, X, return_std=False, **kwargs):
180        """Predict test data X.
181
182        Parameters:
183
184            X: {array-like}, shape = [n_samples, n_features]
185                Training vectors, where n_samples is the number
186                of samples and n_features is the number of features.
187
188            return_std: {boolean}, standard dev. is returned or not
189
190            **kwargs: additional parameters to be passed to
191                    self.cook_test_set
192
193        Returns:
194
195            model predictions: {array-like}
196
197        """
198
199        if len(X.shape) == 1:  # one observation in the test set only
200            n_features = X.shape[0]
201            new_X = mo.rbind(
202                x=X.reshape(1, n_features),
203                y=np.ones(n_features).reshape(1, n_features),
204                backend=self.backend,
205            )
206
207        self.return_std = return_std
208
209        if self.return_std == False:
210            if len(X.shape) == 1:
211                return (
212                    self.y_mean_
213                    + mo.safe_sparse_dot(
214                        a=self.cook_test_set(new_X, **kwargs),
215                        b=self.beta_,
216                        backend=self.backend,
217                    )
218                )[0]
219
220            return self.y_mean_ + mo.safe_sparse_dot(
221                a=self.cook_test_set(X, **kwargs),
222                b=self.beta_,
223                backend=self.backend,
224            )
225
226        else:  # confidence interval required for preds?
227            if len(X.shape) == 1:
228                Z = self.cook_test_set(new_X, **kwargs)
229
230                pred_obj = lmf.beta_Sigma_hat_rvfl(
231                    s=self.s,
232                    sigma=self.sigma,
233                    X_star=Z,
234                    return_cov=True,
235                    beta_hat_=self.beta_,
236                    Sigma_hat_=self.Sigma_,
237                    backend=self.backend,
238                )
239
240                return (
241                    self.y_mean_ + pred_obj["preds"][0],
242                    pred_obj["preds_std"][0],
243                )
244
245            Z = self.cook_test_set(X, **kwargs)
246
247            pred_obj = lmf.beta_Sigma_hat_rvfl(
248                s=self.s,
249                sigma=self.sigma,
250                X_star=Z,
251                return_cov=True,
252                beta_hat_=self.beta_,
253                Sigma_hat_=self.Sigma_,
254                backend=self.backend,
255            )
256
257            return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])

Bayesian Random Vector Functional Link Network regression with one prior

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original features are included (True) in model's fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

seed: int
    reproducibility seed for nodes_sim=='uniform'

s: float
    std. dev. of regression parameters in Bayesian Ridge Regression

sigma: float
    std. dev. of residuals in Bayesian Ridge Regression

return_std: boolean
    if True, uncertainty around predictions is evaluated

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

beta_: array-like
    regression's coefficients

Sigma_: array-like
    covariance of the distribution of fitted parameters

GCV_: float
    Generalized cross-validation error

y_mean_: float
    average response

Examples:

TBD
def fit(self, X, y, **kwargs):
137    def fit(self, X, y, **kwargs):
138        """Fit BayesianRVFLRegressor to training data (X, y).
139
140        Parameters:
141
142            X: {array-like}, shape = [n_samples, n_features]
143                Training vectors, where n_samples is the number
144                of samples and n_features is the number of features.
145
146            y: array-like, shape = [n_samples]
147                Target values.
148
149            **kwargs: additional parameters to be passed to
150                    self.cook_training_set
151
152        Returns:
153
154            self: object
155
156        """
157
158        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
159
160        fit_obj = lmf.beta_Sigma_hat_rvfl(
161            X=scaled_Z,
162            y=centered_y,
163            s=self.s,
164            sigma=self.sigma,
165            fit_intercept=False,
166            return_cov=self.return_std,
167            backend=self.backend,
168        )
169
170        self.beta_ = fit_obj["beta_hat"]
171
172        if self.return_std == True:
173            self.Sigma_ = fit_obj["Sigma_hat"]
174
175        self.GCV_ = fit_obj["GCV"]
176
177        return self

Fit BayesianRVFLRegressor to training data (X, y).

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set

Returns:

self: object
def predict(self, X, return_std=False, **kwargs):
179    def predict(self, X, return_std=False, **kwargs):
180        """Predict test data X.
181
182        Parameters:
183
184            X: {array-like}, shape = [n_samples, n_features]
185                Training vectors, where n_samples is the number
186                of samples and n_features is the number of features.
187
188            return_std: {boolean}, standard dev. is returned or not
189
190            **kwargs: additional parameters to be passed to
191                    self.cook_test_set
192
193        Returns:
194
195            model predictions: {array-like}
196
197        """
198
199        if len(X.shape) == 1:  # one observation in the test set only
200            n_features = X.shape[0]
201            new_X = mo.rbind(
202                x=X.reshape(1, n_features),
203                y=np.ones(n_features).reshape(1, n_features),
204                backend=self.backend,
205            )
206
207        self.return_std = return_std
208
209        if self.return_std == False:
210            if len(X.shape) == 1:
211                return (
212                    self.y_mean_
213                    + mo.safe_sparse_dot(
214                        a=self.cook_test_set(new_X, **kwargs),
215                        b=self.beta_,
216                        backend=self.backend,
217                    )
218                )[0]
219
220            return self.y_mean_ + mo.safe_sparse_dot(
221                a=self.cook_test_set(X, **kwargs),
222                b=self.beta_,
223                backend=self.backend,
224            )
225
226        else:  # confidence interval required for preds?
227            if len(X.shape) == 1:
228                Z = self.cook_test_set(new_X, **kwargs)
229
230                pred_obj = lmf.beta_Sigma_hat_rvfl(
231                    s=self.s,
232                    sigma=self.sigma,
233                    X_star=Z,
234                    return_cov=True,
235                    beta_hat_=self.beta_,
236                    Sigma_hat_=self.Sigma_,
237                    backend=self.backend,
238                )
239
240                return (
241                    self.y_mean_ + pred_obj["preds"][0],
242                    pred_obj["preds_std"][0],
243                )
244
245            Z = self.cook_test_set(X, **kwargs)
246
247            pred_obj = lmf.beta_Sigma_hat_rvfl(
248                s=self.s,
249                sigma=self.sigma,
250                X_star=Z,
251                return_cov=True,
252                beta_hat_=self.beta_,
253                Sigma_hat_=self.Sigma_,
254                backend=self.backend,
255            )
256
257            return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

return_std: {boolean}, standard dev. is returned or not

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
class BayesianRVFL2Regressor(nnetsauce.Base, sklearn.base.RegressorMixin):
 15class BayesianRVFL2Regressor(Base, RegressorMixin):
 16    """Bayesian Random Vector Functional Link Network regression with two priors
 17
 18    Parameters:
 19
 20        n_hidden_features: int
 21            number of nodes in the hidden layer
 22
 23        activation_name: str
 24            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 25
 26        a: float
 27            hyperparameter for 'prelu' or 'elu' activation function
 28
 29        nodes_sim: str
 30            type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'
 31
 32        bias: boolean
 33            indicates if the hidden layer contains a bias term (True) or not (False)
 34
 35        dropout: float
 36            regularization parameter; (random) percentage of nodes dropped out
 37            of the training
 38
 39        direct_link: boolean
 40            indicates if the original features are included (True) in model''s fitting or not (False)
 41
 42        n_clusters: int
 43            number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)
 44
 45        cluster_encode: bool
 46            defines how the variable containing clusters is treated (default is one-hot)
 47            if `False`, then labels are used, without one-hot encoding
 48
 49        type_clust: str
 50            type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')
 51
 52        type_scaling: a tuple of 3 strings
 53            scaling methods for inputs, hidden layer, and clustering respectively
 54            (and when relevant).
 55            Currently available: standardization ('std') or MinMax scaling ('minmax')
 56
 57        seed: int
 58            reproducibility seed for nodes_sim=='uniform'
 59
 60        s1: float
 61            std. dev. of init. regression parameters in Bayesian Ridge Regression
 62
 63        s2: float
 64            std. dev. of augmented regression parameters in Bayesian Ridge Regression
 65
 66        sigma: float
 67            std. dev. of residuals in Bayesian Ridge Regression
 68
 69        return_std: boolean
 70            if True, uncertainty around predictions is evaluated
 71
 72        backend: str
 73            "cpu" or "gpu" or "tpu"
 74
 75    Attributes:
 76
 77        beta_: array-like
 78            regression''s coefficients
 79
 80        Sigma_: array-like
 81            covariance of the distribution of fitted parameters
 82
 83        GCV_: float
 84            Generalized cross-validation error
 85
 86        y_mean_: float
 87            average response
 88
 89    Examples:
 90
 91    ```python
 92    TBD
 93    ```
 94
 95    """
 96
 97    # construct the object -----
 98
 99    def __init__(
100        self,
101        n_hidden_features=5,
102        activation_name="relu",
103        a=0.01,
104        nodes_sim="sobol",
105        bias=True,
106        dropout=0,
107        direct_link=True,
108        n_clusters=0,
109        cluster_encode=True,
110        type_clust="kmeans",
111        type_scaling=("std", "std", "std"),
112        seed=123,
113        s1=0.1,
114        s2=0.1,
115        sigma=0.05,
116        return_std=True,
117        backend="cpu",
118    ):
119        super().__init__(
120            n_hidden_features=n_hidden_features,
121            activation_name=activation_name,
122            a=a,
123            nodes_sim=nodes_sim,
124            bias=bias,
125            dropout=dropout,
126            direct_link=direct_link,
127            n_clusters=n_clusters,
128            cluster_encode=cluster_encode,
129            type_clust=type_clust,
130            type_scaling=type_scaling,
131            seed=seed,
132            backend=backend,
133        )
134
135        self.s1 = s1
136        self.s2 = s2
137        self.sigma = sigma
138        self.beta_ = None
139        self.Sigma_ = None
140        self.GCV_ = None
141        self.return_std = return_std
142        self.coef_ = None
143
144    def fit(self, X, y, **kwargs):
145        """Fit BayesianRVFL2Regressor to training data (X, y)
146
147        Parameters:
148
149            X: {array-like}, shape = [n_samples, n_features]
150                Training vectors, where n_samples is the number
151                of samples and n_features is the number of features
152
153            y: array-like, shape = [n_samples]
154                Target values
155
156            **kwargs: additional parameters to be passed to
157                    self.cook_training_set
158
159        Returns:
160
161            self: object
162
163        """
164
165        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
166
167        n, p = X.shape
168        q = self.n_hidden_features
169
170        if self.direct_link == True:
171            r = p + self.n_clusters
172
173            block11 = (self.s1**2) * np.eye(r)
174            block12 = np.zeros((r, q))
175            block21 = np.zeros((q, r))
176            block22 = (self.s2**2) * np.eye(q)
177
178            Sigma_prior = mo.rbind(
179                x=mo.cbind(x=block11, y=block12, backend=self.backend),
180                y=mo.cbind(x=block21, y=block22, backend=self.backend),
181                backend=self.backend,
182            )
183
184        else:
185            Sigma_prior = (self.s2**2) * np.eye(q)
186
187        fit_obj = lmf.beta_Sigma_hat_rvfl2(
188            X=scaled_Z,
189            y=centered_y,
190            Sigma=Sigma_prior,
191            sigma=self.sigma,
192            fit_intercept=False,
193            return_cov=self.return_std,
194            backend=self.backend,
195        )
196
197        self.beta_ = fit_obj["beta_hat"]
198
199        self.coef_ = self.beta_
200
201        if self.return_std == True:
202            self.Sigma_ = fit_obj["Sigma_hat"]
203
204        self.GCV_ = fit_obj["GCV"]
205
206        return self
207
208    def predict(self, X, return_std=False, **kwargs):
209        """Predict test data X.
210
211        Parameters:
212
213            X: {array-like}, shape = [n_samples, n_features]
214                Training vectors, where n_samples is the number
215                of samples and n_features is the number of features.
216
217            return_std: {boolean}, standard dev. is returned or not
218
219            **kwargs: additional parameters to be passed to
220                    self.cook_test_set
221
222        Returns:
223
224            model predictions: {array-like}
225
226        """
227
228        if len(X.shape) == 1:  # one observation in the test set only
229            n_features = X.shape[0]
230            new_X = mo.rbind(
231                x=X.reshape(1, n_features),
232                y=np.ones(n_features).reshape(1, n_features),
233                backend=self.backend,
234            )
235
236        self.return_std = return_std
237
238        if self.return_std == False:
239            if len(X.shape) == 1:
240                return (
241                    self.y_mean_
242                    + mo.safe_sparse_dot(
243                        self.cook_test_set(new_X, **kwargs),
244                        self.beta_,
245                        backend=self.backend,
246                    )
247                )[0]
248
249            return self.y_mean_ + mo.safe_sparse_dot(
250                self.cook_test_set(X, **kwargs),
251                self.beta_,
252                backend=self.backend,
253            )
254
255        else:  # confidence interval required for preds?
256            if len(X.shape) == 1:
257                Z = self.cook_test_set(new_X, **kwargs)
258
259                pred_obj = lmf.beta_Sigma_hat_rvfl2(
260                    X_star=Z,
261                    return_cov=self.return_std,
262                    beta_hat_=self.beta_,
263                    Sigma_hat_=self.Sigma_,
264                    backend=self.backend,
265                )
266
267                return (
268                    self.y_mean_ + pred_obj["preds"][0],
269                    pred_obj["preds_std"][0],
270                )
271
272            Z = self.cook_test_set(X, **kwargs)
273
274            pred_obj = lmf.beta_Sigma_hat_rvfl2(
275                X_star=Z,
276                return_cov=self.return_std,
277                beta_hat_=self.beta_,
278                Sigma_hat_=self.Sigma_,
279                backend=self.backend,
280            )
281
282            return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])

Bayesian Random Vector Functional Link Network regression with two priors

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original features are included (True) in model's fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

seed: int
    reproducibility seed for nodes_sim=='uniform'

s1: float
    std. dev. of init. regression parameters in Bayesian Ridge Regression

s2: float
    std. dev. of augmented regression parameters in Bayesian Ridge Regression

sigma: float
    std. dev. of residuals in Bayesian Ridge Regression

return_std: boolean
    if True, uncertainty around predictions is evaluated

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

beta_: array-like
    regression's coefficients

Sigma_: array-like
    covariance of the distribution of fitted parameters

GCV_: float
    Generalized cross-validation error

y_mean_: float
    average response

Examples:

TBD
def fit(self, X, y, **kwargs):
144    def fit(self, X, y, **kwargs):
145        """Fit BayesianRVFL2Regressor to training data (X, y)
146
147        Parameters:
148
149            X: {array-like}, shape = [n_samples, n_features]
150                Training vectors, where n_samples is the number
151                of samples and n_features is the number of features
152
153            y: array-like, shape = [n_samples]
154                Target values
155
156            **kwargs: additional parameters to be passed to
157                    self.cook_training_set
158
159        Returns:
160
161            self: object
162
163        """
164
165        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
166
167        n, p = X.shape
168        q = self.n_hidden_features
169
170        if self.direct_link == True:
171            r = p + self.n_clusters
172
173            block11 = (self.s1**2) * np.eye(r)
174            block12 = np.zeros((r, q))
175            block21 = np.zeros((q, r))
176            block22 = (self.s2**2) * np.eye(q)
177
178            Sigma_prior = mo.rbind(
179                x=mo.cbind(x=block11, y=block12, backend=self.backend),
180                y=mo.cbind(x=block21, y=block22, backend=self.backend),
181                backend=self.backend,
182            )
183
184        else:
185            Sigma_prior = (self.s2**2) * np.eye(q)
186
187        fit_obj = lmf.beta_Sigma_hat_rvfl2(
188            X=scaled_Z,
189            y=centered_y,
190            Sigma=Sigma_prior,
191            sigma=self.sigma,
192            fit_intercept=False,
193            return_cov=self.return_std,
194            backend=self.backend,
195        )
196
197        self.beta_ = fit_obj["beta_hat"]
198
199        self.coef_ = self.beta_
200
201        if self.return_std == True:
202            self.Sigma_ = fit_obj["Sigma_hat"]
203
204        self.GCV_ = fit_obj["GCV"]
205
206        return self

Fit BayesianRVFL2Regressor to training data (X, y)

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features

y: array-like, shape = [n_samples]
    Target values

**kwargs: additional parameters to be passed to
        self.cook_training_set

Returns:

self: object
def predict(self, X, return_std=False, **kwargs):
208    def predict(self, X, return_std=False, **kwargs):
209        """Predict test data X.
210
211        Parameters:
212
213            X: {array-like}, shape = [n_samples, n_features]
214                Training vectors, where n_samples is the number
215                of samples and n_features is the number of features.
216
217            return_std: {boolean}, standard dev. is returned or not
218
219            **kwargs: additional parameters to be passed to
220                    self.cook_test_set
221
222        Returns:
223
224            model predictions: {array-like}
225
226        """
227
228        if len(X.shape) == 1:  # one observation in the test set only
229            n_features = X.shape[0]
230            new_X = mo.rbind(
231                x=X.reshape(1, n_features),
232                y=np.ones(n_features).reshape(1, n_features),
233                backend=self.backend,
234            )
235
236        self.return_std = return_std
237
238        if self.return_std == False:
239            if len(X.shape) == 1:
240                return (
241                    self.y_mean_
242                    + mo.safe_sparse_dot(
243                        self.cook_test_set(new_X, **kwargs),
244                        self.beta_,
245                        backend=self.backend,
246                    )
247                )[0]
248
249            return self.y_mean_ + mo.safe_sparse_dot(
250                self.cook_test_set(X, **kwargs),
251                self.beta_,
252                backend=self.backend,
253            )
254
255        else:  # confidence interval required for preds?
256            if len(X.shape) == 1:
257                Z = self.cook_test_set(new_X, **kwargs)
258
259                pred_obj = lmf.beta_Sigma_hat_rvfl2(
260                    X_star=Z,
261                    return_cov=self.return_std,
262                    beta_hat_=self.beta_,
263                    Sigma_hat_=self.Sigma_,
264                    backend=self.backend,
265                )
266
267                return (
268                    self.y_mean_ + pred_obj["preds"][0],
269                    pred_obj["preds_std"][0],
270                )
271
272            Z = self.cook_test_set(X, **kwargs)
273
274            pred_obj = lmf.beta_Sigma_hat_rvfl2(
275                X_star=Z,
276                return_cov=self.return_std,
277                beta_hat_=self.beta_,
278                Sigma_hat_=self.Sigma_,
279                backend=self.backend,
280            )
281
282            return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

return_std: {boolean}, standard dev. is returned or not

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
class ClassicalMTS(nnetsauce.MTS):
 42class ClassicalMTS(MTS):
 43    """Time series with statistical models (statsmodels), mostly for benchmarks
 44
 45    Parameters:
 46
 47        model: type of model: str.
 48            currently, 'VAR', 'VECM', 'ARIMA', 'ETS', 'Theta'
 49            Default is None
 50
 51        obj: object
 52            A time series model from statsmodels
 53
 54    Attributes:
 55
 56        df_: data frame
 57            the input data frame, in case a data.frame is provided to `fit`
 58
 59        level_: int
 60            level of confidence for prediction intervals (default is 95)
 61
 62    Examples:
 63    See examples/classical_mts_timeseries.py
 64    """
 65
 66    # construct the object -----
 67
 68    def __init__(self, model="VAR", obj=None):
 69        if obj is not None:
 70            self.model = None
 71            self.obj = obj
 72        else:
 73            self.model = model
 74            if self.model == "VAR":
 75                self.obj = VAR
 76            elif self.model == "VECM":
 77                self.obj = VECM
 78            elif self.model == "ARIMA":
 79                self.obj = ARIMA
 80            elif self.model == "ETS":
 81                self.obj = ExponentialSmoothing
 82            elif self.model == "Theta":
 83                self.obj = ThetaModel
 84            else:
 85                raise ValueError("model not recognized")
 86        self.n_series = None
 87        self.replications = None
 88        self.mean_ = None
 89        self.upper_ = None
 90        self.lower_ = None
 91        self.output_dates_ = None
 92        self.alpha_ = None
 93        self.df_ = None
 94        self.residuals_ = []
 95        self.sims_ = None
 96        self.level_ = None
 97
 98    def fit(self, X, **kwargs):
 99        """Fit ClassicalMTS model to training data X, with optional regressors xreg
100
101        Parameters:
102
103        X: {array-like}, shape = [n_samples, n_features]
104            Training time series, where n_samples is the number
105            of samples and n_features is the number of features;
106            X must be in increasing order (most recent observations last)
107
108        **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)
109
110        Returns:
111
112        self: object
113        """
114
115        try:
116            self.n_series = X.shape[1]
117        except Exception:
118            self.n_series = 1
119
120        if (isinstance(X, pd.DataFrame) is False) and isinstance(
121            X, pd.Series
122        ) is False:  # input data set is a numpy array
123            X = pd.DataFrame(X)
124            if self.n_series > 1:
125                self.series_names = [
126                    "series" + str(i) for i in range(X.shape[1])
127                ]
128            else:
129                self.series_names = "series0"
130
131        else:  # input data set is a DataFrame or Series with column names
132            X_index = None
133            if X.index is not None and len(X.shape) > 1:
134                X_index = X.index
135                X = copy.deepcopy(mo.convert_df_to_numeric(X))
136            if X_index is not None:
137                try:
138                    X.index = X_index
139                except Exception:
140                    pass
141            if isinstance(X, pd.DataFrame):
142                self.series_names = X.columns.tolist()
143            else:
144                self.series_names = X.name
145
146        if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series):
147            self.df_ = X
148            X = X.values
149            self.df_.columns = self.series_names
150            self.input_dates = ts.compute_input_dates(self.df_)
151        else:
152            self.df_ = pd.DataFrame(X, columns=self.series_names)
153
154        if self.model == "Theta":
155            try:
156                self.obj = self.obj(self.df_, **kwargs).fit()
157            except Exception as e:
158                self.obj = self.obj(self.df_.values, **kwargs).fit()
159            self.residuals_ = None
160        else:
161            self.obj = self.obj(X, **kwargs).fit()
162            try:
163                self.residuals_ = self.obj.resid
164            except Exception as e:  # Theta
165                self.residuals_ = None
166
167        return self
168
169    def predict(self, h=5, level=95, **kwargs):
170        """Forecast all the time series, h steps ahead
171
172        Parameters:
173
174        h: {integer}
175            Forecasting horizon
176
177        **kwargs: additional parameters to be passed to
178                self.cook_test_set
179
180        Returns:
181
182        model predictions for horizon = h: {array-like}
183
184        """
185
186        self.output_dates_, frequency = ts.compute_output_dates(self.df_, h)
187        self.level_ = level
188        self.lower_ = None  # do not remove (/!\)
189        self.upper_ = None  # do not remove (/!\)
190        self.sims_ = None  # do not remove (/!\)
191        self.level_ = level
192        self.alpha_ = 100 - level
193
194        pi_multiplier = norm.ppf(1 - self.alpha_ / 200)
195
196        # Named tuple for forecast results
197        DescribeResult = namedtuple(
198            "DescribeResult", ("mean", "lower", "upper")
199        )
200
201        if (
202            self.obj is not None
203        ):  # try all the special cases of the else section (there's probably a better way)
204            try:
205                (
206                    mean_forecast,
207                    lower_bound,
208                    upper_bound,
209                ) = self.obj.forecast_interval(
210                    self.obj.endog, steps=h, alpha=self.alpha_ / 100, **kwargs
211                )
212
213            except Exception as e:
214                try:
215                    forecast_result = self.obj.predict(steps=h)
216                    mean_forecast = forecast_result
217                    (
218                        lower_bound,
219                        upper_bound,
220                    ) = self._compute_confidence_intervals(
221                        forecast_result, alpha=self.alpha_ / 100, **kwargs
222                    )
223
224                except Exception as e:
225                    try:
226                        forecast_result = self.obj.get_forecast(steps=h)
227                        mean_forecast = forecast_result.predicted_mean
228                        lower_bound = forecast_result.conf_int()[:, 0]
229                        upper_bound = forecast_result.conf_int()[:, 1]
230
231                    except Exception as e:
232                        try:
233                            forecast_result = self.obj.forecast(steps=h)
234                            residuals = self.obj.resid
235                            std_errors = np.std(residuals)
236                            mean_forecast = forecast_result
237                            lower_bound = (
238                                forecast_result - pi_multiplier * std_errors
239                            )
240                            upper_bound = (
241                                forecast_result + pi_multiplier * std_errors
242                            )
243
244                        except Exception as e:
245                            try:
246                                mean_forecast = self.obj.forecast(
247                                    steps=h
248                                ).values
249                                forecast_result = self.obj.prediction_intervals(
250                                    steps=h, alpha=self.alpha_ / 100, **kwargs
251                                )
252                                lower_bound = forecast_result["lower"].values
253                                upper_bound = forecast_result["upper"].values
254                            except Exception:
255                                mean_forecast = self.obj.forecast(steps=h)
256                                forecast_result = self.obj.prediction_intervals(
257                                    steps=h, alpha=self.alpha_ / 100, **kwargs
258                                )
259                                lower_bound = forecast_result["lower"]
260                                upper_bound = forecast_result["upper"]
261
262        else:
263            if self.model == "VAR":
264                (
265                    mean_forecast,
266                    lower_bound,
267                    upper_bound,
268                ) = self.obj.forecast_interval(
269                    self.obj.endog, steps=h, alpha=self.alpha_ / 100, **kwargs
270                )
271
272            elif self.model == "VECM":
273                forecast_result = self.obj.predict(steps=h)
274                mean_forecast = forecast_result
275                lower_bound, upper_bound = self._compute_confidence_intervals(
276                    forecast_result, alpha=self.alpha_ / 100, **kwargs
277                )
278
279            elif self.model == "ARIMA":
280                forecast_result = self.obj.get_forecast(steps=h)
281                mean_forecast = forecast_result.predicted_mean
282                lower_bound = forecast_result.conf_int()[:, 0]
283                upper_bound = forecast_result.conf_int()[:, 1]
284
285            elif self.model == "ETS":
286                forecast_result = self.obj.forecast(steps=h)
287                residuals = self.obj.resid
288                std_errors = np.std(residuals)
289                mean_forecast = forecast_result
290                lower_bound = forecast_result - pi_multiplier * std_errors
291                upper_bound = forecast_result + pi_multiplier * std_errors
292
293            elif self.model == "Theta":
294                try:
295                    mean_forecast = self.obj.forecast(steps=h).values
296                    forecast_result = self.obj.prediction_intervals(
297                        steps=h, alpha=self.alpha_ / 100, **kwargs
298                    )
299                    lower_bound = forecast_result["lower"].values
300                    upper_bound = forecast_result["upper"].values
301                except Exception:
302                    mean_forecast = self.obj.forecast(steps=h)
303                    forecast_result = self.obj.prediction_intervals(
304                        steps=h, alpha=self.alpha_ / 100, **kwargs
305                    )
306                    lower_bound = forecast_result["lower"]
307                    upper_bound = forecast_result["upper"]
308
309            else:
310                raise ValueError("model not recognized")
311
312        try:
313            self.mean_ = pd.DataFrame(
314                mean_forecast,
315                columns=self.series_names,
316                index=self.output_dates_,
317            )
318            self.lower_ = pd.DataFrame(
319                lower_bound, columns=self.series_names, index=self.output_dates_
320            )
321            self.upper_ = pd.DataFrame(
322                upper_bound, columns=self.series_names, index=self.output_dates_
323            )
324        except Exception:
325            self.mean_ = pd.Series(
326                mean_forecast, name=self.series_names, index=self.output_dates_
327            )
328            self.lower_ = pd.Series(
329                lower_bound, name=self.series_names, index=self.output_dates_
330            )
331            self.upper_ = pd.Series(
332                upper_bound, name=self.series_names, index=self.output_dates_
333            )
334
335        return DescribeResult(
336            mean=self.mean_, lower=self.lower_, upper=self.upper_
337        )
338
339    def _compute_confidence_intervals(self, forecast_result, alpha):
340        """
341        Compute confidence intervals for VECM forecasts.
342        Uses the covariance of residuals to approximate the confidence intervals.
343        """
344        residuals = self.obj.resid
345        cov_matrix = np.cov(residuals.T)  # Covariance matrix of residuals
346        std_errors = np.sqrt(np.diag(cov_matrix))  # Standard errors
347
348        z_value = norm.ppf(1 - alpha / 2)  # Z-score for the given alpha level
349        lower_bound = forecast_result - z_value * std_errors
350        upper_bound = forecast_result + z_value * std_errors
351
352        return lower_bound, upper_bound
353
354    def score(self, X, training_index, testing_index, scoring=None, **kwargs):
355        """Train on training_index, score on testing_index."""
356
357        assert (
358            bool(set(training_index).intersection(set(testing_index))) == False
359        ), "Non-overlapping 'training_index' and 'testing_index' required"
360
361        # Dimensions
362        try:
363            # multivariate time series
364            n, p = X.shape
365        except:
366            # univariate time series
367            n = X.shape[0]
368            p = 1
369
370        # Training and testing sets
371        if p > 1:
372            X_train = X[training_index, :]
373            X_test = X[testing_index, :]
374        else:
375            X_train = X[training_index]
376            X_test = X[testing_index]
377
378        # Horizon
379        h = len(testing_index)
380        assert (
381            len(training_index) + h
382        ) <= n, "Please check lengths of training and testing windows"
383
384        # Fit and predict
385        self.fit(X_train, **kwargs)
386        preds = self.predict(h=h, **kwargs)
387
388        if scoring is None:
389            scoring = "neg_root_mean_squared_error"
390
391        # check inputs
392        assert scoring in (
393            "explained_variance",
394            "neg_mean_absolute_error",
395            "neg_mean_squared_error",
396            "neg_root_mean_squared_error",
397            "neg_mean_squared_log_error",
398            "neg_median_absolute_error",
399            "r2",
400        ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \
401                               'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \
402                               'neg_median_absolute_error', 'r2')"
403
404        scoring_options = {
405            "explained_variance": skm2.explained_variance_score,
406            "neg_mean_absolute_error": skm2.mean_absolute_error,
407            "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2),
408            "neg_root_mean_squared_error": lambda x, y: np.sqrt(
409                np.mean((x - y) ** 2)
410            ),
411            "neg_mean_squared_log_error": skm2.mean_squared_log_error,
412            "neg_median_absolute_error": skm2.median_absolute_error,
413            "r2": skm2.r2_score,
414        }
415
416        # if p > 1:
417        #     return tuple(
418        #         [
419        #             scoring_options[scoring](
420        #                 X_test[:, i], preds[:, i]#, **kwargs
421        #             )
422        #             for i in range(p)
423        #         ]
424        #     )
425        # else:
426        return scoring_options[scoring](X_test, preds)
427
428    def plot(self, series=None, type_axis="dates", type_plot="pi"):
429        """Plot time series forecast
430
431        Parameters:
432
433        series: {integer} or {string}
434            series index or name
435
436        """
437
438        assert all(
439            [
440                self.mean_ is not None,
441                self.lower_ is not None,
442                self.upper_ is not None,
443                self.output_dates_ is not None,
444            ]
445        ), "model forecasting must be obtained first (with predict)"
446
447        if series is None:
448            assert (
449                self.n_series == 1
450            ), "please specify series index or name (n_series > 1)"
451            series = 0
452
453        if isinstance(series, str):
454            assert (
455                series in self.series_names
456            ), f"series {series} doesn't exist in the input dataset"
457            series_idx = self.df_.columns.get_loc(series)
458        else:
459            assert isinstance(series, int) and (
460                0 <= series < self.n_series
461            ), f"check series index (< {self.n_series})"
462            series_idx = series
463
464        if isinstance(self.df_, pd.DataFrame):
465            y_all = list(self.df_.iloc[:, series_idx]) + list(
466                self.mean_.iloc[:, series_idx]
467            )
468            y_test = list(self.mean_.iloc[:, series_idx])
469        else:
470            y_all = list(self.df_.values) + list(self.mean_.values)
471            y_test = list(self.mean_.values)
472        n_points_all = len(y_all)
473        n_points_train = self.df_.shape[0]
474
475        if type_axis == "numeric":
476            x_all = [i for i in range(n_points_all)]
477            x_test = [i for i in range(n_points_train, n_points_all)]
478
479        if type_axis == "dates":  # use dates
480            x_all = np.concatenate(
481                (self.input_dates.values, self.output_dates_.values), axis=None
482            )
483            x_test = self.output_dates_.values
484
485        if type_plot == "pi":
486            fig, ax = plt.subplots()
487            ax.plot(x_all, y_all, "-")
488            ax.plot(x_test, y_test, "-", color="orange")
489            try:
490                ax.fill_between(
491                    x_test,
492                    self.lower_.iloc[:, series_idx],
493                    self.upper_.iloc[:, series_idx],
494                    alpha=0.2,
495                    color="orange",
496                )
497            except Exception:
498                ax.fill_between(
499                    x_test,
500                    self.lower_.values,
501                    self.upper_.values,
502                    alpha=0.2,
503                    color="orange",
504                )
505            if self.replications is None:
506                if self.n_series > 1:
507                    plt.title(
508                        f"prediction intervals for {series}",
509                        loc="left",
510                        fontsize=12,
511                        fontweight=0,
512                        color="black",
513                    )
514                else:
515                    plt.title(
516                        f"prediction intervals for input time series",
517                        loc="left",
518                        fontsize=12,
519                        fontweight=0,
520                        color="black",
521                    )
522                plt.show()
523            else:  # self.replications is not None
524                if self.n_series > 1:
525                    plt.title(
526                        f"prediction intervals for {self.replications} simulations of {series}",
527                        loc="left",
528                        fontsize=12,
529                        fontweight=0,
530                        color="black",
531                    )
532                else:
533                    plt.title(
534                        f"prediction intervals for {self.replications} simulations of input time series",
535                        loc="left",
536                        fontsize=12,
537                        fontweight=0,
538                        color="black",
539                    )
540                plt.show()
541
542        if type_plot == "spaghetti":
543            palette = plt.get_cmap("Set1")
544            sims_ix = getsims(self.sims_, series_idx)
545            plt.plot(x_all, y_all, "-")
546            for col_ix in range(
547                sims_ix.shape[1]
548            ):  # avoid this when there are thousands of simulations
549                plt.plot(
550                    x_test,
551                    sims_ix[:, col_ix],
552                    "-",
553                    color=palette(col_ix),
554                    linewidth=1,
555                    alpha=0.9,
556                )
557            plt.plot(x_all, y_all, "-", color="black")
558            plt.plot(x_test, y_test, "-", color="blue")
559            # Add titles
560            if self.n_series > 1:
561                plt.title(
562                    f"{self.replications} simulations of {series}",
563                    loc="left",
564                    fontsize=12,
565                    fontweight=0,
566                    color="black",
567                )
568            else:
569                plt.title(
570                    f"{self.replications} simulations of input time series",
571                    loc="left",
572                    fontsize=12,
573                    fontweight=0,
574                    color="black",
575                )
576            plt.xlabel("Time")
577            plt.ylabel("Values")
578            # Show the graph
579            plt.show()
580
581    def cross_val_score(
582        self,
583        X,
584        scoring="root_mean_squared_error",
585        n_jobs=None,
586        verbose=0,
587        xreg=None,
588        initial_window=5,
589        horizon=3,
590        fixed_window=False,
591        show_progress=True,
592        level=95,
593        **kwargs,
594    ):
595        """Evaluate a score by time series cross-validation.
596
597        Parameters:
598
599            X: {array-like, sparse matrix} of shape (n_samples, n_features)
600                The data to fit.
601
602            scoring: str or a function
603                A str in ('root_mean_squared_error', 'mean_squared_error', 'mean_error',
604                'mean_absolute_error', 'mean_error', 'mean_percentage_error',
605                'mean_absolute_percentage_error',  'winkler_score', 'coverage')
606                Or a function defined as 'coverage' and 'winkler_score' in `utils.timeseries`
607
608            n_jobs: int, default=None
609                Number of jobs to run in parallel.
610
611            verbose: int, default=0
612                The verbosity level.
613
614            xreg: array-like, optional (default=None)
615                Additional (external) regressors to be passed to `fit`
616                xreg must be in 'increasing' order (most recent observations last)
617
618            initial_window: int
619                initial number of consecutive values in each training set sample
620
621            horizon: int
622                number of consecutive values in test set sample
623
624            fixed_window: boolean
625                if False, all training samples start at index 0, and the training
626                window's size is increasing.
627                if True, the training window's size is fixed, and the window is
628                rolling forward
629
630            show_progress: boolean
631                if True, a progress bar is printed
632
633            **kwargs: dict
634                additional parameters to be passed to `fit` and `predict`
635
636        Returns:
637
638            A tuple: descriptive statistics or errors and raw errors
639
640        """
641        tscv = TimeSeriesSplit()
642
643        tscv_obj = tscv.split(
644            X,
645            initial_window=initial_window,
646            horizon=horizon,
647            fixed_window=fixed_window,
648        )
649
650        if isinstance(scoring, str):
651            assert scoring in (
652                "root_mean_squared_error",
653                "mean_squared_error",
654                "mean_error",
655                "mean_absolute_error",
656                "mean_percentage_error",
657                "mean_absolute_percentage_error",
658                "winkler_score",
659                "coverage",
660            ), "must have scoring in ('root_mean_squared_error', 'mean_squared_error', 'mean_error', 'mean_absolute_error', 'mean_error', 'mean_percentage_error', 'mean_absolute_percentage_error',  'winkler_score', 'coverage')"
661
662            def err_func(X_test, X_pred, scoring):
663                if (self.replications is not None) or (
664                    self.type_pi == "gaussian"
665                ):  # probabilistic
666                    if scoring == "winkler_score":
667                        return winkler_score(X_pred, X_test, level=level)
668                    elif scoring == "coverage":
669                        return coverage(X_pred, X_test, level=level)
670                    else:
671                        return mean_errors(
672                            pred=X_pred.mean, actual=X_test, scoring=scoring
673                        )
674                else:  # not probabilistic
675                    return mean_errors(
676                        pred=X_pred, actual=X_test, scoring=scoring
677                    )
678
679        else:  # isinstance(scoring, str) = False
680            err_func = scoring
681
682        errors = []
683
684        train_indices = []
685
686        test_indices = []
687
688        for train_index, test_index in tscv_obj:
689            train_indices.append(train_index)
690            test_indices.append(test_index)
691
692        if show_progress is True:
693            iterator = tqdm(
694                zip(train_indices, test_indices), total=len(train_indices)
695            )
696        else:
697            iterator = zip(train_indices, test_indices)
698
699        for train_index, test_index in iterator:
700            if verbose == 1:
701                print(f"TRAIN: {train_index}")
702                print(f"TEST: {test_index}")
703
704            if isinstance(X, pd.DataFrame):
705                self.fit(X.iloc[train_index, :], xreg=xreg, **kwargs)
706                X_test = X.iloc[test_index, :]
707            else:
708                self.fit(X[train_index, :], xreg=xreg, **kwargs)
709                X_test = X[test_index, :]
710            X_pred = self.predict(h=int(len(test_index)), level=level, **kwargs)
711
712            errors.append(err_func(X_test, X_pred, scoring))
713
714        res = np.asarray(errors)
715
716        return res, describe(res)

Time series with statistical models (statsmodels), mostly for benchmarks

Parameters:

model: type of model: str.
    currently, 'VAR', 'VECM', 'ARIMA', 'ETS', 'Theta'
    Default is 'VAR'

obj: object
    A time series model from statsmodels

Attributes:

df_: data frame
    the input data frame, in case a data.frame is provided to `fit`

level_: int
    level of confidence for prediction intervals (default is 95)

Examples: See examples/classical_mts_timeseries.py

def fit(self, X, **kwargs):
 98    def fit(self, X, **kwargs):
 99        """Fit ClassicalMTS model to training data X, with optional regressors xreg
100
101        Parameters:
102
103        X: {array-like}, shape = [n_samples, n_features]
104            Training time series, where n_samples is the number
105            of samples and n_features is the number of features;
106            X must be in increasing order (most recent observations last)
107
108        **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)
109
110        Returns:
111
112        self: object
113        """
114
115        try:
116            self.n_series = X.shape[1]
117        except Exception:
118            self.n_series = 1
119
120        if (isinstance(X, pd.DataFrame) is False) and isinstance(
121            X, pd.Series
122        ) is False:  # input data set is a numpy array
123            X = pd.DataFrame(X)
124            if self.n_series > 1:
125                self.series_names = [
126                    "series" + str(i) for i in range(X.shape[1])
127                ]
128            else:
129                self.series_names = "series0"
130
131        else:  # input data set is a DataFrame or Series with column names
132            X_index = None
133            if X.index is not None and len(X.shape) > 1:
134                X_index = X.index
135                X = copy.deepcopy(mo.convert_df_to_numeric(X))
136            if X_index is not None:
137                try:
138                    X.index = X_index
139                except Exception:
140                    pass
141            if isinstance(X, pd.DataFrame):
142                self.series_names = X.columns.tolist()
143            else:
144                self.series_names = X.name
145
146        if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series):
147            self.df_ = X
148            X = X.values
149            self.df_.columns = self.series_names
150            self.input_dates = ts.compute_input_dates(self.df_)
151        else:
152            self.df_ = pd.DataFrame(X, columns=self.series_names)
153
154        if self.model == "Theta":
155            try:
156                self.obj = self.obj(self.df_, **kwargs).fit()
157            except Exception as e:
158                self.obj = self.obj(self.df_.values, **kwargs).fit()
159            self.residuals_ = None
160        else:
161            self.obj = self.obj(X, **kwargs).fit()
162            try:
163                self.residuals_ = self.obj.resid
164            except Exception as e:  # Theta
165                self.residuals_ = None
166
167        return self

Fit ClassicalMTS model to training data X

Parameters:

X: {array-like}, shape = [n_samples, n_features] Training time series, where n_samples is the number of samples and n_features is the number of features; X must be in increasing order (most recent observations last)

**kwargs: additional keyword arguments forwarded to the constructor of the underlying model (`self.obj`)

Returns:

self: object

def predict(self, h=5, level=95, **kwargs):
169    def predict(self, h=5, level=95, **kwargs):
170        """Forecast all the time series, h steps ahead
171
172        Parameters:
173
174        h: {integer}
175            Forecasting horizon
176
177        **kwargs: additional parameters to be passed to
178                self.cook_test_set
179
180        Returns:
181
182        model predictions for horizon = h: {array-like}
183
184        """
185
186        self.output_dates_, frequency = ts.compute_output_dates(self.df_, h)
187        self.level_ = level
188        self.lower_ = None  # do not remove (/!\)
189        self.upper_ = None  # do not remove (/!\)
190        self.sims_ = None  # do not remove (/!\)
191        self.level_ = level
192        self.alpha_ = 100 - level
193
194        pi_multiplier = norm.ppf(1 - self.alpha_ / 200)
195
196        # Named tuple for forecast results
197        DescribeResult = namedtuple(
198            "DescribeResult", ("mean", "lower", "upper")
199        )
200
201        if (
202            self.obj is not None
203        ):  # try all the special cases of the else section (there's probably a better way)
204            try:
205                (
206                    mean_forecast,
207                    lower_bound,
208                    upper_bound,
209                ) = self.obj.forecast_interval(
210                    self.obj.endog, steps=h, alpha=self.alpha_ / 100, **kwargs
211                )
212
213            except Exception as e:
214                try:
215                    forecast_result = self.obj.predict(steps=h)
216                    mean_forecast = forecast_result
217                    (
218                        lower_bound,
219                        upper_bound,
220                    ) = self._compute_confidence_intervals(
221                        forecast_result, alpha=self.alpha_ / 100, **kwargs
222                    )
223
224                except Exception as e:
225                    try:
226                        forecast_result = self.obj.get_forecast(steps=h)
227                        mean_forecast = forecast_result.predicted_mean
228                        lower_bound = forecast_result.conf_int()[:, 0]
229                        upper_bound = forecast_result.conf_int()[:, 1]
230
231                    except Exception as e:
232                        try:
233                            forecast_result = self.obj.forecast(steps=h)
234                            residuals = self.obj.resid
235                            std_errors = np.std(residuals)
236                            mean_forecast = forecast_result
237                            lower_bound = (
238                                forecast_result - pi_multiplier * std_errors
239                            )
240                            upper_bound = (
241                                forecast_result + pi_multiplier * std_errors
242                            )
243
244                        except Exception as e:
245                            try:
246                                mean_forecast = self.obj.forecast(
247                                    steps=h
248                                ).values
249                                forecast_result = self.obj.prediction_intervals(
250                                    steps=h, alpha=self.alpha_ / 100, **kwargs
251                                )
252                                lower_bound = forecast_result["lower"].values
253                                upper_bound = forecast_result["upper"].values
254                            except Exception:
255                                mean_forecast = self.obj.forecast(steps=h)
256                                forecast_result = self.obj.prediction_intervals(
257                                    steps=h, alpha=self.alpha_ / 100, **kwargs
258                                )
259                                lower_bound = forecast_result["lower"]
260                                upper_bound = forecast_result["upper"]
261
262        else:
263            if self.model == "VAR":
264                (
265                    mean_forecast,
266                    lower_bound,
267                    upper_bound,
268                ) = self.obj.forecast_interval(
269                    self.obj.endog, steps=h, alpha=self.alpha_ / 100, **kwargs
270                )
271
272            elif self.model == "VECM":
273                forecast_result = self.obj.predict(steps=h)
274                mean_forecast = forecast_result
275                lower_bound, upper_bound = self._compute_confidence_intervals(
276                    forecast_result, alpha=self.alpha_ / 100, **kwargs
277                )
278
279            elif self.model == "ARIMA":
280                forecast_result = self.obj.get_forecast(steps=h)
281                mean_forecast = forecast_result.predicted_mean
282                lower_bound = forecast_result.conf_int()[:, 0]
283                upper_bound = forecast_result.conf_int()[:, 1]
284
285            elif self.model == "ETS":
286                forecast_result = self.obj.forecast(steps=h)
287                residuals = self.obj.resid
288                std_errors = np.std(residuals)
289                mean_forecast = forecast_result
290                lower_bound = forecast_result - pi_multiplier * std_errors
291                upper_bound = forecast_result + pi_multiplier * std_errors
292
293            elif self.model == "Theta":
294                try:
295                    mean_forecast = self.obj.forecast(steps=h).values
296                    forecast_result = self.obj.prediction_intervals(
297                        steps=h, alpha=self.alpha_ / 100, **kwargs
298                    )
299                    lower_bound = forecast_result["lower"].values
300                    upper_bound = forecast_result["upper"].values
301                except Exception:
302                    mean_forecast = self.obj.forecast(steps=h)
303                    forecast_result = self.obj.prediction_intervals(
304                        steps=h, alpha=self.alpha_ / 100, **kwargs
305                    )
306                    lower_bound = forecast_result["lower"]
307                    upper_bound = forecast_result["upper"]
308
309            else:
310                raise ValueError("model not recognized")
311
312        try:
313            self.mean_ = pd.DataFrame(
314                mean_forecast,
315                columns=self.series_names,
316                index=self.output_dates_,
317            )
318            self.lower_ = pd.DataFrame(
319                lower_bound, columns=self.series_names, index=self.output_dates_
320            )
321            self.upper_ = pd.DataFrame(
322                upper_bound, columns=self.series_names, index=self.output_dates_
323            )
324        except Exception:
325            self.mean_ = pd.Series(
326                mean_forecast, name=self.series_names, index=self.output_dates_
327            )
328            self.lower_ = pd.Series(
329                lower_bound, name=self.series_names, index=self.output_dates_
330            )
331            self.upper_ = pd.Series(
332                upper_bound, name=self.series_names, index=self.output_dates_
333            )
334
335        return DescribeResult(
336            mean=self.mean_, lower=self.lower_, upper=self.upper_
337        )

Forecast all the time series, h steps ahead

Parameters:

h: {integer} Forecasting horizon

**kwargs: additional parameters forwarded to the fitted model's interval computation (e.g. `forecast_interval`, `prediction_intervals`)

Returns:

model predictions for horizon = h: {array-like}

def score(self, X, training_index, testing_index, scoring=None, **kwargs):
354    def score(self, X, training_index, testing_index, scoring=None, **kwargs):
355        """Train on training_index, score on testing_index."""
356
357        assert (
358            bool(set(training_index).intersection(set(testing_index))) == False
359        ), "Non-overlapping 'training_index' and 'testing_index' required"
360
361        # Dimensions
362        try:
363            # multivariate time series
364            n, p = X.shape
365        except:
366            # univariate time series
367            n = X.shape[0]
368            p = 1
369
370        # Training and testing sets
371        if p > 1:
372            X_train = X[training_index, :]
373            X_test = X[testing_index, :]
374        else:
375            X_train = X[training_index]
376            X_test = X[testing_index]
377
378        # Horizon
379        h = len(testing_index)
380        assert (
381            len(training_index) + h
382        ) <= n, "Please check lengths of training and testing windows"
383
384        # Fit and predict
385        self.fit(X_train, **kwargs)
386        preds = self.predict(h=h, **kwargs)
387
388        if scoring is None:
389            scoring = "neg_root_mean_squared_error"
390
391        # check inputs
392        assert scoring in (
393            "explained_variance",
394            "neg_mean_absolute_error",
395            "neg_mean_squared_error",
396            "neg_root_mean_squared_error",
397            "neg_mean_squared_log_error",
398            "neg_median_absolute_error",
399            "r2",
400        ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \
401                               'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \
402                               'neg_median_absolute_error', 'r2')"
403
404        scoring_options = {
405            "explained_variance": skm2.explained_variance_score,
406            "neg_mean_absolute_error": skm2.mean_absolute_error,
407            "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2),
408            "neg_root_mean_squared_error": lambda x, y: np.sqrt(
409                np.mean((x - y) ** 2)
410            ),
411            "neg_mean_squared_log_error": skm2.mean_squared_log_error,
412            "neg_median_absolute_error": skm2.median_absolute_error,
413            "r2": skm2.r2_score,
414        }
415
416        # if p > 1:
417        #     return tuple(
418        #         [
419        #             scoring_options[scoring](
420        #                 X_test[:, i], preds[:, i]#, **kwargs
421        #             )
422        #             for i in range(p)
423        #         ]
424        #     )
425        # else:
426        return scoring_options[scoring](X_test, preds)

Train on training_index, score on testing_index.

class CustomClassifier(nnetsauce.custom.custom.Custom, sklearn.base.ClassifierMixin):
 16class CustomClassifier(Custom, ClassifierMixin):
 17    """Custom Classification model
 18
 19    Attributes:
 20
 21        obj: object
 22            any object containing a method fit (obj.fit()) and a method predict
 23            (obj.predict())
 24
 25        n_hidden_features: int
 26            number of nodes in the hidden layer
 27
 28        activation_name: str
 29            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 30
 31        a: float
 32            hyperparameter for 'prelu' or 'elu' activation function
 33
 34        nodes_sim: str
 35            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 36            'uniform'
 37
 38        bias: boolean
 39            indicates if the hidden layer contains a bias term (True) or not
 40            (False)
 41
 42        dropout: float
 43            regularization parameter; (random) percentage of nodes dropped out
 44            of the training
 45
 46        direct_link: boolean
 47            indicates if the original predictors are included (True) in model''s
 48            fitting or not (False)
 49
 50        n_clusters: int
 51            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 52                no clustering)
 53
 54        cluster_encode: bool
 55            defines how the variable containing clusters is treated (default is one-hot)
 56            if `False`, then labels are used, without one-hot encoding
 57
 58        type_clust: str
 59            type of clustering method: currently k-means ('kmeans') or Gaussian
 60            Mixture Model ('gmm')
 61
 62        type_scaling: a tuple of 3 strings
 63            scaling methods for inputs, hidden layer, and clustering respectively
 64            (and when relevant).
 65            Currently available: standardization ('std') or MinMax scaling ('minmax')
 66
 67        col_sample: float
 68            percentage of covariates randomly chosen for training
 69
 70        row_sample: float
 71            percentage of rows chosen for training, by stratified bootstrapping
 72
 73        cv_calibration: int, cross-validation generator, or iterable, default=2
 74            Determines the cross-validation splitting strategy. Same as
 75            `sklearn.calibration.CalibratedClassifierCV`
 76
 77        calibration_method: str
 78            {‘sigmoid’, ‘isotonic’}, default=’sigmoid’
 79            The method to use for calibration. Same as
 80            `sklearn.calibration.CalibratedClassifierCV`
 81
 82        seed: int
 83            reproducibility seed for nodes_sim=='uniform'
 84
 85        backend: str
 86            "cpu" or "gpu" or "tpu"
 87
 88    Examples:
 89
 90    Note: it's better to use the `DeepClassifier` or `LazyDeepClassifier` classes directly
 91
 92    ```python
 93    import nnetsauce as ns
 94    from sklearn.ensemble import RandomForestClassifier
 95    from sklearn.model_selection import train_test_split
 96    from sklearn.datasets import load_digits
 97    from time import time
 98
 99    digits = load_digits()
100    X = digits.data
101    y = digits.target
102    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
103                                                        random_state=123)
104
105    # layer 1 (base layer) ----
106    layer1_regr = RandomForestClassifier(n_estimators=10, random_state=123)
107
108    start = time()
109
110    layer1_regr.fit(X_train, y_train)
111
112    # Accuracy in layer 1
113    print(layer1_regr.score(X_test, y_test))
114
115    # layer 2 using layer 1 ----
116    layer2_regr = ns.CustomClassifier(obj = layer1_regr, n_hidden_features=5,
117                            direct_link=True, bias=True,
118                            nodes_sim='uniform', activation_name='relu',
119                            n_clusters=2, seed=123)
120    layer2_regr.fit(X_train, y_train)
121
122    # Accuracy in layer 2
123    print(layer2_regr.score(X_test, y_test))
124
125    # layer 3 using layer 2 ----
126    layer3_regr = ns.CustomClassifier(obj = layer2_regr, n_hidden_features=10,
127                            direct_link=True, bias=True, dropout=0.7,
128                            nodes_sim='uniform', activation_name='relu',
129                            n_clusters=2, seed=123)
130    layer3_regr.fit(X_train, y_train)
131
132    # Accuracy in layer 3
133    print(layer3_regr.score(X_test, y_test))
134
135    print(f"Elapsed {time() - start}")
136    ```
137
138    """
139
140    # construct the object -----
141    _estimator_type = "classifier"
142
143    def __init__(
144        self,
145        obj,
146        n_hidden_features=5,
147        activation_name="relu",
148        a=0.01,
149        nodes_sim="sobol",
150        bias=True,
151        dropout=0,
152        direct_link=True,
153        n_clusters=2,
154        cluster_encode=True,
155        type_clust="kmeans",
156        type_scaling=("std", "std", "std"),
157        col_sample=1,
158        row_sample=1,
159        cv_calibration=2,
160        calibration_method="sigmoid",
161        seed=123,
162        backend="cpu",
163    ):
164        super().__init__(
165            obj=obj,
166            n_hidden_features=n_hidden_features,
167            activation_name=activation_name,
168            a=a,
169            nodes_sim=nodes_sim,
170            bias=bias,
171            dropout=dropout,
172            direct_link=direct_link,
173            n_clusters=n_clusters,
174            cluster_encode=cluster_encode,
175            type_clust=type_clust,
176            type_scaling=type_scaling,
177            col_sample=col_sample,
178            row_sample=row_sample,
179            seed=seed,
180            backend=backend,
181        )
182        self.coef_ = None
183        self.intercept_ = None
184        self.type_fit = "classification"
185        self.cv_calibration = cv_calibration
186        self.calibration_method = calibration_method
187
188    def __sklearn_clone__(self):
189        """Create a clone of the estimator.
190
191        This is required for scikit-learn's calibration system to work properly.
192        """
193        # Create a new instance with the same parameters
194        clone = CustomClassifier(
195            obj=self.obj,
196            n_hidden_features=self.n_hidden_features,
197            activation_name=self.activation_name,
198            a=self.a,
199            nodes_sim=self.nodes_sim,
200            bias=self.bias,
201            dropout=self.dropout,
202            direct_link=self.direct_link,
203            n_clusters=self.n_clusters,
204            cluster_encode=self.cluster_encode,
205            type_clust=self.type_clust,
206            type_scaling=self.type_scaling,
207            col_sample=self.col_sample,
208            row_sample=self.row_sample,
209            cv_calibration=self.cv_calibration,
210            calibration_method=self.calibration_method,
211            seed=self.seed,
212            backend=self.backend,
213        )
214        return clone
215
216    def fit(self, X, y, sample_weight=None, **kwargs):
217        """Fit custom model to training data (X, y).
218
219        Parameters:
220
221            X: {array-like}, shape = [n_samples, n_features]
222                Training vectors, where n_samples is the number
223                of samples and n_features is the number of features.
224
225            y: array-like, shape = [n_samples]
226                Target values.
227
228            sample_weight: array-like, shape = [n_samples]
229                Sample weights.
230
231            **kwargs: additional parameters to be passed to
232                        self.cook_training_set or self.obj.fit
233
234        Returns:
235
236            self: object
237        """
238
239        if len(X.shape) == 1:
240            if isinstance(X, pd.DataFrame):
241                X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns)
242            else:
243                X = X.reshape(1, -1)
244
245        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
246        self.classes_ = np.unique(y)
247        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
248
249        # Wrap in CalibratedClassifierCV if needed
250        if self.cv_calibration is not None:
251            self.obj = CalibratedClassifierCV(
252                self.obj, cv=self.cv_calibration, method=self.calibration_method
253            )
254
255        # if sample_weights, else: (must use self.row_index)
256        if sample_weight is not None:
257            self.obj.fit(
258                scaled_Z,
259                output_y,
260                sample_weight=sample_weight[self.index_row_].ravel(),
261                **kwargs
262            )
263            return self
264
265        # if sample_weight is None:
266        self.obj.fit(scaled_Z, output_y, **kwargs)
267        self.classes_ = np.unique(y)  # for compatibility with sklearn
268        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
269
270        if hasattr(self.obj, "coef_"):
271            self.coef_ = self.obj.coef_
272
273        if hasattr(self.obj, "intercept_"):
274            self.intercept_ = self.obj.intercept_
275
276        return self
277
278    def partial_fit(self, X, y, sample_weight=None, **kwargs):
279        """Partial fit custom model to training data (X, y).
280
281        Parameters:
282
283            X: {array-like}, shape = [n_samples, n_features]
284                Subset of training vectors, where n_samples is the number
285                of samples and n_features is the number of features.
286
287            y: array-like, shape = [n_samples]
288                Subset of target values.
289
290            sample_weight: array-like, shape = [n_samples]
291                Sample weights.
292
293            **kwargs: additional parameters to be passed to
294                        self.cook_training_set or self.obj.fit
295
296        Returns:
297
298            self: object
299        """
300
301        if len(X.shape) == 1:
302            if isinstance(X, pd.DataFrame):
303                X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns)
304            else:
305                X = X.reshape(1, -1)
306            y = np.array([y], dtype=int)
307
308        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
309        self.n_classes_ = len(np.unique(y))  # for compatibility with sklearn
310
311        # if sample_weights, else: (must use self.row_index)
312        if sample_weight is not None:
313            try:
314                self.obj.partial_fit(
315                    scaled_Z,
316                    output_y,
317                    sample_weight=sample_weight[self.index_row_].ravel(),
318                    # **kwargs
319                )
320            except:
321                NotImplementedError
322
323            return self
324
325        # if sample_weight is None:
326        # try:
327        self.obj.partial_fit(scaled_Z, output_y)
328        # except:
329        #    raise NotImplementedError
330
331        self.classes_ = np.unique(y)  # for compatibility with sklearn
332        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
333
334        return self
335
336    def predict(self, X, **kwargs):
337        """Predict test data X.
338
339        Parameters:
340
341            X: {array-like}, shape = [n_samples, n_features]
342                Training vectors, where n_samples is the number
343                of samples and n_features is the number of features.
344
345            **kwargs: additional parameters to be passed to
346                    self.cook_test_set
347
348        Returns:
349
350            model predictions: {array-like}
351        """
352
353        if len(X.shape) == 1:
354            n_features = X.shape[0]
355            new_X = mo.rbind(
356                X.reshape(1, n_features),
357                np.ones(n_features).reshape(1, n_features),
358            )
359
360            return (
361                self.obj.predict(self.cook_test_set(new_X, **kwargs), **kwargs)
362            )[0]
363
364        return self.obj.predict(self.cook_test_set(X, **kwargs), **kwargs)
365
366    def predict_proba(self, X, **kwargs):
367        """Predict probabilities for test data X.
368
369        Args:
370
371            X: {array-like}, shape = [n_samples, n_features]
372                Training vectors, where n_samples is the number
373                of samples and n_features is the number of features.
374
375            **kwargs: additional parameters to be passed to
376                    self.cook_test_set
377
378        Returns:
379
380            probability estimates for test data: {array-like}
381        """
382
383        if len(X.shape) == 1:
384            n_features = X.shape[0]
385            new_X = mo.rbind(
386                X.reshape(1, n_features),
387                np.ones(n_features).reshape(1, n_features),
388            )
389            return (
390                self.obj.predict_proba(
391                    self.cook_test_set(new_X, **kwargs), **kwargs
392                )
393            )[0]
394        return self.obj.predict_proba(self.cook_test_set(X, **kwargs), **kwargs)
395
396    def decision_function(self, X, **kwargs):
397        """Compute the decision function of X.
398
399        Parameters:
400            X: {array-like}, shape = [n_samples, n_features]
401                Samples to compute decision function for.
402
403            **kwargs: additional parameters to be passed to
404                    self.cook_test_set
405
406        Returns:
407            array-like of shape (n_samples,) or (n_samples, n_classes)
408            Decision function of the input samples. The order of outputs is the same
409            as that of the classes passed to fit.
410        """
411        if not hasattr(self.obj, "decision_function"):
412            # If base classifier doesn't have decision_function, use predict_proba
413            proba = self.predict_proba(X, **kwargs)
414            if proba.shape[1] == 2:
415                return proba[:, 1]  # For binary classification
416            return proba  # For multiclass
417
418        if len(X.shape) == 1:
419            n_features = X.shape[0]
420            new_X = mo.rbind(
421                X.reshape(1, n_features),
422                np.ones(n_features).reshape(1, n_features),
423            )
424
425            return (
426                self.obj.decision_function(
427                    self.cook_test_set(new_X, **kwargs), **kwargs
428                )
429            )[0]
430
431        return self.obj.decision_function(
432            self.cook_test_set(X, **kwargs), **kwargs
433        )
434
435    def score(self, X, y, scoring=None):
436        """Scoring function for classification.
437
438        Args:
439
440            X: {array-like}, shape = [n_samples, n_features]
441                Training vectors, where n_samples is the number
442                of samples and n_features is the number of features.
443
444            y: array-like, shape = [n_samples]
445                Target values.
446
447            scoring: str
448                scoring method (default is accuracy)
449
450        Returns:
451
452            score: float
453        """
454
455        if scoring is None:
456            scoring = "accuracy"
457
458        if scoring == "accuracy":
459            return skm2.accuracy_score(y, self.predict(X))
460
461        if scoring == "f1":
462            return skm2.f1_score(y, self.predict(X))
463
464        if scoring == "precision":
465            return skm2.precision_score(y, self.predict(X))
466
467        if scoring == "recall":
468            return skm2.recall_score(y, self.predict(X))
469
470        if scoring == "roc_auc":
471            return skm2.roc_auc_score(y, self.predict(X))
472
473        if scoring == "log_loss":
474            return skm2.log_loss(y, self.predict_proba(X))
475
476        if scoring == "balanced_accuracy":
477            return skm2.balanced_accuracy_score(y, self.predict(X))
478
479        if scoring == "average_precision":
480            return skm2.average_precision_score(y, self.predict(X))
481
482        if scoring == "neg_brier_score":
483            return -skm2.brier_score_loss(y, self.predict_proba(X))
484
485        if scoring == "neg_log_loss":
486            return -skm2.log_loss(y, self.predict_proba(X))
487
488    @property
489    def _estimator_type(self):
490        return "classifier"

Custom Classification model

Attributes:

obj: object
    any object containing a method fit (obj.fit()) and a method predict
    (obj.predict())

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in the model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

col_sample: float
    percentage of covariates randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

cv_calibration: int, cross-validation generator, or iterable, default=2
    Determines the cross-validation splitting strategy. Same as
    `sklearn.calibration.CalibratedClassifierCV`

calibration_method: str
    {‘sigmoid’, ‘isotonic’}, default=’sigmoid’
    The method to use for calibration. Same as
    `sklearn.calibration.CalibratedClassifierCV`

seed: int
    reproducibility seed for nodes_sim=='uniform'

backend: str
    "cpu" or "gpu" or "tpu"

Examples:

Note: it's better to use the DeepClassifier or LazyDeepClassifier classes directly

import nnetsauce as ns
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_digits
from time import time

digits = load_digits()
X = digits.data
y = digits.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=123)

# layer 1 (base layer) ----
layer1_regr = RandomForestClassifier(n_estimators=10, random_state=123)

start = time()

layer1_regr.fit(X_train, y_train)

# Accuracy in layer 1
print(layer1_regr.score(X_test, y_test))

# layer 2 using layer 1 ----
layer2_regr = ns.CustomClassifier(obj = layer1_regr, n_hidden_features=5,
                        direct_link=True, bias=True,
                        nodes_sim='uniform', activation_name='relu',
                        n_clusters=2, seed=123)
layer2_regr.fit(X_train, y_train)

# Accuracy in layer 2
print(layer2_regr.score(X_test, y_test))

# layer 3 using layer 2 ----
layer3_regr = ns.CustomClassifier(obj = layer2_regr, n_hidden_features=10,
                        direct_link=True, bias=True, dropout=0.7,
                        nodes_sim='uniform', activation_name='relu',
                        n_clusters=2, seed=123)
layer3_regr.fit(X_train, y_train)

# Accuracy in layer 3
print(layer3_regr.score(X_test, y_test))

print(f"Elapsed {time() - start}")
def fit(self, X, y, sample_weight=None, **kwargs):
216    def fit(self, X, y, sample_weight=None, **kwargs):
217        """Fit custom model to training data (X, y).
218
219        Parameters:
220
221            X: {array-like}, shape = [n_samples, n_features]
222                Training vectors, where n_samples is the number
223                of samples and n_features is the number of features.
224
225            y: array-like, shape = [n_samples]
226                Target values.
227
228            sample_weight: array-like, shape = [n_samples]
229                Sample weights.
230
231            **kwargs: additional parameters to be passed to
232                        self.cook_training_set or self.obj.fit
233
234        Returns:
235
236            self: object
237        """
238
239        if len(X.shape) == 1:
240            if isinstance(X, pd.DataFrame):
241                X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns)
242            else:
243                X = X.reshape(1, -1)
244
245        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
246        self.classes_ = np.unique(y)
247        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
248
249        # Wrap in CalibratedClassifierCV if needed
250        if self.cv_calibration is not None:
251            self.obj = CalibratedClassifierCV(
252                self.obj, cv=self.cv_calibration, method=self.calibration_method
253            )
254
255        # if sample_weights, else: (must use self.row_index)
256        if sample_weight is not None:
257            self.obj.fit(
258                scaled_Z,
259                output_y,
260                sample_weight=sample_weight[self.index_row_].ravel(),
261                **kwargs
262            )
263            return self
264
265        # if sample_weight is None:
266        self.obj.fit(scaled_Z, output_y, **kwargs)
267        self.classes_ = np.unique(y)  # for compatibility with sklearn
268        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
269
270        if hasattr(self.obj, "coef_"):
271            self.coef_ = self.obj.coef_
272
273        if hasattr(self.obj, "intercept_"):
274            self.intercept_ = self.obj.intercept_
275
276        return self

Fit custom model to training data (X, y).

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

sample_weight: array-like, shape = [n_samples]
    Sample weights.

**kwargs: additional parameters to be passed to
            self.cook_training_set or self.obj.fit

Returns:

self: object
def predict(self, X, **kwargs):
336    def predict(self, X, **kwargs):
337        """Predict test data X.
338
339        Parameters:
340
341            X: {array-like}, shape = [n_samples, n_features]
342                Training vectors, where n_samples is the number
343                of samples and n_features is the number of features.
344
345            **kwargs: additional parameters to be passed to
346                    self.cook_test_set
347
348        Returns:
349
350            model predictions: {array-like}
351        """
352
353        if len(X.shape) == 1:
354            n_features = X.shape[0]
355            new_X = mo.rbind(
356                X.reshape(1, n_features),
357                np.ones(n_features).reshape(1, n_features),
358            )
359
360            return (
361                self.obj.predict(self.cook_test_set(new_X, **kwargs), **kwargs)
362            )[0]
363
364        return self.obj.predict(self.cook_test_set(X, **kwargs), **kwargs)

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
def predict_proba(self, X, **kwargs):
366    def predict_proba(self, X, **kwargs):
367        """Predict probabilities for test data X.
368
369        Args:
370
371            X: {array-like}, shape = [n_samples, n_features]
372                Training vectors, where n_samples is the number
373                of samples and n_features is the number of features.
374
375            **kwargs: additional parameters to be passed to
376                    self.cook_test_set
377
378        Returns:
379
380            probability estimates for test data: {array-like}
381        """
382
383        if len(X.shape) == 1:
384            n_features = X.shape[0]
385            new_X = mo.rbind(
386                X.reshape(1, n_features),
387                np.ones(n_features).reshape(1, n_features),
388            )
389            return (
390                self.obj.predict_proba(
391                    self.cook_test_set(new_X, **kwargs), **kwargs
392                )
393            )[0]
394        return self.obj.predict_proba(self.cook_test_set(X, **kwargs), **kwargs)

Predict probabilities for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

probability estimates for test data: {array-like}
def score(self, X, y, scoring=None):
435    def score(self, X, y, scoring=None):
436        """Scoring function for classification.
437
438        Args:
439
440            X: {array-like}, shape = [n_samples, n_features]
441                Training vectors, where n_samples is the number
442                of samples and n_features is the number of features.
443
444            y: array-like, shape = [n_samples]
445                Target values.
446
447            scoring: str
448                scoring method (default is accuracy)
449
450        Returns:
451
452            score: float
453        """
454
455        if scoring is None:
456            scoring = "accuracy"
457
458        if scoring == "accuracy":
459            return skm2.accuracy_score(y, self.predict(X))
460
461        if scoring == "f1":
462            return skm2.f1_score(y, self.predict(X))
463
464        if scoring == "precision":
465            return skm2.precision_score(y, self.predict(X))
466
467        if scoring == "recall":
468            return skm2.recall_score(y, self.predict(X))
469
470        if scoring == "roc_auc":
471            return skm2.roc_auc_score(y, self.predict(X))
472
473        if scoring == "log_loss":
474            return skm2.log_loss(y, self.predict_proba(X))
475
476        if scoring == "balanced_accuracy":
477            return skm2.balanced_accuracy_score(y, self.predict(X))
478
479        if scoring == "average_precision":
480            return skm2.average_precision_score(y, self.predict(X))
481
482        if scoring == "neg_brier_score":
483            return -skm2.brier_score_loss(y, self.predict_proba(X))
484
485        if scoring == "neg_log_loss":
486            return -skm2.log_loss(y, self.predict_proba(X))

Scoring function for classification.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

scoring: str
    scoring method (default is accuracy)

Returns:

score: float
class CustomRegressor(nnetsauce.custom.custom.Custom, sklearn.base.RegressorMixin):
 18class CustomRegressor(Custom, RegressorMixin):
 19    """Custom Regression model
 20
 21    This class is used to 'augment' any regression model with transformed features.
 22
 23    Parameters:
 24
 25        obj: object
 26            any object containing a method fit (obj.fit()) and a method predict
 27            (obj.predict())
 28
 29        n_hidden_features: int
 30            number of nodes in the hidden layer
 31
 32        activation_name: str
 33            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 34
 35        a: float
 36            hyperparameter for 'prelu' or 'elu' activation function
 37
 38        nodes_sim: str
 39            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 40            'uniform'
 41
 42        bias: boolean
 43            indicates if the hidden layer contains a bias term (True) or not
 44            (False)
 45
 46        dropout: float
 47            regularization parameter; (random) percentage of nodes dropped out
 48            of the training
 49
 50        direct_link: boolean
 51            indicates if the original predictors are included (True) in model's
 52            fitting or not (False)
 53
 54        n_clusters: int
 55            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 56                no clustering)
 57
 58        cluster_encode: bool
 59            defines how the variable containing clusters is treated (default is one-hot)
 60            if `False`, then labels are used, without one-hot encoding
 61
 62        type_clust: str
 63            type of clustering method: currently k-means ('kmeans') or Gaussian
 64            Mixture Model ('gmm')
 65
 66        type_scaling: a tuple of 3 strings
 67            scaling methods for inputs, hidden layer, and clustering respectively
 68            (and when relevant).
 69            Currently available: standardization ('std') or MinMax scaling ('minmax')
 70
 71        type_pi: str.
 72            type of prediction interval; currently `None` (split or local
 73            conformal without simulation), "kde" or "bootstrap" (simulated split
 74            conformal).
 75
 76        replications: int.
 77            number of replications (if needed) for predictive simulation.
 78            Used only in `self.predict`, for `self.kernel` in ('gaussian',
 79            'tophat') and `self.type_pi = 'kde'`. Default is `None`.
 80
 81        kernel: str.
 82            the kernel to use for kernel density estimation (used for predictive
 83            simulation in `self.predict`, with `method='splitconformal'` and
 84            `type_pi = 'kde'`). Currently, either 'gaussian' or 'tophat'.
 85
 86        type_split: str.
 87            Type of splitting for conformal prediction. None (default), or
 88            "random" (random split of data) or "sequential" (sequential split of data)
 89
 90        col_sample: float
 91            percentage of covariates randomly chosen for training
 92
 93        row_sample: float
 94            percentage of rows chosen for training, by stratified bootstrapping
 95
 96        level: float
 97            confidence level for prediction intervals
 98
 99        pi_method: str
100            method for prediction intervals: 'splitconformal' or 'localconformal'
101
102        seed: int
103            reproducibility seed for nodes_sim=='uniform'
104
105        type_fit: str
106            'regression'
107
108        backend: str
109            "cpu" or "gpu" or "tpu"
110
111    Examples:
112
113    See [https://thierrymoudiki.github.io/blog/2024/03/18/python/conformal-and-bayesian-regression](https://thierrymoudiki.github.io/blog/2024/03/18/python/conformal-and-bayesian-regression)
114
115    """
116
117    # construct the object -----
118
119    def __init__(
120        self,
121        obj,
122        n_hidden_features=5,
123        activation_name="relu",
124        a=0.01,
125        nodes_sim="sobol",
126        bias=True,
127        dropout=0,
128        direct_link=True,
129        n_clusters=2,
130        cluster_encode=True,
131        type_clust="kmeans",
132        type_scaling=("std", "std", "std"),
133        type_pi=None,
134        replications=None,
135        kernel=None,
136        type_split=None,
137        col_sample=1,
138        row_sample=1,
139        level=None,
140        pi_method=None,
141        seed=123,
142        backend="cpu",
143    ):
144        super().__init__(
145            obj=obj,
146            n_hidden_features=n_hidden_features,
147            activation_name=activation_name,
148            a=a,
149            nodes_sim=nodes_sim,
150            bias=bias,
151            dropout=dropout,
152            direct_link=direct_link,
153            n_clusters=n_clusters,
154            cluster_encode=cluster_encode,
155            type_clust=type_clust,
156            type_scaling=type_scaling,
157            col_sample=col_sample,
158            row_sample=row_sample,
159            seed=seed,
160            backend=backend,
161        )
162
163        self.type_fit = "regression"
164        self.type_pi = type_pi
165        self.replications = replications
166        self.kernel = kernel
167        self.type_split = type_split
168        self.level = level
169        self.pi_method = pi_method
170        self.coef_ = None
171        self.intercept_ = None
172        self.X_ = None
173        self.y_ = None
174        self.aic_ = None
175        self.aicc_ = None
176        self.bic_ = None
177
178    def fit(self, X, y, sample_weight=None, **kwargs):
179        """Fit custom model to training data (X, y).
180
181        Parameters:
182
183            X: {array-like}, shape = [n_samples, n_features]
184                Training vectors, where n_samples is the number
185                of samples and n_features is the number of features.
186
187            y: array-like, shape = [n_samples]
188                Target values.
189
190            sample_weight: array-like, shape = [n_samples]
191                Sample weights.
192
193            **kwargs: additional parameters to be passed to
194                self.cook_training_set or self.obj.fit
195
196        Returns:
197
198            self: object
199
200        """
201
202        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
203
204        if self.level is not None:
205            self.obj = PredictionInterval(
206                obj=self.obj, method=self.pi_method, level=self.level
207            )
208
209        # if sample_weights, else: (must use self.row_index)
210        if sample_weight is not None:
211            self.obj.fit(
212                scaled_Z,
213                centered_y,
214                sample_weight=sample_weight[self.index_row_].ravel(),
215                **kwargs
216            )
217
218            return self
219
220        self.obj.fit(scaled_Z, centered_y, **kwargs)
221
222        self.X_ = X
223
224        self.y_ = y
225
226        # Compute SSE
227        centered_y_pred = self.obj.predict(scaled_Z)
228        self.sse_ = np.sum((centered_y - centered_y_pred) ** 2)
229
230        # Get number of parameters
231        n_params = (
232            self.n_hidden_features + X.shape[1]
233        )  # hidden features + original features
234        if self.n_clusters > 0:
235            n_params += self.n_clusters  # add clusters if used
236
237        # Compute information criteria
238        n_samples = X.shape[0]
239        temp = n_samples * np.log(self.sse_ / n_samples)
240        self.aic_ = temp + 2 * n_params
241        self.bic_ = temp + np.log(n_samples) * n_params
242
243        if hasattr(self.obj, "coef_"):
244            self.coef_ = self.obj.coef_
245
246        if hasattr(self.obj, "intercept_"):
247            self.intercept_ = self.obj.intercept_
248
249        return self
250
251    def partial_fit(self, X, y, **kwargs):
252        """Partial fit custom model to training data (X, y).
253
254        Parameters:
255
256            X: {array-like}, shape = [n_samples, n_features]
257                Subset of training vectors, where n_samples is the number
258                of samples and n_features is the number of features.
259
260            y: array-like, shape = [n_samples]
261                Subset of target values.
262
263            **kwargs: additional parameters to be passed to
264                self.cook_training_set or self.obj.fit
265
266        Returns:
267
268            self: object
269
270        """
271
272        if len(X.shape) == 1:
273            if isinstance(X, pd.DataFrame):
274                X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns)
275            else:
276                X = X.reshape(1, -1)
277            y = np.array([y])
278
279        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
280
281        self.obj.partial_fit(scaled_Z, centered_y, **kwargs)
282
283        self.X_ = X
284
285        self.y_ = y
286
287        return self
288
289    def predict(self, X, level=95, method="splitconformal", **kwargs):
290        """Predict test data X.
291
292        Parameters:
293
294            X: {array-like}, shape = [n_samples, n_features]
295                Training vectors, where n_samples is the number
296                of samples and n_features is the number of features.
297
298            level: int
299                Level of confidence (default = 95)
300
301            method: str
302                'splitconformal', 'localconformal'
303                prediction (if you specify `return_pi = True`)
304
305            **kwargs: additional parameters
306                    `return_pi = True` for conformal prediction,
307                    with `method` in ('splitconformal', 'localconformal')
308                    or `return_std = True` for `self.obj` in
309                    (`sklearn.linear_model.BayesianRidge`,
310                    `sklearn.linear_model.ARDRegressor`,
311                    `sklearn.gaussian_process.GaussianProcessRegressor`)`
312
313        Returns:
314
315            model predictions:
316                an array if uncertainty quantification is not requested,
317                  or a tuple if with prediction intervals and simulations
318                  if `return_std = True` (mean, standard deviation,
319                  lower and upper prediction interval) or `return_pi = True`
320                  ()
321
322        """
323
324        if "return_std" in kwargs:
325            alpha = 100 - level
326            pi_multiplier = norm.ppf(1 - alpha / 200)
327
328            if len(X.shape) == 1:
329                n_features = X.shape[0]
330                new_X = mo.rbind(
331                    X.reshape(1, n_features),
332                    np.ones(n_features).reshape(1, n_features),
333                )
334
335                mean_, std_ = self.obj.predict(
336                    self.cook_test_set(new_X, **kwargs), return_std=True
337                )[0]
338
339                preds = self.y_mean_ + mean_
340                lower = self.y_mean_ + (mean_ - pi_multiplier * std_)
341                upper = self.y_mean_ + (mean_ + pi_multiplier * std_)
342
343                DescribeResults = namedtuple(
344                    "DescribeResults", ["mean", "std", "lower", "upper"]
345                )
346
347                return DescribeResults(preds, std_, lower, upper)
348
349            # len(X.shape) > 1
350            mean_, std_ = self.obj.predict(
351                self.cook_test_set(X, **kwargs), return_std=True
352            )
353
354            preds = self.y_mean_ + mean_
355            lower = self.y_mean_ + (mean_ - pi_multiplier * std_)
356            upper = self.y_mean_ + (mean_ + pi_multiplier * std_)
357
358            DescribeResults = namedtuple(
359                "DescribeResults", ["mean", "std", "lower", "upper"]
360            )
361
362            return DescribeResults(preds, std_, lower, upper)
363
364        if "return_pi" in kwargs:
365            assert method in (
366                "splitconformal",
367                "localconformal",
368            ), "method must be in ('splitconformal', 'localconformal')"
369            self.pi = PredictionInterval(
370                obj=self,
371                method=method,
372                level=level,
373                type_pi=self.type_pi,
374                replications=self.replications,
375                kernel=self.kernel,
376            )
377
378            if len(self.X_.shape) == 1:
379                if isinstance(X, pd.DataFrame):
380                    self.X_ = pd.DataFrame(
381                        self.X_.values.reshape(1, -1), columns=self.X_.columns
382                    )
383                else:
384                    self.X_ = self.X_.reshape(1, -1)
385                self.y_ = np.array([self.y_])
386
387            self.pi.fit(self.X_, self.y_)
388            # self.X_ = None # consumes memory to keep, dangerous to delete (side effect)
389            # self.y_ = None # consumes memory to keep, dangerous to delete (side effect)
390            preds = self.pi.predict(X, return_pi=True)
391            return preds
392
393        # "return_std" not in kwargs
394        if len(X.shape) == 1:
395            n_features = X.shape[0]
396            new_X = mo.rbind(
397                X.reshape(1, n_features),
398                np.ones(n_features).reshape(1, n_features),
399            )
400
401            return (
402                self.y_mean_
403                + self.obj.predict(
404                    self.cook_test_set(new_X, **kwargs), **kwargs
405                )
406            )[0]
407
408        # len(X.shape) > 1
409        return self.y_mean_ + self.obj.predict(
410            self.cook_test_set(X, **kwargs), **kwargs
411        )
412
413    def score(self, X, y, scoring=None):
414        """Compute the score of the model.
415
416        Parameters:
417
418            X: {array-like}, shape = [n_samples, n_features]
419                Training vectors, where n_samples is the number
420                of samples and n_features is the number of features.
421
422            y: array-like, shape = [n_samples]
423                Target values.
424
425            scoring: str
426                scoring method
427
428        Returns:
429
430            score: float
431
432        """
433
434        if scoring is None:
435            return np.sqrt(np.mean((self.predict(X) - y) ** 2))
436
437        return skm2.get_scorer(scoring)(self, X, y)

Custom Regression model

This class is used to 'augment' any regression model with transformed features.

Parameters:

obj: object
    any object containing a method fit (obj.fit()) and a method predict
    (obj.predict())

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

type_pi: str.
    type of prediction interval; currently `None` (split or local
    conformal without simulation), "kde" or "bootstrap" (simulated split
    conformal).

replications: int.
    number of replications (if needed) for predictive simulation.
    Used only in `self.predict`, for `self.kernel` in ('gaussian',
    'tophat') and `self.type_pi = 'kde'`. Default is `None`.

kernel: str.
    the kernel to use for kernel density estimation (used for predictive
    simulation in `self.predict`, with `method='splitconformal'` and
    `type_pi = 'kde'`). Currently, either 'gaussian' or 'tophat'.

type_split: str.
    Type of splitting for conformal prediction. None (default), or
    "random" (random split of data) or "sequential" (sequential split of data)

col_sample: float
    percentage of covariates randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

level: float
    confidence level for prediction intervals

pi_method: str
    method for prediction intervals: 'splitconformal' or 'localconformal'

seed: int
    reproducibility seed for nodes_sim=='uniform'

type_fit: str
    'regression'

backend: str
    "cpu" or "gpu" or "tpu"

Examples:

See https://thierrymoudiki.github.io/blog/2024/03/18/python/conformal-and-bayesian-regression

def fit(self, X, y, sample_weight=None, **kwargs):
178    def fit(self, X, y, sample_weight=None, **kwargs):
179        """Fit custom model to training data (X, y).
180
181        Parameters:
182
183            X: {array-like}, shape = [n_samples, n_features]
184                Training vectors, where n_samples is the number
185                of samples and n_features is the number of features.
186
187            y: array-like, shape = [n_samples]
188                Target values.
189
190            sample_weight: array-like, shape = [n_samples]
191                Sample weights.
192
193            **kwargs: additional parameters to be passed to
194                self.cook_training_set or self.obj.fit
195
196        Returns:
197
198            self: object
199
200        """
201
202        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
203
204        if self.level is not None:
205            self.obj = PredictionInterval(
206                obj=self.obj, method=self.pi_method, level=self.level
207            )
208
209        # if sample_weights, else: (must use self.row_index)
210        if sample_weight is not None:
211            self.obj.fit(
212                scaled_Z,
213                centered_y,
214                sample_weight=sample_weight[self.index_row_].ravel(),
215                **kwargs
216            )
217
218            return self
219
220        self.obj.fit(scaled_Z, centered_y, **kwargs)
221
222        self.X_ = X
223
224        self.y_ = y
225
226        # Compute SSE
227        centered_y_pred = self.obj.predict(scaled_Z)
228        self.sse_ = np.sum((centered_y - centered_y_pred) ** 2)
229
230        # Get number of parameters
231        n_params = (
232            self.n_hidden_features + X.shape[1]
233        )  # hidden features + original features
234        if self.n_clusters > 0:
235            n_params += self.n_clusters  # add clusters if used
236
237        # Compute information criteria
238        n_samples = X.shape[0]
239        temp = n_samples * np.log(self.sse_ / n_samples)
240        self.aic_ = temp + 2 * n_params
241        self.bic_ = temp + np.log(n_samples) * n_params
242
243        if hasattr(self.obj, "coef_"):
244            self.coef_ = self.obj.coef_
245
246        if hasattr(self.obj, "intercept_"):
247            self.intercept_ = self.obj.intercept_
248
249        return self

Fit custom model to training data (X, y).

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

sample_weight: array-like, shape = [n_samples]
    Sample weights.

**kwargs: additional parameters to be passed to
    self.cook_training_set or self.obj.fit

Returns:

self: object
def predict(self, X, level=95, method='splitconformal', **kwargs):
289    def predict(self, X, level=95, method="splitconformal", **kwargs):
290        """Predict test data X.
291
292        Parameters:
293
294            X: {array-like}, shape = [n_samples, n_features]
295                Training vectors, where n_samples is the number
296                of samples and n_features is the number of features.
297
298            level: int
299                Level of confidence (default = 95)
300
301            method: str
302                'splitconformal', 'localconformal'
303                prediction (if you specify `return_pi = True`)
304
305            **kwargs: additional parameters
306                    `return_pi = True` for conformal prediction,
307                    with `method` in ('splitconformal', 'localconformal')
308                    or `return_std = True` for `self.obj` in
309                    (`sklearn.linear_model.BayesianRidge`,
310                    `sklearn.linear_model.ARDRegressor`,
311                    `sklearn.gaussian_process.GaussianProcessRegressor`)`
312
313        Returns:
314
315            model predictions:
316                an array if uncertainty quantification is not requested,
317                  or a tuple if with prediction intervals and simulations
318                  if `return_std = True` (mean, standard deviation,
319                  lower and upper prediction interval) or `return_pi = True`
320                  ()
321
322        """
323
324        if "return_std" in kwargs:
325            alpha = 100 - level
326            pi_multiplier = norm.ppf(1 - alpha / 200)
327
328            if len(X.shape) == 1:
329                n_features = X.shape[0]
330                new_X = mo.rbind(
331                    X.reshape(1, n_features),
332                    np.ones(n_features).reshape(1, n_features),
333                )
334
335                mean_, std_ = self.obj.predict(
336                    self.cook_test_set(new_X, **kwargs), return_std=True
337                )[0]
338
339                preds = self.y_mean_ + mean_
340                lower = self.y_mean_ + (mean_ - pi_multiplier * std_)
341                upper = self.y_mean_ + (mean_ + pi_multiplier * std_)
342
343                DescribeResults = namedtuple(
344                    "DescribeResults", ["mean", "std", "lower", "upper"]
345                )
346
347                return DescribeResults(preds, std_, lower, upper)
348
349            # len(X.shape) > 1
350            mean_, std_ = self.obj.predict(
351                self.cook_test_set(X, **kwargs), return_std=True
352            )
353
354            preds = self.y_mean_ + mean_
355            lower = self.y_mean_ + (mean_ - pi_multiplier * std_)
356            upper = self.y_mean_ + (mean_ + pi_multiplier * std_)
357
358            DescribeResults = namedtuple(
359                "DescribeResults", ["mean", "std", "lower", "upper"]
360            )
361
362            return DescribeResults(preds, std_, lower, upper)
363
364        if "return_pi" in kwargs:
365            assert method in (
366                "splitconformal",
367                "localconformal",
368            ), "method must be in ('splitconformal', 'localconformal')"
369            self.pi = PredictionInterval(
370                obj=self,
371                method=method,
372                level=level,
373                type_pi=self.type_pi,
374                replications=self.replications,
375                kernel=self.kernel,
376            )
377
378            if len(self.X_.shape) == 1:
379                if isinstance(X, pd.DataFrame):
380                    self.X_ = pd.DataFrame(
381                        self.X_.values.reshape(1, -1), columns=self.X_.columns
382                    )
383                else:
384                    self.X_ = self.X_.reshape(1, -1)
385                self.y_ = np.array([self.y_])
386
387            self.pi.fit(self.X_, self.y_)
388            # self.X_ = None # consumes memory to keep, dangerous to delete (side effect)
389            # self.y_ = None # consumes memory to keep, dangerous to delete (side effect)
390            preds = self.pi.predict(X, return_pi=True)
391            return preds
392
393        # "return_std" not in kwargs
394        if len(X.shape) == 1:
395            n_features = X.shape[0]
396            new_X = mo.rbind(
397                X.reshape(1, n_features),
398                np.ones(n_features).reshape(1, n_features),
399            )
400
401            return (
402                self.y_mean_
403                + self.obj.predict(
404                    self.cook_test_set(new_X, **kwargs), **kwargs
405                )
406            )[0]
407
408        # len(X.shape) > 1
409        return self.y_mean_ + self.obj.predict(
410            self.cook_test_set(X, **kwargs), **kwargs
411        )

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

level: int
    Level of confidence (default = 95)

method: str
    'splitconformal', 'localconformal'
    prediction (if you specify `return_pi = True`)

**kwargs: additional parameters
        `return_pi = True` for conformal prediction,
        with `method` in ('splitconformal', 'localconformal')
        or `return_std = True` for `self.obj` in
        (`sklearn.linear_model.BayesianRidge`,
        `sklearn.linear_model.ARDRegression`,
        `sklearn.gaussian_process.GaussianProcessRegressor`)

Returns:

model predictions:
    an array if uncertainty quantification is not requested;
      a named tuple (mean, standard deviation, lower and upper
      prediction interval bounds) if `return_std = True`;
      or the output of the fitted `PredictionInterval`
      if `return_pi = True`
def score(self, X, y, scoring=None):
413    def score(self, X, y, scoring=None):
414        """Compute the score of the model.
415
416        Parameters:
417
418            X: {array-like}, shape = [n_samples, n_features]
419                Training vectors, where n_samples is the number
420                of samples and n_features is the number of features.
421
422            y: array-like, shape = [n_samples]
423                Target values.
424
425            scoring: str
426                scoring method
427
428        Returns:
429
430            score: float
431
432        """
433
434        if scoring is None:
435            return np.sqrt(np.mean((self.predict(X) - y) ** 2))
436
437        return skm2.get_scorer(scoring)(self, X, y)

Compute the score of the model.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

scoring: str
    scoring method

Returns:

score: float
class CustomBackPropRegressor(nnetsauce.custom.custom.Custom, sklearn.base.RegressorMixin):
 18class CustomBackPropRegressor(Custom, RegressorMixin):
 19    """
 20    Finite difference trainer for nnetsauce models.
 21
 22    Parameters
 23    ----------
 24
 25    base_model : str
 26        The name of the base model (e.g., 'RidgeCV').
 27
 28    type_grad : {'finitediff', 'autodiff'}, optional
 29        Type of gradient computation to use (default='finitediff').
 30
 31    lr : float, optional
 32        Learning rate for optimization (default=1e-4).
 33
 34    optimizer : {'gd', 'sgd', 'adam', 'cd'}, optional
 35        Optimization algorithm: gradient descent ('gd'), stochastic gradient descent ('sgd'),
 36        Adam ('adam'), or coordinate descent ('cd'). Default is 'gd'.
 37
 38    eps : float, optional
 39        Scaling factor for adaptive finite difference step size (default=1e-3).
 40
 41    batch_size : int, optional
 42        Batch size for 'sgd' optimizer (default=32).
 43
 44    alpha : float, optional
 45        Elastic net penalty strength (default=0.0).
 46
 47    l1_ratio : float, optional
 48        Elastic net mixing parameter (0 = Ridge, 1 = Lasso, default=0.0).
 49
 50    type_loss : {'mse', 'quantile'}, optional
 51        Type of loss function to use (default='mse').
 52
 53    q : float, optional
 54        Quantile for quantile loss (default=0.5).
 55
 56    **kwargs
 57        Additional parameters to pass to the scikit-learn model.
 58
 59    """
 60
 61    def __init__(
 62        self,
 63        base_model,
 64        type_grad="finitediff",
 65        lr=1e-4,
 66        optimizer="gd",
 67        eps=1e-3,
 68        batch_size=32,
 69        alpha=0.0,
 70        l1_ratio=0.0,
 71        type_loss="mse",
 72        q=0.5,
 73        backend="cpu",
 74        **kwargs,
 75    ):
 76        super().__init__(base_model, True, **kwargs)
 77        self.base_model = base_model
 78        self.custom_kwargs = kwargs
 79        self.backend = backend
 80        self.model = ns.CustomRegressor(
 81            self.base_model, backend=self.backend, **self.custom_kwargs
 82        )
 83        assert isinstance(
 84            self.model, ns.CustomRegressor
 85        ), "'model' must be of class ns.CustomRegressor"
 86        self.type_grad = type_grad
 87        self.lr = lr
 88        self.optimizer = optimizer
 89        self.eps = eps
 90        self.loss_history_ = []
 91        self.opt_state = None
 92        self.batch_size = batch_size  # for SGD
 93        self.loss_history_ = []
 94        self._cd_index = 0  # For coordinate descent
 95        self.alpha = alpha
 96        self.l1_ratio = l1_ratio
 97        self.type_loss = type_loss
 98        self.q = q
 99
100    def _loss(self, X, y, **kwargs):
101        """
102        Compute the loss (with elastic net penalty) for the current model.
103
104        Parameters
105        ----------
106
107        X : array-like of shape (n_samples, n_features)
108            Input data.
109
110        y : array-like of shape (n_samples,)
111            Target values.
112
113        **kwargs
114            Additional keyword arguments for loss calculation.
115
116        Returns
117        -------
118        float
119            The computed loss value.
120        """
121        y_pred = self.model.predict(X)
122        if self.type_loss == "mse":
123            loss = np.mean((y - y_pred) ** 2)
124        elif self.type_loss == "quantile":
125            loss = mean_pinball_loss(y, y_pred, alpha=self.q, **kwargs)
126        W = self.model.W_
127        l1 = np.sum(np.abs(W))
128        l2 = np.sum(W**2)
129        return loss + self.alpha * (
130            self.l1_ratio * l1 + 0.5 * (1 - self.l1_ratio) * l2
131        )
132
133    def _compute_grad(self, X, y):
134        """
135        Compute the gradient of the loss with respect to W_ using finite differences.
136
137        Parameters
138        ----------
139
140        X : array-like of shape (n_samples, n_features)
141            Input data.
142
143        y : array-like of shape (n_samples,)
144            Target values.
145
146        Returns
147        -------
148
149        ndarray
150            Gradient array with the same shape as W_.
151        """
152
153        # Finite difference gradient computation
154        W = deepcopy(self.model.W_)
155        shape = W.shape
156        W_flat = W.flatten()
157        n_params = W_flat.size
158
159        # Adaptive finite difference step
160        h_vec = self.eps * np.maximum(1.0, np.abs(W_flat))
161        eye = np.eye(n_params)
162
163        loss_plus = np.zeros(n_params)
164        loss_minus = np.zeros(n_params)
165
166        for i in range(n_params):
167            h_i = h_vec[i]
168            Wp = W_flat.copy()
169            Wp[i] += h_i
170            Wm = W_flat.copy()
171            Wm[i] -= h_i
172
173            self.model.W_ = Wp.reshape(shape)
174            loss_plus[i] = self._loss(X, y)
175
176            self.model.W_ = Wm.reshape(shape)
177            loss_minus[i] = self._loss(X, y)
178
179        grad = ((loss_plus - loss_minus) / (2 * h_vec)).reshape(shape)
180
181        # Add elastic net gradient
182        l1_grad = self.alpha * self.l1_ratio * np.sign(W)
183        l2_grad = self.alpha * (1 - self.l1_ratio) * W
184        grad += l1_grad + l2_grad
185
186        self.model.W_ = W  # restore original
187        return grad
188
189    def fit(
190        self,
191        X,
192        y,
193        epochs=10,
194        verbose=True,
195        show_progress=True,
196        sample_weight=None,
197        **kwargs,
198    ):
199        """
200        Fit the model using finite difference optimization.
201
202        Parameters
203        ----------
204
205        X : array-like of shape (n_samples, n_features)
206            Training data.
207
208        y : array-like of shape (n_samples,)
209            Target values.
210
211        epochs : int, optional
212            Number of optimization steps (default=10).
213
214        verbose : bool, optional
215            Whether to print progress messages (default=True).
216
217        show_progress : bool, optional
218            Whether to show tqdm progress bar (default=True).
219
220        sample_weight : array-like, optional
221            Sample weights.
222
223        **kwargs
224            Additional keyword arguments.
225
226        Returns
227        -------
228
229        self : object
230            Returns self.
231        """
232
233        self.model.fit(X, y)
234
235        iterator = tqdm(range(epochs)) if show_progress else range(epochs)
236
237        for epoch in iterator:
238            grad = self._compute_grad(X, y)
239
240            if self.optimizer == "gd":
241                self.model.W_ -= self.lr * grad
242                self.model.W_ = np.clip(self.model.W_, 0, 1)
243                # print("self.model.W_", self.model.W_)
244
245            elif self.optimizer == "sgd":
246                # Sample a mini-batch for stochastic gradient
247                n_samples = X.shape[0]
248                idxs = np.random.choice(
249                    n_samples, self.batch_size, replace=False
250                )
251                if isinstance(X, pd.DataFrame):
252                    X_batch = X.iloc[idxs, :]
253                else:
254                    X_batch = X[idxs, :]
255                y_batch = y[idxs]
256                grad = self._compute_grad(X_batch, y_batch)
257
258                self.model.W_ -= self.lr * grad
259                self.model.W_ = np.clip(self.model.W_, 0, 1)
260
261            elif self.optimizer == "adam":
262                if self.opt_state is None:
263                    self.opt_state = {
264                        "m": np.zeros_like(grad),
265                        "v": np.zeros_like(grad),
266                        "t": 0,
267                    }
268                beta1, beta2, eps = 0.9, 0.999, 1e-8
269                self.opt_state["t"] += 1
270                self.opt_state["m"] = (
271                    beta1 * self.opt_state["m"] + (1 - beta1) * grad
272                )
273                self.opt_state["v"] = beta2 * self.opt_state["v"] + (
274                    1 - beta2
275                ) * (grad**2)
276                m_hat = self.opt_state["m"] / (1 - beta1 ** self.opt_state["t"])
277                v_hat = self.opt_state["v"] / (1 - beta2 ** self.opt_state["t"])
278
279                self.model.W_ -= self.lr * m_hat / (np.sqrt(v_hat) + eps)
280                self.model.W_ = np.clip(self.model.W_, 0, 1)
281                # print("self.model.W_", self.model.W_)
282
283            elif self.optimizer == "cd":  # coordinate descent
284                W_shape = self.model.W_.shape
285                W_flat_size = self.model.W_.size
286                W_flat = self.model.W_.flatten()
287                grad_flat = grad.flatten()
288
289                # Update only one coordinate per epoch (cyclic)
290                idx = self._cd_index % W_flat_size
291                W_flat[idx] -= self.lr * grad_flat[idx]
292                # Clip the updated value
293                W_flat[idx] = np.clip(W_flat[idx], 0, 1)
294
295                # Restore W_
296                self.model.W_ = W_flat.reshape(W_shape)
297
298                self._cd_index += 1
299
300            else:
301                raise ValueError(f"Unsupported optimizer: {self.optimizer}")
302
303            loss = self._loss(X, y)
304            self.loss_history_.append(loss)
305
306            if verbose:
307                print(f"Epoch {epoch+1}: Loss = {loss:.6f}")
308
309        # if sample_weights, else: (must use self.row_index)
310        if sample_weight in kwargs:
311            self.model.fit(
312                X,
313                y,
314                sample_weight=sample_weight[self.index_row_].ravel(),
315                **kwargs,
316            )
317
318            return self
319
320        return self
321
322    def predict(self, X, level=95, method="splitconformal", **kwargs):
323        """
324        Predict using the trained model.
325
326        Parameters
327        ----------
328
329        X : array-like of shape (n_samples, n_features)
330            Input data.
331
332        level : int, optional
333            Level of confidence for prediction intervals (default=95).
334
335        method : {'splitconformal', 'localconformal'}, optional
336            Method for conformal prediction (default='splitconformal').
337
338        **kwargs
339            Additional keyword arguments. Use `return_pi=True` for prediction intervals,
340            or `return_std=True` for standard deviation estimates.
341
342        Returns
343        -------
344
345        array or tuple
346            Model predictions, or a tuple with prediction intervals or standard deviations if requested.
347        """
348        if "return_std" in kwargs:
349            alpha = 100 - level
350            pi_multiplier = norm.ppf(1 - alpha / 200)
351
352            if len(X.shape) == 1:
353                n_features = X.shape[0]
354                new_X = mo.rbind(
355                    X.reshape(1, n_features),
356                    np.ones(n_features).reshape(1, n_features),
357                )
358
359                mean_, std_ = self.model.predict(new_X, return_std=True)[0]
360
361                preds = mean_
362                lower = mean_ - pi_multiplier * std_
363                upper = mean_ + pi_multiplier * std_
364
365                DescribeResults = namedtuple(
366                    "DescribeResults", ["mean", "std", "lower", "upper"]
367                )
368
369                return DescribeResults(preds, std_, lower, upper)
370
371            # len(X.shape) > 1
372            mean_, std_ = self.model.predict(X, return_std=True)
373
374            preds = mean_
375            lower = mean_ - pi_multiplier * std_
376            upper = mean_ + pi_multiplier * std_
377
378            DescribeResults = namedtuple(
379                "DescribeResults", ["mean", "std", "lower", "upper"]
380            )
381
382            return DescribeResults(preds, std_, lower, upper)
383
384        if "return_pi" in kwargs:
385            assert method in (
386                "splitconformal",
387                "localconformal",
388            ), "method must be in ('splitconformal', 'localconformal')"
389            self.pi = ns.PredictionInterval(
390                obj=self,
391                method=method,
392                level=level,
393                type_pi=self.type_pi,
394                replications=self.replications,
395                kernel=self.kernel,
396            )
397
398            if len(self.X_.shape) == 1:
399                if isinstance(X, pd.DataFrame):
400                    self.X_ = pd.DataFrame(
401                        self.X_.values.reshape(1, -1), columns=self.X_.columns
402                    )
403                else:
404                    self.X_ = self.X_.reshape(1, -1)
405                self.y_ = np.array([self.y_])
406
407            self.pi.fit(self.X_, self.y_)
408            # self.X_ = None # consumes memory to keep, dangerous to delete (side effect)
409            # self.y_ = None # consumes memory to keep, dangerous to delete (side effect)
410            preds = self.pi.predict(X, return_pi=True)
411            return preds
412
413        # "return_std" not in kwargs
414        if len(X.shape) == 1:
415            n_features = X.shape[0]
416            new_X = mo.rbind(
417                X.reshape(1, n_features),
418                np.ones(n_features).reshape(1, n_features),
419            )
420
421            return (0 + self.model.predict(new_X, **kwargs))[0]
422
423        # len(X.shape) > 1
424        return self.model.predict(X, **kwargs)

Finite difference trainer for nnetsauce models.

Parameters

base_model : str The name of the base model (e.g., 'RidgeCV').

type_grad : {'finitediff', 'autodiff'}, optional Type of gradient computation to use (default='finitediff').

lr : float, optional Learning rate for optimization (default=1e-4).

optimizer : {'gd', 'sgd', 'adam', 'cd'}, optional Optimization algorithm: gradient descent ('gd'), stochastic gradient descent ('sgd'), Adam ('adam'), or coordinate descent ('cd'). Default is 'gd'.

eps : float, optional Scaling factor for adaptive finite difference step size (default=1e-3).

batch_size : int, optional Batch size for 'sgd' optimizer (default=32).

alpha : float, optional Elastic net penalty strength (default=0.0).

l1_ratio : float, optional Elastic net mixing parameter (0 = Ridge, 1 = Lasso, default=0.0).

type_loss : {'mse', 'quantile'}, optional Type of loss function to use (default='mse').

q : float, optional Quantile for quantile loss (default=0.5).

**kwargs Additional parameters to pass to the scikit-learn model.

def fit( self, X, y, epochs=10, verbose=True, show_progress=True, sample_weight=None, **kwargs):
189    def fit(
190        self,
191        X,
192        y,
193        epochs=10,
194        verbose=True,
195        show_progress=True,
196        sample_weight=None,
197        **kwargs,
198    ):
199        """
200        Fit the model using finite difference optimization.
201
202        Parameters
203        ----------
204
205        X : array-like of shape (n_samples, n_features)
206            Training data.
207
208        y : array-like of shape (n_samples,)
209            Target values.
210
211        epochs : int, optional
212            Number of optimization steps (default=10).
213
214        verbose : bool, optional
215            Whether to print progress messages (default=True).
216
217        show_progress : bool, optional
218            Whether to show tqdm progress bar (default=True).
219
220        sample_weight : array-like, optional
221            Sample weights.
222
223        **kwargs
224            Additional keyword arguments.
225
226        Returns
227        -------
228
229        self : object
230            Returns self.
231        """
232
233        self.model.fit(X, y)
234
235        iterator = tqdm(range(epochs)) if show_progress else range(epochs)
236
237        for epoch in iterator:
238            grad = self._compute_grad(X, y)
239
240            if self.optimizer == "gd":
241                self.model.W_ -= self.lr * grad
242                self.model.W_ = np.clip(self.model.W_, 0, 1)
243                # print("self.model.W_", self.model.W_)
244
245            elif self.optimizer == "sgd":
246                # Sample a mini-batch for stochastic gradient
247                n_samples = X.shape[0]
248                idxs = np.random.choice(
249                    n_samples, self.batch_size, replace=False
250                )
251                if isinstance(X, pd.DataFrame):
252                    X_batch = X.iloc[idxs, :]
253                else:
254                    X_batch = X[idxs, :]
255                y_batch = y[idxs]
256                grad = self._compute_grad(X_batch, y_batch)
257
258                self.model.W_ -= self.lr * grad
259                self.model.W_ = np.clip(self.model.W_, 0, 1)
260
261            elif self.optimizer == "adam":
262                if self.opt_state is None:
263                    self.opt_state = {
264                        "m": np.zeros_like(grad),
265                        "v": np.zeros_like(grad),
266                        "t": 0,
267                    }
268                beta1, beta2, eps = 0.9, 0.999, 1e-8
269                self.opt_state["t"] += 1
270                self.opt_state["m"] = (
271                    beta1 * self.opt_state["m"] + (1 - beta1) * grad
272                )
273                self.opt_state["v"] = beta2 * self.opt_state["v"] + (
274                    1 - beta2
275                ) * (grad**2)
276                m_hat = self.opt_state["m"] / (1 - beta1 ** self.opt_state["t"])
277                v_hat = self.opt_state["v"] / (1 - beta2 ** self.opt_state["t"])
278
279                self.model.W_ -= self.lr * m_hat / (np.sqrt(v_hat) + eps)
280                self.model.W_ = np.clip(self.model.W_, 0, 1)
281                # print("self.model.W_", self.model.W_)
282
283            elif self.optimizer == "cd":  # coordinate descent
284                W_shape = self.model.W_.shape
285                W_flat_size = self.model.W_.size
286                W_flat = self.model.W_.flatten()
287                grad_flat = grad.flatten()
288
289                # Update only one coordinate per epoch (cyclic)
290                idx = self._cd_index % W_flat_size
291                W_flat[idx] -= self.lr * grad_flat[idx]
292                # Clip the updated value
293                W_flat[idx] = np.clip(W_flat[idx], 0, 1)
294
295                # Restore W_
296                self.model.W_ = W_flat.reshape(W_shape)
297
298                self._cd_index += 1
299
300            else:
301                raise ValueError(f"Unsupported optimizer: {self.optimizer}")
302
303            loss = self._loss(X, y)
304            self.loss_history_.append(loss)
305
306            if verbose:
307                print(f"Epoch {epoch+1}: Loss = {loss:.6f}")
308
309        # if sample_weights, else: (must use self.row_index)
310        if sample_weight in kwargs:
311            self.model.fit(
312                X,
313                y,
314                sample_weight=sample_weight[self.index_row_].ravel(),
315                **kwargs,
316            )
317
318            return self
319
320        return self

Fit the model using finite difference optimization.

Parameters

X : array-like of shape (n_samples, n_features) Training data.

y : array-like of shape (n_samples,) Target values.

epochs : int, optional Number of optimization steps (default=10).

verbose : bool, optional Whether to print progress messages (default=True).

show_progress : bool, optional Whether to show tqdm progress bar (default=True).

sample_weight : array-like, optional Sample weights.

**kwargs Additional keyword arguments.

Returns

self : object Returns self.

def predict(self, X, level=95, method='splitconformal', **kwargs):
322    def predict(self, X, level=95, method="splitconformal", **kwargs):
323        """
324        Predict using the trained model.
325
326        Parameters
327        ----------
328
329        X : array-like of shape (n_samples, n_features)
330            Input data.
331
332        level : int, optional
333            Level of confidence for prediction intervals (default=95).
334
335        method : {'splitconformal', 'localconformal'}, optional
336            Method for conformal prediction (default='splitconformal').
337
338        **kwargs
339            Additional keyword arguments. Use `return_pi=True` for prediction intervals,
340            or `return_std=True` for standard deviation estimates.
341
342        Returns
343        -------
344
345        array or tuple
346            Model predictions, or a tuple with prediction intervals or standard deviations if requested.
347        """
348        if "return_std" in kwargs:
349            alpha = 100 - level
350            pi_multiplier = norm.ppf(1 - alpha / 200)
351
352            if len(X.shape) == 1:
353                n_features = X.shape[0]
354                new_X = mo.rbind(
355                    X.reshape(1, n_features),
356                    np.ones(n_features).reshape(1, n_features),
357                )
358
359                mean_, std_ = self.model.predict(new_X, return_std=True)[0]
360
361                preds = mean_
362                lower = mean_ - pi_multiplier * std_
363                upper = mean_ + pi_multiplier * std_
364
365                DescribeResults = namedtuple(
366                    "DescribeResults", ["mean", "std", "lower", "upper"]
367                )
368
369                return DescribeResults(preds, std_, lower, upper)
370
371            # len(X.shape) > 1
372            mean_, std_ = self.model.predict(X, return_std=True)
373
374            preds = mean_
375            lower = mean_ - pi_multiplier * std_
376            upper = mean_ + pi_multiplier * std_
377
378            DescribeResults = namedtuple(
379                "DescribeResults", ["mean", "std", "lower", "upper"]
380            )
381
382            return DescribeResults(preds, std_, lower, upper)
383
384        if "return_pi" in kwargs:
385            assert method in (
386                "splitconformal",
387                "localconformal",
388            ), "method must be in ('splitconformal', 'localconformal')"
389            self.pi = ns.PredictionInterval(
390                obj=self,
391                method=method,
392                level=level,
393                type_pi=self.type_pi,
394                replications=self.replications,
395                kernel=self.kernel,
396            )
397
398            if len(self.X_.shape) == 1:
399                if isinstance(X, pd.DataFrame):
400                    self.X_ = pd.DataFrame(
401                        self.X_.values.reshape(1, -1), columns=self.X_.columns
402                    )
403                else:
404                    self.X_ = self.X_.reshape(1, -1)
405                self.y_ = np.array([self.y_])
406
407            self.pi.fit(self.X_, self.y_)
408            # self.X_ = None # consumes memory to keep, dangerous to delete (side effect)
409            # self.y_ = None # consumes memory to keep, dangerous to delete (side effect)
410            preds = self.pi.predict(X, return_pi=True)
411            return preds
412
413        # "return_std" not in kwargs
414        if len(X.shape) == 1:
415            n_features = X.shape[0]
416            new_X = mo.rbind(
417                X.reshape(1, n_features),
418                np.ones(n_features).reshape(1, n_features),
419            )
420
421            return (0 + self.model.predict(new_X, **kwargs))[0]
422
423        # len(X.shape) > 1
424        return self.model.predict(X, **kwargs)

Predict using the trained model.

Parameters

X : array-like of shape (n_samples, n_features) Input data.

level : int, optional Level of confidence for prediction intervals (default=95).

method : {'splitconformal', 'localconformal'}, optional Method for conformal prediction (default='splitconformal').

**kwargs Additional keyword arguments. Use return_pi=True for prediction intervals, or return_std=True for standard deviation estimates.

Returns

array or tuple Model predictions, or a tuple with prediction intervals or standard deviations if requested.

class DeepClassifier(nnetsauce.CustomClassifier, sklearn.base.ClassifierMixin):
 36class DeepClassifier(CustomClassifier, ClassifierMixin):
 37    """
 38    Deep Classifier
 39
 40    Parameters:
 41
 42        obj: an object
 43            A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification
 44
 45        n_layers: int (default=3)
 46            Number of layers. `n_layers = 1` is a simple `CustomClassifier`
 47
 48        verbose : int, optional (default=0)
 49            Monitor progress when fitting.
 50
 51        All the other parameters are nnetsauce `CustomClassifier`'s
 52
 53    Examples:
 54
 55        ```python
 56        import nnetsauce as ns
 57        from sklearn.datasets import load_breast_cancer
 58        from sklearn.model_selection import train_test_split
 59        from sklearn.linear_model import LogisticRegressionCV
 60        data = load_breast_cancer()
 61        X = data.data
 62        y= data.target
 63        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
 64        obj = LogisticRegressionCV()
 65        clf = ns.DeepClassifier(obj)
 66        clf.fit(X_train, y_train)
 67        print(clf.score(X_test, y_test))
 68        ```
 69    """
 70
 71    _estimator_type = "classifier"
 72
 73    def __init__(
 74        self,
 75        obj,
 76        # Defining depth
 77        n_layers=3,
 78        verbose=0,
 79        # CustomClassifier attributes
 80        n_hidden_features=5,
 81        activation_name="relu",
 82        a=0.01,
 83        nodes_sim="sobol",
 84        bias=True,
 85        dropout=0,
 86        direct_link=True,
 87        n_clusters=2,
 88        cluster_encode=True,
 89        type_clust="kmeans",
 90        type_scaling=("std", "std", "std"),
 91        col_sample=1,
 92        row_sample=1,
 93        cv_calibration=2,
 94        calibration_method="sigmoid",
 95        seed=123,
 96        backend="cpu",
 97    ):
 98        super().__init__(
 99            obj=obj,
100            n_hidden_features=n_hidden_features,
101            activation_name=activation_name,
102            a=a,
103            nodes_sim=nodes_sim,
104            bias=bias,
105            dropout=dropout,
106            direct_link=direct_link,
107            n_clusters=n_clusters,
108            cluster_encode=cluster_encode,
109            type_clust=type_clust,
110            type_scaling=type_scaling,
111            col_sample=col_sample,
112            row_sample=row_sample,
113            seed=seed,
114            backend=backend,
115        )
116        self.coef_ = None
117        self.intercept_ = None
118        self.type_fit = "classification"
119        self.cv_calibration = cv_calibration
120        self.calibration_method = calibration_method
121
122        # Only wrap in CalibratedClassifierCV if not already wrapped
123        # if not isinstance(obj, CalibratedClassifierCV):
124        #     self.obj = CalibratedClassifierCV(
125        #         self.obj,
126        #         cv=self.cv_calibration,
127        #         method=self.calibration_method
128        #     )
129        # else:
130        self.coef_ = None
131        self.intercept_ = None
132        self.type_fit = "classification"
133        self.cv_calibration = cv_calibration
134        self.calibration_method = calibration_method
135        self.obj = obj
136
137        assert n_layers >= 1, "must have n_layers >= 1"
138        self.stacked_obj = obj
139        self.verbose = verbose
140        self.n_layers = n_layers
141        self.classes_ = None
142        self.n_classes_ = None
143
144    def fit(self, X, y, **kwargs):
145        """Fit Classification algorithms to X and y.
146        Parameters
147        ----------
148        X : array-like,
149            Training vectors, where rows is the number of samples
150            and columns is the number of features.
151        y : array-like,
152            Training vectors, where rows is the number of samples
153            and columns is the number of features.
154        **kwargs: dict
155            Additional parameters to be passed to the fit method
156            of the base learner. For example, `sample_weight`.
157
158        Returns
159        -------
160        A fitted object
161        """
162
163        self.classes_ = np.unique(y)
164        self.n_classes_ = len(
165            self.classes_
166        )  # for compatibility with         scikit-learn
167
168        if isinstance(X, np.ndarray):
169            X = pd.DataFrame(X)
170
171        # init layer
172        self.stacked_obj = CustomClassifier(
173            obj=self.stacked_obj,
174            n_hidden_features=self.n_hidden_features,
175            activation_name=self.activation_name,
176            a=self.a,
177            nodes_sim=self.nodes_sim,
178            bias=self.bias,
179            dropout=self.dropout,
180            direct_link=self.direct_link,
181            n_clusters=self.n_clusters,
182            cluster_encode=self.cluster_encode,
183            type_clust=self.type_clust,
184            type_scaling=self.type_scaling,
185            col_sample=self.col_sample,
186            row_sample=self.row_sample,
187            cv_calibration=None,
188            calibration_method=None,
189            seed=self.seed,
190            backend=self.backend,
191        )
192
193        if self.verbose > 0:
194            iterator = tqdm(range(self.n_layers - 1))
195        else:
196            iterator = range(self.n_layers - 1)
197
198        for _ in iterator:
199            self.stacked_obj = deepcopy(
200                CustomClassifier(
201                    obj=self.stacked_obj,
202                    n_hidden_features=self.n_hidden_features,
203                    activation_name=self.activation_name,
204                    a=self.a,
205                    nodes_sim=self.nodes_sim,
206                    bias=self.bias,
207                    dropout=self.dropout,
208                    direct_link=self.direct_link,
209                    n_clusters=self.n_clusters,
210                    cluster_encode=self.cluster_encode,
211                    type_clust=self.type_clust,
212                    type_scaling=self.type_scaling,
213                    col_sample=self.col_sample,
214                    row_sample=self.row_sample,
215                    cv_calibration=None,
216                    calibration_method=None,
217                    seed=self.seed,
218                    backend=self.backend,
219                )
220            )
221            self.stacked_obj.fit(X, y, **kwargs)
222
223        return self
224
225    def partial_fit(self, X, y, **kwargs):
226        """Incrementally update the nested learners of the stack with X and y.
227        Parameters
228        ----------
229        X : array-like,
230            Training vectors, where rows is the number of samples
231            and columns is the number of features.
232        y : array-like,
233            Target values, passed unchanged to each nested `partial_fit`.
234            NOTE(review): a `ValueError` in a layer is silently ignored -- confirm.
235        **kwargs: dict
236            Additional parameters to be passed to the `partial_fit` method
237            of the nested learners. For example, `sample_weight`.
238        Returns
239        -------
240        A fitted object
241        """
242        assert hasattr(self, "stacked_obj"), "model must be fitted first"
243        current_obj = self.stacked_obj
244        for _ in range(self.n_layers):
245            try:
246                input_X = current_obj.obj.cook_test_set(X)
247                current_obj.obj.partial_fit(input_X, y, **kwargs)
248                try:
249                    current_obj = current_obj.obj
250                except AttributeError:
251                    pass
252            except ValueError:
253                pass
254        return self
255
256    def predict(self, X):
257        return self.stacked_obj.predict(X)
258
259    def predict_proba(self, X):
260        return self.stacked_obj.predict_proba(X)
261
262    def score(self, X, y, scoring=None):
263        return self.stacked_obj.score(X, y, scoring)
264
265    def cross_val_optim(
266        self,
267        X_train,
268        y_train,
269        X_test=None,
270        y_test=None,
271        scoring="accuracy",
272        surrogate_obj=None,
273        cv=5,
274        n_jobs=None,
275        n_init=10,
276        n_iter=190,
277        abs_tol=1e-3,
278        verbose=2,
279        seed=123,
280        **kwargs,
281    ):
282        """Cross-validation function and hyperparameters' search
283
284        Parameters:
285
286            X_train: array-like,
287                Training vectors, where rows is the number of samples
288                and columns is the number of features.
289
290            y_train: array-like,
291                Training vectors, where rows is the number of samples
292                and columns is the number of features.
293
294            X_test: array-like,
295                Testing vectors, where rows is the number of samples
296                and columns is the number of features.
297
298            y_test: array-like,
299                Testing vectors, where rows is the number of samples
300                and columns is the number of features.
301
302            scoring: str
303                scoring metric; see https://scikit-learn.org/stable/modules/model_evaluation.html#the-scoring-parameter-defining-model-evaluation-rules
304
305            surrogate_obj: an object;
306                An ML model for estimating the uncertainty around the objective function
307
308            cv: int;
309                number of cross-validation folds
310
311            n_jobs: int;
312                number of jobs for parallel execution
313
314            n_init: an integer;
315                number of points in the initial setting, when `x_init` and `y_init` are not provided
316
317            n_iter: an integer;
318                number of iterations of the minimization algorithm
319
320            abs_tol: a float;
321                tolerance for convergence of the optimizer (early stopping based on acquisition function)
322
323            verbose: int
324                controls verbosity
325
326            seed: int
327                reproducibility seed
328
329            **kwargs: dict
330                additional parameters to be passed to the estimator
331
332        Examples:
333
334            ```python
335            ```
336        """
337
338        num_to_activation_name = {1: "relu", 2: "sigmoid", 3: "tanh"}
339        num_to_nodes_sim = {1: "sobol", 2: "uniform", 3: "hammersley"}
340        num_to_type_clust = {1: "kmeans", 2: "gmm"}
341
342        def deepclassifier_cv(
343            X_train,
344            y_train,
345            # Defining depth
346            n_layers=3,
347            # CustomClassifier attributes
348            n_hidden_features=5,
349            activation_name="relu",
350            nodes_sim="sobol",
351            dropout=0,
352            n_clusters=2,
353            type_clust="kmeans",
354            cv=5,
355            n_jobs=None,
356            scoring="accuracy",
357            seed=123,
358        ):
359            self.set_params(
360                **{
361                    "n_layers": n_layers,
362                    # CustomClassifier attributes
363                    "n_hidden_features": n_hidden_features,
364                    "activation_name": activation_name,
365                    "nodes_sim": nodes_sim,
366                    "dropout": dropout,
367                    "n_clusters": n_clusters,
368                    "type_clust": type_clust,
369                    **kwargs,
370                }
371            )
372            return -cross_val_score(
373                estimator=self,
374                X=X_train,
375                y=y_train,
376                scoring=scoring,
377                cv=cv,
378                n_jobs=n_jobs,
379                verbose=0,
380            ).mean()
381
382        # objective function for hyperparams tuning
383        def crossval_objective(xx):
384            return deepclassifier_cv(
385                X_train=X_train,
386                y_train=y_train,
387                # Defining depth
388                n_layers=int(np.ceil(xx[0])),
389                # CustomClassifier attributes
390                n_hidden_features=int(np.ceil(xx[1])),
391                activation_name=num_to_activation_name[np.ceil(xx[2])],
392                nodes_sim=num_to_nodes_sim[int(np.ceil(xx[3]))],
393                dropout=xx[4],
394                n_clusters=int(np.ceil(xx[5])),
395                type_clust=num_to_type_clust[int(np.ceil(xx[6]))],
396                cv=cv,
397                n_jobs=n_jobs,
398                scoring=scoring,
399                seed=seed,
400            )
401
402        if surrogate_obj is None:
403            gp_opt = gp.GPOpt(
404                objective_func=crossval_objective,
405                lower_bound=np.array([0, 3, 0, 0, 0.0, 0, 0]),
406                upper_bound=np.array([5, 100, 3, 3, 0.4, 5, 2]),
407                params_names=[
408                    "n_layers",
409                    # CustomClassifier attributes
410                    "n_hidden_features",
411                    "activation_name",
412                    "nodes_sim",
413                    "dropout",
414                    "n_clusters",
415                    "type_clust",
416                ],
417                method="bayesian",
418                n_init=n_init,
419                n_iter=n_iter,
420                seed=seed,
421            )
422        else:
423            gp_opt = gp.GPOpt(
424                objective_func=crossval_objective,
425                lower_bound=np.array([0, 3, 0, 0, 0.0, 0, 0]),
426                upper_bound=np.array([5, 100, 3, 3, 0.4, 5, 2]),
427                params_names=[
428                    "n_layers",
429                    # CustomClassifier attributes
430                    "n_hidden_features",
431                    "activation_name",
432                    "nodes_sim",
433                    "dropout",
434                    "n_clusters",
435                    "type_clust",
436                ],
437                acquisition="ucb",
438                method="splitconformal",
439                surrogate_obj=ns.PredictionInterval(
440                    obj=surrogate_obj, method="splitconformal"
441                ),
442                n_init=n_init,
443                n_iter=n_iter,
444                seed=seed,
445            )
446
447        res = gp_opt.optimize(verbose=verbose, abs_tol=abs_tol)
448        res.best_params["n_layers"] = int(np.ceil(res.best_params["n_layers"]))
449        res.best_params["n_hidden_features"] = int(
450            np.ceil(res.best_params["n_hidden_features"])
451        )
452        res.best_params["activation_name"] = num_to_activation_name[
453            np.ceil(res.best_params["activation_name"])
454        ]
455        res.best_params["nodes_sim"] = num_to_nodes_sim[
456            int(np.ceil(res.best_params["nodes_sim"]))
457        ]
458        res.best_params["dropout"] = res.best_params["dropout"]
459        res.best_params["n_clusters"] = int(
460            np.ceil(res.best_params["n_clusters"])
461        )
462        res.best_params["type_clust"] = num_to_type_clust[
463            int(np.ceil(res.best_params["type_clust"]))
464        ]
465
466        # out-of-sample error
467        if X_test is not None and y_test is not None:
468            self.set_params(**res.best_params, verbose=0, seed=seed)
469            preds = self.fit(X_train, y_train).predict(X_test)
470            # check error on y_test
471            oos_err = getattr(metrics, scoring + "_score")(
472                y_true=y_test, y_pred=preds
473            )
474            result = namedtuple("result", res._fields + ("test_" + scoring,))
475            return result(*res, oos_err)
476        else:
477            return res
478
479    def lazy_cross_val_optim(
480        self,
481        X_train,
482        y_train,
483        X_test=None,
484        y_test=None,
485        scoring="accuracy",
486        surrogate_objs=None,
487        customize=False,
488        cv=5,
489        n_jobs=None,
490        n_init=10,
491        n_iter=190,
492        abs_tol=1e-3,
493        verbose=1,
494        seed=123,
495    ):
496        """Automated Cross-validation function and hyperparameters' search using multiple surrogates
497
498        Parameters:
499
500            X_train: array-like,
501                Training vectors, where rows is the number of samples
502                and columns is the number of features.
503
504            y_train: array-like,
505                Training vectors, where rows is the number of samples
506                and columns is the number of features.
507
508            X_test: array-like,
509                Testing vectors, where rows is the number of samples
510                and columns is the number of features.
511
512            y_test: array-like,
513                Testing vectors, where rows is the number of samples
514                and columns is the number of features.
515
516            scoring: str
517                scoring metric; see https://scikit-learn.org/stable/modules/model_evaluation.html#the-scoring-parameter-defining-model-evaluation-rules
518
519            surrogate_objs: object names as a list of strings;
520                ML models for estimating the uncertainty around the objective function
521
522            customize: boolean
523                if True, the surrogate is transformed into a quasi-randomized network (default is False)
524
525            cv: int;
526                number of cross-validation folds
527
528            n_jobs: int;
529                number of jobs for parallel execution
530
531            n_init: an integer;
532                number of points in the initial setting, when `x_init` and `y_init` are not provided
533
534            n_iter: an integer;
535                number of iterations of the minimization algorithm
536
537            abs_tol: a float;
538                tolerance for convergence of the optimizer (early stopping based on acquisition function)
539
540            verbose: int
541                controls verbosity
542
543            seed: int
544                reproducibility seed
545
546        Examples:
547
548            ```python
549            ```
550        """
551
552        removed_regressors = [
553            "TheilSenRegressor",
554            "ARDRegression",
555            "CCA",
556            "GaussianProcessRegressor",
557            "GradientBoostingRegressor",
558            "HistGradientBoostingRegressor",
559            "IsotonicRegression",
560            "MultiOutputRegressor",
561            "MultiTaskElasticNet",
562            "MultiTaskElasticNetCV",
563            "MultiTaskLasso",
564            "MultiTaskLassoCV",
565            "OrthogonalMatchingPursuit",
566            "OrthogonalMatchingPursuitCV",
567            "PLSCanonical",
568            "PLSRegression",
569            "RadiusNeighborsRegressor",
570            "RegressorChain",
571            "StackingRegressor",
572            "VotingRegressor",
573        ]
574
575        results = []
576
577        for est in all_estimators():
578            if surrogate_objs is None:
579                if issubclass(est[1], RegressorMixin) and (
580                    est[0] not in removed_regressors
581                ):
582                    try:
583                        if customize == True:
584                            surr_obj = ns.CustomClassifier(obj=est[1]())
585                        else:
586                            surr_obj = est[1]()
587                        res = self.cross_val_optim(
588                            X_train=X_train,
589                            y_train=y_train,
590                            X_test=X_test,
591                            y_test=y_test,
592                            surrogate_obj=surr_obj,
593                            cv=cv,
594                            n_jobs=n_jobs,
595                            scoring=scoring,
596                            n_init=n_init,
597                            n_iter=n_iter,
598                            abs_tol=abs_tol,
599                            verbose=verbose,
600                            seed=seed,
601                        )
602                        if customize == True:
603                            results.append((f"CustomClassifier({est[0]})", res))
604                        else:
605                            results.append((est[0], res))
606                    except:
607                        pass
608
609            else:
610                if (
611                    issubclass(est[1], RegressorMixin)
612                    and (est[0] not in removed_regressors)
613                    and est[0] in surrogate_objs
614                ):
615                    try:
616                        if customize == True:
617                            surr_obj = ns.CustomClassifier(obj=est[1]())
618                        else:
619                            surr_obj = est[1]()
620                        res = self.cross_val_optim(
621                            X_train=X_train,
622                            y_train=y_train,
623                            X_test=X_test,
624                            y_test=y_test,
625                            surrogate_obj=surr_obj,
626                            cv=cv,
627                            n_jobs=n_jobs,
628                            scoring=scoring,
629                            n_init=n_init,
630                            n_iter=n_iter,
631                            abs_tol=abs_tol,
632                            verbose=verbose,
633                            seed=seed,
634                        )
635                        if customize == True:
636                            results.append((f"CustomClassifier({est[0]})", res))
637                        else:
638                            results.append((est[0], res))
639                    except:
640                        pass
641
642        return results
643
644    @property
645    def _estimator_type(self):
646        return "classifier"

Deep Classifier

Parameters:

obj: an object
    A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification

n_layers: int (default=3)
    Number of layers. `n_layers = 1` is a simple `CustomClassifier`

verbose : int, optional (default=0)
    Monitor progress when fitting.

All the other parameters are nnetsauce `CustomClassifier`'s

Examples:

import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegressionCV
data = load_breast_cancer()
X = data.data
y= data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
obj = LogisticRegressionCV()
clf = ns.DeepClassifier(obj)
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))
def fit(self, X, y, **kwargs):
144    def fit(self, X, y, **kwargs):
145        """Fit Classification algorithms to X and y.
146        Parameters
147        ----------
148        X : array-like,
149            Training vectors, where rows is the number of samples
150            and columns is the number of features.
151        y : array-like,
152            Training vectors, where rows is the number of samples
153            and columns is the number of features.
154        **kwargs: dict
155            Additional parameters to be passed to the fit method
156            of the base learner. For example, `sample_weight`.
157
158        Returns
159        -------
160        A fitted object
161        """
162
163        self.classes_ = np.unique(y)
164        self.n_classes_ = len(
165            self.classes_
166        )  # for compatibility with         scikit-learn
167
168        if isinstance(X, np.ndarray):
169            X = pd.DataFrame(X)
170
171        # init layer
172        self.stacked_obj = CustomClassifier(
173            obj=self.stacked_obj,
174            n_hidden_features=self.n_hidden_features,
175            activation_name=self.activation_name,
176            a=self.a,
177            nodes_sim=self.nodes_sim,
178            bias=self.bias,
179            dropout=self.dropout,
180            direct_link=self.direct_link,
181            n_clusters=self.n_clusters,
182            cluster_encode=self.cluster_encode,
183            type_clust=self.type_clust,
184            type_scaling=self.type_scaling,
185            col_sample=self.col_sample,
186            row_sample=self.row_sample,
187            cv_calibration=None,
188            calibration_method=None,
189            seed=self.seed,
190            backend=self.backend,
191        )
192
193        if self.verbose > 0:
194            iterator = tqdm(range(self.n_layers - 1))
195        else:
196            iterator = range(self.n_layers - 1)
197
198        for _ in iterator:
199            self.stacked_obj = deepcopy(
200                CustomClassifier(
201                    obj=self.stacked_obj,
202                    n_hidden_features=self.n_hidden_features,
203                    activation_name=self.activation_name,
204                    a=self.a,
205                    nodes_sim=self.nodes_sim,
206                    bias=self.bias,
207                    dropout=self.dropout,
208                    direct_link=self.direct_link,
209                    n_clusters=self.n_clusters,
210                    cluster_encode=self.cluster_encode,
211                    type_clust=self.type_clust,
212                    type_scaling=self.type_scaling,
213                    col_sample=self.col_sample,
214                    row_sample=self.row_sample,
215                    cv_calibration=None,
216                    calibration_method=None,
217                    seed=self.seed,
218                    backend=self.backend,
219                )
220            )
221            self.stacked_obj.fit(X, y, **kwargs)
222
223        return self

Fit Classification algorithms to X and y.

Parameters

X : array-like, Training vectors, where rows is the number of samples and columns is the number of features.

y : array-like, Target values (class labels), one per row of X.

**kwargs: dict Additional parameters to be passed to the fit method of the base learner. For example, sample_weight.

Returns

A fitted object

def predict(self, X):
256    def predict(self, X):
257        return self.stacked_obj.predict(X)

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
def predict_proba(self, X):
259    def predict_proba(self, X):
260        return self.stacked_obj.predict_proba(X)

Predict probabilities for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

probability estimates for test data: {array-like}
def score(self, X, y, scoring=None):
262    def score(self, X, y, scoring=None):
263        return self.stacked_obj.score(X, y, scoring)

Scoring function for classification.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

scoring: str
    scoring method (default is accuracy)

Returns:

score: float
class DeepRegressor(nnetsauce.CustomRegressor, sklearn.base.RegressorMixin):
 13class DeepRegressor(CustomRegressor, RegressorMixin):
 14    """
 15    Deep Regressor
 16
 17    Parameters:
 18
 19        obj: an object
 20            A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification
 21
 22        verbose : int, optional (default=0)
 23            Monitor progress when fitting.
 24
 25        n_layers: int (default=2)
 26            Number of layers. `n_layers = 1` is a simple `CustomRegressor`
 27
 28        All the other parameters are nnetsauce `CustomRegressor`'s
 29
 30    Examples:
 31
 32        ```python
 33        import nnetsauce as ns
 34        from sklearn.datasets import load_diabetes
 35        from sklearn.model_selection import train_test_split
 36        from sklearn.linear_model import RidgeCV
 37        data = load_diabetes()
 38        X = data.data
 39        y= data.target
 40        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
 41        obj = RidgeCV()
 42        clf = ns.DeepRegressor(obj)
 43        clf.fit(X_train, y_train)
 44        print(clf.score(clf.predict(X_test), y_test))
 45        ```
 46
 47    """
 48
 49    def __init__(
 50        self,
 51        obj,
 52        # Defining depth
 53        n_layers=2,
 54        verbose=0,
 55        # CustomRegressor attributes
 56        n_hidden_features=5,
 57        activation_name="relu",
 58        a=0.01,
 59        nodes_sim="sobol",
 60        bias=True,
 61        dropout=0,
 62        direct_link=True,
 63        n_clusters=2,
 64        cluster_encode=True,
 65        type_clust="kmeans",
 66        type_scaling=("std", "std", "std"),
 67        col_sample=1,
 68        row_sample=1,
 69        level=None,
 70        pi_method="splitconformal",
 71        seed=123,
 72        backend="cpu",
 73    ):
 74        super().__init__(
 75            obj=obj,
 76            n_hidden_features=n_hidden_features,
 77            activation_name=activation_name,
 78            a=a,
 79            nodes_sim=nodes_sim,
 80            bias=bias,
 81            dropout=dropout,
 82            direct_link=direct_link,
 83            n_clusters=n_clusters,
 84            cluster_encode=cluster_encode,
 85            type_clust=type_clust,
 86            type_scaling=type_scaling,
 87            col_sample=col_sample,
 88            row_sample=row_sample,
 89            level=level,
 90            pi_method=pi_method,
 91            seed=seed,
 92            backend=backend,
 93        )
 94
 95        assert n_layers >= 1, "must have n_layers >= 1"
 96
 97        self.stacked_obj = deepcopy(obj)
 98        self.verbose = verbose
 99        self.n_layers = n_layers
100        self.level = level
101        self.pi_method = pi_method
102        self.coef_ = None
103
104    def fit(self, X, y, **kwargs):
105        """Fit Regression algorithms to X and y.
106        Parameters
107        ----------
108        X : array-like,
109            Training vectors, where rows is the number of samples
110            and columns is the number of features.
111        y : array-like,
112            Training vectors, where rows is the number of samples
113            and columns is the number of features.
114        **kwargs: dict
115            Additional parameters to be passed to the fit method
116            of the base learner. For example, `sample_weight`.
117        Returns
118        -------
119        A fitted object
120        """
121
122        if isinstance(X, np.ndarray):
123            X = pd.DataFrame(X)
124
125        # init layer
126        self.stacked_obj = CustomRegressor(
127            obj=self.stacked_obj,
128            n_hidden_features=self.n_hidden_features,
129            activation_name=self.activation_name,
130            a=self.a,
131            nodes_sim=self.nodes_sim,
132            bias=self.bias,
133            dropout=self.dropout,
134            direct_link=self.direct_link,
135            n_clusters=self.n_clusters,
136            cluster_encode=self.cluster_encode,
137            type_clust=self.type_clust,
138            type_scaling=self.type_scaling,
139            col_sample=self.col_sample,
140            row_sample=self.row_sample,
141            seed=self.seed,
142            backend=self.backend,
143        )
144
145        if self.verbose > 0:
146            iterator = tqdm(range(self.n_layers - 1))
147        else:
148            iterator = range(self.n_layers - 1)
149
150        for _ in iterator:
151            self.stacked_obj = deepcopy(
152                CustomRegressor(
153                    obj=self.stacked_obj,
154                    n_hidden_features=self.n_hidden_features,
155                    activation_name=self.activation_name,
156                    a=self.a,
157                    nodes_sim=self.nodes_sim,
158                    bias=self.bias,
159                    dropout=self.dropout,
160                    direct_link=self.direct_link,
161                    n_clusters=self.n_clusters,
162                    cluster_encode=self.cluster_encode,
163                    type_clust=self.type_clust,
164                    type_scaling=self.type_scaling,
165                    col_sample=self.col_sample,
166                    row_sample=self.row_sample,
167                    seed=self.seed,
168                    backend=self.backend,
169                )
170            )
171
172        self.stacked_obj.fit(X, y, **kwargs)
173
174        if self.level is not None:
175            self.stacked_obj = PredictionInterval(
176                obj=self.stacked_obj, method=self.pi_method, level=self.level
177            )
178
179        if hasattr(self.stacked_obj, "clustering_obj_"):
180            self.clustering_obj_ = self.stacked_obj.clustering_obj_
181
182        if hasattr(self.stacked_obj, "coef_"):
183            self.coef_ = self.stacked_obj.coef_
184
185        if hasattr(self.stacked_obj, "scaler_"):
186            self.scaler_ = self.stacked_obj.scaler_
187
188        if hasattr(self.stacked_obj, "nn_scaler_"):
189            self.nn_scaler_ = self.stacked_obj.nn_scaler_
190
191        if hasattr(self.stacked_obj, "clustering_scaler_"):
192            self.clustering_scaler_ = self.stacked_obj.clustering_scaler_
193
194        return self
195
196    def partial_fit(self, X, y, **kwargs):
197        """Fit Regression algorithms to X and y.
198        Parameters
199        ----------
200        X : array-like,
201            Training vectors, where rows is the number of samples
202            and columns is the number of features.
203        y : array-like,
204            Training vectors, where rows is the number of samples
205            and columns is the number of features.
206        **kwargs: dict
207            Additional parameters to be passed to the fit method
208            of the base learner. For example, `sample_weight`.
209        Returns
210        -------
211        A fitted object
212        """
213        assert hasattr(self, "stacked_obj"), "model must be fitted first"
214        current_obj = self.stacked_obj
215        for _ in range(self.n_layers):
216            try:
217                input_X = current_obj.obj.cook_test_set(X)
218                current_obj.obj.partial_fit(input_X, y, **kwargs)
219                try:
220                    current_obj = current_obj.obj
221                except AttributeError:
222                    pass
223            except ValueError as e:
224                print(e)
225                pass
226        return self
227
228    def predict(self, X, **kwargs):
229        if self.level is not None:
230            return self.stacked_obj.predict(X, return_pi=True)
231        return self.stacked_obj.predict(X, **kwargs)
232
233    def score(self, X, y, scoring=None):
234        return self.stacked_obj.score(X, y, scoring)

Deep Regressor

Parameters:

obj: an object
    A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification

verbose : int, optional (default=0)
    Monitor progress when fitting.

n_layers: int (default=2)
    Number of layers. `n_layers = 1` is a simple `CustomRegressor`

All the other parameters are nnetsauce `CustomRegressor`'s

Examples:

import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.linear_model import RidgeCV
data = load_diabetes()
X = data.data
y= data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
obj = RidgeCV()
clf = ns.DeepRegressor(obj)
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))
def fit(self, X, y, **kwargs):
104    def fit(self, X, y, **kwargs):
105        """Fit Regression algorithms to X and y.
106        Parameters
107        ----------
108        X : array-like,
109            Training vectors, where rows is the number of samples
110            and columns is the number of features.
111        y : array-like,
112            Training vectors, where rows is the number of samples
113            and columns is the number of features.
114        **kwargs: dict
115            Additional parameters to be passed to the fit method
116            of the base learner. For example, `sample_weight`.
117        Returns
118        -------
119        A fitted object
120        """
121
122        if isinstance(X, np.ndarray):
123            X = pd.DataFrame(X)
124
125        # init layer
126        self.stacked_obj = CustomRegressor(
127            obj=self.stacked_obj,
128            n_hidden_features=self.n_hidden_features,
129            activation_name=self.activation_name,
130            a=self.a,
131            nodes_sim=self.nodes_sim,
132            bias=self.bias,
133            dropout=self.dropout,
134            direct_link=self.direct_link,
135            n_clusters=self.n_clusters,
136            cluster_encode=self.cluster_encode,
137            type_clust=self.type_clust,
138            type_scaling=self.type_scaling,
139            col_sample=self.col_sample,
140            row_sample=self.row_sample,
141            seed=self.seed,
142            backend=self.backend,
143        )
144
145        if self.verbose > 0:
146            iterator = tqdm(range(self.n_layers - 1))
147        else:
148            iterator = range(self.n_layers - 1)
149
150        for _ in iterator:
151            self.stacked_obj = deepcopy(
152                CustomRegressor(
153                    obj=self.stacked_obj,
154                    n_hidden_features=self.n_hidden_features,
155                    activation_name=self.activation_name,
156                    a=self.a,
157                    nodes_sim=self.nodes_sim,
158                    bias=self.bias,
159                    dropout=self.dropout,
160                    direct_link=self.direct_link,
161                    n_clusters=self.n_clusters,
162                    cluster_encode=self.cluster_encode,
163                    type_clust=self.type_clust,
164                    type_scaling=self.type_scaling,
165                    col_sample=self.col_sample,
166                    row_sample=self.row_sample,
167                    seed=self.seed,
168                    backend=self.backend,
169                )
170            )
171
172        self.stacked_obj.fit(X, y, **kwargs)
173
174        if self.level is not None:
175            self.stacked_obj = PredictionInterval(
176                obj=self.stacked_obj, method=self.pi_method, level=self.level
177            )
178
179        if hasattr(self.stacked_obj, "clustering_obj_"):
180            self.clustering_obj_ = self.stacked_obj.clustering_obj_
181
182        if hasattr(self.stacked_obj, "coef_"):
183            self.coef_ = self.stacked_obj.coef_
184
185        if hasattr(self.stacked_obj, "scaler_"):
186            self.scaler_ = self.stacked_obj.scaler_
187
188        if hasattr(self.stacked_obj, "nn_scaler_"):
189            self.nn_scaler_ = self.stacked_obj.nn_scaler_
190
191        if hasattr(self.stacked_obj, "clustering_scaler_"):
192            self.clustering_scaler_ = self.stacked_obj.clustering_scaler_
193
194        return self

Fit Regression algorithms to X and y.

Parameters

X : array-like, Training vectors, where rows is the number of samples and columns is the number of features. y : array-like, Target values, one per row of X. **kwargs: dict Additional parameters to be passed to the fit method of the base learner. For example, sample_weight.

Returns

A fitted object

def predict(self, X, **kwargs):
228    def predict(self, X, **kwargs):
229        if self.level is not None:
230            return self.stacked_obj.predict(X, return_pi=True)
231        return self.stacked_obj.predict(X, **kwargs)

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

level: int
    Level of confidence (default = 95)

method: str
    'splitconformal', 'localconformal'
    prediction (if you specify `return_pi = True`)

**kwargs: additional parameters
        `return_pi = True` for conformal prediction,
        with `method` in ('splitconformal', 'localconformal')
        or `return_std = True` for `self.obj` in
        (`sklearn.linear_model.BayesianRidge`,
        `sklearn.linear_model.ARDRegressor`,
        `sklearn.gaussian_process.GaussianProcessRegressor`)

Returns:

model predictions:
    an array if uncertainty quantification is not requested,
      or a tuple if with prediction intervals and simulations
      if `return_std = True` (mean, standard deviation,
      lower and upper prediction interval) or `return_pi = True`
      ()
def score(self, X, y, scoring=None):
233    def score(self, X, y, scoring=None):
234        return self.stacked_obj.score(X, y, scoring)

Compute the score of the model.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

scoring: str
    scoring method

Returns:

score: float
class DeepMTS(nnetsauce.MTS):
 11class DeepMTS(MTS):
 12    """Univariate and multivariate time series (DeepMTS) forecasting with Quasi-Randomized networks (Work in progress)
 13
 14    Parameters:
 15
 16        obj: object.
 17            any object containing a method fit (obj.fit()) and a method predict
 18            (obj.predict()).
 19
 20        n_layers: int.
 21            number of layers in the neural network.
 22
 23        n_hidden_features: int.
 24            number of nodes in the hidden layer.
 25
 26        activation_name: str.
 27            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'.
 28
 29        a: float.
 30            hyperparameter for 'prelu' or 'elu' activation function.
 31
 32        nodes_sim: str.
 33            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 34            'uniform'.
 35
 36        bias: boolean.
 37            indicates if the hidden layer contains a bias term (True) or not
 38            (False).
 39
 40        dropout: float.
 41            regularization parameter; (random) percentage of nodes dropped out
 42            of the training.
 43
 44        direct_link: boolean.
 45            indicates if the original predictors are included (True) in model's fitting or not (False).
 46
 47        n_clusters: int.
 48            number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering).
 49
 50        cluster_encode: bool.
 51            defines how the variable containing clusters is treated (default is one-hot)
 52            if `False`, then labels are used, without one-hot encoding.
 53
 54        type_clust: str.
 55            type of clustering method: currently k-means ('kmeans') or Gaussian
 56            Mixture Model ('gmm').
 57
 58        type_scaling: a tuple of 3 strings.
 59            scaling methods for inputs, hidden layer, and clustering respectively
 60            (and when relevant).
 61            Currently available: standardization ('std') or MinMax scaling ('minmax').
 62
 63        lags: int.
 64            number of lags used for each time series.
 65
 66        type_pi: str.
 67            type of prediction interval; currently:
 68            - "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case
 69            - "kde": based on Kernel Density Estimation of in-sample residuals
 70            - "bootstrap": based on independent bootstrap of in-sample residuals
 71            - "block-bootstrap": based on basic block bootstrap of in-sample residuals
 72            - "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals
 73            - "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals
 74            - "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals
 75            - "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals
 76            - "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals
 77            - "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals
 78
 79        block_size: int.
 80            size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap").
 81            Default is round(3.15*(n_residuals^1/3))
 82
 83        replications: int.
 84            number of replications (if needed, for predictive simulation). Default is 'None'.
 85
 86        kernel: str.
 87            the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'.
 88
 89        agg: str.
 90            either "mean" or "median" for simulation of bootstrap aggregating
 91
 92        seed: int.
 93            reproducibility seed for nodes_sim=='uniform' or predictive simulation.
 94
 95        backend: str.
 96            "cpu" or "gpu" or "tpu".
 97
 98        verbose: int.
 99            0: not printing; 1: printing
100
101        show_progress: bool.
102            True: progress bar when fitting each series; False: no progress bar when fitting each series
103
104    Attributes:
105
106        fit_objs_: dict
107            objects adjusted to each individual time series
108
109        y_: {array-like}
110            DeepMTS responses (most recent observations first)
111
112        X_: {array-like}
113            DeepMTS lags
114
115        xreg_: {array-like}
116            external regressors
117
118        y_means_: dict
119            a dictionary of each series mean values
120
121        preds_: {array-like}
122            successive model predictions
123
124        preds_std_: {array-like}
125            standard deviation around the predictions
126
127        return_std_: boolean
128            return uncertainty or not (set in predict)
129
130        df_: data frame
131            the input data frame, in case a data.frame is provided to `fit`
132
133    Examples:
134
135    Example 1:
136
137        ```python
138        import nnetsauce as ns
139        import numpy as np
140        from sklearn import linear_model
141        np.random.seed(123)
142
143        M = np.random.rand(10, 3)
144        M[:,0] = 10*M[:,0]
145        M[:,2] = 25*M[:,2]
146        print(M)
147
148        # Adjust Bayesian Ridge
149        regr4 = linear_model.BayesianRidge()
150        obj_DeepMTS = ns.DeepMTS(regr4, lags = 1, n_hidden_features=5)
151        obj_DeepMTS.fit(M)
152        print(obj_DeepMTS.predict())
153
154        # with credible intervals
155        print(obj_DeepMTS.predict(return_std=True, level=80))
156
157        print(obj_DeepMTS.predict(return_std=True, level=95))
158        ```
159
160    Example 2:
161
162        ```python
163        import nnetsauce as ns
164        import numpy as np
165        from sklearn import linear_model
166
167        dataset = {
168        'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
169        'series1' : [34, 30, 35.6, 33.3, 38.1],
170        'series2' : [4, 5.5, 5.6, 6.3, 5.1],
171        'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
172        df = pd.DataFrame(dataset).set_index('date')
173        print(df)
174
175        # Adjust Bayesian Ridge
176        regr5 = linear_model.BayesianRidge()
177        obj_DeepMTS = ns.DeepMTS(regr5, lags = 1, n_hidden_features=5)
178        obj_DeepMTS.fit(df)
179        print(obj_DeepMTS.predict())
180
181        # with credible intervals
182        print(obj_DeepMTS.predict(return_std=True, level=80))
183
184        print(obj_DeepMTS.predict(return_std=True, level=95))
185        ```
186
187    """
188
189    # construct the object -----
190
191    def __init__(
192        self,
193        obj,
194        n_layers=3,
195        n_hidden_features=5,
196        activation_name="relu",
197        a=0.01,
198        nodes_sim="sobol",
199        bias=True,
200        dropout=0,
201        direct_link=True,
202        n_clusters=2,
203        cluster_encode=True,
204        type_clust="kmeans",
205        type_scaling=("std", "std", "std"),
206        lags=1,
207        type_pi="kde",
208        block_size=None,
209        replications=None,
210        kernel=None,
211        agg="mean",
212        seed=123,
213        backend="cpu",
214        verbose=0,
215        show_progress=True,
216    ):
217        assert int(lags) == lags, "parameter 'lags' should be an integer"
218        assert n_layers >= 1, "must have n_layers >= 1"
219        self.n_layers = int(n_layers)
220
221        if self.n_layers > 1:
222            for _ in range(self.n_layers - 1):
223                obj = CustomRegressor(
224                    obj=deepcopy(obj),
225                    n_hidden_features=n_hidden_features,
226                    activation_name=activation_name,
227                    a=a,
228                    nodes_sim=nodes_sim,
229                    bias=bias,
230                    dropout=dropout,
231                    direct_link=direct_link,
232                    n_clusters=n_clusters,
233                    cluster_encode=cluster_encode,
234                    type_clust=type_clust,
235                    type_scaling=type_scaling,
236                    seed=seed,
237                    backend=backend,
238                )
239
240        self.obj = deepcopy(obj)
241        super().__init__(
242            obj=self.obj,
243            n_hidden_features=n_hidden_features,
244            activation_name=activation_name,
245            a=a,
246            nodes_sim=nodes_sim,
247            bias=bias,
248            dropout=dropout,
249            direct_link=direct_link,
250            n_clusters=n_clusters,
251            cluster_encode=cluster_encode,
252            type_clust=type_clust,
253            type_scaling=type_scaling,
254            lags=lags,
255            type_pi=type_pi,
256            block_size=block_size,
257            replications=replications,
258            kernel=kernel,
259            agg=agg,
260            seed=seed,
261            backend=backend,
262            verbose=verbose,
263            show_progress=show_progress,
264        )

Univariate and multivariate time series (DeepMTS) forecasting with Quasi-Randomized networks (Work in progress)

Parameters:

obj: object.
    any object containing a method fit (obj.fit()) and a method predict
    (obj.predict()).

n_layers: int.
    number of layers in the neural network.

n_hidden_features: int.
    number of nodes in the hidden layer.

activation_name: str.
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'.

a: float.
    hyperparameter for 'prelu' or 'elu' activation function.

nodes_sim: str.
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'.

bias: boolean.
    indicates if the hidden layer contains a bias term (True) or not
    (False).

dropout: float.
    regularization parameter; (random) percentage of nodes dropped out
    of the training.

direct_link: boolean.
    indicates if the original predictors are included (True) in model's fitting or not (False).

n_clusters: int.
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering).

cluster_encode: bool.
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding.

type_clust: str.
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm').

type_scaling: a tuple of 3 strings.
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax').

lags: int.
    number of lags used for each time series.

type_pi: str.
    type of prediction interval; currently:
    - "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case
    - "kde": based on Kernel Density Estimation of in-sample residuals
    - "bootstrap": based on independent bootstrap of in-sample residuals
    - "block-bootstrap": based on basic block bootstrap of in-sample residuals
    - "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals
    - "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals
    - "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals
    - "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals
    - "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals
    - "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals

block_size: int.
    size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap").
    Default is round(3.15*(n_residuals^(1/3)))

replications: int.
    number of replications (if needed, for predictive simulation). Default is 'None'.

kernel: str.
    the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'.

agg: str.
    either "mean" or "median" for simulation of bootstrap aggregating

seed: int.
    reproducibility seed for nodes_sim=='uniform' or predictive simulation.

backend: str.
    "cpu" or "gpu" or "tpu".

verbose: int.
    0: not printing; 1: printing

show_progress: bool.
    True: progress bar when fitting each series; False: no progress bar when fitting each series

Attributes:

fit_objs_: dict
    objects adjusted to each individual time series

y_: {array-like}
    DeepMTS responses (most recent observations first)

X_: {array-like}
    DeepMTS lags

xreg_: {array-like}
    external regressors

y_means_: dict
    a dictionary of each series mean values

preds_: {array-like}
    successive model predictions

preds_std_: {array-like}
    standard deviation around the predictions

return_std_: boolean
    return uncertainty or not (set in predict)

df_: data frame
    the input data frame, in case a data.frame is provided to `fit`

Examples:

Example 1:

import nnetsauce as ns
import numpy as np
from sklearn import linear_model
np.random.seed(123)

M = np.random.rand(10, 3)
M[:,0] = 10*M[:,0]
M[:,2] = 25*M[:,2]
print(M)

# Adjust Bayesian Ridge
regr4 = linear_model.BayesianRidge()
obj_DeepMTS = ns.DeepMTS(regr4, lags = 1, n_hidden_features=5)
obj_DeepMTS.fit(M)
print(obj_DeepMTS.predict())

# with credible intervals
print(obj_DeepMTS.predict(return_std=True, level=80))

print(obj_DeepMTS.predict(return_std=True, level=95))

Example 2:

import nnetsauce as ns
import numpy as np
import pandas as pd
from sklearn import linear_model

dataset = {
'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
'series1' : [34, 30, 35.6, 33.3, 38.1],
'series2' : [4, 5.5, 5.6, 6.3, 5.1],
'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
df = pd.DataFrame(dataset).set_index('date')
print(df)

# Adjust Bayesian Ridge
regr5 = linear_model.BayesianRidge()
obj_DeepMTS = ns.DeepMTS(regr5, lags = 1, n_hidden_features=5)
obj_DeepMTS.fit(df)
print(obj_DeepMTS.predict())

# with credible intervals
print(obj_DeepMTS.predict(return_std=True, level=80))

print(obj_DeepMTS.predict(return_std=True, level=95))
class DiscreteTokenMTS(nnetsauce.MTS):
 12class DiscreteTokenMTS(MTS):
 13    """
 14    MTS for discrete token forecasting via nearest-neighbor in embedding space.
 15
 16    Maps continuous predictions to discrete tokens using nearest-neighbor lookup
 17    in a vocabulary (embedding space). Supports probabilistic decoding with
 18    temperature-controlled softmax and uncertainty quantification in token space.
 19
 20    Parameters
 21    ----------
 22    obj : object
 23        Base learner with fit() and predict() methods
 24
 25    vocab : np.ndarray of shape (vocab_size, n_series)
 26        Token vocabulary - each row is a token embedding vector
 27
 28    metric : {'euclidean', 'cosine'}, default='euclidean'
 29        Distance metric for nearest-neighbor lookup
 30
 31    return_mode : {'token_id', 'token_vector', 'both', 'probs'}, default='token_id'
 32        Output format:
 33        - 'token_id': integer token indices
 34        - 'token_vector': token embedding vectors
 35        - 'both': single DataFrame with token_id + dimensions
 36        - 'probs': probability distribution over all tokens
 37
 38    softmax_temperature : float, default=1.0
 39        Temperature for softmax when return_mode='probs'
 40        Lower values (0.1-0.5) → sharper distributions (more deterministic)
 41        Higher values (2.0-10.0) → smoother distributions (more exploratory)
 42
 43    normalize_vocab : bool, default=False
 44        Whether to center and scale vocabulary to zero mean, unit variance
 45
 46    **mts_kwargs : dict
 47        Additional parameters passed to MTS base class
 48
 49    Attributes
 50    ----------
 51    vocab : np.ndarray
 52        Normalized vocabulary (if normalize_vocab=True)
 53
 54    vocab_mean_ : np.ndarray
 55        Mean used for normalization (if normalize_vocab=True)
 56
 57    vocab_std_ : np.ndarray
 58        Std used for normalization (if normalize_vocab=True)
 59
 60    discretization_errors_ : pd.DataFrame or None
 61        Distances from predictions to nearest tokens
 62
 63    Warnings
 64    --------
 65    - Prediction intervals (lower/upper) are NOT discretized - only the mean
 66    - For uncertainty in token space, use predict_token_distribution()
 67    - Vocabulary quality strongly affects results - use diagnose_vocabulary()
 68
 69    Examples
 70    --------
 71    >>> # Basic token prediction
 72    >>> vocab = np.random.randn(100, 10)  # 100 tokens, 10 dimensions
 73    >>> model = DiscreteTokenMTS(
 74    ...     obj=Ridge(),
 75    ...     vocab=vocab,
 76    ...     lags=5,
 77    ...     return_mode='token_id'
 78    ... )
 79    >>> model.fit(X_train)
 80    >>> tokens = model.predict(h=10)
 81
 82    >>> # Probabilistic with temperature control
 83    >>> model = DiscreteTokenMTS(
 84    ...     obj=Ridge(),
 85    ...     vocab=vocab,
 86    ...     lags=5,
 87    ...     return_mode='probs',
 88    ...     softmax_temperature=1.5
 89    ... )
 90    >>> probs = model.predict(h=10)  # Returns probability distributions
 91
 92    >>> # Uncertainty-aware token distributions
 93    >>> freqs, entropy, mode = model.predict_token_distribution(
 94    ...     h=10,
 95    ...     replications=100
 96    ... )
 97    """
 98
 99    def __init__(
100        self,
101        obj,
102        vocab,
103        metric="euclidean",
104        return_mode="token_id",
105        softmax_temperature=1.0,
106        normalize_vocab=False,
107        **mts_kwargs,
108    ):
109        super().__init__(obj, **mts_kwargs)
110
111        # Convert and validate vocabulary
112        self.vocab_original = np.asarray(vocab, dtype=np.float64)
113        self._validate_vocabulary()
114
115        self.vocab_size = self.vocab_original.shape[0]
116        self.vocab_mean_ = None
117        self.vocab_std_ = None
118        self.normalize_vocab = normalize_vocab
119
120        # Normalize if requested
121        if normalize_vocab:
122            self._normalize_vocabulary()
123        else:
124            self.vocab = self.vocab_original.copy()
125
126        # Validate and set metric
127        assert metric in [
128            "euclidean",
129            "cosine",
130        ], "metric must be 'euclidean' or 'cosine'"
131        self.metric = metric
132        self.distance_func = (
133            euclidean_distances if metric == "euclidean" else cosine_distances
134        )
135
136        # Validate and set return mode
137        assert return_mode in [
138            "token_id",
139            "token_vector",
140            "both",
141            "probs",
142        ], "return_mode must be 'token_id', 'token_vector', 'both', or 'probs'"
143        self.return_mode = return_mode
144
145        # Validate temperature
146        assert softmax_temperature > 0, "softmax_temperature must be positive"
147        self.softmax_temperature = softmax_temperature
148
149        # Initialize error tracking
150        self.discretization_errors_ = None
151
152    def _validate_vocabulary(self):
153        """Comprehensive vocabulary validation"""
154        # Check shape
155        assert (
156            self.vocab_original.ndim == 2
157        ), "vocab must be 2D array (vocab_size, n_series)"
158        assert (
159            self.vocab_original.shape[0] > 0
160        ), "vocab must have at least one token"
161
162        # Check for NaN/Inf
163        if np.any(np.isnan(self.vocab_original)) or np.any(
164            np.isinf(self.vocab_original)
165        ):
166            raise ValueError("Vocabulary contains NaN or Inf values")
167
168        # Check for duplicates
169        unique_rows = np.unique(self.vocab_original, axis=0)
170        if len(unique_rows) < len(self.vocab_original):
171            n_duplicates = len(self.vocab_original) - len(unique_rows)
172            warnings.warn(
173                f"Vocabulary contains {n_duplicates} duplicate vectors. "
174                "This reduces effective vocabulary size.",
175                UserWarning,
176            )
177
178        # Check for near-duplicates
179        if len(self.vocab_original) > 1:
180            dists = euclidean_distances(self.vocab_original)
181            np.fill_diagonal(dists, np.inf)
182            min_dist = dists.min()
183
184            if min_dist < 1e-6:
185                warnings.warn(
186                    f"Vocabulary contains very close vectors (min distance: {min_dist:.2e}). "
187                    "Consider increasing token diversity.",
188                    UserWarning,
189                )
190
191    def _normalize_vocabulary(self):
192        """Center and scale vocabulary"""
193        self.vocab_mean_ = self.vocab_original.mean(axis=0)
194        self.vocab_std_ = self.vocab_original.std(axis=0) + 1e-8
195        self.vocab = (self.vocab_original - self.vocab_mean_) / self.vocab_std_
196
197    def fit(self, X, **kwargs):
198        """
199        Fit model and validate vocabulary dimensions match data.
200
201        Parameters
202        ----------
203        X : array-like of shape (n_samples, n_series)
204            Training data
205
206        **kwargs : dict
207            Additional parameters passed to parent fit
208
209        Returns
210        -------
211        self : object
212            Fitted estimator
213        """
214        # Call parent fit
215        super().fit(X, **kwargs)
216
217        # Validate vocabulary dimensions
218        n_series = X.shape[1] if X.ndim > 1 else 1
219        if self.vocab.shape[1] != n_series:
220            raise ValueError(
221                f"Vocabulary dimension ({self.vocab.shape[1]}) must match "
222                f"number of series ({n_series})"
223            )
224
225        # Additional check for cosine distance
226        if self.metric == "cosine":
227            norms = np.linalg.norm(self.vocab, axis=1)
228            zero_vectors = norms < 1e-10
229            if np.any(zero_vectors):
230                raise ValueError(
231                    f"Vocabulary contains {zero_vectors.sum()} zero/near-zero vectors. "
232                    "Cosine distance requires non-zero vectors."
233                )
234
235        return self
236
237    def _vectorized_map_to_tokens(self, continuous_preds):
238        """
239        Vectorized token mapping for efficiency.
240
241        Parameters
242        ----------
243        continuous_preds : np.ndarray of shape (h, n_series)
244            Continuous predictions
245
246        Returns
247        -------
248        result : depends on return_mode
249        errors : np.ndarray
250            Distances to nearest tokens
251        """
252        # Normalize predictions if vocabulary was normalized
253        if self.normalize_vocab:
254            continuous_preds = (
255                continuous_preds - self.vocab_mean_
256            ) / self.vocab_std_
257
258        # Compute all distances at once
259        dists = self.distance_func(continuous_preds, self.vocab)
260
261        # Find nearest tokens
262        nearest_indices = np.argmin(dists, axis=1)
263        min_dists = dists[np.arange(len(dists)), nearest_indices]
264
265        if self.return_mode == "token_id":
266            return nearest_indices, min_dists
267
268        elif self.return_mode == "token_vector":
269            token_vecs = self.vocab[nearest_indices]
270            # Denormalize if vocabulary was normalized
271            if self.normalize_vocab:
272                token_vecs = token_vecs * self.vocab_std_ + self.vocab_mean_
273            return token_vecs, min_dists
274
275        elif self.return_mode == "both":
276            # Return combined array: [token_id, dim_0, dim_1, ...]
277            token_ids = nearest_indices.reshape(-1, 1)
278            token_vecs = self.vocab[nearest_indices]
279            # Denormalize if vocabulary was normalized
280            if self.normalize_vocab:
281                token_vecs = token_vecs * self.vocab_std_ + self.vocab_mean_
282            combined = np.column_stack([token_ids, token_vecs])
283            return combined, min_dists
284
285        elif self.return_mode == "probs":
286            # Softmax of negative distances
287            probs = softmax(-dists / self.softmax_temperature, axis=1)
288            return probs, min_dists
289
290    def predict(
291        self,
292        h=5,
293        level=95,
294        quantiles=None,
295        return_discretization_error=False,
296        **kwargs,
297    ):
298        """
299        Generate discrete token predictions.
300
301        Parameters
302        ----------
303        h : int, default=5
304            Forecast horizon
305
306        level : int, default=95
307            Confidence level (only affects continuous forecasts)
308
309        quantiles : list of float, optional
310            Quantile levels
311
312        return_discretization_error : bool, default=False
313            If True, return (predictions, errors) tuple
314
315        **kwargs : dict
316            Additional parameters for parent predict
317
318        Returns
319        -------
320        predictions : pd.DataFrame
321            Discrete predictions. Format depends on return_mode:
322            - 'token_id': single column 'token_id'
323            - 'token_vector': columns 'dim_0', 'dim_1', ...
324            - 'both': columns 'token_id', 'dim_0', 'dim_1', ...
325            - 'probs': columns 'token_0_prob', 'token_1_prob', ...
326
327        errors : pd.DataFrame (if return_discretization_error=True)
328            Discretization errors (distances to nearest tokens)
329
330        Warnings
331        --------
332        When prediction intervals are requested but only mean is discretized,
333        a warning is issued. Use predict_token_distribution() for uncertainty
334        in token space.
335        """
336        # Get continuous predictions from parent
337        continuous_result = super().predict(
338            h=h, level=level, quantiles=quantiles, **kwargs
339        )
340
341        # FIXED: Robust type detection using duck typing
342        if hasattr(continuous_result, "_fields"):  # Namedtuple
343            if (
344                hasattr(continuous_result, "sims")
345                and continuous_result.sims is not None
346            ):
347                # Simulation-based forecast
348                return self._discretize_simulations(
349                    continuous_result.sims, return_discretization_error
350                )
351            elif hasattr(continuous_result, "mean"):
352                # Interval-based forecast - warn about information loss
353                warnings.warn(
354                    "Prediction intervals cannot be meaningfully discretized. "
355                    "Only mean predictions are converted to tokens. "
356                    "Use predict_token_distribution(replications=N) for "
357                    "uncertainty in token space.",
358                    UserWarning,
359                )
360                return self._discretize_dataframe(
361                    continuous_result.mean, return_discretization_error
362                )
363        elif isinstance(continuous_result, pd.DataFrame):
364            # Deterministic forecast
365            return self._discretize_dataframe(
366                continuous_result, return_discretization_error
367            )
368        else:
369            raise NotImplementedError(
370                f"Unhandled predict output type: {type(continuous_result)}"
371            )
372
373    def _discretize_dataframe(self, df, return_error=False):
374        """Discretize a continuous prediction DataFrame"""
375        # Use vectorized mapping
376        result, errors = self._vectorized_map_to_tokens(df.values)
377
378        # FIXED: Always return single DataFrame (even for 'both' mode)
379        if self.return_mode == "probs":
380            result_df = pd.DataFrame(
381                result,
382                index=df.index,
383                columns=[f"token_{i}_prob" for i in range(self.vocab_size)],
384            )
385        elif self.return_mode == "both":
386            # Combined format: token_id + dimensions
387            columns = ["token_id"] + [
388                f"dim_{i}" for i in range(self.vocab.shape[1])
389            ]
390            result_df = pd.DataFrame(result, index=df.index, columns=columns)
391            result_df["token_id"] = result_df["token_id"].astype(int)
392        elif self.return_mode == "token_id":
393            result_df = pd.DataFrame(
394                result.reshape(-1, 1), index=df.index, columns=["token_id"]
395            )
396        else:  # 'token_vector'
397            result_df = pd.DataFrame(
398                result,
399                index=df.index,
400                columns=[f"dim_{i}" for i in range(self.vocab.shape[1])],
401            )
402
403        if return_error:
404            error_df = pd.DataFrame(
405                errors.reshape(-1, 1),
406                index=df.index,
407                columns=["discretization_error"],
408            )
409            self.discretization_errors_ = error_df
410            return result_df, error_df
411
412        return result_df
413
414    def _discretize_simulations(self, sims, return_error=False):
415        """Discretize simulation paths"""
416        discrete_sims = []
417        all_errors = []
418
419        for sim_df in sims:
420            result, errors = self._vectorized_map_to_tokens(sim_df.values)
421
422            if self.return_mode == "probs":
423                discrete_df = pd.DataFrame(
424                    result,
425                    index=sim_df.index,
426                    columns=[f"token_{i}_prob" for i in range(self.vocab_size)],
427                )
428            elif self.return_mode == "both":
429                columns = ["token_id"] + [
430                    f"dim_{i}" for i in range(self.vocab.shape[1])
431                ]
432                discrete_df = pd.DataFrame(
433                    result, index=sim_df.index, columns=columns
434                )
435                discrete_df["token_id"] = discrete_df["token_id"].astype(int)
436            elif self.return_mode == "token_id":
437                discrete_df = pd.DataFrame(
438                    result.reshape(-1, 1),
439                    index=sim_df.index,
440                    columns=["token_id"],
441                )
442            else:  # 'token_vector'
443                discrete_df = pd.DataFrame(
444                    result,
445                    index=sim_df.index,
446                    columns=[f"dim_{i}" for i in range(self.vocab.shape[1])],
447                )
448
449            discrete_sims.append(discrete_df)
450
451            if return_error:
452                error_df = pd.DataFrame(
453                    errors.reshape(-1, 1),
454                    index=sim_df.index,
455                    columns=["discretization_error"],
456                )
457                all_errors.append(error_df)
458
459        if return_error:
460            return tuple(discrete_sims), tuple(all_errors)
461        return tuple(discrete_sims)
462
463    # ========== NEW: Uncertainty Quantification in Token Space ==========
464
465    def predict_top_k(self, h=5, k=5, **kwargs):
466        """
467        Predict top-k most probable tokens per timestep.
468
469        Parameters
470        ----------
471        h : int
472            Forecast horizon
473        k : int
474            Number of top tokens to return
475        **kwargs : dict
476            Additional parameters for parent predict
477
478        Returns
479        -------
480        predictions : pd.DataFrame
481            Columns: token_1, prob_1, token_2, prob_2, ..., token_k, prob_k
482        """
483        continuous_result = super().predict(h=h, **kwargs)
484
485        # Handle different return types
486        if hasattr(continuous_result, "mean"):
487            preds = continuous_result.mean.values
488            index = continuous_result.mean.index
489        elif isinstance(continuous_result, pd.DataFrame):
490            preds = continuous_result.values
491            index = continuous_result.index
492        else:
493            raise ValueError("Cannot extract continuous predictions")
494
495        # Compute probabilities
496        dists = self.distance_func(preds, self.vocab)
497        probs = softmax(-dists / self.softmax_temperature, axis=1)
498
499        # Get top-k
500        top_k_indices = np.argsort(probs, axis=1)[:, -k:][:, ::-1]
501        top_k_probs = np.take_along_axis(probs, top_k_indices, axis=1)
502
503        # Format as DataFrame
504        columns = []
505        data = []
506        for i in range(k):
507            columns.extend([f"token_{i+1}", f"prob_{i+1}"])
508            data.append(top_k_indices[:, i])
509            data.append(top_k_probs[:, i])
510
511        return pd.DataFrame(np.column_stack(data), index=index, columns=columns)
512
513    def predict_token_distribution(self, h=5, replications=100, **kwargs):
514        """
515        Generate token probability distribution from simulation ensemble.
516
517        This method provides meaningful uncertainty quantification in token space
518        by discretizing multiple simulation paths and computing token frequencies.
519
520        Parameters
521        ----------
522        h : int
523            Forecast horizon
524        replications : int
525            Number of simulation paths
526        **kwargs : dict
527            Additional parameters for parent predict
528
529        Returns
530        -------
531        frequencies : pd.DataFrame
532            Token frequencies across simulations
533            Columns: token_0_freq, token_1_freq, ..., token_V_freq
534
535        entropy : pd.Series
536            Shannon entropy per timestep (uncertainty measure)
537
538        mode_tokens : pd.DataFrame
539            Most frequent token per timestep
540
541        Examples
542        --------
543        >>> freqs, entropy, mode = model.predict_token_distribution(h=10, replications=100)
544        >>> # High entropy → uncertain prediction
545        >>> uncertain_steps = entropy[entropy > 2.0]
546        >>> # Use mode tokens for point predictions
547        >>> predictions = mode['mode_token'].values
548        """
549        # Force simulation mode
550        kwargs["replications"] = replications
551        continuous_result = super().predict(h=h, **kwargs)
552
553        # Extract simulations
554        if (
555            hasattr(continuous_result, "sims")
556            and continuous_result.sims is not None
557        ):
558            sims = continuous_result.sims
559            index = continuous_result.mean.index
560        else:
561            raise ValueError(
562                "predict_token_distribution requires simulation-based forecasting. "
563                "Ensure replications > 0 and type_pi supports simulations."
564            )
565
566        # Discretize all paths
567        all_tokens = []
568        for sim in sims:
569            tokens, _ = self._vectorized_map_to_tokens(sim.values)
570            if self.return_mode == "probs":
571                # For probs mode, get argmax token
572                tokens = np.argmax(tokens, axis=1)
573            elif self.return_mode == "both":
574                # Extract token_id column
575                tokens = tokens[:, 0].astype(int)
576            elif self.return_mode == "token_vector":
577                # Map back to token IDs
578                dists = self.distance_func(tokens, self.vocab)
579                tokens = np.argmin(dists, axis=1)
580            # else: token_id mode, already correct
581
582            all_tokens.append(tokens)
583
584        all_tokens = np.array(all_tokens)  # (replications, h)
585
586        # Compute frequency distribution
587        h_actual = all_tokens.shape[1]
588        token_freqs = np.zeros((h_actual, self.vocab_size))
589
590        for t in range(h_actual):
591            unique, counts = np.unique(all_tokens[:, t], return_counts=True)
592            token_freqs[t, unique] = counts / replications
593
594        # Compute entropy
595        epsilon = 1e-10
596        entropy = -np.sum(token_freqs * np.log(token_freqs + epsilon), axis=1)
597
598        # Get mode
599        mode_tokens = np.argmax(token_freqs, axis=1)
600
601        # Package results
602        freq_df = pd.DataFrame(
603            token_freqs,
604            index=index,
605            columns=[f"token_{i}_freq" for i in range(self.vocab_size)],
606        )
607
608        entropy_series = pd.Series(entropy, index=index, name="entropy")
609
610        mode_df = pd.DataFrame(mode_tokens, index=index, columns=["mode_token"])
611
612        return freq_df, entropy_series, mode_df
613
614    # ========== Utility Methods ==========
615
616    def tokens_to_vectors(self, token_ids):
617        """Convert token IDs to embedding vectors (in original scale)"""
618        token_ids = np.asarray(token_ids).astype(int)
619        assert np.all(
620            (token_ids >= 0) & (token_ids < self.vocab_size)
621        ), f"Token IDs must be in range [0, {self.vocab_size-1}]"
622        vectors = self.vocab[token_ids]
623        # Denormalize if vocabulary was normalized
624        if self.normalize_vocab:
625            vectors = vectors * self.vocab_std_ + self.vocab_mean_
626        return vectors
627
628    def get_token_neighbors(self, token_id, k=5):
629        """Find k nearest neighbors of a token"""
630        assert (
631            0 <= token_id < self.vocab_size
632        ), f"token_id must be in range [0, {self.vocab_size-1}]"
633
634        token_vec = self.vocab[token_id].reshape(1, -1)
635        dists = self.distance_func(token_vec, self.vocab).flatten()
636
637        sorted_indices = np.argsort(dists)
638        sorted_indices = sorted_indices[sorted_indices != token_id][:k]
639
640        return pd.DataFrame(
641            {"neighbor_id": sorted_indices, "distance": dists[sorted_indices]}
642        )
643
644    def compute_vocab_coverage(self, predictions):
645        """Compute vocabulary usage statistics"""
646        if "token_id" not in predictions.columns:
647            raise ValueError("predictions must have 'token_id' column")
648
649        token_ids = predictions["token_id"].values
650        unique_tokens = np.unique(token_ids)
651        freq = pd.Series(token_ids).value_counts().sort_index()
652
653        return {
654            "unique_tokens": len(unique_tokens),
655            "coverage_pct": 100 * len(unique_tokens) / self.vocab_size,
656            "token_frequencies": freq,
657            "most_common_token": freq.idxmax() if len(freq) > 0 else None,
658            "least_common_token": freq.idxmin() if len(freq) > 0 else None,
659        }
660
661    def diagnose_vocabulary(self):
662        """
663        Comprehensive vocabulary quality diagnostics.
664
665        Returns
666        -------
667        report : dict
668            Quality metrics including distances, condition number, coverage
669        """
670        # Use original vocabulary for diagnostics to get meaningful statistics
671        vocab_to_diagnose = self.vocab_original
672
673        report = {
674            "vocab_size": self.vocab_size,
675            "embedding_dim": vocab_to_diagnose.shape[1],
676            "normalized": self.normalize_vocab,
677        }
678
679        # Pairwise distances
680        dists = euclidean_distances(vocab_to_diagnose)
681        np.fill_diagonal(dists, np.inf)
682
683        report["min_pairwise_distance"] = dists.min()
684        report["max_pairwise_distance"] = dists.max()
685        report["mean_pairwise_distance"] = dists[dists != np.inf].mean()
686
687        # Condition number
688        U, s, Vt = np.linalg.svd(vocab_to_diagnose, full_matrices=False)
689        report["condition_number"] = s.max() / (s.min() + 1e-10)
690
691        # Coverage volume
692        ranges = vocab_to_diagnose.max(axis=0) - vocab_to_diagnose.min(axis=0)
693        report["coverage_volume"] = np.prod(ranges)
694
695        # Duplicates
696        unique_rows = np.unique(vocab_to_diagnose, axis=0)
697        report["duplicate_count"] = len(vocab_to_diagnose) - len(unique_rows)
698
699        return report
700
701    def print_vocabulary_report(self):
702        """Print human-readable vocabulary diagnostics"""
703        report = self.diagnose_vocabulary()
704
705        print("=" * 60)
706        print("VOCABULARY QUALITY REPORT")
707        print("=" * 60)
708        print(f"Vocabulary size: {report['vocab_size']} tokens")
709        print(f"Embedding dimension: {report['embedding_dim']}")
710        print(f"\nPairwise Distances:")
711        print(f"  Min:  {report['min_pairwise_distance']:.6f}")
712        print(f"  Mean: {report['mean_pairwise_distance']:.6f}")
713        print(f"  Max:  {report['max_pairwise_distance']:.6f}")
714        print(f"\nVocabulary Health:")
715        print(f"  Condition number: {report['condition_number']:.2f}")
716        if report["condition_number"] > 1000:
717            print(
718                "  ⚠️  WARNING: High condition number may indicate redundant tokens"
719            )
720        print(f"  Duplicate tokens: {report['duplicate_count']}")
721        if report["duplicate_count"] > 0:
722            print("  ⚠️  WARNING: Duplicates reduce effective vocabulary size")
723        print(f"  Coverage volume: {report['coverage_volume']:.2e}")
724        print("=" * 60)

MTS for discrete token forecasting via nearest-neighbor in embedding space.

Maps continuous predictions to discrete tokens using nearest-neighbor lookup in a vocabulary (embedding space). Supports probabilistic decoding with temperature-controlled softmax and uncertainty quantification in token space.

Parameters

obj : object Base learner with fit() and predict() methods

vocab : np.ndarray of shape (vocab_size, n_series) Token vocabulary - each row is a token embedding vector

metric : {'euclidean', 'cosine'}, default='euclidean' Distance metric for nearest-neighbor lookup

return_mode : {'token_id', 'token_vector', 'both', 'probs'}, default='token_id' Output format: - 'token_id': integer token indices - 'token_vector': token embedding vectors - 'both': single DataFrame with token_id + dimensions - 'probs': probability distribution over all tokens

softmax_temperature : float, default=1.0 Temperature for softmax when return_mode='probs' Lower values (0.1-0.5) → sharper distributions (more deterministic) Higher values (2.0-10.0) → smoother distributions (more exploratory)

normalize_vocab : bool, default=False Whether to center and scale vocabulary to zero mean, unit variance

**mts_kwargs : dict Additional parameters passed to MTS base class

Attributes

vocab : np.ndarray Vocabulary used for nearest-neighbor lookup: normalized if normalize_vocab=True, otherwise a copy of the original vocabulary

vocab_mean_ : np.ndarray Mean used for normalization (if normalize_vocab=True)

vocab_std_ : np.ndarray Std used for normalization (if normalize_vocab=True)

discretization_errors_ : pd.DataFrame or None Distances from predictions to nearest tokens

Warnings

  • Prediction intervals (lower/upper) are NOT discretized - only the mean
  • For uncertainty in token space, use predict_token_distribution()
  • Vocabulary quality strongly affects results - use diagnose_vocabulary()

Examples

>>> # Basic token prediction
>>> vocab = np.random.randn(100, 10)  # 100 tokens, 10 dimensions
>>> model = DiscreteTokenMTS(
...     obj=Ridge(),
...     vocab=vocab,
...     lags=5,
...     return_mode='token_id'
... )
>>> model.fit(X_train)
>>> tokens = model.predict(h=10)
>>> # Probabilistic with temperature control
>>> model = DiscreteTokenMTS(
...     obj=Ridge(),
...     vocab=vocab,
...     lags=5,
...     return_mode='probs',
...     softmax_temperature=1.5
... )
>>> probs = model.predict(h=10)  # Returns probability distributions
>>> # Uncertainty-aware token distributions
>>> freqs, entropy, mode = model.predict_token_distribution(
...     h=10,
...     replications=100
... )
def fit(self, X, **kwargs):
197    def fit(self, X, **kwargs):
198        """
199        Fit model and validate vocabulary dimensions match data.
200
201        Parameters
202        ----------
203        X : array-like of shape (n_samples, n_series)
204            Training data
205
206        **kwargs : dict
207            Additional parameters passed to parent fit
208
209        Returns
210        -------
211        self : object
212            Fitted estimator
213        """
214        # Call parent fit
215        super().fit(X, **kwargs)
216
217        # Validate vocabulary dimensions
218        n_series = X.shape[1] if X.ndim > 1 else 1
219        if self.vocab.shape[1] != n_series:
220            raise ValueError(
221                f"Vocabulary dimension ({self.vocab.shape[1]}) must match "
222                f"number of series ({n_series})"
223            )
224
225        # Additional check for cosine distance
226        if self.metric == "cosine":
227            norms = np.linalg.norm(self.vocab, axis=1)
228            zero_vectors = norms < 1e-10
229            if np.any(zero_vectors):
230                raise ValueError(
231                    f"Vocabulary contains {zero_vectors.sum()} zero/near-zero vectors. "
232                    "Cosine distance requires non-zero vectors."
233                )
234
235        return self

Fit model and validate vocabulary dimensions match data.

Parameters

X : array-like of shape (n_samples, n_series) Training data

**kwargs : dict Additional parameters passed to parent fit

Returns

self : object Fitted estimator

def predict( self, h=5, level=95, quantiles=None, return_discretization_error=False, **kwargs):
290    def predict(
291        self,
292        h=5,
293        level=95,
294        quantiles=None,
295        return_discretization_error=False,
296        **kwargs,
297    ):
298        """
299        Generate discrete token predictions.
300
301        Parameters
302        ----------
303        h : int, default=5
304            Forecast horizon
305
306        level : int, default=95
307            Confidence level (only affects continuous forecasts)
308
309        quantiles : list of float, optional
310            Quantile levels
311
312        return_discretization_error : bool, default=False
313            If True, return (predictions, errors) tuple
314
315        **kwargs : dict
316            Additional parameters for parent predict
317
318        Returns
319        -------
320        predictions : pd.DataFrame
321            Discrete predictions. Format depends on return_mode:
322            - 'token_id': single column 'token_id'
323            - 'token_vector': columns 'dim_0', 'dim_1', ...
324            - 'both': columns 'token_id', 'dim_0', 'dim_1', ...
325            - 'probs': columns 'token_0_prob', 'token_1_prob', ...
326
327        errors : pd.DataFrame (if return_discretization_error=True)
328            Discretization errors (distances to nearest tokens)
329
330        Warnings
331        --------
332        When prediction intervals are requested but only mean is discretized,
333        a warning is issued. Use predict_token_distribution() for uncertainty
334        in token space.
335        """
336        # Get continuous predictions from parent
337        continuous_result = super().predict(
338            h=h, level=level, quantiles=quantiles, **kwargs
339        )
340
341        # FIXED: Robust type detection using duck typing
342        if hasattr(continuous_result, "_fields"):  # Namedtuple
343            if (
344                hasattr(continuous_result, "sims")
345                and continuous_result.sims is not None
346            ):
347                # Simulation-based forecast
348                return self._discretize_simulations(
349                    continuous_result.sims, return_discretization_error
350                )
351            elif hasattr(continuous_result, "mean"):
352                # Interval-based forecast - warn about information loss
353                warnings.warn(
354                    "Prediction intervals cannot be meaningfully discretized. "
355                    "Only mean predictions are converted to tokens. "
356                    "Use predict_token_distribution(replications=N) for "
357                    "uncertainty in token space.",
358                    UserWarning,
359                )
360                return self._discretize_dataframe(
361                    continuous_result.mean, return_discretization_error
362                )
363        elif isinstance(continuous_result, pd.DataFrame):
364            # Deterministic forecast
365            return self._discretize_dataframe(
366                continuous_result, return_discretization_error
367            )
368        else:
369            raise NotImplementedError(
370                f"Unhandled predict output type: {type(continuous_result)}"
371            )

Generate discrete token predictions.

Parameters

h : int, default=5 Forecast horizon

level : int, default=95 Confidence level (only affects continuous forecasts)

quantiles : list of float, optional Quantile levels

return_discretization_error : bool, default=False If True, return (predictions, errors) tuple

**kwargs : dict Additional parameters for parent predict

Returns

predictions : pd.DataFrame Discrete predictions. Format depends on return_mode: - 'token_id': single column 'token_id' - 'token_vector': columns 'dim_0', 'dim_1', ... - 'both': columns 'token_id', 'dim_0', 'dim_1', ... - 'probs': columns 'token_0_prob', 'token_1_prob', ...

errors : pd.DataFrame (if return_discretization_error=True) Discretization errors (distances to nearest tokens)

Warnings

When prediction intervals are requested but only mean is discretized, a warning is issued. Use predict_token_distribution() for uncertainty in token space.

class Downloader:
 6class Downloader:
 7    """Download datasets from data sources (R-universe for now)"""
 8
 9    def __init__(self):
10        self.pkgname = None
11        self.dataset = None
12        self.source = None
13        self.url = None
14        self.request = None
15
16    def download(
17        self,
18        pkgname="MASS",
19        dataset="Boston",
20        source="https://cran.r-universe.dev/",
21        **kwargs
22    ):
23        """Download datasets from data sources (R-universe for now)
24
25        Examples:
26
27        ```python
28        import nnetsauce as ns
29
30        downloader = ns.Downloader()
31        df = downloader.download(pkgname="MASS", dataset="Boston")
32        ```
33
34        """
35        self.pkgname = pkgname
36        self.dataset = dataset
37        self.source = source
38        self.url = source + pkgname + "/data/" + dataset + "/json"
39        self.request = requests.get(self.url)
40        return pd.DataFrame(self.request.json(), **kwargs)

Download datasets from data sources (R-universe for now)

def download( self, pkgname='MASS', dataset='Boston', source='https://cran.r-universe.dev/', **kwargs):
16    def download(
17        self,
18        pkgname="MASS",
19        dataset="Boston",
20        source="https://cran.r-universe.dev/",
21        **kwargs
22    ):
23        """Download datasets from data sources (R-universe for now)
24
25        Examples:
26
27        ```python
28        import nnetsauce as ns
29
30        downloader = ns.Downloader()
31        df = downloader.download(pkgname="MASS", dataset="Boston")
32        ```
33
34        """
35        self.pkgname = pkgname
36        self.dataset = dataset
37        self.source = source
38        self.url = source + pkgname + "/data/" + dataset + "/json"
39        self.request = requests.get(self.url)
40        return pd.DataFrame(self.request.json(), **kwargs)

Download datasets from data sources (R-universe for now)

Examples:

import nnetsauce as ns

downloader = ns.Downloader()
df = downloader.download(pkgname="MASS", dataset="Boston")
class ElasticNet2Regressor(sklearn.base.BaseEstimator, sklearn.base.RegressorMixin):
 7class ElasticNet2Regressor(BaseEstimator, RegressorMixin):
 8    def __init__(
 9        self,
10        n_hidden_features=100,
11        alpha=1.0,
12        l1_ratio=0.5,
13        lambd=0.1,
14        activation_name="tanh",
15        a=0.01,
16        max_iter=1000,
17        tol=1e-4,
18        random_state=None,
19    ):
20        self.n_hidden_features = n_hidden_features
21        self.alpha = alpha
22        self.l1_ratio = l1_ratio
23        self.lambd = lambd
24        self.activation_name = activation_name
25        self.a = a
26        self.max_iter = max_iter
27        self.tol = tol
28        self.random_state = random_state
29
30    def _activation(self, Z):
31        if self.activation_name == "relu":
32            return np.maximum(0, Z)
33        elif self.activation_name == "tanh":
34            return np.tanh(Z)
35        elif self.activation_name == "sigmoid":
36            return 1 / (1 + np.exp(-Z))
37        elif self.activation_name == "prelu":
38            return np.where(Z > 0, Z, self.a * Z)
39        elif self.activation_name == "elu":
40            return np.where(Z > 0, Z, self.a * (np.exp(Z) - 1))
41        else:
42            raise ValueError(f"Unknown activation: {self.activation_name}")
43
44    def fit(self, X, y):
45        X, y = check_X_y(X, y)
46        rng = np.random.RandomState(self.random_state)
47
48        # Standardize inputs
49        self.X_mean_ = X.mean(axis=0)
50        self.X_std_ = X.std(axis=0) + 1e-8
51        X_scaled = (X - self.X_mean_) / self.X_std_
52
53        # Center response
54        self.y_mean_ = y.mean()
55        y_centered = y - self.y_mean_
56
57        # Random feature mapping
58        self.W_in_ = rng.randn(X.shape[1], self.n_hidden_features)
59        self.b_in_ = rng.randn(self.n_hidden_features)
60        H = self._activation(X_scaled @ self.W_in_ + self.b_in_)
61
62        # Doubly-constrained optimization with Elastic Net
63        beta = np.zeros(self.n_hidden_features)
64
65        for _ in range(self.max_iter):
66            beta_old = beta.copy()
67
68            # Gradient descent step with projection
69            grad = H.T @ (H @ beta - y_centered) / len(y)
70            step = 0.01 / (1 + self.alpha * (1 - self.l1_ratio))
71
72            # Soft thresholding (L1)
73            beta = beta - step * grad
74            threshold = step * self.alpha * self.l1_ratio
75            beta = np.sign(beta) * np.maximum(np.abs(beta) - threshold, 0)
76
77            # L2 projection (constraint)
78            norm = np.linalg.norm(beta)
79            if norm > self.lambd:
80                beta = beta * (self.lambd / norm)
81
82            if np.linalg.norm(beta - beta_old) < self.tol:
83                break
84
85        self.beta_ = beta
86        return self
87
88    def predict(self, X):
89        X = check_array(X)
90        X_scaled = (X - self.X_mean_) / self.X_std_
91        H = self._activation(X_scaled @ self.W_in_ + self.b_in_)
92        return H @ self.beta_ + self.y_mean_

Base class for all estimators in scikit-learn.

Inheriting from this class provides default implementations of:

  • setting and getting parameters used by GridSearchCV and friends;
  • textual and HTML representation displayed in terminals and IDEs;
  • estimator serialization;
  • parameters validation;
  • data validation;
  • feature names validation.

Read more in the scikit-learn User Guide (section "Rolling your own estimator").

Notes

All estimators should specify all the parameters that can be set at the class level in their __init__ as explicit keyword arguments (no *args or **kwargs).

Examples

>>> import numpy as np
>>> from sklearn.base import BaseEstimator
>>> class MyEstimator(BaseEstimator):
...     def __init__(self, *, param=1):
...         self.param = param
...     def fit(self, X, y=None):
...         self.is_fitted_ = True
...         return self
...     def predict(self, X):
...         return np.full(shape=X.shape[0], fill_value=self.param)
>>> estimator = MyEstimator(param=2)
>>> estimator.get_params()
{'param': 2}
>>> X = np.array([[1, 2], [2, 3], [3, 4]])
>>> y = np.array([1, 0, 1])
>>> estimator.fit(X, y).predict(X)
array([2, 2, 2])
>>> estimator.set_params(param=3).fit(X, y).predict(X)
array([3, 3, 3])
def fit(self, X, y):
44    def fit(self, X, y):
45        X, y = check_X_y(X, y)
46        rng = np.random.RandomState(self.random_state)
47
48        # Standardize inputs
49        self.X_mean_ = X.mean(axis=0)
50        self.X_std_ = X.std(axis=0) + 1e-8
51        X_scaled = (X - self.X_mean_) / self.X_std_
52
53        # Center response
54        self.y_mean_ = y.mean()
55        y_centered = y - self.y_mean_
56
57        # Random feature mapping
58        self.W_in_ = rng.randn(X.shape[1], self.n_hidden_features)
59        self.b_in_ = rng.randn(self.n_hidden_features)
60        H = self._activation(X_scaled @ self.W_in_ + self.b_in_)
61
62        # Doubly-constrained optimization with Elastic Net
63        beta = np.zeros(self.n_hidden_features)
64
65        for _ in range(self.max_iter):
66            beta_old = beta.copy()
67
68            # Gradient descent step with projection
69            grad = H.T @ (H @ beta - y_centered) / len(y)
70            step = 0.01 / (1 + self.alpha * (1 - self.l1_ratio))
71
72            # Soft thresholding (L1)
73            beta = beta - step * grad
74            threshold = step * self.alpha * self.l1_ratio
75            beta = np.sign(beta) * np.maximum(np.abs(beta) - threshold, 0)
76
77            # L2 projection (constraint)
78            norm = np.linalg.norm(beta)
79            if norm > self.lambd:
80                beta = beta * (self.lambd / norm)
81
82            if np.linalg.norm(beta - beta_old) < self.tol:
83                break
84
85        self.beta_ = beta
86        return self
def predict(self, X):
88    def predict(self, X):
89        X = check_array(X)
90        X_scaled = (X - self.X_mean_) / self.X_std_
91        H = self._activation(X_scaled @ self.W_in_ + self.b_in_)
92        return H @ self.beta_ + self.y_mean_
class GLMClassifier(nnetsauce.glm.glm.GLM, sklearn.base.ClassifierMixin):
 23class GLMClassifier(GLM, ClassifierMixin):
 24    """Generalized 'linear' models using quasi-randomized networks (classification)
 25
 26    Parameters:
 27
 28        n_hidden_features: int
 29            number of nodes in the hidden layer
 30
 31        lambda1: float
 32            regularization parameter for GLM coefficients on original features
 33
 34        alpha1: float
 35            controls compromize between l1 and l2 norm of GLM coefficients on original features
 36
 37        lambda2: float
 38            regularization parameter for GLM coefficients on nonlinear features
 39
 40        alpha2: float
 41            controls compromize between l1 and l2 norm of GLM coefficients on nonlinear features
 42
 43        activation_name: str
 44            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 45
 46        a: float
 47            hyperparameter for 'prelu' or 'elu' activation function
 48
 49        nodes_sim: str
 50            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 51            'uniform'
 52
 53        bias: boolean
 54            indicates if the hidden layer contains a bias term (True) or not
 55            (False)
 56
 57        dropout: float
 58            regularization parameter; (random) percentage of nodes dropped out
 59            of the training
 60
 61        direct_link: boolean
 62            indicates if the original predictors are included (True) in model's
 63            fitting or not (False)
 64
 65        n_clusters: int
 66            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 67                no clustering)
 68
 69        cluster_encode: bool
 70            defines how the variable containing clusters is treated (default is one-hot)
 71            if `False`, then labels are used, without one-hot encoding
 72
 73        type_clust: str
 74            type of clustering method: currently k-means ('kmeans') or Gaussian
 75            Mixture Model ('gmm')
 76
 77        type_scaling: a tuple of 3 strings
 78            scaling methods for inputs, hidden layer, and clustering respectively
 79            (and when relevant).
 80            Currently available: standardization ('std') or MinMax scaling ('minmax')
 81
 82        optimizer: object
 83            optimizer, from class nnetsauce.Optimizer
 84
 85        backend: str.
 86            "cpu" or "gpu" or "tpu".
 87
 88        seed: int
 89            reproducibility seed for nodes_sim=='uniform'
 90
 91    Attributes:
 92
 93        beta_: vector
 94            regression coefficients
 95
 96    Examples:
 97
 98    See [https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_classification.py)
 99
100    """
101
102    # construct the object -----
103    _estimator_type = "classifier"
104
105    def __init__(
106        self,
107        n_hidden_features=5,
108        lambda1=0.01,
109        alpha1=0.5,
110        lambda2=0.01,
111        alpha2=0.5,
112        family="expit",
113        activation_name="relu",
114        a=0.01,
115        nodes_sim="sobol",
116        bias=True,
117        dropout=0,
118        direct_link=True,
119        n_clusters=2,
120        cluster_encode=True,
121        type_clust="kmeans",
122        type_scaling=("std", "std", "std"),
123        optimizer=Optimizer(),
124        backend="cpu",
125        seed=123,
126    ):
127        super().__init__(
128            n_hidden_features=n_hidden_features,
129            lambda1=lambda1,
130            alpha1=alpha1,
131            lambda2=lambda2,
132            alpha2=alpha2,
133            activation_name=activation_name,
134            a=a,
135            nodes_sim=nodes_sim,
136            bias=bias,
137            dropout=dropout,
138            direct_link=direct_link,
139            n_clusters=n_clusters,
140            cluster_encode=cluster_encode,
141            type_clust=type_clust,
142            type_scaling=type_scaling,
143            optimizer=optimizer,
144            backend=backend,
145            seed=seed,
146        )
147
148        self.family = family
149
150    def logit_loss(self, Y, row_index, XB):
151        self.n_classes = Y.shape[1]  # len(np.unique(y))
152        # Y = mo.one_hot_encode2(y, self.n_classes)
153        # Y = self.optimizer.one_hot_encode(y, self.n_classes)
154
155        # max_double = 709.0 # only if softmax
156        # XB[XB > max_double] = max_double
157        XB[XB > 709.0] = 709.0
158
159        if row_index is None:
160            return -np.mean(np.sum(Y * XB, axis=1) - logsumexp(XB))
161
162        return -np.mean(np.sum(Y[row_index, :] * XB, axis=1) - logsumexp(XB))
163
164    def expit_erf_loss(self, Y, row_index, XB):
165        # self.n_classes = len(np.unique(y))
166        # Y = mo.one_hot_encode2(y, self.n_classes)
167        # Y = self.optimizer.one_hot_encode(y, self.n_classes)
168        self.n_classes = Y.shape[1]
169
170        if row_index is None:
171            return -np.mean(np.sum(Y * XB, axis=1) - logsumexp(XB))
172
173        return -np.mean(np.sum(Y[row_index, :] * XB, axis=1) - logsumexp(XB))
174
175    def loss_func(
176        self,
177        beta,
178        group_index,
179        X,
180        Y,
181        y,
182        row_index=None,
183        type_loss="logit",
184        **kwargs
185    ):
186        res = {
187            "logit": self.logit_loss,
188            "expit": self.expit_erf_loss,
189            "erf": self.expit_erf_loss,
190        }
191
192        if row_index is None:
193            row_index = range(len(y))
194            XB = self.compute_XB(
195                X,
196                beta=np.reshape(beta, (X.shape[1], self.n_classes), order="F"),
197            )
198
199            return res[type_loss](Y, row_index, XB) + self.compute_penalty(
200                group_index=group_index, beta=beta
201            )
202
203        XB = self.compute_XB(
204            X,
205            beta=np.reshape(beta, (X.shape[1], self.n_classes), order="F"),
206            row_index=row_index,
207        )
208
209        return res[type_loss](Y, row_index, XB) + self.compute_penalty(
210            group_index=group_index, beta=beta
211        )
212
213    def fit(self, X, y, **kwargs):
214        """Fit GLM model to training data (X, y).
215
216        Args:
217
218            X: {array-like}, shape = [n_samples, n_features]
219                Training vectors, where n_samples is the number
220                of samples and n_features is the number of features.
221
222            y: array-like, shape = [n_samples]
223                Target values.
224
225            **kwargs: additional parameters to be passed to
226                    self.cook_training_set or self.obj.fit
227
228        Returns:
229
230            self: object
231
232        """
233
234        assert mx.is_factor(
235            y
236        ), "y must contain only integers"  # change is_factor and subsampling everywhere
237
238        self.classes_ = np.unique(y)  # for compatibility with sklearn
239        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
240
241        self.beta_ = None
242
243        n, p = X.shape
244
245        self.group_index = n * X.shape[1]
246
247        self.n_classes = len(np.unique(y))
248
249        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
250
251        # Y = mo.one_hot_encode2(output_y, self.n_classes)
252        Y = self.optimizer.one_hot_encode(output_y, self.n_classes)
253
254        # initialization
255        if self.backend == "cpu":
256            beta_ = np.linalg.lstsq(scaled_Z, Y, rcond=None)[0]
257        else:
258            beta_ = jnp.linalg.lstsq(scaled_Z, Y, rcond=None)[0]
259
260        # optimization
261        # fit(self, loss_func, response, x0, **kwargs):
262        # loss_func(self, beta, group_index, X, y,
263        #          row_index=None, type_loss="gaussian",
264        #          **kwargs)
265        self.optimizer.fit(
266            self.loss_func,
267            response=y,
268            x0=beta_.flatten(order="F"),
269            group_index=self.group_index,
270            X=scaled_Z,
271            Y=Y,
272            y=y,
273            type_loss=self.family,
274        )
275
276        self.beta_ = self.optimizer.results[0]
277        self.classes_ = np.unique(y)
278
279        return self
280
281    def predict(self, X, **kwargs):
282        """Predict test data X.
283
284        Args:
285
286            X: {array-like}, shape = [n_samples, n_features]
287                Training vectors, where n_samples is the number
288                of samples and n_features is the number of features.
289
290            **kwargs: additional parameters to be passed to
291                    self.cook_test_set
292
293        Returns:
294
295            model predictions: {array-like}
296
297        """
298
299        return np.argmax(self.predict_proba(X, **kwargs), axis=1)
300
301    def predict_proba(self, X, **kwargs):
302        """Predict probabilities for test data X.
303
304        Args:
305
306            X: {array-like}, shape = [n_samples, n_features]
307                Training vectors, where n_samples is the number
308                of samples and n_features is the number of features.
309
310            **kwargs: additional parameters to be passed to
311                    self.cook_test_set
312
313        Returns:
314
315            probability estimates for test data: {array-like}
316
317        """
318        if len(X.shape) == 1:
319            n_features = X.shape[0]
320            new_X = mo.rbind(
321                X.reshape(1, n_features),
322                np.ones(n_features).reshape(1, n_features),
323            )
324
325            Z = self.cook_test_set(new_X, **kwargs)
326
327        else:
328            Z = self.cook_test_set(X, **kwargs)
329
330        ZB = mo.safe_sparse_dot(
331            Z,
332            self.beta_.reshape(
333                self.n_classes,
334                X.shape[1] + self.n_hidden_features + self.n_clusters,
335            ).T,
336        )
337
338        if self.family == "logit":
339            exp_ZB = np.exp(ZB)
340
341            return exp_ZB / exp_ZB.sum(axis=1)[:, None]
342
343        if self.family == "expit":
344            exp_ZB = expit(ZB)
345
346            return exp_ZB / exp_ZB.sum(axis=1)[:, None]
347
348        if self.family == "erf":
349            exp_ZB = 0.5 * (1 + erf(ZB))
350
351            return exp_ZB / exp_ZB.sum(axis=1)[:, None]
352
353    def score(self, X, y, scoring=None):
354        """Scoring function for classification.
355
356        Args:
357
358            X: {array-like}, shape = [n_samples, n_features]
359                Training vectors, where n_samples is the number
360                of samples and n_features is the number of features.
361
362            y: array-like, shape = [n_samples]
363                Target values.
364
365            scoring: str
366                scoring method (default is accuracy)
367
368        Returns:
369
370            score: float
371        """
372
373        if scoring is None:
374            scoring = "accuracy"
375
376        if scoring == "accuracy":
377            return skm2.accuracy_score(y, self.predict(X))
378
379        if scoring == "f1":
380            return skm2.f1_score(y, self.predict(X))
381
382        if scoring == "precision":
383            return skm2.precision_score(y, self.predict(X))
384
385        if scoring == "recall":
386            return skm2.recall_score(y, self.predict(X))
387
388        if scoring == "roc_auc":
389            return skm2.roc_auc_score(y, self.predict(X))
390
391        if scoring == "log_loss":
392            return skm2.log_loss(y, self.predict_proba(X))
393
394        if scoring == "balanced_accuracy":
395            return skm2.balanced_accuracy_score(y, self.predict(X))
396
397        if scoring == "average_precision":
398            return skm2.average_precision_score(y, self.predict(X))
399
400        if scoring == "neg_brier_score":
401            return -skm2.brier_score_loss(y, self.predict_proba(X))
402
403        if scoring == "neg_log_loss":
404            return -skm2.log_loss(y, self.predict_proba(X))
405
406    @property
407    def _estimator_type(self):
408        return "classifier"

Generalized 'linear' models using quasi-randomized networks (classification)

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

lambda1: float
    regularization parameter for GLM coefficients on original features

alpha1: float
    controls compromise between l1 and l2 norm of GLM coefficients on original features

lambda2: float
    regularization parameter for GLM coefficients on nonlinear features

alpha2: float
    controls compromise between l1 and l2 norm of GLM coefficients on nonlinear features

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

optimizer: object
    optimizer, from class Optimizer

backend: str.
    "cpu" or "gpu" or "tpu".

seed: int
    reproducibility seed for nodes_sim=='uniform'

Attributes:

beta_: vector
    regression coefficients

Examples:

See https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_classification.py

def fit(self, X, y, **kwargs):
213    def fit(self, X, y, **kwargs):
214        """Fit GLM model to training data (X, y).
215
216        Args:
217
218            X: {array-like}, shape = [n_samples, n_features]
219                Training vectors, where n_samples is the number
220                of samples and n_features is the number of features.
221
222            y: array-like, shape = [n_samples]
223                Target values.
224
225            **kwargs: additional parameters to be passed to
226                    self.cook_training_set or self.obj.fit
227
228        Returns:
229
230            self: object
231
232        """
233
234        assert mx.is_factor(
235            y
236        ), "y must contain only integers"  # change is_factor and subsampling everywhere
237
238        self.classes_ = np.unique(y)  # for compatibility with sklearn
239        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
240
241        self.beta_ = None
242
243        n, p = X.shape
244
245        self.group_index = n * X.shape[1]
246
247        self.n_classes = len(np.unique(y))
248
249        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
250
251        # Y = mo.one_hot_encode2(output_y, self.n_classes)
252        Y = self.optimizer.one_hot_encode(output_y, self.n_classes)
253
254        # initialization
255        if self.backend == "cpu":
256            beta_ = np.linalg.lstsq(scaled_Z, Y, rcond=None)[0]
257        else:
258            beta_ = jnp.linalg.lstsq(scaled_Z, Y, rcond=None)[0]
259
260        # optimization
261        # fit(self, loss_func, response, x0, **kwargs):
262        # loss_func(self, beta, group_index, X, y,
263        #          row_index=None, type_loss="gaussian",
264        #          **kwargs)
265        self.optimizer.fit(
266            self.loss_func,
267            response=y,
268            x0=beta_.flatten(order="F"),
269            group_index=self.group_index,
270            X=scaled_Z,
271            Y=Y,
272            y=y,
273            type_loss=self.family,
274        )
275
276        self.beta_ = self.optimizer.results[0]
277        self.classes_ = np.unique(y)
278
279        return self

Fit GLM model to training data (X, y).

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
def predict(self, X, **kwargs):
281    def predict(self, X, **kwargs):
282        """Predict test data X.
283
284        Args:
285
286            X: {array-like}, shape = [n_samples, n_features]
287                Training vectors, where n_samples is the number
288                of samples and n_features is the number of features.
289
290            **kwargs: additional parameters to be passed to
291                    self.cook_test_set
292
293        Returns:
294
295            model predictions: {array-like}
296
297        """
298
299        return np.argmax(self.predict_proba(X, **kwargs), axis=1)

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
def predict_proba(self, X, **kwargs):
301    def predict_proba(self, X, **kwargs):
302        """Predict probabilities for test data X.
303
304        Args:
305
306            X: {array-like}, shape = [n_samples, n_features]
307                Training vectors, where n_samples is the number
308                of samples and n_features is the number of features.
309
310            **kwargs: additional parameters to be passed to
311                    self.cook_test_set
312
313        Returns:
314
315            probability estimates for test data: {array-like}
316
317        """
318        if len(X.shape) == 1:
319            n_features = X.shape[0]
320            new_X = mo.rbind(
321                X.reshape(1, n_features),
322                np.ones(n_features).reshape(1, n_features),
323            )
324
325            Z = self.cook_test_set(new_X, **kwargs)
326
327        else:
328            Z = self.cook_test_set(X, **kwargs)
329
330        ZB = mo.safe_sparse_dot(
331            Z,
332            self.beta_.reshape(
333                self.n_classes,
334                X.shape[1] + self.n_hidden_features + self.n_clusters,
335            ).T,
336        )
337
338        if self.family == "logit":
339            exp_ZB = np.exp(ZB)
340
341            return exp_ZB / exp_ZB.sum(axis=1)[:, None]
342
343        if self.family == "expit":
344            exp_ZB = expit(ZB)
345
346            return exp_ZB / exp_ZB.sum(axis=1)[:, None]
347
348        if self.family == "erf":
349            exp_ZB = 0.5 * (1 + erf(ZB))
350
351            return exp_ZB / exp_ZB.sum(axis=1)[:, None]

Predict probabilities for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

probability estimates for test data: {array-like}
def score(self, X, y, scoring=None):
353    def score(self, X, y, scoring=None):
354        """Scoring function for classification.
355
356        Args:
357
358            X: {array-like}, shape = [n_samples, n_features]
359                Training vectors, where n_samples is the number
360                of samples and n_features is the number of features.
361
362            y: array-like, shape = [n_samples]
363                Target values.
364
365            scoring: str
366                scoring method (default is accuracy)
367
368        Returns:
369
370            score: float
371        """
372
373        if scoring is None:
374            scoring = "accuracy"
375
376        if scoring == "accuracy":
377            return skm2.accuracy_score(y, self.predict(X))
378
379        if scoring == "f1":
380            return skm2.f1_score(y, self.predict(X))
381
382        if scoring == "precision":
383            return skm2.precision_score(y, self.predict(X))
384
385        if scoring == "recall":
386            return skm2.recall_score(y, self.predict(X))
387
388        if scoring == "roc_auc":
389            return skm2.roc_auc_score(y, self.predict(X))
390
391        if scoring == "log_loss":
392            return skm2.log_loss(y, self.predict_proba(X))
393
394        if scoring == "balanced_accuracy":
395            return skm2.balanced_accuracy_score(y, self.predict(X))
396
397        if scoring == "average_precision":
398            return skm2.average_precision_score(y, self.predict(X))
399
400        if scoring == "neg_brier_score":
401            return -skm2.brier_score_loss(y, self.predict_proba(X))
402
403        if scoring == "neg_log_loss":
404            return -skm2.log_loss(y, self.predict_proba(X))

Scoring function for classification.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

scoring: str
    scoring method (default is accuracy)

Returns:

score: float
class GLMRegressor(nnetsauce.glm.glm.GLM, sklearn.base.RegressorMixin):
 22class GLMRegressor(GLM, RegressorMixin):
 23    """Generalized 'linear' models using quasi-randomized networks (regression)
 24
 25    Attributes:
 26
 27        n_hidden_features: int
 28            number of nodes in the hidden layer
 29
 30        lambda1: float
 31            regularization parameter for GLM coefficients on original features
 32
 33        alpha1: float
 34            controls compromize between l1 and l2 norm of GLM coefficients on original features
 35
 36        lambda2: float
 37            regularization parameter for GLM coefficients on nonlinear features
 38
 39        alpha2: float
 40            controls compromize between l1 and l2 norm of GLM coefficients on nonlinear features
 41
 42        family: str
 43            "gaussian", "laplace", "poisson", or "quantile" (for now)
 44
 45        level: int, default=50
 46            The level of the quantiles to compute for family = "quantile".
 47            Default is the median.
 48
 49        activation_name: str
 50            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 51
 52        a: float
 53            hyperparameter for 'prelu' or 'elu' activation function
 54
 55        nodes_sim: str
 56            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 57            'uniform'
 58
 59        bias: boolean
 60            indicates if the hidden layer contains a bias term (True) or not
 61            (False)
 62
 63        dropout: float
 64            regularization parameter; (random) percentage of nodes dropped out
 65            of the training
 66
 67        direct_link: boolean
 68            indicates if the original predictors are included (True) in model's
 69            fitting or not (False)
 70
 71        n_clusters: int
 72            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 73                no clustering)
 74
 75        cluster_encode: bool
 76            defines how the variable containing clusters is treated (default is one-hot)
 77            if `False`, then labels are used, without one-hot encoding
 78
 79        type_clust: str
 80            type of clustering method: currently k-means ('kmeans') or Gaussian
 81            Mixture Model ('gmm')
 82
 83        type_scaling: a tuple of 3 strings
 84            scaling methods for inputs, hidden layer, and clustering respectively
 85            (and when relevant).
 86            Currently available: standardization ('std') or MinMax scaling ('minmax')
 87
 88        optimizer: object
 89            optimizer, from class nnetsauce.utils.Optimizer
 90
 91        backend: str.
 92            "cpu" or "gpu" or "tpu".
 93
 94        seed: int
 95            reproducibility seed for nodes_sim=='uniform'
 96
 97        backend: str
 98            "cpu", "gpu", "tpu"
 99
100    Attributes:
101
102        beta_: vector
103            regression coefficients
104
105    Examples:
106
107        See [https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py)
108
109    """
110
111    # construct the object -----
112
113    def __init__(
114        self,
115        n_hidden_features=5,
116        lambda1=0.01,
117        alpha1=0.5,
118        lambda2=0.01,
119        alpha2=0.5,
120        family="gaussian",
121        level=50,
122        activation_name="relu",
123        a=0.01,
124        nodes_sim="sobol",
125        bias=True,
126        dropout=0,
127        direct_link=True,
128        n_clusters=2,
129        cluster_encode=True,
130        type_clust="kmeans",
131        type_scaling=("std", "std", "std"),
132        optimizer=Optimizer(),
133        backend="cpu",
134        seed=123,
135    ):
136        super().__init__(
137            n_hidden_features=n_hidden_features,
138            lambda1=lambda1,
139            alpha1=alpha1,
140            lambda2=lambda2,
141            alpha2=alpha2,
142            activation_name=activation_name,
143            a=a,
144            nodes_sim=nodes_sim,
145            bias=bias,
146            dropout=dropout,
147            direct_link=direct_link,
148            n_clusters=n_clusters,
149            cluster_encode=cluster_encode,
150            type_clust=type_clust,
151            type_scaling=type_scaling,
152            optimizer=optimizer,
153            backend=backend,
154            seed=seed,
155        )
156
157        self.family = family
158        self.level = level
159        self.q = self.level / 100
160
161    def gaussian_loss(self, y, row_index, XB):
162        return 0.5 * np.mean(np.square(y[row_index] - XB))
163
164    def laplace_loss(self, y, row_index, XB):
165        return 0.5 * np.mean(np.abs(y[row_index] - XB))
166
167    def poisson_loss(self, y, row_index, XB):
168        return -np.mean(y[row_index] * XB - np.exp(XB))
169
170    def pinball_loss(self, y, row_index, XB, tau=0.5):
171        y = np.array(y[row_index])
172        y_pred = np.array(XB)
173        return mean_pinball_loss(y, y_pred, alpha=tau)
174        # return np.mean(np.maximum(tau * residuals, (tau - 1) * residuals))
175
176    def loss_func(
177        self,
178        beta,
179        group_index,
180        X,
181        y,
182        row_index=None,
183        type_loss="gaussian",
184        **kwargs
185    ):
186        res = {
187            "gaussian": self.gaussian_loss,
188            "laplace": self.laplace_loss,
189            "poisson": self.poisson_loss,
190            "quantile": self.pinball_loss,
191        }
192
193        if type_loss != "quantile":
194            if row_index is None:
195                row_index = range(len(y))
196                XB = self.compute_XB(X, beta=beta)
197
198                return res[type_loss](y, row_index, XB) + self.compute_penalty(
199                    group_index=group_index, beta=beta
200                )
201
202            XB = self.compute_XB(X, beta=beta, row_index=row_index)
203
204            return res[type_loss](y, row_index, XB) + self.compute_penalty(
205                group_index=group_index, beta=beta
206            )
207
208        else:  # quantile
209            assert (
210                self.q > 0 and self.q < 1
211            ), "'tau' must be comprised 0 < tau < 1"
212
213            if row_index is None:
214                row_index = range(len(y))
215                XB = self.compute_XB(X, beta=beta)
216                return res[type_loss](y, row_index, XB, self.q)
217
218            XB = self.compute_XB(X, beta=beta, row_index=row_index)
219            return res[type_loss](y, row_index, XB, self.q)
220
221    def fit(self, X, y, **kwargs):
222        """Fit GLM model to training data (X, y).
223
224        Args:
225
226            X: {array-like}, shape = [n_samples, n_features]
227                Training vectors, where n_samples is the number
228                of samples and n_features is the number of features.
229
230            y: array-like, shape = [n_samples]
231                Target values.
232
233            **kwargs: additional parameters to be passed to
234                    self.cook_training_set or self.obj.fit
235
236        Returns:
237
238            self: object
239
240        """
241        self.beta_ = None
242        self.n_iter = 0
243
244        _, self.group_index = X.shape
245
246        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
247        # initialization
248        if self.backend == "cpu":
249            beta_ = np.linalg.lstsq(scaled_Z, centered_y, rcond=None)[0]
250        else:
251            beta_ = jnp.linalg.lstsq(scaled_Z, centered_y, rcond=None)[0]
252        # optimization
253        # fit(self, loss_func, response, x0, **kwargs):
254        # loss_func(self, beta, group_index, X, y,
255        #          row_index=None, type_loss="gaussian",
256        #          **kwargs)
257        self.optimizer.fit(
258            self.loss_func,
259            response=centered_y,
260            x0=beta_,
261            group_index=self.group_index,
262            X=scaled_Z,
263            y=centered_y,
264            type_loss=self.family,
265            **kwargs
266        )
267
268        self.beta_ = self.optimizer.results[0]
269
270        return self
271
272    def predict(self, X, **kwargs):
273        """Predict test data X.
274
275        Args:
276
277            X: {array-like}, shape = [n_samples, n_features]
278                Training vectors, where n_samples is the number
279                of samples and n_features is the number of features.
280
281            **kwargs: additional parameters to be passed to
282                    self.cook_test_set
283
284        Returns:
285
286            model predictions: {array-like}
287
288        """
289
290        if len(X.shape) == 1:
291            n_features = X.shape[0]
292            new_X = mo.rbind(
293                X.reshape(1, n_features),
294                np.ones(n_features).reshape(1, n_features),
295            )
296
297            return (
298                self.y_mean_
299                + np.dot(self.cook_test_set(new_X, **kwargs), self.beta_)
300            )[0]
301
302        return self.y_mean_ + np.dot(
303            self.cook_test_set(X, **kwargs), self.beta_
304        )
305
306    def score(self, X, y, scoring=None):
307        """Compute the score of the model.
308
309        Parameters:
310
311            X: {array-like}, shape = [n_samples, n_features]
312                Training vectors, where n_samples is the number
313                of samples and n_features is the number of features.
314
315            y: array-like, shape = [n_samples]
316                Target values.
317
318            scoring: str
319                scoring method
320
321        Returns:
322
323            score: float
324
325        """
326
327        if scoring is None:
328            return np.sqrt(np.mean((self.predict(X) - y) ** 2))
329
330        return skm2.get_scorer(scoring)(self, X, y)

Generalized 'linear' models using quasi-randomized networks (regression)

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

lambda1: float
    regularization parameter for GLM coefficients on original features

alpha1: float
    controls compromise between l1 and l2 norm of GLM coefficients on original features

lambda2: float
    regularization parameter for GLM coefficients on nonlinear features

alpha2: float
    controls compromise between l1 and l2 norm of GLM coefficients on nonlinear features

family: str
    "gaussian", "laplace", "poisson", or "quantile" (for now)

level: int, default=50
    The level of the quantiles to compute for family = "quantile".
    Default is the median.

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

optimizer: object
    optimizer, from class Optimizer

backend: str.
    "cpu" or "gpu" or "tpu".

seed: int
    reproducibility seed for nodes_sim=='uniform'

backend: str
    "cpu", "gpu", "tpu"

Attributes:

beta_: vector
    regression coefficients

Examples:

See [https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py)
def fit(self, X, y, **kwargs):
221    def fit(self, X, y, **kwargs):
222        """Fit GLM model to training data (X, y).
223
224        Args:
225
226            X: {array-like}, shape = [n_samples, n_features]
227                Training vectors, where n_samples is the number
228                of samples and n_features is the number of features.
229
230            y: array-like, shape = [n_samples]
231                Target values.
232
233            **kwargs: additional parameters to be passed to
234                    self.cook_training_set or self.obj.fit
235
236        Returns:
237
238            self: object
239
240        """
241        self.beta_ = None
242        self.n_iter = 0
243
244        _, self.group_index = X.shape
245
246        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
247        # initialization
248        if self.backend == "cpu":
249            beta_ = np.linalg.lstsq(scaled_Z, centered_y, rcond=None)[0]
250        else:
251            beta_ = jnp.linalg.lstsq(scaled_Z, centered_y, rcond=None)[0]
252        # optimization
253        # fit(self, loss_func, response, x0, **kwargs):
254        # loss_func(self, beta, group_index, X, y,
255        #          row_index=None, type_loss="gaussian",
256        #          **kwargs)
257        self.optimizer.fit(
258            self.loss_func,
259            response=centered_y,
260            x0=beta_,
261            group_index=self.group_index,
262            X=scaled_Z,
263            y=centered_y,
264            type_loss=self.family,
265            **kwargs
266        )
267
268        self.beta_ = self.optimizer.results[0]
269
270        return self

Fit GLM model to training data (X, y).

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
def predict(self, X, **kwargs):
272    def predict(self, X, **kwargs):
273        """Predict test data X.
274
275        Args:
276
277            X: {array-like}, shape = [n_samples, n_features]
278                Training vectors, where n_samples is the number
279                of samples and n_features is the number of features.
280
281            **kwargs: additional parameters to be passed to
282                    self.cook_test_set
283
284        Returns:
285
286            model predictions: {array-like}
287
288        """
289
290        if len(X.shape) == 1:
291            n_features = X.shape[0]
292            new_X = mo.rbind(
293                X.reshape(1, n_features),
294                np.ones(n_features).reshape(1, n_features),
295            )
296
297            return (
298                self.y_mean_
299                + np.dot(self.cook_test_set(new_X, **kwargs), self.beta_)
300            )[0]
301
302        return self.y_mean_ + np.dot(
303            self.cook_test_set(X, **kwargs), self.beta_
304        )

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
def score(self, X, y, scoring=None):
306    def score(self, X, y, scoring=None):
307        """Compute the score of the model.
308
309        Parameters:
310
311            X: {array-like}, shape = [n_samples, n_features]
312                Training vectors, where n_samples is the number
313                of samples and n_features is the number of features.
314
315            y: array-like, shape = [n_samples]
316                Target values.
317
318            scoring: str
319                scoring method
320
321        Returns:
322
323            score: float
324
325        """
326
327        if scoring is None:
328            return np.sqrt(np.mean((self.predict(X) - y) ** 2))
329
330        return skm2.get_scorer(scoring)(self, X, y)

Compute the score of the model.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

scoring: str
    scoring method

Returns:

score: float
class KernelRidge(sklearn.base.BaseEstimator, sklearn.base.RegressorMixin):
 18class KernelRidge(BaseEstimator, RegressorMixin):
 19    """
 20    Kernel Ridge Regression with optional GPU support, Matérn kernels, and automatic input standardization.
 21
 22    Parameters:
 23    - alpha: float
 24        Regularization parameter.
 25    - kernel: str
 26        Kernel type ("linear", "rbf", or "matern").
 27    - gamma: float
 28        Kernel coefficient for "rbf". Ignored for other kernels.
 29    - nu: float
 30        Smoothness parameter for the Matérn kernel. Default is 1.5.
 31    - length_scale: float
 32        Length scale parameter for the Matérn kernel. Default is 1.0.
 33    - backend: str
 34        "cpu" or "gpu" (uses JAX if "gpu").
 35    """
 36
 37    def __init__(
 38        self,
 39        alpha=1.0,
 40        kernel="rbf",
 41        gamma=None,
 42        nu=1.5,
 43        length_scale=1.0,
 44        backend="cpu",
 45    ):
 46        if not JAX_AVAILABLE and backend != "cpu":
 47            raise RuntimeError(
 48                "JAX is required for this feature. Install with: pip install yourpackage[jax]"
 49            )
 50        self.alpha = alpha
 51        self.alpha_ = alpha
 52        self.kernel = kernel
 53        self.gamma = gamma
 54        self.nu = nu
 55        self.length_scale = length_scale
 56        self.backend = backend
 57        self.scaler = StandardScaler()
 58
 59        if backend == "gpu" and not JAX_AVAILABLE:
 60            raise ImportError(
 61                "JAX is not installed. Please install JAX to use the GPU backend."
 62            )
 63
 64    def _linear_kernel(self, X, Y):
 65        return jnp.dot(X, Y.T) if self.backend == "gpu" else np.dot(X, Y.T)
 66
 67    def _rbf_kernel(self, X, Y):
 68        if self.gamma is None:
 69            self.gamma = 1.0 / X.shape[1]
 70        if self.backend == "gpu":
 71            sq_dists = (
 72                jnp.sum(X**2, axis=1)[:, None]
 73                + jnp.sum(Y**2, axis=1)
 74                - 2 * jnp.dot(X, Y.T)
 75            )
 76            return jnp.exp(-self.gamma * sq_dists)
 77        else:
 78            sq_dists = (
 79                np.sum(X**2, axis=1)[:, None]
 80                + np.sum(Y**2, axis=1)
 81                - 2 * np.dot(X, Y.T)
 82            )
 83            return np.exp(-self.gamma * sq_dists)
 84
 85    def _matern_kernel(self, X, Y):
 86        """
 87        Compute the Matérn kernel using JAX for GPU or NumPy for CPU.
 88
 89        Parameters:
 90        - X: array-like, shape (n_samples_X, n_features)
 91        - Y: array-like, shape (n_samples_Y, n_features)
 92
 93        Returns:
 94        - Kernel matrix, shape (n_samples_X, n_samples_Y)
 95        """
 96        if self.backend == "gpu":
 97            # Compute pairwise distances
 98            dists = jnp.sqrt(
 99                jnp.sum((X[:, None, :] - Y[None, :, :]) ** 2, axis=2)
100            )
101            scaled_dists = jnp.sqrt(2 * self.nu) * dists / self.length_scale
102
103            # Matérn kernel formula
104            coeff = (2 ** (1 - self.nu)) / jnp.exp(gammaln(self.nu))
105            matern_kernel = (
106                coeff * (scaled_dists**self.nu) * kv(self.nu, scaled_dists)
107            )
108            matern_kernel = jnp.where(
109                dists == 0, 1.0, matern_kernel
110            )  # Handle the case where distance is 0
111            return matern_kernel
112        else:
113            # Use NumPy for CPU
114            from scipy.special import (
115                gammaln,
116                kv,
117            )  # Ensure scipy.special is used for CPU
118
119            dists = np.sqrt(
120                np.sum((X[:, None, :] - Y[None, :, :]) ** 2, axis=2)
121            )
122            scaled_dists = np.sqrt(2 * self.nu) * dists / self.length_scale
123
124            # Matérn kernel formula
125            coeff = (2 ** (1 - self.nu)) / np.exp(gammaln(self.nu))
126            matern_kernel = (
127                coeff * (scaled_dists**self.nu) * kv(self.nu, scaled_dists)
128            )
129            matern_kernel = np.where(
130                dists == 0, 1.0, matern_kernel
131            )  # Handle the case where distance is 0
132            return matern_kernel
133
134    def _get_kernel(self, X, Y):
135        if self.kernel == "linear":
136            return self._linear_kernel(X, Y)
137        elif self.kernel == "rbf":
138            return self._rbf_kernel(X, Y)
139        elif self.kernel == "matern":
140            return self._matern_kernel(X, Y)
141        else:
142            raise ValueError(f"Unsupported kernel: {self.kernel}")
143
144    def fit(self, X, y):
145        """
146        Fit the Kernel Ridge Regression model.
147
148        Parameters:
149        - X: array-like, shape (n_samples, n_features)
150            Training data.
151        - y: array-like, shape (n_samples,)
152            Target values.
153        """
154        # Standardize the inputs
155        X = self.scaler.fit_transform(X)
156        self.X_fit_ = X
157
158        # Center the response
159        self.y_mean_ = np.mean(y)
160        y_centered = y - self.y_mean_
161
162        n_samples = X.shape[0]
163
164        # Compute the kernel matrix
165        K = self._get_kernel(X, X)
166        self.K_ = K
167        self.y_fit_ = y_centered
168
169        if isinstance(self.alpha, (list, np.ndarray)):
170            # If alpha is a list or array, compute LOOE for each alpha
171            self.alphas_ = self.alpha  # Store the list of alphas
172            self.dual_coefs_ = []  # Store dual coefficients for each alpha
173            self.looe_ = []  # Store LOOE for each alpha
174
175            for alpha in self.alpha:
176                G = K + alpha * np.eye(n_samples)
177                G_inv = np.linalg.inv(G)
178                diag_G_inv = np.diag(G_inv)
179                dual_coef = np.linalg.solve(G, y_centered)
180                looe = np.sum((dual_coef / diag_G_inv) ** 2)  # Compute LOOE
181                self.dual_coefs_.append(dual_coef)
182                self.looe_.append(looe)
183
184            # Select the best alpha based on the smallest LOOE
185            best_index = np.argmin(self.looe_)
186            self.alpha_ = self.alpha[best_index]
187            self.dual_coef_ = self.dual_coefs_[best_index]
188        else:
189            # If alpha is a single value, proceed as usual
190            if self.backend == "gpu":
191                self.dual_coef_ = jnp.linalg.solve(
192                    K + self.alpha * jnp.eye(n_samples), y_centered
193                )
194            else:
195                self.dual_coef_ = np.linalg.solve(
196                    K + self.alpha * np.eye(n_samples), y_centered
197                )
198
199        return self
200
201    def predict(self, X, probs=False):
202        """
203        Predict using the Kernel Ridge Regression model.
204
205        Parameters:
206        - X: array-like, shape (n_samples, n_features)
207            Test data.
208
209        Returns:
210        - Predicted values, shape (n_samples,).
211        """
212        # Standardize the inputs
213        X = self.scaler.transform(X)
214        K = self._get_kernel(X, self.X_fit_)
215        if self.backend == "gpu":
216            preds = jnp.dot(K, self.dual_coef_) + self.y_mean_
217            if probs:
218                # Compute similarity to self.X_fit_
219                similarities = jnp.dot(
220                    preds, self.X_fit_.T
221                )  # Shape: (n_samples, n_fit_)
222                # Apply softmax to get probabilities
223                return jaxsoftmax(similarities, axis=1)
224            return preds
225        else:
226            preds = np.dot(K, self.dual_coef_) + self.y_mean_
227            if probs:
228                # Compute similarity to self.X_fit_
229                similarities = np.dot(
230                    preds, self.X_fit_.T
231                )  # Shape: (n_samples, n_fit_)
232                # Apply softmax to get probabilities
233                return softmax(similarities, axis=1)
234            return preds
235
236    def partial_fit(self, X, y):
237        """
238        Incrementally fit the Kernel Ridge Regression model with new data using a recursive approach.
239
240        Parameters:
241        - X: array-like, shape (n_samples, n_features)
242            New training data.
243        - y: array-like, shape (n_samples,)
244            New target values.
245
246        Returns:
247        - self: object
248            The updated model.
249        """
250        # Standardize the inputs
251        X = (
252            self.scaler.fit_transform(X)
253            if not hasattr(self, "X_fit_")
254            else self.scaler.transform(X)
255        )
256
257        if not hasattr(self, "X_fit_"):
258            # Initialize with the first batch of data
259            self.X_fit_ = X
260
261            # Center the response
262            self.y_mean_ = np.mean(y)
263            y_centered = y - self.y_mean_
264            self.y_fit_ = y_centered
265
266            n_samples = X.shape[0]
267
268            # Compute the kernel matrix for the initial data
269            self.K_ = self._get_kernel(X, X)
270
271            # Initialize dual coefficients for each alpha
272            if isinstance(self.alpha, (list, np.ndarray)):
273                self.dual_coefs_ = [np.zeros(n_samples) for _ in self.alpha]
274            else:
275                self.dual_coef_ = np.zeros(n_samples)
276        else:
277            # Incrementally update with new data
278            y_centered = y - self.y_mean_  # Center the new batch of responses
279            for x_new, y_new in zip(X, y_centered):
280                x_new = x_new.reshape(1, -1)  # Ensure x_new is 2D
281                k_new = self._get_kernel(self.X_fit_, x_new).flatten()
282
283                # Compute the kernel value for the new data point
284                k_self = self._get_kernel(x_new, x_new).item()
285
286                if isinstance(self.alpha, (list, np.ndarray)):
287                    # Update dual coefficients for each alpha
288                    for idx, alpha in enumerate(self.alpha):
289                        gamma_new = 1 / (k_self + alpha)
290                        residual = y_new - np.dot(self.dual_coefs_[idx], k_new)
291                        self.dual_coefs_[idx] = np.append(
292                            self.dual_coefs_[idx], gamma_new * residual
293                        )
294                else:
295                    # Update dual coefficients for a single alpha
296                    gamma_new = 1 / (k_self + self.alpha)
297                    residual = y_new - np.dot(self.dual_coef_, k_new)
298                    self.dual_coef_ = np.append(
299                        self.dual_coef_, gamma_new * residual
300                    )
301
302                # Update the kernel matrix
303                self.K_ = np.block(
304                    [
305                        [self.K_, k_new[:, None]],
306                        [k_new[None, :], np.array([[k_self]])],
307                    ]
308                )
309
310                # Update the stored data
311                self.X_fit_ = np.vstack([self.X_fit_, x_new])
312                self.y_fit_ = np.append(self.y_fit_, y_new)
313
314        # Select the best alpha based on LOOE after the batch
315        if isinstance(self.alpha, (list, np.ndarray)):
316            self.looe_ = []
317            for idx, alpha in enumerate(self.alpha):
318                G = self.K_ + alpha * np.eye(self.K_.shape[0])
319                G_inv = np.linalg.inv(G)
320                diag_G_inv = np.diag(G_inv)
321                looe = np.sum((self.dual_coefs_[idx] / diag_G_inv) ** 2)
322                self.looe_.append(looe)
323
324            # Select the best alpha
325            best_index = np.argmin(self.looe_)
326            self.alpha_ = self.alpha[best_index]
327            self.dual_coef_ = self.dual_coefs_[best_index]
328
329        return self

Kernel Ridge Regression with optional GPU support, Matérn kernels, and automatic input standardization.

Parameters:

  • alpha: float Regularization parameter.
  • kernel: str Kernel type ("linear", "rbf", or "matern").
  • gamma: float Kernel coefficient for "rbf". Ignored for other kernels.
  • nu: float Smoothness parameter for the Matérn kernel. Default is 1.5.
  • length_scale: float Length scale parameter for the Matérn kernel. Default is 1.0.
  • backend: str "cpu" or "gpu" (uses JAX if "gpu").
def fit(self, X, y):
144    def fit(self, X, y):
145        """
146        Fit the Kernel Ridge Regression model.
147
148        Parameters:
149        - X: array-like, shape (n_samples, n_features)
150            Training data.
151        - y: array-like, shape (n_samples,)
152            Target values.
153        """
154        # Standardize the inputs
155        X = self.scaler.fit_transform(X)
156        self.X_fit_ = X
157
158        # Center the response
159        self.y_mean_ = np.mean(y)
160        y_centered = y - self.y_mean_
161
162        n_samples = X.shape[0]
163
164        # Compute the kernel matrix
165        K = self._get_kernel(X, X)
166        self.K_ = K
167        self.y_fit_ = y_centered
168
169        if isinstance(self.alpha, (list, np.ndarray)):
170            # If alpha is a list or array, compute LOOE for each alpha
171            self.alphas_ = self.alpha  # Store the list of alphas
172            self.dual_coefs_ = []  # Store dual coefficients for each alpha
173            self.looe_ = []  # Store LOOE for each alpha
174
175            for alpha in self.alpha:
176                G = K + alpha * np.eye(n_samples)
177                G_inv = np.linalg.inv(G)
178                diag_G_inv = np.diag(G_inv)
179                dual_coef = np.linalg.solve(G, y_centered)
180                looe = np.sum((dual_coef / diag_G_inv) ** 2)  # Compute LOOE
181                self.dual_coefs_.append(dual_coef)
182                self.looe_.append(looe)
183
184            # Select the best alpha based on the smallest LOOE
185            best_index = np.argmin(self.looe_)
186            self.alpha_ = self.alpha[best_index]
187            self.dual_coef_ = self.dual_coefs_[best_index]
188        else:
189            # If alpha is a single value, proceed as usual
190            if self.backend == "gpu":
191                self.dual_coef_ = jnp.linalg.solve(
192                    K + self.alpha * jnp.eye(n_samples), y_centered
193                )
194            else:
195                self.dual_coef_ = np.linalg.solve(
196                    K + self.alpha * np.eye(n_samples), y_centered
197                )
198
199        return self

Fit the Kernel Ridge Regression model.

Parameters:

  • X: array-like, shape (n_samples, n_features) Training data.
  • y: array-like, shape (n_samples,) Target values.
def predict(self, X, probs=False):
201    def predict(self, X, probs=False):
202        """
203        Predict using the Kernel Ridge Regression model.
204
205        Parameters:
206        - X: array-like, shape (n_samples, n_features)
207            Test data.
208
209        Returns:
210        - Predicted values, shape (n_samples,).
211        """
212        # Standardize the inputs
213        X = self.scaler.transform(X)
214        K = self._get_kernel(X, self.X_fit_)
215        if self.backend == "gpu":
216            preds = jnp.dot(K, self.dual_coef_) + self.y_mean_
217            if probs:
218                # Compute similarity to self.X_fit_
219                similarities = jnp.dot(
220                    preds, self.X_fit_.T
221                )  # Shape: (n_samples, n_fit_)
222                # Apply softmax to get probabilities
223                return jaxsoftmax(similarities, axis=1)
224            return preds
225        else:
226            preds = np.dot(K, self.dual_coef_) + self.y_mean_
227            if probs:
228                # Compute similarity to self.X_fit_
229                similarities = np.dot(
230                    preds, self.X_fit_.T
231                )  # Shape: (n_samples, n_fit_)
232                # Apply softmax to get probabilities
233                return softmax(similarities, axis=1)
234            return preds

Predict using the Kernel Ridge Regression model.

Parameters:

  • X: array-like, shape (n_samples, n_features) Test data.

Returns:

  • Predicted values, shape (n_samples,).
class LazyClassifier(nnetsauce.LazyDeepClassifier):
class LazyClassifier(LazyDeepClassifier):
    """
        Fit -- almost -- all the classification algorithms wrapped in
        nnetsauce's CustomClassifier and return their scores (no layers).

    Parameters:

        verbose: int, optional (default=0)
            Any positive number for verbosity.

        ignore_warnings: bool, optional (default=True)
            When set to True, warnings related to algorithms that are not
            able to run are ignored.

        custom_metric: function, optional (default=None)
            When a function is provided, models are also evaluated with
            this custom evaluation metric.

        predictions: bool, optional (default=False)
            When set to True, the predictions of all the models are
            returned as a dataframe.

        sort_by: string, optional (default='Accuracy')
            Sort models by a metric. Available options are 'Accuracy',
            'Balanced Accuracy', 'ROC AUC', 'F1 Score' or a custom metric
            identified by its name and provided by custom_metric.

        random_state: int, optional (default=42)
            Reproducibility seed.

        estimators: list, optional (default='all')
            List of estimator names, or just 'all'.

        preprocess: bool
            Preprocessing is done when set to True.

        n_jobs: int, when possible, run in parallel
            For now, only used by individual models that support it.

        All the other parameters are the same as CustomClassifier's.

    Attributes:

        models_: dict-object
            Returns a dictionary with each model pipeline as value
            with key as name of models.

        best_model_: object
            Returns the best model pipeline based on the sort_by metric.

    Examples:

        import nnetsauce as ns
        import numpy as np
        from sklearn import datasets
        from sklearn.utils import shuffle

        dataset = datasets.load_iris()
        X = dataset.data
        y = dataset.target
        X, y = shuffle(X, y, random_state=123)
        X = X.astype(np.float32)
        y = y.astype(np.float32)
        X_train, X_test = X[:100], X[100:]
        y_train, y_test = y[:100], y[100:]

        clf = ns.LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
        models, predictions = clf.fit(X_train, X_test, y_train, y_test)
        model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test)
        print(models)

    """

    def __init__(
        self,
        verbose=0,
        ignore_warnings=True,
        custom_metric=None,
        predictions=False,
        sort_by="Accuracy",
        random_state=42,
        estimators="all",
        preprocess=False,
        n_jobs=None,
        # CustomClassifier attributes
        obj=None,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        seed=123,
        backend="cpu",
    ):
        # Options that drive the benchmarking loop itself.
        benchmark_options = dict(
            verbose=verbose,
            ignore_warnings=ignore_warnings,
            custom_metric=custom_metric,
            predictions=predictions,
            sort_by=sort_by,
            random_state=random_state,
            estimators=estimators,
            preprocess=preprocess,
            n_jobs=n_jobs,
        )
        # Options forwarded unchanged to the CustomClassifier wrappers.
        custom_options = dict(
            obj=obj,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )
        # The only difference with LazyDeepClassifier: the depth is not
        # exposed to the caller and is always passed as n_layers=1.
        super().__init__(n_layers=1, **benchmark_options, **custom_options)

Fitting -- almost -- all the classification algorithms with nnetsauce's CustomClassifier and returning their scores (no layers).

Parameters:

verbose: int, optional (default=0)
    Any positive number for verbosity.

ignore_warnings: bool, optional (default=True)
    When set to True, warnings related to algorithms that are not able to run are ignored.

custom_metric: function, optional (default=None)
    When function is provided, models are evaluated based on the custom evaluation metric provided.

predictions: bool, optional (default=False)
    When set to True, the predictions of all the models are returned as a dataframe.

sort_by: string, optional (default='Accuracy')
    Sort models by a metric. Available options are 'Accuracy', 'Balanced Accuracy', 'ROC AUC', 'F1 Score'
    or a custom metric identified by its name and provided by custom_metric.

random_state: int, optional (default=42)
    Reproducibility seed.

estimators: list, optional (default='all')
    list of Estimators names or just 'all' (default='all')

preprocess: bool
    preprocessing is done when set to True

n_jobs : int, when possible, run in parallel
    For now, only used by individual models that support it.

All the other parameters are the same as CustomClassifier's.

Attributes:

models_: dict-object
    Returns a dictionary with each model pipeline as value
    with key as name of models.

best_model_: object
    Returns the best model pipeline based on the sort_by metric.

Examples:

import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle

dataset = datasets.load_iris()
X = dataset.data
y = dataset.target
X, y = shuffle(X, y, random_state=123)
X = X.astype(np.float32)
y = y.astype(np.float32)
X_train, X_test = X[:100], X[100:]
y_train, y_test = y[:100], y[100:]

clf = ns.LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)
model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test)
print(models)
class LazyRegressor(nnetsauce.LazyDeepRegressor):
class LazyRegressor(LazyDeepRegressor):
    """
        Fit -- almost -- all the regression algorithms wrapped in
        nnetsauce's CustomRegressor and return their scores.

    Parameters:

        verbose: int, optional (default=0)
            Any positive number for verbosity.

        ignore_warnings: bool, optional (default=True)
            When set to True, warnings related to algorithms that are not
            able to run are ignored.

        custom_metric: function, optional (default=None)
            When a function is provided, models are also evaluated with
            this custom evaluation metric.

        predictions: bool, optional (default=False)
            When set to True, the predictions of all the models are
            returned as a dataframe.

        sort_by: string, optional (default='RMSE')
            Sort models by a metric. Available options are 'R-Squared',
            'Adjusted R-Squared', 'RMSE', 'Time Taken' and 'Custom Metric',
            or a custom metric identified by its name and provided by
            custom_metric.

        random_state: int, optional (default=42)
            Reproducibility seed.

        estimators: list, optional (default='all')
            List of estimator names, or just 'all'.

        preprocess: bool
            Preprocessing is done when set to True.

        n_jobs: int, when possible, run in parallel
            For now, only used by individual models that support it.

        All the other parameters are the same as CustomRegressor's.

    Attributes:

        models_: dict-object
            Returns a dictionary with each model pipeline as value
            with key as name of models.

        best_model_: object
            Returns the best model pipeline based on the sort_by metric.

    Examples:

        import nnetsauce as ns
        import numpy as np
        from sklearn import datasets
        from sklearn.utils import shuffle

        diabetes = datasets.load_diabetes()
        X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
        X = X.astype(np.float32)

        offset = int(X.shape[0] * 0.9)
        X_train, y_train = X[:offset], y[:offset]
        X_test, y_test = X[offset:], y[offset:]

        reg = ns.LazyRegressor(verbose=0, ignore_warnings=False,
                            custom_metric=None)
        models, predictions = reg.fit(X_train, X_test, y_train, y_test)
        print(models)

    """

    def __init__(
        self,
        verbose=0,
        ignore_warnings=True,
        custom_metric=None,
        predictions=False,
        sort_by="RMSE",
        random_state=42,
        estimators="all",
        preprocess=False,
        n_jobs=None,
        # CustomRegressor attributes
        obj=None,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        seed=123,
        backend="cpu",
    ):
        # Options that drive the benchmarking loop itself.
        benchmark_options = dict(
            verbose=verbose,
            ignore_warnings=ignore_warnings,
            custom_metric=custom_metric,
            predictions=predictions,
            sort_by=sort_by,
            random_state=random_state,
            estimators=estimators,
            preprocess=preprocess,
            n_jobs=n_jobs,
        )
        # Options forwarded unchanged to the CustomRegressor wrappers.
        custom_options = dict(
            obj=obj,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )
        # The only difference with LazyDeepRegressor: the depth is not
        # exposed to the caller and is always passed as n_layers=1.
        super().__init__(n_layers=1, **benchmark_options, **custom_options)

Fitting -- almost -- all the regression algorithms with nnetsauce's CustomRegressor and returning their scores.

Parameters:

verbose: int, optional (default=0)
    Any positive number for verbosity.

ignore_warnings: bool, optional (default=True)
    When set to True, warnings related to algorithms that are not able to run are ignored.

custom_metric: function, optional (default=None)
    When function is provided, models are evaluated based on the custom evaluation metric provided.

predictions: bool, optional (default=False)
    When set to True, the predictions of all the models are returned as a dataframe.

sort_by: string, optional (default='RMSE')
    Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken' and 'Custom Metric',
    or a custom metric identified by its name and provided by custom_metric.

random_state: int, optional (default=42)
    Reproducibility seed.

estimators: list, optional (default='all')
    list of Estimators names or just 'all' (default='all')

preprocess: bool
    preprocessing is done when set to True

n_jobs : int, when possible, run in parallel
    For now, only used by individual models that support it.

All the other parameters are the same as CustomRegressor's.

Attributes:

models_: dict-object
    Returns a dictionary with each model pipeline as value
    with key as name of models.

best_model_: object
    Returns the best model pipeline based on the sort_by metric.

Examples:

import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle

diabetes = datasets.load_diabetes()
X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
X = X.astype(np.float32)

offset = int(X.shape[0] * 0.9)
X_train, y_train = X[:offset], y[:offset]
X_test, y_test = X[offset:], y[offset:]

reg = ns.LazyRegressor(verbose=0, ignore_warnings=False,
                    custom_metric=None)
models, predictions = reg.fit(X_train, X_test, y_train, y_test)
print(models)
class LazyDeepClassifier(nnetsauce.custom.custom.Custom, sklearn.base.ClassifierMixin):
 94class LazyDeepClassifier(Custom, ClassifierMixin):
 95    """
 96
 97    Fitting -- almost -- all the classification algorithms with layers of
 98    nnetsauce's CustomClassifier and returning their scores.
 99
100    Parameters:
101
102        verbose: int, optional (default=0)
103            Any positive number for verbosity.
104
105        ignore_warnings: bool, optional (default=True)
106            When set to True, the warning related to algorigms that are not
107            able to run are ignored.
108
109        custom_metric: function, optional (default=None)
110            When function is provided, models are evaluated based on the custom
111              evaluation metric provided.
112
113        predictions: bool, optional (default=False)
114            When set to True, the predictions of all the models models are
115            returned as data frame.
116
117        sort_by: string, optional (default='Accuracy')
118            Sort models by a metric. Available options are 'Accuracy',
119            'Balanced Accuracy', 'ROC AUC', 'F1 Score' or a custom metric
120            identified by its name and provided by custom_metric.
121
122        random_state: int, optional (default=42)
123            Reproducibiility seed.
124
125        estimators: list, optional (default='all')
126            list of Estimators names or just 'all' for > 90 classifiers
127            (default='all')
128
129        preprocess: bool, preprocessing is done when set to True
130
131        n_jobs: int, when possible, run in parallel
132            For now, only used by individual models that support it.
133
134        n_layers: int, optional (default=3)
135            Number of layers of CustomClassifiers to be used.
136
137        All the other parameters are the same as CustomClassifier's.
138
139    Attributes:
140
141        models_: dict-object
142            Returns a dictionary with each model pipeline as value
143            with key as name of models.
144
145        best_model_: object
146            Returns the best model pipeline.
147
148    Examples
149
150        ```python
151        import nnetsauce as ns
152        from sklearn.datasets import load_breast_cancer
153        from sklearn.model_selection import train_test_split
154        data = load_breast_cancer()
155        X = data.data
156        y= data.target
157        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2,
158            random_state=123)
159        clf = ns.LazyDeepClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
160        models, predictions = clf.fit(X_train, X_test, y_train, y_test)
161        model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test)
162        print(models)
163        ```
164
165    """
166
167    def __init__(
168        self,
169        verbose=0,
170        ignore_warnings=True,
171        custom_metric=None,
172        predictions=False,
173        sort_by="Accuracy",
174        random_state=42,
175        estimators="all",
176        preprocess=False,
177        n_jobs=None,
178        # Defining depth
179        n_layers=3,
180        # CustomClassifier attributes
181        obj=None,
182        n_hidden_features=5,
183        activation_name="relu",
184        a=0.01,
185        nodes_sim="sobol",
186        bias=True,
187        dropout=0,
188        direct_link=True,
189        n_clusters=2,
190        cluster_encode=True,
191        type_clust="kmeans",
192        type_scaling=("std", "std", "std"),
193        col_sample=1,
194        row_sample=1,
195        seed=123,
196        backend="cpu",
197    ):
198        self.verbose = verbose
199        self.ignore_warnings = ignore_warnings
200        self.custom_metric = custom_metric
201        self.predictions = predictions
202        self.sort_by = sort_by
203        self.models_ = {}
204        self.best_model_ = None
205        self.random_state = random_state
206        self.estimators = estimators
207        self.preprocess = preprocess
208        self.n_layers = n_layers - 1
209        self.n_jobs = n_jobs
210        super().__init__(
211            obj=obj,
212            n_hidden_features=n_hidden_features,
213            activation_name=activation_name,
214            a=a,
215            nodes_sim=nodes_sim,
216            bias=bias,
217            dropout=dropout,
218            direct_link=direct_link,
219            n_clusters=n_clusters,
220            cluster_encode=cluster_encode,
221            type_clust=type_clust,
222            type_scaling=type_scaling,
223            col_sample=col_sample,
224            row_sample=row_sample,
225            seed=seed,
226            backend=backend,
227        )
228
229    def fit(self, X_train, X_test, y_train, y_test):
230        """Fit classifiers to X_train and y_train, predict and score on X_test,
231        y_test.
232
233        Parameters:
234
235            X_train: array-like,
236                Training vectors, where rows is the number of samples
237                and columns is the number of features.
238
239            X_test: array-like,
240                Testing vectors, where rows is the number of samples
241                and columns is the number of features.
242
243            y_train: array-like,
244                Training vectors, where rows is the number of samples
245                and columns is the number of features.
246
247            y_test: array-like,
248                Testing vectors, where rows is the number of samples
249                and columns is the number of features.
250
251        Returns:
252
253            scores: Pandas DataFrame
254                Returns metrics of all the models in a Pandas DataFrame.
255
256            predictions: Pandas DataFrame
257                Returns predictions of all the models in a Pandas DataFrame.
258        """
259        Accuracy = []
260        B_Accuracy = []
261        ROC_AUC = []
262        F1 = []
263        names = []
264        TIME = []
265        predictions = {}
266
267        if self.custom_metric is not None:
268            CUSTOM_METRIC = []
269
270        if isinstance(X_train, np.ndarray):
271            X_train = pd.DataFrame(X_train)
272            X_test = pd.DataFrame(X_test)
273
274        numeric_features = X_train.select_dtypes(include=[np.number]).columns
275        categorical_features = X_train.select_dtypes(include=["object"]).columns
276
277        categorical_low, categorical_high = get_card_split(
278            X_train, categorical_features
279        )
280
281        if self.preprocess is True:
282            preprocessor = ColumnTransformer(
283                transformers=[
284                    ("numeric", numeric_transformer, numeric_features),
285                    (
286                        "categorical_low",
287                        categorical_transformer_low,
288                        categorical_low,
289                    ),
290                    (
291                        "categorical_high",
292                        categorical_transformer_high,
293                        categorical_high,
294                    ),
295                ]
296            )
297
298        # baseline models
299        try:
300            baseline_names = ["RandomForestClassifier", "XGBClassifier"]
301            baseline_models = [RandomForestClassifier(), xgb.XGBClassifier()]
302        except Exception as exception:
303            baseline_names = ["RandomForestClassifier"]
304            baseline_models = [RandomForestClassifier()]
305
306        for name, model in zip(baseline_names, baseline_models):
307            start = time.time()
308            try:
309                model.fit(X_train, y_train)
310                self.models_[name] = model
311                y_pred = model.predict(X_test)
312                accuracy = accuracy_score(y_test, y_pred, normalize=True)
313                b_accuracy = balanced_accuracy_score(y_test, y_pred)
314                f1 = f1_score(y_test, y_pred, average="weighted")
315                try:
316                    roc_auc = roc_auc_score(y_test, y_pred)
317                except Exception as exception:
318                    roc_auc = None
319                    if self.ignore_warnings is False:
320                        print("ROC AUC couldn't be calculated for " + name)
321                        print(exception)
322                names.append(name)
323                Accuracy.append(accuracy)
324                B_Accuracy.append(b_accuracy)
325                ROC_AUC.append(roc_auc)
326                F1.append(f1)
327                TIME.append(time.time() - start)
328                if self.custom_metric is not None:
329                    custom_metric = self.custom_metric(y_test, y_pred)
330                    CUSTOM_METRIC.append(custom_metric)
331                if self.verbose > 0:
332                    if self.custom_metric is not None:
333                        print(
334                            {
335                                "Model": name,
336                                "Accuracy": accuracy,
337                                "Balanced Accuracy": b_accuracy,
338                                "ROC AUC": roc_auc,
339                                "F1 Score": f1,
340                                self.custom_metric.__name__: custom_metric,
341                                "Time taken": time.time() - start,
342                            }
343                        )
344                    else:
345                        print(
346                            {
347                                "Model": name,
348                                "Accuracy": accuracy,
349                                "Balanced Accuracy": b_accuracy,
350                                "ROC AUC": roc_auc,
351                                "F1 Score": f1,
352                                "Time taken": time.time() - start,
353                            }
354                        )
355                if self.predictions:
356                    predictions[name] = y_pred
357            except Exception as exception:
358                if self.ignore_warnings is False:
359                    print(name + " model failed to execute")
360                    print(exception)
361
362        if self.estimators == "all":
363            self.classifiers = [
364                item
365                for sublist in [
366                    DEEPCLASSIFIERS,
367                    DEEPMULTITASKCLASSIFIERS,
368                    DEEPSIMPLEMULTITASKCLASSIFIERS,
369                ]
370                for item in sublist
371            ]
372        else:
373            self.classifiers = (
374                [
375                    ("DeepCustomClassifier(" + est[0] + ")", est[1])
376                    for est in all_estimators()
377                    if (
378                        issubclass(est[1], ClassifierMixin)
379                        and (est[0] in self.estimators)
380                    )
381                ]
382                + [
383                    (
384                        "DeepMultitaskClassifier(" + est[0] + ")",
385                        partial(MultitaskClassifier, obj=est[1]()),
386                    )
387                    for est in all_estimators()
388                    if (
389                        issubclass(est[1], RegressorMixin)
390                        and (est[0] in self.estimators)
391                    )
392                ]
393                + [
394                    (
395                        "DeepSimpleMultitaskClassifier(" + est[0] + ")",
396                        partial(SimpleMultitaskClassifier, obj=est[1]()),
397                    )
398                    for est in all_estimators()
399                    if (
400                        issubclass(est[1], RegressorMixin)
401                        and (est[0] in self.estimators)
402                    )
403                ]
404            )
405
406        if self.preprocess is True:
407            for name, model in tqdm(self.classifiers):  # do parallel exec
408                other_args = (
409                    {}
410                )  # use this trick for `random_state` too --> refactor
411                try:
412                    if (
413                        "n_jobs" in model().get_params().keys()
414                        and name.find("LogisticRegression") == -1
415                    ):
416                        other_args["n_jobs"] = self.n_jobs
417                except Exception:
418                    pass
419
420                start = time.time()
421
422                try:
423                    if "random_state" in model().get_params().keys():
424                        layer_clf = CustomClassifier(
425                            obj=model(random_state=self.random_state),
426                            n_hidden_features=self.n_hidden_features,
427                            activation_name=self.activation_name,
428                            a=self.a,
429                            nodes_sim=self.nodes_sim,
430                            bias=self.bias,
431                            dropout=self.dropout,
432                            direct_link=self.direct_link,
433                            n_clusters=self.n_clusters,
434                            cluster_encode=self.cluster_encode,
435                            type_clust=self.type_clust,
436                            type_scaling=self.type_scaling,
437                            col_sample=self.col_sample,
438                            row_sample=self.row_sample,
439                            seed=self.seed,
440                            backend=self.backend,
441                            cv_calibration=None,
442                        )
443
444                    else:
445                        layer_clf = CustomClassifier(
446                            obj=model(),
447                            n_hidden_features=self.n_hidden_features,
448                            activation_name=self.activation_name,
449                            a=self.a,
450                            nodes_sim=self.nodes_sim,
451                            bias=self.bias,
452                            dropout=self.dropout,
453                            direct_link=self.direct_link,
454                            n_clusters=self.n_clusters,
455                            cluster_encode=self.cluster_encode,
456                            type_clust=self.type_clust,
457                            type_scaling=self.type_scaling,
458                            col_sample=self.col_sample,
459                            row_sample=self.row_sample,
460                            seed=self.seed,
461                            backend=self.backend,
462                            cv_calibration=None,
463                        )
464
465                    layer_clf.fit(X_train, y_train)
466
467                    for _ in range(self.n_layers):
468                        layer_clf = deepcopy(
469                            CustomClassifier(
470                                obj=layer_clf,
471                                n_hidden_features=self.n_hidden_features,
472                                activation_name=self.activation_name,
473                                a=self.a,
474                                nodes_sim=self.nodes_sim,
475                                bias=self.bias,
476                                dropout=self.dropout,
477                                direct_link=self.direct_link,
478                                n_clusters=self.n_clusters,
479                                cluster_encode=self.cluster_encode,
480                                type_clust=self.type_clust,
481                                type_scaling=self.type_scaling,
482                                col_sample=self.col_sample,
483                                row_sample=self.row_sample,
484                                seed=self.seed,
485                                backend=self.backend,
486                                cv_calibration=None,
487                            )
488                        )
489
490                    pipe = Pipeline(
491                        [
492                            ("preprocessor", preprocessor),
493                            ("classifier", layer_clf),
494                        ]
495                    )
496
497                    pipe.fit(X_train, y_train)
498                    self.models_[name] = pipe
499                    y_pred = pipe.predict(X_test)
500                    accuracy = accuracy_score(y_test, y_pred, normalize=True)
501                    b_accuracy = balanced_accuracy_score(y_test, y_pred)
502                    f1 = f1_score(y_test, y_pred, average="weighted")
503                    try:
504                        roc_auc = roc_auc_score(y_test, y_pred)
505                    except Exception as exception:
506                        roc_auc = None
507                        if self.ignore_warnings is False:
508                            print("ROC AUC couldn't be calculated for " + name)
509                            print(exception)
510                    names.append(name)
511                    Accuracy.append(accuracy)
512                    B_Accuracy.append(b_accuracy)
513                    ROC_AUC.append(roc_auc)
514                    F1.append(f1)
515                    TIME.append(time.time() - start)
516                    if self.custom_metric is not None:
517                        custom_metric = self.custom_metric(y_test, y_pred)
518                        CUSTOM_METRIC.append(custom_metric)
519                    if self.verbose > 0:
520                        if self.custom_metric is not None:
521                            print(
522                                {
523                                    "Model": name,
524                                    "Accuracy": accuracy,
525                                    "Balanced Accuracy": b_accuracy,
526                                    "ROC AUC": roc_auc,
527                                    "F1 Score": f1,
528                                    self.custom_metric.__name__: custom_metric,
529                                    "Time taken": time.time() - start,
530                                }
531                            )
532                        else:
533                            print(
534                                {
535                                    "Model": name,
536                                    "Accuracy": accuracy,
537                                    "Balanced Accuracy": b_accuracy,
538                                    "ROC AUC": roc_auc,
539                                    "F1 Score": f1,
540                                    "Time taken": time.time() - start,
541                                }
542                            )
543                    if self.predictions:
544                        predictions[name] = y_pred
545                except Exception as exception:
546                    if self.ignore_warnings is False:
547                        print(name + " model failed to execute")
548                        print(exception)
549
550        else:  # no preprocessing
551            for name, model in tqdm(self.classifiers):  # do parallel exec
552                start = time.time()
553                try:
554                    if "random_state" in model().get_params().keys():
555                        layer_clf = CustomClassifier(
556                            obj=model(random_state=self.random_state),
557                            n_hidden_features=self.n_hidden_features,
558                            activation_name=self.activation_name,
559                            a=self.a,
560                            nodes_sim=self.nodes_sim,
561                            bias=self.bias,
562                            dropout=self.dropout,
563                            direct_link=self.direct_link,
564                            n_clusters=self.n_clusters,
565                            cluster_encode=self.cluster_encode,
566                            type_clust=self.type_clust,
567                            type_scaling=self.type_scaling,
568                            col_sample=self.col_sample,
569                            row_sample=self.row_sample,
570                            seed=self.seed,
571                            backend=self.backend,
572                            cv_calibration=None,
573                        )
574
575                    else:
576                        layer_clf = CustomClassifier(
577                            obj=model(),
578                            n_hidden_features=self.n_hidden_features,
579                            activation_name=self.activation_name,
580                            a=self.a,
581                            nodes_sim=self.nodes_sim,
582                            bias=self.bias,
583                            dropout=self.dropout,
584                            direct_link=self.direct_link,
585                            n_clusters=self.n_clusters,
586                            cluster_encode=self.cluster_encode,
587                            type_clust=self.type_clust,
588                            type_scaling=self.type_scaling,
589                            col_sample=self.col_sample,
590                            row_sample=self.row_sample,
591                            seed=self.seed,
592                            backend=self.backend,
593                            cv_calibration=None,
594                        )
595
596                    layer_clf.fit(X_train, y_train)
597
598                    for _ in range(self.n_layers):
599                        layer_clf = deepcopy(
600                            CustomClassifier(
601                                obj=layer_clf,
602                                n_hidden_features=self.n_hidden_features,
603                                activation_name=self.activation_name,
604                                a=self.a,
605                                nodes_sim=self.nodes_sim,
606                                bias=self.bias,
607                                dropout=self.dropout,
608                                direct_link=self.direct_link,
609                                n_clusters=self.n_clusters,
610                                cluster_encode=self.cluster_encode,
611                                type_clust=self.type_clust,
612                                type_scaling=self.type_scaling,
613                                col_sample=self.col_sample,
614                                row_sample=self.row_sample,
615                                seed=self.seed,
616                                backend=self.backend,
617                                cv_calibration=None,
618                            )
619                        )
620
621                        # layer_clf.fit(X_train, y_train)
622
623                    layer_clf.fit(X_train, y_train)
624
625                    self.models_[name] = layer_clf
626                    y_pred = layer_clf.predict(X_test)
627                    accuracy = accuracy_score(y_test, y_pred, normalize=True)
628                    b_accuracy = balanced_accuracy_score(y_test, y_pred)
629                    f1 = f1_score(y_test, y_pred, average="weighted")
630                    try:
631                        roc_auc = roc_auc_score(y_test, y_pred)
632                    except Exception as exception:
633                        roc_auc = None
634                        if self.ignore_warnings is False:
635                            print("ROC AUC couldn't be calculated for " + name)
636                            print(exception)
637                    names.append(name)
638                    Accuracy.append(accuracy)
639                    B_Accuracy.append(b_accuracy)
640                    ROC_AUC.append(roc_auc)
641                    F1.append(f1)
642                    TIME.append(time.time() - start)
643                    if self.custom_metric is not None:
644                        custom_metric = self.custom_metric(y_test, y_pred)
645                        CUSTOM_METRIC.append(custom_metric)
646                    if self.verbose > 0:
647                        if self.custom_metric is not None:
648                            print(
649                                {
650                                    "Model": name,
651                                    "Accuracy": accuracy,
652                                    "Balanced Accuracy": b_accuracy,
653                                    "ROC AUC": roc_auc,
654                                    "F1 Score": f1,
655                                    self.custom_metric.__name__: custom_metric,
656                                    "Time taken": time.time() - start,
657                                }
658                            )
659                        else:
660                            print(
661                                {
662                                    "Model": name,
663                                    "Accuracy": accuracy,
664                                    "Balanced Accuracy": b_accuracy,
665                                    "ROC AUC": roc_auc,
666                                    "F1 Score": f1,
667                                    "Time taken": time.time() - start,
668                                }
669                            )
670                    if self.predictions:
671                        predictions[name] = y_pred
672                except Exception as exception:
673                    if self.ignore_warnings is False:
674                        print(name + " model failed to execute")
675                        print(exception)
676
677        if self.custom_metric is None:
678            scores = pd.DataFrame(
679                {
680                    "Model": names,
681                    "Accuracy": Accuracy,
682                    "Balanced Accuracy": B_Accuracy,
683                    "ROC AUC": ROC_AUC,
684                    "F1 Score": F1,
685                    "Time Taken": TIME,
686                }
687            )
688        else:
689            scores = pd.DataFrame(
690                {
691                    "Model": names,
692                    "Accuracy": Accuracy,
693                    "Balanced Accuracy": B_Accuracy,
694                    "ROC AUC": ROC_AUC,
695                    "F1 Score": F1,
696                    "Custom metric": CUSTOM_METRIC,
697                    "Time Taken": TIME,
698                }
699            )
700        scores = scores.sort_values(by=self.sort_by, ascending=False).set_index(
701            "Model"
702        )
703
704        self.best_model_ = self.models_[scores.index[0]]
705
706        if self.predictions is True:
707            return scores, predictions
708
709        return scores
710
def get_best_model(self):
    """
    Give back the model that ranked first in the last call to `fit`.

    `fit` sorts the score table by `sort_by` in descending order and stores
    the entry of `models_` whose name comes first as `best_model_`; this
    accessor simply hands that stored object back.

    Returns:

        best_model: object,
            The fitted model (or model pipeline, when preprocessing is
            enabled) ranked first according to the `sort_by` metric.

    """
    best_model = self.best_model_
    return best_model
722
def provide_models(self, X_train, X_test, y_train, y_test):
    """Returns all the model objects trained. If no model has been stored
    yet (i.e. `models_` is empty), `fit` is called first with the given
    data so that there is something to return.

    Parameters:

    X_train: array-like,
        Training vectors, where rows is the number of samples
        and columns is the number of features.

    X_test: array-like,
        Testing vectors, where rows is the number of samples
        and columns is the number of features.

    y_train: array-like,
        Target values for the training samples; passed to `fit`,
        which trains every model on them.

    y_test: array-like,
        Target values for the testing samples; passed to `fit`,
        which scores every model's predictions against them.

    Returns:

        models: dict-object,
            Returns a dictionary with each model's pipeline as value
            and key = name of the model.
    """
    # An empty container is falsy, so this covers "nothing trained yet"
    # without building an intermediate keys view just to measure it.
    if not self.models_:
        self.fit(X_train, X_test, y_train, y_test)

    return self.models_

Fitting -- almost -- all the classification algorithms with layers of nnetsauce's CustomClassifier and returning their scores.

Parameters:

verbose: int, optional (default=0)
    Any positive number for verbosity.

ignore_warnings: bool, optional (default=True)
    When set to True, warnings related to algorithms that are not
    able to run are ignored.

custom_metric: function, optional (default=None)
    When a function is provided, models are also evaluated with the
    custom evaluation metric provided.

predictions: bool, optional (default=False)
    When set to True, the predictions of all the models are
    returned together with the scores, keyed by model name.

sort_by: string, optional (default='Accuracy')
    Sort models by a metric. Available options are 'Accuracy',
    'Balanced Accuracy', 'ROC AUC', 'F1 Score' or a custom metric
    identified by its name and provided by custom_metric.

random_state: int, optional (default=42)
    Reproducibility seed.

estimators: list, optional (default='all')
    List of estimator names, or just 'all' to use all the available
    classifiers (> 90).

preprocess: bool, preprocessing is done when set to True

n_jobs: int, when possible, run in parallel
    For now, only used by individual models that support it.

n_layers: int, optional (default=3)
    Number of layers of CustomClassifiers to be used.

All the other parameters are the same as CustomClassifier's.

Attributes:

models_: dict-object
    Returns a dictionary with each model pipeline as value
    with key as name of models.

best_model_: object
    Returns the best model pipeline.

Examples

import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
data = load_breast_cancer()
X = data.data
y= data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2,
    random_state=123)
clf = ns.LazyDeepClassifier(verbose=0, ignore_warnings=True, custom_metric=None,
    predictions=True)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)
model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test)
print(models)
def fit(self, X_train, X_test, y_train, y_test):
229    def fit(self, X_train, X_test, y_train, y_test):
230        """Fit classifiers to X_train and y_train, predict and score on X_test,
231        y_test.
232
233        Parameters:
234
235            X_train: array-like,
236                Training vectors, where rows is the number of samples
237                and columns is the number of features.
238
239            X_test: array-like,
240                Testing vectors, where rows is the number of samples
241                and columns is the number of features.
242
243            y_train: array-like,
244                Training vectors, where rows is the number of samples
245                and columns is the number of features.
246
247            y_test: array-like,
248                Testing vectors, where rows is the number of samples
249                and columns is the number of features.
250
251        Returns:
252
253            scores: Pandas DataFrame
254                Returns metrics of all the models in a Pandas DataFrame.
255
256            predictions: Pandas DataFrame
257                Returns predictions of all the models in a Pandas DataFrame.
258        """
259        Accuracy = []
260        B_Accuracy = []
261        ROC_AUC = []
262        F1 = []
263        names = []
264        TIME = []
265        predictions = {}
266
267        if self.custom_metric is not None:
268            CUSTOM_METRIC = []
269
270        if isinstance(X_train, np.ndarray):
271            X_train = pd.DataFrame(X_train)
272            X_test = pd.DataFrame(X_test)
273
274        numeric_features = X_train.select_dtypes(include=[np.number]).columns
275        categorical_features = X_train.select_dtypes(include=["object"]).columns
276
277        categorical_low, categorical_high = get_card_split(
278            X_train, categorical_features
279        )
280
281        if self.preprocess is True:
282            preprocessor = ColumnTransformer(
283                transformers=[
284                    ("numeric", numeric_transformer, numeric_features),
285                    (
286                        "categorical_low",
287                        categorical_transformer_low,
288                        categorical_low,
289                    ),
290                    (
291                        "categorical_high",
292                        categorical_transformer_high,
293                        categorical_high,
294                    ),
295                ]
296            )
297
298        # baseline models
299        try:
300            baseline_names = ["RandomForestClassifier", "XGBClassifier"]
301            baseline_models = [RandomForestClassifier(), xgb.XGBClassifier()]
302        except Exception as exception:
303            baseline_names = ["RandomForestClassifier"]
304            baseline_models = [RandomForestClassifier()]
305
306        for name, model in zip(baseline_names, baseline_models):
307            start = time.time()
308            try:
309                model.fit(X_train, y_train)
310                self.models_[name] = model
311                y_pred = model.predict(X_test)
312                accuracy = accuracy_score(y_test, y_pred, normalize=True)
313                b_accuracy = balanced_accuracy_score(y_test, y_pred)
314                f1 = f1_score(y_test, y_pred, average="weighted")
315                try:
316                    roc_auc = roc_auc_score(y_test, y_pred)
317                except Exception as exception:
318                    roc_auc = None
319                    if self.ignore_warnings is False:
320                        print("ROC AUC couldn't be calculated for " + name)
321                        print(exception)
322                names.append(name)
323                Accuracy.append(accuracy)
324                B_Accuracy.append(b_accuracy)
325                ROC_AUC.append(roc_auc)
326                F1.append(f1)
327                TIME.append(time.time() - start)
328                if self.custom_metric is not None:
329                    custom_metric = self.custom_metric(y_test, y_pred)
330                    CUSTOM_METRIC.append(custom_metric)
331                if self.verbose > 0:
332                    if self.custom_metric is not None:
333                        print(
334                            {
335                                "Model": name,
336                                "Accuracy": accuracy,
337                                "Balanced Accuracy": b_accuracy,
338                                "ROC AUC": roc_auc,
339                                "F1 Score": f1,
340                                self.custom_metric.__name__: custom_metric,
341                                "Time taken": time.time() - start,
342                            }
343                        )
344                    else:
345                        print(
346                            {
347                                "Model": name,
348                                "Accuracy": accuracy,
349                                "Balanced Accuracy": b_accuracy,
350                                "ROC AUC": roc_auc,
351                                "F1 Score": f1,
352                                "Time taken": time.time() - start,
353                            }
354                        )
355                if self.predictions:
356                    predictions[name] = y_pred
357            except Exception as exception:
358                if self.ignore_warnings is False:
359                    print(name + " model failed to execute")
360                    print(exception)
361
362        if self.estimators == "all":
363            self.classifiers = [
364                item
365                for sublist in [
366                    DEEPCLASSIFIERS,
367                    DEEPMULTITASKCLASSIFIERS,
368                    DEEPSIMPLEMULTITASKCLASSIFIERS,
369                ]
370                for item in sublist
371            ]
372        else:
373            self.classifiers = (
374                [
375                    ("DeepCustomClassifier(" + est[0] + ")", est[1])
376                    for est in all_estimators()
377                    if (
378                        issubclass(est[1], ClassifierMixin)
379                        and (est[0] in self.estimators)
380                    )
381                ]
382                + [
383                    (
384                        "DeepMultitaskClassifier(" + est[0] + ")",
385                        partial(MultitaskClassifier, obj=est[1]()),
386                    )
387                    for est in all_estimators()
388                    if (
389                        issubclass(est[1], RegressorMixin)
390                        and (est[0] in self.estimators)
391                    )
392                ]
393                + [
394                    (
395                        "DeepSimpleMultitaskClassifier(" + est[0] + ")",
396                        partial(SimpleMultitaskClassifier, obj=est[1]()),
397                    )
398                    for est in all_estimators()
399                    if (
400                        issubclass(est[1], RegressorMixin)
401                        and (est[0] in self.estimators)
402                    )
403                ]
404            )
405
406        if self.preprocess is True:
407            for name, model in tqdm(self.classifiers):  # do parallel exec
408                other_args = (
409                    {}
410                )  # use this trick for `random_state` too --> refactor
411                try:
412                    if (
413                        "n_jobs" in model().get_params().keys()
414                        and name.find("LogisticRegression") == -1
415                    ):
416                        other_args["n_jobs"] = self.n_jobs
417                except Exception:
418                    pass
419
420                start = time.time()
421
422                try:
423                    if "random_state" in model().get_params().keys():
424                        layer_clf = CustomClassifier(
425                            obj=model(random_state=self.random_state),
426                            n_hidden_features=self.n_hidden_features,
427                            activation_name=self.activation_name,
428                            a=self.a,
429                            nodes_sim=self.nodes_sim,
430                            bias=self.bias,
431                            dropout=self.dropout,
432                            direct_link=self.direct_link,
433                            n_clusters=self.n_clusters,
434                            cluster_encode=self.cluster_encode,
435                            type_clust=self.type_clust,
436                            type_scaling=self.type_scaling,
437                            col_sample=self.col_sample,
438                            row_sample=self.row_sample,
439                            seed=self.seed,
440                            backend=self.backend,
441                            cv_calibration=None,
442                        )
443
444                    else:
445                        layer_clf = CustomClassifier(
446                            obj=model(),
447                            n_hidden_features=self.n_hidden_features,
448                            activation_name=self.activation_name,
449                            a=self.a,
450                            nodes_sim=self.nodes_sim,
451                            bias=self.bias,
452                            dropout=self.dropout,
453                            direct_link=self.direct_link,
454                            n_clusters=self.n_clusters,
455                            cluster_encode=self.cluster_encode,
456                            type_clust=self.type_clust,
457                            type_scaling=self.type_scaling,
458                            col_sample=self.col_sample,
459                            row_sample=self.row_sample,
460                            seed=self.seed,
461                            backend=self.backend,
462                            cv_calibration=None,
463                        )
464
465                    layer_clf.fit(X_train, y_train)
466
467                    for _ in range(self.n_layers):
468                        layer_clf = deepcopy(
469                            CustomClassifier(
470                                obj=layer_clf,
471                                n_hidden_features=self.n_hidden_features,
472                                activation_name=self.activation_name,
473                                a=self.a,
474                                nodes_sim=self.nodes_sim,
475                                bias=self.bias,
476                                dropout=self.dropout,
477                                direct_link=self.direct_link,
478                                n_clusters=self.n_clusters,
479                                cluster_encode=self.cluster_encode,
480                                type_clust=self.type_clust,
481                                type_scaling=self.type_scaling,
482                                col_sample=self.col_sample,
483                                row_sample=self.row_sample,
484                                seed=self.seed,
485                                backend=self.backend,
486                                cv_calibration=None,
487                            )
488                        )
489
490                    pipe = Pipeline(
491                        [
492                            ("preprocessor", preprocessor),
493                            ("classifier", layer_clf),
494                        ]
495                    )
496
497                    pipe.fit(X_train, y_train)
498                    self.models_[name] = pipe
499                    y_pred = pipe.predict(X_test)
500                    accuracy = accuracy_score(y_test, y_pred, normalize=True)
501                    b_accuracy = balanced_accuracy_score(y_test, y_pred)
502                    f1 = f1_score(y_test, y_pred, average="weighted")
503                    try:
504                        roc_auc = roc_auc_score(y_test, y_pred)
505                    except Exception as exception:
506                        roc_auc = None
507                        if self.ignore_warnings is False:
508                            print("ROC AUC couldn't be calculated for " + name)
509                            print(exception)
510                    names.append(name)
511                    Accuracy.append(accuracy)
512                    B_Accuracy.append(b_accuracy)
513                    ROC_AUC.append(roc_auc)
514                    F1.append(f1)
515                    TIME.append(time.time() - start)
516                    if self.custom_metric is not None:
517                        custom_metric = self.custom_metric(y_test, y_pred)
518                        CUSTOM_METRIC.append(custom_metric)
519                    if self.verbose > 0:
520                        if self.custom_metric is not None:
521                            print(
522                                {
523                                    "Model": name,
524                                    "Accuracy": accuracy,
525                                    "Balanced Accuracy": b_accuracy,
526                                    "ROC AUC": roc_auc,
527                                    "F1 Score": f1,
528                                    self.custom_metric.__name__: custom_metric,
529                                    "Time taken": time.time() - start,
530                                }
531                            )
532                        else:
533                            print(
534                                {
535                                    "Model": name,
536                                    "Accuracy": accuracy,
537                                    "Balanced Accuracy": b_accuracy,
538                                    "ROC AUC": roc_auc,
539                                    "F1 Score": f1,
540                                    "Time taken": time.time() - start,
541                                }
542                            )
543                    if self.predictions:
544                        predictions[name] = y_pred
545                except Exception as exception:
546                    if self.ignore_warnings is False:
547                        print(name + " model failed to execute")
548                        print(exception)
549
550        else:  # no preprocessing
551            for name, model in tqdm(self.classifiers):  # do parallel exec
552                start = time.time()
553                try:
554                    if "random_state" in model().get_params().keys():
555                        layer_clf = CustomClassifier(
556                            obj=model(random_state=self.random_state),
557                            n_hidden_features=self.n_hidden_features,
558                            activation_name=self.activation_name,
559                            a=self.a,
560                            nodes_sim=self.nodes_sim,
561                            bias=self.bias,
562                            dropout=self.dropout,
563                            direct_link=self.direct_link,
564                            n_clusters=self.n_clusters,
565                            cluster_encode=self.cluster_encode,
566                            type_clust=self.type_clust,
567                            type_scaling=self.type_scaling,
568                            col_sample=self.col_sample,
569                            row_sample=self.row_sample,
570                            seed=self.seed,
571                            backend=self.backend,
572                            cv_calibration=None,
573                        )
574
575                    else:
576                        layer_clf = CustomClassifier(
577                            obj=model(),
578                            n_hidden_features=self.n_hidden_features,
579                            activation_name=self.activation_name,
580                            a=self.a,
581                            nodes_sim=self.nodes_sim,
582                            bias=self.bias,
583                            dropout=self.dropout,
584                            direct_link=self.direct_link,
585                            n_clusters=self.n_clusters,
586                            cluster_encode=self.cluster_encode,
587                            type_clust=self.type_clust,
588                            type_scaling=self.type_scaling,
589                            col_sample=self.col_sample,
590                            row_sample=self.row_sample,
591                            seed=self.seed,
592                            backend=self.backend,
593                            cv_calibration=None,
594                        )
595
596                    layer_clf.fit(X_train, y_train)
597
598                    for _ in range(self.n_layers):
599                        layer_clf = deepcopy(
600                            CustomClassifier(
601                                obj=layer_clf,
602                                n_hidden_features=self.n_hidden_features,
603                                activation_name=self.activation_name,
604                                a=self.a,
605                                nodes_sim=self.nodes_sim,
606                                bias=self.bias,
607                                dropout=self.dropout,
608                                direct_link=self.direct_link,
609                                n_clusters=self.n_clusters,
610                                cluster_encode=self.cluster_encode,
611                                type_clust=self.type_clust,
612                                type_scaling=self.type_scaling,
613                                col_sample=self.col_sample,
614                                row_sample=self.row_sample,
615                                seed=self.seed,
616                                backend=self.backend,
617                                cv_calibration=None,
618                            )
619                        )
620
621                        # layer_clf.fit(X_train, y_train)
622
623                    layer_clf.fit(X_train, y_train)
624
625                    self.models_[name] = layer_clf
626                    y_pred = layer_clf.predict(X_test)
627                    accuracy = accuracy_score(y_test, y_pred, normalize=True)
628                    b_accuracy = balanced_accuracy_score(y_test, y_pred)
629                    f1 = f1_score(y_test, y_pred, average="weighted")
630                    try:
631                        roc_auc = roc_auc_score(y_test, y_pred)
632                    except Exception as exception:
633                        roc_auc = None
634                        if self.ignore_warnings is False:
635                            print("ROC AUC couldn't be calculated for " + name)
636                            print(exception)
637                    names.append(name)
638                    Accuracy.append(accuracy)
639                    B_Accuracy.append(b_accuracy)
640                    ROC_AUC.append(roc_auc)
641                    F1.append(f1)
642                    TIME.append(time.time() - start)
643                    if self.custom_metric is not None:
644                        custom_metric = self.custom_metric(y_test, y_pred)
645                        CUSTOM_METRIC.append(custom_metric)
646                    if self.verbose > 0:
647                        if self.custom_metric is not None:
648                            print(
649                                {
650                                    "Model": name,
651                                    "Accuracy": accuracy,
652                                    "Balanced Accuracy": b_accuracy,
653                                    "ROC AUC": roc_auc,
654                                    "F1 Score": f1,
655                                    self.custom_metric.__name__: custom_metric,
656                                    "Time taken": time.time() - start,
657                                }
658                            )
659                        else:
660                            print(
661                                {
662                                    "Model": name,
663                                    "Accuracy": accuracy,
664                                    "Balanced Accuracy": b_accuracy,
665                                    "ROC AUC": roc_auc,
666                                    "F1 Score": f1,
667                                    "Time taken": time.time() - start,
668                                }
669                            )
670                    if self.predictions:
671                        predictions[name] = y_pred
672                except Exception as exception:
673                    if self.ignore_warnings is False:
674                        print(name + " model failed to execute")
675                        print(exception)
676
677        if self.custom_metric is None:
678            scores = pd.DataFrame(
679                {
680                    "Model": names,
681                    "Accuracy": Accuracy,
682                    "Balanced Accuracy": B_Accuracy,
683                    "ROC AUC": ROC_AUC,
684                    "F1 Score": F1,
685                    "Time Taken": TIME,
686                }
687            )
688        else:
689            scores = pd.DataFrame(
690                {
691                    "Model": names,
692                    "Accuracy": Accuracy,
693                    "Balanced Accuracy": B_Accuracy,
694                    "ROC AUC": ROC_AUC,
695                    "F1 Score": F1,
696                    "Custom metric": CUSTOM_METRIC,
697                    "Time Taken": TIME,
698                }
699            )
700        scores = scores.sort_values(by=self.sort_by, ascending=False).set_index(
701            "Model"
702        )
703
704        self.best_model_ = self.models_[scores.index[0]]
705
706        if self.predictions is True:
707            return scores, predictions
708
709        return scores

Fit classifiers to X_train and y_train, predict and score on X_test, y_test.

Parameters:

X_train: array-like,
    Training vectors, where rows is the number of samples
    and columns is the number of features.

X_test: array-like,
    Testing vectors, where rows is the number of samples
    and columns is the number of features.

y_train: array-like,
    Training target values (class labels), one per row of X_train.

y_test: array-like,
    Testing target values (class labels), one per row of X_test;
    the predictions on X_test are scored against them.

Returns:

scores: Pandas DataFrame
    Returns metrics of all the models in a Pandas DataFrame.

predictions: Pandas DataFrame
    Returns predictions of all the models in a Pandas DataFrame.
def provide_models(self, X_train, X_test, y_train, y_test):
723    def provide_models(self, X_train, X_test, y_train, y_test):
724        """Returns all the model objects trained. If fit hasn't been called yet,
725        then it's called to return the models.
726
727        Parameters:
728
729        X_train: array-like,
730            Training vectors, where rows is the number of samples
731            and columns is the number of features.
732
733        X_test: array-like,
734            Testing vectors, where rows is the number of samples
735            and columns is the number of features.
736
737        y_train: array-like,
738            Training vectors, where rows is the number of samples
739            and columns is the number of features.
740
741        y_test: array-like,
742            Testing vectors, where rows is the number of samples
743            and columns is the number of features.
744
745        Returns:
746
747            models: dict-object,
748                Returns a dictionary with each model's pipeline as value
749                and key = name of the model.
750        """
751        if len(self.models_.keys()) == 0:
752            self.fit(X_train, X_test, y_train, y_test)
753
754        return self.models_

Returns all the model objects trained. If fit hasn't been called yet, then it's called to return the models.

Parameters:

X_train: array-like, Training vectors, where rows is the number of samples and columns is the number of features.

X_test: array-like, Testing vectors, where rows is the number of samples and columns is the number of features.

y_train: array-like, Training target values, one per row of X_train (forwarded to fit).

y_test: array-like, Testing target values, one per row of X_test (forwarded to fit).

Returns:

models: dict-object,
    Returns a dictionary with each model's pipeline as value
    and key = name of the model.
class LazyDeepRegressor(nnetsauce.custom.custom.Custom, sklearn.base.RegressorMixin):
 90class LazyDeepRegressor(Custom, RegressorMixin):
 91    """
 92        Fitting -- almost -- all the regression algorithms with layers of
 93        nnetsauce's CustomRegressor and returning their scores.
 94
 95    Parameters:
 96
 97        verbose: int, optional (default=0)
 98            Any positive number for verbosity.
 99
100        ignore_warnings: bool, optional (default=True)
101            When set to True, the warning related to algorigms that are not able to run are ignored.
102
103        custom_metric: function, optional (default=None)
104            When function is provided, models are evaluated based on the custom evaluation metric provided.
105
106        predictions: bool, optional (default=False)
107            When set to True, the predictions of all the models models are returned as dataframe.
108
109        sort_by: string, optional (default='RMSE')
110            Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken' and 'Custom Metric'.
111            or a custom metric identified by its name and provided by custom_metric.
112
113        random_state: int, optional (default=42)
114            Reproducibiility seed.
115
116        estimators: list, optional (default='all')
117            list of Estimators names or just 'all' (default='all')
118
119        preprocess: bool
120            preprocessing is done when set to True
121
122        n_jobs : int, when possible, run in parallel
123            For now, only used by individual models that support it.
124
125        n_layers: int, optional (default=3)
126            Number of layers of CustomRegressors to be used.
127
128        All the other parameters are the same as CustomRegressor's.
129
130    Attributes:
131
132        models_: dict-object
133            Returns a dictionary with each model pipeline as value
134            with key as name of models.
135
136        best_model_: object
137            Returns the best model pipeline based on the sort_by metric.
138
139    Examples:
140
141        import nnetsauce as ns
142        import numpy as np
143        from sklearn import datasets
144        from sklearn.utils import shuffle
145
146        diabetes = datasets.load_diabetes()
147        X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
148        X = X.astype(np.float32)
149
150        offset = int(X.shape[0] * 0.9)
151        X_train, y_train = X[:offset], y[:offset]
152        X_test, y_test = X[offset:], y[offset:]
153
154        reg = ns.LazyDeepRegressor(verbose=0, ignore_warnings=False, custom_metric=None)
155        models, predictions = reg.fit(X_train, X_test, y_train, y_test)
156        print(models)
157
158    """
159
160    def __init__(
161        self,
162        verbose=0,
163        ignore_warnings=True,
164        custom_metric=None,
165        predictions=False,
166        sort_by="RMSE",
167        random_state=42,
168        estimators="all",
169        preprocess=False,
170        n_jobs=None,
171        # Defining depth
172        n_layers=3,
173        # CustomRegressor attributes
174        obj=None,
175        n_hidden_features=5,
176        activation_name="relu",
177        a=0.01,
178        nodes_sim="sobol",
179        bias=True,
180        dropout=0,
181        direct_link=True,
182        n_clusters=2,
183        cluster_encode=True,
184        type_clust="kmeans",
185        type_scaling=("std", "std", "std"),
186        col_sample=1,
187        row_sample=1,
188        seed=123,
189        backend="cpu",
190    ):
191        self.verbose = verbose
192        self.ignore_warnings = ignore_warnings
193        self.custom_metric = custom_metric
194        self.predictions = predictions
195        self.sort_by = sort_by
196        self.models_ = {}
197        self.best_model_ = None
198        self.random_state = random_state
199        self.estimators = estimators
200        self.preprocess = preprocess
201        self.n_layers = n_layers - 1
202        self.n_jobs = n_jobs
203        super().__init__(
204            obj=obj,
205            n_hidden_features=n_hidden_features,
206            activation_name=activation_name,
207            a=a,
208            nodes_sim=nodes_sim,
209            bias=bias,
210            dropout=dropout,
211            direct_link=direct_link,
212            n_clusters=n_clusters,
213            cluster_encode=cluster_encode,
214            type_clust=type_clust,
215            type_scaling=type_scaling,
216            col_sample=col_sample,
217            row_sample=row_sample,
218            seed=seed,
219            backend=backend,
220        )
221
222    def fit(self, X_train, X_test, y_train, y_test):
223        """Fit Regression algorithms to X_train and y_train, predict and score on X_test, y_test.
224
225        Parameters:
226
227            X_train : array-like,
228                Training vectors, where rows is the number of samples
229                and columns is the number of features.
230
231            X_test : array-like,
232                Testing vectors, where rows is the number of samples
233                and columns is the number of features.
234
235            y_train : array-like,
236                Training vectors, where rows is the number of samples
237                and columns is the number of features.
238
239            y_test : array-like,
240                Testing vectors, where rows is the number of samples
241                and columns is the number of features.
242
243        Returns:
244        -------
245        scores:  Pandas DataFrame
246            Returns metrics of all the models in a Pandas DataFrame.
247
248        predictions : Pandas DataFrame
249            Returns predictions of all the models in a Pandas DataFrame.
250
251        """
252        R2 = []
253        ADJR2 = []
254        RMSE = []
255        # WIN = []
256        names = []
257        TIME = []
258        predictions = {}
259
260        if self.custom_metric:
261            CUSTOM_METRIC = []
262
263        if isinstance(X_train, np.ndarray):
264            X_train = pd.DataFrame(X_train)
265            X_test = pd.DataFrame(X_test)
266
267        numeric_features = X_train.select_dtypes(include=[np.number]).columns
268        categorical_features = X_train.select_dtypes(include=["object"]).columns
269
270        categorical_low, categorical_high = get_card_split(
271            X_train, categorical_features
272        )
273
274        if self.preprocess is True:
275            preprocessor = ColumnTransformer(
276                transformers=[
277                    ("numeric", numeric_transformer, numeric_features),
278                    (
279                        "categorical_low",
280                        categorical_transformer_low,
281                        categorical_low,
282                    ),
283                    (
284                        "categorical_high",
285                        categorical_transformer_high,
286                        categorical_high,
287                    ),
288                ]
289            )
290
291        # base models
292        try:
293            baseline_names = ["RandomForestRegressor", "XGBRegressor"]
294            baseline_models = [RandomForestRegressor(), xgb.XGBRegressor()]
295        except Exception as exception:
296            baseline_names = ["RandomForestRegressor"]
297            baseline_models = [RandomForestRegressor()]
298
299        for name, model in zip(baseline_names, baseline_models):
300            start = time.time()
301            try:
302                model.fit(X_train, y_train)
303                self.models_[name] = model
304                y_pred = model.predict(X_test)
305                r_squared = r2_score(y_test, y_pred)
306                adj_rsquared = adjusted_rsquared(
307                    r_squared, X_test.shape[0], X_test.shape[1]
308                )
309                rmse = np.sqrt(np.mean((y_test - y_pred) ** 2))
310
311                names.append(name)
312                R2.append(r_squared)
313                ADJR2.append(adj_rsquared)
314                RMSE.append(rmse)
315                TIME.append(time.time() - start)
316
317                if self.custom_metric:
318                    custom_metric = self.custom_metric(y_test, y_pred)
319                    CUSTOM_METRIC.append(custom_metric)
320
321                if self.verbose > 0:
322                    scores_verbose = {
323                        "Model": name,
324                        "R-Squared": r_squared,
325                        "Adjusted R-Squared": adj_rsquared,
326                        "RMSE": rmse,
327                        "Time taken": time.time() - start,
328                    }
329
330                    if self.custom_metric:
331                        scores_verbose[self.custom_metric.__name__] = (
332                            custom_metric
333                        )
334
335                    print(scores_verbose)
336                if self.predictions:
337                    predictions[name] = y_pred
338            except Exception as exception:
339                if self.ignore_warnings is False:
340                    print(name + " model failed to execute")
341                    print(exception)
342
343        if self.estimators == "all":
344            self.regressors = DEEPREGRESSORS
345        else:
346            self.regressors = [
347                ("DeepCustomRegressor(" + est[0] + ")", est[1])
348                for est in all_estimators()
349                if (
350                    issubclass(est[1], RegressorMixin)
351                    and (est[0] in self.estimators)
352                )
353            ]
354
355        if self.preprocess is True:
356            for name, model in tqdm(self.regressors):  # do parallel exec
357                start = time.time()
358                try:
359                    if "random_state" in model().get_params().keys():
360                        layer_regr = CustomRegressor(
361                            obj=model(random_state=self.random_state),
362                            n_hidden_features=self.n_hidden_features,
363                            activation_name=self.activation_name,
364                            a=self.a,
365                            nodes_sim=self.nodes_sim,
366                            bias=self.bias,
367                            dropout=self.dropout,
368                            direct_link=self.direct_link,
369                            n_clusters=self.n_clusters,
370                            cluster_encode=self.cluster_encode,
371                            type_clust=self.type_clust,
372                            type_scaling=self.type_scaling,
373                            col_sample=self.col_sample,
374                            row_sample=self.row_sample,
375                            seed=self.seed,
376                            backend=self.backend,
377                        )
378                    else:
379                        layer_regr = CustomRegressor(
380                            obj=model(),
381                            n_hidden_features=self.n_hidden_features,
382                            activation_name=self.activation_name,
383                            a=self.a,
384                            nodes_sim=self.nodes_sim,
385                            bias=self.bias,
386                            dropout=self.dropout,
387                            direct_link=self.direct_link,
388                            n_clusters=self.n_clusters,
389                            cluster_encode=self.cluster_encode,
390                            type_clust=self.type_clust,
391                            type_scaling=self.type_scaling,
392                            col_sample=self.col_sample,
393                            row_sample=self.row_sample,
394                            seed=self.seed,
395                            backend=self.backend,
396                        )
397
398                    for _ in range(self.n_layers):
399                        layer_regr = deepcopy(
400                            CustomRegressor(
401                                obj=layer_regr,
402                                n_hidden_features=self.n_hidden_features,
403                                activation_name=self.activation_name,
404                                a=self.a,
405                                nodes_sim=self.nodes_sim,
406                                bias=self.bias,
407                                dropout=self.dropout,
408                                direct_link=self.direct_link,
409                                n_clusters=self.n_clusters,
410                                cluster_encode=self.cluster_encode,
411                                type_clust=self.type_clust,
412                                type_scaling=self.type_scaling,
413                                col_sample=self.col_sample,
414                                row_sample=self.row_sample,
415                                seed=self.seed,
416                                backend=self.backend,
417                            )
418                        )
419
420                    layer_regr.fit(X_train, y_train)
421
422                    pipe = Pipeline(
423                        steps=[
424                            ("preprocessor", preprocessor),
425                            ("regressor", layer_regr),
426                        ]
427                    )
428
429                    pipe.fit(X_train, y_train)
430
431                    self.models_[name] = pipe
432                    y_pred = pipe.predict(X_test)
433                    r_squared = r2_score(y_test, y_pred)
434                    adj_rsquared = adjusted_rsquared(
435                        r_squared, X_test.shape[0], X_test.shape[1]
436                    )
437                    rmse = np.sqrt(np.mean((y_test - y_pred) ** 2))
438
439                    names.append(name)
440                    R2.append(r_squared)
441                    ADJR2.append(adj_rsquared)
442                    RMSE.append(rmse)
443                    TIME.append(time.time() - start)
444
445                    if self.custom_metric:
446                        custom_metric = self.custom_metric(y_test, y_pred)
447                        CUSTOM_METRIC.append(custom_metric)
448
449                    if self.verbose > 0:
450                        scores_verbose = {
451                            "Model": name,
452                            "R-Squared": r_squared,
453                            "Adjusted R-Squared": adj_rsquared,
454                            "RMSE": rmse,
455                            "Time taken": time.time() - start,
456                        }
457
458                        if self.custom_metric:
459                            scores_verbose[self.custom_metric.__name__] = (
460                                custom_metric
461                            )
462
463                        print(scores_verbose)
464                    if self.predictions:
465                        predictions[name] = y_pred
466                except Exception as exception:
467                    if self.ignore_warnings is False:
468                        print(name + " model failed to execute")
469                        print(exception)
470
471        else:  # no preprocessing
472            for name, model in tqdm(self.regressors):  # do parallel exec
473                start = time.time()
474                try:
475                    if "random_state" in model().get_params().keys():
476                        layer_regr = CustomRegressor(
477                            obj=model(random_state=self.random_state),
478                            n_hidden_features=self.n_hidden_features,
479                            activation_name=self.activation_name,
480                            a=self.a,
481                            nodes_sim=self.nodes_sim,
482                            bias=self.bias,
483                            dropout=self.dropout,
484                            direct_link=self.direct_link,
485                            n_clusters=self.n_clusters,
486                            cluster_encode=self.cluster_encode,
487                            type_clust=self.type_clust,
488                            type_scaling=self.type_scaling,
489                            col_sample=self.col_sample,
490                            row_sample=self.row_sample,
491                            seed=self.seed,
492                            backend=self.backend,
493                        )
494                    else:
495                        layer_regr = CustomRegressor(
496                            obj=model(),
497                            n_hidden_features=self.n_hidden_features,
498                            activation_name=self.activation_name,
499                            a=self.a,
500                            nodes_sim=self.nodes_sim,
501                            bias=self.bias,
502                            dropout=self.dropout,
503                            direct_link=self.direct_link,
504                            n_clusters=self.n_clusters,
505                            cluster_encode=self.cluster_encode,
506                            type_clust=self.type_clust,
507                            type_scaling=self.type_scaling,
508                            col_sample=self.col_sample,
509                            row_sample=self.row_sample,
510                            seed=self.seed,
511                            backend=self.backend,
512                        )
513
514                    layer_regr.fit(X_train, y_train)
515
516                    for _ in range(self.n_layers):
517                        layer_regr = deepcopy(
518                            CustomRegressor(
519                                obj=layer_regr,
520                                n_hidden_features=self.n_hidden_features,
521                                activation_name=self.activation_name,
522                                a=self.a,
523                                nodes_sim=self.nodes_sim,
524                                bias=self.bias,
525                                dropout=self.dropout,
526                                direct_link=self.direct_link,
527                                n_clusters=self.n_clusters,
528                                cluster_encode=self.cluster_encode,
529                                type_clust=self.type_clust,
530                                type_scaling=self.type_scaling,
531                                col_sample=self.col_sample,
532                                row_sample=self.row_sample,
533                                seed=self.seed,
534                                backend=self.backend,
535                            )
536                        )
537
538                        # layer_regr.fit(X_train, y_train)
539
540                    layer_regr.fit(X_train, y_train)
541
542                    self.models_[name] = layer_regr
543                    y_pred = layer_regr.predict(X_test)
544
545                    r_squared = r2_score(y_test, y_pred)
546                    adj_rsquared = adjusted_rsquared(
547                        r_squared, X_test.shape[0], X_test.shape[1]
548                    )
549                    rmse = np.sqrt(np.mean((y_test - y_pred) ** 2))
550
551                    names.append(name)
552                    R2.append(r_squared)
553                    ADJR2.append(adj_rsquared)
554                    RMSE.append(rmse)
555                    TIME.append(time.time() - start)
556
557                    if self.custom_metric:
558                        custom_metric = self.custom_metric(y_test, y_pred)
559                        CUSTOM_METRIC.append(custom_metric)
560
561                    if self.verbose > 0:
562                        scores_verbose = {
563                            "Model": name,
564                            "R-Squared": r_squared,
565                            "Adjusted R-Squared": adj_rsquared,
566                            "RMSE": rmse,
567                            "Time taken": time.time() - start,
568                        }
569
570                        if self.custom_metric:
571                            scores_verbose[self.custom_metric.__name__] = (
572                                custom_metric
573                            )
574
575                        print(scores_verbose)
576                    if self.predictions:
577                        predictions[name] = y_pred
578                except Exception as exception:
579                    if self.ignore_warnings is False:
580                        print(name + " model failed to execute")
581                        print(exception)
582
583        scores = {
584            "Model": names,
585            "Adjusted R-Squared": ADJR2,
586            "R-Squared": R2,
587            "RMSE": RMSE,
588            "Time Taken": TIME,
589        }
590
591        if self.custom_metric:
592            scores["Custom metric"] = CUSTOM_METRIC
593
594        scores = pd.DataFrame(scores)
595        scores = scores.sort_values(by=self.sort_by, ascending=True).set_index(
596            "Model"
597        )
598
599        self.best_model_ = self.models_[scores.index[0]]
600
601        if self.predictions is True:
602            return scores, predictions
603
604        return scores
605
606    def get_best_model(self):
607        """
608        This function returns the best model pipeline based on the sort_by metric.
609
610        Returns:
611
612            best_model: object,
613                Returns the best model pipeline based on the sort_by metric.
614
615        """
616        return self.best_model_
617
618    def provide_models(self, X_train, X_test, y_train, y_test):
619        """
620        This function returns all the model objects trained in fit function.
621        If fit is not called already, then we call fit and then return the models.
622
623        Parameters:
624
625            X_train : array-like,
626                Training vectors, where rows is the number of samples
627                and columns is the number of features.
628
629            X_test : array-like,
630                Testing vectors, where rows is the number of samples
631                and columns is the number of features.
632
633            y_train : array-like,
634                Training vectors, where rows is the number of samples
635                and columns is the number of features.
636
637            y_test : array-like,
638                Testing vectors, where rows is the number of samples
639                and columns is the number of features.
640
641        Returns:
642
643            models: dict-object,
644                Returns a dictionary with each model pipeline as value
645                with key as name of models.
646
647        """
648        if len(self.models_.keys()) == 0:
649            self.fit(X_train, X_test, y_train, y_test)
650
651        return self.models_

Fitting -- almost -- all the regression algorithms with layers of nnetsauce's CustomRegressor and returning their scores.

Parameters:

verbose: int, optional (default=0)
    Any positive number for verbosity.

ignore_warnings: bool, optional (default=True)
    When set to True, the warnings related to algorithms that are not able to run are ignored.

custom_metric: function, optional (default=None)
    When function is provided, models are evaluated based on the custom evaluation metric provided.

predictions: bool, optional (default=False)
    When set to True, the predictions of all the models are returned as well (as a dict keyed by model name).

sort_by: string, optional (default='RMSE')
    Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken' and 'Custom Metric'.
    or a custom metric identified by its name and provided by custom_metric.

random_state: int, optional (default=42)
    Reproducibility seed.

estimators: list, optional (default='all')
    list of Estimators names or just 'all' (default='all')

preprocess: bool
    preprocessing is done when set to True

n_jobs : int, when possible, run in parallel
    For now, only used by individual models that support it.

n_layers: int, optional (default=3)
    Number of layers of CustomRegressors to be used.

All the other parameters are the same as CustomRegressor's.

Attributes:

models_: dict-object
    Returns a dictionary with each model pipeline as value
    with key as name of models.

best_model_: object
    Returns the best model pipeline based on the sort_by metric.

Examples:

import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle

diabetes = datasets.load_diabetes()
X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
X = X.astype(np.float32)

offset = int(X.shape[0] * 0.9)
X_train, y_train = X[:offset], y[:offset]
X_test, y_test = X[offset:], y[offset:]

reg = ns.LazyDeepRegressor(verbose=0, ignore_warnings=False, custom_metric=None)
models, predictions = reg.fit(X_train, X_test, y_train, y_test)
print(models)
def fit(self, X_train, X_test, y_train, y_test):
def fit(self, X_train, X_test, y_train, y_test):
    """Fit Regression algorithms to X_train and y_train, predict and score on X_test, y_test.

    Two baselines are fitted first (RandomForestRegressor and, when it can
    be instantiated, XGBRegressor). Then every entry of ``self.regressors``
    is wrapped in ``self.n_layers + 1`` nested CustomRegressor layers,
    fitted on the training data and scored on the test data. A model that
    raises at any step is skipped (and reported when
    ``self.ignore_warnings`` is False).

    Parameters:

        X_train : array-like,
            Training vectors, where rows is the number of samples
            and columns is the number of features.

        X_test : array-like,
            Testing vectors, where rows is the number of samples
            and columns is the number of features.

        y_train : array-like,
            Target values associated with the rows of X_train.

        y_test : array-like,
            Target values associated with the rows of X_test; used only
            for scoring.

    Returns:
    -------
    scores:  Pandas DataFrame
        Metrics of all the models that ran, indexed by model name and
        sorted by ``self.sort_by`` (best model first).

    predictions : dict
        Test-set predictions keyed by model name. Returned (as the second
        element of a tuple) only when ``self.predictions`` is True.

    """
    R2 = []
    ADJR2 = []
    RMSE = []
    names = []
    TIME = []
    # Always defined so the bookkeeping below never depends on whether a
    # custom metric was supplied.
    CUSTOM_METRIC = []
    predictions = {}

    if isinstance(X_train, np.ndarray):
        X_train = pd.DataFrame(X_train)
        X_test = pd.DataFrame(X_test)

    numeric_features = X_train.select_dtypes(include=[np.number]).columns
    categorical_features = X_train.select_dtypes(include=["object"]).columns

    categorical_low, categorical_high = get_card_split(
        X_train, categorical_features
    )

    preprocessor = None
    if self.preprocess is True:
        preprocessor = ColumnTransformer(
            transformers=[
                ("numeric", numeric_transformer, numeric_features),
                (
                    "categorical_low",
                    categorical_transformer_low,
                    categorical_low,
                ),
                (
                    "categorical_high",
                    categorical_transformer_high,
                    categorical_high,
                ),
            ]
        )

    # Settings shared by every CustomRegressor layer of every stack.
    layer_settings = {
        "n_hidden_features": self.n_hidden_features,
        "activation_name": self.activation_name,
        "a": self.a,
        "nodes_sim": self.nodes_sim,
        "bias": self.bias,
        "dropout": self.dropout,
        "direct_link": self.direct_link,
        "n_clusters": self.n_clusters,
        "cluster_encode": self.cluster_encode,
        "type_clust": self.type_clust,
        "type_scaling": self.type_scaling,
        "col_sample": self.col_sample,
        "row_sample": self.row_sample,
        "seed": self.seed,
        "backend": self.backend,
    }

    def build_stack(model):
        """Wrap an estimator class in n_layers + 1 CustomRegressor layers (unfitted)."""
        if "random_state" in model().get_params().keys():
            base = model(random_state=self.random_state)
        else:
            base = model()
        stack = CustomRegressor(obj=base, **layer_settings)
        for _ in range(self.n_layers):
            stack = deepcopy(CustomRegressor(obj=stack, **layer_settings))
        return stack

    def score_and_record(name, fitted, start):
        """Score a fitted model on the test set and append one row of results."""
        y_pred = fitted.predict(X_test)
        r_squared = r2_score(y_test, y_pred)
        adj_rsquared = adjusted_rsquared(
            r_squared, X_test.shape[0], X_test.shape[1]
        )
        rmse = np.sqrt(np.mean((y_test - y_pred) ** 2))
        elapsed = time.time() - start

        # Compute the custom metric BEFORE touching any result list: if it
        # raises, no partial row is left behind and all score columns keep
        # the same length.
        custom_metric = None
        if self.custom_metric:
            custom_metric = self.custom_metric(y_test, y_pred)

        names.append(name)
        R2.append(r_squared)
        ADJR2.append(adj_rsquared)
        RMSE.append(rmse)
        TIME.append(elapsed)
        if self.custom_metric:
            CUSTOM_METRIC.append(custom_metric)

        if self.verbose > 0:
            scores_verbose = {
                "Model": name,
                "R-Squared": r_squared,
                "Adjusted R-Squared": adj_rsquared,
                "RMSE": rmse,
                "Time taken": time.time() - start,
            }

            if self.custom_metric:
                scores_verbose[self.custom_metric.__name__] = custom_metric

            print(scores_verbose)

        if self.predictions:
            predictions[name] = y_pred

    def report_failure(name, exception):
        """Print the failure unless warnings are being ignored."""
        if self.ignore_warnings is False:
            print(name + " model failed to execute")
            print(exception)

    # base models
    try:
        baseline_names = ["RandomForestRegressor", "XGBRegressor"]
        baseline_models = [RandomForestRegressor(), xgb.XGBRegressor()]
    except Exception:
        # XGBRegressor could not be created: fall back to a single baseline.
        baseline_names = ["RandomForestRegressor"]
        baseline_models = [RandomForestRegressor()]

    for name, model in zip(baseline_names, baseline_models):
        start = time.time()
        try:
            model.fit(X_train, y_train)
            self.models_[name] = model
            score_and_record(name, model, start)
        except Exception as exception:
            report_failure(name, exception)

    if self.estimators == "all":
        self.regressors = DEEPREGRESSORS
    else:
        self.regressors = [
            ("DeepCustomRegressor(" + est[0] + ")", est[1])
            for est in all_estimators()
            if (
                issubclass(est[1], RegressorMixin)
                and (est[0] in self.estimators)
            )
        ]

    for name, model in tqdm(self.regressors):  # do parallel exec
        start = time.time()
        try:
            estimator = build_stack(model)

            if preprocessor is not None:
                estimator = Pipeline(
                    steps=[
                        ("preprocessor", preprocessor),
                        ("regressor", estimator),
                    ]
                )

            # Fit once, as a whole. With preprocessing the stack only ever
            # sees the transformed features; it is no longer fitted on the
            # raw X_train first, which made the whole model fail whenever
            # the bare stack could not handle the untransformed input.
            # NOTE(review): assumes CustomRegressor.fit refits its wrapped
            # obj, so a separate fit of the inner layer is unnecessary —
            # confirm.
            estimator.fit(X_train, y_train)

            self.models_[name] = estimator
            score_and_record(name, estimator, start)
        except Exception as exception:
            report_failure(name, exception)

    scores = {
        "Model": names,
        "Adjusted R-Squared": ADJR2,
        "R-Squared": R2,
        "RMSE": RMSE,
        "Time Taken": TIME,
    }

    if self.custom_metric:
        scores["Custom metric"] = CUSTOM_METRIC

    scores = pd.DataFrame(scores)

    # R-squared style metrics are "higher is better"; every other column
    # (RMSE, Time Taken, custom metric) keeps the ascending order.
    higher_is_better = self.sort_by in ("R-Squared", "Adjusted R-Squared")
    scores = scores.sort_values(
        by=self.sort_by, ascending=not higher_is_better
    ).set_index("Model")

    # When every model failed there is no best model to expose.
    if len(scores.index) > 0:
        self.best_model_ = self.models_[scores.index[0]]
    else:
        self.best_model_ = None

    if self.predictions is True:
        return scores, predictions

    return scores

Fit Regression algorithms to X_train and y_train, predict and score on X_test, y_test.

Parameters:

X_train : array-like,
    Training vectors, where rows is the number of samples
    and columns is the number of features.

X_test : array-like,
    Testing vectors, where rows is the number of samples
    and columns is the number of features.

y_train : array-like,
    Training target values, one per row of X_train.

y_test : array-like,
    Testing target values, one per row of X_test, used to score the predictions.

Returns:

scores: Pandas DataFrame Returns metrics of all the models in a Pandas DataFrame.

predictions : dict Returns the test-set predictions of all the models, keyed by model name (only returned when predictions is set to True).

def provide_models(self, X_train, X_test, y_train, y_test):
def provide_models(self, X_train, X_test, y_train, y_test):
    """
    Return the dictionary of models trained by the fit function.

    When no model has been stored yet, fit is called first with the
    arguments given here; otherwise the arguments are not used.

    Parameters:

        X_train : array-like,
            Training vectors, where rows is the number of samples
            and columns is the number of features.

        X_test : array-like,
            Testing vectors, where rows is the number of samples
            and columns is the number of features.

        y_train : array-like,
            Training values passed on to fit.

        y_test : array-like,
            Testing values passed on to fit.

    Returns:

        models: dict-object,
            Returns a dictionary with each model pipeline as value
            with key as name of models.

    """
    already_trained = bool(self.models_)
    if not already_trained:
        # Nothing stored yet: train everything before handing models out.
        self.fit(X_train, X_test, y_train, y_test)
    return self.models_

This function returns all the model objects trained in fit function. If fit is not called already, then we call fit and then return the models.

Parameters:

X_train : array-like,
    Training vectors, where rows is the number of samples
    and columns is the number of features.

X_test : array-like,
    Testing vectors, where rows is the number of samples
    and columns is the number of features.

y_train : array-like,
    Training target values, one per row of X_train.

y_test : array-like,
    Testing target values, one per row of X_test.

Returns:

models: dict-object,
    Returns a dictionary with each model pipeline as value
    with key as name of models.
class LazyMTS(nnetsauce.LazyDeepMTS):
 998class LazyMTS(LazyDeepMTS):
 999    """
1000    Fitting -- almost -- all the regression algorithms to multivariate time series
1001    and returning their scores (no layers).
1002
1003    Parameters:
1004
1005        verbose: int, optional (default=0)
1006            Any positive number for verbosity.
1007
1008        ignore_warnings: bool, optional (default=True)
1009            When set to True, the warning related to algorigms that are not
1010            able to run are ignored.
1011
1012        custom_metric: function, optional (default=None)
1013            When function is provided, models are evaluated based on the custom
1014              evaluation metric provided.
1015
1016        predictions: bool, optional (default=False)
1017            When set to True, the predictions of all the models models are returned as dataframe.
1018
1019        sort_by: string, optional (default='RMSE')
1020            Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE',
1021            'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and
1022            provided by custom_metric.
1023
1024        random_state: int, optional (default=42)
1025            Reproducibiility seed.
1026
1027        estimators: list, optional (default='all')
1028            list of Estimators (regression algorithms) names or just 'all' (default='all')
1029
1030        preprocess: bool, preprocessing is done when set to True
1031
1032        h: int, optional (default=None)
1033            Number of steps ahead to predict (when used, must be > 0 and < X_test.shape[0]).
1034
1035        All the other parameters are the same as MTS's.
1036
1037    Attributes:
1038
1039        models_: dict-object
1040            Returns a dictionary with each model pipeline as value
1041            with key as name of models.
1042
1043        best_model_: object
1044            Returns the best model pipeline based on the sort_by metric.
1045
1046    Examples:
1047
1048        See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict
1049
1050    """
1051
1052    def __init__(
1053        self,
1054        verbose=0,
1055        ignore_warnings=True,
1056        custom_metric=None,
1057        predictions=False,
1058        sort_by=None,  # leave it as is
1059        random_state=42,
1060        estimators="all",
1061        preprocess=False,
1062        h=None,
1063        # MTS attributes
1064        obj=None,
1065        n_hidden_features=5,
1066        activation_name="relu",
1067        a=0.01,
1068        nodes_sim="sobol",
1069        bias=True,
1070        dropout=0,
1071        direct_link=True,
1072        n_clusters=2,
1073        cluster_encode=True,
1074        type_clust="kmeans",
1075        type_scaling=("std", "std", "std"),
1076        lags=15,
1077        type_pi="scp2-kde",
1078        block_size=None,
1079        replications=None,
1080        kernel=None,
1081        agg="mean",
1082        seed=123,
1083        backend="cpu",
1084        show_progress=False,
1085    ):
1086        super().__init__(
1087            verbose=verbose,
1088            ignore_warnings=ignore_warnings,
1089            custom_metric=custom_metric,
1090            predictions=predictions,
1091            sort_by=sort_by,
1092            random_state=random_state,
1093            estimators=estimators,
1094            preprocess=preprocess,
1095            n_layers=1,
1096            h=h,
1097            obj=obj,
1098            n_hidden_features=n_hidden_features,
1099            activation_name=activation_name,
1100            a=a,
1101            nodes_sim=nodes_sim,
1102            bias=bias,
1103            dropout=dropout,
1104            direct_link=direct_link,
1105            n_clusters=n_clusters,
1106            cluster_encode=cluster_encode,
1107            type_clust=type_clust,
1108            type_scaling=type_scaling,
1109            lags=lags,
1110            type_pi=type_pi,
1111            block_size=block_size,
1112            replications=replications,
1113            kernel=kernel,
1114            agg=agg,
1115            seed=seed,
1116            backend=backend,
1117            show_progress=show_progress,
1118        )

Fitting -- almost -- all the regression algorithms to multivariate time series and returning their scores (no layers).

Parameters:

verbose: int, optional (default=0)
    Any positive number for verbosity.

ignore_warnings: bool, optional (default=True)
    When set to True, warnings related to algorithms that are not
    able to run are ignored.

custom_metric: function, optional (default=None)
    When function is provided, models are evaluated based on the custom
      evaluation metric provided.

predictions: bool, optional (default=False)
    When set to True, the predictions of all the models are returned as dataframe.

sort_by: string, optional (default='RMSE')
    Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE',
    'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and
    provided by custom_metric.

random_state: int, optional (default=42)
    Reproducibility seed.

estimators: list, optional (default='all')
    list of Estimators (regression algorithms) names or just 'all' (default='all')

preprocess: bool, preprocessing is done when set to True

h: int, optional (default=None)
    Number of steps ahead to predict (when used, must be > 0 and < X_test.shape[0]).

All the other parameters are the same as MTS's.

Attributes:

models_: dict-object
    Returns a dictionary with each model pipeline as value
    with key as name of models.

best_model_: object
    Returns the best model pipeline based on the sort_by metric.

Examples:

See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict
class LazyDeepMTS(nnetsauce.MTS):
104class LazyDeepMTS(MTS):
105    """
106
107    Fitting -- almost -- all the regression algorithms with layers of
108    nnetsauce's CustomRegressor to multivariate time series
109    and returning their scores.
110
111    Parameters:
112
113        verbose: int, optional (default=0)
114            Any positive number for verbosity.
115
116        ignore_warnings: bool, optional (default=True)
117            When set to True, the warning related to algorithms that are not
118            able to run are ignored.
119
120        custom_metric: function, optional (default=None)
121            When function is provided, models are evaluated based on the custom
122              evaluation metric provided.
123
124        predictions: bool, optional (default=False)
125            When set to True, the predictions of all the models are returned as a dataframe.
126
127        sort_by: string, optional (default=None)
128            Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE',
129            'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and
130            provided by custom_metric. When None, 'WINKLERSCORE' is used if `replications` is not None or `type_pi` is 'gaussian', and 'RMSE' otherwise.
131
132        random_state: int, optional (default=42)
133            Reproducibility seed.
134
135        estimators: list, optional (default='all')
136            list of Estimators (regression algorithms) names or just 'all' (default='all')
137
138        preprocess: bool, preprocessing is done when set to True
139
140        n_layers: int, optional (default=1)
141            Number of layers in the network. When set to 1, the model is equivalent to a MTS.
142
143        h: int, optional (default=None)
144            Number of steps ahead to predict (when used, must be > 0 and < X_test.shape[0]).
145
146        All the other parameters are the same as MTS's.
147
148    Attributes:
149
150        models_: dict-object
151            Returns a dictionary with each model pipeline as value
152            with key as name of models.
153
154        best_model_: object
155            Returns the best model pipeline based on the sort_by metric.
156
157    Examples:
158
159        See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict
160
161    """
162
163    def __init__(
164        self,
165        verbose=0,
166        ignore_warnings=True,
167        custom_metric=None,
168        predictions=False,
169        sort_by=None,  # leave it as is
170        random_state=42,
171        estimators="all",
172        preprocess=False,
173        n_layers=1,
174        h=None,
175        # MTS attributes
176        obj=None,
177        n_hidden_features=5,
178        activation_name="relu",
179        a=0.01,
180        nodes_sim="sobol",
181        bias=True,
182        dropout=0,
183        direct_link=True,
184        n_clusters=2,
185        cluster_encode=True,
186        type_clust="kmeans",
187        type_scaling=("std", "std", "std"),
188        lags=15,
189        type_pi="scp2-kde",
190        block_size=None,
191        replications=None,
192        kernel=None,
193        agg="mean",
194        seed=123,
195        backend="cpu",
196        show_progress=False,
197    ):
198        self.verbose = verbose
199        self.ignore_warnings = ignore_warnings
200        self.custom_metric = custom_metric
201        self.predictions = predictions
202        self.sort_by = sort_by
203        self.models_ = {}
204        self.best_model_ = None
205        self.random_state = random_state
206        self.estimators = estimators
207        self.preprocess = preprocess
208        self.n_layers = n_layers
209        self.h = h
210        super().__init__(
211            obj=obj,
212            n_hidden_features=n_hidden_features,
213            activation_name=activation_name,
214            a=a,
215            nodes_sim=nodes_sim,
216            bias=bias,
217            dropout=dropout,
218            direct_link=direct_link,
219            n_clusters=n_clusters,
220            cluster_encode=cluster_encode,
221            type_clust=type_clust,
222            type_scaling=type_scaling,
223            seed=seed,
224            backend=backend,
225            lags=lags,
226            type_pi=type_pi,
227            block_size=block_size,
228            replications=replications,
229            kernel=kernel,
230            agg=agg,
231            verbose=verbose,
232            show_progress=show_progress,
233        )
234        if self.replications is not None or self.type_pi == "gaussian":
235            if self.sort_by is None:
236                self.sort_by = "WINKLERSCORE"
237        else:
238            if self.sort_by is None:
239                self.sort_by = "RMSE"
240
241    def fit(self, X_train, X_test, xreg=None, per_series=False, **kwargs):
242        """Fit Regression algorithms to X_train, predict and score on X_test.
243
244        Parameters:
245
246            X_train: array-like or data frame,
247                Training vectors, where rows is the number of samples
248                and columns is the number of features.
249
250            X_test: array-like or data frame,
251                Testing vectors, where rows is the number of samples
252                and columns is the number of features.
253
254            xreg: array-like, optional (default=None)
255                Additional (external) regressors to be passed to self.obj
256                xreg must be in 'increasing' order (most recent observations last)
257
258            per_series: bool, optional (default=False)
259                When set to True, the metrics are computed series by series.
260
261            **kwargs: dict, optional (default=None)
262                Additional parameters to be passed to `fit` method of `obj`.
263
264        Returns:
265
266            scores: Pandas DataFrame
267                Returns metrics of all the models in a Pandas DataFrame.
268
269            predictions: Pandas DataFrame
270                Returns predictions of all the models in a Pandas DataFrame.
271
272        """
273        R2 = []
274        ADJR2 = []
275        ME = []
276        MPL = []
277        RMSE = []
278        MAE = []
279        MPE = []
280        MAPE = []
281        WINKLERSCORE = []
282        COVERAGE = []
283
284        # WIN = []
285        names = []
286        TIME = []
287        predictions = {}
288
289        if self.custom_metric is not None:
290            CUSTOM_METRIC = []
291
292        if self.h is None:
293            assert X_test is not None, "If h is None, X_test must be provided."
294
295        if isinstance(X_train, np.ndarray):
296            X_train = pd.DataFrame(X_train)
297            X_test = pd.DataFrame(X_test)
298
299        self.series_names = X_train.columns.tolist()
300
301        X_train = convert_df_to_numeric(X_train)
302        X_test = convert_df_to_numeric(X_test)
303
304        numeric_features = X_train.select_dtypes(include=[np.number]).columns
305        categorical_features = X_train.select_dtypes(include=["object"]).columns
306
307        categorical_low, categorical_high = get_card_split(
308            X_train, categorical_features
309        )
310
311        if self.preprocess:
312            preprocessor = ColumnTransformer(
313                transformers=[
314                    ("numeric", numeric_transformer, numeric_features),
315                    (
316                        "categorical_low",
317                        categorical_transformer_low,
318                        categorical_low,
319                    ),
320                    (
321                        "categorical_high",
322                        categorical_transformer_high,
323                        categorical_high,
324                    ),
325                ]
326            )
327
328        # baselines (Classical MTS) ----
329        for i, name in enumerate(["ARIMA", "ETS", "Theta", "VAR", "VECM"]):
330            try:
331                start = time.time()
332                regr = ClassicalMTS(model=name)
333                regr.fit(X_train, **kwargs)
334                self.models_[name] = regr
335                if self.h is None:
336                    X_pred = regr.predict(h=X_test.shape[0], **kwargs)
337                else:
338                    assert self.h > 0, "h must be > 0"
339                    X_pred = regr.predict(h=self.h, **kwargs)
340                    try:
341                        X_test = X_test[0: self.h, :]
342                    except Exception as e:
343                        X_test = X_test.iloc[0: self.h, :]
344
345                rmse = mean_errors(
346                    actual=X_test,
347                    pred=X_pred,
348                    scoring="root_mean_squared_error",
349                    per_series=per_series,
350                )
351                mae = mean_errors(
352                    actual=X_test,
353                    pred=X_pred,
354                    scoring="mean_absolute_error",
355                    per_series=per_series,
356                )
357                mpl = mean_errors(
358                    actual=X_test,
359                    pred=X_pred,
360                    scoring="mean_pinball_loss",
361                    per_series=per_series,
362                )
363            except Exception:
364                continue
365
366            names.append(name)
367            RMSE.append(rmse)
368            MAE.append(mae)
369            MPL.append(mpl)
370
371            if self.custom_metric is not None:
372                try:
373                    if self.h is None:
374                        custom_metric = self.custom_metric(X_test, X_pred)
375                    else:
376                        custom_metric = self.custom_metric(X_test_h, X_pred)
377                    CUSTOM_METRIC.append(custom_metric)
378                except Exception as e:
379                    custom_metric = np.iinfo(np.float32).max
380                    CUSTOM_METRIC.append(np.iinfo(np.float32).max)
381
382            if (self.replications is not None) or (self.type_pi == "gaussian"):
383                if per_series == False:
384                    winklerscore = winkler_score(
385                        obj=X_pred, actual=X_test, level=95
386                    )
387                    coveragecalc = coverage(X_pred, X_test, level=95)
388                else:
389                    winklerscore = winkler_score(
390                        obj=X_pred, actual=X_test, level=95, per_series=True
391                    )
392                    coveragecalc = coverage(
393                        X_pred, X_test, level=95, per_series=True
394                    )
395                WINKLERSCORE.append(winklerscore)
396                COVERAGE.append(coveragecalc)
397            TIME.append(time.time() - start)
398
399        if self.estimators == "all":
400            if self.n_layers <= 1:
401                self.regressors = REGRESSORSMTS
402            else:
403                self.regressors = DEEPREGRESSORSMTS
404        else:
405            if self.n_layers <= 1:
406                self.regressors = [
407                    ("MTS(" + est[0] + ")", est[1])
408                    for est in all_estimators()
409                    if (
410                        issubclass(est[1], RegressorMixin)
411                        and (est[0] in self.estimators)
412                    )
413                ]
414            else:  # self.n_layers > 1
415                self.regressors = [
416                    ("DeepMTS(" + est[0] + ")", est[1])
417                    for est in all_estimators()
418                    if (
419                        issubclass(est[1], RegressorMixin)
420                        and (est[0] in self.estimators)
421                    )
422                ]
423
424        if self.preprocess is True:
425            for name, model in tqdm(self.regressors):  # do parallel exec
426                start = time.time()
427                try:
428                    if "random_state" in model().get_params().keys():
429                        pipe = Pipeline(
430                            steps=[
431                                ("preprocessor", preprocessor),
432                                (
433                                    "regressor",
434                                    DeepMTS(
435                                        obj=model(
436                                            random_state=self.random_state,
437                                            **kwargs,
438                                        ),
439                                        n_layers=self.n_layers,
440                                        n_hidden_features=self.n_hidden_features,
441                                        activation_name=self.activation_name,
442                                        a=self.a,
443                                        nodes_sim=self.nodes_sim,
444                                        bias=self.bias,
445                                        dropout=self.dropout,
446                                        direct_link=self.direct_link,
447                                        n_clusters=self.n_clusters,
448                                        cluster_encode=self.cluster_encode,
449                                        type_clust=self.type_clust,
450                                        type_scaling=self.type_scaling,
451                                        lags=self.lags,
452                                        type_pi=self.type_pi,
453                                        block_size=self.block_size,
454                                        replications=self.replications,
455                                        kernel=self.kernel,
456                                        agg=self.agg,
457                                        seed=self.seed,
458                                        backend=self.backend,
459                                        show_progress=self.show_progress,
460                                    ),
461                                ),
462                            ]
463                        )
464                    else:  # "random_state" in model().get_params().keys()
465                        pipe = Pipeline(
466                            steps=[
467                                ("preprocessor", preprocessor),
468                                (
469                                    "regressor",
470                                    DeepMTS(
471                                        obj=model(**kwargs),
472                                        n_layers=self.n_layers,
473                                        n_hidden_features=self.n_hidden_features,
474                                        activation_name=self.activation_name,
475                                        a=self.a,
476                                        nodes_sim=self.nodes_sim,
477                                        bias=self.bias,
478                                        dropout=self.dropout,
479                                        direct_link=self.direct_link,
480                                        n_clusters=self.n_clusters,
481                                        cluster_encode=self.cluster_encode,
482                                        type_clust=self.type_clust,
483                                        type_scaling=self.type_scaling,
484                                        lags=self.lags,
485                                        type_pi=self.type_pi,
486                                        block_size=self.block_size,
487                                        replications=self.replications,
488                                        kernel=self.kernel,
489                                        agg=self.agg,
490                                        seed=self.seed,
491                                        backend=self.backend,
492                                        show_progress=self.show_progress,
493                                    ),
494                                ),
495                            ]
496                        )
497
498                    pipe.fit(X_train, **kwargs)
499                    # pipe.fit(X_train, xreg=xreg)
500
501                    self.models_[name] = pipe
502
503                    if self.h is None:
504                        X_pred = pipe["regressor"].predict(h=self.h, **kwargs)
505                    else:
506                        assert self.h > 0, "h must be > 0"
507                        X_pred = pipe["regressor"].predict(h=self.h, **kwargs)
508
509                    if (self.replications is not None) or (
510                        self.type_pi == "gaussian"
511                    ):
512                        rmse = mean_errors(
513                            actual=X_test,
514                            pred=X_pred,
515                            scoring="root_mean_squared_error",
516                            per_series=per_series,
517                        )
518                        mae = mean_errors(
519                            actual=X_test,
520                            pred=X_pred,
521                            scoring="mean_absolute_error",
522                            per_series=per_series,
523                        )
524                        mpl = mean_errors(
525                            actual=X_test,
526                            pred=X_pred,
527                            scoring="mean_pinball_loss",
528                            per_series=per_series,
529                        )
530                        winklerscore = winkler_score(
531                            obj=X_pred,
532                            actual=X_test,
533                            level=95,
534                            per_series=per_series,
535                        )
536                        coveragecalc = coverage(
537                            X_pred, X_test, level=95, per_series=per_series
538                        )
539                    else:
540                        rmse = mean_errors(
541                            actual=X_test,
542                            pred=X_pred,
543                            scoring="root_mean_squared_error",
544                            per_series=per_series,
545                        )
546                        mae = mean_errors(
547                            actual=X_test,
548                            pred=X_pred,
549                            scoring="mean_absolute_error",
550                            per_series=per_series,
551                        )
552                        mpl = mean_errors(
553                            actual=X_test,
554                            pred=X_pred,
555                            scoring="mean_pinball_loss",
556                            per_series=per_series,
557                        )
558
559                    names.append(name)
560                    RMSE.append(rmse)
561                    MAE.append(mae)
562                    MPL.append(mpl)
563
564                    if (self.replications is not None) or (
565                        self.type_pi == "gaussian"
566                    ):
567                        WINKLERSCORE.append(winklerscore)
568                        COVERAGE.append(coveragecalc)
569                    TIME.append(time.time() - start)
570
571                    if self.custom_metric is not None:
572                        try:
573                            custom_metric = self.custom_metric(X_test, X_pred)
574                            CUSTOM_METRIC.append(custom_metric)
575                        except Exception as e:
576                            custom_metric = np.iinfo(np.float32).max
577                            CUSTOM_METRIC.append(custom_metric)
578
579                    if self.verbose > 0:
580                        if (self.replications is not None) or (
581                            self.type_pi == "gaussian"
582                        ):
583                            scores_verbose = {
584                                "Model": name,
585                                "RMSE": rmse,
586                                "MAE": mae,
587                                "MPL": mpl,
588                                "WINKLERSCORE": winklerscore,
589                                "COVERAGE": coveragecalc,
590                                "Time taken": time.time() - start,
591                            }
592                        else:
593                            scores_verbose = {
594                                "Model": name,
595                                "RMSE": rmse,
596                                "MAE": mae,
597                                "MPL": mpl,
598                                "Time taken": time.time() - start,
599                            }
600
601                        if self.custom_metric is not None:
602                            scores_verbose["Custom metric"] = custom_metric
603
604                    if self.predictions:
605                        predictions[name] = X_pred
606                except Exception as exception:
607                    if self.ignore_warnings is False:
608                        print(name + " model failed to execute")
609                        print(exception)
610
611        else:  # no preprocessing
612            for name, model in tqdm(self.regressors):  # do parallel exec
613                start = time.time()
614                try:
615                    if "random_state" in model().get_params().keys():
616                        pipe = DeepMTS(
617                            obj=model(random_state=self.random_state, **kwargs),
618                            n_layers=self.n_layers,
619                            n_hidden_features=self.n_hidden_features,
620                            activation_name=self.activation_name,
621                            a=self.a,
622                            nodes_sim=self.nodes_sim,
623                            bias=self.bias,
624                            dropout=self.dropout,
625                            direct_link=self.direct_link,
626                            n_clusters=self.n_clusters,
627                            cluster_encode=self.cluster_encode,
628                            type_clust=self.type_clust,
629                            type_scaling=self.type_scaling,
630                            lags=self.lags,
631                            type_pi=self.type_pi,
632                            block_size=self.block_size,
633                            replications=self.replications,
634                            kernel=self.kernel,
635                            agg=self.agg,
636                            seed=self.seed,
637                            backend=self.backend,
638                            show_progress=self.show_progress,
639                        )
640                    else:
641                        pipe = DeepMTS(
642                            obj=model(**kwargs),
643                            n_layers=self.n_layers,
644                            n_hidden_features=self.n_hidden_features,
645                            activation_name=self.activation_name,
646                            a=self.a,
647                            nodes_sim=self.nodes_sim,
648                            bias=self.bias,
649                            dropout=self.dropout,
650                            direct_link=self.direct_link,
651                            n_clusters=self.n_clusters,
652                            cluster_encode=self.cluster_encode,
653                            type_clust=self.type_clust,
654                            type_scaling=self.type_scaling,
655                            lags=self.lags,
656                            type_pi=self.type_pi,
657                            block_size=self.block_size,
658                            replications=self.replications,
659                            kernel=self.kernel,
660                            agg=self.agg,
661                            seed=self.seed,
662                            backend=self.backend,
663                            show_progress=self.show_progress,
664                        )
665
666                    pipe.fit(X_train, xreg, **kwargs)
667                    # pipe.fit(X_train, xreg=xreg) # DO xreg like in `ahead`
668
669                    self.models_[name] = pipe
670
671                    if self.preprocess is True:
672                        if self.h is None:
673                            X_pred = pipe["regressor"].predict(
674                                h=X_test.shape[0], **kwargs
675                            )
676                        else:
677                            assert (
678                                self.h > 0 and self.h <= X_test.shape[0]
679                            ), "h must be > 0 and < X_test.shape[0]"
680                            X_pred = pipe["regressor"].predict(
681                                h=self.h, **kwargs
682                            )
683
684                    else:
685                        if self.h is None:
686                            X_pred = pipe.predict(
687                                h=X_test.shape[0],
688                                **kwargs,
689                                # X_pred = pipe.predict(h=X_test.shape[0], new_xreg=new_xreg) ## DO xreg like in `ahead`
690                            )
691                        else:
692                            assert (
693                                self.h > 0 and self.h <= X_test.shape[0]
694                            ), "h must be > 0 and < X_test.shape[0]"
695                            X_pred = pipe.predict(h=self.h, **kwargs)
696
697                    if self.h is None:
698                        if (self.replications is not None) or (
699                            self.type_pi == "gaussian"
700                        ):
701                            rmse = mean_errors(
702                                actual=X_test,
703                                pred=X_pred.mean,
704                                scoring="root_mean_squared_error",
705                                per_series=per_series,
706                            )
707                            mae = mean_errors(
708                                actual=X_test,
709                                pred=X_pred.mean,
710                                scoring="mean_absolute_error",
711                                per_series=per_series,
712                            )
713                            mpl = mean_errors(
714                                actual=X_test,
715                                pred=X_pred.mean,
716                                scoring="mean_pinball_loss",
717                                per_series=per_series,
718                            )
719                            winklerscore = winkler_score(
720                                obj=X_pred,
721                                actual=X_test,
722                                level=95,
723                                per_series=per_series,
724                            )
725                            coveragecalc = coverage(
726                                X_pred, X_test, level=95, per_series=per_series
727                            )
728                        else:  # no prediction interval
729                            rmse = mean_errors(
730                                actual=X_test,
731                                pred=X_pred,
732                                scoring="root_mean_squared_error",
733                                per_series=per_series,
734                            )
735                            mae = mean_errors(
736                                actual=X_test,
737                                pred=X_pred,
738                                scoring="mean_absolute_error",
739                                per_series=per_series,
740                            )
741                            mpl = mean_errors(
742                                actual=X_test,
743                                pred=X_pred,
744                                scoring="mean_pinball_loss",
745                                per_series=per_series,
746                            )
747                    else:  # self.h is not None
748                        if (self.replications is not None) or (
749                            self.type_pi == "gaussian"
750                        ):
751                            if isinstance(X_test, pd.DataFrame):
752                                X_test_h = X_test.iloc[0: self.h, :]
753                                rmse = mean_errors(
754                                    actual=X_test_h,
755                                    pred=X_pred,
756                                    scoring="root_mean_squared_error",
757                                    per_series=per_series,
758                                )
759                                mae = mean_errors(
760                                    actual=X_test_h,
761                                    pred=X_pred,
762                                    scoring="mean_absolute_error",
763                                    per_series=per_series,
764                                )
765                                mpl = mean_errors(
766                                    actual=X_test_h,
767                                    pred=X_pred,
768                                    scoring="mean_pinball_loss",
769                                    per_series=per_series,
770                                )
771                                winklerscore = winkler_score(
772                                    obj=X_pred,
773                                    actual=X_test_h,
774                                    level=95,
775                                    per_series=per_series,
776                                )
777                                coveragecalc = coverage(
778                                    X_pred,
779                                    X_test_h,
780                                    level=95,
781                                    per_series=per_series,
782                                )
783                            else:
784                                X_test_h = X_test[0: self.h, :]
785                                rmse = mean_errors(
786                                    actual=X_test_h,
787                                    pred=X_pred,
788                                    scoring="root_mean_squared_error",
789                                    per_series=per_series,
790                                )
791                                mae = mean_errors(
792                                    actual=X_test_h,
793                                    pred=X_pred,
794                                    scoring="mean_absolute_error",
795                                    per_series=per_series,
796                                )
797                                mpl = mean_errors(
798                                    actual=X_test_h,
799                                    pred=X_pred,
800                                    scoring="mean_pinball_loss",
801                                    per_series=per_series,
802                                )
803                                winklerscore = winkler_score(
804                                    obj=X_pred,
805                                    actual=X_test_h,
806                                    level=95,
807                                    per_series=per_series,
808                                )
809                                coveragecalc = coverage(
810                                    X_pred,
811                                    X_test_h,
812                                    level=95,
813                                    per_series=per_series,
814                                )
815                        else:  # no prediction interval
816                            if isinstance(X_test, pd.DataFrame):
817                                X_test_h = X_test.iloc[0: self.h, :]
818                                rmse = mean_errors(
819                                    actual=X_test_h,
820                                    pred=X_pred,
821                                    scoring="root_mean_squared_error",
822                                    per_series=per_series,
823                                )
824                                mae = mean_errors(
825                                    actual=X_test_h,
826                                    pred=X_pred,
827                                    scoring="mean_absolute_error",
828                                    per_series=per_series,
829                                )
830                                mpl = mean_errors(
831                                    actual=X_test_h,
832                                    pred=X_pred,
833                                    scoring="mean_pinball_loss",
834                                    per_series=per_series,
835                                )
836                            else:
837                                X_test_h = X_test[0: self.h, :]
838                                rmse = mean_errors(
839                                    actual=X_test_h,
840                                    pred=X_pred,
841                                    scoring="root_mean_squared_error",
842                                    per_series=per_series,
843                                )
844                                mae = mean_errors(
845                                    actual=X_test_h,
846                                    pred=X_pred,
847                                    scoring="mean_absolute_error",
848                                    per_series=per_series,
849                                )
850
851                    names.append(name)
852                    RMSE.append(rmse)
853                    MAE.append(mae)
854                    MPL.append(mpl)
855                    if (self.replications is not None) or (
856                        self.type_pi == "gaussian"
857                    ):
858                        WINKLERSCORE.append(winklerscore)
859                        COVERAGE.append(coveragecalc)
860                    TIME.append(time.time() - start)
861
862                    if self.custom_metric is not None:
863                        try:
864                            if self.h is None:
865                                custom_metric = self.custom_metric(
866                                    X_test, X_pred
867                                )
868                            else:
869                                custom_metric = self.custom_metric(
870                                    X_test_h, X_pred
871                                )
872                            CUSTOM_METRIC.append(custom_metric)
873                        except Exception as e:
874                            custom_metric = np.iinfo(np.float32).max
875                            CUSTOM_METRIC.append(np.iinfo(np.float32).max)
876
877                    if self.verbose > 0:
878                        if (self.replications is not None) or (
879                            self.type_pi == "gaussian"
880                        ):
881                            scores_verbose = {
882                                "Model": name,
883                                "RMSE": rmse,
884                                "MAE": mae,
885                                "MPL": mpl,
886                                "WINKLERSCORE": winklerscore,
887                                "COVERAGE": coveragecalc,
888                                "Time taken": time.time() - start,
889                            }
890                        else:
891                            scores_verbose = {
892                                "Model": name,
893                                "RMSE": rmse,
894                                "MAE": mae,
895                                "MPL": mpl,
896                                "Time taken": time.time() - start,
897                            }
898
899                        if self.custom_metric is not None:
900                            scores_verbose["Custom metric"] = custom_metric
901
902                    if self.predictions:
903                        predictions[name] = X_pred
904
905                except Exception as exception:
906                    if self.ignore_warnings is False:
907                        print(name + " model failed to execute")
908                        print(exception)
909
910        if (self.replications is not None) or (self.type_pi == "gaussian"):
911            scores = {
912                "Model": names,
913                "RMSE": RMSE,
914                "MAE": MAE,
915                "MPL": MPL,
916                "WINKLERSCORE": WINKLERSCORE,
917                "COVERAGE": COVERAGE,
918                "Time Taken": TIME,
919            }
920        else:
921            scores = {
922                "Model": names,
923                "RMSE": RMSE,
924                "MAE": MAE,
925                "MPL": MPL,
926                "Time Taken": TIME,
927            }
928
929        if self.custom_metric is not None:
930            scores["Custom metric"] = CUSTOM_METRIC
931
932        if per_series:
933            scores = dict_to_dataframe_series(scores, self.series_names)
934        else:
935            scores = pd.DataFrame(scores)
936
937        try:  # case per_series, can't be sorted
938            scores = scores.sort_values(
939                by=self.sort_by, ascending=True
940            ).set_index("Model")
941
942            self.best_model_ = self.models_[scores.index[0]]
943        except Exception as e:
944            pass
945
946        if self.predictions is True:
947            return scores, predictions
948
949        return scores
950
951    def get_best_model(self):
952        """
953        This function returns the best model pipeline based on the sort_by metric.
954
955        Returns:
956
957            best_model: object,
958                Returns the best model pipeline based on the sort_by metric.
959
960        """
961        return self.best_model_
962
963    def provide_models(self, X_train, X_test):
964        """
965        This function returns all the model objects trained in fit function.
966        If fit is not called already, then we call fit and then return the models.
967
968        Parameters:
969
970            X_train : array-like,
971                Training vectors, where rows is the number of samples
972                and columns is the number of features.
973
974            X_test : array-like,
975                Testing vectors, where rows is the number of samples
976                and columns is the number of features.
977
978        Returns:
979
980            models: dict-object,
981                Returns a dictionary with each model pipeline as value
982                with key as name of models.
983
984        """
985        if self.h is None:
986            if len(self.models_.keys()) == 0:
987                self.fit(X_train, X_test)
988        else:
989            if len(self.models_.keys()) == 0:
990                if isinstance(X_test, pd.DataFrame):
991                    self.fit(X_train, X_test.iloc[0: self.h, :])
992                else:
993                    self.fit(X_train, X_test[0: self.h, :])
994
995        return self.models_

Fitting -- almost -- all the regression algorithms with layers of nnetsauce's CustomRegressor to multivariate time series and returning their scores.

Parameters:

verbose: int, optional (default=0)
    Any positive number for verbosity.

ignore_warnings: bool, optional (default=True)
    When set to True, the warnings related to algorithms that are not
    able to run are ignored.

custom_metric: function, optional (default=None)
    When function is provided, models are evaluated based on the custom
      evaluation metric provided.

predictions: bool, optional (default=False)
    When set to True, the predictions of all the models are returned as a dataframe.

sort_by: string, optional (default='RMSE')
    Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE',
    'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and
    provided by custom_metric.

random_state: int, optional (default=42)
    Reproducibility seed.

estimators: list, optional (default='all')
    list of Estimators (regression algorithms) names or just 'all' (default='all')

preprocess: bool, preprocessing is done when set to True

n_layers: int, optional (default=1)
    Number of layers in the network. When set to 1, the model is equivalent to a MTS.

h: int, optional (default=None)
    Number of steps ahead to predict (when used, must be > 0 and <= X_test.shape[0]).

All the other parameters are the same as MTS's.

Attributes:

models_: dict-object
    Returns a dictionary with each model pipeline as value
    with key as name of models.

best_model_: object
    Returns the best model pipeline based on the sort_by metric.

Examples:

See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict
def fit(self, X_train, X_test, xreg=None, per_series=False, **kwargs):
241    def fit(self, X_train, X_test, xreg=None, per_series=False, **kwargs):
242        """Fit Regression algorithms to X_train, predict and score on X_test.
243
244        Parameters:
245
246            X_train: array-like or data frame,
247                Training vectors, where rows is the number of samples
248                and columns is the number of features.
249
250            X_test: array-like or data frame,
251                Testing vectors, where rows is the number of samples
252                and columns is the number of features.
253
254            xreg: array-like, optional (default=None)
255                Additional (external) regressors to be passed to self.obj
256                xreg must be in 'increasing' order (most recent observations last)
257
258            per_series: bool, optional (default=False)
259                When set to True, the metrics are computed series by series.
260
261            **kwargs: dict, optional (default=None)
262                Additional parameters to be passed to `fit` method of `obj`.
263
264        Returns:
265
266            scores: Pandas DataFrame
267                Returns metrics of all the models in a Pandas DataFrame.
268
269            predictions: Pandas DataFrame
270                Returns predictions of all the models in a Pandas DataFrame.
271
272        """
273        R2 = []
274        ADJR2 = []
275        ME = []
276        MPL = []
277        RMSE = []
278        MAE = []
279        MPE = []
280        MAPE = []
281        WINKLERSCORE = []
282        COVERAGE = []
283
284        # WIN = []
285        names = []
286        TIME = []
287        predictions = {}
288
289        if self.custom_metric is not None:
290            CUSTOM_METRIC = []
291
292        if self.h is None:
293            assert X_test is not None, "If h is None, X_test must be provided."
294
295        if isinstance(X_train, np.ndarray):
296            X_train = pd.DataFrame(X_train)
297            X_test = pd.DataFrame(X_test)
298
299        self.series_names = X_train.columns.tolist()
300
301        X_train = convert_df_to_numeric(X_train)
302        X_test = convert_df_to_numeric(X_test)
303
304        numeric_features = X_train.select_dtypes(include=[np.number]).columns
305        categorical_features = X_train.select_dtypes(include=["object"]).columns
306
307        categorical_low, categorical_high = get_card_split(
308            X_train, categorical_features
309        )
310
311        if self.preprocess:
312            preprocessor = ColumnTransformer(
313                transformers=[
314                    ("numeric", numeric_transformer, numeric_features),
315                    (
316                        "categorical_low",
317                        categorical_transformer_low,
318                        categorical_low,
319                    ),
320                    (
321                        "categorical_high",
322                        categorical_transformer_high,
323                        categorical_high,
324                    ),
325                ]
326            )
327
328        # baselines (Classical MTS) ----
329        for i, name in enumerate(["ARIMA", "ETS", "Theta", "VAR", "VECM"]):
330            try:
331                start = time.time()
332                regr = ClassicalMTS(model=name)
333                regr.fit(X_train, **kwargs)
334                self.models_[name] = regr
335                if self.h is None:
336                    X_pred = regr.predict(h=X_test.shape[0], **kwargs)
337                else:
338                    assert self.h > 0, "h must be > 0"
339                    X_pred = regr.predict(h=self.h, **kwargs)
340                    try:
341                        X_test = X_test[0: self.h, :]
342                    except Exception as e:
343                        X_test = X_test.iloc[0: self.h, :]
344
345                rmse = mean_errors(
346                    actual=X_test,
347                    pred=X_pred,
348                    scoring="root_mean_squared_error",
349                    per_series=per_series,
350                )
351                mae = mean_errors(
352                    actual=X_test,
353                    pred=X_pred,
354                    scoring="mean_absolute_error",
355                    per_series=per_series,
356                )
357                mpl = mean_errors(
358                    actual=X_test,
359                    pred=X_pred,
360                    scoring="mean_pinball_loss",
361                    per_series=per_series,
362                )
363            except Exception:
364                continue
365
366            names.append(name)
367            RMSE.append(rmse)
368            MAE.append(mae)
369            MPL.append(mpl)
370
371            if self.custom_metric is not None:
372                try:
373                    if self.h is None:
374                        custom_metric = self.custom_metric(X_test, X_pred)
375                    else:
376                        custom_metric = self.custom_metric(X_test_h, X_pred)
377                    CUSTOM_METRIC.append(custom_metric)
378                except Exception as e:
379                    custom_metric = np.iinfo(np.float32).max
380                    CUSTOM_METRIC.append(np.iinfo(np.float32).max)
381
382            if (self.replications is not None) or (self.type_pi == "gaussian"):
383                if per_series == False:
384                    winklerscore = winkler_score(
385                        obj=X_pred, actual=X_test, level=95
386                    )
387                    coveragecalc = coverage(X_pred, X_test, level=95)
388                else:
389                    winklerscore = winkler_score(
390                        obj=X_pred, actual=X_test, level=95, per_series=True
391                    )
392                    coveragecalc = coverage(
393                        X_pred, X_test, level=95, per_series=True
394                    )
395                WINKLERSCORE.append(winklerscore)
396                COVERAGE.append(coveragecalc)
397            TIME.append(time.time() - start)
398
399        if self.estimators == "all":
400            if self.n_layers <= 1:
401                self.regressors = REGRESSORSMTS
402            else:
403                self.regressors = DEEPREGRESSORSMTS
404        else:
405            if self.n_layers <= 1:
406                self.regressors = [
407                    ("MTS(" + est[0] + ")", est[1])
408                    for est in all_estimators()
409                    if (
410                        issubclass(est[1], RegressorMixin)
411                        and (est[0] in self.estimators)
412                    )
413                ]
414            else:  # self.n_layers > 1
415                self.regressors = [
416                    ("DeepMTS(" + est[0] + ")", est[1])
417                    for est in all_estimators()
418                    if (
419                        issubclass(est[1], RegressorMixin)
420                        and (est[0] in self.estimators)
421                    )
422                ]
423
424        if self.preprocess is True:
425            for name, model in tqdm(self.regressors):  # do parallel exec
426                start = time.time()
427                try:
428                    if "random_state" in model().get_params().keys():
429                        pipe = Pipeline(
430                            steps=[
431                                ("preprocessor", preprocessor),
432                                (
433                                    "regressor",
434                                    DeepMTS(
435                                        obj=model(
436                                            random_state=self.random_state,
437                                            **kwargs,
438                                        ),
439                                        n_layers=self.n_layers,
440                                        n_hidden_features=self.n_hidden_features,
441                                        activation_name=self.activation_name,
442                                        a=self.a,
443                                        nodes_sim=self.nodes_sim,
444                                        bias=self.bias,
445                                        dropout=self.dropout,
446                                        direct_link=self.direct_link,
447                                        n_clusters=self.n_clusters,
448                                        cluster_encode=self.cluster_encode,
449                                        type_clust=self.type_clust,
450                                        type_scaling=self.type_scaling,
451                                        lags=self.lags,
452                                        type_pi=self.type_pi,
453                                        block_size=self.block_size,
454                                        replications=self.replications,
455                                        kernel=self.kernel,
456                                        agg=self.agg,
457                                        seed=self.seed,
458                                        backend=self.backend,
459                                        show_progress=self.show_progress,
460                                    ),
461                                ),
462                            ]
463                        )
464                    else:  # "random_state" in model().get_params().keys()
465                        pipe = Pipeline(
466                            steps=[
467                                ("preprocessor", preprocessor),
468                                (
469                                    "regressor",
470                                    DeepMTS(
471                                        obj=model(**kwargs),
472                                        n_layers=self.n_layers,
473                                        n_hidden_features=self.n_hidden_features,
474                                        activation_name=self.activation_name,
475                                        a=self.a,
476                                        nodes_sim=self.nodes_sim,
477                                        bias=self.bias,
478                                        dropout=self.dropout,
479                                        direct_link=self.direct_link,
480                                        n_clusters=self.n_clusters,
481                                        cluster_encode=self.cluster_encode,
482                                        type_clust=self.type_clust,
483                                        type_scaling=self.type_scaling,
484                                        lags=self.lags,
485                                        type_pi=self.type_pi,
486                                        block_size=self.block_size,
487                                        replications=self.replications,
488                                        kernel=self.kernel,
489                                        agg=self.agg,
490                                        seed=self.seed,
491                                        backend=self.backend,
492                                        show_progress=self.show_progress,
493                                    ),
494                                ),
495                            ]
496                        )
497
498                    pipe.fit(X_train, **kwargs)
499                    # pipe.fit(X_train, xreg=xreg)
500
501                    self.models_[name] = pipe
502
503                    if self.h is None:
504                        X_pred = pipe["regressor"].predict(h=self.h, **kwargs)
505                    else:
506                        assert self.h > 0, "h must be > 0"
507                        X_pred = pipe["regressor"].predict(h=self.h, **kwargs)
508
509                    if (self.replications is not None) or (
510                        self.type_pi == "gaussian"
511                    ):
512                        rmse = mean_errors(
513                            actual=X_test,
514                            pred=X_pred,
515                            scoring="root_mean_squared_error",
516                            per_series=per_series,
517                        )
518                        mae = mean_errors(
519                            actual=X_test,
520                            pred=X_pred,
521                            scoring="mean_absolute_error",
522                            per_series=per_series,
523                        )
524                        mpl = mean_errors(
525                            actual=X_test,
526                            pred=X_pred,
527                            scoring="mean_pinball_loss",
528                            per_series=per_series,
529                        )
530                        winklerscore = winkler_score(
531                            obj=X_pred,
532                            actual=X_test,
533                            level=95,
534                            per_series=per_series,
535                        )
536                        coveragecalc = coverage(
537                            X_pred, X_test, level=95, per_series=per_series
538                        )
539                    else:
540                        rmse = mean_errors(
541                            actual=X_test,
542                            pred=X_pred,
543                            scoring="root_mean_squared_error",
544                            per_series=per_series,
545                        )
546                        mae = mean_errors(
547                            actual=X_test,
548                            pred=X_pred,
549                            scoring="mean_absolute_error",
550                            per_series=per_series,
551                        )
552                        mpl = mean_errors(
553                            actual=X_test,
554                            pred=X_pred,
555                            scoring="mean_pinball_loss",
556                            per_series=per_series,
557                        )
558
559                    names.append(name)
560                    RMSE.append(rmse)
561                    MAE.append(mae)
562                    MPL.append(mpl)
563
564                    if (self.replications is not None) or (
565                        self.type_pi == "gaussian"
566                    ):
567                        WINKLERSCORE.append(winklerscore)
568                        COVERAGE.append(coveragecalc)
569                    TIME.append(time.time() - start)
570
571                    if self.custom_metric is not None:
572                        try:
573                            custom_metric = self.custom_metric(X_test, X_pred)
574                            CUSTOM_METRIC.append(custom_metric)
575                        except Exception as e:
576                            custom_metric = np.iinfo(np.float32).max
577                            CUSTOM_METRIC.append(custom_metric)
578
579                    if self.verbose > 0:
580                        if (self.replications is not None) or (
581                            self.type_pi == "gaussian"
582                        ):
583                            scores_verbose = {
584                                "Model": name,
585                                "RMSE": rmse,
586                                "MAE": mae,
587                                "MPL": mpl,
588                                "WINKLERSCORE": winklerscore,
589                                "COVERAGE": coveragecalc,
590                                "Time taken": time.time() - start,
591                            }
592                        else:
593                            scores_verbose = {
594                                "Model": name,
595                                "RMSE": rmse,
596                                "MAE": mae,
597                                "MPL": mpl,
598                                "Time taken": time.time() - start,
599                            }
600
601                        if self.custom_metric is not None:
602                            scores_verbose["Custom metric"] = custom_metric
603
604                    if self.predictions:
605                        predictions[name] = X_pred
606                except Exception as exception:
607                    if self.ignore_warnings is False:
608                        print(name + " model failed to execute")
609                        print(exception)
610
611        else:  # no preprocessing
612            for name, model in tqdm(self.regressors):  # do parallel exec
613                start = time.time()
614                try:
615                    if "random_state" in model().get_params().keys():
616                        pipe = DeepMTS(
617                            obj=model(random_state=self.random_state, **kwargs),
618                            n_layers=self.n_layers,
619                            n_hidden_features=self.n_hidden_features,
620                            activation_name=self.activation_name,
621                            a=self.a,
622                            nodes_sim=self.nodes_sim,
623                            bias=self.bias,
624                            dropout=self.dropout,
625                            direct_link=self.direct_link,
626                            n_clusters=self.n_clusters,
627                            cluster_encode=self.cluster_encode,
628                            type_clust=self.type_clust,
629                            type_scaling=self.type_scaling,
630                            lags=self.lags,
631                            type_pi=self.type_pi,
632                            block_size=self.block_size,
633                            replications=self.replications,
634                            kernel=self.kernel,
635                            agg=self.agg,
636                            seed=self.seed,
637                            backend=self.backend,
638                            show_progress=self.show_progress,
639                        )
640                    else:
641                        pipe = DeepMTS(
642                            obj=model(**kwargs),
643                            n_layers=self.n_layers,
644                            n_hidden_features=self.n_hidden_features,
645                            activation_name=self.activation_name,
646                            a=self.a,
647                            nodes_sim=self.nodes_sim,
648                            bias=self.bias,
649                            dropout=self.dropout,
650                            direct_link=self.direct_link,
651                            n_clusters=self.n_clusters,
652                            cluster_encode=self.cluster_encode,
653                            type_clust=self.type_clust,
654                            type_scaling=self.type_scaling,
655                            lags=self.lags,
656                            type_pi=self.type_pi,
657                            block_size=self.block_size,
658                            replications=self.replications,
659                            kernel=self.kernel,
660                            agg=self.agg,
661                            seed=self.seed,
662                            backend=self.backend,
663                            show_progress=self.show_progress,
664                        )
665
666                    pipe.fit(X_train, xreg, **kwargs)
667                    # pipe.fit(X_train, xreg=xreg) # DO xreg like in `ahead`
668
669                    self.models_[name] = pipe
670
671                    if self.preprocess is True:
672                        if self.h is None:
673                            X_pred = pipe["regressor"].predict(
674                                h=X_test.shape[0], **kwargs
675                            )
676                        else:
677                            assert (
678                                self.h > 0 and self.h <= X_test.shape[0]
679                            ), "h must be > 0 and < X_test.shape[0]"
680                            X_pred = pipe["regressor"].predict(
681                                h=self.h, **kwargs
682                            )
683
684                    else:
685                        if self.h is None:
686                            X_pred = pipe.predict(
687                                h=X_test.shape[0],
688                                **kwargs,
689                                # X_pred = pipe.predict(h=X_test.shape[0], new_xreg=new_xreg) ## DO xreg like in `ahead`
690                            )
691                        else:
692                            assert (
693                                self.h > 0 and self.h <= X_test.shape[0]
694                            ), "h must be > 0 and < X_test.shape[0]"
695                            X_pred = pipe.predict(h=self.h, **kwargs)
696
697                    if self.h is None:
698                        if (self.replications is not None) or (
699                            self.type_pi == "gaussian"
700                        ):
701                            rmse = mean_errors(
702                                actual=X_test,
703                                pred=X_pred.mean,
704                                scoring="root_mean_squared_error",
705                                per_series=per_series,
706                            )
707                            mae = mean_errors(
708                                actual=X_test,
709                                pred=X_pred.mean,
710                                scoring="mean_absolute_error",
711                                per_series=per_series,
712                            )
713                            mpl = mean_errors(
714                                actual=X_test,
715                                pred=X_pred.mean,
716                                scoring="mean_pinball_loss",
717                                per_series=per_series,
718                            )
719                            winklerscore = winkler_score(
720                                obj=X_pred,
721                                actual=X_test,
722                                level=95,
723                                per_series=per_series,
724                            )
725                            coveragecalc = coverage(
726                                X_pred, X_test, level=95, per_series=per_series
727                            )
728                        else:  # no prediction interval
729                            rmse = mean_errors(
730                                actual=X_test,
731                                pred=X_pred,
732                                scoring="root_mean_squared_error",
733                                per_series=per_series,
734                            )
735                            mae = mean_errors(
736                                actual=X_test,
737                                pred=X_pred,
738                                scoring="mean_absolute_error",
739                                per_series=per_series,
740                            )
741                            mpl = mean_errors(
742                                actual=X_test,
743                                pred=X_pred,
744                                scoring="mean_pinball_loss",
745                                per_series=per_series,
746                            )
747                    else:  # self.h is not None
748                        if (self.replications is not None) or (
749                            self.type_pi == "gaussian"
750                        ):
751                            if isinstance(X_test, pd.DataFrame):
752                                X_test_h = X_test.iloc[0: self.h, :]
753                                rmse = mean_errors(
754                                    actual=X_test_h,
755                                    pred=X_pred,
756                                    scoring="root_mean_squared_error",
757                                    per_series=per_series,
758                                )
759                                mae = mean_errors(
760                                    actual=X_test_h,
761                                    pred=X_pred,
762                                    scoring="mean_absolute_error",
763                                    per_series=per_series,
764                                )
765                                mpl = mean_errors(
766                                    actual=X_test_h,
767                                    pred=X_pred,
768                                    scoring="mean_pinball_loss",
769                                    per_series=per_series,
770                                )
771                                winklerscore = winkler_score(
772                                    obj=X_pred,
773                                    actual=X_test_h,
774                                    level=95,
775                                    per_series=per_series,
776                                )
777                                coveragecalc = coverage(
778                                    X_pred,
779                                    X_test_h,
780                                    level=95,
781                                    per_series=per_series,
782                                )
783                            else:
784                                X_test_h = X_test[0: self.h, :]
785                                rmse = mean_errors(
786                                    actual=X_test_h,
787                                    pred=X_pred,
788                                    scoring="root_mean_squared_error",
789                                    per_series=per_series,
790                                )
791                                mae = mean_errors(
792                                    actual=X_test_h,
793                                    pred=X_pred,
794                                    scoring="mean_absolute_error",
795                                    per_series=per_series,
796                                )
797                                mpl = mean_errors(
798                                    actual=X_test_h,
799                                    pred=X_pred,
800                                    scoring="mean_pinball_loss",
801                                    per_series=per_series,
802                                )
803                                winklerscore = winkler_score(
804                                    obj=X_pred,
805                                    actual=X_test_h,
806                                    level=95,
807                                    per_series=per_series,
808                                )
809                                coveragecalc = coverage(
810                                    X_pred,
811                                    X_test_h,
812                                    level=95,
813                                    per_series=per_series,
814                                )
815                        else:  # no prediction interval
816                            if isinstance(X_test, pd.DataFrame):
817                                X_test_h = X_test.iloc[0: self.h, :]
818                                rmse = mean_errors(
819                                    actual=X_test_h,
820                                    pred=X_pred,
821                                    scoring="root_mean_squared_error",
822                                    per_series=per_series,
823                                )
824                                mae = mean_errors(
825                                    actual=X_test_h,
826                                    pred=X_pred,
827                                    scoring="mean_absolute_error",
828                                    per_series=per_series,
829                                )
830                                mpl = mean_errors(
831                                    actual=X_test_h,
832                                    pred=X_pred,
833                                    scoring="mean_pinball_loss",
834                                    per_series=per_series,
835                                )
836                            else:
837                                X_test_h = X_test[0: self.h, :]
838                                rmse = mean_errors(
839                                    actual=X_test_h,
840                                    pred=X_pred,
841                                    scoring="root_mean_squared_error",
842                                    per_series=per_series,
843                                )
844                                mae = mean_errors(
845                                    actual=X_test_h,
846                                    pred=X_pred,
847                                    scoring="mean_absolute_error",
848                                    per_series=per_series,
849                                )
850
851                    names.append(name)
852                    RMSE.append(rmse)
853                    MAE.append(mae)
854                    MPL.append(mpl)
855                    if (self.replications is not None) or (
856                        self.type_pi == "gaussian"
857                    ):
858                        WINKLERSCORE.append(winklerscore)
859                        COVERAGE.append(coveragecalc)
860                    TIME.append(time.time() - start)
861
862                    if self.custom_metric is not None:
863                        try:
864                            if self.h is None:
865                                custom_metric = self.custom_metric(
866                                    X_test, X_pred
867                                )
868                            else:
869                                custom_metric = self.custom_metric(
870                                    X_test_h, X_pred
871                                )
872                            CUSTOM_METRIC.append(custom_metric)
873                        except Exception as e:
874                            custom_metric = np.iinfo(np.float32).max
875                            CUSTOM_METRIC.append(np.iinfo(np.float32).max)
876
877                    if self.verbose > 0:
878                        if (self.replications is not None) or (
879                            self.type_pi == "gaussian"
880                        ):
881                            scores_verbose = {
882                                "Model": name,
883                                "RMSE": rmse,
884                                "MAE": mae,
885                                "MPL": mpl,
886                                "WINKLERSCORE": winklerscore,
887                                "COVERAGE": coveragecalc,
888                                "Time taken": time.time() - start,
889                            }
890                        else:
891                            scores_verbose = {
892                                "Model": name,
893                                "RMSE": rmse,
894                                "MAE": mae,
895                                "MPL": mpl,
896                                "Time taken": time.time() - start,
897                            }
898
899                        if self.custom_metric is not None:
900                            scores_verbose["Custom metric"] = custom_metric
901
902                    if self.predictions:
903                        predictions[name] = X_pred
904
905                except Exception as exception:
906                    if self.ignore_warnings is False:
907                        print(name + " model failed to execute")
908                        print(exception)
909
910        if (self.replications is not None) or (self.type_pi == "gaussian"):
911            scores = {
912                "Model": names,
913                "RMSE": RMSE,
914                "MAE": MAE,
915                "MPL": MPL,
916                "WINKLERSCORE": WINKLERSCORE,
917                "COVERAGE": COVERAGE,
918                "Time Taken": TIME,
919            }
920        else:
921            scores = {
922                "Model": names,
923                "RMSE": RMSE,
924                "MAE": MAE,
925                "MPL": MPL,
926                "Time Taken": TIME,
927            }
928
929        if self.custom_metric is not None:
930            scores["Custom metric"] = CUSTOM_METRIC
931
932        if per_series:
933            scores = dict_to_dataframe_series(scores, self.series_names)
934        else:
935            scores = pd.DataFrame(scores)
936
937        try:  # case per_series, can't be sorted
938            scores = scores.sort_values(
939                by=self.sort_by, ascending=True
940            ).set_index("Model")
941
942            self.best_model_ = self.models_[scores.index[0]]
943        except Exception as e:
944            pass
945
946        if self.predictions is True:
947            return scores, predictions
948
949        return scores

Fit Regression algorithms to X_train, predict and score on X_test.

Parameters:

X_train: array-like or data frame,
    Training vectors, where rows is the number of samples
    and columns is the number of features.

X_test: array-like or data frame,
    Testing vectors, where rows is the number of samples
    and columns is the number of features.

xreg: array-like, optional (default=None)
    Additional (external) regressors to be passed to self.obj
    xreg must be in 'increasing' order (most recent observations last)

per_series: bool, optional (default=False)
    When set to True, the metrics are computed series by series.

**kwargs: dict, optional (default=None)
    Additional parameters to be passed to `fit` method of `obj`.

Returns:

scores: Pandas DataFrame
    Returns metrics of all the models in a Pandas DataFrame.

predictions: Pandas DataFrame
    Returns predictions of all the models in a Pandas DataFrame.
def provide_models(self, X_train, X_test):
963    def provide_models(self, X_train, X_test):
964        """
965        This function returns all the model objects trained in fit function.
966        If fit is not called already, then we call fit and then return the models.
967
968        Parameters:
969
970            X_train : array-like,
971                Training vectors, where rows is the number of samples
972                and columns is the number of features.
973
974            X_test : array-like,
975                Testing vectors, where rows is the number of samples
976                and columns is the number of features.
977
978        Returns:
979
980            models: dict-object,
981                Returns a dictionary with each model pipeline as value
982                with key as name of models.
983
984        """
985        if self.h is None:
986            if len(self.models_.keys()) == 0:
987                self.fit(X_train, X_test)
988        else:
989            if len(self.models_.keys()) == 0:
990                if isinstance(X_test, pd.DataFrame):
991                    self.fit(X_train, X_test.iloc[0: self.h, :])
992                else:
993                    self.fit(X_train, X_test[0: self.h, :])
994
995        return self.models_

This function returns all the model objects trained in fit function. If fit is not called already, then we call fit and then return the models.

Parameters:

X_train : array-like,
    Training vectors, where rows is the number of samples
    and columns is the number of features.

X_test : array-like,
    Testing vectors, where rows is the number of samples
    and columns is the number of features.

Returns:

models: dict-object,
    Returns a dictionary with each model pipeline as value
    with key as name of models.
class MLARCH:
 10class MLARCH:
 11    """Machine Learning-agnostic ARCH for nearly-stationary time series (e.g., returns)
 12
 13    Parameters
 14    ----------
 15    model_mean : object
 16        Model for mean component
 17    model_sigma : object
 18        Model for volatility component (sklearn regressor)
 19    model_residuals : object
 20        Model for standardized residuals
 21    lags_vol : int, default=10
 22        Number of lags for squared residuals in volatility model
 23    """
 24
 25    def __init__(self, model_mean, model_sigma, model_residuals, lags_vol=10):
 26        self.model_mean = model_mean
 27        self.model_sigma = model_sigma
 28        self.model_residuals = model_residuals
 29        self.lags_vol = lags_vol
 30
 31    def _create_lags(self, y, lags):
 32        """Create lagged feature matrix"""
 33        n = len(y)
 34        if n <= lags:
 35            raise ValueError(f"Series length {n} must be > lags {lags}")
 36        X = np.zeros((n - lags, lags))
 37        for i in range(lags):
 38            X[:, i] = y[i: (n - lags + i)]
 39        return X
 40
 41    def fit(self, y, **kwargs):
 42        """Fit the MLARCH model
 43
 44        Parameters
 45        ----------
 46        y : array-like
 47            Target time series (should be stationary, e.g., returns)
 48
 49        Returns
 50        -------
 51        self
 52        """
 53        # Format input
 54        if isinstance(y, (pd.Series, pd.DataFrame)):
 55            y = y.values
 56        y = y.ravel()
 57
 58        if len(y) < self.lags_vol + 20:
 59            raise ValueError(f"Need at least {self.lags_vol + 20} observations")
 60
 61        # Step 1: Fit mean model
 62        self.model_mean.fit(y.reshape(-1, 1))
 63        mean_residuals = self.model_mean.residuals_.ravel()
 64
 65        # Step 2: Fit ARCH volatility model on lagged squared residuals
 66        resid_squared = mean_residuals**2
 67        X_vol = self._create_lags(resid_squared, self.lags_vol)
 68        y_vol = np.log(resid_squared[self.lags_vol:] + 1e-8)
 69
 70        self.model_sigma.fit(X_vol, y_vol)
 71
 72        # Get fitted volatility
 73        fitted_log_sigma = self.model_sigma.predict(X_vol)
 74        fitted_sigma = np.exp(fitted_log_sigma)
 75
 76        # Step 3: Compute standardized residuals with proper scaling
 77        standardized_residuals = mean_residuals[self.lags_vol:] / np.sqrt(
 78            fitted_sigma
 79        )
 80
 81        # Enforce zero mean and unit variance
 82        self.z_mean_ = np.mean(standardized_residuals)
 83        self.z_std_ = np.std(standardized_residuals)
 84        standardized_residuals = (
 85            standardized_residuals - self.z_mean_
 86        ) / self.z_std_
 87
 88        # Step 4: Fit residuals model
 89        self.model_residuals.fit(standardized_residuals.reshape(-1, 1))
 90
 91        # Store for prediction
 92        self.last_residuals_squared_ = resid_squared[-self.lags_vol:]
 93
 94        # Store diagnostics
 95        self.fitted_volatility_mean_ = np.mean(np.sqrt(fitted_sigma))
 96        self.fitted_volatility_std_ = np.std(np.sqrt(fitted_sigma))
 97
 98        return self
 99
100    def predict(self, h=5, level=95, return_sims=False):
101        """Predict future values
102
103        Parameters
104        ----------
105        h : int
106            Forecast horizon
107        level : int
108            Confidence level for prediction intervals
109        return_sims : bool
110            If True, return full simulation paths
111
112        Returns
113        -------
114        DescribeResult
115            Named tuple with mean, sims, lower, upper
116        """
117        DescribeResult = namedtuple(
118            "DescribeResult", ("mean", "sims", "lower", "upper")
119        )
120
121        # Get mean forecast
122        mean_forecast = self.model_mean.predict(h=h).values.ravel()
123
124        # Recursive ARCH volatility forecasting
125        sigma_forecast = np.zeros(h)
126        current_lags = self.last_residuals_squared_.copy()
127
128        for i in range(h):
129            X_t = current_lags.reshape(1, -1)
130            log_sigma_t = self.model_sigma.predict(X_t)[0]
131            sigma_forecast[i] = np.exp(log_sigma_t)
132            # Update lags with predicted variance
133            current_lags = np.append(current_lags[1:], sigma_forecast[i])
134
135        # Predict standardized residuals and rescale
136        z_forecast_normalized = self.model_residuals.predict(h=h).values.ravel()
137        z_forecast = z_forecast_normalized * self.z_std_ + self.z_mean_
138
139        # Combine: μ + z × σ
140        point_forecast = mean_forecast + z_forecast * np.sqrt(sigma_forecast)
141
142        # Generate prediction intervals
143        sims = None
144        if return_sims:
145            preds_z_for_sims = self.model_residuals.predict(h=h)
146            if hasattr(preds_z_for_sims, "sims") and isinstance(
147                preds_z_for_sims.sims, pd.DataFrame
148            ):
149                sims_z_normalized = preds_z_for_sims.sims
150                n_sims = sims_z_normalized.shape[1]
151
152                sims = np.zeros((h, n_sims))
153                for sim_idx in range(n_sims):
154                    # Rescale simulations
155                    z_sim = (
156                        sims_z_normalized.iloc[:, sim_idx].values * self.z_std_
157                        + self.z_mean_
158                    )
159                    sims[:, sim_idx] = mean_forecast + z_sim * np.sqrt(
160                        sigma_forecast
161                    )
162
163                alpha = 1 - level / 100
164                lower_bound = np.quantile(sims, alpha / 2, axis=1)
165                upper_bound = np.quantile(sims, 1 - alpha / 2, axis=1)
166            else:
167                # Fallback to Gaussian
168                z_score = norm.ppf(1 - (1 - level / 100) / 2)
169                margin = z_score * np.sqrt(sigma_forecast) * self.z_std_
170                lower_bound = point_forecast - margin
171                upper_bound = point_forecast + margin
172        else:
173            # Gaussian intervals with proper scaling
174            z_score = norm.ppf(1 - (1 - level / 100) / 2)
175            margin = z_score * np.sqrt(sigma_forecast) * self.z_std_
176            lower_bound = point_forecast - margin
177            upper_bound = point_forecast + margin
178
179        return DescribeResult(point_forecast, sims, lower_bound, upper_bound)

Machine Learning-agnostic ARCH for nearly-stationary time series (e.g., returns)

Parameters

model_mean : object
    Model for mean component

model_sigma : object
    Model for volatility component (sklearn regressor)

model_residuals : object
    Model for standardized residuals

lags_vol : int, default=10
    Number of lags for squared residuals in volatility model

def fit(self, y, **kwargs):
41    def fit(self, y, **kwargs):
42        """Fit the MLARCH model
43
44        Parameters
45        ----------
46        y : array-like
47            Target time series (should be stationary, e.g., returns)
48
49        Returns
50        -------
51        self
52        """
53        # Format input
54        if isinstance(y, (pd.Series, pd.DataFrame)):
55            y = y.values
56        y = y.ravel()
57
58        if len(y) < self.lags_vol + 20:
59            raise ValueError(f"Need at least {self.lags_vol + 20} observations")
60
61        # Step 1: Fit mean model
62        self.model_mean.fit(y.reshape(-1, 1))
63        mean_residuals = self.model_mean.residuals_.ravel()
64
65        # Step 2: Fit ARCH volatility model on lagged squared residuals
66        resid_squared = mean_residuals**2
67        X_vol = self._create_lags(resid_squared, self.lags_vol)
68        y_vol = np.log(resid_squared[self.lags_vol:] + 1e-8)
69
70        self.model_sigma.fit(X_vol, y_vol)
71
72        # Get fitted volatility
73        fitted_log_sigma = self.model_sigma.predict(X_vol)
74        fitted_sigma = np.exp(fitted_log_sigma)
75
76        # Step 3: Compute standardized residuals with proper scaling
77        standardized_residuals = mean_residuals[self.lags_vol:] / np.sqrt(
78            fitted_sigma
79        )
80
81        # Enforce zero mean and unit variance
82        self.z_mean_ = np.mean(standardized_residuals)
83        self.z_std_ = np.std(standardized_residuals)
84        standardized_residuals = (
85            standardized_residuals - self.z_mean_
86        ) / self.z_std_
87
88        # Step 4: Fit residuals model
89        self.model_residuals.fit(standardized_residuals.reshape(-1, 1))
90
91        # Store for prediction
92        self.last_residuals_squared_ = resid_squared[-self.lags_vol:]
93
94        # Store diagnostics
95        self.fitted_volatility_mean_ = np.mean(np.sqrt(fitted_sigma))
96        self.fitted_volatility_std_ = np.std(np.sqrt(fitted_sigma))
97
98        return self

Fit the MLARCH model

Parameters

y : array-like Target time series (should be stationary, e.g., returns)

Returns

self

def predict(self, h=5, level=95, return_sims=False):
100    def predict(self, h=5, level=95, return_sims=False):
101        """Predict future values
102
103        Parameters
104        ----------
105        h : int
106            Forecast horizon
107        level : int
108            Confidence level for prediction intervals
109        return_sims : bool
110            If True, return full simulation paths
111
112        Returns
113        -------
114        DescribeResult
115            Named tuple with mean, sims, lower, upper
116        """
117        DescribeResult = namedtuple(
118            "DescribeResult", ("mean", "sims", "lower", "upper")
119        )
120
121        # Get mean forecast
122        mean_forecast = self.model_mean.predict(h=h).values.ravel()
123
124        # Recursive ARCH volatility forecasting
125        sigma_forecast = np.zeros(h)
126        current_lags = self.last_residuals_squared_.copy()
127
128        for i in range(h):
129            X_t = current_lags.reshape(1, -1)
130            log_sigma_t = self.model_sigma.predict(X_t)[0]
131            sigma_forecast[i] = np.exp(log_sigma_t)
132            # Update lags with predicted variance
133            current_lags = np.append(current_lags[1:], sigma_forecast[i])
134
135        # Predict standardized residuals and rescale
136        z_forecast_normalized = self.model_residuals.predict(h=h).values.ravel()
137        z_forecast = z_forecast_normalized * self.z_std_ + self.z_mean_
138
139        # Combine: μ + z × σ
140        point_forecast = mean_forecast + z_forecast * np.sqrt(sigma_forecast)
141
142        # Generate prediction intervals
143        sims = None
144        if return_sims:
145            preds_z_for_sims = self.model_residuals.predict(h=h)
146            if hasattr(preds_z_for_sims, "sims") and isinstance(
147                preds_z_for_sims.sims, pd.DataFrame
148            ):
149                sims_z_normalized = preds_z_for_sims.sims
150                n_sims = sims_z_normalized.shape[1]
151
152                sims = np.zeros((h, n_sims))
153                for sim_idx in range(n_sims):
154                    # Rescale simulations
155                    z_sim = (
156                        sims_z_normalized.iloc[:, sim_idx].values * self.z_std_
157                        + self.z_mean_
158                    )
159                    sims[:, sim_idx] = mean_forecast + z_sim * np.sqrt(
160                        sigma_forecast
161                    )
162
163                alpha = 1 - level / 100
164                lower_bound = np.quantile(sims, alpha / 2, axis=1)
165                upper_bound = np.quantile(sims, 1 - alpha / 2, axis=1)
166            else:
167                # Fallback to Gaussian
168                z_score = norm.ppf(1 - (1 - level / 100) / 2)
169                margin = z_score * np.sqrt(sigma_forecast) * self.z_std_
170                lower_bound = point_forecast - margin
171                upper_bound = point_forecast + margin
172        else:
173            # Gaussian intervals with proper scaling
174            z_score = norm.ppf(1 - (1 - level / 100) / 2)
175            margin = z_score * np.sqrt(sigma_forecast) * self.z_std_
176            lower_bound = point_forecast - margin
177            upper_bound = point_forecast + margin
178
179        return DescribeResult(point_forecast, sims, lower_bound, upper_bound)

Predict future values

Parameters

h : int
    Forecast horizon

level : int
    Confidence level for prediction intervals

return_sims : bool
    If True, return full simulation paths

Returns

DescribeResult Named tuple with mean, sims, lower, upper

class MedianVotingRegressor(sklearn.ensemble._voting.VotingRegressor):
 6class MedianVotingRegressor(VotingRegressor):
 7    def predict(self, X):
 8        """
 9        Predict using the median of the base regressors' predictions.
10
11        Parameters:
12        X (array-like): Feature matrix for predictions.
13
14        Returns:
15        y_pred (array): Median of predictions from the base regressors.
16        """
17        predictions = np.asarray(
18            [regressor.predict(X) for regressor in self.estimators_]
19        )
20        return np.median(predictions, axis=0)

Prediction voting regressor for unfitted estimators.

A voting regressor is an ensemble meta-estimator that fits several base regressors, each on the whole dataset. Then it averages the individual predictions to form a final prediction.

For a detailed example, refer to :ref:sphx_glr_auto_examples_ensemble_plot_voting_regressor.py.

Read more in the :ref:User Guide <voting_regressor>.

New in version 0.21.

Parameters

estimators : list of (str, estimator) tuples Invoking the fit method on the VotingRegressor will fit clones of those original estimators that will be stored in the class attribute self.estimators_. An estimator can be set to 'drop' using set_params().

*Changed in version 0.21:*
``'drop'`` is accepted. Using None was deprecated in 0.22 and
support was removed in 0.24.

weights : array-like of shape (n_regressors,), default=None Sequence of weights (float or int) to weight the occurrences of predicted values before averaging. Uses uniform weights if None.

n_jobs : int, default=None The number of jobs to run in parallel for fit. None means 1 unless in a joblib.parallel_backend context. -1 means using all processors. See :term:Glossary <n_jobs> for more details.

verbose : bool, default=False If True, the time elapsed while fitting will be printed as it is completed.

*New in version 0.23.*

Attributes

estimators_ : list of regressors The collection of fitted sub-estimators as defined in estimators that are not 'drop'.

named_estimators_ : ~sklearn.utils.Bunch Attribute to access any fitted sub-estimators by name.

*New in version 0.20.*

n_features_in_ : int Number of features seen during :term:fit. Only defined if the underlying regressor exposes such an attribute when fit.

*New in version 0.24.*

feature_names_in_ : ndarray of shape (n_features_in_,) Names of features seen during :term:fit. Only defined if the underlying estimators expose such an attribute when fit.

*New in version 1.0.*

See Also

VotingClassifier : Soft Voting/Majority Rule classifier.

Examples

>>> import numpy as np
>>> from sklearn.linear_model import LinearRegression
>>> from sklearn.ensemble import RandomForestRegressor
>>> from sklearn.ensemble import VotingRegressor
>>> from sklearn.neighbors import KNeighborsRegressor
>>> r1 = LinearRegression()
>>> r2 = RandomForestRegressor(n_estimators=10, random_state=1)
>>> r3 = KNeighborsRegressor()
>>> X = np.array([[1, 1], [2, 4], [3, 9], [4, 16], [5, 25], [6, 36]])
>>> y = np.array([2, 6, 12, 20, 30, 42])
>>> er = VotingRegressor([('lr', r1), ('rf', r2), ('r3', r3)])
>>> print(er.fit(X, y).predict(X))
[ 6.8  8.4 12.5 17.8 26  34]

In the following example, we drop the 'lr' estimator with ~VotingRegressor.set_params() and fit the remaining two estimators:

>>> er = er.set_params(lr='drop')
>>> er = er.fit(X, y)
>>> len(er.estimators_)
2
def predict(self, X):
 7    def predict(self, X):
 8        """
 9        Predict using the median of the base regressors' predictions.
10
11        Parameters:
12        X (array-like): Feature matrix for predictions.
13
14        Returns:
15        y_pred (array): Median of predictions from the base regressors.
16        """
17        predictions = np.asarray(
18            [regressor.predict(X) for regressor in self.estimators_]
19        )
20        return np.median(predictions, axis=0)

Predict using the median of the base regressors' predictions.

Parameters: X (array-like): Feature matrix for predictions.

Returns: y_pred (array): Median of predictions from the base regressors.

class MTS(nnetsauce.Base):
  31class MTS(Base):
  32    """Univariate and multivariate time series (MTS) forecasting with Quasi-Randomized networks
  33
  34    Parameters:
  35
  36        obj: object.
  37            any object containing a method fit (obj.fit()) and a method predict
  38            (obj.predict()).
  39
  40        n_hidden_features: int.
  41            number of nodes in the hidden layer.
  42
  43        activation_name: str.
  44            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'.
  45
  46        a: float.
  47            hyperparameter for 'prelu' or 'elu' activation function.
  48
  49        nodes_sim: str.
  50            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
  51            'uniform'.
  52
  53        bias: boolean.
  54            indicates if the hidden layer contains a bias term (True) or not
  55            (False).
  56
  57        dropout: float.
  58            regularization parameter; (random) percentage of nodes dropped out
  59            of the training.
  60
  61        direct_link: boolean.
  62            indicates if the original predictors are included (True) in model's fitting or not (False).
  63
  64        n_clusters: int.
  65            number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering).
  66
  67        cluster_encode: bool.
  68            defines how the variable containing clusters is treated (default is one-hot)
  69            if `False`, then labels are used, without one-hot encoding.
  70
  71        type_clust: str.
  72            type of clustering method: currently k-means ('kmeans') or Gaussian
  73            Mixture Model ('gmm').
  74
  75        type_scaling: a tuple of 3 strings.
  76            scaling methods for inputs, hidden layer, and clustering respectively
  77            (and when relevant).
  78            Currently available: standardization ('std') or MinMax scaling ('minmax').
  79
  80        lags: int or str.
  81            number of lags used for each time series.
  82            If a string, it must be one of 'AIC', 'AICc', or 'BIC'; the number of lags is then selected in `fit` by minimizing that criterion.
  83
  84        type_pi: str.
  85            type of prediction interval; currently:
  86            - "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case
  87            - "quantile": use model-agnostic quantile regression under the hood
  88            - "kde": based on Kernel Density Estimation of in-sample residuals
  89            - "bootstrap": based on independent bootstrap of in-sample residuals
  90            - "block-bootstrap": based on basic block bootstrap of in-sample residuals
  91            - "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals
  92            - "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals
  93            - "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals
  94            - "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals
  95            - "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals
  96            - "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals
  97            - based on copulas of in-sample residuals: 'vine-tll', 'vine-bb1', 'vine-bb6', 'vine-bb7', 'vine-bb8', 'vine-clayton',
  98            'vine-frank', 'vine-gaussian', 'vine-gumbel', 'vine-indep', 'vine-joe', 'vine-student'
  99            - 'scp-vine-tll', 'scp-vine-bb1', 'scp-vine-bb6', 'scp-vine-bb7', 'scp-vine-bb8', 'scp-vine-clayton',
 100            'scp-vine-frank', 'scp-vine-gaussian', 'scp-vine-gumbel', 'scp-vine-indep', 'scp-vine-joe', 'scp-vine-student'
 101            - 'scp2-vine-tll', 'scp2-vine-bb1', 'scp2-vine-bb6', 'scp2-vine-bb7', 'scp2-vine-bb8', 'scp2-vine-clayton',
 102            'scp2-vine-frank', 'scp2-vine-gaussian', 'scp2-vine-gumbel', 'scp2-vine-indep', 'scp2-vine-joe', 'scp2-vine-student'
 103
 104        level: int.
 105            level of confidence for `type_pi == 'quantile'` (default is `95`)
 106
 107        block_size: int.
 108            size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap").
 109            Default is round(3.15*(n_residuals^(1/3)))
 110
 111        replications: int.
 112            number of replications (if needed, for predictive simulation). Default is 'None'.
 113
 114        kernel: str.
 115            the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'.
 116
 117        agg: str.
 118            either "mean" or "median" for simulation of bootstrap aggregating
 119
 120        seed: int.
 121            reproducibility seed for nodes_sim=='uniform' or predictive simulation.
 122
 123        backend: str.
 124            "cpu" or "gpu" or "tpu".
 125
 126        verbose: int.
 127            0: not printing; 1: printing
 128
 129        show_progress: bool.
 130            True: progress bar when fitting each series; False: no progress bar when fitting each series
 131
 132    Attributes:
 133
 134        fit_objs_: dict
 135            objects adjusted to each individual time series
 136
 137        y_: {array-like}
 138            MTS responses (most recent observations first)
 139
 140        X_: {array-like}
 141            MTS lags
 142
 143        xreg_: {array-like}
 144            external regressors
 145
 146        y_means_: dict
 147            a dictionary of each series mean values
 148
 149        preds_: {array-like}
 150            successive model predictions
 151
 152        preds_std_: {array-like}
 153            standard deviation around the predictions for Bayesian base learners (`obj`)
 154
 155        gaussian_preds_std_: {array-like}
 156            standard deviation around the predictions for `type_pi='gaussian'`
 157
 158        return_std_: boolean
 159            return uncertainty or not (set in predict)
 160
 161        df_: data frame
 162            the input data frame, in case a data.frame is provided to `fit`
 163
 164        n_obs_: int
 165            number of time series observations (number of rows for multivariate)
 166
 167        level_: int
 168            level of confidence for prediction intervals (default is 95)
 169
 170        residuals_: {array-like}
 171            in-sample residuals (for `type_pi` not conformal prediction) or calibrated residuals
 172            (for `type_pi` in conformal prediction)
 173
 174        residuals_sims_: tuple of {array-like}
 175            simulations of in-sample residuals (for `type_pi` not conformal prediction) or
 176            calibrated residuals (for `type_pi` in conformal prediction)
 177
 178        kde_: A scikit-learn object, see https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KernelDensity.html
 179
 180        residuals_std_dev_: residuals standard deviation
 181
 182    Examples:
 183
 184    Example 1:
 185
 186    ```python
 187    import nnetsauce as ns
 188    import numpy as np
 189    from sklearn import linear_model
 190    np.random.seed(123)
 191
 192    M = np.random.rand(10, 3)
 193    M[:,0] = 10*M[:,0]
 194    M[:,2] = 25*M[:,2]
 195    print(M)
 196
 197    # Adjust Bayesian Ridge
 198    regr4 = linear_model.BayesianRidge()
 199    obj_MTS = ns.MTS(regr4, lags = 1, n_hidden_features=5)
 200    obj_MTS.fit(M)
 201    print(obj_MTS.predict())
 202
 203    # with credible intervals
 204    print(obj_MTS.predict(return_std=True, level=80))
 205
 206    print(obj_MTS.predict(return_std=True, level=95))
 207    ```
 208
 209    Example 2:
 210
 211    ```python
 212    import nnetsauce as ns
 213    import pandas as pd
 214    from sklearn import linear_model
 215
 216    dataset = {
 217    'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
 218    'series1' : [34, 30, 35.6, 33.3, 38.1],
 219    'series2' : [4, 5.5, 5.6, 6.3, 5.1],
 220    'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
 221    df = pd.DataFrame(dataset).set_index('date')
 222    print(df)
 223
 224    # Adjust Bayesian Ridge
 225    regr5 = linear_model.BayesianRidge()
 226    obj_MTS = ns.MTS(regr5, lags = 1, n_hidden_features=5)
 227    obj_MTS.fit(df)
 228    print(obj_MTS.predict())
 229
 230    # with credible intervals
 231    print(obj_MTS.predict(return_std=True, level=80))
 232
 233    print(obj_MTS.predict(return_std=True, level=95))
 234    ```
 235    """
 236
 237    # construct the object -----
 238
 239    def __init__(
 240        self,
 241        obj,
 242        n_hidden_features=5,
 243        activation_name="relu",
 244        a=0.01,
 245        nodes_sim="sobol",
 246        bias=True,
 247        dropout=0,
 248        direct_link=True,
 249        n_clusters=2,
 250        cluster_encode=True,
 251        type_clust="kmeans",
 252        type_scaling=("std", "std", "std"),
 253        lags=1,
 254        type_pi="kde",
 255        level=95,
 256        block_size=None,
 257        replications=None,
 258        kernel="gaussian",
 259        agg="mean",
 260        seed=123,
 261        backend="cpu",
 262        verbose=0,
 263        show_progress=True,
 264    ):
 265        super().__init__(
 266            n_hidden_features=n_hidden_features,
 267            activation_name=activation_name,
 268            a=a,
 269            nodes_sim=nodes_sim,
 270            bias=bias,
 271            dropout=dropout,
 272            direct_link=direct_link,
 273            n_clusters=n_clusters,
 274            cluster_encode=cluster_encode,
 275            type_clust=type_clust,
 276            type_scaling=type_scaling,
 277            seed=seed,
 278            backend=backend,
 279        )
 280
 281        # Add validation for lags parameter
 282        if isinstance(lags, str):
 283            assert lags in (
 284                "AIC",
 285                "AICc",
 286                "BIC",
 287            ), "if string, lags must be one of 'AIC', 'AICc', or 'BIC'"
 288        else:
 289            assert (
 290                int(lags) == lags
 291            ), "if numeric, lags parameter should be an integer"
 292
 293        self.obj = obj
 294        self.n_series = None
 295        self.lags = lags
 296        self.type_pi = type_pi
 297        self.level = level
 298        if self.type_pi == "quantile":
 299            self.obj = QuantileRegressor(
 300                self.obj, level=self.level, scoring="conformal"
 301            )
 302        self.block_size = block_size
 303        self.replications = replications
 304        self.kernel = kernel
 305        self.agg = agg
 306        self.verbose = verbose
 307        self.show_progress = show_progress
 308        self.series_names = ["series0"]
 309        self.input_dates = None
 310        self.quantiles = None
 311        self.fit_objs_ = {}
 312        self.y_ = None  # MTS responses (most recent observations first)
 313        self.X_ = None  # MTS lags
 314        self.xreg_ = None
 315        self.y_means_ = {}
 316        self.mean_ = None
 317        self.median_ = None
 318        self.upper_ = None
 319        self.lower_ = None
 320        self.output_dates_ = None
 321        self.preds_std_ = []
 322        self.gaussian_preds_std_ = None
 323        self.alpha_ = None
 324        self.return_std_ = None
 325        self.df_ = None
 326        self.residuals_ = []
 327        self.abs_calib_residuals_ = None
 328        self.calib_residuals_quantile_ = None
 329        self.residuals_sims_ = None
 330        self.kde_ = None
 331        self.sims_ = None
 332        self.residuals_std_dev_ = None
 333        self.n_obs_ = None
 334        self.level_ = None
 335        self.init_n_series_ = None
 336
 337    def fit(self, X, xreg=None, **kwargs):
 338        """Fit MTS model to training data X, with optional regressors xreg
 339
 340        Parameters:
 341
 342        X: {array-like}, shape = [n_samples, n_features]
 343            Training time series, where n_samples is the number
 344            of samples and n_features is the number of features;
 345            X must be in increasing order (most recent observations last)
 346
 347        xreg: {array-like}, shape = [n_samples, n_features_xreg]
 348            Additional (external) regressors to be passed to self.obj
 349            xreg must be in 'increasing' order (most recent observations last)
 350
 351        **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)
 352
 353        Returns:
 354
 355        self: object
 356        """
 357        try:
 358            self.init_n_series_ = X.shape[1]
 359        except IndexError as e:
 360            self.init_n_series_ = 1
 361
 362        # Automatic lag selection if requested
 363        if isinstance(self.lags, str):
 364            max_lags = min(25, X.shape[0] // 4)
 365            best_ic = float("inf")
 366            best_lags = 1
 367
 368            if self.verbose:
 369                print(
 370                    f"\nSelecting optimal number of lags using {self.lags}..."
 371                )
 372                iterator = tqdm(range(1, max_lags + 1))
 373            else:
 374                iterator = range(1, max_lags + 1)
 375
 376            for lag in iterator:
 377                # Convert DataFrame to numpy array before reversing
 378                if isinstance(X, pd.DataFrame):
 379                    X_values = X.values[::-1]
 380                else:
 381                    X_values = X[::-1]
 382
 383                # Try current lag value
 384                if self.init_n_series_ > 1:
 385                    mts_input = ts.create_train_inputs(X_values, lag)
 386                else:
 387                    mts_input = ts.create_train_inputs(
 388                        X_values.reshape(-1, 1), lag
 389                    )
 390
 391                # Cook training set and fit model
 392                dummy_y, scaled_Z = self.cook_training_set(
 393                    y=np.ones(mts_input[0].shape[0]), X=mts_input[1]
 394                )
 395                residuals_ = []
 396
 397                for i in range(self.init_n_series_):
 398                    y_mean = np.mean(mts_input[0][:, i])
 399                    centered_y_i = mts_input[0][:, i] - y_mean
 400                    self.obj.fit(X=scaled_Z, y=centered_y_i)
 401                    residuals_.append(
 402                        (centered_y_i - self.obj.predict(scaled_Z)).tolist()
 403                    )
 404
 405                self.residuals_ = np.asarray(residuals_).T
 406                ic = self._compute_information_criterion(
 407                    curr_lags=lag, criterion=self.lags
 408                )
 409
 410                if self.verbose:
 411                    print(f"Trying lags={lag}, {self.lags}={ic:.2f}")
 412
 413                if ic < best_ic:
 414                    best_ic = ic
 415                    best_lags = lag
 416
 417            if self.verbose:
 418                print(
 419                    f"\nSelected {best_lags} lags with {self.lags}={best_ic:.2f}"
 420                )
 421
 422            self.lags = best_lags
 423
 424        self.input_dates = None
 425        self.df_ = None
 426
 427        if isinstance(X, pd.DataFrame) is False:
 428            # input data set is a numpy array
 429            if xreg is None:
 430                X = pd.DataFrame(X)
 431                self.series_names = [
 432                    "series" + str(i) for i in range(X.shape[1])
 433                ]
 434            else:
 435                # xreg is not None
 436                X = mo.cbind(X, xreg)
 437                self.xreg_ = xreg
 438
 439        else:  # input data set is a DataFrame with column names
 440            X_index = None
 441            if X.index is not None:
 442                X_index = X.index
 443            if xreg is None:
 444                X = copy.deepcopy(mo.convert_df_to_numeric(X))
 445            else:
 446                X = copy.deepcopy(mo.cbind(mo.convert_df_to_numeric(X), xreg))
 447                self.xreg_ = xreg
 448            if X_index is not None:
 449                X.index = X_index
 450            self.series_names = X.columns.tolist()
 451
 452        if isinstance(X, pd.DataFrame):
 453            if self.df_ is None:
 454                self.df_ = X
 455                X = X.values
 456            else:
 457                input_dates_prev = pd.DatetimeIndex(self.df_.index.values)
 458                frequency = pd.infer_freq(input_dates_prev)
 459                self.df_ = pd.concat([self.df_, X], axis=0)
 460                self.input_dates = pd.date_range(
 461                    start=input_dates_prev[0],
 462                    periods=len(input_dates_prev) + X.shape[0],
 463                    freq=frequency,
 464                ).values.tolist()
 465                self.df_.index = self.input_dates
 466                X = self.df_.values
 467            self.df_.columns = self.series_names
 468        else:
 469            if self.df_ is None:
 470                self.df_ = pd.DataFrame(X, columns=self.series_names)
 471            else:
 472                self.df_ = pd.concat(
 473                    [self.df_, pd.DataFrame(X, columns=self.series_names)],
 474                    axis=0,
 475                )
 476
 477        self.input_dates = ts.compute_input_dates(self.df_)
 478
 479        try:
 480            # multivariate time series
 481            n, p = X.shape
 482        except:
 483            # univariate time series
 484            n = X.shape[0]
 485            p = 1
 486        self.n_obs_ = n
 487
 488        rep_1_n = np.repeat(1, n)
 489
 490        self.y_ = None
 491        self.X_ = None
 492        self.n_series = p
 493        self.fit_objs_.clear()
 494        self.y_means_.clear()
 495        residuals_ = []
 496        self.residuals_ = None
 497        self.residuals_sims_ = None
 498        self.kde_ = None
 499        self.sims_ = None
 500        self.scaled_Z_ = None
 501        self.centered_y_is_ = []
 502
 503        if self.init_n_series_ > 1:
 504            # multivariate time series
 505            mts_input = ts.create_train_inputs(X[::-1], self.lags)
 506        else:
 507            # univariate time series
 508            mts_input = ts.create_train_inputs(
 509                X.reshape(-1, 1)[::-1], self.lags
 510            )
 511
 512        self.y_ = mts_input[0]
 513
 514        self.X_ = mts_input[1]
 515
 516        dummy_y, scaled_Z = self.cook_training_set(y=rep_1_n, X=self.X_)
 517
 518        self.scaled_Z_ = scaled_Z
 519
 520        # loop on all the time series and adjust self.obj.fit
 521        if self.verbose > 0:
 522            print(
 523                f"\n Adjusting {type(self.obj).__name__} to multivariate time series... \n"
 524            )
 525
 526        if self.show_progress is True:
 527            iterator = tqdm(range(self.init_n_series_))
 528        else:
 529            iterator = range(self.init_n_series_)
 530
 531        if self.type_pi in (
 532            "gaussian",
 533            "kde",
 534            "bootstrap",
 535            "block-bootstrap",
 536        ) or self.type_pi.startswith("vine"):
 537            for i in iterator:
 538                y_mean = np.mean(self.y_[:, i])
 539                self.y_means_[i] = y_mean
 540                centered_y_i = self.y_[:, i] - y_mean
 541                self.centered_y_is_.append(centered_y_i)
 542                self.obj.fit(X=scaled_Z, y=centered_y_i)
 543                self.fit_objs_[i] = deepcopy(self.obj)
 544                residuals_.append(
 545                    (
 546                        centered_y_i - self.fit_objs_[i].predict(scaled_Z)
 547                    ).tolist()
 548                )
 549
 550        if self.type_pi == "quantile":
 551            for i in iterator:
 552                y_mean = np.mean(self.y_[:, i])
 553                self.y_means_[i] = y_mean
 554                centered_y_i = self.y_[:, i] - y_mean
 555                self.centered_y_is_.append(centered_y_i)
 556                self.obj.fit(X=scaled_Z, y=centered_y_i)
 557                self.fit_objs_[i] = deepcopy(self.obj)
 558
 559        if self.type_pi.startswith("scp"):
 560            # split conformal prediction
 561            for i in iterator:
 562                n_y = self.y_.shape[0]
 563                n_y_half = n_y // 2
 564                first_half_idx = range(0, n_y_half)
 565                second_half_idx = range(n_y_half, n_y)
 566                y_mean_temp = np.mean(self.y_[first_half_idx, i])
 567                centered_y_i_temp = self.y_[first_half_idx, i] - y_mean_temp
 568                self.obj.fit(X=scaled_Z[first_half_idx, :], y=centered_y_i_temp)
 569                # calibrated residuals actually
 570                residuals_.append(
 571                    (
 572                        self.y_[second_half_idx, i]
 573                        - (
 574                            y_mean_temp
 575                            + self.obj.predict(scaled_Z[second_half_idx, :])
 576                        )
 577                    ).tolist()
 578                )
 579                # fit on the second half
 580                y_mean = np.mean(self.y_[second_half_idx, i])
 581                self.y_means_[i] = y_mean
 582                centered_y_i = self.y_[second_half_idx, i] - y_mean
 583                self.obj.fit(X=scaled_Z[second_half_idx, :], y=centered_y_i)
 584                self.fit_objs_[i] = deepcopy(self.obj)
 585
 586        self.residuals_ = np.asarray(residuals_).T
 587
 588        if self.type_pi == "gaussian":
 589            self.gaussian_preds_std_ = np.std(self.residuals_, axis=0)
 590
 591        if self.type_pi.startswith("scp2"):
 592            # Calculate mean and standard deviation for each column
 593            data_mean = np.mean(self.residuals_, axis=0)
 594            self.residuals_std_dev_ = np.std(self.residuals_, axis=0)
 595            # Center and scale the array using broadcasting
 596            self.residuals_ = (
 597                self.residuals_ - data_mean[np.newaxis, :]
 598            ) / self.residuals_std_dev_[np.newaxis, :]
 599
 600        if self.replications != None and "kde" in self.type_pi:
 601            if self.verbose > 0:
 602                print(f"\n Simulate residuals using {self.kernel} kernel... \n")
 603            assert self.kernel in (
 604                "gaussian",
 605                "tophat",
 606            ), "currently, 'kernel' must be either 'gaussian' or 'tophat'"
 607            kernel_bandwidths = {"bandwidth": np.logspace(-6, 6, 150)}
 608            grid = GridSearchCV(
 609                KernelDensity(kernel=self.kernel, **kwargs),
 610                param_grid=kernel_bandwidths,
 611            )
 612            grid.fit(self.residuals_)
 613
 614            if self.verbose > 0:
 615                print(
 616                    f"\n Best parameters for {self.kernel} kernel: {grid.best_params_} \n"
 617                )
 618
 619            self.kde_ = grid.best_estimator_
 620
 621        return self
 622
 623    def partial_fit(self, X, xreg=None, **kwargs):
 624        """partial_fit MTS model to training data X, with optional regressors xreg
 625
 626        Parameters:
 627
 628        X: {array-like}, shape = [n_samples, n_features]
 629            Training time series, where n_samples is the number
 630            of samples and n_features is the number of features;
 631            X must be in increasing order (most recent observations last)
 632
 633        xreg: {array-like}, shape = [n_samples, n_features_xreg]
 634            Additional (external) regressors to be passed to self.obj
 635            xreg must be in 'increasing' order (most recent observations last)
 636
 637        **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)
 638
 639        Returns:
 640
 641        self: object
 642        """
 643        try:
 644            self.init_n_series_ = X.shape[1]
 645        except IndexError as e:
 646            self.init_n_series_ = 1
 647
 648        # Automatic lag selection if requested
 649        if isinstance(self.lags, str):
 650            max_lags = min(25, X.shape[0] // 4)
 651            best_ic = float("inf")
 652            best_lags = 1
 653
 654            if self.verbose:
 655                print(
 656                    f"\nSelecting optimal number of lags using {self.lags}..."
 657                )
 658                iterator = tqdm(range(1, max_lags + 1))
 659            else:
 660                iterator = range(1, max_lags + 1)
 661
 662            for lag in iterator:
 663                # Convert DataFrame to numpy array before reversing
 664                if isinstance(X, pd.DataFrame):
 665                    X_values = X.values[::-1]
 666                else:
 667                    X_values = X[::-1]
 668
 669                # Try current lag value
 670                if self.init_n_series_ > 1:
 671                    mts_input = ts.create_train_inputs(X_values, lag)
 672                else:
 673                    mts_input = ts.create_train_inputs(
 674                        X_values.reshape(-1, 1), lag
 675                    )
 676
 677                # Cook training set and partial_fit model
 678                dummy_y, scaled_Z = self.cook_training_set(
 679                    y=np.ones(mts_input[0].shape[0]), X=mts_input[1]
 680                )
 681                residuals_ = []
 682
 683                for i in range(self.init_n_series_):
 684                    y_mean = np.mean(mts_input[0][:, i])
 685                    centered_y_i = mts_input[0][:, i] - y_mean
 686                    self.obj.partial_fit(X=scaled_Z, y=centered_y_i)
 687                    residuals_.append(
 688                        (centered_y_i - self.obj.predict(scaled_Z)).tolist()
 689                    )
 690
 691                self.residuals_ = np.asarray(residuals_).T
 692                ic = self._compute_information_criterion(
 693                    curr_lags=lag, criterion=self.lags
 694                )
 695
 696                if self.verbose:
 697                    print(f"Trying lags={lag}, {self.lags}={ic:.2f}")
 698
 699                if ic < best_ic:
 700                    best_ic = ic
 701                    best_lags = lag
 702
 703            if self.verbose:
 704                print(
 705                    f"\nSelected {best_lags} lags with {self.lags}={best_ic:.2f}"
 706                )
 707
 708            self.lags = best_lags
 709
 710        self.input_dates = None
 711        self.df_ = None
 712
 713        if isinstance(X, pd.DataFrame) is False:
 714            # input data set is a numpy array
 715            if xreg is None:
 716                X = pd.DataFrame(X)
 717                if len(X.shape) > 1:
 718                    self.series_names = [
 719                        "series" + str(i) for i in range(X.shape[1])
 720                    ]
 721                else:
 722                    self.series_names = ["series0"]
 723            else:
 724                # xreg is not None
 725                X = mo.cbind(X, xreg)
 726                self.xreg_ = xreg
 727
 728        else:  # input data set is a DataFrame with column names
 729            X_index = None
 730            if X.index is not None:
 731                X_index = X.index
 732            if xreg is None:
 733                X = copy.deepcopy(mo.convert_df_to_numeric(X))
 734            else:
 735                X = copy.deepcopy(mo.cbind(mo.convert_df_to_numeric(X), xreg))
 736                self.xreg_ = xreg
 737            if X_index is not None:
 738                X.index = X_index
 739            self.series_names = X.columns.tolist()
 740
 741        if isinstance(X, pd.DataFrame):
 742            if self.df_ is None:
 743                self.df_ = X
 744                X = X.values
 745            else:
 746                input_dates_prev = pd.DatetimeIndex(self.df_.index.values)
 747                frequency = pd.infer_freq(input_dates_prev)
 748                self.df_ = pd.concat([self.df_, X], axis=0)
 749                self.input_dates = pd.date_range(
 750                    start=input_dates_prev[0],
 751                    periods=len(input_dates_prev) + X.shape[0],
 752                    freq=frequency,
 753                ).values.tolist()
 754                self.df_.index = self.input_dates
 755                X = self.df_.values
 756            self.df_.columns = self.series_names
 757        else:
 758            if self.df_ is None:
 759                self.df_ = pd.DataFrame(X, columns=self.series_names)
 760            else:
 761                self.df_ = pd.concat(
 762                    [self.df_, pd.DataFrame(X, columns=self.series_names)],
 763                    axis=0,
 764                )
 765
 766        self.input_dates = ts.compute_input_dates(self.df_)
 767
 768        try:
 769            # multivariate time series
 770            n, p = X.shape
 771        except:
 772            # univariate time series
 773            n = X.shape[0]
 774            p = 1
 775        self.n_obs_ = n
 776
 777        rep_1_n = np.repeat(1, n)
 778
 779        self.y_ = None
 780        self.X_ = None
 781        self.n_series = p
 782        self.fit_objs_.clear()
 783        self.y_means_.clear()
 784        residuals_ = []
 785        self.residuals_ = None
 786        self.residuals_sims_ = None
 787        self.kde_ = None
 788        self.sims_ = None
 789        self.scaled_Z_ = None
 790        self.centered_y_is_ = []
 791
 792        if self.init_n_series_ > 1:
 793            # multivariate time series
 794            mts_input = ts.create_train_inputs(X[::-1], self.lags)
 795        else:
 796            # univariate time series
 797            mts_input = ts.create_train_inputs(
 798                X.reshape(-1, 1)[::-1], self.lags
 799            )
 800
 801        self.y_ = mts_input[0]
 802
 803        self.X_ = mts_input[1]
 804
 805        dummy_y, scaled_Z = self.cook_training_set(y=rep_1_n, X=self.X_)
 806
 807        self.scaled_Z_ = scaled_Z
 808
 809        # loop on all the time series and adjust self.obj.partial_fit
 810        if self.verbose > 0:
 811            print(
 812                f"\n Adjusting {type(self.obj).__name__} to multivariate time series... \n"
 813            )
 814
 815        if self.show_progress is True:
 816            iterator = tqdm(range(self.init_n_series_))
 817        else:
 818            iterator = range(self.init_n_series_)
 819
 820        if self.type_pi in (
 821            "gaussian",
 822            "kde",
 823            "bootstrap",
 824            "block-bootstrap",
 825        ) or self.type_pi.startswith("vine"):
 826            for i in iterator:
 827                y_mean = np.mean(self.y_[:, i])
 828                self.y_means_[i] = y_mean
 829                centered_y_i = self.y_[:, i] - y_mean
 830                self.centered_y_is_.append(centered_y_i)
 831                self.obj.partial_fit(X=scaled_Z, y=centered_y_i)
 832                self.fit_objs_[i] = deepcopy(self.obj)
 833                residuals_.append(
 834                    (
 835                        centered_y_i - self.fit_objs_[i].predict(scaled_Z)
 836                    ).tolist()
 837                )
 838
 839        if self.type_pi == "quantile":
 840            for i in iterator:
 841                y_mean = np.mean(self.y_[:, i])
 842                self.y_means_[i] = y_mean
 843                centered_y_i = self.y_[:, i] - y_mean
 844                self.centered_y_is_.append(centered_y_i)
 845                self.obj.partial_fit(X=scaled_Z, y=centered_y_i)
 846                self.fit_objs_[i] = deepcopy(self.obj)
 847
 848        if self.type_pi.startswith("scp"):
 849            # split conformal prediction
 850            for i in iterator:
 851                n_y = self.y_.shape[0]
 852                n_y_half = n_y // 2
 853                first_half_idx = range(0, n_y_half)
 854                second_half_idx = range(n_y_half, n_y)
 855                y_mean_temp = np.mean(self.y_[first_half_idx, i])
 856                centered_y_i_temp = self.y_[first_half_idx, i] - y_mean_temp
 857                self.obj.partial_fit(
 858                    X=scaled_Z[first_half_idx, :], y=centered_y_i_temp
 859                )
 860                # calibrated residuals actually
 861                residuals_.append(
 862                    (
 863                        self.y_[second_half_idx, i]
 864                        - (
 865                            y_mean_temp
 866                            + self.obj.predict(scaled_Z[second_half_idx, :])
 867                        )
 868                    ).tolist()
 869                )
 870                # partial_fit on the second half
 871                y_mean = np.mean(self.y_[second_half_idx, i])
 872                self.y_means_[i] = y_mean
 873                centered_y_i = self.y_[second_half_idx, i] - y_mean
 874                self.obj.partial_fit(
 875                    X=scaled_Z[second_half_idx, :], y=centered_y_i
 876                )
 877                self.fit_objs_[i] = deepcopy(self.obj)
 878
 879        self.residuals_ = np.asarray(residuals_).T
 880
 881        if self.type_pi == "gaussian":
 882            self.gaussian_preds_std_ = np.std(self.residuals_, axis=0)
 883
 884        if self.type_pi.startswith("scp2"):
 885            # Calculate mean and standard deviation for each column
 886            data_mean = np.mean(self.residuals_, axis=0)
 887            self.residuals_std_dev_ = np.std(self.residuals_, axis=0)
 888            # Center and scale the array using broadcasting
 889            self.residuals_ = (
 890                self.residuals_ - data_mean[np.newaxis, :]
 891            ) / self.residuals_std_dev_[np.newaxis, :]
 892
 893        if self.replications != None and "kde" in self.type_pi:
 894            if self.verbose > 0:
 895                print(f"\n Simulate residuals using {self.kernel} kernel... \n")
 896            assert self.kernel in (
 897                "gaussian",
 898                "tophat",
 899            ), "currently, 'kernel' must be either 'gaussian' or 'tophat'"
 900            kernel_bandwidths = {"bandwidth": np.logspace(-6, 6, 150)}
 901            grid = GridSearchCV(
 902                KernelDensity(kernel=self.kernel, **kwargs),
 903                param_grid=kernel_bandwidths,
 904            )
 905            grid.fit(self.residuals_)
 906
 907            if self.verbose > 0:
 908                print(
 909                    f"\n Best parameters for {self.kernel} kernel: {grid.best_params_} \n"
 910                )
 911
 912            self.kde_ = grid.best_estimator_
 913
 914        return self
 915
 916    def _predict_quantiles(self, h, quantiles, **kwargs):
 917        """Predict arbitrary quantiles from simulated paths."""
 918        # Ensure output dates are set
 919        self.output_dates_, _ = ts.compute_output_dates(self.df_, h)
 920
 921        # Trigger full prediction to generate self.sims_
 922        if not hasattr(self, "sims_") or self.sims_ is None:
 923            _ = self.predict(h=h, level=95, **kwargs)  # Any level triggers sim
 924
 925        result_dict = {}
 926
 927        # Stack simulations: (R, h, n_series)
 928        sims_array = np.stack([sim.values for sim in self.sims_], axis=0)
 929
 930        # Compute quantiles over replication axis
 931        q_values = np.quantile(
 932            sims_array, quantiles, axis=0
 933        )  # (n_q, h, n_series)
 934
 935        for i, q in enumerate(quantiles):
 936            # Clean label: 0.05 → "05", 0.1 → "10", 0.95 → "95"
 937            q_label = (
 938                f"{int(q * 100):02d}"
 939                if (q * 100).is_integer()
 940                else f"{q:.3f}".replace(".", "_")
 941            )
 942            for series_id in range(self.init_n_series_):
 943                series_name = self.series_names[series_id]
 944                col_name = f"quantile_{q_label}_{series_name}"
 945                result_dict[col_name] = q_values[i, :, series_id]
 946
 947        df_return_quantiles = pd.DataFrame(
 948            result_dict, index=self.output_dates_
 949        )
 950
 951        return df_return_quantiles
 952
 953    def predict(self, h=5, level=95, quantiles=None, **kwargs):
 954        """Forecast all the time series, h steps ahead"""
 955
 956        if quantiles is not None:
 957            # Validate
 958            quantiles = np.asarray(quantiles)
 959            if not ((quantiles > 0) & (quantiles < 1)).all():
 960                raise ValueError("quantiles must be between 0 and 1.")
 961            # Delegate to dedicated method
 962            return self._predict_quantiles(h=h, quantiles=quantiles, **kwargs)
 963
 964        if isinstance(level, list) or isinstance(level, np.ndarray):
 965            # Store results
 966            result_dict = {}
 967            # Loop through alphas and calculate lower/upper for each alpha level
 968            # E.g [0.5, 2.5, 5, 16.5, 25, 50]
 969            for lev in level:
 970                # Get the forecast for this alpha
 971                res = self.predict(h=h, level=lev, **kwargs)
 972                # Adjust index and collect lower/upper bounds
 973                res.lower.index = pd.to_datetime(res.lower.index)
 974                res.upper.index = pd.to_datetime(res.upper.index)
 975                # Loop over each time series (multivariate) and flatten results
 976                if isinstance(res.lower, pd.DataFrame):
 977                    for (
 978                        series
 979                    ) in (
 980                        res.lower.columns
 981                    ):  # Assumes 'lower' and 'upper' have multiple series
 982                        result_dict[f"lower_{lev}_{series}"] = (
 983                            res.lower[series].to_numpy().flatten()
 984                        )
 985                        result_dict[f"upper_{lev}_{series}"] = (
 986                            res.upper[series].to_numpy().flatten()
 987                        )
 988                else:
 989                    for series_id in range(
 990                        self.n_series
 991                    ):  # Assumes 'lower' and 'upper' have multiple series
 992                        result_dict[f"lower_{lev}_{series_id}"] = (
 993                            res.lower[series_id, :].to_numpy().flatten()
 994                        )
 995                        result_dict[f"upper_{lev}_{series_id}"] = (
 996                            res.upper[series_id, :].to_numpy().flatten()
 997                        )
 998            return pd.DataFrame(result_dict, index=self.output_dates_)
 999
1000        # only one prediction interval
1001        self.output_dates_, frequency = ts.compute_output_dates(self.df_, h)
1002
1003        self.level_ = level
1004
1005        self.return_std_ = False  # do not remove (/!\)
1006
1007        self.mean_ = None  # do not remove (/!\)
1008
1009        self.mean_ = deepcopy(self.y_)  # do not remove (/!\)
1010
1011        self.lower_ = None  # do not remove (/!\)
1012
1013        self.upper_ = None  # do not remove (/!\)
1014
1015        self.sims_ = None  # do not remove (/!\)
1016
1017        y_means_ = np.asarray(
1018            [self.y_means_[i] for i in range(self.init_n_series_)]
1019        )
1020
1021        n_features = self.init_n_series_ * self.lags
1022
1023        self.alpha_ = 100 - level
1024
1025        pi_multiplier = norm.ppf(1 - self.alpha_ / 200)
1026
1027        if "return_std" in kwargs:  # bayesian forecasting
1028            self.return_std_ = True
1029            self.preds_std_ = []
1030            DescribeResult = namedtuple(
1031                "DescribeResult", ("mean", "lower", "upper")
1032            )  # to be updated
1033
1034        if "return_pi" in kwargs:  # split conformal, without simulation
1035            mean_pi_ = []
1036            lower_pi_ = []
1037            upper_pi_ = []
1038            median_pi_ = []
1039            DescribeResult = namedtuple(
1040                "DescribeResult", ("mean", "lower", "upper")
1041            )  # to be updated
1042
1043        if self.kde_ != None and "kde" in self.type_pi:  # kde
1044            target_cols = self.df_.columns[
1045                : self.init_n_series_
1046            ]  # Get target column names
1047            if self.verbose == 1:
1048                self.residuals_sims_ = tuple(
1049                    self.kde_.sample(
1050                        n_samples=h, random_state=self.seed + 100 * i
1051                    )  # Keep full sample
1052                    for i in tqdm(range(self.replications))
1053                )
1054            elif self.verbose == 0:
1055                self.residuals_sims_ = tuple(
1056                    self.kde_.sample(
1057                        n_samples=h, random_state=self.seed + 100 * i
1058                    )  # Keep full sample
1059                    for i in range(self.replications)
1060                )
1061
1062            # Convert to DataFrames after sampling
1063            self.residuals_sims_ = tuple(
1064                pd.DataFrame(
1065                    sim,  # Keep all columns
1066                    columns=target_cols,  # Use original target column names
1067                    index=self.output_dates_,
1068                )
1069                for sim in self.residuals_sims_
1070            )
1071
1072        if self.type_pi in ("bootstrap", "scp-bootstrap", "scp2-bootstrap"):
1073            assert self.replications is not None and isinstance(
1074                self.replications, int
1075            ), "'replications' must be provided and be an integer"
1076            if self.verbose == 1:
1077                self.residuals_sims_ = tuple(
1078                    ts.bootstrap(
1079                        self.residuals_,
1080                        h=h,
1081                        block_size=None,
1082                        seed=self.seed + 100 * i,
1083                    )
1084                    for i in tqdm(range(self.replications))
1085                )
1086            elif self.verbose == 0:
1087                self.residuals_sims_ = tuple(
1088                    ts.bootstrap(
1089                        self.residuals_,
1090                        h=h,
1091                        block_size=None,
1092                        seed=self.seed + 100 * i,
1093                    )
1094                    for i in range(self.replications)
1095                )
1096
1097        if self.type_pi in (
1098            "block-bootstrap",
1099            "scp-block-bootstrap",
1100            "scp2-block-bootstrap",
1101        ):
1102            if self.block_size is None:
1103                self.block_size = int(
1104                    np.ceil(3.15 * (self.residuals_.shape[0] ** (1 / 3)))
1105                )
1106
1107            assert self.replications is not None and isinstance(
1108                self.replications, int
1109            ), "'replications' must be provided and be an integer"
1110            if self.verbose == 1:
1111                self.residuals_sims_ = tuple(
1112                    ts.bootstrap(
1113                        self.residuals_,
1114                        h=h,
1115                        block_size=self.block_size,
1116                        seed=self.seed + 100 * i,
1117                    )
1118                    for i in tqdm(range(self.replications))
1119                )
1120            elif self.verbose == 0:
1121                self.residuals_sims_ = tuple(
1122                    ts.bootstrap(
1123                        self.residuals_,
1124                        h=h,
1125                        block_size=self.block_size,
1126                        seed=self.seed + 100 * i,
1127                    )
1128                    for i in range(self.replications)
1129                )
1130
1131        if "vine" in self.type_pi:
1132            if self.verbose == 1:
1133                self.residuals_sims_ = tuple(
1134                    vinecopula_sample(
1135                        x=self.residuals_,
1136                        n_samples=h,
1137                        method=self.type_pi,
1138                        random_state=self.seed + 100 * i,
1139                    )
1140                    for i in tqdm(range(self.replications))
1141                )
1142            elif self.verbose == 0:
1143                self.residuals_sims_ = tuple(
1144                    vinecopula_sample(
1145                        x=self.residuals_,
1146                        n_samples=h,
1147                        method=self.type_pi,
1148                        random_state=self.seed + 100 * i,
1149                    )
1150                    for i in range(self.replications)
1151                )
1152
1153        mean_ = deepcopy(self.mean_)
1154
1155        for i in range(h):
1156            new_obs = ts.reformat_response(mean_, self.lags)
1157            new_X = new_obs.reshape(1, -1)
1158            cooked_new_X = self.cook_test_set(new_X, **kwargs)
1159
1160            if "return_std" in kwargs:
1161                self.preds_std_.append(
1162                    [
1163                        np.asarray(
1164                            self.fit_objs_[i].predict(
1165                                cooked_new_X, return_std=True
1166                            )[1]
1167                        ).item()
1168                        for i in range(self.n_series)
1169                    ]
1170                )
1171
1172            if "return_pi" in kwargs:
1173                for i in range(self.n_series):
1174                    preds_pi = self.fit_objs_[i].predict(cooked_new_X, **kwargs)
1175                    mean_pi_.append(preds_pi.mean[0])
1176                    lower_pi_.append(preds_pi.lower[0])
1177                    upper_pi_.append(preds_pi.upper[0])
1178
1179            if self.type_pi != "quantile":
1180                predicted_cooked_new_X = np.asarray(
1181                    [
1182                        np.asarray(
1183                            self.fit_objs_[i].predict(cooked_new_X)
1184                        ).item()
1185                        for i in range(self.init_n_series_)
1186                    ]
1187                )
1188            else:
1189                predicted_cooked_new_X = np.asarray(
1190                    [
1191                        np.asarray(
1192                            self.fit_objs_[i]
1193                            .predict(cooked_new_X, return_pi=True)
1194                            .upper
1195                        ).item()
1196                        for i in range(self.init_n_series_)
1197                    ]
1198                )
1199
1200            preds = np.asarray(y_means_ + predicted_cooked_new_X)
1201
1202            # Create full row with both predictions and external regressors
1203            if self.xreg_ is not None and "xreg" in kwargs:
1204                next_xreg = kwargs["xreg"].iloc[i: i + 1].values.flatten()
1205                full_row = np.concatenate([preds, next_xreg])
1206            else:
1207                full_row = preds
1208
1209            # Create a new row with same number of columns as mean_
1210            new_row = np.zeros((1, mean_.shape[1]))
1211            new_row[0, : full_row.shape[0]] = full_row
1212
1213            # Maintain the full dimensionality by using vstack instead of rbind
1214            mean_ = np.vstack([new_row, mean_[:-1]])
1215
1216        # Final output should only include the target columns
1217        self.mean_ = pd.DataFrame(
1218            mean_[0: min(h, self.n_obs_ - self.lags), : self.init_n_series_][
1219                ::-1
1220            ],
1221            columns=self.df_.columns[: self.init_n_series_],
1222            index=self.output_dates_,
1223        )
1224
1225        # function's return ----------------------------------------------------------------------
1226        if (
1227            (("return_std" not in kwargs) and ("return_pi" not in kwargs))
1228            and (self.type_pi not in ("gaussian", "scp"))
1229        ) or ("vine" in self.type_pi):
1230            if self.replications is None:
1231                return self.mean_.iloc[:, : self.init_n_series_]
1232
1233            # if "return_std" not in kwargs and self.replications is not None
1234            meanf = []
1235            medianf = []
1236            lower = []
1237            upper = []
1238
1239            if "scp2" in self.type_pi:
1240                if self.verbose == 1:
1241                    self.sims_ = tuple(
1242                        (
1243                            self.mean_
1244                            + self.residuals_sims_[i]
1245                            * self.residuals_std_dev_[np.newaxis, :]
1246                            for i in tqdm(range(self.replications))
1247                        )
1248                    )
1249                elif self.verbose == 0:
1250                    self.sims_ = tuple(
1251                        (
1252                            self.mean_
1253                            + self.residuals_sims_[i]
1254                            * self.residuals_std_dev_[np.newaxis, :]
1255                            for i in range(self.replications)
1256                        )
1257                    )
1258            else:
1259                if self.verbose == 1:
1260                    self.sims_ = tuple(
1261                        (
1262                            self.mean_ + self.residuals_sims_[i]
1263                            for i in tqdm(range(self.replications))
1264                        )
1265                    )
1266                elif self.verbose == 0:
1267                    self.sims_ = tuple(
1268                        (
1269                            self.mean_ + self.residuals_sims_[i]
1270                            for i in range(self.replications)
1271                        )
1272                    )
1273
1274            DescribeResult = namedtuple(
1275                "DescribeResult", ("mean", "sims", "lower", "upper")
1276            )
1277            for ix in range(self.init_n_series_):
1278                sims_ix = getsims(self.sims_, ix)
1279                if self.agg == "mean":
1280                    meanf.append(np.mean(sims_ix, axis=1))
1281                else:
1282                    medianf.append(np.median(sims_ix, axis=1))
1283                lower.append(np.quantile(sims_ix, q=self.alpha_ / 200, axis=1))
1284                upper.append(
1285                    np.quantile(sims_ix, q=1 - self.alpha_ / 200, axis=1)
1286                )
1287            self.mean_ = pd.DataFrame(
1288                np.asarray(meanf).T,
1289                columns=self.series_names[
1290                    : self.init_n_series_
1291                ],  # self.df_.columns,
1292                index=self.output_dates_,
1293            )
1294
1295            self.lower_ = pd.DataFrame(
1296                np.asarray(lower).T,
1297                columns=self.series_names[
1298                    : self.init_n_series_
1299                ],  # self.df_.columns,
1300                index=self.output_dates_,
1301            )
1302
1303            self.upper_ = pd.DataFrame(
1304                np.asarray(upper).T,
1305                columns=self.series_names[
1306                    : self.init_n_series_
1307                ],  # self.df_.columns,
1308                index=self.output_dates_,
1309            )
1310
1311            try:
1312                self.median_ = pd.DataFrame(
1313                    np.asarray(medianf).T,
1314                    columns=self.series_names[
1315                        : self.init_n_series_
1316                    ],  # self.df_.columns,
1317                    index=self.output_dates_,
1318                )
1319            except Exception as e:
1320                pass
1321
1322            return DescribeResult(
1323                self.mean_, self.sims_, self.lower_, self.upper_
1324            )
1325
1326        if (
1327            (("return_std" in kwargs) or ("return_pi" in kwargs))
1328            and (self.type_pi not in ("gaussian", "scp"))
1329        ) or "vine" in self.type_pi:
1330            DescribeResult = namedtuple(
1331                "DescribeResult", ("mean", "lower", "upper")
1332            )
1333
1334            self.mean_ = pd.DataFrame(
1335                np.asarray(self.mean_),
1336                columns=self.series_names,  # self.df_.columns,
1337                index=self.output_dates_,
1338            )
1339
1340            if "return_std" in kwargs:
1341                self.preds_std_ = np.asarray(self.preds_std_)
1342
1343                self.lower_ = pd.DataFrame(
1344                    self.mean_.values - pi_multiplier * self.preds_std_,
1345                    columns=self.series_names,  # self.df_.columns,
1346                    index=self.output_dates_,
1347                )
1348
1349                self.upper_ = pd.DataFrame(
1350                    self.mean_.values + pi_multiplier * self.preds_std_,
1351                    columns=self.series_names,  # self.df_.columns,
1352                    index=self.output_dates_,
1353                )
1354
1355            if "return_pi" in kwargs:
1356                self.lower_ = pd.DataFrame(
1357                    np.asarray(lower_pi_).reshape(h, self.n_series)
1358                    + y_means_[np.newaxis, :],
1359                    columns=self.series_names,  # self.df_.columns,
1360                    index=self.output_dates_,
1361                )
1362
1363                self.upper_ = pd.DataFrame(
1364                    np.asarray(upper_pi_).reshape(h, self.n_series)
1365                    + y_means_[np.newaxis, :],
1366                    columns=self.series_names,  # self.df_.columns,
1367                    index=self.output_dates_,
1368                )
1369
1370            res = DescribeResult(self.mean_, self.lower_, self.upper_)
1371
1372            if self.xreg_ is not None:
1373                if len(self.xreg_.shape) > 1:
1374                    res2 = mx.tuple_map(
1375                        res,
1376                        lambda x: mo.delete_last_columns(
1377                            x, num_columns=self.xreg_.shape[1]
1378                        ),
1379                    )
1380                else:
1381                    res2 = mx.tuple_map(
1382                        res, lambda x: mo.delete_last_columns(x, num_columns=1)
1383                    )
1384                return DescribeResult(res2[0], res2[1], res2[2])
1385
1386            return res
1387
1388        if self.type_pi == "gaussian":
1389            DescribeResult = namedtuple(
1390                "DescribeResult", ("mean", "lower", "upper")
1391            )
1392
1393            self.mean_ = pd.DataFrame(
1394                np.asarray(self.mean_),
1395                columns=self.series_names,  # self.df_.columns,
1396                index=self.output_dates_,
1397            )
1398
1399            # Use Bayesian std if available, otherwise use gaussian residual std
1400            if "return_std" in kwargs and len(self.preds_std_) > 0:
1401                preds_std_to_use = np.asarray(self.preds_std_)
1402            else:
1403                preds_std_to_use = self.gaussian_preds_std_
1404
1405            self.lower_ = pd.DataFrame(
1406                self.mean_.values - pi_multiplier * preds_std_to_use,
1407                columns=self.series_names,  # self.df_.columns,
1408                index=self.output_dates_,
1409            )
1410
1411            self.upper_ = pd.DataFrame(
1412                self.mean_.values + pi_multiplier * preds_std_to_use,
1413                columns=self.series_names,  # self.df_.columns,
1414                index=self.output_dates_,
1415            )
1416
1417            res = DescribeResult(self.mean_, self.lower_, self.upper_)
1418
1419            if self.xreg_ is not None:
1420                if len(self.xreg_.shape) > 1:
1421                    res2 = mx.tuple_map(
1422                        res,
1423                        lambda x: mo.delete_last_columns(
1424                            x, num_columns=self.xreg_.shape[1]
1425                        ),
1426                    )
1427                else:
1428                    res2 = mx.tuple_map(
1429                        res, lambda x: mo.delete_last_columns(x, num_columns=1)
1430                    )
1431                return DescribeResult(res2[0], res2[1], res2[2])
1432
1433            return res
1434
1435        if self.type_pi == "quantile":
1436            DescribeResult = namedtuple("DescribeResult", ("mean"))
1437
1438            self.mean_ = pd.DataFrame(
1439                np.asarray(self.mean_),
1440                columns=self.series_names,  # self.df_.columns,
1441                index=self.output_dates_,
1442            )
1443
1444            res = DescribeResult(self.mean_)
1445
1446            if self.xreg_ is not None:
1447                if len(self.xreg_.shape) > 1:
1448                    res2 = mx.tuple_map(
1449                        res,
1450                        lambda x: mo.delete_last_columns(
1451                            x, num_columns=self.xreg_.shape[1]
1452                        ),
1453                    )
1454                else:
1455                    res2 = mx.tuple_map(
1456                        res, lambda x: mo.delete_last_columns(x, num_columns=1)
1457                    )
1458                return DescribeResult(res2[0])
1459
1460            return res
1461
1462        # After prediction loop, ensure sims only contain target columns
1463        if self.sims_ is not None:
1464            if self.verbose == 1:
1465                self.sims_ = tuple(
1466                    sim[:h,]  # Only keep target columns and h rows
1467                    for sim in tqdm(self.sims_)
1468                )
1469            elif self.verbose == 0:
1470                self.sims_ = tuple(
1471                    sim[:h,]  # Only keep target columns and h rows
1472                    for sim in self.sims_
1473                )
1474
1475            # Convert numpy arrays to DataFrames with proper columns
1476            self.sims_ = tuple(
1477                pd.DataFrame(
1478                    sim,
1479                    columns=self.df_.columns[: self.init_n_series_],
1480                    index=self.output_dates_,
1481                )
1482                for sim in self.sims_
1483            )
1484
1485        if self.type_pi in (
1486            "kde",
1487            "bootstrap",
1488            "block-bootstrap",
1489            "vine-copula",
1490        ):
1491            if self.xreg_ is not None:
1492                # Use getsimsxreg when external regressors are present
1493                target_cols = self.df_.columns[: self.init_n_series_]
1494                self.sims_ = getsimsxreg(
1495                    self.sims_, self.output_dates_, target_cols
1496                )
1497            else:
1498                # Use original getsims for backward compatibility
1499                self.sims_ = getsims(self.sims_)
1500
1501    def _crps_ensemble(self, y_true, simulations, axis=0):
1502        """
1503        Compute the Continuous Ranked Probability Score (CRPS) for an ensemble of simulations.
1504
1505        The CRPS is a measure of the distance between the cumulative distribution
1506        function (CDF) of a forecast and the CDF of the observed value. This method
1507        computes the CRPS in a vectorized form for an ensemble of simulations, efficiently
1508        handling the case where there is only one simulation.
1509
1510        Parameters
1511        ----------
1512        y_true : array_like, shape (n,)
1513            A 1D array of true values (observations).
1514            Each element represents the true value for a given sample.
1515
1516        simulations : array_like, shape (n, R)
1517            A 2D array of simulated values. Each row corresponds to a different sample
1518            and each column corresponds to a different simulation of that sample.
1519
1520        axis : int, optional, default=0
1521            Axis along which to transpose the simulations if needed.
1522            If axis=0, the simulations are transposed to shape (R, n).
1523
1524        Returns
1525        -------
1526        crps : ndarray, shape (n,)
1527            A 1D array of CRPS scores, one for each sample.
1528
1529        Notes
1530        -----
1531        The CRPS score is computed as:
1532
1533        CRPS(y_true, simulations) = E[|X - y|] - 0.5 * E[|X - X'|]
1534
1535        Where:
1536        - `X` is the ensemble of simulations.
1537        - `y` is the true value.
1538        - `X'` is a second independent sample from the ensemble.
1539
1540        The calculation is vectorized to optimize performance for large datasets.
1541
1542        The edge case where `R=1` (only one simulation) is handled by returning
1543        only `term1` (i.e., no ensemble spread).
1544        """
1545        sims = np.asarray(simulations)  # Convert simulations to numpy array
1546        if axis == 0:
1547            sims = sims.T  # Transpose if the axis is 0
1548        n, R = sims.shape  # n = number of samples, R = number of simulations
1549        # Term 1: E|X - y|, average absolute difference between simulations and true value
1550        term1 = np.mean(np.abs(sims - y_true[:, np.newaxis]), axis=1)
1551        # Handle edge case: if R == 1, return term1 (no spread in ensemble)
1552        if R == 1:
1553            return term1
1554        # Term 2: 0.5 * E|X - X'|, using efficient sorted formula
1555        sims_sorted = np.sort(sims, axis=1)  # Sort simulations along each row
1556        # Correct coefficients for efficient calculation
1557        j = np.arange(R)  # 0-indexed positions in the sorted simulations
1558        coefficients = (2 * (j + 1) - R - 1) / (
1559            R * (R - 1)
1560        )  # Efficient coefficient calculation
1561        # Dot product along the second axis (over the simulations)
1562        term2 = np.dot(sims_sorted, coefficients)
1563        # Return CRPS score: term1 - 0.5 * term2
1564        return term1 - 0.5 * term2
1565
1566    def score(
1567        self,
1568        X,
1569        training_index,
1570        testing_index,
1571        scoring=None,
1572        alpha=0.5,
1573        **kwargs,
1574    ):
1575        """Train on training_index, score on testing_index."""
1576
1577        assert (
1578            bool(set(training_index).intersection(set(testing_index))) == False
1579        ), "Non-overlapping 'training_index' and 'testing_index' required"
1580
1581        # Dimensions
1582        try:
1583            # multivariate time series
1584            n, p = X.shape
1585        except:
1586            # univariate time series
1587            n = X.shape[0]
1588            p = 1
1589
1590        # Training and testing sets
1591        if p > 1:
1592            X_train = X[training_index, :]
1593            X_test = X[testing_index, :]
1594        else:
1595            X_train = X[training_index]
1596            X_test = X[testing_index]
1597
1598        # Horizon
1599        h = len(testing_index)
1600        assert (
1601            len(training_index) + h
1602        ) <= n, "Please check lengths of training and testing windows"
1603
1604        # Fit and predict
1605        self.fit(X_train, **kwargs)
1606        preds = self.predict(h=h, **kwargs)
1607
1608        if scoring is None:
1609            scoring = "neg_root_mean_squared_error"
1610
1611        if scoring == "pinball":
1612            # Predict requested quantile
1613            q_pred = self.predict(h=h, quantiles=[alpha], **kwargs)
1614            # Handle multivariate
1615            scores = []
1616            for j in range(p):
1617                series_name = getattr(self, "series_names", [f"Series_{j}"])[j]
1618                q_label = (
1619                    f"{int(alpha * 100):02d}"
1620                    if (alpha * 100).is_integer()
1621                    else f"{alpha:.3f}".replace(".", "_")
1622                )
1623                col = f"quantile_{q_label}_{series_name}"
1624                if col not in q_pred.columns:
1625                    raise ValueError(
1626                        f"Column '{col}' not found in quantile forecast output."
1627                    )
1628                y_true_j = X_test[:, j]
1629                y_pred_j = q_pred[col].values
1630                # Compute pinball loss for this series
1631                loss = mean_pinball_loss(y_true_j, y_pred_j, alpha=alpha)
1632                scores.append(loss)
1633            # Return average over series
1634            return np.mean(scores)
1635
1636        if scoring == "crps":
1637            # Ensure simulations exist
1638            preds = self.predict(h=h, **kwargs)  # triggers self.sims_
1639            # Extract simulations: list of DataFrames → (R, h, p)
1640            sims_vals = np.stack(
1641                [sim.values for sim in self.sims_], axis=0
1642            )  # (R, h, p)
1643            crps_scores = []
1644            for j in range(p):
1645                y_true_j = X_test[:, j]
1646                sims_j = sims_vals[:, :, j]  # (R, h)
1647                crps_j = self._crps_ensemble(np.asarray(y_true_j), sims_j)
1648                crps_scores.append(np.mean(crps_j))  # average over horizon
1649            return np.mean(crps_scores)  # average over series
1650
1651        # check inputs
1652        assert scoring in (
1653            "explained_variance",
1654            "neg_mean_absolute_error",
1655            "neg_mean_squared_error",
1656            "neg_root_mean_squared_error",
1657            "neg_mean_squared_log_error",
1658            "neg_median_absolute_error",
1659            "r2",
1660        ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \
1661                               'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \
1662                               'neg_median_absolute_error', 'r2')"
1663
1664        scoring_options = {
1665            "explained_variance": skm2.explained_variance_score,
1666            "neg_mean_absolute_error": skm2.mean_absolute_error,
1667            "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2),
1668            "neg_root_mean_squared_error": lambda x, y: np.sqrt(
1669                np.mean((x - y) ** 2)
1670            ),
1671            "neg_mean_squared_log_error": skm2.mean_squared_log_error,
1672            "neg_median_absolute_error": skm2.median_absolute_error,
1673            "r2": skm2.r2_score,
1674        }
1675
1676        return scoring_options[scoring](X_test, preds)
1677
1678    def plot(self, series=None, type_axis="dates", type_plot="pi"):
1679        """Plot time series forecast
1680
1681        Parameters:
1682
1683        series: {integer} or {string}
1684            series index or name
1685
1686        """
1687
1688        assert all(
1689            [
1690                self.mean_ is not None,
1691                self.lower_ is not None,
1692                self.upper_ is not None,
1693                self.output_dates_ is not None,
1694            ]
1695        ), "model forecasting must be obtained first (with predict)"
1696
1697        if series is None:
1698            # assert (
1699            #    self.init_n_series_ == 1
1700            # ), "please specify series index or name (n_series > 1)"
1701            series = 0
1702
1703        if isinstance(series, str):
1704            assert (
1705                series in self.series_names
1706            ), f"series {series} doesn't exist in the input dataset"
1707            series_idx = self.df_.columns.get_loc(series)
1708        else:
1709            assert isinstance(series, int) and (
1710                0 <= series < self.n_series
1711            ), f"check series index (< {self.n_series})"
1712            series_idx = series
1713
1714        y_all = list(self.df_.iloc[:, series_idx]) + list(
1715            self.mean_.iloc[:, series_idx]
1716        )
1717        y_test = list(self.mean_.iloc[:, series_idx])
1718        n_points_all = len(y_all)
1719        n_points_train = self.df_.shape[0]
1720
1721        if type_axis == "numeric":
1722            x_all = [i for i in range(n_points_all)]
1723            x_test = [i for i in range(n_points_train, n_points_all)]
1724
1725        if type_axis == "dates":  # use dates
1726            x_all = np.concatenate(
1727                (self.input_dates.values, self.output_dates_.values), axis=None
1728            )
1729            x_test = self.output_dates_.values
1730
1731        if type_plot == "pi":
1732            fig, ax = plt.subplots()
1733            ax.plot(x_all, y_all, "-")
1734            ax.plot(x_test, y_test, "-", color="orange")
1735            ax.fill_between(
1736                x_test,
1737                self.lower_.iloc[:, series_idx],
1738                self.upper_.iloc[:, series_idx],
1739                alpha=0.2,
1740                color="orange",
1741            )
1742            if self.replications is None:
1743                if self.n_series > 1:
1744                    plt.title(
1745                        f"prediction intervals for {series}",
1746                        loc="left",
1747                        fontsize=12,
1748                        fontweight=0,
1749                        color="black",
1750                    )
1751                else:
1752                    plt.title(
1753                        f"prediction intervals for input time series",
1754                        loc="left",
1755                        fontsize=12,
1756                        fontweight=0,
1757                        color="black",
1758                    )
1759                plt.show()
1760            else:  # self.replications is not None
1761                if self.n_series > 1:
1762                    plt.title(
1763                        f"prediction intervals for {self.replications} simulations of {series}",
1764                        loc="left",
1765                        fontsize=12,
1766                        fontweight=0,
1767                        color="black",
1768                    )
1769                else:
1770                    plt.title(
1771                        f"prediction intervals for {self.replications} simulations of input time series",
1772                        loc="left",
1773                        fontsize=12,
1774                        fontweight=0,
1775                        color="black",
1776                    )
1777                plt.show()
1778
1779        if type_plot == "spaghetti":
1780            palette = plt.get_cmap("Set1")
1781            sims_ix = getsims(self.sims_, series_idx)
1782            plt.plot(x_all, y_all, "-")
1783            for col_ix in range(
1784                sims_ix.shape[1]
1785            ):  # avoid this when there are thousands of simulations
1786                plt.plot(
1787                    x_test,
1788                    sims_ix[:, col_ix],
1789                    "-",
1790                    color=palette(col_ix),
1791                    linewidth=1,
1792                    alpha=0.9,
1793                )
1794            plt.plot(x_all, y_all, "-", color="black")
1795            plt.plot(x_test, y_test, "-", color="blue")
1796            # Add titles
1797            if self.n_series > 1:
1798                plt.title(
1799                    f"{self.replications} simulations of {series}",
1800                    loc="left",
1801                    fontsize=12,
1802                    fontweight=0,
1803                    color="black",
1804                )
1805            else:
1806                plt.title(
1807                    f"{self.replications} simulations of input time series",
1808                    loc="left",
1809                    fontsize=12,
1810                    fontweight=0,
1811                    color="black",
1812                )
1813            plt.xlabel("Time")
1814            plt.ylabel("Values")
1815            # Show the graph
1816            plt.show()
1817
1818    def cross_val_score(
1819        self,
1820        X,
1821        scoring="root_mean_squared_error",
1822        n_jobs=None,
1823        verbose=0,
1824        xreg=None,
1825        initial_window=5,
1826        horizon=3,
1827        fixed_window=False,
1828        show_progress=True,
1829        level=95,
1830        alpha=0.5,
1831        **kwargs,
1832    ):
1833        """Evaluate a score by time series cross-validation.
1834
1835        Parameters:
1836
1837            X: {array-like, sparse matrix} of shape (n_samples, n_features)
1838                The data to fit.
1839
1840            scoring: str or a function
1841                A str in ('root_mean_squared_error', 'mean_squared_error', 'mean_error',
1842                'mean_absolute_error', 'mean_error', 'mean_percentage_error',
1843                'mean_absolute_percentage_error',  'winkler_score', 'coverage')
1844                Or a function defined as 'coverage' and 'winkler_score' in `utils.timeseries`
1845
1846            n_jobs: int, default=None
1847                Number of jobs to run in parallel.
1848
1849            verbose: int, default=0
1850                The verbosity level.
1851
1852            xreg: array-like, optional (default=None)
1853                Additional (external) regressors to be passed to `fit`
1854                xreg must be in 'increasing' order (most recent observations last)
1855
1856            initial_window: int
1857                initial number of consecutive values in each training set sample
1858
1859            horizon: int
1860                number of consecutive values in test set sample
1861
1862            fixed_window: boolean
1863                if False, all training samples start at index 0, and the training
1864                window's size is increasing.
1865                if True, the training window's size is fixed, and the window is
1866                rolling forward
1867
1868            show_progress: boolean
1869                if True, a progress bar is printed
1870
1871            level: int
1872                confidence level for prediction intervals
1873
1874            alpha: float
1875                quantile level for pinball loss if scoring='pinball'
1876                0 < alpha < 1
1877
1878            **kwargs: dict
1879                additional parameters to be passed to `fit` and `predict`
1880
1881        Returns:
1882
1883            A tuple: descriptive statistics or errors and raw errors
1884
1885        """
1886        tscv = TimeSeriesSplit()
1887
1888        tscv_obj = tscv.split(
1889            X,
1890            initial_window=initial_window,
1891            horizon=horizon,
1892            fixed_window=fixed_window,
1893        )
1894
1895        if isinstance(scoring, str):
1896            assert scoring in (
1897                "pinball",
1898                "crps",
1899                "root_mean_squared_error",
1900                "mean_squared_error",
1901                "mean_error",
1902                "mean_absolute_error",
1903                "mean_percentage_error",
1904                "mean_absolute_percentage_error",
1905                "winkler_score",
1906                "coverage",
1907            ), "must have scoring in ('pinball', 'crps', 'root_mean_squared_error', 'mean_squared_error', 'mean_error', 'mean_absolute_error', 'mean_error', 'mean_percentage_error', 'mean_absolute_percentage_error',  'winkler_score', 'coverage')"
1908
1909            def err_func(X_test, X_pred, scoring, alpha=0.5):
1910                if (self.replications is not None) or (
1911                    self.type_pi == "gaussian"
1912                ):  # probabilistic
1913                    if scoring == "pinball":
1914                        # Predict requested quantile
1915                        q_pred = self.predict(
1916                            h=len(X_test), quantiles=[alpha], **kwargs
1917                        )
1918                        # Handle multivariate
1919                        scores = []
1920                        p = X_test.shape[1] if len(X_test.shape) > 1 else 1
1921                        for j in range(p):
1922                            series_name = getattr(
1923                                self, "series_names", [f"Series_{j}"]
1924                            )[j]
1925                            q_label = (
1926                                f"{int(alpha * 100):02d}"
1927                                if (alpha * 100).is_integer()
1928                                else f"{alpha:.3f}".replace(".", "_")
1929                            )
1930                            col = f"quantile_{q_label}_{series_name}"
1931                            if col not in q_pred.columns:
1932                                raise ValueError(
1933                                    f"Column '{col}' not found in quantile forecast output."
1934                                )
1935                            try:
1936                                y_true_j = X_test[:, j] if p > 1 else X_test
1937                            except:
1938                                y_true_j = (
1939                                    X_test.iloc[:, j]
1940                                    if p > 1
1941                                    else X_test.values
1942                                )
1943                            y_pred_j = q_pred[col].values
1944                            # Compute pinball loss for this series
1945                            loss = mean_pinball_loss(
1946                                y_true_j, y_pred_j, alpha=alpha
1947                            )
1948                            scores.append(loss)
1949                        # Return average over series
1950                        return np.mean(scores)
1951                    elif scoring == "crps":
1952                        # Ensure simulations exist
1953                        _ = self.predict(
1954                            h=len(X_test), **kwargs
1955                        )  # triggers self.sims_
1956                        # Extract simulations: list of DataFrames → (R, h, p)
1957                        sims_vals = np.stack(
1958                            [sim.values for sim in self.sims_], axis=0
1959                        )  # (R, h, p)
1960                        crps_scores = []
1961                        p = X_test.shape[1] if len(X_test.shape) > 1 else 1
1962                        for j in range(p):
1963                            try:
1964                                y_true_j = X_test[:, j] if p > 1 else X_test
1965                            except Exception as e:
1966                                y_true_j = (
1967                                    X_test.iloc[:, j]
1968                                    if p > 1
1969                                    else X_test.values
1970                                )
1971                            sims_j = sims_vals[:, :, j]  # (R, h)
1972                            crps_j = self._crps_ensemble(
1973                                np.asarray(y_true_j), sims_j
1974                            )
1975                            crps_scores.append(
1976                                np.mean(crps_j)
1977                            )  # average over horizon
1978                        return np.mean(crps_scores)  # average over series
1979                    if scoring == "winkler_score":
1980                        return winkler_score(X_pred, X_test, level=level)
1981                    elif scoring == "coverage":
1982                        return coverage(X_pred, X_test, level=level)
1983                    else:
1984                        return mean_errors(
1985                            pred=X_pred.mean, actual=X_test, scoring=scoring
1986                        )
1987                else:  # not probabilistic
1988                    return mean_errors(
1989                        pred=X_pred, actual=X_test, scoring=scoring
1990                    )
1991
1992        else:  # isinstance(scoring, str) = False
1993            err_func = scoring
1994
1995        errors = []
1996
1997        train_indices = []
1998
1999        test_indices = []
2000
2001        for train_index, test_index in tscv_obj:
2002            train_indices.append(train_index)
2003            test_indices.append(test_index)
2004
2005        if show_progress is True:
2006            iterator = tqdm(
2007                zip(train_indices, test_indices), total=len(train_indices)
2008            )
2009        else:
2010            iterator = zip(train_indices, test_indices)
2011
2012        for train_index, test_index in iterator:
2013            if verbose == 1:
2014                print(f"TRAIN: {train_index}")
2015                print(f"TEST: {test_index}")
2016
2017            if isinstance(X, pd.DataFrame):
2018                self.fit(X.iloc[train_index, :], xreg=xreg, **kwargs)
2019                X_test = X.iloc[test_index, :]
2020            else:
2021                self.fit(X[train_index, :], xreg=xreg, **kwargs)
2022                X_test = X[test_index, :]
2023            X_pred = self.predict(h=int(len(test_index)), level=level, **kwargs)
2024
2025            errors.append(err_func(X_test, X_pred, scoring, alpha=alpha))
2026
2027        res = np.asarray(errors)
2028
2029        return res, describe(res)
2030
2031    def _compute_information_criterion(self, curr_lags, criterion="AIC"):
2032        """Compute information criterion using existing residuals
2033
2034        Parameters
2035        ----------
2036        curr_lags : int
2037            Current number of lags being evaluated
2038        criterion : str
2039            One of 'AIC', 'AICc', or 'BIC'
2040
2041        Returns
2042        -------
2043        float
2044            Information criterion value or inf if parameters exceed observations
2045        """
2046        # Get dimensions
2047        n_obs = self.residuals_.shape[0]
2048        n_features = int(self.init_n_series_ * curr_lags)
2049        n_hidden = int(self.n_hidden_features)
2050        # Calculate number of parameters
2051        term1 = int(n_features * n_hidden)
2052        term2 = int(n_hidden * self.init_n_series_)
2053        n_params = term1 + term2
2054        # Check if we have enough observations for the number of parameters
2055        if n_obs <= n_params + 1:
2056            return float("inf")  # Return infinity if too many parameters
2057        # Compute RSS using existing residuals
2058        rss = np.sum(self.residuals_**2)
2059        # Compute criterion
2060        if criterion == "AIC":
2061            ic = n_obs * np.log(rss / n_obs) + 2 * n_params
2062        elif criterion == "AICc":
2063            ic = n_obs * np.log(rss / n_obs) + 2 * n_params * (
2064                n_obs / (n_obs - n_params - 1)
2065            )
2066        else:  # BIC
2067            ic = n_obs * np.log(rss / n_obs) + n_params * np.log(n_obs)
2068
2069        return ic

Univariate and multivariate time series (MTS) forecasting with Quasi-Randomized networks

Parameters:

obj: object.
    any object containing a method fit (obj.fit()) and a method predict
    (obj.predict()).

n_hidden_features: int.
    number of nodes in the hidden layer.

activation_name: str.
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'.

a: float.
    hyperparameter for 'prelu' or 'elu' activation function.

nodes_sim: str.
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'.

bias: boolean.
    indicates if the hidden layer contains a bias term (True) or not
    (False).

dropout: float.
    regularization parameter; (random) percentage of nodes dropped out
    of the training.

direct_link: boolean.
    indicates if the original predictors are included (True) in model's fitting or not (False).

n_clusters: int.
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering).

cluster_encode: bool.
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding.

type_clust: str.
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm').

type_scaling: a tuple of 3 strings.
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax').

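lags: int or str.
    number of lags used for each time series.
    If a string, it must be one of 'AIC', 'AICc', or 'BIC': the number of lags
    is then selected automatically in `fit`, using that information criterion.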

type_pi: str.
    type of prediction interval; currently:
    - "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case
    - "quantile": use model-agnostic quantile regression under the hood
    - "kde": based on Kernel Density Estimation of in-sample residuals
    - "bootstrap": based on independent bootstrap of in-sample residuals
    - "block-bootstrap": based on basic block bootstrap of in-sample residuals
    - "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals
    - "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals
    - "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals
    - "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals
    - "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals
    - "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals
    - based on copulas of in-sample residuals: 'vine-tll', 'vine-bb1', 'vine-bb6', 'vine-bb7', 'vine-bb8', 'vine-clayton',
    'vine-frank', 'vine-gaussian', 'vine-gumbel', 'vine-indep', 'vine-joe', 'vine-student'
    - 'scp-vine-tll', 'scp-vine-bb1', 'scp-vine-bb6', 'scp-vine-bb7', 'scp-vine-bb8', 'scp-vine-clayton',
    'scp-vine-frank', 'scp-vine-gaussian', 'scp-vine-gumbel', 'scp-vine-indep', 'scp-vine-joe', 'scp-vine-student'
    - 'scp2-vine-tll', 'scp2-vine-bb1', 'scp2-vine-bb6', 'scp2-vine-bb7', 'scp2-vine-bb8', 'scp2-vine-clayton',
    'scp2-vine-frank', 'scp2-vine-gaussian', 'scp2-vine-gumbel', 'scp2-vine-indep', 'scp2-vine-joe', 'scp2-vine-student'

level: int.
    level of confidence for `type_pi == 'quantile'` (default is `95`)

block_size: int.
    size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap").
    Default is round(3.15 * n_residuals^(1/3))

replications: int.
    number of replications (if needed, for predictive simulation). Default is 'None'.

kernel: str.
    the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'.

agg: str.
    either "mean" or "median" for simulation of bootstrap aggregating

seed: int.
    reproducibility seed for nodes_sim=='uniform' or predictive simulation.

backend: str.
    "cpu" or "gpu" or "tpu".

verbose: int.
    0: not printing; 1: printing

show_progress: bool.
    True: progress bar when fitting each series; False: no progress bar when fitting each series

Attributes:

fit_objs_: dict
    objects adjusted to each individual time series

y_: {array-like}
    MTS responses (most recent observations first)

X_: {array-like}
    MTS lags

xreg_: {array-like}
    external regressors

y_means_: dict
    a dictionary of each series mean values

preds_: {array-like}
    successive model predictions

preds_std_: {array-like}
    standard deviation around the predictions for Bayesian base learners (`obj`)

gaussian_preds_std_: {array-like}
    standard deviation around the predictions for `type_pi='gaussian'`

return_std_: boolean
    return uncertainty or not (set in predict)

df_: pandas DataFrame
    the input data frame, when a DataFrame is provided to `fit`

n_obs_: int
    number of time series observations (number of rows for multivariate)

level_: int
    level of confidence for prediction intervals (default is 95)

residuals_: {array-like}
    in-sample residuals (for `type_pi` not conformal prediction) or calibrated residuals
    (for `type_pi` in conformal prediction)

residuals_sims_: tuple of {array-like}
    simulations of in-sample residuals (for `type_pi` not conformal prediction) or
    calibrated residuals (for `type_pi` in conformal prediction)

kde_: A scikit-learn object, see https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KernelDensity.html

residuals_std_dev_: residuals standard deviation

Examples:

Example 1:

import nnetsauce as ns
import numpy as np
from sklearn import linear_model
np.random.seed(123)

M = np.random.rand(10, 3)
M[:,0] = 10*M[:,0]
M[:,2] = 25*M[:,2]
print(M)

# Adjust Bayesian Ridge
regr4 = linear_model.BayesianRidge()
obj_MTS = ns.MTS(regr4, lags = 1, n_hidden_features=5)
obj_MTS.fit(M)
print(obj_MTS.predict())

# with credible intervals
print(obj_MTS.predict(return_std=True, level=80))

print(obj_MTS.predict(return_std=True, level=95))

Example 2:

import nnetsauce as ns
import numpy as np
import pandas as pd
from sklearn import linear_model

dataset = {
'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
'series1' : [34, 30, 35.6, 33.3, 38.1],
'series2' : [4, 5.5, 5.6, 6.3, 5.1],
'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
df = pd.DataFrame(dataset).set_index('date')
print(df)

# Adjust Bayesian Ridge
regr5 = linear_model.BayesianRidge()
obj_MTS = ns.MTS(regr5, lags = 1, n_hidden_features=5)
obj_MTS.fit(df)
print(obj_MTS.predict())

# with credible intervals
print(obj_MTS.predict(return_std=True, level=80))

print(obj_MTS.predict(return_std=True, level=95))
def fit(self, X, xreg=None, **kwargs):
    """Fit MTS model to training data X, with optional regressors xreg

    Parameters:

    X: {array-like}, shape = [n_samples, n_features]
        Training time series, where n_samples is the number
        of samples and n_features is the number of features;
        X must be in increasing order (most recent observations last)

    xreg: {array-like}, shape = [n_samples, n_features_xreg]
        Additional (external) regressors to be passed to self.obj;
        xreg must be in 'increasing' order (most recent observations last)

    **kwargs: for now, additional parameters to be passed on for kernel
        density estimation, when needed (see sklearn.neighbors.KernelDensity)

    Returns:

    self: object

    Notes:

    When `self.lags` is a string, it is used as the name of an information
    criterion: lags 1..min(25, n_samples // 4) are tried and `self.lags`
    is overwritten with the best (lowest criterion) integer value.
    """
    # A 1-D input has no second axis: treat it as a single series.
    try:
        self.init_n_series_ = X.shape[1]
    except IndexError:
        self.init_n_series_ = 1

    # Automatic lag selection if requested
    if isinstance(self.lags, str):
        max_lags = min(25, X.shape[0] // 4)
        best_ic = float("inf")
        best_lags = 1

        if self.verbose:
            print(
                f"\nSelecting optimal number of lags using {self.lags}..."
            )
            iterator = tqdm(range(1, max_lags + 1))
        else:
            iterator = range(1, max_lags + 1)

        # Reverse the rows once (the reversal does not depend on the lag);
        # a DataFrame is converted to a numpy array before reversing.
        if isinstance(X, pd.DataFrame):
            X_values = X.values[::-1]
        else:
            X_values = X[::-1]

        for lag in iterator:
            # Try current lag value
            if self.init_n_series_ > 1:
                mts_input = ts.create_train_inputs(X_values, lag)
            else:
                mts_input = ts.create_train_inputs(
                    X_values.reshape(-1, 1), lag
                )

            # Cook training set (the response passed here is a dummy
            # vector of ones) and fit one model per series
            _, scaled_Z = self.cook_training_set(
                y=np.ones(mts_input[0].shape[0]), X=mts_input[1]
            )
            residuals_ = []

            for i in range(self.init_n_series_):
                y_mean = np.mean(mts_input[0][:, i])
                centered_y_i = mts_input[0][:, i] - y_mean
                self.obj.fit(X=scaled_Z, y=centered_y_i)
                residuals_.append(
                    (centered_y_i - self.obj.predict(scaled_Z)).tolist()
                )

            # _compute_information_criterion is called right after
            # self.residuals_ is set for the current lag
            self.residuals_ = np.asarray(residuals_).T
            ic = self._compute_information_criterion(
                curr_lags=lag, criterion=self.lags
            )

            if self.verbose:
                print(f"Trying lags={lag}, {self.lags}={ic:.2f}")

            if ic < best_ic:
                best_ic = ic
                best_lags = lag

        if self.verbose:
            print(
                f"\nSelected {best_lags} lags with {self.lags}={best_ic:.2f}"
            )

        self.lags = best_lags

    self.input_dates = None
    self.df_ = None

    if isinstance(X, pd.DataFrame) is False:
        # input data set is a numpy array
        if xreg is None:
            X = pd.DataFrame(X)
            self.series_names = [
                "series" + str(i) for i in range(X.shape[1])
            ]
        else:
            # xreg is not None
            # NOTE(review): self.series_names is not assigned on this
            # path; it keeps whatever value it had before — confirm.
            X = mo.cbind(X, xreg)
            self.xreg_ = xreg

    else:  # input data set is a DataFrame with column names
        # Keep the original index: it is restored after the conversion.
        X_index = X.index
        if xreg is None:
            X = copy.deepcopy(mo.convert_df_to_numeric(X))
        else:
            X = copy.deepcopy(mo.cbind(mo.convert_df_to_numeric(X), xreg))
            self.xreg_ = xreg
        X.index = X_index
        self.series_names = X.columns.tolist()

    # NOTE(review): self.xreg_ is only assigned when xreg is provided; a
    # refit without xreg leaves the previous value in place — confirm.

    # self.df_ was reset to None above, so it is always rebuilt from X.
    if isinstance(X, pd.DataFrame):
        self.df_ = X
        X = X.values
        self.df_.columns = self.series_names
    else:
        self.df_ = pd.DataFrame(X, columns=self.series_names)

    self.input_dates = ts.compute_input_dates(self.df_)

    try:
        # multivariate time series
        n, p = X.shape
    except ValueError:
        # univariate time series (shape does not unpack into two values)
        n = X.shape[0]
        p = 1
    self.n_obs_ = n

    # NOTE(review): this dummy response has n entries, whereas the lag
    # selection above uses one entry per row of the lagged matrix —
    # confirm that cook_training_set does not depend on its length.
    rep_1_n = np.repeat(1, n)

    # Reset everything produced by a previous call to fit
    self.y_ = None
    self.X_ = None
    self.n_series = p
    self.fit_objs_.clear()
    self.y_means_.clear()
    residuals_ = []
    self.residuals_ = None
    self.residuals_sims_ = None
    self.kde_ = None
    self.sims_ = None
    self.scaled_Z_ = None
    self.centered_y_is_ = []

    # Rows are reversed before building the lagged inputs
    if self.init_n_series_ > 1:
        # multivariate time series
        mts_input = ts.create_train_inputs(X[::-1], self.lags)
    else:
        # univariate time series
        mts_input = ts.create_train_inputs(
            X.reshape(-1, 1)[::-1], self.lags
        )

    self.y_ = mts_input[0]

    self.X_ = mts_input[1]

    _, scaled_Z = self.cook_training_set(y=rep_1_n, X=self.X_)

    self.scaled_Z_ = scaled_Z

    # loop on all the time series and adjust self.obj.fit
    if self.verbose > 0:
        print(
            f"\n Adjusting {type(self.obj).__name__} to multivariate time series... \n"
        )

    if self.show_progress is True:
        iterator = tqdm(range(self.init_n_series_))
    else:
        iterator = range(self.init_n_series_)

    # These interval types need the in-sample residuals of each model.
    keeps_residuals = self.type_pi in (
        "gaussian",
        "kde",
        "bootstrap",
        "block-bootstrap",
    ) or self.type_pi.startswith("vine")

    if keeps_residuals or self.type_pi == "quantile":
        # One model per series, fitted on the centered response; the
        # "quantile" case fits the same way but stores no residuals.
        for i in iterator:
            y_mean = np.mean(self.y_[:, i])
            self.y_means_[i] = y_mean
            centered_y_i = self.y_[:, i] - y_mean
            self.centered_y_is_.append(centered_y_i)
            self.obj.fit(X=scaled_Z, y=centered_y_i)
            self.fit_objs_[i] = deepcopy(self.obj)
            if keeps_residuals:
                residuals_.append(
                    (
                        centered_y_i - self.fit_objs_[i].predict(scaled_Z)
                    ).tolist()
                )

    if self.type_pi.startswith("scp"):
        # split conformal prediction
        for i in iterator:
            n_y = self.y_.shape[0]
            n_y_half = n_y // 2
            first_half_idx = range(0, n_y_half)
            second_half_idx = range(n_y_half, n_y)
            # fit on the first half ...
            y_mean_temp = np.mean(self.y_[first_half_idx, i])
            centered_y_i_temp = self.y_[first_half_idx, i] - y_mean_temp
            self.obj.fit(X=scaled_Z[first_half_idx, :], y=centered_y_i_temp)
            # ... and compute the residuals on the second half
            # (calibrated residuals actually)
            residuals_.append(
                (
                    self.y_[second_half_idx, i]
                    - (
                        y_mean_temp
                        + self.obj.predict(scaled_Z[second_half_idx, :])
                    )
                ).tolist()
            )
            # fit on the second half: this is the model that is stored
            y_mean = np.mean(self.y_[second_half_idx, i])
            self.y_means_[i] = y_mean
            centered_y_i = self.y_[second_half_idx, i] - y_mean
            self.obj.fit(X=scaled_Z[second_half_idx, :], y=centered_y_i)
            self.fit_objs_[i] = deepcopy(self.obj)

    # One column of residuals per series
    self.residuals_ = np.asarray(residuals_).T

    if self.type_pi == "gaussian":
        self.gaussian_preds_std_ = np.std(self.residuals_, axis=0)

    if self.type_pi.startswith("scp2"):
        # Calculate mean and standard deviation for each column
        data_mean = np.mean(self.residuals_, axis=0)
        self.residuals_std_dev_ = np.std(self.residuals_, axis=0)
        # Center and scale the array using broadcasting
        self.residuals_ = (
            self.residuals_ - data_mean[np.newaxis, :]
        ) / self.residuals_std_dev_[np.newaxis, :]

    if self.replications is not None and "kde" in self.type_pi:
        if self.verbose > 0:
            print(f"\n Simulate residuals using {self.kernel} kernel... \n")
        assert self.kernel in (
            "gaussian",
            "tophat",
        ), "currently, 'kernel' must be either 'gaussian' or 'tophat'"
        # Bandwidth chosen by grid search over a log-spaced grid
        kernel_bandwidths = {"bandwidth": np.logspace(-6, 6, 150)}
        grid = GridSearchCV(
            KernelDensity(kernel=self.kernel, **kwargs),
            param_grid=kernel_bandwidths,
        )
        grid.fit(self.residuals_)

        if self.verbose > 0:
            print(
                f"\n Best parameters for {self.kernel} kernel: {grid.best_params_} \n"
            )

        self.kde_ = grid.best_estimator_

    return self

Fit MTS model to training data X, with optional regressors xreg

Parameters:

X: {array-like}, shape = [n_samples, n_features] Training time series, where n_samples is the number of samples and n_features is the number of features; X must be in increasing order (most recent observations last)

xreg: {array-like}, shape = [n_samples, n_features_xreg] Additional (external) regressors to be passed to self.obj; xreg must be in 'increasing' order (most recent observations last)

**kwargs: for now, additional parameters to be passed on for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)

Returns:

self: object

def predict(self, h=5, level=95, quantiles=None, **kwargs):
    """Forecast all the time series, h steps ahead

    Parameters:

    h: int
        Forecasting horizon (number of steps ahead)

    level: int, or list / numpy array of ints
        Level of confidence for prediction intervals (default is 95).
        When a list or array is given, `predict` is called once per level
        and the lower/upper bounds are gathered in a single data frame

    quantiles: {array-like}, optional
        Quantile levels, strictly between 0 and 1; when provided, the call
        is delegated to `self._predict_quantiles`

    **kwargs: additional parameters. The keys `return_std`, `return_pi`
        and `xreg` are inspected here; all of them are forwarded to
        `self.cook_test_set` (and to the fitted models' `predict` when
        `return_pi` is present)

    Returns:

    Depending on `self.type_pi`, `self.replications` and `kwargs`: a data
    frame of point forecasts, or a `DescribeResult` namedtuple with fields
    (mean, lower, upper) or (mean, sims, lower, upper)

    Raises:

    ValueError: if `quantiles` contains values outside (0, 1)
    """

    def _replication_indices():
        # Replication indices, with a progress bar when verbose is set.
        indices = range(self.replications)
        return tqdm(indices) if self.verbose else indices

    if quantiles is not None:
        # Validate
        quantiles = np.asarray(quantiles)
        if not ((quantiles > 0) & (quantiles < 1)).all():
            raise ValueError("quantiles must be between 0 and 1.")
        # Delegate to dedicated method
        return self._predict_quantiles(h=h, quantiles=quantiles, **kwargs)

    if isinstance(level, (list, np.ndarray)):
        # Store results
        result_dict = {}
        # Loop through levels and calculate lower/upper for each of them
        for lev in level:
            # Get the forecast for this level
            res = self.predict(h=h, level=lev, **kwargs)
            # Adjust index and collect lower/upper bounds
            res.lower.index = pd.to_datetime(res.lower.index)
            res.upper.index = pd.to_datetime(res.upper.index)
            # Loop over each time series (multivariate) and flatten results
            if isinstance(res.lower, pd.DataFrame):
                for series in res.lower.columns:
                    result_dict[f"lower_{lev}_{series}"] = (
                        res.lower[series].to_numpy().flatten()
                    )
                    result_dict[f"upper_{lev}_{series}"] = (
                        res.upper[series].to_numpy().flatten()
                    )
            else:
                for series_id in range(self.n_series):
                    result_dict[f"lower_{lev}_{series_id}"] = (
                        res.lower[series_id, :].to_numpy().flatten()
                    )
                    result_dict[f"upper_{lev}_{series_id}"] = (
                        res.upper[series_id, :].to_numpy().flatten()
                    )
        return pd.DataFrame(result_dict, index=self.output_dates_)

    # only one prediction interval
    self.output_dates_, frequency = ts.compute_output_dates(self.df_, h)

    self.level_ = level

    self.return_std_ = False  # do not remove (/!\)

    self.mean_ = None  # do not remove (/!\)

    self.mean_ = deepcopy(self.y_)  # do not remove (/!\)

    self.lower_ = None  # do not remove (/!\)

    self.upper_ = None  # do not remove (/!\)

    self.sims_ = None  # do not remove (/!\)

    # Per-series means removed in fit; added back to each prediction
    y_means_ = np.asarray(
        [self.y_means_[i] for i in range(self.init_n_series_)]
    )

    self.alpha_ = 100 - level

    # Two-sided gaussian multiplier for the requested level
    pi_multiplier = norm.ppf(1 - self.alpha_ / 200)

    if "return_std" in kwargs:  # bayesian forecasting
        self.return_std_ = True
        self.preds_std_ = []

    if "return_pi" in kwargs:  # split conformal, without simulation
        mean_pi_ = []
        lower_pi_ = []
        upper_pi_ = []

    if self.kde_ is not None and "kde" in self.type_pi:  # kde
        target_cols = self.df_.columns[
            : self.init_n_series_
        ]  # Get target column names
        self.residuals_sims_ = tuple(
            self.kde_.sample(
                n_samples=h, random_state=self.seed + 100 * i
            )  # Keep full sample
            for i in _replication_indices()
        )

        # Convert to DataFrames after sampling
        self.residuals_sims_ = tuple(
            pd.DataFrame(
                sim,  # Keep all columns
                columns=target_cols,  # Use original target column names
                index=self.output_dates_,
            )
            for sim in self.residuals_sims_
        )

    if self.type_pi in ("bootstrap", "scp-bootstrap", "scp2-bootstrap"):
        assert self.replications is not None and isinstance(
            self.replications, int
        ), "'replications' must be provided and be an integer"
        self.residuals_sims_ = tuple(
            ts.bootstrap(
                self.residuals_,
                h=h,
                block_size=None,
                seed=self.seed + 100 * i,
            )
            for i in _replication_indices()
        )

    if self.type_pi in (
        "block-bootstrap",
        "scp-block-bootstrap",
        "scp2-block-bootstrap",
    ):
        # Default block size, derived from the number of residuals
        if self.block_size is None:
            self.block_size = int(
                np.ceil(3.15 * (self.residuals_.shape[0] ** (1 / 3)))
            )

        assert self.replications is not None and isinstance(
            self.replications, int
        ), "'replications' must be provided and be an integer"
        self.residuals_sims_ = tuple(
            ts.bootstrap(
                self.residuals_,
                h=h,
                block_size=self.block_size,
                seed=self.seed + 100 * i,
            )
            for i in _replication_indices()
        )

    if "vine" in self.type_pi:
        self.residuals_sims_ = tuple(
            vinecopula_sample(
                x=self.residuals_,
                n_samples=h,
                method=self.type_pi,
                random_state=self.seed + 100 * i,
            )
            for i in _replication_indices()
        )

    mean_ = deepcopy(self.mean_)

    # Recursive forecasting: each new prediction is stacked on top of
    # mean_ and used to build the inputs of the next step.
    for step in range(h):
        new_obs = ts.reformat_response(mean_, self.lags)
        new_X = new_obs.reshape(1, -1)
        cooked_new_X = self.cook_test_set(new_X, **kwargs)

        if "return_std" in kwargs:
            self.preds_std_.append(
                [
                    np.asarray(
                        self.fit_objs_[j].predict(
                            cooked_new_X, return_std=True
                        )[1]
                    ).item()
                    for j in range(self.n_series)
                ]
            )

        if "return_pi" in kwargs:
            # The series index must not reuse the horizon index: `step`
            # is needed below to pick the matching row of xreg.
            for j in range(self.n_series):
                preds_pi = self.fit_objs_[j].predict(cooked_new_X, **kwargs)
                mean_pi_.append(preds_pi.mean[0])
                lower_pi_.append(preds_pi.lower[0])
                upper_pi_.append(preds_pi.upper[0])

        if self.type_pi != "quantile":
            predicted_cooked_new_X = np.asarray(
                [
                    np.asarray(
                        self.fit_objs_[j].predict(cooked_new_X)
                    ).item()
                    for j in range(self.init_n_series_)
                ]
            )
        else:
            predicted_cooked_new_X = np.asarray(
                [
                    np.asarray(
                        self.fit_objs_[j]
                        .predict(cooked_new_X, return_pi=True)
                        .upper
                    ).item()
                    for j in range(self.init_n_series_)
                ]
            )

        preds = np.asarray(y_means_ + predicted_cooked_new_X)

        # Create full row with both predictions and external regressors
        if self.xreg_ is not None and "xreg" in kwargs:
            next_xreg = kwargs["xreg"].iloc[step: step + 1].values.flatten()
            full_row = np.concatenate([preds, next_xreg])
        else:
            full_row = preds

        # Create a new row with same number of columns as mean_
        new_row = np.zeros((1, mean_.shape[1]))
        new_row[0, : full_row.shape[0]] = full_row

        # Maintain the full dimensionality by using vstack instead of rbind
        mean_ = np.vstack([new_row, mean_[:-1]])

    # Final output should only include the target columns; rows are
    # reversed so that they follow self.output_dates_
    self.mean_ = pd.DataFrame(
        mean_[0: min(h, self.n_obs_ - self.lags), : self.init_n_series_][
            ::-1
        ],
        columns=self.df_.columns[: self.init_n_series_],
        index=self.output_dates_,
    )

    # function's return ----------------------------------------------------------------------
    if (
        (("return_std" not in kwargs) and ("return_pi" not in kwargs))
        and (self.type_pi not in ("gaussian", "scp"))
    ) or ("vine" in self.type_pi):
        if self.replications is None:
            return self.mean_.iloc[:, : self.init_n_series_]

        # if "return_std" not in kwargs and self.replications is not None
        meanf = []
        medianf = []
        lower = []
        upper = []

        if "scp2" in self.type_pi:
            # residuals were standardized in fit: scale them back
            self.sims_ = tuple(
                self.mean_
                + self.residuals_sims_[i]
                * self.residuals_std_dev_[np.newaxis, :]
                for i in _replication_indices()
            )
        else:
            self.sims_ = tuple(
                self.mean_ + self.residuals_sims_[i]
                for i in _replication_indices()
            )

        DescribeResult = namedtuple(
            "DescribeResult", ("mean", "sims", "lower", "upper")
        )
        for ix in range(self.init_n_series_):
            sims_ix = getsims(self.sims_, ix)
            if self.agg == "mean":
                meanf.append(np.mean(sims_ix, axis=1))
            else:
                medianf.append(np.median(sims_ix, axis=1))
            lower.append(np.quantile(sims_ix, q=self.alpha_ / 200, axis=1))
            upper.append(
                np.quantile(sims_ix, q=1 - self.alpha_ / 200, axis=1)
            )

        target_names = self.series_names[: self.init_n_series_]

        # NOTE(review): meanf stays empty when self.agg != "mean" —
        # confirm that building self.mean_ from it is intended.
        self.mean_ = pd.DataFrame(
            np.asarray(meanf).T,
            columns=target_names,
            index=self.output_dates_,
        )

        self.lower_ = pd.DataFrame(
            np.asarray(lower).T,
            columns=target_names,
            index=self.output_dates_,
        )

        self.upper_ = pd.DataFrame(
            np.asarray(upper).T,
            columns=target_names,
            index=self.output_dates_,
        )

        # The median is only available when self.agg != "mean"
        if medianf:
            self.median_ = pd.DataFrame(
                np.asarray(medianf).T,
                columns=target_names,
                index=self.output_dates_,
            )

        return DescribeResult(
            self.mean_, self.sims_, self.lower_, self.upper_
        )

    if (
        (("return_std" in kwargs) or ("return_pi" in kwargs))
        and (self.type_pi not in ("gaussian", "scp"))
    ) or "vine" in self.type_pi:
        DescribeResult = namedtuple(
            "DescribeResult", ("mean", "lower", "upper")
        )

        self.mean_ = pd.DataFrame(
            np.asarray(self.mean_),
            columns=self.series_names,  # self.df_.columns,
            index=self.output_dates_,
        )

        if "return_std" in kwargs:
            self.preds_std_ = np.asarray(self.preds_std_)

            self.lower_ = pd.DataFrame(
                self.mean_.values - pi_multiplier * self.preds_std_,
                columns=self.series_names,  # self.df_.columns,
                index=self.output_dates_,
            )

            self.upper_ = pd.DataFrame(
                self.mean_.values + pi_multiplier * self.preds_std_,
                columns=self.series_names,  # self.df_.columns,
                index=self.output_dates_,
            )

        if "return_pi" in kwargs:
            # bounds were collected step by step, series by series
            self.lower_ = pd.DataFrame(
                np.asarray(lower_pi_).reshape(h, self.n_series)
                + y_means_[np.newaxis, :],
                columns=self.series_names,  # self.df_.columns,
                index=self.output_dates_,
            )

            self.upper_ = pd.DataFrame(
                np.asarray(upper_pi_).reshape(h, self.n_series)
                + y_means_[np.newaxis, :],
                columns=self.series_names,  # self.df_.columns,
                index=self.output_dates_,
            )

        res = DescribeResult(self.mean_, self.lower_, self.upper_)

        if self.xreg_ is not None:
            # drop the trailing external-regressor columns
            if len(self.xreg_.shape) > 1:
                res2 = mx.tuple_map(
                    res,
                    lambda x: mo.delete_last_columns(
                        x, num_columns=self.xreg_.shape[1]
                    ),
                )
            else:
                res2 = mx.tuple_map(
                    res, lambda x: mo.delete_last_columns(x, num_columns=1)
                )
            return DescribeResult(res2[0], res2[1], res2[2])

        return res

    if self.type_pi == "gaussian":
        DescribeResult = namedtuple(
            "DescribeResult", ("mean", "lower", "upper")
        )

        self.mean_ = pd.DataFrame(
            np.asarray(self.mean_),
            columns=self.series_names,  # self.df_.columns,
            index=self.output_dates_,
        )

        # Use Bayesian std if available, otherwise use gaussian residual std
        if "return_std" in kwargs and len(self.preds_std_) > 0:
            preds_std_to_use = np.asarray(self.preds_std_)
        else:
            preds_std_to_use = self.gaussian_preds_std_

        self.lower_ = pd.DataFrame(
            self.mean_.values - pi_multiplier * preds_std_to_use,
            columns=self.series_names,  # self.df_.columns,
            index=self.output_dates_,
        )

        self.upper_ = pd.DataFrame(
            self.mean_.values + pi_multiplier * preds_std_to_use,
            columns=self.series_names,  # self.df_.columns,
            index=self.output_dates_,
        )

        res = DescribeResult(self.mean_, self.lower_, self.upper_)

        if self.xreg_ is not None:
            # drop the trailing external-regressor columns
            if len(self.xreg_.shape) > 1:
                res2 = mx.tuple_map(
                    res,
                    lambda x: mo.delete_last_columns(
                        x, num_columns=self.xreg_.shape[1]
                    ),
                )
            else:
                res2 = mx.tuple_map(
                    res, lambda x: mo.delete_last_columns(x, num_columns=1)
                )
            return DescribeResult(res2[0], res2[1], res2[2])

        return res

    # NOTE(review): "quantile" is not in ("gaussian", "scp"), so one of the
    # two generic branches above has already returned for it; this branch
    # looks unreachable — confirm before relying on it.
    if self.type_pi == "quantile":
        DescribeResult = namedtuple("DescribeResult", ("mean"))

        self.mean_ = pd.DataFrame(
            np.asarray(self.mean_),
            columns=self.series_names,  # self.df_.columns,
            index=self.output_dates_,
        )

        res = DescribeResult(self.mean_)

        if self.xreg_ is not None:
            if len(self.xreg_.shape) > 1:
                res2 = mx.tuple_map(
                    res,
                    lambda x: mo.delete_last_columns(
                        x, num_columns=self.xreg_.shape[1]
                    ),
                )
            else:
                res2 = mx.tuple_map(
                    res, lambda x: mo.delete_last_columns(x, num_columns=1)
                )
            return DescribeResult(res2[0])

        return res

    # NOTE(review): self.sims_ is only assigned in the first returning
    # branch above, and every type_pi listed below returns earlier, so
    # the remainder looks unreachable; when execution falls through
    # (e.g. type_pi == "scp") the method returns None implicitly.

    # After prediction loop, ensure sims only contain target columns
    if self.sims_ is not None:
        sims_iter = tqdm(self.sims_) if self.verbose else self.sims_
        self.sims_ = tuple(
            sim[:h,]  # Only keep target columns and h rows
            for sim in sims_iter
        )

        # Convert numpy arrays to DataFrames with proper columns
        self.sims_ = tuple(
            pd.DataFrame(
                sim,
                columns=self.df_.columns[: self.init_n_series_],
                index=self.output_dates_,
            )
            for sim in self.sims_
        )

    if self.type_pi in (
        "kde",
        "bootstrap",
        "block-bootstrap",
        "vine-copula",
    ):
        if self.xreg_ is not None:
            # Use getsimsxreg when external regressors are present
            target_cols = self.df_.columns[: self.init_n_series_]
            self.sims_ = getsimsxreg(
                self.sims_, self.output_dates_, target_cols
            )
        else:
            # Use original getsims for backward compatibility
            self.sims_ = getsims(self.sims_)

Forecast all the time series, h steps ahead

def score(self, X, training_index, testing_index, scoring=None, alpha=0.5, **kwargs):
1566    def score(
1567        self,
1568        X,
1569        training_index,
1570        testing_index,
1571        scoring=None,
1572        alpha=0.5,
1573        **kwargs,
1574    ):
1575        """Train on training_index, score on testing_index."""
1576
1577        assert (
1578            bool(set(training_index).intersection(set(testing_index))) == False
1579        ), "Non-overlapping 'training_index' and 'testing_index' required"
1580
1581        # Dimensions
1582        try:
1583            # multivariate time series
1584            n, p = X.shape
1585        except:
1586            # univariate time series
1587            n = X.shape[0]
1588            p = 1
1589
1590        # Training and testing sets
1591        if p > 1:
1592            X_train = X[training_index, :]
1593            X_test = X[testing_index, :]
1594        else:
1595            X_train = X[training_index]
1596            X_test = X[testing_index]
1597
1598        # Horizon
1599        h = len(testing_index)
1600        assert (
1601            len(training_index) + h
1602        ) <= n, "Please check lengths of training and testing windows"
1603
1604        # Fit and predict
1605        self.fit(X_train, **kwargs)
1606        preds = self.predict(h=h, **kwargs)
1607
1608        if scoring is None:
1609            scoring = "neg_root_mean_squared_error"
1610
1611        if scoring == "pinball":
1612            # Predict requested quantile
1613            q_pred = self.predict(h=h, quantiles=[alpha], **kwargs)
1614            # Handle multivariate
1615            scores = []
1616            for j in range(p):
1617                series_name = getattr(self, "series_names", [f"Series_{j}"])[j]
1618                q_label = (
1619                    f"{int(alpha * 100):02d}"
1620                    if (alpha * 100).is_integer()
1621                    else f"{alpha:.3f}".replace(".", "_")
1622                )
1623                col = f"quantile_{q_label}_{series_name}"
1624                if col not in q_pred.columns:
1625                    raise ValueError(
1626                        f"Column '{col}' not found in quantile forecast output."
1627                    )
1628                y_true_j = X_test[:, j]
1629                y_pred_j = q_pred[col].values
1630                # Compute pinball loss for this series
1631                loss = mean_pinball_loss(y_true_j, y_pred_j, alpha=alpha)
1632                scores.append(loss)
1633            # Return average over series
1634            return np.mean(scores)
1635
1636        if scoring == "crps":
1637            # Ensure simulations exist
1638            preds = self.predict(h=h, **kwargs)  # triggers self.sims_
1639            # Extract simulations: list of DataFrames → (R, h, p)
1640            sims_vals = np.stack(
1641                [sim.values for sim in self.sims_], axis=0
1642            )  # (R, h, p)
1643            crps_scores = []
1644            for j in range(p):
1645                y_true_j = X_test[:, j]
1646                sims_j = sims_vals[:, :, j]  # (R, h)
1647                crps_j = self._crps_ensemble(np.asarray(y_true_j), sims_j)
1648                crps_scores.append(np.mean(crps_j))  # average over horizon
1649            return np.mean(crps_scores)  # average over series
1650
1651        # check inputs
1652        assert scoring in (
1653            "explained_variance",
1654            "neg_mean_absolute_error",
1655            "neg_mean_squared_error",
1656            "neg_root_mean_squared_error",
1657            "neg_mean_squared_log_error",
1658            "neg_median_absolute_error",
1659            "r2",
1660        ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \
1661                               'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \
1662                               'neg_median_absolute_error', 'r2')"
1663
1664        scoring_options = {
1665            "explained_variance": skm2.explained_variance_score,
1666            "neg_mean_absolute_error": skm2.mean_absolute_error,
1667            "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2),
1668            "neg_root_mean_squared_error": lambda x, y: np.sqrt(
1669                np.mean((x - y) ** 2)
1670            ),
1671            "neg_mean_squared_log_error": skm2.mean_squared_log_error,
1672            "neg_median_absolute_error": skm2.median_absolute_error,
1673            "r2": skm2.r2_score,
1674        }
1675
1676        return scoring_options[scoring](X_test, preds)

Train on training_index, score on testing_index.

class MTSStacker(nnetsauce.MTS):
 12class MTSStacker(MTS):
 13    """
 14    Sequential stacking for time series with unified strategy.
 15
 16    Core Strategy:
 17    1. Split data: half1 (base models) | half2 (meta-model)
 18    2. Train base models on half1, predict half2
 19    3. Create augmented dataset: [original_series | base_pred_1 | base_pred_2 | ...]
 20       Stack as additional time series, extract target series
 21    4. Train meta-MTS on half2 with augmented data
 22    5. Retrain base models on half2 for temporal alignment
 23    6. At prediction: base models forecast → augment → meta-model predicts
 24    """
 25
 26    def __init__(
 27        self,
 28        base_models,
 29        meta_model,
 30        split_ratio=0.5,
 31    ):
 32        """
 33        Parameters
 34        ----------
 35        base_models : list of sklearn-compatible models
 36            Base models (e.g., Ridge, Lasso, RandomForest)
 37        meta_model : nnetsauce.MTS instance
 38            MTS with type_pi='scp2-kde' or similar
 39        split_ratio : float
 40            Proportion for half1 (default: 0.5)
 41        """
 42        self.base_models = base_models
 43        self.meta_model = meta_model
 44        self.split_ratio = split_ratio
 45        self.fitted_base_models_ = []
 46        self.split_idx_ = None
 47        self.mean_ = None
 48        self.lower_ = None
 49        self.upper_ = None
 50        self.sims_ = None
 51        self.output_dates_ = None
 52
 53    def fit(self, X, xreg=None, **kwargs):
 54        """
 55        Fit MTSStacker using sequential stacking strategy.
 56
 57        Parameters
 58        ----------
 59        X : array-like or DataFrame, shape (n_samples, n_features)
 60            Training time series (most recent observations last)
 61        xreg : array-like, optional
 62            External regressors
 63        **kwargs : dict
 64            Additional parameters for base and meta models
 65
 66        Returns
 67        -------
 68        self : object
 69        """
 70        # 1. Store attributes and convert to DataFrame if needed
 71        if isinstance(X, pd.DataFrame):
 72            self.df_ = X.copy()
 73            X_array = X.values
 74            self.series_names = X.columns.tolist()
 75        else:
 76            X_array = np.asarray(X)
 77            self.df_ = pd.DataFrame(X_array)
 78            self.series_names = [f"series{i}" for i in range(X_array.shape[1])]
 79
 80        n_samples = X_array.shape[0]
 81        self.n_series_ = X_array.shape[1] if X_array.ndim > 1 else 1
 82
 83        # 2. Split data into half1 and half2
 84        split_idx = int(n_samples * self.split_ratio)
 85        self.split_idx_ = split_idx
 86
 87        if split_idx < self.meta_model.lags:
 88            raise ValueError(
 89                f"Split creates insufficient data: split_idx={split_idx} < "
 90                f"lags={self.meta_model.lags}. Reduce split_ratio or use fewer lags."
 91            )
 92
 93        half1 = X_array[:split_idx]
 94        half2 = X_array[split_idx:]
 95
 96        # 3. Train base models on half1 and predict half2
 97        base_preds = []
 98        temp_base_models = []
 99
100        for base_model in self.base_models:
101            # Wrap in MTS with same config as meta_model
102            base_mts = MTS(
103                obj=clone(base_model),
104                lags=self.meta_model.lags,
105                n_hidden_features=self.meta_model.n_hidden_features,
106                replications=self.meta_model.replications,
107                kernel=self.meta_model.kernel,
108                type_pi=None,  # No prediction intervals for base models
109            )
110            base_mts.fit(half1)
111
112            # Predict half2
113            pred = base_mts.predict(h=len(half2))
114
115            # Handle different return types
116            if isinstance(pred, pd.DataFrame):
117                base_preds.append(pred.values)
118            elif isinstance(pred, np.ndarray):
119                base_preds.append(pred)
120            elif hasattr(pred, "mean"):
121                # Named tuple with mean attribute
122                mean_pred = pred.mean
123                base_preds.append(
124                    mean_pred.values
125                    if isinstance(mean_pred, pd.DataFrame)
126                    else mean_pred
127                )
128            else:
129                raise ValueError(f"Unexpected prediction type: {type(pred)}")
130
131            temp_base_models.append(base_mts)
132
133        # 4. Create augmented dataset: [original | base_pred_1 | base_pred_2 | ...]
134        base_preds_array = np.hstack(
135            base_preds
136        )  # shape: (len(half2), n_series * n_base_models)
137
138        if isinstance(X, pd.DataFrame):
139            half2_df = pd.DataFrame(
140                half2,
141                index=self.df_.index[split_idx:],
142                columns=self.series_names,
143            )
144            base_preds_df = pd.DataFrame(
145                base_preds_array,
146                index=self.df_.index[split_idx:],
147                columns=[
148                    f"base_{i}_{j}"
149                    for i in range(len(self.base_models))
150                    for j in range(self.n_series_)
151                ],
152            )
153            augmented = pd.concat([half2_df, base_preds_df], axis=1)
154        else:
155            augmented = np.hstack([half2, base_preds_array])
156
157        # 5. Train meta-model on augmented half2
158        self.meta_model.fit(augmented, xreg=xreg, **kwargs)
159
160        # Store meta-model attributes
161        self.output_dates_ = self.meta_model.output_dates_
162        self.fit_objs_ = self.meta_model.fit_objs_
163        self.y_ = self.meta_model.y_
164        self.X_ = self.meta_model.X_
165        self.xreg_ = self.meta_model.xreg_
166        self.y_means_ = self.meta_model.y_means_
167        self.residuals_ = self.meta_model.residuals_
168
169        # 6. FIXED: Retrain base models on half2 for temporal alignment
170        self.fitted_base_models_ = []
171        for i, base_model in enumerate(self.base_models):
172            base_mts_final = MTS(
173                obj=clone(base_model),
174                lags=self.meta_model.lags,
175                n_hidden_features=self.meta_model.n_hidden_features,
176                replications=self.meta_model.replications,
177                kernel=self.meta_model.kernel,
178                type_pi=None,
179            )
180            base_mts_final.fit(half2)
181            self.fitted_base_models_.append(base_mts_final)
182
183        return self
184
185    def predict(self, h=5, level=95, **kwargs):
186        """
187        Forecast h steps ahead using stacked predictions.
188
189        FIXED: Now properly generates base model forecasts and uses them
190        to create augmented features for the meta-model.
191
192        Parameters
193        ----------
194        h : int
195            Forecast horizon
196        level : int
197            Confidence level for prediction intervals
198        **kwargs : dict
199            Additional parameters for prediction
200
201        Returns
202        -------
203        DescribeResult or DataFrame
204            Predictions with optional intervals/simulations
205        """
206        # Step 1: Generate base model forecasts for horizon h
207        base_forecasts = []
208
209        for base_mts in self.fitted_base_models_:
210            # Each base model forecasts h steps ahead
211            forecast = base_mts.predict(h=h)
212
213            # Extract mean prediction
214            if isinstance(forecast, pd.DataFrame):
215                base_forecasts.append(forecast.values)
216            elif isinstance(forecast, np.ndarray):
217                base_forecasts.append(forecast)
218            elif hasattr(forecast, "mean"):
219                mean_pred = forecast.mean
220                base_forecasts.append(
221                    mean_pred.values
222                    if isinstance(mean_pred, pd.DataFrame)
223                    else mean_pred
224                )
225            else:
226                raise ValueError(f"Unexpected forecast type: {type(forecast)}")
227
228        # Step 2: Stack base forecasts into augmented features
229        base_forecasts_array = np.hstack(
230            base_forecasts
231        )  # shape: (h, n_series * n_base)
232
233        # Step 3: Create augmented input for meta-model
234        # The meta-model needs the original series structure + base predictions
235        # We use recursive forecasting: predict one step, update history, repeat
236
237        # Get last window of data from training
238        last_window = self.df_.iloc[-self.meta_model.lags:].values
239
240        # Initialize containers for results
241        all_forecasts = []
242        all_lowers = [] if level is not None else None
243        all_uppers = [] if level is not None else None
244        all_sims = (
245            []
246            if hasattr(self.meta_model, "type_pi") and self.meta_model.type_pi
247            else None
248        )
249
250        # Recursive forecasting
251        current_window = last_window.copy()
252
253        for step in range(h):
254            # Create augmented input: [current_window_last_row | base_forecast_step]
255            # Note: meta-model was trained on [original | base_preds]
256            # For prediction, we need to simulate this structure
257
258            # Use the base forecast for this step
259            base_forecast_step = base_forecasts_array[
260                step: step + 1, :
261            ]  # shape: (1, n_base_features)
262
263            # Create a dummy augmented dataset for this step
264            # Combine last observed values with base predictions
265            last_obs = current_window[-1:, :]  # shape: (1, n_series)
266            augmented_step = np.hstack([last_obs, base_forecast_step])
267
268            # Convert to DataFrame if needed
269            if isinstance(self.df_, pd.DataFrame):
270                augmented_df = pd.DataFrame(
271                    augmented_step,
272                    columns=(
273                        self.series_names
274                        + [
275                            f"base_{i}_{j}"
276                            for i in range(len(self.base_models))
277                            for j in range(self.n_series_)
278                        ]
279                    ),
280                )
281            else:
282                augmented_df = augmented_step
283
284            # Predict one step with meta-model
285            # This is tricky: we need to use meta-model's internal predict
286            # but with our augmented data structure
287
288            # For now, use the standard predict and extract one step
289            step_result = self.meta_model.predict(h=1, level=level, **kwargs)
290
291            # Extract forecasts
292            if isinstance(step_result, pd.DataFrame):
293                forecast_step = step_result.iloc[0, : self.n_series_].values
294                all_forecasts.append(forecast_step)
295            elif isinstance(step_result, np.ndarray):
296                forecast_step = step_result[0, : self.n_series_]
297                all_forecasts.append(forecast_step)
298            elif hasattr(step_result, "mean"):
299                mean_pred = step_result.mean
300                if isinstance(mean_pred, pd.DataFrame):
301                    forecast_step = mean_pred.iloc[0, : self.n_series_].values
302                else:
303                    forecast_step = mean_pred[0, : self.n_series_]
304                all_forecasts.append(forecast_step)
305
306                # Extract intervals if available
307                if hasattr(step_result, "lower") and all_lowers is not None:
308                    lower_pred = step_result.lower
309                    if isinstance(lower_pred, pd.DataFrame):
310                        all_lowers.append(
311                            lower_pred.iloc[0, : self.n_series_].values
312                        )
313                    else:
314                        all_lowers.append(lower_pred[0, : self.n_series_])
315
316                if hasattr(step_result, "upper") and all_uppers is not None:
317                    upper_pred = step_result.upper
318                    if isinstance(upper_pred, pd.DataFrame):
319                        all_uppers.append(
320                            upper_pred.iloc[0, : self.n_series_].values
321                        )
322                    else:
323                        all_uppers.append(upper_pred[0, : self.n_series_])
324
325                # Extract simulations if available
326                if hasattr(step_result, "sims") and all_sims is not None:
327                    all_sims.append(step_result.sims)
328
329            # Update window for next iteration
330            current_window = np.vstack(
331                [current_window[1:], forecast_step.reshape(1, -1)]
332            )
333
334        # Combine all forecasts
335        forecasts_array = np.array(all_forecasts)
336
337        # Create output dates
338        if hasattr(self.df_, "index") and isinstance(
339            self.df_.index, pd.DatetimeIndex
340        ):
341            last_date = self.df_.index[-1]
342            freq = pd.infer_freq(self.df_.index)
343            if freq:
344                output_dates = pd.date_range(
345                    start=last_date, periods=h + 1, freq=freq
346                )[1:]
347            else:
348                output_dates = pd.RangeIndex(
349                    start=len(self.df_), stop=len(self.df_) + h
350                )
351        else:
352            output_dates = pd.RangeIndex(
353                start=len(self.df_), stop=len(self.df_) + h
354            )
355
356        self.output_dates_ = output_dates
357
358        # Format output
359        mean_df = pd.DataFrame(
360            forecasts_array,
361            index=output_dates,
362            columns=self.series_names[: self.n_series_],
363        )
364        self.mean_ = mean_df
365
366        # Return based on what was computed
367        if all_lowers and all_uppers:
368            lowers_array = np.array(all_lowers)
369            uppers_array = np.array(all_uppers)
370
371            lower_df = pd.DataFrame(
372                lowers_array,
373                index=output_dates,
374                columns=self.series_names[: self.n_series_],
375            )
376            upper_df = pd.DataFrame(
377                uppers_array,
378                index=output_dates,
379                columns=self.series_names[: self.n_series_],
380            )
381
382            self.lower_ = lower_df
383            self.upper_ = upper_df
384
385            if all_sims:
386                self.sims_ = tuple(all_sims)
387                DescribeResult = namedtuple(
388                    "DescribeResult", ("mean", "sims", "lower", "upper")
389                )
390                return DescribeResult(mean_df, self.sims_, lower_df, upper_df)
391            else:
392                DescribeResult = namedtuple(
393                    "DescribeResult", ("mean", "lower", "upper")
394                )
395                return DescribeResult(mean_df, lower_df, upper_df)
396        else:
397            return mean_df
398
399    def plot(self, series=None, **kwargs):
400        """
401        Plot the time series with forecasts and prediction intervals.
402
403        Parameters
404        ----------
405        series : str or int, optional
406            Name or index of the series to plot (default: 0)
407        **kwargs : dict
408            Additional parameters for plotting
409        """
410        # Ensure we have predictions
411        if self.mean_ is None:
412            raise ValueError(
413                "Model forecasting must be obtained first (call predict)"
414            )
415
416        # Convert series name to index if needed
417        if isinstance(series, str):
418            if series in self.series_names:
419                series_idx = self.series_names.index(series)
420            else:
421                raise ValueError(
422                    f"Series '{series}' doesn't exist in the input dataset"
423                )
424        else:
425            series_idx = series if series is not None else 0
426
427        # Check bounds
428        if series_idx < 0 or series_idx >= self.n_series_:
429            raise ValueError(
430                f"Series index {series_idx} is out of bounds (0 to {self.n_series_ - 1})"
431            )
432
433        # Prepare data for plotting
434        import matplotlib.pyplot as plt
435        import matplotlib.dates as mdates
436
437        # Get historical data
438        historical_data = self.df_.iloc[:, series_idx]
439        forecast_data = self.mean_.iloc[:, series_idx]
440
441        # Get prediction intervals if available
442        has_intervals = self.lower_ is not None and self.upper_ is not None
443        if has_intervals:
444            lower_data = self.lower_.iloc[:, series_idx]
445            upper_data = self.upper_.iloc[:, series_idx]
446
447        # Create figure
448        fig, ax = plt.subplots(figsize=(12, 6))
449
450        # Plot historical data
451        if isinstance(self.df_.index, pd.DatetimeIndex):
452            hist_index = self.df_.index
453            ax.plot(
454                hist_index,
455                historical_data,
456                "-",
457                label="Historical",
458                color="blue",
459                linewidth=1.5,
460            )
461
462            # Plot forecast
463            forecast_index = self.mean_.index
464            ax.plot(
465                forecast_index,
466                forecast_data,
467                "-",
468                label="Forecast",
469                color="red",
470                linewidth=1.5,
471            )
472
473            # Plot prediction intervals
474            if has_intervals:
475                ax.fill_between(
476                    forecast_index,
477                    lower_data,
478                    upper_data,
479                    alpha=0.3,
480                    color="red",
481                    label="Prediction Interval",
482                )
483
484            # Add vertical line at the split point
485            if self.split_idx_ is not None:
486                split_date = hist_index[self.split_idx_]
487                ax.axvline(
488                    x=split_date,
489                    color="gray",
490                    linestyle="--",
491                    alpha=0.5,
492                    label="Train Split",
493                )
494
495            # Format x-axis for dates
496            ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m-%d"))
497            fig.autofmt_xdate()
498        else:
499            # Numeric indices
500            n_points_train = len(self.df_)
501            n_points_forecast = len(self.mean_)
502
503            x_hist = np.arange(n_points_train)
504            x_forecast = np.arange(
505                n_points_train, n_points_train + n_points_forecast
506            )
507
508            ax.plot(
509                x_hist,
510                historical_data,
511                "-",
512                label="Historical",
513                color="blue",
514                linewidth=1.5,
515            )
516            ax.plot(
517                x_forecast,
518                forecast_data,
519                "-",
520                label="Forecast",
521                color="red",
522                linewidth=1.5,
523            )
524
525            if has_intervals:
526                ax.fill_between(
527                    x_forecast,
528                    lower_data,
529                    upper_data,
530                    alpha=0.3,
531                    color="red",
532                    label="Prediction Interval",
533                )
534
535            if self.split_idx_ is not None:
536                ax.axvline(
537                    x=self.split_idx_,
538                    color="gray",
539                    linestyle="--",
540                    alpha=0.5,
541                    label="Train Split",
542                )
543
544        # Set title and labels
545        series_name = (
546            self.series_names[series_idx]
547            if series_idx < len(self.series_names)
548            else f"Series {series_idx}"
549        )
550        plt.title(f"Forecast for {series_name}", fontsize=14, fontweight="bold")
551        plt.xlabel("Time")
552        plt.ylabel("Value")
553        plt.legend()
554        plt.grid(True, alpha=0.3)
555        plt.tight_layout()
556        plt.show()

Sequential stacking for time series with unified strategy.

Core Strategy:

  1. Split data: half1 (base models) | half2 (meta-model)
  2. Train base models on half1, predict half2
  3. Create augmented dataset: [original_series | base_pred_1 | base_pred_2 | ...] (the base predictions are stacked as additional time series; the target series are extracted afterwards)
  4. Train meta-MTS on half2 with augmented data
  5. Retrain base models on half2 for temporal alignment
  6. At prediction: base models forecast → augment → meta-model predicts
def fit(self, X, xreg=None, **kwargs):
 53    def fit(self, X, xreg=None, **kwargs):
 54        """
 55        Fit MTSStacker using sequential stacking strategy.
 56
 57        Parameters
 58        ----------
 59        X : array-like or DataFrame, shape (n_samples, n_features)
 60            Training time series (most recent observations last)
 61        xreg : array-like, optional
 62            External regressors
 63        **kwargs : dict
 64            Additional parameters for base and meta models
 65
 66        Returns
 67        -------
 68        self : object
 69        """
 70        # 1. Store attributes and convert to DataFrame if needed
 71        if isinstance(X, pd.DataFrame):
 72            self.df_ = X.copy()
 73            X_array = X.values
 74            self.series_names = X.columns.tolist()
 75        else:
 76            X_array = np.asarray(X)
 77            self.df_ = pd.DataFrame(X_array)
 78            self.series_names = [f"series{i}" for i in range(X_array.shape[1])]
 79
 80        n_samples = X_array.shape[0]
 81        self.n_series_ = X_array.shape[1] if X_array.ndim > 1 else 1
 82
 83        # 2. Split data into half1 and half2
 84        split_idx = int(n_samples * self.split_ratio)
 85        self.split_idx_ = split_idx
 86
 87        if split_idx < self.meta_model.lags:
 88            raise ValueError(
 89                f"Split creates insufficient data: split_idx={split_idx} < "
 90                f"lags={self.meta_model.lags}. Reduce split_ratio or use fewer lags."
 91            )
 92
 93        half1 = X_array[:split_idx]
 94        half2 = X_array[split_idx:]
 95
 96        # 3. Train base models on half1 and predict half2
 97        base_preds = []
 98        temp_base_models = []
 99
100        for base_model in self.base_models:
101            # Wrap in MTS with same config as meta_model
102            base_mts = MTS(
103                obj=clone(base_model),
104                lags=self.meta_model.lags,
105                n_hidden_features=self.meta_model.n_hidden_features,
106                replications=self.meta_model.replications,
107                kernel=self.meta_model.kernel,
108                type_pi=None,  # No prediction intervals for base models
109            )
110            base_mts.fit(half1)
111
112            # Predict half2
113            pred = base_mts.predict(h=len(half2))
114
115            # Handle different return types
116            if isinstance(pred, pd.DataFrame):
117                base_preds.append(pred.values)
118            elif isinstance(pred, np.ndarray):
119                base_preds.append(pred)
120            elif hasattr(pred, "mean"):
121                # Named tuple with mean attribute
122                mean_pred = pred.mean
123                base_preds.append(
124                    mean_pred.values
125                    if isinstance(mean_pred, pd.DataFrame)
126                    else mean_pred
127                )
128            else:
129                raise ValueError(f"Unexpected prediction type: {type(pred)}")
130
131            temp_base_models.append(base_mts)
132
133        # 4. Create augmented dataset: [original | base_pred_1 | base_pred_2 | ...]
134        base_preds_array = np.hstack(
135            base_preds
136        )  # shape: (len(half2), n_series * n_base_models)
137
138        if isinstance(X, pd.DataFrame):
139            half2_df = pd.DataFrame(
140                half2,
141                index=self.df_.index[split_idx:],
142                columns=self.series_names,
143            )
144            base_preds_df = pd.DataFrame(
145                base_preds_array,
146                index=self.df_.index[split_idx:],
147                columns=[
148                    f"base_{i}_{j}"
149                    for i in range(len(self.base_models))
150                    for j in range(self.n_series_)
151                ],
152            )
153            augmented = pd.concat([half2_df, base_preds_df], axis=1)
154        else:
155            augmented = np.hstack([half2, base_preds_array])
156
157        # 5. Train meta-model on augmented half2
158        self.meta_model.fit(augmented, xreg=xreg, **kwargs)
159
160        # Store meta-model attributes
161        self.output_dates_ = self.meta_model.output_dates_
162        self.fit_objs_ = self.meta_model.fit_objs_
163        self.y_ = self.meta_model.y_
164        self.X_ = self.meta_model.X_
165        self.xreg_ = self.meta_model.xreg_
166        self.y_means_ = self.meta_model.y_means_
167        self.residuals_ = self.meta_model.residuals_
168
169        # 6. FIXED: Retrain base models on half2 for temporal alignment
170        self.fitted_base_models_ = []
171        for i, base_model in enumerate(self.base_models):
172            base_mts_final = MTS(
173                obj=clone(base_model),
174                lags=self.meta_model.lags,
175                n_hidden_features=self.meta_model.n_hidden_features,
176                replications=self.meta_model.replications,
177                kernel=self.meta_model.kernel,
178                type_pi=None,
179            )
180            base_mts_final.fit(half2)
181            self.fitted_base_models_.append(base_mts_final)
182
183        return self

Fit MTSStacker using sequential stacking strategy.

Parameters

X : array-like or DataFrame, shape (n_samples, n_features)
    Training time series (most recent observations last).
xreg : array-like, optional
    External regressors.
**kwargs : dict
    Additional parameters forwarded to the meta-model's fit.

Returns

self : object

def predict(self, h=5, level=95, **kwargs):
185    def predict(self, h=5, level=95, **kwargs):
186        """
187        Forecast h steps ahead using stacked predictions.
188
189        FIXED: Now properly generates base model forecasts and uses them
190        to create augmented features for the meta-model.
191
192        Parameters
193        ----------
194        h : int
195            Forecast horizon
196        level : int
197            Confidence level for prediction intervals
198        **kwargs : dict
199            Additional parameters for prediction
200
201        Returns
202        -------
203        DescribeResult or DataFrame
204            Predictions with optional intervals/simulations
205        """
206        # Step 1: Generate base model forecasts for horizon h
207        base_forecasts = []
208
209        for base_mts in self.fitted_base_models_:
210            # Each base model forecasts h steps ahead
211            forecast = base_mts.predict(h=h)
212
213            # Extract mean prediction
214            if isinstance(forecast, pd.DataFrame):
215                base_forecasts.append(forecast.values)
216            elif isinstance(forecast, np.ndarray):
217                base_forecasts.append(forecast)
218            elif hasattr(forecast, "mean"):
219                mean_pred = forecast.mean
220                base_forecasts.append(
221                    mean_pred.values
222                    if isinstance(mean_pred, pd.DataFrame)
223                    else mean_pred
224                )
225            else:
226                raise ValueError(f"Unexpected forecast type: {type(forecast)}")
227
228        # Step 2: Stack base forecasts into augmented features
229        base_forecasts_array = np.hstack(
230            base_forecasts
231        )  # shape: (h, n_series * n_base)
232
233        # Step 3: Create augmented input for meta-model
234        # The meta-model needs the original series structure + base predictions
235        # We use recursive forecasting: predict one step, update history, repeat
236
237        # Get last window of data from training
238        last_window = self.df_.iloc[-self.meta_model.lags:].values
239
240        # Initialize containers for results
241        all_forecasts = []
242        all_lowers = [] if level is not None else None
243        all_uppers = [] if level is not None else None
244        all_sims = (
245            []
246            if hasattr(self.meta_model, "type_pi") and self.meta_model.type_pi
247            else None
248        )
249
250        # Recursive forecasting
251        current_window = last_window.copy()
252
253        for step in range(h):
254            # Create augmented input: [current_window_last_row | base_forecast_step]
255            # Note: meta-model was trained on [original | base_preds]
256            # For prediction, we need to simulate this structure
257
258            # Use the base forecast for this step
259            base_forecast_step = base_forecasts_array[
260                step: step + 1, :
261            ]  # shape: (1, n_base_features)
262
263            # Create a dummy augmented dataset for this step
264            # Combine last observed values with base predictions
265            last_obs = current_window[-1:, :]  # shape: (1, n_series)
266            augmented_step = np.hstack([last_obs, base_forecast_step])
267
268            # Convert to DataFrame if needed
269            if isinstance(self.df_, pd.DataFrame):
270                augmented_df = pd.DataFrame(
271                    augmented_step,
272                    columns=(
273                        self.series_names
274                        + [
275                            f"base_{i}_{j}"
276                            for i in range(len(self.base_models))
277                            for j in range(self.n_series_)
278                        ]
279                    ),
280                )
281            else:
282                augmented_df = augmented_step
283
284            # Predict one step with meta-model
285            # This is tricky: we need to use meta-model's internal predict
286            # but with our augmented data structure
287
288            # For now, use the standard predict and extract one step
289            step_result = self.meta_model.predict(h=1, level=level, **kwargs)
290
291            # Extract forecasts
292            if isinstance(step_result, pd.DataFrame):
293                forecast_step = step_result.iloc[0, : self.n_series_].values
294                all_forecasts.append(forecast_step)
295            elif isinstance(step_result, np.ndarray):
296                forecast_step = step_result[0, : self.n_series_]
297                all_forecasts.append(forecast_step)
298            elif hasattr(step_result, "mean"):
299                mean_pred = step_result.mean
300                if isinstance(mean_pred, pd.DataFrame):
301                    forecast_step = mean_pred.iloc[0, : self.n_series_].values
302                else:
303                    forecast_step = mean_pred[0, : self.n_series_]
304                all_forecasts.append(forecast_step)
305
306                # Extract intervals if available
307                if hasattr(step_result, "lower") and all_lowers is not None:
308                    lower_pred = step_result.lower
309                    if isinstance(lower_pred, pd.DataFrame):
310                        all_lowers.append(
311                            lower_pred.iloc[0, : self.n_series_].values
312                        )
313                    else:
314                        all_lowers.append(lower_pred[0, : self.n_series_])
315
316                if hasattr(step_result, "upper") and all_uppers is not None:
317                    upper_pred = step_result.upper
318                    if isinstance(upper_pred, pd.DataFrame):
319                        all_uppers.append(
320                            upper_pred.iloc[0, : self.n_series_].values
321                        )
322                    else:
323                        all_uppers.append(upper_pred[0, : self.n_series_])
324
325                # Extract simulations if available
326                if hasattr(step_result, "sims") and all_sims is not None:
327                    all_sims.append(step_result.sims)
328
329            # Update window for next iteration
330            current_window = np.vstack(
331                [current_window[1:], forecast_step.reshape(1, -1)]
332            )
333
334        # Combine all forecasts
335        forecasts_array = np.array(all_forecasts)
336
337        # Create output dates
338        if hasattr(self.df_, "index") and isinstance(
339            self.df_.index, pd.DatetimeIndex
340        ):
341            last_date = self.df_.index[-1]
342            freq = pd.infer_freq(self.df_.index)
343            if freq:
344                output_dates = pd.date_range(
345                    start=last_date, periods=h + 1, freq=freq
346                )[1:]
347            else:
348                output_dates = pd.RangeIndex(
349                    start=len(self.df_), stop=len(self.df_) + h
350                )
351        else:
352            output_dates = pd.RangeIndex(
353                start=len(self.df_), stop=len(self.df_) + h
354            )
355
356        self.output_dates_ = output_dates
357
358        # Format output
359        mean_df = pd.DataFrame(
360            forecasts_array,
361            index=output_dates,
362            columns=self.series_names[: self.n_series_],
363        )
364        self.mean_ = mean_df
365
366        # Return based on what was computed
367        if all_lowers and all_uppers:
368            lowers_array = np.array(all_lowers)
369            uppers_array = np.array(all_uppers)
370
371            lower_df = pd.DataFrame(
372                lowers_array,
373                index=output_dates,
374                columns=self.series_names[: self.n_series_],
375            )
376            upper_df = pd.DataFrame(
377                uppers_array,
378                index=output_dates,
379                columns=self.series_names[: self.n_series_],
380            )
381
382            self.lower_ = lower_df
383            self.upper_ = upper_df
384
385            if all_sims:
386                self.sims_ = tuple(all_sims)
387                DescribeResult = namedtuple(
388                    "DescribeResult", ("mean", "sims", "lower", "upper")
389                )
390                return DescribeResult(mean_df, self.sims_, lower_df, upper_df)
391            else:
392                DescribeResult = namedtuple(
393                    "DescribeResult", ("mean", "lower", "upper")
394                )
395                return DescribeResult(mean_df, lower_df, upper_df)
396        else:
397            return mean_df

Forecast h steps ahead using stacked predictions.

FIXED: Now properly generates base model forecasts and uses them to create augmented features for the meta-model.

Parameters

h : int
    Forecast horizon.

level : int
    Confidence level for prediction intervals.

**kwargs : dict
    Additional parameters for prediction.

Returns

DescribeResult or DataFrame Predictions with optional intervals/simulations

class MultiOutputMTS(nnetsauce.MTS):
 14class MultiOutputMTS(MTS):
 15    """MTS subclass optimized for multivariate time series with vectorized models
 16
 17    Enforces n_series >= 2 and uses single vectorized fit call instead of per-series loop.
 18    Works with sklearn models supporting multi-output (Ridge, Lasso, LinearRegression, etc.)
 19    """
 20
 21    def fit(self, X, xreg=None, **kwargs):
 22        """Fit with vectorized multi-output model - requires n_series >= 2"""
 23
 24        # Validate multivariate input
 25        try:
 26            self.init_n_series_ = X.shape[1]
 27        except IndexError:
 28            raise ValueError(
 29                "MultiOutputMTS requires multivariate input (n_samples, n_series)"
 30            )
 31
 32        if self.init_n_series_ < 2:
 33            raise ValueError(
 34                f"MultiOutputMTS requires at least 2 series, got {self.init_n_series_}"
 35            )
 36
 37        # Automatic lag selection if requested (copied from parent)
 38        if isinstance(self.lags, str):
 39            max_lags = min(25, X.shape[0] // 4)
 40            best_ic = float("inf")
 41            best_lags = 1
 42
 43            if self.verbose:
 44                print(
 45                    f"\nSelecting optimal number of lags using {self.lags}..."
 46                )
 47                iterator = tqdm(range(1, max_lags + 1))
 48            else:
 49                iterator = range(1, max_lags + 1)
 50
 51            for lag in iterator:
 52                if isinstance(X, pd.DataFrame):
 53                    X_values = X.values[::-1]
 54                else:
 55                    X_values = X[::-1]
 56
 57                mts_input = ts.create_train_inputs(X_values, lag)
 58                dummy_y, scaled_Z = self.cook_training_set(
 59                    y=np.ones(mts_input[0].shape[0]), X=mts_input[1]
 60                )
 61
 62                # Vectorized fit for lag selection
 63                y_means = np.mean(mts_input[0], axis=0)
 64                centered_y = mts_input[0] - y_means[np.newaxis, :]
 65                self.obj.fit(X=scaled_Z, y=centered_y)
 66                residuals = centered_y - self.obj.predict(scaled_Z)
 67                self.residuals_ = residuals  # Keep (n_obs, n_series) shape
 68
 69                ic = self._compute_information_criterion(
 70                    curr_lags=lag, criterion=self.lags
 71                )
 72
 73                if self.verbose:
 74                    print(f"Trying lags={lag}, {self.lags}={ic:.2f}")
 75
 76                if ic < best_ic:
 77                    best_ic = ic
 78                    best_lags = lag
 79
 80            if self.verbose:
 81                print(
 82                    f"\nSelected {best_lags} lags with {self.lags}={best_ic:.2f}"
 83                )
 84
 85            self.lags = best_lags
 86
 87        # Data preprocessing (from parent)
 88        self.input_dates = None
 89        self.df_ = None
 90
 91        if isinstance(X, pd.DataFrame) is False:
 92            if xreg is None:
 93                X = pd.DataFrame(X)
 94                self.series_names = [
 95                    "series" + str(i) for i in range(X.shape[1])
 96                ]
 97            else:
 98                X = mo.cbind(X, xreg)
 99                self.xreg_ = xreg
100        else:
101            X_index = None
102            if X.index is not None:
103                X_index = X.index
104            if xreg is None:
105                X = copy.deepcopy(mo.convert_df_to_numeric(X))
106            else:
107                X = copy.deepcopy(mo.cbind(mo.convert_df_to_numeric(X), xreg))
108                self.xreg_ = xreg
109            if X_index is not None:
110                X.index = X_index
111            self.series_names = X.columns.tolist()
112
113        if isinstance(X, pd.DataFrame):
114            if self.df_ is None:
115                self.df_ = X
116                X = X.values
117            else:
118                input_dates_prev = pd.DatetimeIndex(self.df_.index.values)
119                frequency = pd.infer_freq(input_dates_prev)
120                self.df_ = pd.concat([self.df_, X], axis=0)
121                self.input_dates = pd.date_range(
122                    start=input_dates_prev[0],
123                    periods=len(input_dates_prev) + X.shape[0],
124                    freq=frequency,
125                ).values.tolist()
126                self.df_.index = self.input_dates
127                X = self.df_.values
128            self.df_.columns = self.series_names
129        else:
130            if self.df_ is None:
131                self.df_ = pd.DataFrame(X, columns=self.series_names)
132            else:
133                self.df_ = pd.concat(
134                    [self.df_, pd.DataFrame(X, columns=self.series_names)],
135                    axis=0,
136                )
137
138        self.input_dates = ts.compute_input_dates(self.df_)
139
140        n, p = X.shape
141        self.n_obs_ = n
142        rep_1_n = np.repeat(1, n)
143
144        self.y_ = None
145        self.X_ = None
146        self.n_series = p
147        self.fit_objs_.clear()
148        self.y_means_.clear()
149        self.residuals_ = None
150        self.residuals_sims_ = None
151        self.kde_ = None
152        self.sims_ = None
153        self.scaled_Z_ = None
154        self.centered_y_is_ = []
155
156        # Create training inputs
157        mts_input = ts.create_train_inputs(X[::-1], self.lags)
158        self.y_ = mts_input[0]
159        self.X_ = mts_input[1]
160
161        dummy_y, scaled_Z = self.cook_training_set(y=rep_1_n, X=self.X_)
162        self.scaled_Z_ = scaled_Z
163
164        if self.verbose > 0:
165            print(
166                f"\n Adjusting {type(self.obj).__name__} to multivariate time series (vectorized)... \n"
167            )
168
169        # VECTORIZED FITTING - NO LOOP
170        y_means_array = np.array(
171            [np.mean(self.y_[:, i]) for i in range(self.init_n_series_)]
172        )
173        for i in range(self.init_n_series_):
174            self.y_means_[i] = y_means_array[i]
175
176        centered_y_all = self.y_ - y_means_array[np.newaxis, :]
177        self.centered_y_is_ = [
178            centered_y_all[:, i] for i in range(self.init_n_series_)
179        ]
180
181        # Single vectorized fit for all series
182        self.obj.fit(scaled_Z, centered_y_all)
183
184        # All series share the same model
185        for i in range(self.init_n_series_):
186            self.fit_objs_[i] = self.obj
187
188        # Vectorized residuals - ONLY target columns (n_obs, n_series)
189        preds_all = self.obj.predict(scaled_Z)
190        residuals_raw = centered_y_all - preds_all
191
192        # CRITICAL: Ensure residuals only have n_series columns, not all scaled_Z columns
193        # In case there's some dimension mismatch, explicitly slice
194        self.residuals_ = residuals_raw[:, : self.init_n_series_]
195
196        # Handle type_pi
197        if self.type_pi == "gaussian":
198            self.gaussian_preds_std_ = np.std(self.residuals_, axis=0)
199
200        if self.type_pi.startswith("scp2"):
201            data_mean = np.mean(self.residuals_, axis=0)
202            self.residuals_std_dev_ = np.std(self.residuals_, axis=0)
203            self.residuals_ = (
204                self.residuals_ - data_mean[np.newaxis, :]
205            ) / self.residuals_std_dev_[np.newaxis, :]
206
207        if self.replications is not None and "kde" in self.type_pi:
208            if self.verbose > 0:
209                print(f"\n Simulate residuals using {self.kernel} kernel... \n")
210            assert self.kernel in (
211                "gaussian",
212                "tophat",
213            ), "currently, 'kernel' must be either 'gaussian' or 'tophat'"
214            kernel_bandwidths = {"bandwidth": np.logspace(-6, 6, 150)}
215            grid = GridSearchCV(
216                KernelDensity(kernel=self.kernel, **kwargs),
217                param_grid=kernel_bandwidths,
218            )
219            grid.fit(self.residuals_)
220            if self.verbose > 0:
221                print(
222                    f"\n Best parameters for {self.kernel} kernel: {grid.best_params_} \n"
223                )
224            self.kde_ = grid.best_estimator_
225
226        return self
227
228    def predict(self, h=5, level=95, quantiles=None, **kwargs):
229        """Override predict to handle vectorized model predictions"""
230
231        # Delegate to parent for quantiles and multiple levels
232        if quantiles is not None or isinstance(level, (list, np.ndarray)):
233            return super().predict(
234                h=h, level=level, quantiles=quantiles, **kwargs
235            )
236
237        # Store original obj temporarily
238        original_obj = self.obj
239
240        # Create wrapper that extracts the i-th output for each series
241        class VectorizedWrapper:
242            def __init__(self, model, series_idx):
243                self.model = model
244                self.series_idx = series_idx
245
246            def predict(self, X, **kw):
247                """Predict and return only the output for this series index"""
248                preds = self.model.predict(X, **kw)
249                # preds shape: (n_samples, n_series) or (n_series,)
250                if len(preds.shape) == 1:
251                    # Single prediction: (n_series,)
252                    return preds[self.series_idx: self.series_idx + 1]
253                else:
254                    # Multiple predictions: (n_samples, n_series)
255                    return preds[
256                        :, self.series_idx: self.series_idx + 1
257                    ].flatten()
258
259        # Wrap each series with its own index
260        for i in range(self.init_n_series_):
261            self.fit_objs_[i] = VectorizedWrapper(original_obj, i)
262
263        try:
264            result = super().predict(
265                h=h, level=level, quantiles=quantiles, **kwargs
266            )
267        finally:
268            # Restore original
269            for i in range(self.init_n_series_):
270                self.fit_objs_[i] = original_obj
271
272        return result

MTS subclass optimized for multivariate time series with vectorized models

Enforces n_series >= 2 and uses single vectorized fit call instead of per-series loop. Works with sklearn models supporting multi-output (Ridge, Lasso, LinearRegression, etc.)

def fit(self, X, xreg=None, **kwargs):
 21    def fit(self, X, xreg=None, **kwargs):
 22        """Fit with vectorized multi-output model - requires n_series >= 2"""
 23
 24        # Validate multivariate input
 25        try:
 26            self.init_n_series_ = X.shape[1]
 27        except IndexError:
 28            raise ValueError(
 29                "MultiOutputMTS requires multivariate input (n_samples, n_series)"
 30            )
 31
 32        if self.init_n_series_ < 2:
 33            raise ValueError(
 34                f"MultiOutputMTS requires at least 2 series, got {self.init_n_series_}"
 35            )
 36
 37        # Automatic lag selection if requested (copied from parent)
 38        if isinstance(self.lags, str):
 39            max_lags = min(25, X.shape[0] // 4)
 40            best_ic = float("inf")
 41            best_lags = 1
 42
 43            if self.verbose:
 44                print(
 45                    f"\nSelecting optimal number of lags using {self.lags}..."
 46                )
 47                iterator = tqdm(range(1, max_lags + 1))
 48            else:
 49                iterator = range(1, max_lags + 1)
 50
 51            for lag in iterator:
 52                if isinstance(X, pd.DataFrame):
 53                    X_values = X.values[::-1]
 54                else:
 55                    X_values = X[::-1]
 56
 57                mts_input = ts.create_train_inputs(X_values, lag)
 58                dummy_y, scaled_Z = self.cook_training_set(
 59                    y=np.ones(mts_input[0].shape[0]), X=mts_input[1]
 60                )
 61
 62                # Vectorized fit for lag selection
 63                y_means = np.mean(mts_input[0], axis=0)
 64                centered_y = mts_input[0] - y_means[np.newaxis, :]
 65                self.obj.fit(X=scaled_Z, y=centered_y)
 66                residuals = centered_y - self.obj.predict(scaled_Z)
 67                self.residuals_ = residuals  # Keep (n_obs, n_series) shape
 68
 69                ic = self._compute_information_criterion(
 70                    curr_lags=lag, criterion=self.lags
 71                )
 72
 73                if self.verbose:
 74                    print(f"Trying lags={lag}, {self.lags}={ic:.2f}")
 75
 76                if ic < best_ic:
 77                    best_ic = ic
 78                    best_lags = lag
 79
 80            if self.verbose:
 81                print(
 82                    f"\nSelected {best_lags} lags with {self.lags}={best_ic:.2f}"
 83                )
 84
 85            self.lags = best_lags
 86
 87        # Data preprocessing (from parent)
 88        self.input_dates = None
 89        self.df_ = None
 90
 91        if isinstance(X, pd.DataFrame) is False:
 92            if xreg is None:
 93                X = pd.DataFrame(X)
 94                self.series_names = [
 95                    "series" + str(i) for i in range(X.shape[1])
 96                ]
 97            else:
 98                X = mo.cbind(X, xreg)
 99                self.xreg_ = xreg
100        else:
101            X_index = None
102            if X.index is not None:
103                X_index = X.index
104            if xreg is None:
105                X = copy.deepcopy(mo.convert_df_to_numeric(X))
106            else:
107                X = copy.deepcopy(mo.cbind(mo.convert_df_to_numeric(X), xreg))
108                self.xreg_ = xreg
109            if X_index is not None:
110                X.index = X_index
111            self.series_names = X.columns.tolist()
112
113        if isinstance(X, pd.DataFrame):
114            if self.df_ is None:
115                self.df_ = X
116                X = X.values
117            else:
118                input_dates_prev = pd.DatetimeIndex(self.df_.index.values)
119                frequency = pd.infer_freq(input_dates_prev)
120                self.df_ = pd.concat([self.df_, X], axis=0)
121                self.input_dates = pd.date_range(
122                    start=input_dates_prev[0],
123                    periods=len(input_dates_prev) + X.shape[0],
124                    freq=frequency,
125                ).values.tolist()
126                self.df_.index = self.input_dates
127                X = self.df_.values
128            self.df_.columns = self.series_names
129        else:
130            if self.df_ is None:
131                self.df_ = pd.DataFrame(X, columns=self.series_names)
132            else:
133                self.df_ = pd.concat(
134                    [self.df_, pd.DataFrame(X, columns=self.series_names)],
135                    axis=0,
136                )
137
138        self.input_dates = ts.compute_input_dates(self.df_)
139
140        n, p = X.shape
141        self.n_obs_ = n
142        rep_1_n = np.repeat(1, n)
143
144        self.y_ = None
145        self.X_ = None
146        self.n_series = p
147        self.fit_objs_.clear()
148        self.y_means_.clear()
149        self.residuals_ = None
150        self.residuals_sims_ = None
151        self.kde_ = None
152        self.sims_ = None
153        self.scaled_Z_ = None
154        self.centered_y_is_ = []
155
156        # Create training inputs
157        mts_input = ts.create_train_inputs(X[::-1], self.lags)
158        self.y_ = mts_input[0]
159        self.X_ = mts_input[1]
160
161        dummy_y, scaled_Z = self.cook_training_set(y=rep_1_n, X=self.X_)
162        self.scaled_Z_ = scaled_Z
163
164        if self.verbose > 0:
165            print(
166                f"\n Adjusting {type(self.obj).__name__} to multivariate time series (vectorized)... \n"
167            )
168
169        # VECTORIZED FITTING - NO LOOP
170        y_means_array = np.array(
171            [np.mean(self.y_[:, i]) for i in range(self.init_n_series_)]
172        )
173        for i in range(self.init_n_series_):
174            self.y_means_[i] = y_means_array[i]
175
176        centered_y_all = self.y_ - y_means_array[np.newaxis, :]
177        self.centered_y_is_ = [
178            centered_y_all[:, i] for i in range(self.init_n_series_)
179        ]
180
181        # Single vectorized fit for all series
182        self.obj.fit(scaled_Z, centered_y_all)
183
184        # All series share the same model
185        for i in range(self.init_n_series_):
186            self.fit_objs_[i] = self.obj
187
188        # Vectorized residuals - ONLY target columns (n_obs, n_series)
189        preds_all = self.obj.predict(scaled_Z)
190        residuals_raw = centered_y_all - preds_all
191
192        # CRITICAL: Ensure residuals only have n_series columns, not all scaled_Z columns
193        # In case there's some dimension mismatch, explicitly slice
194        self.residuals_ = residuals_raw[:, : self.init_n_series_]
195
196        # Handle type_pi
197        if self.type_pi == "gaussian":
198            self.gaussian_preds_std_ = np.std(self.residuals_, axis=0)
199
200        if self.type_pi.startswith("scp2"):
201            data_mean = np.mean(self.residuals_, axis=0)
202            self.residuals_std_dev_ = np.std(self.residuals_, axis=0)
203            self.residuals_ = (
204                self.residuals_ - data_mean[np.newaxis, :]
205            ) / self.residuals_std_dev_[np.newaxis, :]
206
207        if self.replications is not None and "kde" in self.type_pi:
208            if self.verbose > 0:
209                print(f"\n Simulate residuals using {self.kernel} kernel... \n")
210            assert self.kernel in (
211                "gaussian",
212                "tophat",
213            ), "currently, 'kernel' must be either 'gaussian' or 'tophat'"
214            kernel_bandwidths = {"bandwidth": np.logspace(-6, 6, 150)}
215            grid = GridSearchCV(
216                KernelDensity(kernel=self.kernel, **kwargs),
217                param_grid=kernel_bandwidths,
218            )
219            grid.fit(self.residuals_)
220            if self.verbose > 0:
221                print(
222                    f"\n Best parameters for {self.kernel} kernel: {grid.best_params_} \n"
223                )
224            self.kde_ = grid.best_estimator_
225
226        return self

Fit with vectorized multi-output model - requires n_series >= 2

def predict(self, h=5, level=95, quantiles=None, **kwargs):
228    def predict(self, h=5, level=95, quantiles=None, **kwargs):
229        """Override predict to handle vectorized model predictions"""
230
231        # Delegate to parent for quantiles and multiple levels
232        if quantiles is not None or isinstance(level, (list, np.ndarray)):
233            return super().predict(
234                h=h, level=level, quantiles=quantiles, **kwargs
235            )
236
237        # Store original obj temporarily
238        original_obj = self.obj
239
240        # Create wrapper that extracts the i-th output for each series
241        class VectorizedWrapper:
242            def __init__(self, model, series_idx):
243                self.model = model
244                self.series_idx = series_idx
245
246            def predict(self, X, **kw):
247                """Predict and return only the output for this series index"""
248                preds = self.model.predict(X, **kw)
249                # preds shape: (n_samples, n_series) or (n_series,)
250                if len(preds.shape) == 1:
251                    # Single prediction: (n_series,)
252                    return preds[self.series_idx: self.series_idx + 1]
253                else:
254                    # Multiple predictions: (n_samples, n_series)
255                    return preds[
256                        :, self.series_idx: self.series_idx + 1
257                    ].flatten()
258
259        # Wrap each series with its own index
260        for i in range(self.init_n_series_):
261            self.fit_objs_[i] = VectorizedWrapper(original_obj, i)
262
263        try:
264            result = super().predict(
265                h=h, level=level, quantiles=quantiles, **kwargs
266            )
267        finally:
268            # Restore original
269            for i in range(self.init_n_series_):
270                self.fit_objs_[i] = original_obj
271
272        return result

Override predict to handle vectorized model predictions

class MultitaskClassifier(nnetsauce.Base, sklearn.base.ClassifierMixin):
 16class MultitaskClassifier(Base, ClassifierMixin):
 17    """Multitask Classification model based on regression models, with shared covariates
 18
 19    Parameters:
 20
 21        obj: object
 22            any object (must be a regression model) containing a method fit (obj.fit())
 23            and a method predict (obj.predict())
 24
 25        n_hidden_features: int
 26            number of nodes in the hidden layer
 27
 28        activation_name: str
 29            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 30
 31        a: float
 32            hyperparameter for 'prelu' or 'elu' activation function
 33
 34        nodes_sim: str
 35            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 36            'uniform'
 37
 38        bias: boolean
 39            indicates if the hidden layer contains a bias term (True) or not
 40            (False)
 41
 42        dropout: float
 43            regularization parameter; (random) percentage of nodes dropped out
 44            of the training
 45
 46        direct_link: boolean
 47            indicates if the original predictors are included (True) in model's
 48            fitting or not (False)
 49
 50        n_clusters: int
 51            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 52                no clustering)
 53
 54        cluster_encode: bool
 55            defines how the variable containing clusters is treated (default is one-hot)
 56            if `False`, then labels are used, without one-hot encoding
 57
 58        type_clust: str
 59            type of clustering method: currently k-means ('kmeans') or Gaussian
 60            Mixture Model ('gmm')
 61
 62        type_scaling: a tuple of 3 strings
 63            scaling methods for inputs, hidden layer, and clustering respectively
 64            (and when relevant).
 65            Currently available: standardization ('std') or MinMax scaling ('minmax')
 66
 67        col_sample: float
 68            percentage of covariates randomly chosen for training
 69
 70        row_sample: float
 71            percentage of rows chosen for training, by stratified bootstrapping
 72
 73        seed: int
 74            reproducibility seed for nodes_sim=='uniform'
 75
 76        backend: str
 77            "cpu" or "gpu" or "tpu"
 78
 79    Attributes:
 80
 81        fit_objs_: dict
 82            objects adjusted to each individual time series
 83
 84        n_classes_: int
 85            number of classes for the classifier
 86
 87    Examples:
 88
 89    See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/mtask_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/mtask_classification.py)
 90
 91    ```python
 92    import nnetsauce as ns
 93    import numpy as np
 94    from sklearn.datasets import load_breast_cancer
 95    from sklearn.linear_model import LinearRegression
 96    from sklearn.model_selection import train_test_split
 97    from sklearn import metrics
 98    from time import time
 99
100    breast_cancer = load_breast_cancer()
101    Z = breast_cancer.data
102    t = breast_cancer.target
103
104    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
105                                                        random_state=123+2*10)
106
107    # Linear Regression is used
108    regr = LinearRegression()
109    fit_obj = ns.MultitaskClassifier(regr, n_hidden_features=5,
110                                n_clusters=2, type_clust="gmm")
111
112    start = time()
113    fit_obj.fit(X_train, y_train)
114    print(f"Elapsed {time() - start}")
115
116    print(fit_obj.score(X_test, y_test))
117    print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
118
119    start = time()
120    preds = fit_obj.predict(X_test)
121    print(f"Elapsed {time() - start}")
122    print(metrics.classification_report(preds, y_test))
123    ```
124
125    """
126
127    # construct the object -----
128    _estimator_type = "classifier"
129
130    def __init__(
131        self,
132        obj,
133        n_hidden_features=5,
134        activation_name="relu",
135        a=0.01,
136        nodes_sim="sobol",
137        bias=True,
138        dropout=0,
139        direct_link=True,
140        n_clusters=2,
141        cluster_encode=True,
142        type_clust="kmeans",
143        type_scaling=("std", "std", "std"),
144        col_sample=1,
145        row_sample=1,
146        seed=123,
147        backend="cpu",
148    ):
149        super().__init__(
150            n_hidden_features=n_hidden_features,
151            activation_name=activation_name,
152            a=a,
153            nodes_sim=nodes_sim,
154            bias=bias,
155            dropout=dropout,
156            direct_link=direct_link,
157            n_clusters=n_clusters,
158            cluster_encode=cluster_encode,
159            type_clust=type_clust,
160            type_scaling=type_scaling,
161            col_sample=col_sample,
162            row_sample=row_sample,
163            seed=seed,
164            backend=backend,
165        )
166
167        self.type_fit = "classification"
168        self.obj = obj
169        self.fit_objs_ = {}
170
171    def fit(self, X, y, sample_weight=None, **kwargs):
172        """Fit MultitaskClassifier to training data (X, y).
173
174        Args:
175
176            X: {array-like}, shape = [n_samples, n_features]
177                Training vectors, where n_samples is the number
178                of samples and n_features is the number of features.
179
180            y: array-like, shape = [n_samples]
181                Target values.
182
183            **kwargs: additional parameters to be passed to
184                    self.cook_training_set or self.obj.fit
185
186        Returns:
187
188            self: object
189
190        """
191
192        assert mx.is_factor(y), "y must contain only integers"
193
194        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
195
196        self.classes_ = np.unique(y)  # for compatibility with sklearn
197        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
198
199        # multitask response
200        Y = mo.one_hot_encode2(output_y, self.n_classes_)
201
202        # if sample_weight is None:
203        for i in range(self.n_classes_):
204            self.fit_objs_[i] = deepcopy(
205                self.obj.fit(scaled_Z, Y[:, i], **kwargs)
206            )
207
208        self.classes_ = np.unique(y)
209        return self
210
211    def predict(self, X, **kwargs):
212        """Predict test data X.
213
214        Args:
215
216            X: {array-like}, shape = [n_samples, n_features]
217                Training vectors, where n_samples is the number
218                of samples and n_features is the number of features.
219
220            **kwargs: additional parameters to be passed to
221                    self.cook_test_set
222
223        Returns:
224
225            model predictions: {array-like}
226
227        """
228        return np.argmax(self.predict_proba(X, **kwargs), axis=1)
229
230    def predict_proba(self, X, **kwargs):
231        """Predict probabilities for test data X.
232
233        Args:
234
235            X: {array-like}, shape = [n_samples, n_features]
236                Training vectors, where n_samples is the number
237                of samples and n_features is the number of features.
238
239            **kwargs: additional parameters to be passed to
240                    self.cook_test_set
241
242        Returns:
243
244            probability estimates for test data: {array-like}
245
246        """
247
248        shape_X = X.shape
249
250        probs = np.zeros((shape_X[0], self.n_classes_))
251
252        if len(shape_X) == 1:
253            n_features = shape_X[0]
254
255            new_X = mo.rbind(
256                X.reshape(1, n_features),
257                np.ones(n_features).reshape(1, n_features),
258            )
259
260            Z = self.cook_test_set(new_X, **kwargs)
261
262            # loop on all the classes
263            for i in range(self.n_classes_):
264                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0]
265
266        else:
267            Z = self.cook_test_set(X, **kwargs)
268
269            # loop on all the classes
270            for i in range(self.n_classes_):
271                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)
272
273        expit_raw_probs = expit(probs)
274
275        return expit_raw_probs / expit_raw_probs.sum(axis=1)[:, None]
276
277    def decision_function(self, X, **kwargs):
278        """Compute the decision function of X.
279
280        Parameters:
281            X: {array-like}, shape = [n_samples, n_features]
282                Samples to compute decision function for.
283
284            **kwargs: additional parameters to be passed to
285                    self.cook_test_set
286
287        Returns:
288            array-like of shape (n_samples,) or (n_samples, n_classes)
289            Decision function of the input samples. The order of outputs is the same
290            as that of the classes passed to fit.
291        """
292        if not hasattr(self.obj, "decision_function"):
293            # If base classifier doesn't have decision_function, use predict_proba
294            proba = self.predict_proba(X, **kwargs)
295            if proba.shape[1] == 2:
296                return proba[:, 1]  # For binary classification
297            return proba  # For multiclass
298
299        if len(X.shape) == 1:
300            n_features = X.shape[0]
301            new_X = mo.rbind(
302                X.reshape(1, n_features),
303                np.ones(n_features).reshape(1, n_features),
304            )
305
306            return (
307                self.obj.decision_function(
308                    self.cook_test_set(new_X, **kwargs), **kwargs
309                )
310            )[0]
311
312        return self.obj.decision_function(
313            self.cook_test_set(X, **kwargs), **kwargs
314        )
315
316    @property
317    def _estimator_type(self):
318        return "classifier"

Multitask Classification model based on regression models, with shared covariates

Parameters:

obj: object
    any object (must be a regression model) containing a method fit (obj.fit())
    and a method predict (obj.predict())

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

col_sample: float
    percentage of covariates randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

seed: int
    reproducibility seed for nodes_sim=='uniform'

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

fit_objs_: dict
    fitted copies of `obj`, one per class, keyed by class index

n_classes_: int
    number of classes for the classifier

Examples:

See also https://github.com/Techtonique/nnetsauce/blob/master/examples/mtask_classification.py

import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time

breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target

X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
                                                    random_state=123+2*10)

# Linear Regression is used
regr = LinearRegression()
fit_obj = ns.MultitaskClassifier(regr, n_hidden_features=5,
                            n_clusters=2, type_clust="gmm")

start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")

print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))

start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
def fit(self, X, y, sample_weight=None, **kwargs):
171    def fit(self, X, y, sample_weight=None, **kwargs):
172        """Fit MultitaskClassifier to training data (X, y).
173
174        Args:
175
176            X: {array-like}, shape = [n_samples, n_features]
177                Training vectors, where n_samples is the number
178                of samples and n_features is the number of features.
179
180            y: array-like, shape = [n_samples]
181                Target values.
182
183            **kwargs: additional parameters to be passed to
184                    self.cook_training_set or self.obj.fit
185
186        Returns:
187
188            self: object
189
190        """
191
192        assert mx.is_factor(y), "y must contain only integers"
193
194        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
195
196        self.classes_ = np.unique(y)  # for compatibility with sklearn
197        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
198
199        # multitask response
200        Y = mo.one_hot_encode2(output_y, self.n_classes_)
201
202        # if sample_weight is None:
203        for i in range(self.n_classes_):
204            self.fit_objs_[i] = deepcopy(
205                self.obj.fit(scaled_Z, Y[:, i], **kwargs)
206            )
207
208        self.classes_ = np.unique(y)
209        return self

Fit MultitaskClassifier to training data (X, y).

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
def predict(self, X, **kwargs):
211    def predict(self, X, **kwargs):
212        """Predict test data X.
213
214        Args:
215
216            X: {array-like}, shape = [n_samples, n_features]
217                Training vectors, where n_samples is the number
218                of samples and n_features is the number of features.
219
220            **kwargs: additional parameters to be passed to
221                    self.cook_test_set
222
223        Returns:
224
225            model predictions: {array-like}
226
227        """
228        return np.argmax(self.predict_proba(X, **kwargs), axis=1)

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
def predict_proba(self, X, **kwargs):
230    def predict_proba(self, X, **kwargs):
231        """Predict probabilities for test data X.
232
233        Args:
234
235            X: {array-like}, shape = [n_samples, n_features]
236                Training vectors, where n_samples is the number
237                of samples and n_features is the number of features.
238
239            **kwargs: additional parameters to be passed to
240                    self.cook_test_set
241
242        Returns:
243
244            probability estimates for test data: {array-like}
245
246        """
247
248        shape_X = X.shape
249
250        probs = np.zeros((shape_X[0], self.n_classes_))
251
252        if len(shape_X) == 1:
253            n_features = shape_X[0]
254
255            new_X = mo.rbind(
256                X.reshape(1, n_features),
257                np.ones(n_features).reshape(1, n_features),
258            )
259
260            Z = self.cook_test_set(new_X, **kwargs)
261
262            # loop on all the classes
263            for i in range(self.n_classes_):
264                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0]
265
266        else:
267            Z = self.cook_test_set(X, **kwargs)
268
269            # loop on all the classes
270            for i in range(self.n_classes_):
271                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)
272
273        expit_raw_probs = expit(probs)
274
275        return expit_raw_probs / expit_raw_probs.sum(axis=1)[:, None]

Predict probabilities for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

probability estimates for test data: {array-like}
class NeuralNetRegressor(sklearn.base.BaseEstimator, sklearn.base.RegressorMixin):
class NeuralNetRegressor(BaseEstimator, RegressorMixin):
    """
    (Pretrained) Neural Network Regressor.

    Parameters:

        hidden_layer_sizes : tuple, default=(100,)
            The number of neurons in each hidden layer. The default is only
            applied when `weights` is not provided either.
        max_iter : int, default=100
            The maximum number of iterations to train the model.
        learning_rate : float, default=0.01
            The learning rate for the optimizer.
        l1_ratio : float, default=0.5
            The ratio of L1 regularization.
        alpha : float, default=1e-6
            The regularization parameter.
        activation_name : str, default="relu"
            The activation function to use.
        dropout : float, default=0.0
            The dropout rate.
        random_state : int, default=None
            The random state for the random number generator.
        weights : list, default=None
            The weights to initialize the model with, as a list of
            (weight matrix, bias) pairs, one per layer.

    Attributes:

        weights : list
            The weights of the model.
        params : list
            The parameters of the model.
        scaler_ : sklearn.preprocessing.StandardScaler
            The scaler used to standardize the input features.
        y_mean_ : float
            The mean of the target variable.

    Methods:

        fit(X, y)
            Fit the model to the data.
        predict(X)
            Predict the target variable.
        get_weights()
            Get the weights of the model.
        set_weights(weights)
            Set the weights of the model.
    """

    def __init__(
        self,
        hidden_layer_sizes=None,
        max_iter=100,
        learning_rate=0.01,
        l1_ratio=0.5,
        alpha=1e-6,
        activation_name="relu",
        dropout=0,
        weights=None,
        random_state=None,
    ):
        if not JAX_AVAILABLE:
            raise RuntimeError(
                "JAX is required for this feature. Install with: pip install jax"
            )

        if weights is None and hidden_layer_sizes is None:
            hidden_layer_sizes = (100,)  # default value if neither is provided
        self.hidden_layer_sizes = hidden_layer_sizes
        self.max_iter = max_iter
        self.learning_rate = learning_rate
        self.l1_ratio = l1_ratio
        self.alpha = alpha
        self.activation_name = activation_name
        self.dropout = dropout
        self.weights = weights
        self.random_state = random_state
        self.params = None
        self.scaler_ = StandardScaler()
        self.y_mean_ = None

    def _validate_weights(self, input_dim):
        """Validate that weights dimensions are coherent.

        Returns False when no weights are set (None or empty), True when
        every layer chains correctly from `input_dim` down to a single
        output. Raises ValueError on any dimension mismatch or when the
        weights are not a list of (weight, bias) pairs.
        """
        if not self.weights:
            return False

        try:
            # Check each layer's weights and biases
            prev_dim = input_dim
            for W, b in self.weights:
                # Check weight matrix dimensions
                if W.shape[0] != prev_dim:
                    raise ValueError(
                        f"Weight matrix input dimension {W.shape[0]} does not match, previous layer output dimension {prev_dim}"
                    )
                # Check bias dimension matches weight matrix output
                if W.shape[1] != b.shape[0]:
                    raise ValueError(
                        f"Bias dimension {b.shape[0]} does not match weight matrix, output dimension {W.shape[1]}"
                    )
                prev_dim = W.shape[1]

            # Check final output dimension is 1 for regression
            if prev_dim != 1:
                raise ValueError(
                    f"Final layer output dimension {prev_dim} must be 1 for regression"
                )

            return True
        except (AttributeError, IndexError) as err:
            raise ValueError(
                "Weights format is invalid. Expected list of (weight, bias) tuples"
            ) from err

    def fit(self, X, y):
        """Fit the network to (X, y) by gradient descent on all the data.

        X is standardized with `scaler_` and y is centered on its mean
        (stored in `y_mean_`). Training starts from `weights` when they are
        provided and valid, otherwise from freshly initialized parameters.
        The trained parameters are stored back into `weights`, so a later
        call to `fit` starts from them.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors.
            y: array-like, shape = [n_samples]
                Target values.

        Returns:

            self: object

        Raises:

            ValueError: if the provided weights are inconsistent, or if
                neither usable weights nor `hidden_layer_sizes` are available.
        """
        # Standardize the input features
        X = self.scaler_.fit_transform(X)
        # Ensure y is a 2D array (also accepts lists and Series)
        y = jnp.asarray(y).reshape(-1, 1)
        self.y_mean_ = jnp.mean(y)
        y = y - self.y_mean_
        # Validate or initialize weights; empty weights count as "not provided"
        if self.weights is not None and self._validate_weights(X.shape[1]):
            self.params = self.weights
        else:
            if self.hidden_layer_sizes is None:
                raise ValueError(
                    "Either weights or hidden_layer_sizes must be provided"
                )
            self.params = initialize_params(
                X.shape[1], self.hidden_layer_sizes, self.random_state
            )
        loss_fn = partial(loss, l1_ratio=self.l1_ratio, alpha=self.alpha)
        grad_loss = jit(grad(loss_fn))  # compiled gradient evaluation function
        perex_grads = jit(
            vmap(grad_loss, in_axes=(None, 0, 0))
        )  # fast per-example grads
        # Training loop
        for _ in range(self.max_iter):
            grads = perex_grads(self.params, X, y)
            # Average gradients across examples
            # (jax.tree_map is gone from recent JAX releases)
            grads = jax.tree_util.tree_map(
                lambda g: jnp.mean(g, axis=0), grads
            )
            # Update parameters
            self.params = [
                (W - self.learning_rate * dW, b - self.learning_rate * db)
                for (W, b), (dW, db) in zip(self.params, grads)
            ]
        # Store final weights
        self.weights = self.params
        return self

    def get_weights(self):
        """Return the current weights of the model."""
        if self.weights is None:
            raise ValueError(
                "No weights available. Model has not been fitted yet."
            )
        return self.weights

    def set_weights(self, weights):
        """Set the weights of the model manually."""
        self.weights = weights
        self.params = weights

    def predict(self, X):
        """Predict the target for X.

        X is standardized with the scaler fitted in `fit`, passed through
        the network, and `y_mean_` is added back to the flattened output.

        Raises:

            ValueError: if no parameters are available.
        """
        X = self.scaler_.transform(X)
        if self.params is None:
            raise ValueError("Model has not been fitted yet.")
        predictions = predict_internal(
            self.params,
            X,
            activation_func=self.activation_name,
            dropout=self.dropout,
            seed=self.random_state,
        )
        return predictions.reshape(-1) + self.y_mean_

(Pretrained) Neural Network Regressor.

Parameters:

hidden_layer_sizes : tuple, default=(100,)
    The number of neurons in each hidden layer.
max_iter : int, default=100
    The maximum number of iterations to train the model.
learning_rate : float, default=0.01
    The learning rate for the optimizer.
l1_ratio : float, default=0.5
    The ratio of L1 regularization.
alpha : float, default=1e-6
    The regularization parameter.
activation_name : str, default="relu"
    The activation function to use.
dropout : float, default=0.0
    The dropout rate.
random_state : int, default=None
    The random state for the random number generator.
weights : list, default=None
    The weights to initialize the model with.

Attributes:

weights : list
    The weights of the model.
params : list
    The parameters of the model.
scaler_ : sklearn.preprocessing.StandardScaler
    The scaler used to standardize the input features.
y_mean_ : float
    The mean of the target variable.

Methods:

fit(X, y)
    Fit the model to the data.
predict(X)
    Predict the target variable.
get_weights()
    Get the weights of the model.
set_weights(weights)
    Set the weights of the model.
def fit(self, X, y):
225    def fit(self, X, y):
226        # Standardize the input features
227        X = self.scaler_.fit_transform(X)
228        # Ensure y is 2D for consistency
229        y = y.reshape(-1, 1)
230        self.y_mean_ = jnp.mean(y)
231        y = y - self.y_mean_
232        # Validate or initialize weights
233        if self.weights is not None:
234            if self._validate_weights(X.shape[1]):
235                self.params = self.weights
236        else:
237            if self.hidden_layer_sizes is None:
238                raise ValueError(
239                    "Either weights or hidden_layer_sizes must be provided"
240                )
241            self.params = initialize_params(
242                X.shape[1], self.hidden_layer_sizes, self.random_state
243            )
244        loss_fn = partial(loss, l1_ratio=self.l1_ratio, alpha=self.alpha)
245        grad_loss = jit(grad(loss_fn))  # compiled gradient evaluation function
246        perex_grads = jit(
247            vmap(grad_loss, in_axes=(None, 0, 0))
248        )  # fast per-example grads
249        # Training loop
250        for _ in range(self.max_iter):
251            grads = perex_grads(self.params, X, y)
252            # Average gradients across examples
253            grads = jax.tree_map(lambda g: jnp.mean(g, axis=0), grads)
254            # Update parameters
255            self.params = [
256                (W - self.learning_rate * dW, b - self.learning_rate * db)
257                for (W, b), (dW, db) in zip(self.params, grads)
258            ]
259        # Store final weights
260        self.weights = self.params
261        return self
def predict(self, X):
276    def predict(self, X):
277        X = self.scaler_.transform(X)
278        if self.params is None:
279            raise ValueError("Model has not been fitted yet.")
280        predictions = predict_internal(
281            self.params,
282            X,
283            activation_func=self.activation_name,
284            dropout=self.dropout,
285            seed=self.random_state,
286        )
287        return predictions.reshape(-1) + self.y_mean_
class NeuralNetClassifier(sklearn.base.BaseEstimator, sklearn.base.ClassifierMixin):
 10class NeuralNetClassifier(BaseEstimator, ClassifierMixin):
 11    """
 12    (Pretrained) Neural Network Classifier.
 13
 14    Parameters:
 15
 16        hidden_layer_sizes : tuple, default=(100,)
 17            The number of neurons in each hidden layer.
 18        max_iter : int, default=100
 19            The maximum number of iterations to train the model.
 20        learning_rate : float, default=0.01
 21            The learning rate for the optimizer.
 22        l1_ratio : float, default=0.5
 23            The ratio of L1 regularization.
 24        alpha : float, default=1e-6
 25            The regularization parameter.
 26        activation_name : str, default="relu"
 27            The activation function to use.
 28        dropout : float, default=0.0
 29            The dropout rate.
 30        random_state : int, default=None
 31            The random state for the random number generator.
 32        weights : list, default=None
 33            The weights to initialize the model with.
 34
 35    Attributes:
 36
 37        weights : list
 38            The weights of the model.
 39        params : list
 40            The parameters of the model.
 41        scaler_ : sklearn.preprocessing.StandardScaler
 42            The scaler used to standardize the input features.
 43        y_mean_ : float
 44            The mean of the target variable.
 45
 46    Methods:
 47
 48        fit(X, y)
 49            Fit the model to the data.
 50        predict(X)
 51            Predict the target variable.
 52        predict_proba(X)
 53            Predict the probability of the target variable.
 54        get_weights()
 55            Get the weights of the model.
 56        set_weights(weights)
 57            Set the weights of the model.
 58    """
 59
 60    _estimator_type = "classifier"
 61
 62    def __init__(
 63        self,
 64        hidden_layer_sizes=(100,),
 65        max_iter=100,
 66        learning_rate=0.01,
 67        weights=None,
 68        l1_ratio=0.5,
 69        alpha=1e-6,
 70        activation_name="relu",
 71        dropout=0.0,
 72        random_state=None,
 73    ):
 74        self.hidden_layer_sizes = hidden_layer_sizes
 75        self.max_iter = max_iter
 76        self.learning_rate = learning_rate
 77        self.weights = weights
 78        self.l1_ratio = l1_ratio
 79        self.alpha = alpha
 80        self.activation_name = activation_name
 81        self.dropout = dropout
 82        self.random_state = random_state
 83        self.regr = None
 84
 85    def fit(self, X, y):
 86        """Fit the model to the data.
 87
 88        Parameters:
 89
 90            X: {array-like}, shape = [n_samples, n_features]
 91                Training vectors, where n_samples is the number of samples and
 92                n_features is the number of features.
 93            y: array-like, shape = [n_samples]
 94                Target values.
 95        """
 96        regressor = NeuralNetRegressor(
 97            hidden_layer_sizes=self.hidden_layer_sizes,
 98            max_iter=self.max_iter,
 99            learning_rate=self.learning_rate,
100            weights=self.weights,
101            l1_ratio=self.l1_ratio,
102            alpha=self.alpha,
103            activation_name=self.activation_name,
104            dropout=self.dropout,
105            random_state=self.random_state,
106        )
107        self.regr = SimpleMultitaskClassifier(regressor)
108        self.regr.fit(X, y)
109        self.classes_ = np.unique(y)
110        self.n_classes_ = len(self.classes_)
111        self.n_tasks_ = 1
112        self.n_features_in_ = X.shape[1]
113        self.n_outputs_ = 1
114        self.n_samples_fit_ = X.shape[0]
115        self.n_samples_test_ = X.shape[0]
116        self.n_features_out_ = 1
117        self.n_outputs_ = 1
118        self.n_features_in_ = X.shape[1]
119        self.n_features_out_ = 1
120        self.n_outputs_ = 1
121        return self
122
123    def predict_proba(self, X):
124        """Predict the probability of the target variable.
125
126        Parameters:
127
128            X: {array-like}, shape = [n_samples, n_features]
129                Training vectors, where n_samples is the number of samples and
130                n_features is the number of features.
131        """
132        return self.regr.predict_proba(X)
133
134    def predict(self, X):
135        """Predict the target variable.
136
137        Parameters:
138
139            X: {array-like}, shape = [n_samples, n_features]
140                Training vectors, where n_samples is the number of samples and
141                n_features is the number of features.
142        """
143        return self.regr.predict(X)
144
145    @property
146    def _estimator_type(self):
147        return "classifier"

(Pretrained) Neural Network Classifier.

Parameters:

hidden_layer_sizes : tuple, default=(100,)
    The number of neurons in each hidden layer.
max_iter : int, default=100
    The maximum number of iterations to train the model.
learning_rate : float, default=0.01
    The learning rate for the optimizer.
l1_ratio : float, default=0.5
    The ratio of L1 regularization.
alpha : float, default=1e-6
    The regularization parameter.
activation_name : str, default="relu"
    The activation function to use.
dropout : float, default=0.0
    The dropout rate.
random_state : int, default=None
    The random state for the random number generator.
weights : list, default=None
    The weights to initialize the model with.

Attributes:

weights : list
    The weights of the model.
params : list
    The parameters of the model.
scaler_ : sklearn.preprocessing.StandardScaler
    The scaler used to standardize the input features.
y_mean_ : float
    The mean of the target variable.

Methods:

fit(X, y)
    Fit the model to the data.
predict(X)
    Predict the target variable.
predict_proba(X)
    Predict the probability of the target variable.
get_weights()
    Get the weights of the model.
set_weights(weights)
    Set the weights of the model.
def fit(self, X, y):
 85    def fit(self, X, y):
 86        """Fit the model to the data.
 87
 88        Parameters:
 89
 90            X: {array-like}, shape = [n_samples, n_features]
 91                Training vectors, where n_samples is the number of samples and
 92                n_features is the number of features.
 93            y: array-like, shape = [n_samples]
 94                Target values.
 95        """
 96        regressor = NeuralNetRegressor(
 97            hidden_layer_sizes=self.hidden_layer_sizes,
 98            max_iter=self.max_iter,
 99            learning_rate=self.learning_rate,
100            weights=self.weights,
101            l1_ratio=self.l1_ratio,
102            alpha=self.alpha,
103            activation_name=self.activation_name,
104            dropout=self.dropout,
105            random_state=self.random_state,
106        )
107        self.regr = SimpleMultitaskClassifier(regressor)
108        self.regr.fit(X, y)
109        self.classes_ = np.unique(y)
110        self.n_classes_ = len(self.classes_)
111        self.n_tasks_ = 1
112        self.n_features_in_ = X.shape[1]
113        self.n_outputs_ = 1
114        self.n_samples_fit_ = X.shape[0]
115        self.n_samples_test_ = X.shape[0]
116        self.n_features_out_ = 1
117        self.n_outputs_ = 1
118        self.n_features_in_ = X.shape[1]
119        self.n_features_out_ = 1
120        self.n_outputs_ = 1
121        return self

Fit the model to the data.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number of samples and
    n_features is the number of features.
y: array-like, shape = [n_samples]
    Target values.
def predict_proba(self, X):
123    def predict_proba(self, X):
124        """Predict the probability of the target variable.
125
126        Parameters:
127
128            X: {array-like}, shape = [n_samples, n_features]
129                Training vectors, where n_samples is the number of samples and
130                n_features is the number of features.
131        """
132        return self.regr.predict_proba(X)

Predict the probability of the target variable.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number of samples and
    n_features is the number of features.
def predict(self, X):
134    def predict(self, X):
135        """Predict the target variable.
136
137        Parameters:
138
139            X: {array-like}, shape = [n_samples, n_features]
140                Training vectors, where n_samples is the number of samples and
141                n_features is the number of features.
142        """
143        return self.regr.predict(X)

Predict the target variable.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number of samples and
    n_features is the number of features.
class PredictionInterval(sklearn.base.BaseEstimator, sklearn.base.RegressorMixin):
 21class PredictionInterval(BaseEstimator, RegressorMixin):
 22    """Class PredictionInterval: Obtain prediction intervals.
 23
 24    Attributes:
 25
 26        obj: an object;
 27            fitted object containing methods `fit` and `predict`
 28
 29        method: a string;
 30            method for constructing the prediction intervals.
 31            Currently "splitconformal" (default) and "localconformal"
 32
 33        level: a float;
 34            Confidence level for prediction intervals. Default is 95,
 35            equivalent to a miscoverage error of 5 (%)
 36
 37        replications: an integer;
 38            Number of replications for simulated conformal (default is `None`)
 39
 40        type_pi: a string;
 41            type of prediction interval: currently `None`
 42            (split conformal without simulation)
 43            for type_pi in:
 44                - 'bootstrap': Bootstrap resampling.
 45                - 'kde': Kernel Density Estimation.
 46
 47        type_split: a string;
 48            "random" (random split of data) or "sequential" (sequential split of data)
 49
 50        seed: an integer;
 51            Reproducibility of fit (there's a random split between fitting and calibration data)
 52    """
 53
 54    def __init__(
 55        self,
 56        obj,
 57        method="splitconformal",
 58        level=95,
 59        type_pi=None,
 60        type_split="random",
 61        replications=None,
 62        kernel=None,
 63        agg="mean",
 64        seed=123,
 65    ):
 66        self.obj = obj
 67        self.method = method
 68        self.level = level
 69        self.type_pi = type_pi
 70        self.type_split = type_split
 71        self.replications = replications
 72        self.kernel = kernel
 73        self.agg = agg
 74        self.seed = seed
 75        self.alpha_ = 1 - self.level / 100
 76        self.quantile_ = None
 77        self.icp_ = None
 78        self.calibrated_residuals_ = None
 79        self.scaled_calibrated_residuals_ = None
 80        self.calibrated_residuals_scaler_ = None
 81        self.kde_ = None
 82        self.aic_ = None
 83        self.aicc_ = None
 84        self.bic_ = None
 85        self.sse_ = None
 86
 87    def fit(self, X, y, sample_weight=None, **kwargs):
 88        """Fit the `method` to training data (X, y).
 89
 90        Args:
 91
 92            X: array-like, shape = [n_samples, n_features];
 93                Training set vectors, where n_samples is the number
 94                of samples and n_features is the number of features.
 95
 96            y: array-like, shape = [n_samples, ]; Target values.
 97
 98            sample_weight: array-like, shape = [n_samples]
 99                Sample weights.
100
101        """
102
103        if self.type_split == "random":
104            X_train, X_calibration, y_train, y_calibration = train_test_split(
105                X, y, test_size=0.5, random_state=self.seed
106            )
107
108        elif self.type_split == "sequential":
109            n_x = X.shape[0]
110            n_x_half = n_x // 2
111            first_half_idx = range(0, n_x_half)
112            second_half_idx = range(n_x_half, n_x)
113            X_train = X[first_half_idx, :]
114            X_calibration = X[second_half_idx, :]
115            y_train = y[first_half_idx]
116            y_calibration = y[second_half_idx]
117
118        if self.method == "splitconformal":
119            self.obj.fit(X_train, y_train)
120            preds_calibration = self.obj.predict(X_calibration)
121            self.calibrated_residuals_ = y_calibration - preds_calibration
122            absolute_residuals = np.abs(self.calibrated_residuals_)
123            self.calibrated_residuals_scaler_ = StandardScaler(
124                with_mean=True, with_std=True
125            )
126            self.scaled_calibrated_residuals_ = (
127                self.calibrated_residuals_scaler_.fit_transform(
128                    self.calibrated_residuals_.reshape(-1, 1)
129                ).ravel()
130            )
131            try:
132                # numpy version >= 1.22
133                self.quantile_ = np.quantile(
134                    a=absolute_residuals, q=self.level / 100, method="higher"
135                )
136            except Exception:
137                # numpy version < 1.22
138                self.quantile_ = np.quantile(
139                    a=absolute_residuals,
140                    q=self.level / 100,
141                    interpolation="higher",
142                )
143
144        if self.method == "localconformal":
145            mad_estimator = ExtraTreesRegressor()
146            normalizer = RegressorNormalizer(
147                self.obj, mad_estimator, AbsErrorErrFunc()
148            )
149            nc = RegressorNc(self.obj, AbsErrorErrFunc(), normalizer)
150            self.icp_ = IcpRegressor(nc)
151            self.icp_.fit(X_train, y_train)
152            self.icp_.calibrate(X_calibration, y_calibration)
153
154            # FIX: Store calibration residuals from the ICP scorer so that
155            # simulation-based prediction intervals are available in predict().
156            raw_residuals = self.icp_.nc_function.err_func.apply(
157                self.icp_.nc_function.predict(X_calibration), y_calibration
158            )
159            self.calibrated_residuals_ = raw_residuals
160            self.calibrated_residuals_scaler_ = StandardScaler(
161                with_mean=True, with_std=True
162            )
163            self.scaled_calibrated_residuals_ = (
164                self.calibrated_residuals_scaler_.fit_transform(
165                    self.calibrated_residuals_.reshape(-1, 1)
166                ).ravel()
167            )
168
169        # Calculate AIC
170        # Get predictions
171        preds = self.obj.predict(X_calibration)
172
173        # Calculate SSE
174        self.sse_ = np.sum((y_calibration - preds) ** 2)
175
176        # Get number of parameters from the base model
177        n_params = (
178            getattr(self.obj, "n_hidden_features", 0) + X_calibration.shape[1]
179        )
180
181        # Calculate AIC
182        n_samples = len(y_calibration)
183        temp = n_samples * np.log(self.sse_ / n_samples)
184        self.aic_ = temp + 2 * n_params
185        self.bic_ = temp + np.log(n_samples) * n_params
186
187        return self
188
189    def _simulate_from_residuals(self, pred, n_obs):
190        """Shared helper: draw `self.replications` simulations from calibrated
191        residuals and return (sims, mean, lower, upper).
192
193        Args:
194            pred: 1-D array of point predictions, shape [n_obs].
195            n_obs: int, number of test observations.
196
197        Returns:
198            sims_   : 2-D array, shape [n_obs, replications]
199            mean_   : 1-D array, shape [n_obs]
200            lower_  : 1-D array, shape [n_obs]
201            upper_  : 1-D array, shape [n_obs]
202        """
203        type_pi = self.type_pi if self.type_pi is not None else "kde"
204        replications = (
205            self.replications if self.replications is not None else 100
206        )
207
208        assert type_pi in (
209            "bootstrap",
210            "kde",
211            "normal",
212            "ecdf",
213            "permutation",
214            "smooth-bootstrap",
215        ), (
216            "`type_pi` must be in ('bootstrap', 'kde', 'normal', 'ecdf', "
217            "'permutation', 'smooth-bootstrap')"
218        )
219
220        scale = self.calibrated_residuals_scaler_.scale_[0]
221
222        if type_pi == "bootstrap":
223            np.random.seed(self.seed)
224            residuals_sims = np.asarray(
225                [
226                    np.random.choice(
227                        a=self.scaled_calibrated_residuals_,
228                        size=n_obs,
229                    )
230                    for _ in range(replications)
231                ]
232            ).T  # shape [n_obs, replications]
233
234        elif type_pi == "kde":
235            kde = gaussian_kde(dataset=self.scaled_calibrated_residuals_)
236            residuals_sims = np.asarray(
237                [
238                    kde.resample(size=n_obs, seed=self.seed + i).ravel()
239                    for i in range(replications)
240                ]
241            ).T  # shape [n_obs, replications]
242
243        else:  # normal / ecdf / permutation / smooth-bootstrap
244            residuals_sims = np.asarray(
245                simulate_replications(
246                    data=self.scaled_calibrated_residuals_,
247                    method=type_pi,
248                    num_replications=replications,
249                    n_obs=n_obs,
250                    seed=self.seed,
251                )
252            ).T  # shape [n_obs, replications]
253
254        sims = np.asarray(
255            [
256                pred + scale * residuals_sims[:, i].ravel()
257                for i in range(replications)
258            ]
259        ).T  # shape [n_obs, replications]
260
261        mean_ = np.mean(sims, axis=1)
262        lower_ = np.quantile(sims, q=self.alpha_ / 200, axis=1)
263        upper_ = np.quantile(sims, q=1 - self.alpha_ / 200, axis=1)
264
265        return sims, mean_, lower_, upper_
266
267    def predict(self, X, return_pi=False):
268        """Obtain predictions and prediction intervals
269
270        Args:
271
272            X: array-like, shape = [n_samples, n_features];
273                Testing set vectors, where n_samples is the number
274                of samples and n_features is the number of features.
275
276            return_pi: boolean
277                Whether the prediction interval is returned or not.
278                Default is False, for compatibility with other _estimators_.
279                If True, a tuple containing the predictions + lower and upper
280                bounds is returned.
281
282        """
283
284        if self.method == "splitconformal":
285            pred = self.obj.predict(X)
286
287        if self.method == "localconformal":
288            pred = self.icp_.predict(X)
289
290        # ------------------------------------------------------------------ #
291        # splitconformal
292        # ------------------------------------------------------------------ #
293        if self.method == "splitconformal":
294            if self.replications is None and self.type_pi is None:
295                # Plain split-conformal: symmetric quantile band
296                if return_pi:
297                    DescribeResult = namedtuple(
298                        "DescribeResult", ("mean", "lower", "upper")
299                    )
300                    return DescribeResult(
301                        pred, pred - self.quantile_, pred + self.quantile_
302                    )
303                else:
304                    return pred
305
306            else:
307                # FIX: simulation-based prediction intervals for splitconformal.
308                # Previously this branch raised NotImplementedError even though
309                # all the necessary logic was present — it was simply unreachable
310                # because the raise fired unconditionally.  The code has been
311                # moved into _simulate_from_residuals() and called here.
312
313                if self.type_pi is None:
314                    warnings.warn(
315                        "type_pi must be set when replications is not None; "
316                        "defaulting to 'kde'."
317                    )
318                if self.replications is None:
319                    warnings.warn(
320                        "replications must be set when type_pi is not None; "
321                        "defaulting to 100."
322                    )
323
324                (
325                    self.sims_,
326                    self.mean_,
327                    self.lower_,
328                    self.upper_,
329                ) = self._simulate_from_residuals(pred, X.shape[0])
330
331                DescribeResult = namedtuple(
332                    "DescribeResult", ("mean", "sims", "lower", "upper")
333                )
334                return DescribeResult(
335                    self.mean_, self.sims_, self.lower_, self.upper_
336                )
337
338        # ------------------------------------------------------------------ #
339        # localconformal
340        # ------------------------------------------------------------------ #
341        if self.method == "localconformal":
342            if self.replications is None:
343                if return_pi:
344                    predictions_bounds = self.icp_.predict(
345                        X, significance=1 - self.level
346                    )
347                    DescribeResult = namedtuple(
348                        "DescribeResult", ("mean", "lower", "upper")
349                    )
350                    return DescribeResult(
351                        pred,
352                        predictions_bounds[:, 0],
353                        predictions_bounds[:, 1],
354                    )
355                else:
356                    return pred
357
358            else:
359                # FIX: simulation-based prediction intervals for localconformal.
360                # Previously this always raised NotImplementedError.  Now we
361                # reuse the calibration residuals stored during fit() and apply
362                # the same simulation logic used by splitconformal via the
363                # shared helper _simulate_from_residuals().
364
365                if self.type_pi is None:
366                    warnings.warn(
367                        "type_pi must be set when replications is not None; "
368                        "defaulting to 'kde'."
369                    )
370
371                (
372                    self.sims_,
373                    self.mean_,
374                    self.lower_,
375                    self.upper_,
376                ) = self._simulate_from_residuals(pred, X.shape[0])
377
378                DescribeResult = namedtuple(
379                    "DescribeResult", ("mean", "sims", "lower", "upper")
380                )
381                return DescribeResult(
382                    self.mean_, self.sims_, self.lower_, self.upper_
383                )

Class PredictionInterval: Obtain prediction intervals.

Attributes:

obj: an object;
    fitted object containing methods `fit` and `predict`

method: a string;
    method for constructing the prediction intervals.
    Currently "splitconformal" (default) and "localconformal"

level: a float;
    Confidence level for prediction intervals. Default is 95,
    equivalent to a miscoverage error of 5 (%)

replications: an integer;
    Number of replications for simulated conformal (default is `None`)

type_pi: a string;
    type of prediction interval. `None` (the default) means split
    conformal without simulation. For simulation-based intervals,
    type_pi is one of:
        - 'bootstrap': Bootstrap resampling.
        - 'kde': Kernel Density Estimation.
        - 'normal', 'ecdf', 'permutation', 'smooth-bootstrap'.

type_split: a string;
    "random" (random split of data) or "sequential" (sequential split of data)

seed: an integer;
    Reproducibility of fit (there's a random split between fitting and calibration data)
def fit(self, X, y, sample_weight=None, **kwargs):
 87    def fit(self, X, y, sample_weight=None, **kwargs):
 88        """Fit the `method` to training data (X, y).
 89
 90        Args:
 91
 92            X: array-like, shape = [n_samples, n_features];
 93                Training set vectors, where n_samples is the number
 94                of samples and n_features is the number of features.
 95
 96            y: array-like, shape = [n_samples, ]; Target values.
 97
 98            sample_weight: array-like, shape = [n_samples]
 99                Sample weights.
100
101        """
102
103        if self.type_split == "random":
104            X_train, X_calibration, y_train, y_calibration = train_test_split(
105                X, y, test_size=0.5, random_state=self.seed
106            )
107
108        elif self.type_split == "sequential":
109            n_x = X.shape[0]
110            n_x_half = n_x // 2
111            first_half_idx = range(0, n_x_half)
112            second_half_idx = range(n_x_half, n_x)
113            X_train = X[first_half_idx, :]
114            X_calibration = X[second_half_idx, :]
115            y_train = y[first_half_idx]
116            y_calibration = y[second_half_idx]
117
118        if self.method == "splitconformal":
119            self.obj.fit(X_train, y_train)
120            preds_calibration = self.obj.predict(X_calibration)
121            self.calibrated_residuals_ = y_calibration - preds_calibration
122            absolute_residuals = np.abs(self.calibrated_residuals_)
123            self.calibrated_residuals_scaler_ = StandardScaler(
124                with_mean=True, with_std=True
125            )
126            self.scaled_calibrated_residuals_ = (
127                self.calibrated_residuals_scaler_.fit_transform(
128                    self.calibrated_residuals_.reshape(-1, 1)
129                ).ravel()
130            )
131            try:
132                # numpy version >= 1.22
133                self.quantile_ = np.quantile(
134                    a=absolute_residuals, q=self.level / 100, method="higher"
135                )
136            except Exception:
137                # numpy version < 1.22
138                self.quantile_ = np.quantile(
139                    a=absolute_residuals,
140                    q=self.level / 100,
141                    interpolation="higher",
142                )
143
144        if self.method == "localconformal":
145            mad_estimator = ExtraTreesRegressor()
146            normalizer = RegressorNormalizer(
147                self.obj, mad_estimator, AbsErrorErrFunc()
148            )
149            nc = RegressorNc(self.obj, AbsErrorErrFunc(), normalizer)
150            self.icp_ = IcpRegressor(nc)
151            self.icp_.fit(X_train, y_train)
152            self.icp_.calibrate(X_calibration, y_calibration)
153
154            # FIX: Store calibration residuals from the ICP scorer so that
155            # simulation-based prediction intervals are available in predict().
156            raw_residuals = self.icp_.nc_function.err_func.apply(
157                self.icp_.nc_function.predict(X_calibration), y_calibration
158            )
159            self.calibrated_residuals_ = raw_residuals
160            self.calibrated_residuals_scaler_ = StandardScaler(
161                with_mean=True, with_std=True
162            )
163            self.scaled_calibrated_residuals_ = (
164                self.calibrated_residuals_scaler_.fit_transform(
165                    self.calibrated_residuals_.reshape(-1, 1)
166                ).ravel()
167            )
168
169        # Calculate AIC
170        # Get predictions
171        preds = self.obj.predict(X_calibration)
172
173        # Calculate SSE
174        self.sse_ = np.sum((y_calibration - preds) ** 2)
175
176        # Get number of parameters from the base model
177        n_params = (
178            getattr(self.obj, "n_hidden_features", 0) + X_calibration.shape[1]
179        )
180
181        # Calculate AIC
182        n_samples = len(y_calibration)
183        temp = n_samples * np.log(self.sse_ / n_samples)
184        self.aic_ = temp + 2 * n_params
185        self.bic_ = temp + np.log(n_samples) * n_params
186
187        return self

Fit the method to training data (X, y).

Args:

X: array-like, shape = [n_samples, n_features];
    Training set vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples, ]; Target values.

sample_weight: array-like, shape = [n_samples]
    Sample weights.
def predict(self, X, return_pi=False):
267    def predict(self, X, return_pi=False):
268        """Obtain predictions and prediction intervals
269
270        Args:
271
272            X: array-like, shape = [n_samples, n_features];
273                Testing set vectors, where n_samples is the number
274                of samples and n_features is the number of features.
275
276            return_pi: boolean
277                Whether the prediction interval is returned or not.
278                Default is False, for compatibility with other _estimators_.
279                If True, a tuple containing the predictions + lower and upper
280                bounds is returned.
281
282        """
283
284        if self.method == "splitconformal":
285            pred = self.obj.predict(X)
286
287        if self.method == "localconformal":
288            pred = self.icp_.predict(X)
289
290        # ------------------------------------------------------------------ #
291        # splitconformal
292        # ------------------------------------------------------------------ #
293        if self.method == "splitconformal":
294            if self.replications is None and self.type_pi is None:
295                # Plain split-conformal: symmetric quantile band
296                if return_pi:
297                    DescribeResult = namedtuple(
298                        "DescribeResult", ("mean", "lower", "upper")
299                    )
300                    return DescribeResult(
301                        pred, pred - self.quantile_, pred + self.quantile_
302                    )
303                else:
304                    return pred
305
306            else:
307                # FIX: simulation-based prediction intervals for splitconformal.
308                # Previously this branch raised NotImplementedError even though
309                # all the necessary logic was present — it was simply unreachable
310                # because the raise fired unconditionally.  The code has been
311                # moved into _simulate_from_residuals() and called here.
312
313                if self.type_pi is None:
314                    warnings.warn(
315                        "type_pi must be set when replications is not None; "
316                        "defaulting to 'kde'."
317                    )
318                if self.replications is None:
319                    warnings.warn(
320                        "replications must be set when type_pi is not None; "
321                        "defaulting to 100."
322                    )
323
324                (
325                    self.sims_,
326                    self.mean_,
327                    self.lower_,
328                    self.upper_,
329                ) = self._simulate_from_residuals(pred, X.shape[0])
330
331                DescribeResult = namedtuple(
332                    "DescribeResult", ("mean", "sims", "lower", "upper")
333                )
334                return DescribeResult(
335                    self.mean_, self.sims_, self.lower_, self.upper_
336                )
337
338        # ------------------------------------------------------------------ #
339        # localconformal
340        # ------------------------------------------------------------------ #
341        if self.method == "localconformal":
342            if self.replications is None:
343                if return_pi:
344                    predictions_bounds = self.icp_.predict(
345                        X, significance=1 - self.level
346                    )
347                    DescribeResult = namedtuple(
348                        "DescribeResult", ("mean", "lower", "upper")
349                    )
350                    return DescribeResult(
351                        pred,
352                        predictions_bounds[:, 0],
353                        predictions_bounds[:, 1],
354                    )
355                else:
356                    return pred
357
358            else:
359                # FIX: simulation-based prediction intervals for localconformal.
360                # Previously this always raised NotImplementedError.  Now we
361                # reuse the calibration residuals stored during fit() and apply
362                # the same simulation logic used by splitconformal via the
363                # shared helper _simulate_from_residuals().
364
365                if self.type_pi is None:
366                    warnings.warn(
367                        "type_pi must be set when replications is not None; "
368                        "defaulting to 'kde'."
369                    )
370
371                (
372                    self.sims_,
373                    self.mean_,
374                    self.lower_,
375                    self.upper_,
376                ) = self._simulate_from_residuals(pred, X.shape[0])
377
378                DescribeResult = namedtuple(
379                    "DescribeResult", ("mean", "sims", "lower", "upper")
380                )
381                return DescribeResult(
382                    self.mean_, self.sims_, self.lower_, self.upper_
383                )

Obtain predictions and prediction intervals

Args:

X: array-like, shape = [n_samples, n_features];
    Testing set vectors, where n_samples is the number
    of samples and n_features is the number of features.

return_pi: boolean
    Whether the prediction interval is returned or not.
    Default is False, for compatibility with other _estimators_.
    If True, a tuple containing the predictions + lower and upper
    bounds is returned.
class PredictionSet(sklearn.base.BaseEstimator, sklearn.base.ClassifierMixin):
 20class PredictionSet(BaseEstimator, ClassifierMixin):
 21    """Class PredictionSet: Obtain prediction sets.
 22
 23    Attributes:
 24
 25        obj: an object;
 26            fitted object containing methods `fit` and `predict`
 27
 28        method: a string;
 29            method for constructing the prediction sets.
 30            Currently "icp" (default, inductive conformal) and "tcp" (transductive conformal)
 31
 32        level: a float;
 33            Confidence level for prediction sets. Default is None,
 34            95 is equivalent to a miscoverage error of 5 (%)
 35
 36        seed: an integer;
 37            Reproducibility of fit (there's a random split between fitting and calibration data)
 38    """
 39
 40    def __init__(
 41        self,
 42        obj,
 43        method="icp",
 44        level=None,
 45        seed=123,
 46    ):
 47        self.obj = obj
 48        self.method = method
 49        self.level = level
 50        self.seed = seed
 51        if self.level is not None:
 52            self.alpha_ = 1 - self.level / 100
 53        self.quantile_ = None
 54        self.icp_ = None
 55        self.tcp_ = None
 56
 57        if self.method == "icp":
 58            self.icp_ = IcpClassifier(
 59                ClassifierNc(ClassifierAdapter(self.obj), MarginErrFunc()),
 60            )
 61        elif self.method == "tcp":
 62            self.tcp_ = TcpClassifier(
 63                ClassifierNc(ClassifierAdapter(self.obj), MarginErrFunc()),
 64            )
 65        else:
 66            raise ValueError("`self.method` must be in ('icp', 'tcp')")
 67
 68    def fit(self, X, y, sample_weight=None, **kwargs):
 69        """Fit the `method` to training data (X, y).
 70
 71        Args:
 72
 73            X: array-like, shape = [n_samples, n_features];
 74                Training set vectors, where n_samples is the number
 75                of samples and n_features is the number of features.
 76
 77            y: array-like, shape = [n_samples, ]; Target values.
 78
 79            sample_weight: array-like, shape = [n_samples]
 80                Sample weights.
 81
 82        """
 83        if self.method == "icp":
 84            X_train, X_calibration, y_train, y_calibration = train_test_split(
 85                X, y, test_size=0.5, random_state=self.seed
 86            )
 87            self.icp_.fit(X_train, y_train)
 88            self.icp_.calibrate(X_calibration, y_calibration)
 89
 90        elif self.method == "tcp":
 91            self.tcp_.fit(X, y)
 92
 93        return self
 94
 95    def predict(self, X, **kwargs):
 96        """Obtain predictions and prediction sets
 97
 98        Args:
 99
100            X: array-like, shape = [n_samples, n_features];
101                Testing set vectors, where n_samples is the number
102                of samples and n_features is the number of features.
103
104        """
105
106        if self.method == "icp":
107            return self.icp_.predict(X, significance=self.alpha_, **kwargs)
108
109        elif self.method == "tcp":
110            return self.tcp_.predict(X, significance=self.alpha_, **kwargs)
111
112        else:
113            raise ValueError("`self.method` must be in ('icp', 'tcp')")
114
115    def predict_proba(self, X):
116        predictions = self.predict(X)
117        return np.eye(len(np.unique(predictions)))[predictions]

Class PredictionSet: Obtain prediction sets.

Attributes:

obj: an object;
    fitted object containing methods `fit` and `predict`

method: a string;
    method for constructing the prediction sets.
    Currently "icp" (default, inductive conformal) and "tcp" (transductive conformal)

level: a float;
    Confidence level for prediction sets, in percent; for example,
    95 is equivalent to a miscoverage error of 5 (%). Default is None.

seed: an integer;
    Reproducibility of fit (there's a random split between fitting and calibration data)
def fit(self, X, y, sample_weight=None, **kwargs):
68    def fit(self, X, y, sample_weight=None, **kwargs):
69        """Fit the `method` to training data (X, y).
70
71        Args:
72
73            X: array-like, shape = [n_samples, n_features];
74                Training set vectors, where n_samples is the number
75                of samples and n_features is the number of features.
76
77            y: array-like, shape = [n_samples, ]; Target values.
78
79            sample_weight: array-like, shape = [n_samples]
80                Sample weights.
81
82        """
83        if self.method == "icp":
84            X_train, X_calibration, y_train, y_calibration = train_test_split(
85                X, y, test_size=0.5, random_state=self.seed
86            )
87            self.icp_.fit(X_train, y_train)
88            self.icp_.calibrate(X_calibration, y_calibration)
89
90        elif self.method == "tcp":
91            self.tcp_.fit(X, y)
92
93        return self

Fit the method to training data (X, y).

Args:

X: array-like, shape = [n_samples, n_features];
    Training set vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples, ]; Target values.

sample_weight: array-like, shape = [n_samples]
    Sample weights.
def predict(self, X, **kwargs):
 95    def predict(self, X, **kwargs):
 96        """Obtain predictions and prediction sets
 97
 98        Args:
 99
100            X: array-like, shape = [n_samples, n_features];
101                Testing set vectors, where n_samples is the number
102                of samples and n_features is the number of features.
103
104        """
105
106        if self.method == "icp":
107            return self.icp_.predict(X, significance=self.alpha_, **kwargs)
108
109        elif self.method == "tcp":
110            return self.tcp_.predict(X, significance=self.alpha_, **kwargs)
111
112        else:
113            raise ValueError("`self.method` must be in ('icp', 'tcp')")

Obtain predictions and prediction sets

Args:

X: array-like, shape = [n_samples, n_features];
    Testing set vectors, where n_samples is the number
    of samples and n_features is the number of features.
def predict_proba(self, X):
115    def predict_proba(self, X):
116        predictions = self.predict(X)
117        return np.eye(len(np.unique(predictions)))[predictions]
class SimpleMultitaskClassifier(nnetsauce.Base, sklearn.base.ClassifierMixin):
 19class SimpleMultitaskClassifier(Base, ClassifierMixin):
 20    """Multitask Classification model based on regression models, with shared covariates
 21
 22    Parameters:
 23
 24        obj: object
 25            any object (must be a regression model) containing a method fit (obj.fit())
 26            and a method predict (obj.predict())
 27
 28        seed: int
 29            reproducibility seed
 30
 31    Attributes:
 32
 33        fit_objs_: dict
 34            objects adjusted to each individual time series
 35
 36        n_classes_: int
 37            number of classes for the classifier
 38
 39    Examples:
 40
 41    ```python
 42    import nnetsauce as ns
 43    import numpy as np
 44    from sklearn.datasets import load_breast_cancer
 45    from sklearn.linear_model import LinearRegression
 46    from sklearn.model_selection import train_test_split
 47    from sklearn import metrics
 48    from time import time
 49
 50    breast_cancer = load_breast_cancer()
 51    Z = breast_cancer.data
 52    t = breast_cancer.target
 53
 54    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
 55                                                        random_state=123+2*10)
 56
 57    # Linear Regression is used
 58    regr = LinearRegression()
 59    fit_obj = ns.SimpleMultitaskClassifier(regr)
 60
 61    start = time()
 62    fit_obj.fit(X_train, y_train)
 63    print(f"Elapsed {time() - start}")
 64
 65    print(fit_obj.score(X_test, y_test))
 66    print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
 67
 68    start = time()
 69    preds = fit_obj.predict(X_test)
 70    print(f"Elapsed {time() - start}")
 71    print(metrics.classification_report(preds, y_test))
 72    ```
 73
 74    """
 75
 76    # construct the object -----
 77    _estimator_type = "classifier"
 78
 79    def __init__(
 80        self,
 81        obj,
 82    ):
 83        self.type_fit = "classification"
 84        self.obj = obj
 85        self.fit_objs_ = {}
 86        self.multioutput_model_ = None
 87        self.X_scaler_ = StandardScaler()
 88        self.scaled_X_ = None
 89
 90    def fit(self, X, y, sample_weight=None, **kwargs):
 91        """Fit SimpleMultitaskClassifier to training data (X, y).
 92
 93        Args:
 94
 95            X: {array-like}, shape = [n_samples, n_features]
 96                Training vectors, where n_samples is the number
 97                of samples and n_features is the number of features.
 98
 99            y: array-like, shape = [n_samples]
100                Target values.
101
102            **kwargs: additional parameters to be passed to
103                    self.cook_training_set or self.obj.fit
104
105        Returns:
106
107            self: object
108
109        """
110
111        assert mx.is_factor(y), "y must contain only integers"
112
113        self.classes_ = np.unique(y)  # for compatibility with sklearn
114        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
115
116        self.scaled_X_ = self.X_scaler_.fit_transform(X)
117
118        # multitask response
119        Y = mo.one_hot_encode2(y, self.n_classes_)
120
121        # Try MultiOutputRegressor first (more efficient)
122        try:
123            self.multioutput_model_ = MultiOutputRegressor(deepcopy(self.obj))
124            try:
125                self.multioutput_model_.fit(
126                    self.scaled_X_, Y, sample_weight=sample_weight, **kwargs
127                )
128            except TypeError:
129                # If sample_weight not supported, try without it
130                self.multioutput_model_.fit(self.scaled_X_, Y, **kwargs)
131        except Exception:
132            # Fallback: fit separate models for each class
133            self.multioutput_model_ = None
134            try:
135                for i in range(self.n_classes_):
136                    self.fit_objs_[i] = deepcopy(
137                        self.obj.fit(
138                            self.scaled_X_,
139                            Y[:, i],
140                            sample_weight=sample_weight,
141                            **kwargs
142                        )
143                    )
144            except TypeError:
145                for i in range(self.n_classes_):
146                    self.fit_objs_[i] = deepcopy(
147                        self.obj.fit(self.scaled_X_, Y[:, i], **kwargs)
148                    )
149        return self
150
151    def predict(self, X, **kwargs):
152        """Predict test data X.
153
154        Args:
155
156            X: {array-like}, shape = [n_samples, n_features]
157                Training vectors, where n_samples is the number
158                of samples and n_features is the number of features.
159
160            **kwargs: additional parameters
161
162        Returns:
163
164            model predictions: {array-like}
165
166        """
167        return np.argmax(self.predict_proba(X, **kwargs), axis=1)
168
169    def predict_proba(self, X, **kwargs):
170        """Predict probabilities for test data X.
171
172        Args:
173
174            X: {array-like}, shape = [n_samples, n_features]
175                Training vectors, where n_samples is the number
176                of samples and n_features is the number of features.
177
178            **kwargs: additional parameters
179
180        Returns:
181
182            probability estimates for test data: {array-like}
183
184        """
185
186        shape_X = X.shape
187
188        if self.multioutput_model_ is not None:
189            # Use MultiOutputRegressor for prediction
190            if len(shape_X) == 1:  # one example
191                n_features = shape_X[0]
192                new_X = mo.rbind(
193                    X.reshape(1, n_features),
194                    np.ones(n_features).reshape(1, n_features),
195                )
196                Z = self.X_scaler_.transform(new_X, **kwargs)
197                probs = self.multioutput_model_.predict(Z, **kwargs)[:1, :]
198            else:  # multiple rows
199                Z = self.X_scaler_.transform(X, **kwargs)
200                probs = self.multioutput_model_.predict(Z, **kwargs)
201        else:
202            # Use separate models for each class
203            probs = np.zeros((shape_X[0], self.n_classes_))
204
205            if len(shape_X) == 1:  # one example
206                n_features = shape_X[0]
207
208                new_X = mo.rbind(
209                    X.reshape(1, n_features),
210                    np.ones(n_features).reshape(1, n_features),
211                )
212
213                Z = self.X_scaler_.transform(new_X, **kwargs)
214
215                # Fallback to standard model
216                for i in range(self.n_classes_):
217                    probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0]
218
219            else:  # multiple rows
220                Z = self.X_scaler_.transform(X, **kwargs)
221
222                # Fallback to standard model
223                for i in range(self.n_classes_):
224                    probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)
225
226        expit_raw_probs = expit(probs)
227
228        # Add small epsilon to avoid division by zero
229        row_sums = expit_raw_probs.sum(axis=1)[:, None]
230        row_sums[row_sums < 1e-10] = 1e-10
231
232        return expit_raw_probs / row_sums
233
234    def decision_function(self, X, **kwargs):
235        """Compute the decision function of X.
236
237        Parameters:
238            X: {array-like}, shape = [n_samples, n_features]
239                Samples to compute decision function for.
240
241            **kwargs: additional parameters to be passed to
242                    self.cook_test_set
243
244        Returns:
245            array-like of shape (n_samples,) or (n_samples, n_classes)
246            Decision function of the input samples. The order of outputs is the same
247            as that of the classes passed to fit.
248        """
249        if not hasattr(self.obj, "decision_function"):
250            # If base classifier doesn't have decision_function, use predict_proba
251            proba = self.predict_proba(X, **kwargs)
252            if proba.shape[1] == 2:
253                return proba[:, 1]  # For binary classification
254            return proba  # For multiclass
255
256        if len(X.shape) == 1:
257            n_features = X.shape[0]
258            new_X = mo.rbind(
259                X.reshape(1, n_features),
260                np.ones(n_features).reshape(1, n_features),
261            )
262
263            return (
264                self.obj.decision_function(
265                    self.cook_test_set(new_X, **kwargs), **kwargs
266                )
267            )[0]
268
269        return self.obj.decision_function(
270            self.cook_test_set(X, **kwargs), **kwargs
271        )
272
273    @property
274    def _estimator_type(self):
275        return "classifier"

Multitask Classification model based on regression models, with shared covariates

Parameters:

obj: object
    any object (must be a regression model) containing a method fit (obj.fit())
    and a method predict (obj.predict())

seed: int
    reproducibility seed

Attributes:

fit_objs_: dict
    regression models fitted to each class (one per column of the one-hot encoded response)

n_classes_: int
    number of classes for the classifier

Examples:

import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time

breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target

X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
                                                    random_state=123+2*10)

# Linear Regression is used
regr = LinearRegression()
fit_obj = ns.SimpleMultitaskClassifier(regr)

start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")

print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))

start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
def fit(self, X, y, sample_weight=None, **kwargs):
 90    def fit(self, X, y, sample_weight=None, **kwargs):
 91        """Fit SimpleMultitaskClassifier to training data (X, y).
 92
 93        Args:
 94
 95            X: {array-like}, shape = [n_samples, n_features]
 96                Training vectors, where n_samples is the number
 97                of samples and n_features is the number of features.
 98
 99            y: array-like, shape = [n_samples]
100                Target values.
101
102            **kwargs: additional parameters to be passed to
103                    self.cook_training_set or self.obj.fit
104
105        Returns:
106
107            self: object
108
109        """
110
111        assert mx.is_factor(y), "y must contain only integers"
112
113        self.classes_ = np.unique(y)  # for compatibility with sklearn
114        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
115
116        self.scaled_X_ = self.X_scaler_.fit_transform(X)
117
118        # multitask response
119        Y = mo.one_hot_encode2(y, self.n_classes_)
120
121        # Try MultiOutputRegressor first (more efficient)
122        try:
123            self.multioutput_model_ = MultiOutputRegressor(deepcopy(self.obj))
124            try:
125                self.multioutput_model_.fit(
126                    self.scaled_X_, Y, sample_weight=sample_weight, **kwargs
127                )
128            except TypeError:
129                # If sample_weight not supported, try without it
130                self.multioutput_model_.fit(self.scaled_X_, Y, **kwargs)
131        except Exception:
132            # Fallback: fit separate models for each class
133            self.multioutput_model_ = None
134            try:
135                for i in range(self.n_classes_):
136                    self.fit_objs_[i] = deepcopy(
137                        self.obj.fit(
138                            self.scaled_X_,
139                            Y[:, i],
140                            sample_weight=sample_weight,
141                            **kwargs
142                        )
143                    )
144            except TypeError:
145                for i in range(self.n_classes_):
146                    self.fit_objs_[i] = deepcopy(
147                        self.obj.fit(self.scaled_X_, Y[:, i], **kwargs)
148                    )
149        return self

Fit SimpleMultitaskClassifier to training data (X, y).

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to the
        `fit` call of the regression model(s)

Returns:

self: object
def predict(self, X, **kwargs):
151    def predict(self, X, **kwargs):
152        """Predict test data X.
153
154        Args:
155
156            X: {array-like}, shape = [n_samples, n_features]
157                Training vectors, where n_samples is the number
158                of samples and n_features is the number of features.
159
160            **kwargs: additional parameters
161
162        Returns:
163
164            model predictions: {array-like}
165
166        """
167        return np.argmax(self.predict_proba(X, **kwargs), axis=1)

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters

Returns:

model predictions: {array-like}
def predict_proba(self, X, **kwargs):
169    def predict_proba(self, X, **kwargs):
170        """Predict probabilities for test data X.
171
172        Args:
173
174            X: {array-like}, shape = [n_samples, n_features]
175                Training vectors, where n_samples is the number
176                of samples and n_features is the number of features.
177
178            **kwargs: additional parameters
179
180        Returns:
181
182            probability estimates for test data: {array-like}
183
184        """
185
186        shape_X = X.shape
187
188        if self.multioutput_model_ is not None:
189            # Use MultiOutputRegressor for prediction
190            if len(shape_X) == 1:  # one example
191                n_features = shape_X[0]
192                new_X = mo.rbind(
193                    X.reshape(1, n_features),
194                    np.ones(n_features).reshape(1, n_features),
195                )
196                Z = self.X_scaler_.transform(new_X, **kwargs)
197