nnetsauce

from .attention import AttentionMechanism
from .base.base import Base
from .base.baseRegressor import BaseRegressor
from .boosting.adaBoostClassifier import AdaBoostClassifier
from .custom.customClassifier import CustomClassifier
from .custom.customRegressor import CustomRegressor
from .custom.customBackpropRegressor import CustomBackPropRegressor
from .datasets import Downloader
from .deep.deepClassifier import DeepClassifier
from .deep.deepRegressor import DeepRegressor
from .deep.deepMTS import DeepMTS
from .glm.glmClassifier import GLMClassifier
from .glm.glmRegressor import GLMRegressor
from .kernel.kernel import KernelRidge
from .lazypredict.lazydeepClassifier import LazyDeepClassifier, LazyClassifier
from .lazypredict.lazydeepRegressor import LazyDeepRegressor, LazyRegressor
from .lazypredict.lazydeepMTS import LazyDeepMTS, LazyMTS
from .mts.mts import MTS
from .mts.mlarch import MLARCH
from .mts.classical import ClassicalMTS
from .mts.stackedmts import MTSStacker
from .multitask.multitaskClassifier import MultitaskClassifier
from .multitask.simplemultitaskClassifier import SimpleMultitaskClassifier
from .neuralnet.neuralnetregression import NeuralNetRegressor
from .neuralnet.neuralnetclassification import NeuralNetClassifier
from .optimizers.optimizer import Optimizer
from .predictioninterval import PredictionInterval
from .predictionset import PredictionSet
from .quantile.quantileregression import QuantileRegressor
from .quantile.quantileclassification import QuantileClassifier
from .randombag.randomBagClassifier import RandomBagClassifier
from .randombag.randomBagRegressor import RandomBagRegressor
from .randomfourier.randomfourier import RandomFourierEstimator
from .rff.rffridge import (
    RandomFourierFeaturesRidge,
    RandomFourierFeaturesRidgeGCV,
)
from .ridge.ridge import RidgeRegressor
from .ridge2.ridge2Classifier import Ridge2Classifier
from .ridge2.ridge2Regressor import Ridge2Regressor
from .ridge2.ridge2MultitaskClassifier import Ridge2MultitaskClassifier
from .ridge2.ridge2MTSJAX import Ridge2Forecaster
from .rvfl.bayesianrvflRegressor import BayesianRVFLRegressor
from .rvfl.bayesianrvfl2Regressor import BayesianRVFL2Regressor
from .sampling import SubSampler
from .updater import RegressorUpdater, ClassifierUpdater
from .votingregressor import MedianVotingRegressor

__all__ = [
    "AdaBoostClassifier",
    "AttentionMechanism",
    "Base",
    "BaseRegressor",
    "BayesianRVFLRegressor",
    "BayesianRVFL2Regressor",
    "ClassicalMTS",
    "CustomClassifier",
    "CustomRegressor",
    "CustomBackPropRegressor",
    "DeepClassifier",
    "DeepRegressor",
    "DeepMTS",
    "Downloader",
    "GLMClassifier",
    "GLMRegressor",
    "KernelRidge",
    "LazyClassifier",
    "LazyRegressor",
    "LazyDeepClassifier",
    "LazyDeepRegressor",
    "LazyMTS",
    "LazyDeepMTS",
    "MLARCH",
    "MedianVotingRegressor",
    "MTS",
    "MTSStacker",
    "MultitaskClassifier",
    "NeuralNetRegressor",
    "NeuralNetClassifier",
    "PredictionInterval",
    "PredictionSet",
    "SimpleMultitaskClassifier",
    "Optimizer",
    "QuantileRegressor",
    "QuantileClassifier",
    "RandomBagRegressor",
    "RandomBagClassifier",
    "RandomFourierEstimator",
    "RandomFourierFeaturesRidge",
    "RandomFourierFeaturesRidgeGCV",
    "RegressorUpdater",
    "ClassifierUpdater",
    "RidgeRegressor",
    "Ridge2Regressor",
    "Ridge2Classifier",
    "Ridge2MultitaskClassifier",
    "Ridge2Forecaster",
    "SubSampler",
]
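
The estimators exported above broadly follow a scikit-learn-style fit/predict interface. A minimal usage sketch (hyperparameter values are illustrative, and CustomClassifier is assumed to expose the same fit/predict/score interface as the AdaBoostClassifier documented below):

```python
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123
)

# add a hidden layer of (quasi-)randomized nodes on top of a scikit-learn estimator
clf = ns.CustomClassifier(
    LogisticRegression(solver="liblinear"),
    n_hidden_features=10,
    direct_link=True,
    seed=123,
)
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))  # accuracy on held-out data
```
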
class AdaBoostClassifier(nnetsauce.boosting.bst.Boosting, sklearn.base.ClassifierMixin):
class AdaBoostClassifier(Boosting, ClassifierMixin):
    """AdaBoost Classification (SAMME) model class derived from class Boosting

    Parameters:

        obj: object
            any object containing a method fit (obj.fit()) and a method predict
            (obj.predict())

        n_estimators: int
            number of boosting iterations

        learning_rate: float
            learning rate of the boosting procedure

        n_hidden_features: int
            number of nodes in the hidden layer

        reg_lambda: float
            regularization parameter for weights

        reg_alpha: float
            controls the compromise between l1 and l2 norms of weights

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
            'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not
            (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original predictors are included (True) in model's
            fitting or not (False)

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
                no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        col_sample: float
            percentage of covariates randomly chosen for training

        row_sample: float
            percentage of rows chosen for training, by stratified bootstrapping

        seed: int
            reproducibility seed for nodes_sim=='uniform'

        verbose: int
            0 for no output, 1 for a progress bar (default is 1)

        method: str
            type of Adaboost method, 'SAMME' (discrete) or 'SAMME.R' (real)

        backend: str
            "cpu" or "gpu" or "tpu"

    Attributes:

        alpha_: list
            AdaBoost coefficients alpha_m

        base_learners_: dict
            a dictionary containing the base learners

    Examples:

    See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/adaboost_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/adaboost_classification.py)

    ```python
    import nnetsauce as ns
    import numpy as np
    from sklearn.datasets import load_breast_cancer
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import train_test_split
    from sklearn import metrics
    from time import time

    breast_cancer = load_breast_cancer()
    Z = breast_cancer.data
    t = breast_cancer.target
    np.random.seed(123)
    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)

    # SAMME.R
    clf = LogisticRegression(solver='liblinear', multi_class='ovr',
                            random_state=123)
    fit_obj = ns.AdaBoostClassifier(clf,
                                    n_hidden_features=int(11.22338867),
                                    direct_link=True,
                                    n_estimators=250, learning_rate=0.01126343,
                                    col_sample=0.72684326, row_sample=0.86429443,
                                    dropout=0.63078613, n_clusters=2,
                                    type_clust="gmm",
                                    verbose=1, seed=123,
                                    method="SAMME.R")

    start = time()
    fit_obj.fit(X_train, y_train)
    print(f"Elapsed {time() - start}")

    start = time()
    print(fit_obj.score(X_test, y_test))
    print(f"Elapsed {time() - start}")

    preds = fit_obj.predict(X_test)

    print(metrics.classification_report(preds, y_test))

    ```

    """

    # construct the object -----
    _estimator_type = "classifier"

    def __init__(
        self,
        obj,
        n_estimators=10,
        learning_rate=0.1,
        n_hidden_features=1,
        reg_lambda=0,
        reg_alpha=0.5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=False,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        seed=123,
        verbose=1,
        method="SAMME",
        backend="cpu",
    ):
        self.type_fit = "classification"
        self.verbose = verbose
        self.method = method
        self.reg_lambda = reg_lambda
        self.reg_alpha = reg_alpha

        super().__init__(
            obj=obj,
            n_estimators=n_estimators,
            learning_rate=learning_rate,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )

        self.alpha_ = []
        self.base_learners_ = dict.fromkeys(range(n_estimators))

    def fit(self, X, y, sample_weight=None, **kwargs):
        """Fit Boosting model to training data (X, y).

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            **kwargs: additional parameters to be passed to
                    self.cook_training_set or self.obj.fit

        Returns:

             self: object
        """

        assert mx.is_factor(y), "y must contain only integers"

        assert self.method in (
            "SAMME",
            "SAMME.R",
        ), "`method` must be either 'SAMME' or 'SAMME.R'"

        assert (self.reg_lambda <= 1) & (
            self.reg_lambda >= 0
        ), "must have self.reg_lambda <= 1 &  self.reg_lambda >= 0"

        assert (self.reg_alpha <= 1) & (
            self.reg_alpha >= 0
        ), "must have self.reg_alpha <= 1 &  self.reg_alpha >= 0"

        # training
        n, p = X.shape
        self.n_classes = len(np.unique(y))
        self.classes_ = np.unique(y)  # for compatibility with sklearn
        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn

        if sample_weight is None:
            w_m = np.repeat(1.0 / n, n)
        else:
            w_m = np.asarray(sample_weight)

        base_learner = CustomClassifier(
            self.obj,
            n_hidden_features=self.n_hidden_features,
            activation_name=self.activation_name,
            a=self.a,
            nodes_sim=self.nodes_sim,
            bias=self.bias,
            dropout=self.dropout,
            direct_link=self.direct_link,
            n_clusters=self.n_clusters,
            type_clust=self.type_clust,
            type_scaling=self.type_scaling,
            col_sample=self.col_sample,
            row_sample=self.row_sample,
            seed=self.seed,
        )

        if self.verbose == 1:
            pbar = Progbar(self.n_estimators)

        if self.method == "SAMME":
            err_m = 1e6
            err_bound = 1 - 1 / self.n_classes
            self.alpha_.append(1.0)
            x_range_n = range(n)

            for m in range(self.n_estimators):
                preds = base_learner.fit(
                    X, y, sample_weight=w_m.ravel(), **kwargs
                ).predict(X)

                self.base_learners_.update({m: deepcopy(base_learner)})

                cond = [y[i] != preds[i] for i in x_range_n]

                err_m = max(
                    sum([elt[0] * elt[1] for elt in zip(cond, w_m)]),
                    2.220446049250313e-16,
                )  # sum(w_m) == 1

                if self.reg_lambda > 0:
                    err_m += self.reg_lambda * (
                        (1 - self.reg_alpha) * 0.5 * sum([x**2 for x in w_m])
                        + self.reg_alpha * sum([abs(x) for x in w_m])
                    )

                err_m = min(err_m, err_bound)

                alpha_m = self.learning_rate * log(
                    (self.n_classes - 1) * (1 - err_m) / err_m
                )

                self.alpha_.append(alpha_m)

                w_m_temp = [exp(alpha_m * cond[i]) for i in x_range_n]

                sum_w_m = sum(w_m_temp)

                w_m = np.asarray([w_m_temp[i] / sum_w_m for i in x_range_n])

                base_learner.set_params(seed=self.seed + (m + 1) * 1000)

                if self.verbose == 1:
                    pbar.update(m)

            if self.verbose == 1:
                pbar.update(self.n_estimators)

            self.n_estimators = len(self.base_learners_)
            self.classes_ = np.unique(y)

            return self

        if self.method == "SAMME.R":
            Y = mo.one_hot_encode2(y, self.n_classes)

            if sample_weight is None:
                w_m = np.repeat(1.0 / n, n)  # (N, 1)

            else:
                w_m = np.asarray(sample_weight)

            for m in range(self.n_estimators):
                probs = base_learner.fit(
                    X, y, sample_weight=w_m.ravel(), **kwargs
                ).predict_proba(X)

                np.clip(
                    a=probs, a_min=2.220446049250313e-16, a_max=1.0, out=probs
                )

                self.base_learners_.update({m: deepcopy(base_learner)})

                w_m *= np.exp(
                    -1.0
                    * self.learning_rate
                    * (1.0 - 1.0 / self.n_classes)
                    * xlogy(Y, probs).sum(axis=1)
                )

                w_m /= np.sum(w_m)

                base_learner.set_params(seed=self.seed + (m + 1) * 1000)

                if self.verbose == 1:
                    pbar.update(m)

            if self.verbose == 1:
                pbar.update(self.n_estimators)

            self.n_estimators = len(self.base_learners_)
            self.classes_ = np.unique(y)

            return self

    def predict(self, X, **kwargs):
        """Predict test data X.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters to be passed to
                  self.cook_test_set

        Returns:

            model predictions: {array-like}
        """
        return self.predict_proba(X, **kwargs).argmax(axis=1)

    def predict_proba(self, X, **kwargs):
        """Predict probabilities for test data X.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters to be passed to
                  self.cook_test_set

        Returns:

            probability estimates for test data: {array-like}

        """

        n_iter = len(self.base_learners_)

        if self.method == "SAMME":
            ensemble_learner = np.zeros((X.shape[0], self.n_classes))

            # if self.verbose == 1:
            #    pbar = Progbar(n_iter)

            for idx, base_learner in self.base_learners_.items():
                preds = base_learner.predict(X, **kwargs)

                ensemble_learner += self.alpha_[idx] * mo.one_hot_encode2(
                    preds, self.n_classes
                )

                # if self.verbose == 1:
                #    pbar.update(idx)

            # if self.verbose == 1:
            #    pbar.update(n_iter)

            expit_ensemble_learner = expit(ensemble_learner)

            sum_ensemble = expit_ensemble_learner.sum(axis=1)

            return expit_ensemble_learner / sum_ensemble[:, None]

        # if self.method == "SAMME.R":
        ensemble_learner = 0

        # if self.verbose == 1:
        #    pbar = Progbar(n_iter)

        for idx, base_learner in self.base_learners_.items():
            probs = base_learner.predict_proba(X, **kwargs)

            np.clip(a=probs, a_min=2.220446049250313e-16, a_max=1.0, out=probs)

            log_preds_proba = np.log(probs)

            ensemble_learner += (
                log_preds_proba - log_preds_proba.mean(axis=1)[:, None]
            )

            # if self.verbose == 1:
            #    pbar.update(idx)

        ensemble_learner *= self.n_classes - 1

        # if self.verbose == 1:
        #    pbar.update(n_iter)

        expit_ensemble_learner = expit(ensemble_learner)

        sum_ensemble = expit_ensemble_learner.sum(axis=1)

        return expit_ensemble_learner / sum_ensemble[:, None]

    @property
    def _estimator_type(self):
        return "classifier"

AdaBoost Classification (SAMME) model class derived from class Boosting

Parameters:

obj: object
    any object containing a method fit (obj.fit()) and a method predict
    (obj.predict())

n_estimators: int
    number of boosting iterations

learning_rate: float
    learning rate of the boosting procedure

n_hidden_features: int
    number of nodes in the hidden layer

reg_lambda: float
    regularization parameter for weights

reg_alpha: float
    controls the compromise between l1 and l2 norms of weights

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

col_sample: float
    percentage of covariates randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

seed: int
    reproducibility seed for nodes_sim=='uniform'

verbose: int
    0 for no output, 1 for a progress bar (default is 1)

method: str
    type of Adaboost method, 'SAMME' (discrete) or 'SAMME.R' (real)

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

alpha_: list
    AdaBoost coefficients alpha_m

base_learners_: dict
    a dictionary containing the base learners

Examples:

See also https://github.com/Techtonique/nnetsauce/blob/master/examples/adaboost_classification.py

import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time

breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)

# SAMME.R
clf = LogisticRegression(solver='liblinear', multi_class = 'ovr',
                        random_state=123)
fit_obj = ns.AdaBoostClassifier(clf,
                                n_hidden_features=int(11.22338867),
                                direct_link=True,
                                n_estimators=250, learning_rate=0.01126343,
                                col_sample=0.72684326, row_sample=0.86429443,
                                dropout=0.63078613, n_clusters=2,
                                type_clust="gmm",
                                verbose=1, seed = 123,
                                method="SAMME.R")

start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")

start = time()
print(fit_obj.score(X_test, y_test))
print(f"Elapsed {time() - start}")

preds = fit_obj.predict(X_test)

print(metrics.classification_report(preds, y_test))
def fit(self, X, y, sample_weight=None, **kwargs):

Fit Boosting model to training data (X, y).

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

 self: object
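
Below is a minimal sketch of a direct call to `fit` with explicit (here uniform) sample weights; the data and hyperparameter values are illustrative only:

```python
import numpy as np
import nnetsauce as ns
from sklearn.linear_model import LogisticRegression

rng = np.random.default_rng(123)
X = rng.normal(size=(100, 5))        # (n_samples, n_features)
y = rng.integers(0, 2, size=100)     # integer class labels, as required above

booster = ns.AdaBoostClassifier(
    LogisticRegression(solver="liblinear"),
    n_estimators=50,
    method="SAMME",
)
# sample_weight defaults to uniform weights 1/n; a custom vector re-weights training rows
booster.fit(X, y, sample_weight=np.repeat(1.0 / len(y), len(y)))
```
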
def predict(self, X, **kwargs):

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
      self.cook_test_set

Returns:

model predictions: {array-like}
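
Continuing the illustrative sketch from the `fit` section, `predict` returns one hard class label per row of X:

```python
X_new = rng.normal(size=(10, 5))
labels = booster.predict(X_new)      # shape (10,)
print(labels)
```
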
def predict_proba(self, X, **kwargs):

Predict probabilities for test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
      self.cook_test_set

Returns:

probability estimates for test data: {array-like}
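
As the source above shows, `predict` is simply the row-wise argmax of `predict_proba`, and each row of the returned probabilities is normalized to sum to 1. Continuing the same illustrative sketch:

```python
probs = booster.predict_proba(X_new)   # shape (10, n_classes)
print(probs.sum(axis=1))               # each entry is 1.0
print((probs.argmax(axis=1) == booster.predict(X_new)).all())  # True
```
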
class AttentionMechanism:
class AttentionMechanism:
    """
    A comprehensive class implementing various attention mechanisms
    for both univariate time series and tabular data using JAX.

    Supported attention types:
    - Scaled Dot-Product Attention
    - Additive (Bahdanau) Attention
    - Multi-Head Attention
    - Self-Attention
    - Temporal Attention (for sequences)
    - Feature Attention (for tabular data)
    - Cross-Attention
    - Context Vector Attention
    """

    def __init__(
        self,
        input_dim: int,
        hidden_dim: int = 64,
        num_heads: int = 4,
        dropout: float = 0.1,
        seed: int = 42,
    ):
        """
        Args:
            input_dim: Dimension of input features
            hidden_dim: Hidden dimension for attention computations
            num_heads: Number of attention heads for multi-head attention
            dropout: Dropout rate
            seed: Random seed for parameter initialization
        """
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.num_heads = num_heads
        self.dropout = dropout

        # Initialize random key
        self.rng = random.PRNGKey(seed)

        # Initialize parameters
        self.params = self._initialize_parameters()

        assert (
            hidden_dim % num_heads == 0
        ), "hidden_dim must be divisible by num_heads"
        self.head_dim = hidden_dim // num_heads

    def _initialize_parameters(self) -> Dict:
        """Initialize all network parameters using JAX"""
        keys = random.split(self.rng, 20)

        def init_weight(key, shape):
            return random.normal(key, shape) * np.sqrt(2.0 / shape[0])

        def init_bias(shape):
            return jnp.zeros(shape)

        params = {
            # Scaled Dot-Product Attention
            "query_w": init_weight(keys[0], (self.input_dim, self.hidden_dim)),
            "query_b": init_bias((self.hidden_dim,)),
            "key_w": init_weight(keys[1], (self.input_dim, self.hidden_dim)),
            "key_b": init_bias((self.hidden_dim,)),
            "value_w": init_weight(keys[2], (self.input_dim, self.hidden_dim)),
            "value_b": init_bias((self.hidden_dim,)),
            # Additive Attention
            "additive_query_w": init_weight(
                keys[3], (self.input_dim, self.hidden_dim)
            ),
            "additive_query_b": init_bias((self.hidden_dim,)),
            "additive_key_w": init_weight(
                keys[4], (self.input_dim, self.hidden_dim)
            ),
            "additive_key_b": init_bias((self.hidden_dim,)),
            "additive_v_w": init_weight(keys[5], (self.hidden_dim, 1)),
            "additive_v_b": init_bias((1,)),
            # Multi-Head Attention
            "mha_query_w": init_weight(
                keys[6], (self.input_dim, self.hidden_dim)
            ),
            "mha_query_b": init_bias((self.hidden_dim,)),
            "mha_key_w": init_weight(
                keys[7], (self.input_dim, self.hidden_dim)
            ),
            "mha_key_b": init_bias((self.hidden_dim,)),
            "mha_value_w": init_weight(
                keys[8], (self.input_dim, self.hidden_dim)
            ),
            "mha_value_b": init_bias((self.hidden_dim,)),
            "mha_output_w": init_weight(
                keys[9], (self.hidden_dim, self.hidden_dim)
            ),
            "mha_output_b": init_bias((self.hidden_dim,)),
            # Feature Attention
            "feature_w1": init_weight(
                keys[10], (self.input_dim, self.hidden_dim)
            ),
            "feature_b1": init_bias((self.hidden_dim,)),
            "feature_w2": init_weight(
                keys[11], (self.hidden_dim, self.input_dim)
            ),
            "feature_b2": init_bias((self.input_dim,)),
            # Temporal Attention
            "temporal_query_w": init_weight(
                keys[12], (self.input_dim, self.hidden_dim)
            ),
            "temporal_query_b": init_bias((self.hidden_dim,)),
            "temporal_key_w": init_weight(
                keys[13], (self.input_dim, self.hidden_dim)
            ),
            "temporal_key_b": init_bias((self.hidden_dim,)),
            # Context Vector Attention
            "context_vector": random.normal(keys[14], (1, 1, self.hidden_dim)),
            "context_query_w": init_weight(
                keys[15], (self.hidden_dim, self.hidden_dim)
            ),
            "context_query_b": init_bias((self.hidden_dim,)),
            "context_key_w": init_weight(
                keys[16], (self.input_dim, self.hidden_dim)
            ),
            "context_key_b": init_bias((self.hidden_dim,)),
            "context_value_w": init_weight(
                keys[17], (self.input_dim, self.hidden_dim)
            ),
            "context_value_b": init_bias((self.hidden_dim,)),
        }

        return params

    @staticmethod
    @jit
    def _apply_dropout(
        x: jnp.ndarray,
        key: jax.random.PRNGKey,
        rate: float,
        training: bool = True,
    ) -> jnp.ndarray:
        """Apply dropout"""
        if training and rate > 0:
            keep_prob = 1 - rate
            mask = random.bernoulli(key, keep_prob, x.shape)
            return jnp.where(mask, x / keep_prob, 0)
        return x

    @partial(jit, static_argnums=(0,))
    def scaled_dot_product_attention(
        self,
        query: jnp.ndarray,
        key: jnp.ndarray,
        value: jnp.ndarray,
        params: Dict,
        mask: Optional[jnp.ndarray] = None,
        training: bool = False,
    ) -> Tuple[jnp.ndarray, jnp.ndarray]:
        """
        Scaled Dot-Product Attention

        Args:
            query: (batch_size, seq_len, input_dim) or (batch_size, input_dim)
            key: (batch_size, seq_len, input_dim)
            value: (batch_size, seq_len, input_dim)
            params: Parameter dictionary
            mask: Optional mask (batch_size, seq_len)
            training: Whether in training mode

        Returns:
            context: Attended context vector
            attention_weights: Attention weights
        """
        # Project inputs
        Q = jnp.dot(query, params["query_w"]) + params["query_b"]
        K = jnp.dot(key, params["key_w"]) + params["key_b"]
        V = jnp.dot(value, params["value_w"]) + params["value_b"]

        # Compute attention scores
        scores = jnp.matmul(Q, jnp.swapaxes(K, -2, -1))
        scores = scores / jnp.sqrt(self.hidden_dim)

        # Apply mask if provided
        if mask is not None:
            scores = jnp.where(mask == 0, -1e9, scores)

        # Compute attention weights
        attention_weights = jax.nn.softmax(scores, axis=-1)

        # Apply attention to values
        context = jnp.matmul(attention_weights, V)

        return context, attention_weights

    @partial(jit, static_argnums=(0,))
    def additive_attention(
        self,
        query: jnp.ndarray,
        key: jnp.ndarray,
        value: jnp.ndarray,
        params: Dict,
        mask: Optional[jnp.ndarray] = None,
    ) -> Tuple[jnp.ndarray, jnp.ndarray]:
        """
        Additive (Bahdanau) Attention

        Args:
            query: (batch_size, hidden_dim) or (batch_size, 1, hidden_dim)
            key: (batch_size, seq_len, hidden_dim)
            value: (batch_size, seq_len, hidden_dim)
            params: Parameter dictionary
            mask: Optional mask

        Returns:
            context: Attended context vector
            attention_weights: Attention weights
        """
        # Ensure query has seq_len dimension
        if query.ndim == 2:
            query = jnp.expand_dims(query, axis=1)

        # Project query and key
        Q = (
            jnp.dot(query, params["additive_query_w"])
            + params["additive_query_b"]
        )
        K = jnp.dot(key, params["additive_key_w"]) + params["additive_key_b"]

        # Additive attention: score = v^T tanh(W_q Q + W_k K)
        combined = jnp.tanh(Q + K)
        scores = (
            jnp.dot(combined, params["additive_v_w"]) + params["additive_v_b"]
        )
        scores = jnp.squeeze(scores, axis=-1)

        # Apply mask if provided
        if mask is not None:
            scores = jnp.where(mask == 0, -1e9, scores)

        # Compute attention weights
        attention_weights = jax.nn.softmax(scores, axis=-1)

        # Apply attention to values
        context = jnp.matmul(jnp.expand_dims(attention_weights, axis=1), value)
        context = jnp.squeeze(context, axis=1)

        return context, attention_weights

    @partial(jit, static_argnums=(0,))
    def multi_head_attention(
        self,
        query: jnp.ndarray,
        key: jnp.ndarray,
        value: jnp.ndarray,
        params: Dict,
        mask: Optional[jnp.ndarray] = None,
    ) -> Tuple[jnp.ndarray, jnp.ndarray]:
        """
        Multi-Head Attention

        Args:
            query: (batch_size, seq_len_q, input_dim)
            key: (batch_size, seq_len_k, input_dim)
            value: (batch_size, seq_len_v, input_dim)
            params: Parameter dictionary
            mask: Optional mask

        Returns:
            output: Multi-head attention output
            attention_weights: Attention weights from all heads
        """
        batch_size = query.shape[0]

        # Project and reshape for multi-head attention
        Q = jnp.dot(query, params["mha_query_w"]) + params["mha_query_b"]
        K = jnp.dot(key, params["mha_key_w"]) + params["mha_key_b"]
        V = jnp.dot(value, params["mha_value_w"]) + params["mha_value_b"]

        Q = Q.reshape(batch_size, -1, self.num_heads, self.head_dim)
        K = K.reshape(batch_size, -1, self.num_heads, self.head_dim)
        V = V.reshape(batch_size, -1, self.num_heads, self.head_dim)

        # Transpose for attention: (batch, num_heads, seq_len, head_dim)
        Q = jnp.transpose(Q, (0, 2, 1, 3))
        K = jnp.transpose(K, (0, 2, 1, 3))
        V = jnp.transpose(V, (0, 2, 1, 3))

        # Compute attention scores
        scores = jnp.matmul(Q, jnp.swapaxes(K, -2, -1)) / jnp.sqrt(
            self.head_dim
        )

        # Apply mask if provided
        if mask is not None:
            mask_expanded = jnp.expand_dims(jnp.expand_dims(mask, 1), 2)
            scores = jnp.where(mask_expanded == 0, -1e9, scores)

        # Attention weights
        attention_weights = jax.nn.softmax(scores, axis=-1)

        # Apply attention to values
        context = jnp.matmul(attention_weights, V)

        # Reshape back: (batch, seq_len, hidden_dim)
        context = jnp.transpose(context, (0, 2, 1, 3))
        context = context.reshape(batch_size, -1, self.hidden_dim)

        # Final linear projection
        output = (
            jnp.dot(context, params["mha_output_w"]) + params["mha_output_b"]
        )

        return output, attention_weights

    @partial(jit, static_argnums=(0,))
    def self_attention(
        self, x: jnp.ndarray, params: Dict, mask: Optional[jnp.ndarray] = None
    ) -> Tuple[jnp.ndarray, jnp.ndarray]:
        """Self-Attention mechanism"""
        return self.scaled_dot_product_attention(x, x, x, params, mask)

    @partial(jit, static_argnums=(0,))
    def temporal_attention(
        self, x: jnp.ndarray, params: Dict, mask: Optional[jnp.ndarray] = None
    ) -> Tuple[jnp.ndarray, jnp.ndarray]:
        """
        Temporal Attention for time series data

        Args:
            x: (batch_size, seq_len, input_dim)
            params: Parameter dictionary
            mask: Optional mask

        Returns:
            context: Temporally attended context
            attention_weights: Temporal attention weights
        """
        # Use last time step as query
        query = x[:, -1:, :]

        Q = (
            jnp.dot(query, params["temporal_query_w"])
            + params["temporal_query_b"]
        )
        K = jnp.dot(x, params["temporal_key_w"]) + params["temporal_key_b"]

        # Compute attention scores
        scores = jnp.matmul(Q, jnp.swapaxes(K, -2, -1)) / jnp.sqrt(
            self.hidden_dim
        )
        scores = jnp.squeeze(scores, axis=1)

        # Apply mask if provided
        if mask is not None:
            scores = jnp.where(mask == 0, -1e9, scores)

        # Attention weights
        attention_weights = jax.nn.softmax(scores, axis=-1)

        # Apply attention
        context = jnp.matmul(jnp.expand_dims(attention_weights, axis=1), x)
        context = jnp.squeeze(context, axis=1)

        return context, attention_weights

    @partial(jit, static_argnums=(0,))
    def feature_attention_tabular(
        self, x: jnp.ndarray, params: Dict
    ) -> Tuple[jnp.ndarray, jnp.ndarray]:
        """
        Feature Attention for tabular data

        Args:
            x: (batch_size, num_features)
            params: Parameter dictionary

        Returns:
            output: Feature-weighted output
            attention_weights: Feature importance weights
        """
        # Compute feature attention weights
        hidden = jnp.dot(x, params["feature_w1"]) + params["feature_b1"]
        hidden = jnp.tanh(hidden)
        logits = jnp.dot(hidden, params["feature_w2"]) + params["feature_b2"]
        attention_weights = jax.nn.softmax(logits, axis=-1)

        # Apply attention to features
        output = x * attention_weights

        return output, attention_weights

    @partial(jit, static_argnums=(0,))
    def context_vector_attention(
        self, x: jnp.ndarray, params: Dict, mask: Optional[jnp.ndarray] = None
    ) -> Tuple[jnp.ndarray, jnp.ndarray]:
        """
        Context Vector Attention
        Uses a learnable global context vector as the query.

        Args:
            x: (batch_size, seq_len, input_dim)
            params: Parameter dictionary
            mask: Optional mask (batch_size, seq_len)

        Returns:
            context: Global context representation (batch_size, hidden_dim)
            attention_weights: Attention weights (batch_size, seq_len)
        """
        batch_size = x.shape[0]

        # Expand context vector for batch
        context_vec = jnp.broadcast_to(
            params["context_vector"], (batch_size, 1, self.hidden_dim)
        )

        # Project context vector and input
        Q = (
            jnp.dot(context_vec, params["context_query_w"])
            + params["context_query_b"]
        )
        K = jnp.dot(x, params["context_key_w"]) + params["context_key_b"]
        V = jnp.dot(x, params["context_value_w"]) + params["context_value_b"]

        # Compute attention scores
        scores = jnp.matmul(Q, jnp.swapaxes(K, -2, -1)) / jnp.sqrt(
            self.hidden_dim
        )
        scores = jnp.squeeze(scores, axis=1)

        # Apply mask if provided
        if mask is not None:
            scores = jnp.where(mask == 0, -1e9, scores)

        # Compute attention weights
        attention_weights = jax.nn.softmax(scores, axis=-1)

        # Apply attention to values
        context = jnp.matmul(jnp.expand_dims(attention_weights, axis=1), V)
        context = jnp.squeeze(context, axis=1)

        return context, attention_weights

    @partial(jit, static_argnums=(0,))
    def cross_attention(
        self,
        query: jnp.ndarray,
        key_value: jnp.ndarray,
        params: Dict,
        mask: Optional[jnp.ndarray] = None,
    ) -> Tuple[jnp.ndarray, jnp.ndarray]:
        """Cross-Attention between two different sequences"""
        return self.scaled_dot_product_attention(
            query, key_value, key_value, params, mask
        )

    def __call__(
        self,
        x: jnp.ndarray,
        attention_type: str = "scaled_dot_product",
        query: Optional[jnp.ndarray] = None,
        key_value: Optional[jnp.ndarray] = None,
        mask: Optional[jnp.ndarray] = None,
        training: bool = False,
    ) -> Tuple[jnp.ndarray, jnp.ndarray]:
        """
        Forward pass with specified attention mechanism

        Args:
            x: Input tensor
            attention_type: Type of attention to use
            query: Optional query for cross-attention
            key_value: Optional key-value for cross-attention
            mask: Optional mask
            training: Whether in training mode

        Returns:
            output: Attention output
            attention_weights: Attention weights
        """
        if attention_type == "scaled_dot_product":
            return self.scaled_dot_product_attention(
                x, x, x, self.params, mask, training
            )
        elif attention_type == "additive":
            return self.additive_attention(
                x[:, -1:, :], x, x, self.params, mask
            )
        elif attention_type == "multi_head":
            return self.multi_head_attention(x, x, x, self.params, mask)
        elif attention_type == "self":
            return self.self_attention(x, self.params, mask)
        elif attention_type == "temporal":
            return self.temporal_attention(x, self.params, mask)
        elif attention_type == "feature":
            return self.feature_attention_tabular(x, self.params)
        elif attention_type == "cross":
            if query is None or key_value is None:
                raise ValueError(
                    "Cross-attention requires both query and key_value"
                )
            return self.cross_attention(query, key_value, self.params, mask)
        elif attention_type == "context_vector":
            return self.context_vector_attention(x, self.params, mask)
        else:
            raise ValueError(f"Unknown attention type: {attention_type}")

A comprehensive class implementing various attention mechanisms for both univariate time series and tabular data using JAX.

Supported attention types:

  • Scaled Dot-Product Attention
  • Additive (Bahdanau) Attention
  • Multi-Head Attention
  • Self-Attention
  • Temporal Attention (for sequences)
  • Feature Attention (for tabular data)
  • Cross-Attention
  • Context Vector Attention
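
A minimal usage sketch, assuming a working JAX installation; the input shapes follow the method docstrings above ((batch, seq_len, input_dim) for sequences, (batch, num_features) for tabular data), and the shape comments are what those docstrings imply:

```python
from jax import random
from nnetsauce import AttentionMechanism

key = random.PRNGKey(0)
x_seq = random.normal(key, (8, 24, 6))   # (batch, seq_len, input_dim)
x_tab = random.normal(key, (8, 6))       # (batch, num_features)

attn = AttentionMechanism(input_dim=6, hidden_dim=64, num_heads=4, seed=42)

# temporal attention: the last time step attends over the whole sequence
context, weights = attn(x_seq, attention_type="temporal")
print(context.shape, weights.shape)      # (8, 6) and (8, 24)

# feature attention: per-feature importance weights for tabular rows
out, feat_weights = attn(x_tab, attention_type="feature")
print(out.shape, feat_weights.shape)     # (8, 6) and (8, 6)
```
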
class Base(sklearn.base.BaseEstimator):
class Base(BaseEstimator):
    """Base model from which all the other classes inherit.

    This class contains the most important data preprocessing/feature engineering methods.

    Parameters:

        n_hidden_features: int
            number of nodes in the hidden layer

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
            'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or
            not (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original features are included (True) in model's
            fitting or not (False)

        n_clusters: int
            number of clusters for type_clust='kmeans' or type_clust='gmm'
            clustering (could be 0: no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot);
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax') or robust scaling ('robust') or max absolute scaling ('maxabs')

        col_sample: float
            percentage of features randomly chosen for training

        row_sample: float
            percentage of rows chosen for training, by stratified bootstrapping

        seed: int
            reproducibility seed for nodes_sim=='uniform', clustering and dropout

        backend: str
            "cpu" or "gpu" or "tpu"

    """

    # construct the object -----

    def __init__(
        self,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        seed=123,
        backend="cpu",
    ):
        # input checks -----

        sys_platform = platform.system()

        if (sys_platform == "Windows") and (backend in ("gpu", "tpu")):
            warnings.warn(
                "No GPU/TPU computing on Windows yet, backend set to 'cpu'"
            )
            backend = "cpu"

        assert activation_name in (
            "relu",
            "tanh",
            "sigmoid",
            "prelu",
            "elu",
        ), "'activation_name' must be in ('relu', 'tanh', 'sigmoid','prelu', 'elu')"

        assert nodes_sim in (
            "sobol",
            "hammersley",
            "uniform",
            "halton",
        ), "'nodes_sim' must be in ('sobol', 'hammersley', 'uniform', 'halton')"

        assert type_clust in (
            "kmeans",
            "gmm",
        ), "'type_clust' must be in ('kmeans', 'gmm')"

        assert (len(type_scaling) == 3) & all(
            type_scaling[i] in ("minmax", "std", "robust", "maxabs")
            for i in range(len(type_scaling))
        ), "'type_scaling' must have length 3, and available scaling methods are 'minmax' scaling, standardization ('std'), robust scaling ('robust') and max absolute ('maxabs')"

        assert (col_sample >= 0) & (
            col_sample <= 1
        ), "'col_sample' must be comprised between 0 and 1 (both included)"

        assert backend in (
            "cpu",
            "gpu",
            "tpu",
        ), "must have 'backend' in ('cpu', 'gpu', 'tpu')"

        self.n_hidden_features = n_hidden_features
        self.activation_name = activation_name
        self.a = a
        self.nodes_sim = nodes_sim
        self.bias = bias
        self.seed = seed
        self.backend = backend
        self.dropout = dropout
        self.direct_link = direct_link
        self.cluster_encode = cluster_encode
        self.type_clust = type_clust
        self.type_scaling = type_scaling
        self.col_sample = col_sample
        self.row_sample = row_sample
        self.n_clusters = n_clusters
        if isinstance(self, RegressorMixin):
            self.type_fit = "regression"
        elif isinstance(self, ClassifierMixin):
            self.type_fit = "classification"
        self.subsampler_ = None
        self.index_col_ = None
        self.index_row_ = True
        self.clustering_obj_ = None
        self.clustering_scaler_ = None
        self.nn_scaler_ = None
        self.scaler_ = None
        self.encoder_ = None
        self.W_ = None
        self.X_ = None
        self.y_ = None
        self.y_mean_ = None
        self.beta_ = None

        # activation function -----
        if sys_platform in ("Linux", "Darwin"):
            activation_options = {
                "relu": ac.relu if (self.backend == "cpu") else jnn.relu,
                "tanh": np.tanh if (self.backend == "cpu") else jnp.tanh,
                "sigmoid": (
                    ac.sigmoid if (self.backend == "cpu") else jnn.sigmoid
                ),
                "prelu": partial(ac.prelu, a=a),
                "elu": (
                    partial(ac.elu, a=a)
                    if (self.backend == "cpu")
                    else partial(jnn.elu, a=a)
                ),
            }
        else:  # on Windows currently, no JAX
            activation_options = {
                "relu": (
                    ac.relu if (self.backend == "cpu") else NotImplementedError
                ),
                "tanh": (
                    np.tanh if (self.backend == "cpu") else NotImplementedError
                ),
                "sigmoid": (
                    ac.sigmoid
                    if (self.backend == "cpu")
                    else NotImplementedError
                ),
                "prelu": partial(ac.prelu, a=a),
                "elu": (
                    partial(ac.elu, a=a)
                    if (self.backend == "cpu")
                    else NotImplementedError
                ),
            }
        self.activation_func = activation_options[activation_name]

    # "preprocessing" methods to be inherited -----

    def encode_clusters(self, X=None, predict=False, scaler=None, **kwargs):  #
        """Create new covariates with kmeans or GMM clustering

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            predict: boolean
                is False on training set and True on test set

            scaler: {object} of class StandardScaler, MinMaxScaler, RobustScaler or MaxAbsScaler
                if scaler has already been fitted on training data (online training), it can be passed here

            **kwargs:
                additional parameters to be passed to the
 265                clustering method
 266
 267        Returns:
 268
 269            Clusters' matrix, one-hot encoded: {array-like}
 270
 271        """
 272
 273        np.random.seed(self.seed)
 274
 275        if X is None:
 276            X = self.X_
 277
 278        if isinstance(X, pd.DataFrame):
 279            X = copy.deepcopy(X.values.astype(float))
 280
 281        if len(X.shape) == 1:
 282            X = X.reshape(1, -1)
 283
 284        if predict is False:  # encode training set
 285            # scale input data before clustering
 286            self.clustering_scaler_, scaled_X = mo.scale_covariates(
 287                X, choice=self.type_scaling[2], scaler=self.clustering_scaler_
 288            )
 289
 290            self.clustering_obj_, X_clustered = mo.cluster_covariates(
 291                scaled_X,
 292                self.n_clusters,
 293                self.seed,
 294                type_clust=self.type_clust,
 295                **kwargs
 296            )
 297
 298            if self.cluster_encode:
 299                return mo.one_hot_encode(X_clustered, self.n_clusters).astype(
 300                    np.float16
 301                )
 302
 303            return X_clustered.astype(np.float16)
 304
 305        # if predict == True, encode test set
 306        X_clustered = self.clustering_obj_.predict(
 307            self.clustering_scaler_.transform(X)
 308        )
 309
 310        if self.cluster_encode == True:
 311            return mo.one_hot_encode(X_clustered, self.n_clusters).astype(
 312                np.float16
 313            )
 314
 315        return X_clustered.astype(np.float16)
 316
 317    def create_layer(self, scaled_X, W=None):
 318        """Create hidden layer.
 319
 320        Parameters:
 321
 322            scaled_X: {array-like}, shape = [n_samples, n_features]
 323                Training vectors, where n_samples is the number
 324                of samples and n_features is the number of features
 325
 326            W: {array-like}, shape = [n_features, hidden_features]
 327                if provided, constructs the hidden layer with W; otherwise computed internally
 328
 329        Returns:
 330
 331            Hidden layer matrix: {array-like}
 332
 333        """
 334
 335        n_features = scaled_X.shape[1]
 336
 337        # hash_sim = {
 338        #         "sobol": generate_sobol,
 339        #         "hammersley": generate_hammersley,
 340        #         "uniform": generate_uniform,
 341        #         "halton": generate_halton
 342        #     }
 343
 344        if self.bias is False:  # no bias term in the hidden layer
 345            if W is None:
 346                if self.nodes_sim == "sobol":
 347                    self.W_ = generate_sobol(
 348                        n_dims=n_features,
 349                        n_points=self.n_hidden_features,
 350                        seed=self.seed,
 351                    )
 352                elif self.nodes_sim == "hammersley":
 353                    self.W_ = generate_hammersley(
 354                        n_dims=n_features,
 355                        n_points=self.n_hidden_features,
 356                        seed=self.seed,
 357                    )
 358                elif self.nodes_sim == "uniform":
 359                    self.W_ = generate_uniform(
 360                        n_dims=n_features,
 361                        n_points=self.n_hidden_features,
 362                        seed=self.seed,
 363                    )
 364                else:
 365                    self.W_ = generate_halton(
 366                        n_dims=n_features,
 367                        n_points=self.n_hidden_features,
 368                        seed=self.seed,
 369                    )
 370
 371                assert (
 372                    scaled_X.shape[1] == self.W_.shape[0]
 373                ), "check dimensions of covariates X and matrix W"
 374
 375                return mo.dropout(
 376                    x=self.activation_func(
 377                        mo.safe_sparse_dot(
 378                            a=scaled_X, b=self.W_, backend=self.backend
 379                        )
 380                    ),
 381                    drop_prob=self.dropout,
 382                    seed=self.seed,
 383                )
 384
 385            # W is not none
 386            assert (
 387                scaled_X.shape[1] == W.shape[0]
 388            ), "check dimensions of covariates X and matrix W"
 389
 390            # self.W_ = W
 391            return mo.dropout(
 392                x=self.activation_func(
 393                    mo.safe_sparse_dot(a=scaled_X, b=W, backend=self.backend)
 394                ),
 395                drop_prob=self.dropout,
 396                seed=self.seed,
 397            )
 398
 399        # with bias term in the hidden layer
 400        if W is None:
 401            n_features_1 = n_features + 1
 402
 403            if self.nodes_sim == "sobol":
 404                self.W_ = generate_sobol(
 405                    n_dims=n_features_1,
 406                    n_points=self.n_hidden_features,
 407                    seed=self.seed,
 408                )
 409            elif self.nodes_sim == "hammersley":
 410                self.W_ = generate_hammersley(
 411                    n_dims=n_features_1,
 412                    n_points=self.n_hidden_features,
 413                    seed=self.seed,
 414                )
 415            elif self.nodes_sim == "uniform":
 416                self.W_ = generate_uniform(
 417                    n_dims=n_features_1,
 418                    n_points=self.n_hidden_features,
 419                    seed=self.seed,
 420                )
 421            else:
 422                self.W_ = generate_halton(
 423                    n_dims=n_features_1,
 424                    n_points=self.n_hidden_features,
 425                    seed=self.seed,
 426                )
 427
 428            # self.W_ = hash_sim[self.nodes_sim](
 429            #         n_dims=n_features_1,
 430            #         n_points=self.n_hidden_features,
 431            #         seed=self.seed,
 432            #     )
 433
 434            return mo.dropout(
 435                x=self.activation_func(
 436                    mo.safe_sparse_dot(
 437                        a=mo.cbind(
 438                            np.ones(scaled_X.shape[0]),
 439                            scaled_X,
 440                            backend=self.backend,
 441                        ),
 442                        b=self.W_,
 443                        backend=self.backend,
 444                    )
 445                ),
 446                drop_prob=self.dropout,
 447                seed=self.seed,
 448            )
 449
 450        # W is not None
 451        # self.W_ = W
 452        return mo.dropout(
 453            x=self.activation_func(
 454                mo.safe_sparse_dot(
 455                    a=mo.cbind(
 456                        np.ones(scaled_X.shape[0]),
 457                        scaled_X,
 458                        backend=self.backend,
 459                    ),
 460                    b=W,
 461                    backend=self.backend,
 462                )
 463            ),
 464            drop_prob=self.dropout,
 465            seed=self.seed,
 466        )
 467
 468    def _jax_create_layer(
 469        self, scaled_X: jnp.ndarray, W: Optional[jnp.ndarray] = None
 470    ) -> jnp.ndarray:
 471        """JAX-compatible version of create_layer that exactly matches the original functionality."""
 472        key = jax.random.PRNGKey(self.seed)
 473        n_features = scaled_X.shape[1]
 474
 475        # Generate weights if not provided
 476        if W is None:
 477            if self.bias:
 478                n_features_1 = n_features + 1
 479                shape = (n_features_1, self.n_hidden_features)
 480            else:
 481                shape = (n_features, self.n_hidden_features)
 482
 483            # JAX-compatible weight generation matching original behavior
 484            if self.nodes_sim == "sobol":
 485                W_np = generate_sobol(
 486                    n_dims=shape[0],
 487                    n_points=self.n_hidden_features,
 488                    seed=self.seed,
 489                )
 490                W = jnp.asarray(W_np)
 491            elif self.nodes_sim == "hammersley":
 492                W_np = generate_hammersley(
 493                    n_dims=shape[0],
 494                    n_points=self.n_hidden_features,
 495                    seed=self.seed,
 496                )
 497                W = jnp.asarray(W_np)
 498            elif self.nodes_sim == "uniform":
 499                key, subkey = jax.random.split(key)
 500                W = jax.random.uniform(
 501                    subkey, shape=shape, minval=-1.0, maxval=1.0
 502                )
 503            else:  # halton
 504                W_np = generate_halton(
 505                    n_dims=shape[0],
 506                    n_points=self.n_hidden_features,
 507                    seed=self.seed,
 508                )
 509                W = jnp.asarray(W_np)
 510
 511            self.W_ = np.array(W)  # Store as numpy for original methods
 512
 513        # Prepare input with bias if needed
 514        if self.bias:
 515            X_with_bias = jnp.hstack(
 516                [jnp.ones((scaled_X.shape[0], 1)), scaled_X]
 517            )
 518            # print("X_with_bias shape:", X_with_bias.shape)
 519            # print("W shape:", W.shape)
 520            linear_output = jnp.dot(X_with_bias, W)
 521        else:
 522            linear_output = jnp.dot(scaled_X, W)
 523
 524        # Apply activation function
 525        if self.activation_name == "relu":
 526            activated = jax.nn.relu(linear_output)
 527        elif self.activation_name == "tanh":
 528            activated = jnp.tanh(linear_output)
 529        elif self.activation_name == "sigmoid":
 530            activated = jax.nn.sigmoid(linear_output)
 531        else:  # leaky relu
 532            activated = jax.nn.leaky_relu(linear_output, negative_slope=self.a)
 533
 534        # Apply dropout
 535        if self.dropout > 0:
 536            key, subkey = jax.random.split(key)
 537            mask = jax.random.bernoulli(
 538                subkey, p=1 - self.dropout, shape=activated.shape
 539            )
 540            activated = jnp.where(mask, activated / (1 - self.dropout), 0)
 541
 542        return activated
 543
 544    def cook_training_set(self, y=None, X=None, W=None, **kwargs):
 545        """Create new hidden features for training set, with hidden layer, center the response.
 546
 547        Parameters:
 548
 549            y: array-like, shape = [n_samples]
 550                Target values
 551
 552            X: {array-like}, shape = [n_samples, n_features]
 553                Training vectors, where n_samples is the number
 554                of samples and n_features is the number of features
 555
 556            W: {array-like}, shape = [n_features, hidden_features]
 557                if provided, constructs the hidden layer via W
 558
 559        Returns:
 560
 561            (centered response, direct link + hidden layer matrix): {tuple}
 562
 563        """
 564
 565        # either X and y are stored or not
 566        # assert ((y is None) & (X is None)) | ((y is not None) & (X is not None))
 567        if self.n_hidden_features > 0:  # has a hidden layer
 568            assert (
 569                len(self.type_scaling) >= 2
 570            ), "must have len(self.type_scaling) >= 2 when self.n_hidden_features > 0"
 571
 572        if X is None:
 573            if self.col_sample == 1:
 574                input_X = self.X_
 575            else:
 576                n_features = self.X_.shape[1]
 577                new_n_features = int(np.ceil(n_features * self.col_sample))
 578                assert (
 579                    new_n_features >= 1
 580                ), "check class attribute 'col_sample' and the number of covariates provided for X"
 581                np.random.seed(self.seed)
 582                index_col = np.random.choice(
 583                    range(n_features), size=new_n_features, replace=False
 584                )
 585                self.index_col_ = index_col
 586                input_X = self.X_[:, self.index_col_]
 587
 588        else:  # X is not None # keep X vs self.X_
 589            if isinstance(X, pd.DataFrame):
 590                X = copy.deepcopy(X.values.astype(float))
 591
 592            if self.col_sample == 1:
 593                input_X = X
 594            else:
 595                n_features = X.shape[1]
 596                new_n_features = int(np.ceil(n_features * self.col_sample))
 597                assert (
 598                    new_n_features >= 1
 599                ), "check class attribute 'col_sample' and the number of covariates provided for X"
 600                np.random.seed(self.seed)
 601                index_col = np.random.choice(
 602                    range(n_features), size=new_n_features, replace=False
 603                )
 604                self.index_col_ = index_col
 605                input_X = X[:, self.index_col_]
 606
 607        if self.n_clusters <= 0:
 608            # data without any clustering -----
 609
 610            if self.n_hidden_features > 0:  # with hidden layer
 611                self.nn_scaler_, scaled_X = mo.scale_covariates(
 612                    input_X, choice=self.type_scaling[1], scaler=self.nn_scaler_
 613                )
 614                Phi_X = (
 615                    self.create_layer(scaled_X)
 616                    if W is None
 617                    else self.create_layer(scaled_X, W=W)
 618                )
 619                Z = (
 620                    mo.cbind(input_X, Phi_X, backend=self.backend)
 621                    if self.direct_link is True
 622                    else Phi_X
 623                )
 624                self.scaler_, scaled_Z = mo.scale_covariates(
 625                    Z, choice=self.type_scaling[0], scaler=self.scaler_
 626                )
 627            else:  # no hidden layer
 628                Z = input_X
 629                self.scaler_, scaled_Z = mo.scale_covariates(
 630                    Z, choice=self.type_scaling[0], scaler=self.scaler_
 631                )
 632
 633        else:
 634            # data with clustering ----- # keep
 635
 636            augmented_X = mo.cbind(
 637                input_X,
 638                self.encode_clusters(input_X, **kwargs),
 639                backend=self.backend,
 640            )
 641
 642            if self.n_hidden_features > 0:  # with hidden layer
 643                self.nn_scaler_, scaled_X = mo.scale_covariates(
 644                    augmented_X,
 645                    choice=self.type_scaling[1],
 646                    scaler=self.nn_scaler_,
 647                )
 648                Phi_X = (
 649                    self.create_layer(scaled_X)
 650                    if W is None
 651                    else self.create_layer(scaled_X, W=W)
 652                )
 653                Z = (
 654                    mo.cbind(augmented_X, Phi_X, backend=self.backend)
 655                    if self.direct_link is True
 656                    else Phi_X
 657                )
 658                self.scaler_, scaled_Z = mo.scale_covariates(
 659                    Z, choice=self.type_scaling[0], scaler=self.scaler_
 660                )
 661            else:  # no hidden layer
 662                Z = augmented_X
 663                self.scaler_, scaled_Z = mo.scale_covariates(
 664                    Z, choice=self.type_scaling[0], scaler=self.scaler_
 665                )
 666
 667        # Returning model inputs -----
 668        if mx.is_factor(y) is False:  # regression
 669            # center y
 670            if y is None:
 671                self.y_mean_, centered_y = mo.center_response(self.y_)
 672            else:
 673                self.y_mean_, centered_y = mo.center_response(y)
 674
 675            # y is subsampled
 676            if self.row_sample < 1:
 677                n, p = Z.shape
 678
 679                self.subsampler_ = (
 680                    SubSampler(
 681                        y=self.y_, row_sample=self.row_sample, seed=self.seed
 682                    )
 683                    if y is None
 684                    else SubSampler(
 685                        y=y, row_sample=self.row_sample, seed=self.seed
 686                    )
 687                )
 688
 689                self.index_row_ = self.subsampler_.subsample()
 690
 691                n_row_sample = len(self.index_row_)
 692                # regression
 693                return (
 694                    centered_y[self.index_row_].reshape(n_row_sample),
 695                    self.scaler_.transform(
 696                        Z[self.index_row_, :].reshape(n_row_sample, p)
 697                    ),
 698                )
 699            # y is not subsampled
 700            # regression
 701            return (centered_y, self.scaler_.transform(Z))
 702
 703        # classification
 704        # y is subsampled
 705        if self.row_sample < 1:
 706            n, p = Z.shape
 707
 708            self.subsampler_ = (
 709                SubSampler(
 710                    y=self.y_, row_sample=self.row_sample, seed=self.seed
 711                )
 712                if y is None
 713                else SubSampler(y=y, row_sample=self.row_sample, seed=self.seed)
 714            )
 715
 716            self.index_row_ = self.subsampler_.subsample()
 717
 718            n_row_sample = len(self.index_row_)
 719            # classification
 720            return (
 721                y[self.index_row_].reshape(n_row_sample),
 722                self.scaler_.transform(
 723                    Z[self.index_row_, :].reshape(n_row_sample, p)
 724                ),
 725            )
 726        # y is not subsampled
 727        # classification
 728        return (y, self.scaler_.transform(Z))
 729
 730    def cook_test_set(self, X, **kwargs):
 731        """Transform data from test set, with hidden layer.
 732
 733        Parameters:
 734
 735            X: {array-like}, shape = [n_samples, n_features]
 736                Test vectors, where n_samples is the number
 737                of samples and n_features is the number of features
 738
 739            **kwargs: additional parameters to be passed to self.encode_clusters
 740
 741        Returns:
 742
 743            Transformed test set : {array-like}
 744        """
 745
 746        if isinstance(X, pd.DataFrame):
 747            X = copy.deepcopy(X.values.astype(float))
 748
 749        if len(X.shape) == 1:
 750            X = X.reshape(1, -1)
 751
 752        if (
 753            self.n_clusters == 0
 754        ):  # data without clustering -----
 755            if self.n_hidden_features > 0:
 756                # if hidden layer
 757                scaled_X = (
 758                    self.nn_scaler_.transform(X)
 759                    if (self.col_sample == 1)
 760                    else self.nn_scaler_.transform(X[:, self.index_col_])
 761                )
 762                Phi_X = self.create_layer(scaled_X, self.W_)
 763                if self.direct_link:
 764                    return self.scaler_.transform(
 765                        mo.cbind(scaled_X, Phi_X, backend=self.backend)
 766                    )
 767                # when self.direct_link == False
 768                return self.scaler_.transform(Phi_X)
 769            # if no hidden layer # self.n_hidden_features == 0
 770            return self.scaler_.transform(X)
 771
 772        # data with clustering: self.n_clusters > 0 -----
 773        if self.col_sample == 1:
 774            predicted_clusters = self.encode_clusters(
 775                X=X, predict=True, **kwargs
 776            )
 777            augmented_X = mo.cbind(X, predicted_clusters, backend=self.backend)
 778        else:
 779            predicted_clusters = self.encode_clusters(
 780                X=X[:, self.index_col_], predict=True, **kwargs
 781            )
 782            augmented_X = mo.cbind(
 783                X[:, self.index_col_], predicted_clusters, backend=self.backend
 784            )
 785
 786        if self.n_hidden_features > 0:  # if hidden layer
 787            scaled_X = self.nn_scaler_.transform(augmented_X)
 788            Phi_X = self.create_layer(scaled_X, self.W_)
 789            if self.direct_link:
 790                return self.scaler_.transform(
 791                    mo.cbind(augmented_X, Phi_X, backend=self.backend)
 792                )
 793            return self.scaler_.transform(Phi_X)
 794
 795        # if no hidden layer
 796        return self.scaler_.transform(augmented_X)
 797
 798    def cook_training_set_jax(self, y=None, X=None, W=None, **kwargs):
 799        """JAX-compatible version of cook_training_set that maintains side effects."""
 800        # Initialize random key
 801        key = jax.random.PRNGKey(self.seed)
 802
 803        # Convert inputs to JAX arrays
 804        X = jnp.asarray(X) if X is not None else jnp.asarray(self.X_)
 805        y = jnp.asarray(y) if y is not None else jnp.asarray(self.y_)
 806
 807        # Handle column sampling
 808        if self.col_sample < 1:
 809            n_features = X.shape[1]
 810            new_n_features = int(jnp.ceil(n_features * self.col_sample))
 811            assert new_n_features >= 1, "Invalid col_sample"
 812
 813            key, subkey = jax.random.split(key)
 814            index_col = jax.random.choice(
 815                subkey, n_features, shape=(new_n_features,), replace=False
 816            )
 817            self.index_col_ = np.array(
 818                index_col
 819            )  # Store as numpy for original methods
 820            input_X = X[:, index_col]
 821            n_features = (
 822                new_n_features  # Update n_features after column sampling
 823            )
 824        else:
 825            input_X = X
 826            n_features = X.shape[1]
 827
 828        augmented_X = input_X
 829
 830        # JAX-compatible scaling
 831        def jax_scale(data, mean=None, std=None):
 832            if mean is None:
 833                mean = jnp.mean(data, axis=0)
 834            if std is None:
 835                std = jnp.std(data, axis=0)
 836            return (data - mean) / (std + 1e-10), mean, std
 837
 838        # Hidden layer processing
 839        if self.n_hidden_features > 0:
 840            # Initialize weights if not provided
 841            if W is None:
 842                shape = (n_features, self.n_hidden_features)
 843
 844                # JAX-compatible weight generation
 845                if self.nodes_sim == "uniform":
 846                    key, subkey = jax.random.split(key)
 847                    W = jax.random.uniform(
 848                        subkey, shape=shape, minval=-1.0, maxval=1.0
 849                    ) * (1 / jnp.sqrt(n_features))
 850                else:
 851                    # For other sequences, use numpy generation then convert to JAX
 852                    if self.nodes_sim == "sobol":
 853                        W_np = generate_sobol(
 854                            n_dims=shape[0],
 855                            n_points=shape[1],
 856                            seed=self.seed,
 857                        )
 858                    elif self.nodes_sim == "hammersley":
 859                        W_np = generate_hammersley(
 860                            n_dims=shape[0],
 861                            n_points=shape[1],
 862                            seed=self.seed,
 863                        )
 864                    elif self.nodes_sim == "halton":
 865                        W_np = generate_halton(
 866                            n_dims=shape[0],
 867                            n_points=shape[1],
 868                            seed=self.seed,
 869                        )
 870                    else:  # default to uniform
 871                        key, subkey = jax.random.split(key)
 872                        W = jax.random.uniform(
 873                            subkey, shape=shape, minval=-1.0, maxval=1.0
 874                        ) * (1 / jnp.sqrt(n_features))
 875
 876                    if self.nodes_sim in ["sobol", "hammersley", "halton"]:
 877                        W = jnp.asarray(W_np) * (1 / jnp.sqrt(n_features))
 878
 879                self.W_ = np.array(W)  # Store as numpy for original methods
 880
 881            # Scale features
 882            scaled_X, self.nn_mean_, self.nn_std_ = jax_scale(
 883                augmented_X,
 884                getattr(self, "nn_mean_", None),
 885                getattr(self, "nn_std_", None),
 886            )
 887
 888            # Create hidden layer with proper bias handling
 889            linear_output = jnp.dot(scaled_X, W)
 890
 891            # Apply activation
 892            if self.activation_name == "relu":
 893                Phi_X = jax.nn.relu(linear_output)
 894            elif self.activation_name == "tanh":
 895                Phi_X = jnp.tanh(linear_output)
 896            elif self.activation_name == "sigmoid":
 897                Phi_X = jax.nn.sigmoid(linear_output)
 898            else:  # leaky relu
 899                Phi_X = jax.nn.leaky_relu(linear_output, negative_slope=self.a)
 900
 901            # Apply dropout
 902            if self.dropout > 0:
 903                key, subkey = jax.random.split(key)
 904                mask = jax.random.bernoulli(
 905                    subkey, p=1 - self.dropout, shape=Phi_X.shape
 906                )
 907                Phi_X = jnp.where(mask, Phi_X / (1 - self.dropout), 0)
 908
 909            Z = jnp.hstack([scaled_X, Phi_X]) if self.direct_link else Phi_X
 910        else:
 911            Z = augmented_X
 912
 913        # Final scaling
 914        scaled_Z, self.scale_mean_, self.scale_std_ = jax_scale(
 915            Z,
 916            getattr(self, "scale_mean_", None),
 917            getattr(self, "scale_std_", None),
 918        )
 919
 920        # Center response for regression
 921        if not hasattr(mx, "is_factor") or not mx.is_factor(
 922            y
 923        ):  # regression case
 924            self.y_mean_ = float(
 925                jnp.mean(y)
 926            )  # Convert to Python float for compatibility
 927            centered_y = y - self.y_mean_
 928        else:
 929            centered_y = y
 930
 931        # Handle row sampling
 932        if self.row_sample < 1:
 933            key, subkey = jax.random.split(key)
 934            n_samples = Z.shape[0]
 935            n_row_sample = int(jnp.ceil(n_samples * self.row_sample))
 936            index_row = jax.random.choice(
 937                subkey, n_samples, shape=(n_row_sample,), replace=False
 938            )
 939            self.index_row_ = np.array(
 940                index_row
 941            )  # Store as numpy for original methods
 942            return (centered_y[index_row], scaled_Z[index_row])
 943
 944        return (centered_y, scaled_Z)
 945
 946    def cook_test_set_jax(self, X, **kwargs):
 947        """JAX-compatible test set processing with matching dimension handling."""
 948        X = jnp.asarray(X)
 949
 950        if len(X.shape) == 1:
 951            X = X.reshape(1, -1)
 952
 953        # Handle column sampling
 954        input_X = (
 955            X if self.col_sample == 1 else X[:, jnp.asarray(self.index_col_)]
 956        )
 957
 958        augmented_X = input_X
 959
 960        # JAX-compatible scaling
 961        scaled_X = (augmented_X - self.nn_mean_) / (self.nn_std_ + 1e-10)
 962
 963        # Process hidden layer if needed
 964        if self.n_hidden_features > 0:
 965            Phi_X = self._jax_create_layer(scaled_X, jnp.asarray(self.W_))
 966            Z = jnp.hstack([scaled_X, Phi_X]) if self.direct_link else Phi_X
 967        else:
 968            Z = augmented_X
 969
 970        # Final scaling
 971        scaled_Z = (Z - self.scale_mean_) / (self.scale_std_ + 1e-10)
 972
 973        return scaled_Z
 974
 975    def _jax_create_layer(self, X, W):
 976        """JAX-compatible hidden layer creation."""
 977        # print("X", X.shape)
 978        # print("W", W.shape)
 979        # print("self.W_", self.W_.shape)
 980        linear_output = jnp.dot(X, W)
 981
 982        if self.activation_name == "relu":
 983            return jax.nn.relu(linear_output)
 984        elif self.activation_name == "tanh":
 985            return jnp.tanh(linear_output)
 986        elif self.activation_name == "sigmoid":
 987            return jax.nn.sigmoid(linear_output)
 988        else:  # leaky relu
 989            return jax.nn.leaky_relu(linear_output, negative_slope=self.a)
 990
 991    def cross_val_score(
 992        self,
 993        X,
 994        y,
 995        cv=5,
 996        scoring="accuracy",
 997        random_state=42,
 998        n_jobs=-1,
 999        epsilon=0.5,
1000        penalized=True,
1001        objective="abs",
1002        **kwargs
1003    ):
1004        """
1005        Penalized Cross-validation score for a model.
1006
1007        Parameters:
1008
1009            X: {array-like}, shape = [n_samples, n_features]
1010                Training vectors, where n_samples is the number
1011                of samples and n_features is the number of features
1012
1013            y: array-like, shape = [n_samples]
1014                Target values
1015
1023            cv: int
1024                Number of folds
1025
1026            scoring: str
1027                Scoring metric
1028
1029            random_state: int
1030                Random state
1031
1032            n_jobs: int
1033                Number of jobs to run in parallel
1034
1035            epsilon: float
1036                Penalty parameter
1037
1038            penalized: bool
1039                Whether to obtain penalized cross-validation score or not
1040
1041            objective: str
1042                'abs': Minimize the absolute difference between cross-validation score and validation score
1043                'relative': Minimize the relative difference between cross-validation score and validation score
1044        Returns:
1045
1046            A namedtuple with the following fields:
1047                - cv_score: float
1048                    cross-validation score
1049                - val_score: float
1050                    validation score
1051                - penalized_score: float
1052                    penalized cross-validation score: abs(cv_score - val_score) + epsilon*(1/val_score + 1/cv_score) for objective='abs', or abs(cv_score/val_score - 1) + epsilon*(1/val_score + 1/cv_score) for objective='relative'.
1053                    If higher scoring metric is better, minimize the function result.
1054                    If lower scoring metric is better, maximize the function result.
1055        """
1056        if scoring == "accuracy":
1057            scoring_func = accuracy_score
1058        elif scoring == "balanced_accuracy":
1059            scoring_func = balanced_accuracy_score
1060        elif scoring == "f1":
1061            scoring_func = f1_score
1062        elif scoring == "roc_auc":
1063            scoring_func = roc_auc_score
1064        elif scoring == "r2":
1065            scoring_func = r2_score
1066        elif scoring == "mse":
1067            scoring_func = mean_squared_error
1068        elif scoring == "mae":
1069            scoring_func = mean_absolute_error
1070        elif scoring == "mape":
1071            scoring_func = mean_absolute_percentage_error
1072        elif scoring == "rmse":
1073
1074            def scoring_func(y_true, y_pred):
1075                return np.sqrt(mean_squared_error(y_true, y_pred))
1076
1077        X_train, X_val, y_train, y_val = train_test_split(
1078            X, y, test_size=0.2, random_state=random_state
1079        )
1080
1081        res = cross_val_score(
1082            self, X_train, y_train, cv=cv, scoring=scoring, n_jobs=n_jobs
1083        )  # cross-validation error
1084
1085        if penalized == False:
1086            return res
1087
1088        DescribeResult = namedtuple(
1089            "DescribeResult", ["cv_score", "val_score", "penalized_score"]
1090        )
1091
1092        numerator = res.mean()
1093
1094        # Evaluate on the (cv+1)-th fold
1095        preds_val = self.fit(X_train, y_train).predict(X_val)
1096        try:
1097            denominator = scoring(y_val, preds_val)  # validation error
1098        except Exception as e:
1099            denominator = scoring_func(y_val, preds_val)
1100
1101        # if higher is better
1102        if objective == "abs":
1103            penalized_score = np.abs(numerator - denominator) + epsilon * (
1104                1 / denominator + 1 / numerator
1105            )
1106        elif objective == "relative":
1107            ratio = numerator / denominator
1108            penalized_score = np.abs(ratio - 1) + epsilon * (
1109                1 / denominator + 1 / numerator
1110            )
1111
1112        return DescribeResult(
1113            cv_score=numerator,
1114            val_score=denominator,
1115            penalized_score=penalized_score,
1116        )
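
A minimal usage sketch tying the constructor parameters documented above to everyday use. This is an illustration, not library output: it assumes scikit-learn is installed and uses CustomRegressor, one of the Base-derived estimators, which forwards these keyword arguments to Base.

# Hedged usage sketch: a Base-derived estimator wrapping a scikit-learn model.
import numpy as np
from sklearn.linear_model import LinearRegression
import nnetsauce as ns

rng = np.random.default_rng(123)
X = rng.normal(size=(100, 4))
y = X @ np.array([1.0, -2.0, 0.5, 3.0]) + 0.1 * rng.normal(size=100)

reg = ns.CustomRegressor(
    obj=LinearRegression(),
    n_hidden_features=10,   # nodes in the hidden layer
    activation_name="relu",
    nodes_sim="sobol",      # quasi-random simulation of hidden-layer nodes
    n_clusters=2,           # k-means cluster covariates
    type_clust="kmeans",
    dropout=0.1,
    col_sample=0.9,         # fraction of features used for training
    row_sample=0.9,         # fraction of rows used for training
    seed=123,
    backend="cpu",
)
reg.fit(X, y)
print(reg.predict(X[:5, :]))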

Base model from which all the other classes inherit.

This class contains the most important data preprocessing/feature engineering methods.

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or
    not (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original features are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for type_clust='kmeans' or type_clust='gmm'
    clustering (could be 0: no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot);
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer and clustering (when relevant),
    respectively.
    Currently available: standardization ('std'), MinMax scaling ('minmax'), robust scaling ('robust') or max absolute scaling ('maxabs')

col_sample: float
    percentage of features randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

seed: int
    reproducibility seed for nodes_sim=='uniform', clustering and dropout

backend: str
    "cpu" or "gpu" or "tpu"
def encode_clusters(self, X=None, predict=False, scaler=None, **kwargs):

Create new covariates with kmeans or GMM clustering

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

predict: boolean
    is False on training set and True on test set

scaler: {object} of class StandardScaler, MinMaxScaler, RobustScaler or MaxAbsScaler
    if scaler has already been fitted on training data (online training), it can be passed here

**kwargs:
    additional parameters to be passed to the
    clustering method

Returns:

Clusters' matrix, one-hot encoded: {array-like}
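
Illustration only (not the nnetsauce internals): the kind of cluster covariates this method produces, sketched with scikit-learn's KMeans and a manual one-hot encoding.

# Conceptual sketch of encode_clusters: scale, cluster, one-hot encode the labels.
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

rng = np.random.default_rng(123)
X_train, X_test = rng.normal(size=(50, 3)), rng.normal(size=(10, 3))
n_clusters = 2

scaler = StandardScaler().fit(X_train)       # scaling choice type_scaling[2] ('std' here)
km = KMeans(n_clusters=n_clusters, n_init=10, random_state=123).fit(
    scaler.transform(X_train)
)

train_clusters = np.eye(n_clusters)[km.labels_]                            # predict=False
test_clusters = np.eye(n_clusters)[km.predict(scaler.transform(X_test))]   # predict=True
print(train_clusters.shape, test_clusters.shape)                           # (50, 2) (10, 2)
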
def create_layer(self, scaled_X, W=None):

Create hidden layer.

Parameters:

scaled_X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features

W: {array-like}, shape = [n_features, hidden_features]
    if provided, constructs the hidden layer with W; otherwise computed internally

Returns:

Hidden layer matrix: {array-like}
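
A small numpy sketch of the hidden-layer construction, illustration only: it assumes bias=True, a 'relu' activation, inverted dropout, and a uniformly drawn W instead of the quasi-random sequences used by the library.

# Phi(X) = g([1, X] W), followed by dropout.
import numpy as np

rng = np.random.default_rng(123)
scaled_X = rng.normal(size=(50, 3))
n_hidden_features, drop_prob = 5, 0.1

W = rng.uniform(-1.0, 1.0, size=(scaled_X.shape[1] + 1, n_hidden_features))  # +1 row for bias
X_with_bias = np.hstack([np.ones((scaled_X.shape[0], 1)), scaled_X])
Phi_X = np.maximum(X_with_bias @ W, 0.0)       # relu activation

mask = rng.random(Phi_X.shape) > drop_prob     # dropout regularization
Phi_X = np.where(mask, Phi_X / (1.0 - drop_prob), 0.0)
print(Phi_X.shape)                             # (50, 5)
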
def cook_training_set(self, y=None, X=None, W=None, **kwargs):

Create new hidden features for training set, with hidden layer, center the response.

Parameters:

y: array-like, shape = [n_samples]
    Target values

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features

W: {array-like}, shape = [n_features, hidden_features]
    if provided, constructs the hidden layer via W

Returns:

(centered response, direct link + hidden layer matrix): {tuple}
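
An end-to-end sketch of the training-set transformation, illustration only: 'std' scaling throughout, a uniform W, no bias term, n_clusters=2 and direct_link=True are assumed.

# Cluster covariates + hidden layer + scaling + centered response.
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

rng = np.random.default_rng(123)
X, y = rng.normal(size=(100, 4)), rng.normal(size=100)

labels = KMeans(n_clusters=2, n_init=10, random_state=123).fit_predict(
    StandardScaler().fit_transform(X)
)
augmented_X = np.hstack([X, np.eye(2)[labels]])          # original features + clusters

scaled_X = StandardScaler().fit_transform(augmented_X)   # type_scaling[1]
W = rng.uniform(-1.0, 1.0, size=(scaled_X.shape[1], 5))
Phi_X = np.maximum(scaled_X @ W, 0.0)                    # hidden layer ('relu', no bias)

Z = np.hstack([augmented_X, Phi_X])                      # direct_link=True
scaled_Z = StandardScaler().fit_transform(Z)             # type_scaling[0]
centered_y = y - y.mean()                                # centered response
print(centered_y.shape, scaled_Z.shape)                  # (100,) (100, 11)
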
def cook_test_set(self, X, **kwargs):
730    def cook_test_set(self, X, **kwargs):
731        """Transform data from test set, with hidden layer.
732
733        Parameters:
734
735            X: {array-like}, shape = [n_samples, n_features]
736                Training vectors, where n_samples is the number
737                of samples and n_features is the number of features
738
739            **kwargs: additional parameters to be passed to self.encode_cluster
740
741        Returns:
742
743            Transformed test set : {array-like}
744        """
745
746        if isinstance(X, pd.DataFrame):
747            X = copy.deepcopy(X.values.astype(float))
748
749        if len(X.shape) == 1:
750            X = X.reshape(1, -1)
751
752        if (
753            self.n_clusters == 0
754        ):  # data without clustering: self.n_clusters == 0 -----
755            if self.n_hidden_features > 0:
756                # if hidden layer
757                scaled_X = (
758                    self.nn_scaler_.transform(X)
759                    if (self.col_sample == 1)
760                    else self.nn_scaler_.transform(X[:, self.index_col_])
761                )
762                Phi_X = self.create_layer(scaled_X, self.W_)
763                if self.direct_link:
764                    return self.scaler_.transform(
765                        mo.cbind(scaled_X, Phi_X, backend=self.backend)
766                    )
767                # when self.direct_link == False
768                return self.scaler_.transform(Phi_X)
769            # if no hidden layer # self.n_hidden_features == 0
770            return self.scaler_.transform(X)
771
772        # data with clustering: self.n_clusters > 0 -----
773        if self.col_sample == 1:
774            predicted_clusters = self.encode_clusters(
775                X=X, predict=True, **kwargs
776            )
777            augmented_X = mo.cbind(X, predicted_clusters, backend=self.backend)
778        else:
779            predicted_clusters = self.encode_clusters(
780                X=X[:, self.index_col_], predict=True, **kwargs
781            )
782            augmented_X = mo.cbind(
783                X[:, self.index_col_], predicted_clusters, backend=self.backend
784            )
785
786        if self.n_hidden_features > 0:  # if hidden layer
787            scaled_X = self.nn_scaler_.transform(augmented_X)
788            Phi_X = self.create_layer(scaled_X, self.W_)
789            if self.direct_link:
790                return self.scaler_.transform(
791                    mo.cbind(augmented_X, Phi_X, backend=self.backend)
792                )
793            return self.scaler_.transform(Phi_X)
794
795        # if no hidden layer
796        return self.scaler_.transform(augmented_X)

Transform data from test set, with hidden layer.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features

**kwargs: additional parameters to be passed to self.encode_clusters

Returns:

Transformed test set : {array-like}
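
A matching sketch for the test-set transformation, assuming the estimator has already been fitted so that the scalers, hidden-layer weights and clustering encoder are stored (synthetic data, illustrative only):

```python
import numpy as np
import nnetsauce as ns

rng = np.random.default_rng(123)
X_train = rng.normal(size=(50, 3))
y_train = rng.normal(size=50)
X_new = rng.normal(size=(5, 3))

obj = ns.BaseRegressor(n_hidden_features=5, n_clusters=2)
obj.fit(X_train, y_train)        # fits scalers, hidden layer and clustering

# apply the same transformation to unseen data: clusters, hidden layer, scaling
Z_new = obj.cook_test_set(X_new)
print(Z_new.shape)  # columns = original features + cluster encoding + hidden features
```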
class BaseRegressor(nnetsauce.Base, sklearn.base.RegressorMixin):
 15class BaseRegressor(Base, RegressorMixin):
 16    """Random Vector Functional Link Network regression without shrinkage
 17
 18    Parameters:
 19
 20        n_hidden_features: int
 21            number of nodes in the hidden layer
 22
 23        activation_name: str
 24            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 25
 26        a: float
 27            hyperparameter for 'prelu' or 'elu' activation function
 28
 29        nodes_sim: str
 30            type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
 31            'uniform'
 32
 33        bias: boolean
 34            indicates if the hidden layer contains a bias term (True) or
 35            not (False)
 36
 37        dropout: float
 38            regularization parameter; (random) percentage of nodes dropped out
 39            of the training
 40
 41        direct_link: boolean
 42            indicates if the original features are included (True) in model's
 43            fitting or not (False)
 44
 45        n_clusters: int
 46            number of clusters for type_clust='kmeans' or type_clust='gmm'
 47            clustering (could be 0: no clustering)
 48
 49        cluster_encode: bool
 50            defines how the variable containing clusters is treated (default is one-hot);
 51            if `False`, then labels are used, without one-hot encoding
 52
 53        type_clust: str
 54            type of clustering method: currently k-means ('kmeans') or Gaussian
 55            Mixture Model ('gmm')
 56
 57        type_scaling: a tuple of 3 strings
 58            scaling methods for inputs, hidden layer, and clustering respectively
 59            (and when relevant).
 60            Currently available: standardization ('std') or MinMax scaling ('minmax')
 61
 62        col_sample: float
 63            percentage of features randomly chosen for training
 64
 65        row_sample: float
 66            percentage of rows chosen for training, by stratified bootstrapping
 67
 68        seed: int
 69            reproducibility seed for nodes_sim=='uniform', clustering and dropout
 70
 71        backend: str
 72            "cpu" or "gpu" or "tpu"
 73
 74    Attributes:
 75
 76        beta_: vector
 77            regression coefficients
 78
 79        GCV_: float
 80            Generalized Cross-Validation error
 81
 82    """
 83
 84    # construct the object -----
 85
 86    def __init__(
 87        self,
 88        n_hidden_features=5,
 89        activation_name="relu",
 90        a=0.01,
 91        nodes_sim="sobol",
 92        bias=True,
 93        dropout=0,
 94        direct_link=True,
 95        n_clusters=2,
 96        cluster_encode=True,
 97        type_clust="kmeans",
 98        type_scaling=("std", "std", "std"),
 99        col_sample=1,
100        row_sample=1,
101        seed=123,
102        backend="cpu",
103    ):
104        super().__init__(
105            n_hidden_features=n_hidden_features,
106            activation_name=activation_name,
107            a=a,
108            nodes_sim=nodes_sim,
109            bias=bias,
110            dropout=dropout,
111            direct_link=direct_link,
112            n_clusters=n_clusters,
113            cluster_encode=cluster_encode,
114            type_clust=type_clust,
115            type_scaling=type_scaling,
116            col_sample=col_sample,
117            row_sample=row_sample,
118            seed=seed,
119            backend=backend,
120        )
121
122    def fit(self, X, y, **kwargs):
123        """Fit BaseRegressor to training data (X, y)
124
125        Parameters:
126
127            X: {array-like}, shape = [n_samples, n_features]
128                Training vectors, where n_samples is the number
129                of samples and n_features is the number of features
130
131            y: array-like, shape = [n_samples]
132                Target values
133
134            **kwargs: additional parameters to be passed to self.cook_training_set
135
136        Returns:
137
138            self: object
139        """
140
141        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
142
143        fit_obj = lmf.beta_Sigma_hat(
144            X=scaled_Z, y=centered_y, backend=self.backend
145        )
146
147        self.beta_ = fit_obj["beta_hat"]
148
149        self.GCV_ = fit_obj["GCV"]
150
151        return self
152
153    def predict(self, X, **kwargs):
154        """Predict test data X.
155
156        Parameters:
157
158            X: {array-like}, shape = [n_samples, n_features]
159                Test vectors, where n_samples is the number
160                of samples and n_features is the number of features
161
162            **kwargs: additional parameters to be passed to self.cook_test_set
163
164        Returns:
165
166            model predictions: {array-like}
167        """
168
169        if len(X.shape) == 1:
170            n_features = X.shape[0]
171            new_X = mo.rbind(
172                X.reshape(1, n_features),
173                np.ones(n_features).reshape(1, n_features),
174            )
175
176            return (
177                self.y_mean_
178                + mo.safe_sparse_dot(
179                    a=self.cook_test_set(new_X, **kwargs),
180                    b=self.beta_,
181                    backend=self.backend,
182                )
183            )[0]
184
185        return self.y_mean_ + mo.safe_sparse_dot(
186            a=self.cook_test_set(X, **kwargs),
187            b=self.beta_,
188            backend=self.backend,
189        )

Random Vector Functional Link Network regression without shrinkage

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or
    not (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original features are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for type_clust='kmeans' or type_clust='gmm'
    clustering (could be 0: no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot);
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

col_sample: float
    percentage of features randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

seed: int
    reproducibility seed for nodes_sim=='uniform', clustering and dropout

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

beta_: vector
    regression coefficients

GCV_: float
    Generalized Cross-Validation error
def fit(self, X, y, **kwargs):

Fit BaseRegressor to training data (X, y)

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features

y: array-like, shape = [n_samples]
    Target values

**kwargs: additional parameters to be passed to self.cook_training_set

Returns:

self: object
def predict(self, X, **kwargs):

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features

**kwargs: additional parameters to be passed to self.cook_test_set

Returns:

model predictions: {array-like}
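
A minimal usage sketch for `BaseRegressor` on synthetic data; the data, seed and hyperparameter values below are illustrative assumptions, not taken from the library's documentation:

```python
import numpy as np
from sklearn.model_selection import train_test_split
import nnetsauce as ns

rng = np.random.default_rng(42)
X = rng.normal(size=(100, 4))
y = X @ np.array([1.0, -2.0, 0.5, 0.0]) + 0.1 * rng.normal(size=100)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

reg = ns.BaseRegressor(n_hidden_features=10, n_clusters=2, dropout=0.1)
reg.fit(X_train, y_train)
print(reg.GCV_)             # Generalized Cross-Validation error
print(reg.predict(X_test))  # point predictions
```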
class BayesianRVFLRegressor(nnetsauce.Base, sklearn.base.RegressorMixin):
 15class BayesianRVFLRegressor(Base, RegressorMixin):
 16    """Bayesian Random Vector Functional Link Network regression with one prior
 17
 18    Parameters:
 19
 20        n_hidden_features: int
 21            number of nodes in the hidden layer
 22
 23        activation_name: str
 24            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 25
 26        a: float
 27            hyperparameter for 'prelu' or 'elu' activation function
 28
 29        nodes_sim: str
 30            type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'
 31
 32        bias: boolean
 33            indicates if the hidden layer contains a bias term (True) or not (False)
 34
 35        dropout: float
 36            regularization parameter; (random) percentage of nodes dropped out
 37            of the training
 38
 39        direct_link: boolean
 40            indicates if the original features are included (True) in model's fitting or not (False)
 41
 42        n_clusters: int
 43            number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)
 44
 45        cluster_encode: bool
 46            defines how the variable containing clusters is treated (default is one-hot)
 47            if `False`, then labels are used, without one-hot encoding
 48
 49        type_clust: str
 50            type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')
 51
 52        type_scaling: a tuple of 3 strings
 53            scaling methods for inputs, hidden layer, and clustering respectively
 54            (and when relevant).
 55            Currently available: standardization ('std') or MinMax scaling ('minmax')
 56
 57        seed: int
 58            reproducibility seed for nodes_sim=='uniform'
 59
 60        s: float
 61            std. dev. of regression parameters in Bayesian Ridge Regression
 62
 63        sigma: float
 64            std. dev. of residuals in Bayesian Ridge Regression
 65
 66        return_std: boolean
 67            if True, uncertainty around predictions is evaluated
 68
 69        backend: str
 70            "cpu" or "gpu" or "tpu"
 71
 72    Attributes:
 73
 74        beta_: array-like
 75            regression coefficients
 76
 77        Sigma_: array-like
 78            covariance of the distribution of fitted parameters
 79
 80        GCV_: float
 81            Generalized cross-validation error
 82
 83        y_mean_: float
 84            average response
 85
 86    Examples:
 87
 88    ```python
 89    TBD
 90    ```
 91
 92    """
 93
 94    # construct the object -----
 95
 96    def __init__(
 97        self,
 98        n_hidden_features=5,
 99        activation_name="relu",
100        a=0.01,
101        nodes_sim="sobol",
102        bias=True,
103        dropout=0,
104        direct_link=True,
105        n_clusters=2,
106        cluster_encode=True,
107        type_clust="kmeans",
108        type_scaling=("std", "std", "std"),
109        seed=123,
110        s=0.1,
111        sigma=0.05,
112        return_std=True,
113        backend="cpu",
114    ):
115        super().__init__(
116            n_hidden_features=n_hidden_features,
117            activation_name=activation_name,
118            a=a,
119            nodes_sim=nodes_sim,
120            bias=bias,
121            dropout=dropout,
122            direct_link=direct_link,
123            n_clusters=n_clusters,
124            cluster_encode=cluster_encode,
125            type_clust=type_clust,
126            type_scaling=type_scaling,
127            seed=seed,
128            backend=backend,
129        )
130        self.s = s
131        self.sigma = sigma
132        self.beta_ = None
133        self.Sigma_ = None
134        self.GCV_ = None
135        self.return_std = return_std
136
137    def fit(self, X, y, **kwargs):
138        """Fit BayesianRVFLRegressor to training data (X, y).
139
140        Parameters:
141
142            X: {array-like}, shape = [n_samples, n_features]
143                Training vectors, where n_samples is the number
144                of samples and n_features is the number of features.
145
146            y: array-like, shape = [n_samples]
147                Target values.
148
149            **kwargs: additional parameters to be passed to
150                    self.cook_training_set
151
152        Returns:
153
154            self: object
155
156        """
157
158        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
159
160        fit_obj = lmf.beta_Sigma_hat_rvfl(
161            X=scaled_Z,
162            y=centered_y,
163            s=self.s,
164            sigma=self.sigma,
165            fit_intercept=False,
166            return_cov=self.return_std,
167            backend=self.backend,
168        )
169
170        self.beta_ = fit_obj["beta_hat"]
171
172        if self.return_std == True:
173            self.Sigma_ = fit_obj["Sigma_hat"]
174
175        self.GCV_ = fit_obj["GCV"]
176
177        return self
178
179    def predict(self, X, return_std=False, **kwargs):
180        """Predict test data X.
181
182        Parameters:
183
184            X: {array-like}, shape = [n_samples, n_features]
185                Test vectors, where n_samples is the number
186                of samples and n_features is the number of features.
187
188            return_std: {boolean}, standard dev. is returned or not
189
190            **kwargs: additional parameters to be passed to
191                    self.cook_test_set
192
193        Returns:
194
195            model predictions: {array-like}
196
197        """
198
199        if len(X.shape) == 1:  # one observation in the test set only
200            n_features = X.shape[0]
201            new_X = mo.rbind(
202                x=X.reshape(1, n_features),
203                y=np.ones(n_features).reshape(1, n_features),
204                backend=self.backend,
205            )
206
207        self.return_std = return_std
208
209        if self.return_std == False:
210            if len(X.shape) == 1:
211                return (
212                    self.y_mean_
213                    + mo.safe_sparse_dot(
214                        a=self.cook_test_set(new_X, **kwargs),
215                        b=self.beta_,
216                        backend=self.backend,
217                    )
218                )[0]
219
220            return self.y_mean_ + mo.safe_sparse_dot(
221                a=self.cook_test_set(X, **kwargs),
222                b=self.beta_,
223                backend=self.backend,
224            )
225
226        else:  # confidence interval required for preds?
227            if len(X.shape) == 1:
228                Z = self.cook_test_set(new_X, **kwargs)
229
230                pred_obj = lmf.beta_Sigma_hat_rvfl(
231                    s=self.s,
232                    sigma=self.sigma,
233                    X_star=Z,
234                    return_cov=True,
235                    beta_hat_=self.beta_,
236                    Sigma_hat_=self.Sigma_,
237                    backend=self.backend,
238                )
239
240                return (
241                    self.y_mean_ + pred_obj["preds"][0],
242                    pred_obj["preds_std"][0],
243                )
244
245            Z = self.cook_test_set(X, **kwargs)
246
247            pred_obj = lmf.beta_Sigma_hat_rvfl(
248                s=self.s,
249                sigma=self.sigma,
250                X_star=Z,
251                return_cov=True,
252                beta_hat_=self.beta_,
253                Sigma_hat_=self.Sigma_,
254                backend=self.backend,
255            )
256
257            return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])

Bayesian Random Vector Functional Link Network regression with one prior

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original features are included (True) in model's fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

seed: int
    reproducibility seed for nodes_sim=='uniform'

s: float
    std. dev. of regression parameters in Bayesian Ridge Regression

sigma: float
    std. dev. of residuals in Bayesian Ridge Regression

return_std: boolean
    if True, uncertainty around predictions is evaluated

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

beta_: array-like
    regression coefficients

Sigma_: array-like
    covariance of the distribution of fitted parameters

GCV_: float
    Generalized cross-validation error

y_mean_: float
    average response

Examples:

TBD
def fit(self, X, y, **kwargs):

Fit BayesianRVFLRegressor to training data (X, y).

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set

Returns:

self: object
def predict(self, X, return_std=False, **kwargs):

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

return_std: {boolean}, whether the standard deviation of predictions is returned

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}; if return_std is True, a tuple (mean predictions, prediction std. dev.) is returned
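
Since the Examples section above is still marked TBD, here is a hedged usage sketch on synthetic data showing both point predictions and the `return_std=True` path (all values below are illustrative assumptions):

```python
import numpy as np
import nnetsauce as ns

rng = np.random.default_rng(42)
X = rng.normal(size=(100, 4))
y = X @ np.array([1.0, -2.0, 0.5, 0.0]) + 0.1 * rng.normal(size=100)

reg = ns.BayesianRVFLRegressor(n_hidden_features=10, s=0.1, sigma=0.05)
reg.fit(X[:80], y[:80])

point_preds = reg.predict(X[80:])                            # point predictions only
mean_pred, std_pred = reg.predict(X[80:], return_std=True)   # plus predictive std. dev.
```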
class BayesianRVFL2Regressor(nnetsauce.Base, sklearn.base.RegressorMixin):
 15class BayesianRVFL2Regressor(Base, RegressorMixin):
 16    """Bayesian Random Vector Functional Link Network regression with two priors
 17
 18    Parameters:
 19
 20        n_hidden_features: int
 21            number of nodes in the hidden layer
 22
 23        activation_name: str
 24            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 25
 26        a: float
 27            hyperparameter for 'prelu' or 'elu' activation function
 28
 29        nodes_sim: str
 30            type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'
 31
 32        bias: boolean
 33            indicates if the hidden layer contains a bias term (True) or not (False)
 34
 35        dropout: float
 36            regularization parameter; (random) percentage of nodes dropped out
 37            of the training
 38
 39        direct_link: boolean
 40            indicates if the original features are included (True) in model's fitting or not (False)
 41
 42        n_clusters: int
 43            number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)
 44
 45        cluster_encode: bool
 46            defines how the variable containing clusters is treated (default is one-hot)
 47            if `False`, then labels are used, without one-hot encoding
 48
 49        type_clust: str
 50            type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')
 51
 52        type_scaling: a tuple of 3 strings
 53            scaling methods for inputs, hidden layer, and clustering respectively
 54            (and when relevant).
 55            Currently available: standardization ('std') or MinMax scaling ('minmax')
 56
 57        seed: int
 58            reproducibility seed for nodes_sim=='uniform'
 59
 60        s1: float
 61            std. dev. of init. regression parameters in Bayesian Ridge Regression
 62
 63        s2: float
 64            std. dev. of augmented regression parameters in Bayesian Ridge Regression
 65
 66        sigma: float
 67            std. dev. of residuals in Bayesian Ridge Regression
 68
 69        return_std: boolean
 70            if True, uncertainty around predictions is evaluated
 71
 72        backend: str
 73            "cpu" or "gpu" or "tpu"
 74
 75    Attributes:
 76
 77        beta_: array-like
 78            regression coefficients
 79
 80        Sigma_: array-like
 81            covariance of the distribution of fitted parameters
 82
 83        GCV_: float
 84            Generalized cross-validation error
 85
 86        y_mean_: float
 87            average response
 88
 89    Examples:
 90
 91    ```python
 92    TBD
 93    ```
 94
 95    """
 96
 97    # construct the object -----
 98
 99    def __init__(
100        self,
101        n_hidden_features=5,
102        activation_name="relu",
103        a=0.01,
104        nodes_sim="sobol",
105        bias=True,
106        dropout=0,
107        direct_link=True,
108        n_clusters=0,
109        cluster_encode=True,
110        type_clust="kmeans",
111        type_scaling=("std", "std", "std"),
112        seed=123,
113        s1=0.1,
114        s2=0.1,
115        sigma=0.05,
116        return_std=True,
117        backend="cpu",
118    ):
119        super().__init__(
120            n_hidden_features=n_hidden_features,
121            activation_name=activation_name,
122            a=a,
123            nodes_sim=nodes_sim,
124            bias=bias,
125            dropout=dropout,
126            direct_link=direct_link,
127            n_clusters=n_clusters,
128            cluster_encode=cluster_encode,
129            type_clust=type_clust,
130            type_scaling=type_scaling,
131            seed=seed,
132            backend=backend,
133        )
134
135        self.s1 = s1
136        self.s2 = s2
137        self.sigma = sigma
138        self.beta_ = None
139        self.Sigma_ = None
140        self.GCV_ = None
141        self.return_std = return_std
142        self.coef_ = None
143
144    def fit(self, X, y, **kwargs):
145        """Fit BayesianRVFL2Regressor to training data (X, y)
146
147        Parameters:
148
149            X: {array-like}, shape = [n_samples, n_features]
150                Training vectors, where n_samples is the number
151                of samples and n_features is the number of features
152
153            y: array-like, shape = [n_samples]
154                Target values
155
156            **kwargs: additional parameters to be passed to
157                    self.cook_training_set
158
159        Returns:
160
161            self: object
162
163        """
164
165        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
166
167        n, p = X.shape
168        q = self.n_hidden_features
169
170        if self.direct_link == True:
171            r = p + self.n_clusters
172
173            block11 = (self.s1**2) * np.eye(r)
174            block12 = np.zeros((r, q))
175            block21 = np.zeros((q, r))
176            block22 = (self.s2**2) * np.eye(q)
177
178            Sigma_prior = mo.rbind(
179                x=mo.cbind(x=block11, y=block12, backend=self.backend),
180                y=mo.cbind(x=block21, y=block22, backend=self.backend),
181                backend=self.backend,
182            )
183
184        else:
185            Sigma_prior = (self.s2**2) * np.eye(q)
186
187        fit_obj = lmf.beta_Sigma_hat_rvfl2(
188            X=scaled_Z,
189            y=centered_y,
190            Sigma=Sigma_prior,
191            sigma=self.sigma,
192            fit_intercept=False,
193            return_cov=self.return_std,
194            backend=self.backend,
195        )
196
197        self.beta_ = fit_obj["beta_hat"]
198
199        self.coef_ = self.beta_
200
201        if self.return_std == True:
202            self.Sigma_ = fit_obj["Sigma_hat"]
203
204        self.GCV_ = fit_obj["GCV"]
205
206        return self
207
208    def predict(self, X, return_std=False, **kwargs):
209        """Predict test data X.
210
211        Parameters:
212
213            X: {array-like}, shape = [n_samples, n_features]
214                Test vectors, where n_samples is the number
215                of samples and n_features is the number of features.
216
217            return_std: {boolean}, standard dev. is returned or not
218
219            **kwargs: additional parameters to be passed to
220                    self.cook_test_set
221
222        Returns:
223
224            model predictions: {array-like}
225
226        """
227
228        if len(X.shape) == 1:  # one observation in the test set only
229            n_features = X.shape[0]
230            new_X = mo.rbind(
231                x=X.reshape(1, n_features),
232                y=np.ones(n_features).reshape(1, n_features),
233                backend=self.backend,
234            )
235
236        self.return_std = return_std
237
238        if self.return_std == False:
239            if len(X.shape) == 1:
240                return (
241                    self.y_mean_
242                    + mo.safe_sparse_dot(
243                        self.cook_test_set(new_X, **kwargs),
244                        self.beta_,
245                        backend=self.backend,
246                    )
247                )[0]
248
249            return self.y_mean_ + mo.safe_sparse_dot(
250                self.cook_test_set(X, **kwargs),
251                self.beta_,
252                backend=self.backend,
253            )
254
255        else:  # confidence interval required for preds?
256            if len(X.shape) == 1:
257                Z = self.cook_test_set(new_X, **kwargs)
258
259                pred_obj = lmf.beta_Sigma_hat_rvfl2(
260                    X_star=Z,
261                    return_cov=self.return_std,
262                    beta_hat_=self.beta_,
263                    Sigma_hat_=self.Sigma_,
264                    backend=self.backend,
265                )
266
267                return (
268                    self.y_mean_ + pred_obj["preds"][0],
269                    pred_obj["preds_std"][0],
270                )
271
272            Z = self.cook_test_set(X, **kwargs)
273
274            pred_obj = lmf.beta_Sigma_hat_rvfl2(
275                X_star=Z,
276                return_cov=self.return_std,
277                beta_hat_=self.beta_,
278                Sigma_hat_=self.Sigma_,
279                backend=self.backend,
280            )
281
282            return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])

Bayesian Random Vector Functional Link Network regression with two priors

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original features are included (True) in model's fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

seed: int
    reproducibility seed for nodes_sim=='uniform'

s1: float
    std. dev. of init. regression parameters in Bayesian Ridge Regression

s2: float
    std. dev. of augmented regression parameters in Bayesian Ridge Regression

sigma: float
    std. dev. of residuals in Bayesian Ridge Regression

return_std: boolean
    if True, uncertainty around predictions is evaluated

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

beta_: array-like
    regression coefficients

Sigma_: array-like
    covariance of the distribution of fitted parameters

GCV_: float
    Generalized cross-validation error

y_mean_: float
    average response

Examples:

TBD
def fit(self, X, y, **kwargs):

Fit BayesianRVFL2Regressor to training data (X, y)

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features

y: array-like, shape = [n_samples]
    Target values

**kwargs: additional parameters to be passed to
        self.cook_training_set

Returns:

self: object
def predict(self, X, return_std=False, **kwargs):

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

return_std: {boolean}, whether the standard deviation of predictions is returned

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}; if return_std is True, a tuple (mean predictions, prediction std. dev.) is returned
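
As above, a hedged usage sketch (synthetic data, illustrative hyperparameters); the two priors `s1` and `s2` act on the original-feature and hidden-layer coefficients respectively:

```python
import numpy as np
import nnetsauce as ns

rng = np.random.default_rng(42)
X = rng.normal(size=(100, 4))
y = X @ np.array([1.0, -2.0, 0.5, 0.0]) + 0.1 * rng.normal(size=100)

# s1: prior std. dev. of the direct-link coefficients,
# s2: prior std. dev. of the hidden-layer coefficients
reg = ns.BayesianRVFL2Regressor(n_hidden_features=10, s1=0.1, s2=0.1, sigma=0.05)
reg.fit(X[:80], y[:80])
mean_pred, std_pred = reg.predict(X[80:], return_std=True)
```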
class ClassicalMTS(nnetsauce.MTS):
 42class ClassicalMTS(MTS):
 43    """Time series with statistical models (statsmodels), mostly for benchmarks
 44
 45    Parameters:
 46
 47        model: type of model: str.
 48            currently, 'VAR', 'VECM', 'ARIMA', 'ETS', 'Theta'
 49            Default is None
 50
 51        obj: object
 52            A time series model from statsmodels
 53
 54    Attributes:
 55
 56        df_: data frame
 57            the input data frame, in case a data.frame is provided to `fit`
 58
 59        level_: int
 60            level of confidence for prediction intervals (default is 95)
 61
 62    Examples:
 63    See examples/classical_mts_timeseries.py
 64    """
 65
 66    # construct the object -----
 67
 68    def __init__(self, model="VAR", obj=None):
 69        if obj is not None:
 70            self.model = None
 71            self.obj = obj
 72        else:
 73            self.model = model
 74            if self.model == "VAR":
 75                self.obj = VAR
 76            elif self.model == "VECM":
 77                self.obj = VECM
 78            elif self.model == "ARIMA":
 79                self.obj = ARIMA
 80            elif self.model == "ETS":
 81                self.obj = ExponentialSmoothing
 82            elif self.model == "Theta":
 83                self.obj = ThetaModel
 84            else:
 85                raise ValueError("model not recognized")
 86        self.n_series = None
 87        self.replications = None
 88        self.mean_ = None
 89        self.upper_ = None
 90        self.lower_ = None
 91        self.output_dates_ = None
 92        self.alpha_ = None
 93        self.df_ = None
 94        self.residuals_ = []
 95        self.sims_ = None
 96        self.level_ = None
 97
 98    def fit(self, X, **kwargs):
 99        """Fit ClassicalMTS model to training data X, with optional regressors xreg
100
101        Parameters:
102
103        X: {array-like}, shape = [n_samples, n_features]
104            Training time series, where n_samples is the number
105            of samples and n_features is the number of features;
106            X must be in increasing order (most recent observations last)
107
108        **kwargs: for now, additional parameters to be passed for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)
109
110        Returns:
111
112        self: object
113        """
114
115        try:
116            self.n_series = X.shape[1]
117        except Exception:
118            self.n_series = 1
119
120        if (isinstance(X, pd.DataFrame) is False) and isinstance(
121            X, pd.Series
122        ) is False:  # input data set is a numpy array
123            X = pd.DataFrame(X)
124            if self.n_series > 1:
125                self.series_names = [
126                    "series" + str(i) for i in range(X.shape[1])
127                ]
128            else:
129                self.series_names = "series0"
130
131        else:  # input data set is a DataFrame or Series with column names
132            X_index = None
133            if X.index is not None and len(X.shape) > 1:
134                X_index = X.index
135                X = copy.deepcopy(mo.convert_df_to_numeric(X))
136            if X_index is not None:
137                try:
138                    X.index = X_index
139                except Exception:
140                    pass
141            if isinstance(X, pd.DataFrame):
142                self.series_names = X.columns.tolist()
143            else:
144                self.series_names = X.name
145
146        if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series):
147            self.df_ = X
148            X = X.values
149            self.df_.columns = self.series_names
150            self.input_dates = ts.compute_input_dates(self.df_)
151        else:
152            self.df_ = pd.DataFrame(X, columns=self.series_names)
153
154        if self.model == "Theta":
155            try:
156                self.obj = self.obj(self.df_, **kwargs).fit()
157            except Exception as e:
158                self.obj = self.obj(self.df_.values, **kwargs).fit()
159            self.residuals_ = None
160        else:
161            self.obj = self.obj(X, **kwargs).fit()
162            try:
163                self.residuals_ = self.obj.resid
164            except Exception as e:  # Theta
165                self.residuals_ = None
166
167        return self
168
169    def predict(self, h=5, level=95, **kwargs):
170        """Forecast all the time series, h steps ahead
171
172        Parameters:
173
174        h: {integer}
175            Forecasting horizon
176
177        **kwargs: additional parameters to be passed to the
178                underlying statsmodels forecasting methods
179
180        Returns:
181
182        model predictions for horizon = h: {array-like}
183
184        """
185
186        self.output_dates_, frequency = ts.compute_output_dates(self.df_, h)
187        self.level_ = level
188        self.lower_ = None  # do not remove (/!\)
189        self.upper_ = None  # do not remove (/!\)
190        self.sims_ = None  # do not remove (/!\)
191        self.level_ = level
192        self.alpha_ = 100 - level
193
194        pi_multiplier = norm.ppf(1 - self.alpha_ / 200)
195
196        # Named tuple for forecast results
197        DescribeResult = namedtuple(
198            "DescribeResult", ("mean", "lower", "upper")
199        )
200
201        if (
202            self.obj is not None
203        ):  # try all the special cases of the else section (there's probably a better way)
204            try:
205                (
206                    mean_forecast,
207                    lower_bound,
208                    upper_bound,
209                ) = self.obj.forecast_interval(
210                    self.obj.endog, steps=h, alpha=self.alpha_ / 100, **kwargs
211                )
212
213            except Exception as e:
214                try:
215                    forecast_result = self.obj.predict(steps=h)
216                    mean_forecast = forecast_result
217                    (
218                        lower_bound,
219                        upper_bound,
220                    ) = self._compute_confidence_intervals(
221                        forecast_result, alpha=self.alpha_ / 100, **kwargs
222                    )
223
224                except Exception as e:
225                    try:
226                        forecast_result = self.obj.get_forecast(steps=h)
227                        mean_forecast = forecast_result.predicted_mean
228                        lower_bound = forecast_result.conf_int()[:, 0]
229                        upper_bound = forecast_result.conf_int()[:, 1]
230
231                    except Exception as e:
232                        try:
233                            forecast_result = self.obj.forecast(steps=h)
234                            residuals = self.obj.resid
235                            std_errors = np.std(residuals)
236                            mean_forecast = forecast_result
237                            lower_bound = (
238                                forecast_result - pi_multiplier * std_errors
239                            )
240                            upper_bound = (
241                                forecast_result + pi_multiplier * std_errors
242                            )
243
244                        except Exception as e:
245                            try:
246                                mean_forecast = self.obj.forecast(
247                                    steps=h
248                                ).values
249                                forecast_result = self.obj.prediction_intervals(
250                                    steps=h, alpha=self.alpha_ / 100, **kwargs
251                                )
252                                lower_bound = forecast_result["lower"].values
253                                upper_bound = forecast_result["upper"].values
254                            except Exception:
255                                mean_forecast = self.obj.forecast(steps=h)
256                                forecast_result = self.obj.prediction_intervals(
257                                    steps=h, alpha=self.alpha_ / 100, **kwargs
258                                )
259                                lower_bound = forecast_result["lower"]
260                                upper_bound = forecast_result["upper"]
261
262        else:
263            if self.model == "VAR":
264                (
265                    mean_forecast,
266                    lower_bound,
267                    upper_bound,
268                ) = self.obj.forecast_interval(
269                    self.obj.endog, steps=h, alpha=self.alpha_ / 100, **kwargs
270                )
271
272            elif self.model == "VECM":
273                forecast_result = self.obj.predict(steps=h)
274                mean_forecast = forecast_result
275                lower_bound, upper_bound = self._compute_confidence_intervals(
276                    forecast_result, alpha=self.alpha_ / 100, **kwargs
277                )
278
279            elif self.model == "ARIMA":
280                forecast_result = self.obj.get_forecast(steps=h)
281                mean_forecast = forecast_result.predicted_mean
282                lower_bound = forecast_result.conf_int()[:, 0]
283                upper_bound = forecast_result.conf_int()[:, 1]
284
285            elif self.model == "ETS":
286                forecast_result = self.obj.forecast(steps=h)
287                residuals = self.obj.resid
288                std_errors = np.std(residuals)
289                mean_forecast = forecast_result
290                lower_bound = forecast_result - pi_multiplier * std_errors
291                upper_bound = forecast_result + pi_multiplier * std_errors
292
293            elif self.model == "Theta":
294                try:
295                    mean_forecast = self.obj.forecast(steps=h).values
296                    forecast_result = self.obj.prediction_intervals(
297                        steps=h, alpha=self.alpha_ / 100, **kwargs
298                    )
299                    lower_bound = forecast_result["lower"].values
300                    upper_bound = forecast_result["upper"].values
301                except Exception:
302                    mean_forecast = self.obj.forecast(steps=h)
303                    forecast_result = self.obj.prediction_intervals(
304                        steps=h, alpha=self.alpha_ / 100, **kwargs
305                    )
306                    lower_bound = forecast_result["lower"]
307                    upper_bound = forecast_result["upper"]
308
309            else:
310                raise ValueError("model not recognized")
311
312        try:
313            self.mean_ = pd.DataFrame(
314                mean_forecast,
315                columns=self.series_names,
316                index=self.output_dates_,
317            )
318            self.lower_ = pd.DataFrame(
319                lower_bound, columns=self.series_names, index=self.output_dates_
320            )
321            self.upper_ = pd.DataFrame(
322                upper_bound, columns=self.series_names, index=self.output_dates_
323            )
324        except Exception:
325            self.mean_ = pd.Series(
326                mean_forecast, name=self.series_names, index=self.output_dates_
327            )
328            self.lower_ = pd.Series(
329                lower_bound, name=self.series_names, index=self.output_dates_
330            )
331            self.upper_ = pd.Series(
332                upper_bound, name=self.series_names, index=self.output_dates_
333            )
334
335        return DescribeResult(
336            mean=self.mean_, lower=self.lower_, upper=self.upper_
337        )
338
339    def _compute_confidence_intervals(self, forecast_result, alpha, **kwargs):
340        """
341        Compute confidence intervals for VECM forecasts.
342        Uses the covariance of residuals to approximate the confidence intervals.
343        """
344        residuals = self.obj.resid
345        cov_matrix = np.cov(residuals.T)  # Covariance matrix of residuals
346        std_errors = np.sqrt(np.diag(cov_matrix))  # Standard errors
347
348        z_value = norm.ppf(1 - alpha / 2)  # Z-score for the given alpha level
349        lower_bound = forecast_result - z_value * std_errors
350        upper_bound = forecast_result + z_value * std_errors
351
352        return lower_bound, upper_bound
353
354    def score(self, X, training_index, testing_index, scoring=None, **kwargs):
355        """Train on training_index, score on testing_index."""
356
357        assert (
358            bool(set(training_index).intersection(set(testing_index))) == False
359        ), "Non-overlapping 'training_index' and 'testing_index' required"
360
361        # Dimensions
362        try:
363            # multivariate time series
364            n, p = X.shape
365        except:
366            # univariate time series
367            n = X.shape[0]
368            p = 1
369
370        # Training and testing sets
371        if p > 1:
372            X_train = X[training_index, :]
373            X_test = X[testing_index, :]
374        else:
375            X_train = X[training_index]
376            X_test = X[testing_index]
377
378        # Horizon
379        h = len(testing_index)
380        assert (
381            len(training_index) + h
382        ) <= n, "Please check lengths of training and testing windows"
383
384        # Fit and predict
385        self.fit(X_train, **kwargs)
386        preds = self.predict(h=h, **kwargs)
387
388        if scoring is None:
389            scoring = "neg_root_mean_squared_error"
390
391        # check inputs
392        assert scoring in (
393            "explained_variance",
394            "neg_mean_absolute_error",
395            "neg_mean_squared_error",
396            "neg_root_mean_squared_error",
397            "neg_mean_squared_log_error",
398            "neg_median_absolute_error",
399            "r2",
400        ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \
401                               'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \
402                               'neg_median_absolute_error', 'r2')"
403
404        scoring_options = {
405            "explained_variance": skm2.explained_variance_score,
406            "neg_mean_absolute_error": skm2.mean_absolute_error,
407            "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2),
408            "neg_root_mean_squared_error": lambda x, y: np.sqrt(
409                np.mean((x - y) ** 2)
410            ),
411            "neg_mean_squared_log_error": skm2.mean_squared_log_error,
412            "neg_median_absolute_error": skm2.median_absolute_error,
413            "r2": skm2.r2_score,
414        }
415
416        # if p > 1:
417        #     return tuple(
418        #         [
419        #             scoring_options[scoring](
420        #                 X_test[:, i], preds[:, i]#, **kwargs
421        #             )
422        #             for i in range(p)
423        #         ]
424        #     )
425        # else:
426        return scoring_options[scoring](X_test, preds)
427
428    def plot(self, series=None, type_axis="dates", type_plot="pi"):
429        """Plot time series forecast
430
431        Parameters:
432
433        series: {integer} or {string}
434            series index or name
435
436        """
437
438        assert all(
439            [
440                self.mean_ is not None,
441                self.lower_ is not None,
442                self.upper_ is not None,
443                self.output_dates_ is not None,
444            ]
445        ), "model forecasting must be obtained first (with predict)"
446
447        if series is None:
448            assert (
449                self.n_series == 1
450            ), "please specify series index or name (n_series > 1)"
451            series = 0
452
453        if isinstance(series, str):
454            assert (
455                series in self.series_names
456            ), f"series {series} doesn't exist in the input dataset"
457            series_idx = self.df_.columns.get_loc(series)
458        else:
459            assert isinstance(series, int) and (
460                0 <= series < self.n_series
461            ), f"check series index (< {self.n_series})"
462            series_idx = series
463
464        if isinstance(self.df_, pd.DataFrame):
465            y_all = list(self.df_.iloc[:, series_idx]) + list(
466                self.mean_.iloc[:, series_idx]
467            )
468            y_test = list(self.mean_.iloc[:, series_idx])
469        else:
470            y_all = list(self.df_.values) + list(self.mean_.values)
471            y_test = list(self.mean_.values)
472        n_points_all = len(y_all)
473        n_points_train = self.df_.shape[0]
474
475        if type_axis == "numeric":
476            x_all = [i for i in range(n_points_all)]
477            x_test = [i for i in range(n_points_train, n_points_all)]
478
479        if type_axis == "dates":  # use dates
480            x_all = np.concatenate(
481                (self.input_dates.values, self.output_dates_.values), axis=None
482            )
483            x_test = self.output_dates_.values
484
485        if type_plot == "pi":
486            fig, ax = plt.subplots()
487            ax.plot(x_all, y_all, "-")
488            ax.plot(x_test, y_test, "-", color="orange")
489            try:
490                ax.fill_between(
491                    x_test,
492                    self.lower_.iloc[:, series_idx],
493                    self.upper_.iloc[:, series_idx],
494                    alpha=0.2,
495                    color="orange",
496                )
497            except Exception:
498                ax.fill_between(
499                    x_test,
500                    self.lower_.values,
501                    self.upper_.values,
502                    alpha=0.2,
503                    color="orange",
504                )
505            if self.replications is None:
506                if self.n_series > 1:
507                    plt.title(
508                        f"prediction intervals for {series}",
509                        loc="left",
510                        fontsize=12,
511                        fontweight=0,
512                        color="black",
513                    )
514                else:
515                    plt.title(
516                        "prediction intervals for input time series",
517                        loc="left",
518                        fontsize=12,
519                        fontweight=0,
520                        color="black",
521                    )
522                plt.show()
523            else:  # self.replications is not None
524                if self.n_series > 1:
525                    plt.title(
526                        f"prediction intervals for {self.replications} simulations of {series}",
527                        loc="left",
528                        fontsize=12,
529                        fontweight=0,
530                        color="black",
531                    )
532                else:
533                    plt.title(
534                        f"prediction intervals for {self.replications} simulations of input time series",
535                        loc="left",
536                        fontsize=12,
537                        fontweight=0,
538                        color="black",
539                    )
540                plt.show()
541
542        if type_plot == "spaghetti":
543            palette = plt.get_cmap("Set1")
544            sims_ix = getsims(self.sims_, series_idx)
545            plt.plot(x_all, y_all, "-")
546            for col_ix in range(
547                sims_ix.shape[1]
548            ):  # avoid this when there are thousands of simulations
549                plt.plot(
550                    x_test,
551                    sims_ix[:, col_ix],
552                    "-",
553                    color=palette(col_ix),
554                    linewidth=1,
555                    alpha=0.9,
556                )
557            plt.plot(x_all, y_all, "-", color="black")
558            plt.plot(x_test, y_test, "-", color="blue")
559            # Add titles
560            if self.n_series > 1:
561                plt.title(
562                    f"{self.replications} simulations of {series}",
563                    loc="left",
564                    fontsize=12,
565                    fontweight=0,
566                    color="black",
567                )
568            else:
569                plt.title(
570                    f"{self.replications} simulations of input time series",
571                    loc="left",
572                    fontsize=12,
573                    fontweight=0,
574                    color="black",
575                )
576            plt.xlabel("Time")
577            plt.ylabel("Values")
578            # Show the graph
579            plt.show()
580
581    def cross_val_score(
582        self,
583        X,
584        scoring="root_mean_squared_error",
585        n_jobs=None,
586        verbose=0,
587        xreg=None,
588        initial_window=5,
589        horizon=3,
590        fixed_window=False,
591        show_progress=True,
592        level=95,
593        **kwargs,
594    ):
595        """Evaluate a score by time series cross-validation.
596
597        Parameters:
598
599            X: {array-like, sparse matrix} of shape (n_samples, n_features)
600                The data to fit.
601
602            scoring: str or a function
603                A str in ('root_mean_squared_error', 'mean_squared_error', 'mean_error',
604                'mean_absolute_error', 'mean_percentage_error',
605                'mean_absolute_percentage_error', 'winkler_score', 'coverage'),
606                or a custom function with the same signature as 'coverage' and 'winkler_score' in `utils.timeseries`
607
608            n_jobs: int, default=None
609                Number of jobs to run in parallel.
610
611            verbose: int, default=0
612                The verbosity level.
613
614            xreg: array-like, optional (default=None)
615                Additional (external) regressors to be passed to `fit`
616                xreg must be in 'increasing' order (most recent observations last)
617
618            initial_window: int
619                initial number of consecutive values in each training set sample
620
621            horizon: int
622                number of consecutive values in test set sample
623
624            fixed_window: boolean
625                if False, all training samples start at index 0, and the training
626                window's size is increasing.
627                if True, the training window's size is fixed, and the window is
628                rolling forward
629
630            show_progress: boolean
631                if True, a progress bar is printed
632
633            **kwargs: dict
634                additional parameters to be passed to `fit` and `predict`
635
636        Returns:
637
638            A tuple: the raw errors and descriptive statistics of the errors
639
640        """
641        tscv = TimeSeriesSplit()
642
643        tscv_obj = tscv.split(
644            X,
645            initial_window=initial_window,
646            horizon=horizon,
647            fixed_window=fixed_window,
648        )
649
650        if isinstance(scoring, str):
651            assert scoring in (
652                "root_mean_squared_error",
653                "mean_squared_error",
654                "mean_error",
655                "mean_absolute_error",
656                "mean_percentage_error",
657                "mean_absolute_percentage_error",
658                "winkler_score",
659                "coverage",
660            ), "must have scoring in ('root_mean_squared_error', 'mean_squared_error', 'mean_error', 'mean_absolute_error', 'mean_percentage_error', 'mean_absolute_percentage_error', 'winkler_score', 'coverage')"
661
662            def err_func(X_test, X_pred, scoring):
663                if (self.replications is not None) or (
664                    self.type_pi == "gaussian"
665                ):  # probabilistic
666                    if scoring == "winkler_score":
667                        return winkler_score(X_pred, X_test, level=level)
668                    elif scoring == "coverage":
669                        return coverage(X_pred, X_test, level=level)
670                    else:
671                        return mean_errors(
672                            pred=X_pred.mean, actual=X_test, scoring=scoring
673                        )
674                else:  # not probabilistic
675                    return mean_errors(
676                        pred=X_pred, actual=X_test, scoring=scoring
677                    )
678
679        else:  # isinstance(scoring, str) = False
680            err_func = scoring
681
682        errors = []
683
684        train_indices = []
685
686        test_indices = []
687
688        for train_index, test_index in tscv_obj:
689            train_indices.append(train_index)
690            test_indices.append(test_index)
691
692        if show_progress is True:
693            iterator = tqdm(
694                zip(train_indices, test_indices), total=len(train_indices)
695            )
696        else:
697            iterator = zip(train_indices, test_indices)
698
699        for train_index, test_index in iterator:
700            if verbose == 1:
701                print(f"TRAIN: {train_index}")
702                print(f"TEST: {test_index}")
703
704            if isinstance(X, pd.DataFrame):
705                self.fit(X.iloc[train_index, :], xreg=xreg, **kwargs)
706                X_test = X.iloc[test_index, :]
707            else:
708                self.fit(X[train_index, :], xreg=xreg, **kwargs)
709                X_test = X[test_index, :]
710            X_pred = self.predict(h=int(len(test_index)), level=level, **kwargs)
711
712            errors.append(err_func(X_test, X_pred, scoring))
713
714        res = np.asarray(errors)
715
716        return res, describe(res)
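
The `initial_window`, `horizon`, and `fixed_window` arguments of `cross_val_score` above control an expanding or rolling-origin evaluation. The toy splitter below only illustrates that idea under one plausible stepping scheme; it is not the library's `TimeSeriesSplit`, whose exact step size may differ.

```python
# Illustrative only: a toy expanding/rolling-origin splitter mimicking the
# meaning of initial_window, horizon and fixed_window documented above.
def toy_time_series_splits(n, initial_window=5, horizon=3, fixed_window=False):
    splits = []
    start, end = 0, initial_window
    while end + horizon <= n:
        train_idx = list(range(start, end))          # training window
        test_idx = list(range(end, end + horizon))   # next `horizon` points
        splits.append((train_idx, test_idx))
        end += horizon                               # assumed step size
        if fixed_window:
            start += horizon                         # rolling training window
    return splits

# 12 observations, expanding training window
for train, test in toy_time_series_splits(12, initial_window=5, horizon=3):
    print(train, "->", test)
```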

Time series with statistical models (statsmodels), mostly for benchmarks

Parameters:

model: str
    type of model; currently 'VAR', 'VECM', 'ARIMA', 'ETS' or 'Theta'.
    Default is None

obj: object
    A time series model from statsmodels

Attributes:

df_: data frame
    the input data frame, in case a data.frame is provided to `fit`

level_: int
    level of confidence for prediction intervals (default is 95)

Examples: See examples/classical_mts_timeseries.py
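
A minimal usage sketch is shown below. The `model="VAR"` constructor argument and the toy random-walk data are assumptions made for illustration; the example file referenced above remains the canonical usage.

```python
import numpy as np
import nnetsauce as ns

rng = np.random.default_rng(42)
X = rng.normal(size=(100, 3)).cumsum(axis=0)   # three toy random-walk series

mdl = ns.ClassicalMTS(model="VAR")   # assumed: model selected by name
mdl.fit(X)
res = mdl.predict(h=5, level=95)     # named tuple with fields mean, lower, upper
print(res.mean)
```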

def fit(self, X, **kwargs):
 98    def fit(self, X, **kwargs):
 99        """Fit ClassicalMTS model to training data X, with optional regressors xreg
100
101        Parameters:
102
103        X: {array-like}, shape = [n_samples, n_features]
104            Training time series, where n_samples is the number
105            of samples and n_features is the number of features;
106            X must be in increasing order (most recent observations last)
107
108        **kwargs: for now, additional parameters to be passed to kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)
109
110        Returns:
111
112        self: object
113        """
114
115        try:
116            self.n_series = X.shape[1]
117        except Exception:
118            self.n_series = 1
119
120        if (isinstance(X, pd.DataFrame) is False) and isinstance(
121            X, pd.Series
122        ) is False:  # input data set is a numpy array
123            X = pd.DataFrame(X)
124            if self.n_series > 1:
125                self.series_names = [
126                    "series" + str(i) for i in range(X.shape[1])
127                ]
128            else:
129                self.series_names = "series0"
130
131        else:  # input data set is a DataFrame or Series with column names
132            X_index = None
133            if X.index is not None and len(X.shape) > 1:
134                X_index = X.index
135                X = copy.deepcopy(mo.convert_df_to_numeric(X))
136            if X_index is not None:
137                try:
138                    X.index = X_index
139                except Exception:
140                    pass
141            if isinstance(X, pd.DataFrame):
142                self.series_names = X.columns.tolist()
143            else:
144                self.series_names = X.name
145
146        if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series):
147            self.df_ = X
148            X = X.values
149            self.df_.columns = self.series_names
150            self.input_dates = ts.compute_input_dates(self.df_)
151        else:
152            self.df_ = pd.DataFrame(X, columns=self.series_names)
153
154        if self.model == "Theta":
155            try:
156                self.obj = self.obj(self.df_, **kwargs).fit()
157            except Exception as e:
158                self.obj = self.obj(self.df_.values, **kwargs).fit()
159            self.residuals_ = None
160        else:
161            self.obj = self.obj(X, **kwargs).fit()
162            try:
163                self.residuals_ = self.obj.resid
164            except Exception as e:  # Theta
165                self.residuals_ = None
166
167        return self

Fit ClassicalMTS model to training data X, with optional regressors xreg

Parameters:

X: {array-like}, shape = [n_samples, n_features] Training time series, where n_samples is the number of samples and n_features is the number of features; X must be in increasing order (most recent observations last)

**kwargs: for now, additional parameters to be passed to kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)

Returns:

self: object
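
For instance, passing a pandas DataFrame keeps the column names as series names and lets the dates be used for the forecast index (a hedged sketch; the date range and values below are arbitrary):

```python
import numpy as np
import pandas as pd
import nnetsauce as ns

dates = pd.date_range("2020-01-31", periods=36, freq="M")
df = pd.DataFrame(
    np.random.default_rng(0).normal(size=(36, 2)).cumsum(axis=0),
    index=dates,
    columns=["sales", "visits"],
)

mdl = ns.ClassicalMTS(model="VAR")   # assumed constructor argument
mdl.fit(df)                          # series_names taken from df.columns
```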

def predict(self, h=5, level=95, **kwargs):
169    def predict(self, h=5, level=95, **kwargs):
170        """Forecast all the time series, h steps ahead
171
172        Parameters:
173
174        h: {integer}
175            Forecasting horizon
176
177        **kwargs: additional parameters to be passed to
178                the underlying statsmodels forecasting methods
179
180        Returns:
181
182        model predictions for horizon = h: {array-like}
183
184        """
185
186        self.output_dates_, frequency = ts.compute_output_dates(self.df_, h)
187        self.level_ = level
188        self.lower_ = None  # do not remove (/!\)
189        self.upper_ = None  # do not remove (/!\)
190        self.sims_ = None  # do not remove (/!\)
191        self.level_ = level
192        self.alpha_ = 100 - level
193
194        pi_multiplier = norm.ppf(1 - self.alpha_ / 200)
195
196        # Named tuple for forecast results
197        DescribeResult = namedtuple(
198            "DescribeResult", ("mean", "lower", "upper")
199        )
200
201        if (
202            self.obj is not None
203        ):  # try all the special cases of the else section (there's probably a better way)
204            try:
205                (
206                    mean_forecast,
207                    lower_bound,
208                    upper_bound,
209                ) = self.obj.forecast_interval(
210                    self.obj.endog, steps=h, alpha=self.alpha_ / 100, **kwargs
211                )
212
213            except Exception as e:
214                try:
215                    forecast_result = self.obj.predict(steps=h)
216                    mean_forecast = forecast_result
217                    (
218                        lower_bound,
219                        upper_bound,
220                    ) = self._compute_confidence_intervals(
221                        forecast_result, alpha=self.alpha_ / 100, **kwargs
222                    )
223
224                except Exception as e:
225                    try:
226                        forecast_result = self.obj.get_forecast(steps=h)
227                        mean_forecast = forecast_result.predicted_mean
228                        lower_bound = forecast_result.conf_int()[:, 0]
229                        upper_bound = forecast_result.conf_int()[:, 1]
230
231                    except Exception as e:
232                        try:
233                            forecast_result = self.obj.forecast(steps=h)
234                            residuals = self.obj.resid
235                            std_errors = np.std(residuals)
236                            mean_forecast = forecast_result
237                            lower_bound = (
238                                forecast_result - pi_multiplier * std_errors
239                            )
240                            upper_bound = (
241                                forecast_result + pi_multiplier * std_errors
242                            )
243
244                        except Exception as e:
245                            try:
246                                mean_forecast = self.obj.forecast(
247                                    steps=h
248                                ).values
249                                forecast_result = self.obj.prediction_intervals(
250                                    steps=h, alpha=self.alpha_ / 100, **kwargs
251                                )
252                                lower_bound = forecast_result["lower"].values
253                                upper_bound = forecast_result["upper"].values
254                            except Exception:
255                                mean_forecast = self.obj.forecast(steps=h)
256                                forecast_result = self.obj.prediction_intervals(
257                                    steps=h, alpha=self.alpha_ / 100, **kwargs
258                                )
259                                lower_bound = forecast_result["lower"]
260                                upper_bound = forecast_result["upper"]
261
262        else:
263            if self.model == "VAR":
264                (
265                    mean_forecast,
266                    lower_bound,
267                    upper_bound,
268                ) = self.obj.forecast_interval(
269                    self.obj.endog, steps=h, alpha=self.alpha_ / 100, **kwargs
270                )
271
272            elif self.model == "VECM":
273                forecast_result = self.obj.predict(steps=h)
274                mean_forecast = forecast_result
275                lower_bound, upper_bound = self._compute_confidence_intervals(
276                    forecast_result, alpha=self.alpha_ / 100, **kwargs
277                )
278
279            elif self.model == "ARIMA":
280                forecast_result = self.obj.get_forecast(steps=h)
281                mean_forecast = forecast_result.predicted_mean
282                lower_bound = forecast_result.conf_int()[:, 0]
283                upper_bound = forecast_result.conf_int()[:, 1]
284
285            elif self.model == "ETS":
286                forecast_result = self.obj.forecast(steps=h)
287                residuals = self.obj.resid
288                std_errors = np.std(residuals)
289                mean_forecast = forecast_result
290                lower_bound = forecast_result - pi_multiplier * std_errors
291                upper_bound = forecast_result + pi_multiplier * std_errors
292
293            elif self.model == "Theta":
294                try:
295                    mean_forecast = self.obj.forecast(steps=h).values
296                    forecast_result = self.obj.prediction_intervals(
297                        steps=h, alpha=self.alpha_ / 100, **kwargs
298                    )
299                    lower_bound = forecast_result["lower"].values
300                    upper_bound = forecast_result["upper"].values
301                except Exception:
302                    mean_forecast = self.obj.forecast(steps=h)
303                    forecast_result = self.obj.prediction_intervals(
304                        steps=h, alpha=self.alpha_ / 100, **kwargs
305                    )
306                    lower_bound = forecast_result["lower"]
307                    upper_bound = forecast_result["upper"]
308
309            else:
310                raise ValueError("model not recognized")
311
312        try:
313            self.mean_ = pd.DataFrame(
314                mean_forecast,
315                columns=self.series_names,
316                index=self.output_dates_,
317            )
318            self.lower_ = pd.DataFrame(
319                lower_bound, columns=self.series_names, index=self.output_dates_
320            )
321            self.upper_ = pd.DataFrame(
322                upper_bound, columns=self.series_names, index=self.output_dates_
323            )
324        except Exception:
325            self.mean_ = pd.Series(
326                mean_forecast, name=self.series_names, index=self.output_dates_
327            )
328            self.lower_ = pd.Series(
329                lower_bound, name=self.series_names, index=self.output_dates_
330            )
331            self.upper_ = pd.Series(
332                upper_bound, name=self.series_names, index=self.output_dates_
333            )
334
335        return DescribeResult(
336            mean=self.mean_, lower=self.lower_, upper=self.upper_
337        )

Forecast all the time series, h steps ahead

Parameters:

h: {integer} Forecasting horizon

level: {integer} Confidence level for prediction intervals (default is 95)

**kwargs: additional parameters passed to the underlying statsmodels forecasting methods

Returns:

model predictions for horizon = h, as a named tuple with fields mean, lower and upper
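
As a brief sketch (continuing with the fitted `mdl` from the sketches above), the returned named tuple mirrors what is stored on the estimator:

```python
res = mdl.predict(h=10, level=99)   # 99% prediction intervals, i.e. alpha_ = 1
assert res.mean is mdl.mean_        # forecasts are also kept as attributes
print(mdl.lower_.head())
print(mdl.upper_.head())
```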

def score(self, X, training_index, testing_index, scoring=None, **kwargs):
354    def score(self, X, training_index, testing_index, scoring=None, **kwargs):
355        """Train on training_index, score on testing_index."""
356
357        assert (
358            bool(set(training_index).intersection(set(testing_index))) == False
359        ), "Non-overlapping 'training_index' and 'testing_index' required"
360
361        # Dimensions
362        try:
363            # multivariate time series
364            n, p = X.shape
365        except:
366            # univariate time series
367            n = X.shape[0]
368            p = 1
369
370        # Training and testing sets
371        if p > 1:
372            X_train = X[training_index, :]
373            X_test = X[testing_index, :]
374        else:
375            X_train = X[training_index]
376            X_test = X[testing_index]
377
378        # Horizon
379        h = len(testing_index)
380        assert (
381            len(training_index) + h
382        ) <= n, "Please check lengths of training and testing windows"
383
384        # Fit and predict
385        self.fit(X_train, **kwargs)
386        preds = self.predict(h=h, **kwargs)
387
388        if scoring is None:
389            scoring = "neg_root_mean_squared_error"
390
391        # check inputs
392        assert scoring in (
393            "explained_variance",
394            "neg_mean_absolute_error",
395            "neg_mean_squared_error",
396            "neg_root_mean_squared_error",
397            "neg_mean_squared_log_error",
398            "neg_median_absolute_error",
399            "r2",
400        ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \
401                               'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \
402                               'neg_median_absolute_error', 'r2')"
403
404        scoring_options = {
405            "explained_variance": skm2.explained_variance_score,
406            "neg_mean_absolute_error": skm2.mean_absolute_error,
407            "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2),
408            "neg_root_mean_squared_error": lambda x, y: np.sqrt(
409                np.mean((x - y) ** 2)
410            ),
411            "neg_mean_squared_log_error": skm2.mean_squared_log_error,
412            "neg_median_absolute_error": skm2.median_absolute_error,
413            "r2": skm2.r2_score,
414        }
415
416        # if p > 1:
417        #     return tuple(
418        #         [
419        #             scoring_options[scoring](
420        #                 X_test[:, i], preds[:, i]#, **kwargs
421        #             )
422        #             for i in range(p)
423        #         ]
424        #     )
425        # else:
426        return scoring_options[scoring](X_test, preds)

Train on training_index, score on testing_index.

class CustomClassifier(nnetsauce.custom.custom.Custom, sklearn.base.ClassifierMixin):
 16class CustomClassifier(Custom, ClassifierMixin):
 17    """Custom Classification model
 18
 19    Attributes:
 20
 21        obj: object
 22            any object containing a method fit (obj.fit()) and a method predict
 23            (obj.predict())
 24
 25        n_hidden_features: int
 26            number of nodes in the hidden layer
 27
 28        activation_name: str
 29            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 30
 31        a: float
 32            hyperparameter for 'prelu' or 'elu' activation function
 33
 34        nodes_sim: str
 35            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 36            'uniform'
 37
 38        bias: boolean
 39            indicates if the hidden layer contains a bias term (True) or not
 40            (False)
 41
 42        dropout: float
 43            regularization parameter; (random) percentage of nodes dropped out
 44            of the training
 45
 46        direct_link: boolean
 47            indicates if the original predictors are included (True) in model's
 48            fitting or not (False)
 49
 50        n_clusters: int
 51            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 52                no clustering)
 53
 54        cluster_encode: bool
 55            defines how the variable containing clusters is treated (default is one-hot)
 56            if `False`, then labels are used, without one-hot encoding
 57
 58        type_clust: str
 59            type of clustering method: currently k-means ('kmeans') or Gaussian
 60            Mixture Model ('gmm')
 61
 62        type_scaling: a tuple of 3 strings
 63            scaling methods for inputs, hidden layer, and clustering respectively
 64            (and when relevant).
 65            Currently available: standardization ('std') or MinMax scaling ('minmax')
 66
 67        col_sample: float
 68            percentage of covariates randomly chosen for training
 69
 70        row_sample: float
 71            percentage of rows chosen for training, by stratified bootstrapping
 72
 73        cv_calibration: int, cross-validation generator, or iterable, default=2
 74            Determines the cross-validation splitting strategy. Same as
 75            `sklearn.calibration.CalibratedClassifierCV`
 76
 77        calibration_method: str
 78            {'sigmoid', 'isotonic'}, default='sigmoid'
 79            The method to use for calibration. Same as
 80            `sklearn.calibration.CalibratedClassifierCV`
 81
 82        seed: int
 83            reproducibility seed for nodes_sim=='uniform'
 84
 85        backend: str
 86            "cpu" or "gpu" or "tpu"
 87
 88    Examples:
 89
 90    Note: it's better to use the `DeepClassifier` or `LazyDeepClassifier` classes directly
 91
 92    ```python
 93    import nnetsauce as ns
 94    from sklearn.ensemble import RandomForestClassifier
 95    from sklearn.model_selection import train_test_split
 96    from sklearn.datasets import load_digits
 97    from time import time
 98
 99    digits = load_digits()
100    X = digits.data
101    y = digits.target
102    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
103                                                        random_state=123)
104
105    # layer 1 (base layer) ----
106    layer1_regr = RandomForestClassifier(n_estimators=10, random_state=123)
107
108    start = time()
109
110    layer1_regr.fit(X_train, y_train)
111
112    # Accuracy in layer 1
113    print(layer1_regr.score(X_test, y_test))
114
115    # layer 2 using layer 1 ----
116    layer2_regr = ns.CustomClassifier(obj = layer1_regr, n_hidden_features=5,
117                            direct_link=True, bias=True,
118                            nodes_sim='uniform', activation_name='relu',
119                            n_clusters=2, seed=123)
120    layer2_regr.fit(X_train, y_train)
121
122    # Accuracy in layer 2
123    print(layer2_regr.score(X_test, y_test))
124
125    # layer 3 using layer 2 ----
126    layer3_regr = ns.CustomClassifier(obj = layer2_regr, n_hidden_features=10,
127                            direct_link=True, bias=True, dropout=0.7,
128                            nodes_sim='uniform', activation_name='relu',
129                            n_clusters=2, seed=123)
130    layer3_regr.fit(X_train, y_train)
131
132    # Accuracy in layer 3
133    print(layer3_regr.score(X_test, y_test))
134
135    print(f"Elapsed {time() - start}")
136    ```
137
138    """
139
140    # construct the object -----
141    _estimator_type = "classifier"
142
143    def __init__(
144        self,
145        obj,
146        n_hidden_features=5,
147        activation_name="relu",
148        a=0.01,
149        nodes_sim="sobol",
150        bias=True,
151        dropout=0,
152        direct_link=True,
153        n_clusters=2,
154        cluster_encode=True,
155        type_clust="kmeans",
156        type_scaling=("std", "std", "std"),
157        col_sample=1,
158        row_sample=1,
159        cv_calibration=2,
160        calibration_method="sigmoid",
161        seed=123,
162        backend="cpu",
163    ):
164        super().__init__(
165            obj=obj,
166            n_hidden_features=n_hidden_features,
167            activation_name=activation_name,
168            a=a,
169            nodes_sim=nodes_sim,
170            bias=bias,
171            dropout=dropout,
172            direct_link=direct_link,
173            n_clusters=n_clusters,
174            cluster_encode=cluster_encode,
175            type_clust=type_clust,
176            type_scaling=type_scaling,
177            col_sample=col_sample,
178            row_sample=row_sample,
179            seed=seed,
180            backend=backend,
181        )
182        self.coef_ = None
183        self.intercept_ = None
184        self.type_fit = "classification"
185        self.cv_calibration = cv_calibration
186        self.calibration_method = calibration_method
187
188    def __sklearn_clone__(self):
189        """Create a clone of the estimator.
190
191        This is required for scikit-learn's calibration system to work properly.
192        """
193        # Create a new instance with the same parameters
194        clone = CustomClassifier(
195            obj=self.obj,
196            n_hidden_features=self.n_hidden_features,
197            activation_name=self.activation_name,
198            a=self.a,
199            nodes_sim=self.nodes_sim,
200            bias=self.bias,
201            dropout=self.dropout,
202            direct_link=self.direct_link,
203            n_clusters=self.n_clusters,
204            cluster_encode=self.cluster_encode,
205            type_clust=self.type_clust,
206            type_scaling=self.type_scaling,
207            col_sample=self.col_sample,
208            row_sample=self.row_sample,
209            cv_calibration=self.cv_calibration,
210            calibration_method=self.calibration_method,
211            seed=self.seed,
212            backend=self.backend,
213        )
214        return clone
215
216    def fit(self, X, y, sample_weight=None, **kwargs):
217        """Fit custom model to training data (X, y).
218
219        Parameters:
220
221            X: {array-like}, shape = [n_samples, n_features]
222                Training vectors, where n_samples is the number
223                of samples and n_features is the number of features.
224
225            y: array-like, shape = [n_samples]
226                Target values.
227
228            sample_weight: array-like, shape = [n_samples]
229                Sample weights.
230
231            **kwargs: additional parameters to be passed to
232                        self.cook_training_set or self.obj.fit
233
234        Returns:
235
236            self: object
237        """
238
239        if len(X.shape) == 1:
240            if isinstance(X, pd.DataFrame):
241                X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns)
242            else:
243                X = X.reshape(1, -1)
244
245        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
246        self.classes_ = np.unique(y)
247        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
248
249        # Wrap in CalibratedClassifierCV if needed
250        if self.cv_calibration is not None:
251            self.obj = CalibratedClassifierCV(
252                self.obj, cv=self.cv_calibration, method=self.calibration_method
253            )
254
255        # if sample_weights, else: (must use self.row_index)
256        if sample_weight is not None:
257            self.obj.fit(
258                scaled_Z,
259                output_y,
260                sample_weight=sample_weight[self.index_row_].ravel(),
261                **kwargs
262            )
263            return self
264
265        # if sample_weight is None:
266        self.obj.fit(scaled_Z, output_y, **kwargs)
267        self.classes_ = np.unique(y)  # for compatibility with sklearn
268        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
269
270        if hasattr(self.obj, "coef_"):
271            self.coef_ = self.obj.coef_
272
273        if hasattr(self.obj, "intercept_"):
274            self.intercept_ = self.obj.intercept_
275
276        return self
277
278    def partial_fit(self, X, y, sample_weight=None, **kwargs):
279        """Partial fit custom model to training data (X, y).
280
281        Parameters:
282
283            X: {array-like}, shape = [n_samples, n_features]
284                Subset of training vectors, where n_samples is the number
285                of samples and n_features is the number of features.
286
287            y: array-like, shape = [n_samples]
288                Subset of target values.
289
290            sample_weight: array-like, shape = [n_samples]
291                Sample weights.
292
293            **kwargs: additional parameters to be passed to
294                        self.cook_training_set or self.obj.fit
295
296        Returns:
297
298            self: object
299        """
300
301        if len(X.shape) == 1:
302            if isinstance(X, pd.DataFrame):
303                X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns)
304            else:
305                X = X.reshape(1, -1)
306            y = np.array([y], dtype=int)
307
308        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
309        self.n_classes_ = len(np.unique(y))  # for compatibility with sklearn
310
311        # if sample_weights, else: (must use self.row_index)
312        if sample_weight is not None:
313            try:
314                self.obj.partial_fit(
315                    scaled_Z,
316                    output_y,
317                    sample_weight=sample_weight[self.index_row_].ravel(),
318                    # **kwargs
319                )
320            except Exception:
321                raise NotImplementedError
322
323            return self
324
325        # if sample_weight is None:
326        # try:
327        self.obj.partial_fit(scaled_Z, output_y)
328        # except:
329        #    raise NotImplementedError
330
331        self.classes_ = np.unique(y)  # for compatibility with sklearn
332        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
333
334        return self
335
336    def predict(self, X, **kwargs):
337        """Predict test data X.
338
339        Parameters:
340
341            X: {array-like}, shape = [n_samples, n_features]
342                Training vectors, where n_samples is the number
343                of samples and n_features is the number of features.
344
345            **kwargs: additional parameters to be passed to
346                    self.cook_test_set
347
348        Returns:
349
350            model predictions: {array-like}
351        """
352
353        if len(X.shape) == 1:
354            n_features = X.shape[0]
355            new_X = mo.rbind(
356                X.reshape(1, n_features),
357                np.ones(n_features).reshape(1, n_features),
358            )
359
360            return (
361                self.obj.predict(self.cook_test_set(new_X, **kwargs), **kwargs)
362            )[0]
363
364        return self.obj.predict(self.cook_test_set(X, **kwargs), **kwargs)
365
366    def predict_proba(self, X, **kwargs):
367        """Predict probabilities for test data X.
368
369        Args:
370
371            X: {array-like}, shape = [n_samples, n_features]
372                Training vectors, where n_samples is the number
373                of samples and n_features is the number of features.
374
375            **kwargs: additional parameters to be passed to
376                    self.cook_test_set
377
378        Returns:
379
380            probability estimates for test data: {array-like}
381        """
382
383        if len(X.shape) == 1:
384            n_features = X.shape[0]
385            new_X = mo.rbind(
386                X.reshape(1, n_features),
387                np.ones(n_features).reshape(1, n_features),
388            )
389            return (
390                self.obj.predict_proba(
391                    self.cook_test_set(new_X, **kwargs), **kwargs
392                )
393            )[0]
394        return self.obj.predict_proba(self.cook_test_set(X, **kwargs), **kwargs)
395
396    def decision_function(self, X, **kwargs):
397        """Compute the decision function of X.
398
399        Parameters:
400            X: {array-like}, shape = [n_samples, n_features]
401                Samples to compute decision function for.
402
403            **kwargs: additional parameters to be passed to
404                    self.cook_test_set
405
406        Returns:
407            array-like of shape (n_samples,) or (n_samples, n_classes)
408            Decision function of the input samples. The order of outputs is the same
409            as that of the classes passed to fit.
410        """
411        if not hasattr(self.obj, "decision_function"):
412            # If base classifier doesn't have decision_function, use predict_proba
413            proba = self.predict_proba(X, **kwargs)
414            if proba.shape[1] == 2:
415                return proba[:, 1]  # For binary classification
416            return proba  # For multiclass
417
418        if len(X.shape) == 1:
419            n_features = X.shape[0]
420            new_X = mo.rbind(
421                X.reshape(1, n_features),
422                np.ones(n_features).reshape(1, n_features),
423            )
424
425            return (
426                self.obj.decision_function(
427                    self.cook_test_set(new_X, **kwargs), **kwargs
428                )
429            )[0]
430
431        return self.obj.decision_function(
432            self.cook_test_set(X, **kwargs), **kwargs
433        )
434
435    def score(self, X, y, scoring=None):
436        """Scoring function for classification.
437
438        Args:
439
440            X: {array-like}, shape = [n_samples, n_features]
441                Training vectors, where n_samples is the number
442                of samples and n_features is the number of features.
443
444            y: array-like, shape = [n_samples]
445                Target values.
446
447            scoring: str
448                scoring method (default is accuracy)
449
450        Returns:
451
452            score: float
453        """
454
455        if scoring is None:
456            scoring = "accuracy"
457
458        if scoring == "accuracy":
459            return skm2.accuracy_score(y, self.predict(X))
460
461        if scoring == "f1":
462            return skm2.f1_score(y, self.predict(X))
463
464        if scoring == "precision":
465            return skm2.precision_score(y, self.predict(X))
466
467        if scoring == "recall":
468            return skm2.recall_score(y, self.predict(X))
469
470        if scoring == "roc_auc":
471            return skm2.roc_auc_score(y, self.predict(X))
472
473        if scoring == "log_loss":
474            return skm2.log_loss(y, self.predict_proba(X))
475
476        if scoring == "balanced_accuracy":
477            return skm2.balanced_accuracy_score(y, self.predict(X))
478
479        if scoring == "average_precision":
480            return skm2.average_precision_score(y, self.predict(X))
481
482        if scoring == "neg_brier_score":
483            return -skm2.brier_score_loss(y, self.predict_proba(X))
484
485        if scoring == "neg_log_loss":
486            return -skm2.log_loss(y, self.predict_proba(X))
487
488    @property
489    def _estimator_type(self):
490        return "classifier"

Custom Classification model

Attributes:

obj: object
    any object containing a method fit (obj.fit()) and a method predict
    (obj.predict())

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

col_sample: float
    percentage of covariates randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

cv_calibration: int, cross-validation generator, or iterable, default=2
    Determines the cross-validation splitting strategy. Same as
    `sklearn.calibration.CalibratedClassifierCV`

calibration_method: str
    {'sigmoid', 'isotonic'}, default='sigmoid'
    The method to use for calibration. Same as
    `sklearn.calibration.CalibratedClassifierCV`

seed: int
    reproducibility seed for nodes_sim=='uniform'

backend: str
    "cpu" or "gpu" or "tpu"

Examples:

Note: it's better to use the DeepClassifier or LazyDeepClassifier classes directly

import nnetsauce as ns
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_digits
from time import time

digits = load_digits()
X = digits.data
y = digits.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=123)

# layer 1 (base layer) ----
layer1_regr = RandomForestClassifier(n_estimators=10, random_state=123)

start = time()

layer1_regr.fit(X_train, y_train)

# Accuracy in layer 1
print(layer1_regr.score(X_test, y_test))

# layer 2 using layer 1 ----
layer2_regr = ns.CustomClassifier(obj = layer1_regr, n_hidden_features=5,
                        direct_link=True, bias=True,
                        nodes_sim='uniform', activation_name='relu',
                        n_clusters=2, seed=123)
layer2_regr.fit(X_train, y_train)

# Accuracy in layer 2
print(layer2_regr.score(X_test, y_test))

# layer 3 using layer 2 ----
layer3_regr = ns.CustomClassifier(obj = layer2_regr, n_hidden_features=10,
                        direct_link=True, bias=True, dropout=0.7,
                        nodes_sim='uniform', activation_name='relu',
                        n_clusters=2, seed=123)
layer3_regr.fit(X_train, y_train)

# Accuracy in layer 3
print(layer3_regr.score(X_test, y_test))

print(f"Elapsed {time() - start}")
def fit(self, X, y, sample_weight=None, **kwargs):
216    def fit(self, X, y, sample_weight=None, **kwargs):
217        """Fit custom model to training data (X, y).
218
219        Parameters:
220
221            X: {array-like}, shape = [n_samples, n_features]
222                Training vectors, where n_samples is the number
223                of samples and n_features is the number of features.
224
225            y: array-like, shape = [n_samples]
226                Target values.
227
228            sample_weight: array-like, shape = [n_samples]
229                Sample weights.
230
231            **kwargs: additional parameters to be passed to
232                        self.cook_training_set or self.obj.fit
233
234        Returns:
235
236            self: object
237        """
238
239        if len(X.shape) == 1:
240            if isinstance(X, pd.DataFrame):
241                X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns)
242            else:
243                X = X.reshape(1, -1)
244
245        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
246        self.classes_ = np.unique(y)
247        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
248
249        # Wrap in CalibratedClassifierCV if needed
250        if self.cv_calibration is not None:
251            self.obj = CalibratedClassifierCV(
252                self.obj, cv=self.cv_calibration, method=self.calibration_method
253            )
254
255        # if sample_weights, else: (must use self.row_index)
256        if sample_weight is not None:
257            self.obj.fit(
258                scaled_Z,
259                output_y,
260                sample_weight=sample_weight[self.index_row_].ravel(),
261                **kwargs
262            )
263            return self
264
265        # if sample_weight is None:
266        self.obj.fit(scaled_Z, output_y, **kwargs)
267        self.classes_ = np.unique(y)  # for compatibility with sklearn
268        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
269
270        if hasattr(self.obj, "coef_"):
271            self.coef_ = self.obj.coef_
272
273        if hasattr(self.obj, "intercept_"):
274            self.intercept_ = self.obj.intercept_
275
276        return self

Fit custom model to training data (X, y).

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

sample_weight: array-like, shape = [n_samples]
    Sample weights.

**kwargs: additional parameters to be passed to
            self.cook_training_set or self.obj.fit

Returns:

self: object
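
A small sketch of the optional `sample_weight` argument, reusing `X_train`, `y_train` and `RandomForestClassifier` from the class example above; the weights are arbitrary and purely illustrative:

```python
import numpy as np
import nnetsauce as ns
from sklearn.ensemble import RandomForestClassifier

weights = np.ones(X_train.shape[0])
weights[y_train == 0] = 2.0   # arbitrary up-weighting of one class

clf = ns.CustomClassifier(
    obj=RandomForestClassifier(n_estimators=10, random_state=123),
    n_hidden_features=5, n_clusters=2, seed=123,
)
clf.fit(X_train, y_train, sample_weight=weights)
```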
def predict(self, X, **kwargs):
336    def predict(self, X, **kwargs):
337        """Predict test data X.
338
339        Parameters:
340
341            X: {array-like}, shape = [n_samples, n_features]
342                Training vectors, where n_samples is the number
343                of samples and n_features is the number of features.
344
345            **kwargs: additional parameters to be passed to
346                    self.cook_test_set
347
348        Returns:
349
350            model predictions: {array-like}
351        """
352
353        if len(X.shape) == 1:
354            n_features = X.shape[0]
355            new_X = mo.rbind(
356                X.reshape(1, n_features),
357                np.ones(n_features).reshape(1, n_features),
358            )
359
360            return (
361                self.obj.predict(self.cook_test_set(new_X, **kwargs), **kwargs)
362            )[0]
363
364        return self.obj.predict(self.cook_test_set(X, **kwargs), **kwargs)

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Samples to predict, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
def predict_proba(self, X, **kwargs):
366    def predict_proba(self, X, **kwargs):
367        """Predict probabilities for test data X.
368
369        Args:
370
371            X: {array-like}, shape = [n_samples, n_features]
372                Training vectors, where n_samples is the number
373                of samples and n_features is the number of features.
374
375            **kwargs: additional parameters to be passed to
376                    self.cook_test_set
377
378        Returns:
379
380            probability estimates for test data: {array-like}
381        """
382
383        if len(X.shape) == 1:
384            n_features = X.shape[0]
385            new_X = mo.rbind(
386                X.reshape(1, n_features),
387                np.ones(n_features).reshape(1, n_features),
388            )
389            return (
390                self.obj.predict_proba(
391                    self.cook_test_set(new_X, **kwargs), **kwargs
392                )
393            )[0]
394        return self.obj.predict_proba(self.cook_test_set(X, **kwargs), **kwargs)

Predict probabilities for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Samples to predict, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

probability estimates for test data: {array-like}
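
For instance, continuing with `clf` fitted as in the previous sketch and `X_test` from the class example, the probabilities have one column per class:

```python
proba = clf.predict_proba(X_test)
print(proba.shape)           # (n_samples, n_classes); rows sum to 1
print(proba[:3].round(3))
```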
def score(self, X, y, scoring=None):
435    def score(self, X, y, scoring=None):
436        """Scoring function for classification.
437
438        Args:
439
440            X: {array-like}, shape = [n_samples, n_features]
441                Training vectors, where n_samples is the number
442                of samples and n_features is the number of features.
443
444            y: array-like, shape = [n_samples]
445                Target values.
446
447            scoring: str
448                scoring method (default is accuracy)
449
450        Returns:
451
452            score: float
453        """
454
455        if scoring is None:
456            scoring = "accuracy"
457
458        if scoring == "accuracy":
459            return skm2.accuracy_score(y, self.predict(X))
460
461        if scoring == "f1":
462            return skm2.f1_score(y, self.predict(X))
463
464        if scoring == "precision":
465            return skm2.precision_score(y, self.predict(X))
466
467        if scoring == "recall":
468            return skm2.recall_score(y, self.predict(X))
469
470        if scoring == "roc_auc":
471            return skm2.roc_auc_score(y, self.predict(X))
472
473        if scoring == "log_loss":
474            return skm2.log_loss(y, self.predict_proba(X))
475
476        if scoring == "balanced_accuracy":
477            return skm2.balanced_accuracy_score(y, self.predict(X))
478
479        if scoring == "average_precision":
480            return skm2.average_precision_score(y, self.predict(X))
481
482        if scoring == "neg_brier_score":
483            return -skm2.brier_score_loss(y, self.predict_proba(X))
484
485        if scoring == "neg_log_loss":
486            return -skm2.log_loss(y, self.predict_proba(X))

Scoring function for classification.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

scoring: str
    scoring method (default is accuracy)

Returns:

score: float
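
A short sketch of the scoring options on the (multiclass) digits example above; binary-only metrics such as 'f1' or 'roc_auc' would require a binary target:

```python
print(clf.score(X_test, y_test))                               # accuracy (default)
print(clf.score(X_test, y_test, scoring="balanced_accuracy"))
print(clf.score(X_test, y_test, scoring="neg_log_loss"))       # uses predict_proba
```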
class CustomRegressor(nnetsauce.custom.custom.Custom, sklearn.base.RegressorMixin):
 18class CustomRegressor(Custom, RegressorMixin):
 19    """Custom Regression model
 20
 21    This class is used to 'augment' any regression model with transformed features.
 22
 23    Parameters:
 24
 25        obj: object
 26            any object containing a method fit (obj.fit()) and a method predict
 27            (obj.predict())
 28
 29        n_hidden_features: int
 30            number of nodes in the hidden layer
 31
 32        activation_name: str
 33            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 34
 35        a: float
 36            hyperparameter for 'prelu' or 'elu' activation function
 37
 38        nodes_sim: str
 39            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 40            'uniform'
 41
 42        bias: boolean
 43            indicates if the hidden layer contains a bias term (True) or not
 44            (False)
 45
 46        dropout: float
 47            regularization parameter; (random) percentage of nodes dropped out
 48            of the training
 49
 50        direct_link: boolean
 51            indicates if the original predictors are included (True) in model's
 52            fitting or not (False)
 53
 54        n_clusters: int
 55            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 56                no clustering)
 57
 58        cluster_encode: bool
 59            defines how the variable containing clusters is treated (default is one-hot)
 60            if `False`, then labels are used, without one-hot encoding
 61
 62        type_clust: str
 63            type of clustering method: currently k-means ('kmeans') or Gaussian
 64            Mixture Model ('gmm')
 65
 66        type_scaling: a tuple of 3 strings
 67            scaling methods for inputs, hidden layer, and clustering respectively
 68            (and when relevant).
 69            Currently available: standardization ('std') or MinMax scaling ('minmax')
 70
 71        type_pi: str.
 72            type of prediction interval; currently `None` (split or local
 73            conformal without simulation), "kde" or "bootstrap" (simulated split
 74            conformal).
 75
 76        replications: int.
 77            number of replications (if needed) for predictive simulation.
 78            Used only in `self.predict`, for `self.kernel` in ('gaussian',
 79            'tophat') and `self.type_pi = 'kde'`. Default is `None`.
 80
 81        kernel: str.
 82            the kernel to use for kernel density estimation (used for predictive
 83            simulation in `self.predict`, with `method='splitconformal'` and
 84            `type_pi = 'kde'`). Currently, either 'gaussian' or 'tophat'.
 85
 86        type_split: str.
 87            Type of splitting for conformal prediction. None (default), or
 88            "random" (random split of data) or "sequential" (sequential split of data)
 89
 90        col_sample: float
 91            percentage of covariates randomly chosen for training
 92
 93        row_sample: float
 94            percentage of rows chosen for training, by stratified bootstrapping
 95
 96        level: float
 97            confidence level for prediction intervals
 98
 99        pi_method: str
100            method for prediction intervals: 'splitconformal' or 'localconformal'
101
102        seed: int
103            reproducibility seed for nodes_sim=='uniform'
104
105        type_fit: str
106            'regression'
107
108        backend: str
109            "cpu" or "gpu" or "tpu"
110
111    Examples:
112
113    See [https://thierrymoudiki.github.io/blog/2024/03/18/python/conformal-and-bayesian-regression](https://thierrymoudiki.github.io/blog/2024/03/18/python/conformal-and-bayesian-regression)
114
115    """
116
117    # construct the object -----
118
119    def __init__(
120        self,
121        obj,
122        n_hidden_features=5,
123        activation_name="relu",
124        a=0.01,
125        nodes_sim="sobol",
126        bias=True,
127        dropout=0,
128        direct_link=True,
129        n_clusters=2,
130        cluster_encode=True,
131        type_clust="kmeans",
132        type_scaling=("std", "std", "std"),
133        type_pi=None,
134        replications=None,
135        kernel=None,
136        type_split=None,
137        col_sample=1,
138        row_sample=1,
139        level=None,
140        pi_method=None,
141        seed=123,
142        backend="cpu",
143    ):
144        super().__init__(
145            obj=obj,
146            n_hidden_features=n_hidden_features,
147            activation_name=activation_name,
148            a=a,
149            nodes_sim=nodes_sim,
150            bias=bias,
151            dropout=dropout,
152            direct_link=direct_link,
153            n_clusters=n_clusters,
154            cluster_encode=cluster_encode,
155            type_clust=type_clust,
156            type_scaling=type_scaling,
157            col_sample=col_sample,
158            row_sample=row_sample,
159            seed=seed,
160            backend=backend,
161        )
162
163        self.type_fit = "regression"
164        self.type_pi = type_pi
165        self.replications = replications
166        self.kernel = kernel
167        self.type_split = type_split
168        self.level = level
169        self.pi_method = pi_method
170        self.coef_ = None
171        self.intercept_ = None
172        self.X_ = None
173        self.y_ = None
174        self.aic_ = None
175        self.aicc_ = None
176        self.bic_ = None
177
178    def fit(self, X, y, sample_weight=None, **kwargs):
179        """Fit custom model to training data (X, y).
180
181        Parameters:
182
183            X: {array-like}, shape = [n_samples, n_features]
184                Training vectors, where n_samples is the number
185                of samples and n_features is the number of features.
186
187            y: array-like, shape = [n_samples]
188                Target values.
189
190            sample_weight: array-like, shape = [n_samples]
191                Sample weights.
192
193            **kwargs: additional parameters to be passed to
194                self.cook_training_set or self.obj.fit
195
196        Returns:
197
198            self: object
199
200        """
201
202        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
203
204        if self.level is not None:
205            self.obj = PredictionInterval(
206                obj=self.obj, method=self.pi_method, level=self.level
207            )
208
209        # if sample_weights, else: (must use self.row_index)
210        if sample_weight is not None:
211            self.obj.fit(
212                scaled_Z,
213                centered_y,
214                sample_weight=sample_weight[self.index_row_].ravel(),
215                **kwargs
216            )
217
218            return self
219
220        self.obj.fit(scaled_Z, centered_y, **kwargs)
221
222        self.X_ = X
223
224        self.y_ = y
225
226        # Compute SSE
227        centered_y_pred = self.obj.predict(scaled_Z)
228        self.sse_ = np.sum((centered_y - centered_y_pred) ** 2)
229
230        # Get number of parameters
231        n_params = (
232            self.n_hidden_features + X.shape[1]
233        )  # hidden features + original features
234        if self.n_clusters > 0:
235            n_params += self.n_clusters  # add clusters if used
236
237        # Compute information criteria
238        n_samples = X.shape[0]
239        temp = n_samples * np.log(self.sse_ / n_samples)
240        self.aic_ = temp + 2 * n_params
241        self.bic_ = temp + np.log(n_samples) * n_params
242
243        if hasattr(self.obj, "coef_"):
244            self.coef_ = self.obj.coef_
245
246        if hasattr(self.obj, "intercept_"):
247            self.intercept_ = self.obj.intercept_
248
249        return self
250
251    def partial_fit(self, X, y, **kwargs):
252        """Partial fit custom model to training data (X, y).
253
254        Parameters:
255
256            X: {array-like}, shape = [n_samples, n_features]
257                Subset of training vectors, where n_samples is the number
258                of samples and n_features is the number of features.
259
260            y: array-like, shape = [n_samples]
261                Subset of target values.
262
263            **kwargs: additional parameters to be passed to
264                self.cook_training_set or self.obj.fit
265
266        Returns:
267
268            self: object
269
270        """
271
272        if len(X.shape) == 1:
273            if isinstance(X, pd.DataFrame):
274                X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns)
275            else:
276                X = X.reshape(1, -1)
277            y = np.array([y])
278
279        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
280
281        self.obj.partial_fit(scaled_Z, centered_y, **kwargs)
282
283        self.X_ = X
284
285        self.y_ = y
286
287        return self
288
289    def predict(self, X, level=95, method="splitconformal", **kwargs):
290        """Predict test data X.
291
292        Parameters:
293
294            X: {array-like}, shape = [n_samples, n_features]
295                Training vectors, where n_samples is the number
296                of samples and n_features is the number of features.
297
298            level: int
299                Level of confidence (default = 95)
300
301            method: str
302                'splitconformal' or 'localconformal'; used for conformal
303                prediction (when `return_pi = True` is specified)
304
305            **kwargs: additional parameters
306                    `return_pi = True` for conformal prediction,
307                    with `method` in ('splitconformal', 'localconformal')
308                    or `return_std = True` for `self.obj` in
309                    (`sklearn.linear_model.BayesianRidge`,
310                    `sklearn.linear_model.ARDRegression`,
311                    `sklearn.gaussian_process.GaussianProcessRegressor`)
312
313        Returns:
314
315            model predictions:
316                an array if uncertainty quantification is not requested,
317                  or a tuple (mean, standard deviation, lower bound,
318                  upper bound) if `return_std = True`, or prediction
319                  intervals (and simulations, if requested) if
320                  `return_pi = True`
321
322        """
323
324        if "return_std" in kwargs:
325            alpha = 100 - level
326            pi_multiplier = norm.ppf(1 - alpha / 200)
327
328            if len(X.shape) == 1:
329                n_features = X.shape[0]
330                new_X = mo.rbind(
331                    X.reshape(1, n_features),
332                    np.ones(n_features).reshape(1, n_features),
333                )
334
335                mean_, std_ = self.obj.predict(
336                    self.cook_test_set(new_X, **kwargs), return_std=True
337                )[0]
338
339                preds = self.y_mean_ + mean_
340                lower = self.y_mean_ + (mean_ - pi_multiplier * std_)
341                upper = self.y_mean_ + (mean_ + pi_multiplier * std_)
342
343                DescribeResults = namedtuple(
344                    "DescribeResults", ["mean", "std", "lower", "upper"]
345                )
346
347                return DescribeResults(preds, std_, lower, upper)
348
349            # len(X.shape) > 1
350            mean_, std_ = self.obj.predict(
351                self.cook_test_set(X, **kwargs), return_std=True
352            )
353
354            preds = self.y_mean_ + mean_
355            lower = self.y_mean_ + (mean_ - pi_multiplier * std_)
356            upper = self.y_mean_ + (mean_ + pi_multiplier * std_)
357
358            DescribeResults = namedtuple(
359                "DescribeResults", ["mean", "std", "lower", "upper"]
360            )
361
362            return DescribeResults(preds, std_, lower, upper)
363
364        if "return_pi" in kwargs:
365            assert method in (
366                "splitconformal",
367                "localconformal",
368            ), "method must be in ('splitconformal', 'localconformal')"
369            self.pi = PredictionInterval(
370                obj=self,
371                method=method,
372                level=level,
373                type_pi=self.type_pi,
374                replications=self.replications,
375                kernel=self.kernel,
376            )
377
378            if len(self.X_.shape) == 1:
379                if isinstance(X, pd.DataFrame):
380                    self.X_ = pd.DataFrame(
381                        self.X_.values.reshape(1, -1), columns=self.X_.columns
382                    )
383                else:
384                    self.X_ = self.X_.reshape(1, -1)
385                self.y_ = np.array([self.y_])
386
387            self.pi.fit(self.X_, self.y_)
388            # self.X_ = None # consumes memory to keep, dangerous to delete (side effect)
389            # self.y_ = None # consumes memory to keep, dangerous to delete (side effect)
390            preds = self.pi.predict(X, return_pi=True)
391            return preds
392
393        # "return_std" not in kwargs
394        if len(X.shape) == 1:
395            n_features = X.shape[0]
396            new_X = mo.rbind(
397                X.reshape(1, n_features),
398                np.ones(n_features).reshape(1, n_features),
399            )
400
401            return (
402                self.y_mean_
403                + self.obj.predict(
404                    self.cook_test_set(new_X, **kwargs), **kwargs
405                )
406            )[0]
407
408        # len(X.shape) > 1
409        return self.y_mean_ + self.obj.predict(
410            self.cook_test_set(X, **kwargs), **kwargs
411        )
412
413    def score(self, X, y, scoring=None):
414        """Compute the score of the model.
415
416        Parameters:
417
418            X: {array-like}, shape = [n_samples, n_features]
419                Training vectors, where n_samples is the number
420                of samples and n_features is the number of features.
421
422            y: array-like, shape = [n_samples]
423                Target values.
424
425            scoring: str
426                scoring method
427
428        Returns:
429
430            score: float
431
432        """
433
434        if scoring is None:
435            return np.sqrt(np.mean((self.predict(X) - y) ** 2))
436
437        return skm2.get_scorer(scoring)(self, X, y)

Custom Regression model

This class is used to 'augment' any regression model with transformed features.

Parameters:

obj: object
    any object containing a method fit (obj.fit()) and a method predict
    (obj.predict())

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

type_pi: str.
    type of prediction interval; currently `None` (split or local
    conformal without simulation), "kde" or "bootstrap" (simulated split
    conformal).

replications: int.
    number of replications (if needed) for predictive simulation.
    Used only in `self.predict`, for `self.kernel` in ('gaussian',
    'tophat') and `self.type_pi = 'kde'`. Default is `None`.

kernel: str.
    the kernel to use for kernel density estimation (used for predictive
    simulation in `self.predict`, with `method='splitconformal'` and
    `type_pi = 'kde'`). Currently, either 'gaussian' or 'tophat'.

type_split: str.
    Type of splitting for conformal prediction. None (default), or
    "random" (random split of data) or "sequential" (sequential split of data)

col_sample: float
    percentage of covariates randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

level: float
    confidence level for prediction intervals

pi_method: str
    method for prediction intervals: 'splitconformal' or 'localconformal'

seed: int
    reproducibility seed for nodes_sim=='uniform'

type_fit: str
    'regression'

backend: str
    "cpu" or "gpu" or "tpu"

Examples:

See https://thierrymoudiki.github.io/blog/2024/03/18/python/conformal-and-bayesian-regression
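In addition to the blog post above, here is a minimal, hedged usage sketch (the dataset and base learner are illustrative assumptions):

```python
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split

# illustrative data and base learner (assumptions)
X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123)

# 'augment' Ridge with 10 transformed (hidden-layer) features and 2 clusters
reg = ns.CustomRegressor(obj=Ridge(), n_hidden_features=10, n_clusters=2)
reg.fit(X_train, y_train)
print(reg.predict(X_test)[:5])
print(reg.score(X_test, y_test))  # RMSE when scoring is None
```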

def fit(self, X, y, sample_weight=None, **kwargs):
178    def fit(self, X, y, sample_weight=None, **kwargs):
179        """Fit custom model to training data (X, y).
180
181        Parameters:
182
183            X: {array-like}, shape = [n_samples, n_features]
184                Training vectors, where n_samples is the number
185                of samples and n_features is the number of features.
186
187            y: array-like, shape = [n_samples]
188                Target values.
189
190            sample_weight: array-like, shape = [n_samples]
191                Sample weights.
192
193            **kwargs: additional parameters to be passed to
194                self.cook_training_set or self.obj.fit
195
196        Returns:
197
198            self: object
199
200        """
201
202        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
203
204        if self.level is not None:
205            self.obj = PredictionInterval(
206                obj=self.obj, method=self.pi_method, level=self.level
207            )
208
209        # if sample_weights, else: (must use self.row_index)
210        if sample_weight is not None:
211            self.obj.fit(
212                scaled_Z,
213                centered_y,
214                sample_weight=sample_weight[self.index_row_].ravel(),
215                **kwargs
216            )
217
218            return self
219
220        self.obj.fit(scaled_Z, centered_y, **kwargs)
221
222        self.X_ = X
223
224        self.y_ = y
225
226        # Compute SSE
227        centered_y_pred = self.obj.predict(scaled_Z)
228        self.sse_ = np.sum((centered_y - centered_y_pred) ** 2)
229
230        # Get number of parameters
231        n_params = (
232            self.n_hidden_features + X.shape[1]
233        )  # hidden features + original features
234        if self.n_clusters > 0:
235            n_params += self.n_clusters  # add clusters if used
236
237        # Compute information criteria
238        n_samples = X.shape[0]
239        temp = n_samples * np.log(self.sse_ / n_samples)
240        self.aic_ = temp + 2 * n_params
241        self.bic_ = temp + np.log(n_samples) * n_params
242
243        if hasattr(self.obj, "coef_"):
244            self.coef_ = self.obj.coef_
245
246        if hasattr(self.obj, "intercept_"):
247            self.intercept_ = self.obj.intercept_
248
249        return self

Fit custom model to training data (X, y).

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

sample_weight: array-like, shape = [n_samples]
    Sample weights.

**kwargs: additional parameters to be passed to
    self.cook_training_set or self.obj.fit

Returns:

self: object
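As a small illustration of the `sample_weight` argument, the sketch below (continuing the hedged `CustomRegressor` example above; the uniform weights are a placeholder) forwards the weights to the base learner's `fit`, restricted to the rows selected by `row_sample`:

```python
import numpy as np

# placeholder weights; reg, X_train, y_train come from the earlier sketch
weights = np.ones(X_train.shape[0])
reg.fit(X_train, y_train, sample_weight=weights)
```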
def predict(self, X, level=95, method='splitconformal', **kwargs):
289    def predict(self, X, level=95, method="splitconformal", **kwargs):
290        """Predict test data X.
291
292        Parameters:
293
294            X: {array-like}, shape = [n_samples, n_features]
295                Training vectors, where n_samples is the number
296                of samples and n_features is the number of features.
297
298            level: int
299                Level of confidence (default = 95)
300
301            method: str
302                'splitconformal' or 'localconformal'; used for conformal
303                prediction (when `return_pi = True` is specified)
304
305            **kwargs: additional parameters
306                    `return_pi = True` for conformal prediction,
307                    with `method` in ('splitconformal', 'localconformal')
308                    or `return_std = True` for `self.obj` in
309                    (`sklearn.linear_model.BayesianRidge`,
310                    `sklearn.linear_model.ARDRegression`,
311                    `sklearn.gaussian_process.GaussianProcessRegressor`)
312
313        Returns:
314
315            model predictions:
316                an array if uncertainty quantification is not requested,
317                  or a tuple (mean, standard deviation, lower bound,
318                  upper bound) if `return_std = True`, or prediction
319                  intervals (and simulations, if requested) if
320                  `return_pi = True`
321
322        """
323
324        if "return_std" in kwargs:
325            alpha = 100 - level
326            pi_multiplier = norm.ppf(1 - alpha / 200)
327
328            if len(X.shape) == 1:
329                n_features = X.shape[0]
330                new_X = mo.rbind(
331                    X.reshape(1, n_features),
332                    np.ones(n_features).reshape(1, n_features),
333                )
334
335                mean_, std_ = self.obj.predict(
336                    self.cook_test_set(new_X, **kwargs), return_std=True
337                )[0]
338
339                preds = self.y_mean_ + mean_
340                lower = self.y_mean_ + (mean_ - pi_multiplier * std_)
341                upper = self.y_mean_ + (mean_ + pi_multiplier * std_)
342
343                DescribeResults = namedtuple(
344                    "DescribeResults", ["mean", "std", "lower", "upper"]
345                )
346
347                return DescribeResults(preds, std_, lower, upper)
348
349            # len(X.shape) > 1
350            mean_, std_ = self.obj.predict(
351                self.cook_test_set(X, **kwargs), return_std=True
352            )
353
354            preds = self.y_mean_ + mean_
355            lower = self.y_mean_ + (mean_ - pi_multiplier * std_)
356            upper = self.y_mean_ + (mean_ + pi_multiplier * std_)
357
358            DescribeResults = namedtuple(
359                "DescribeResults", ["mean", "std", "lower", "upper"]
360            )
361
362            return DescribeResults(preds, std_, lower, upper)
363
364        if "return_pi" in kwargs:
365            assert method in (
366                "splitconformal",
367                "localconformal",
368            ), "method must be in ('splitconformal', 'localconformal')"
369            self.pi = PredictionInterval(
370                obj=self,
371                method=method,
372                level=level,
373                type_pi=self.type_pi,
374                replications=self.replications,
375                kernel=self.kernel,
376            )
377
378            if len(self.X_.shape) == 1:
379                if isinstance(X, pd.DataFrame):
380                    self.X_ = pd.DataFrame(
381                        self.X_.values.reshape(1, -1), columns=self.X_.columns
382                    )
383                else:
384                    self.X_ = self.X_.reshape(1, -1)
385                self.y_ = np.array([self.y_])
386
387            self.pi.fit(self.X_, self.y_)
388            # self.X_ = None # consumes memory to keep, dangerous to delete (side effect)
389            # self.y_ = None # consumes memory to keep, dangerous to delete (side effect)
390            preds = self.pi.predict(X, return_pi=True)
391            return preds
392
393        # "return_std" not in kwargs
394        if len(X.shape) == 1:
395            n_features = X.shape[0]
396            new_X = mo.rbind(
397                X.reshape(1, n_features),
398                np.ones(n_features).reshape(1, n_features),
399            )
400
401            return (
402                self.y_mean_
403                + self.obj.predict(
404                    self.cook_test_set(new_X, **kwargs), **kwargs
405                )
406            )[0]
407
408        # len(X.shape) > 1
409        return self.y_mean_ + self.obj.predict(
410            self.cook_test_set(X, **kwargs), **kwargs
411        )

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

level: int
    Level of confidence (default = 95)

method: str
    'splitconformal' or 'localconformal'; used for conformal
    prediction (when `return_pi = True` is specified)

**kwargs: additional parameters
        `return_pi = True` for conformal prediction,
        with `method` in ('splitconformal', 'localconformal')
        or `return_std = True` for `self.obj` in
        (`sklearn.linear_model.BayesianRidge`,
        `sklearn.linear_model.ARDRegression`,
        `sklearn.gaussian_process.GaussianProcessRegressor`)

Returns:

model predictions:
    an array if uncertainty quantification is not requested,
      or a tuple (mean, standard deviation, lower bound, upper bound)
      if `return_std = True`, or prediction intervals (and simulations,
      if requested) if `return_pi = True`
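A hedged sketch of requesting conformal prediction intervals with `return_pi=True` follows (continuing the `CustomRegressor` example above; the exact container returned depends on `PredictionInterval`):

```python
# refit, then request split conformal prediction intervals
reg.fit(X_train, y_train)
res = reg.predict(X_test, level=95, method="splitconformal", return_pi=True)
print(res)  # point predictions plus lower/upper interval bounds
```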
def score(self, X, y, scoring=None):
413    def score(self, X, y, scoring=None):
414        """Compute the score of the model.
415
416        Parameters:
417
418            X: {array-like}, shape = [n_samples, n_features]
419                Training vectors, where n_samples is the number
420                of samples and n_features is the number of features.
421
422            y: array-like, shape = [n_samples]
423                Target values.
424
425            scoring: str
426                scoring method
427
428        Returns:
429
430            score: float
431
432        """
433
434        if scoring is None:
435            return np.sqrt(np.mean((self.predict(X) - y) ** 2))
436
437        return skm2.get_scorer(scoring)(self, X, y)

Compute the score of the model.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

scoring: str
    scoring method

Returns:

score: float
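For illustration, `scoring=None` returns the RMSE, while any scikit-learn scorer name is dispatched through `sklearn.metrics.get_scorer` (continuing the hedged `CustomRegressor` sketch above):

```python
print(reg.score(X_test, y_test))  # RMSE (scoring=None)
print(reg.score(X_test, y_test, scoring="neg_mean_absolute_error"))  # any scikit-learn scorer name
```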
class CustomBackPropRegressor(nnetsauce.custom.custom.Custom, sklearn.base.RegressorMixin):
 20class CustomBackPropRegressor(Custom, RegressorMixin):
 21    """
 22    Finite difference trainer for nnetsauce models.
 23
 24    Parameters
 25    ----------
 26
 27    base_model : str
 28        The name of the base model (e.g., 'RidgeCV').
 29
 30    type_grad : {'finitediff', 'autodiff'}, optional
 31        Type of gradient computation to use (default='finitediff').
 32
 33    lr : float, optional
 34        Learning rate for optimization (default=1e-4).
 35
 36    optimizer : {'gd', 'sgd', 'adam', 'cd'}, optional
 37        Optimization algorithm: gradient descent ('gd'), stochastic gradient descent ('sgd'),
 38        Adam ('adam'), or coordinate descent ('cd'). Default is 'gd'.
 39
 40    eps : float, optional
 41        Scaling factor for adaptive finite difference step size (default=1e-3).
 42
 43    batch_size : int, optional
 44        Batch size for 'sgd' optimizer (default=32).
 45
 46    alpha : float, optional
 47        Elastic net penalty strength (default=0.0).
 48
 49    l1_ratio : float, optional
 50        Elastic net mixing parameter (0 = Ridge, 1 = Lasso, default=0.0).
 51
 52    type_loss : {'mse', 'quantile'}, optional
 53        Type of loss function to use (default='mse').
 54
 55    q : float, optional
 56        Quantile for quantile loss (default=0.5).
 57
 58    **kwargs
 59        Additional parameters to pass to the scikit-learn model.
 60
 61    """
 62
 63    def __init__(
 64        self,
 65        base_model,
 66        type_grad="finitediff",
 67        lr=1e-4,
 68        optimizer="gd",
 69        eps=1e-3,
 70        batch_size=32,
 71        alpha=0.0,
 72        l1_ratio=0.0,
 73        type_loss="mse",
 74        q=0.5,
 75        backend="cpu",
 76        **kwargs,
 77    ):
 78        super().__init__(base_model, True, **kwargs)
 79        self.base_model = base_model
 80        self.custom_kwargs = kwargs
 81        self.backend = backend
 82        self.model = ns.CustomRegressor(
 83            self.base_model, backend=self.backend, **self.custom_kwargs
 84        )
 85        assert isinstance(
 86            self.model, ns.CustomRegressor
 87        ), "'model' must be of class ns.CustomRegressor"
 88        self.type_grad = type_grad
 89        self.lr = lr
 90        self.optimizer = optimizer
 91        self.eps = eps
 92        self.loss_history_ = []
 93        self.opt_state = None
 94        self.batch_size = batch_size  # for SGD
 95        self.loss_history_ = []
 96        self._cd_index = 0  # For coordinate descent
 97        self.alpha = alpha
 98        self.l1_ratio = l1_ratio
 99        self.type_loss = type_loss
100        self.q = q
101
102    def _loss(self, X, y, **kwargs):
103        """
104        Compute the loss (with elastic net penalty) for the current model.
105
106        Parameters
107        ----------
108
109        X : array-like of shape (n_samples, n_features)
110            Input data.
111
112        y : array-like of shape (n_samples,)
113            Target values.
114
115        **kwargs
116            Additional keyword arguments for loss calculation.
117
118        Returns
119        -------
120        float
121            The computed loss value.
122        """
123        y_pred = self.model.predict(X)
124        if self.type_loss == "mse":
125            loss = np.mean((y - y_pred) ** 2)
126        elif self.type_loss == "quantile":
127            loss = mean_pinball_loss(y, y_pred, alpha=self.q, **kwargs)
128        W = self.model.W_
129        l1 = np.sum(np.abs(W))
130        l2 = np.sum(W**2)
131        return loss + self.alpha * (
132            self.l1_ratio * l1 + 0.5 * (1 - self.l1_ratio) * l2
133        )
134
135    def _compute_grad(self, X, y):
136        """
137        Compute the gradient of the loss with respect to W_ using finite differences.
138
139        Parameters
140        ----------
141
142        X : array-like of shape (n_samples, n_features)
143            Input data.
144
145        y : array-like of shape (n_samples,)
146            Target values.
147
148        Returns
149        -------
150
151        ndarray
152            Gradient array with the same shape as W_.
153        """
154        if self.type_grad == "autodiff":
155            raise NotImplementedError(
156                "Automatic differentiation is not implemented yet."
157            )
158            # Use JAX for automatic differentiation
159            W = deepcopy(self.model.W_)
160            W_flat = W.flatten()
161            n_params = W_flat.size
162
163            def loss_fn(W_flat):
164                W_reshaped = W_flat.reshape(W.shape)
165                self.model.W_ = W_reshaped
166                return self._loss(X, y)
167
168            grad_fn = jax.grad(loss_fn)
169            grad_flat = grad_fn(W_flat)
170            grad = grad_flat.reshape(W.shape)
171
172            # Add elastic net gradient
173            l1_grad = self.alpha * self.l1_ratio * np.sign(W)
174            l2_grad = self.alpha * (1 - self.l1_ratio) * W
175            grad += l1_grad + l2_grad
176
177            self.model.W_ = W
178            return grad
179
180        # Finite difference gradient computation
181        W = deepcopy(self.model.W_)
182        shape = W.shape
183        W_flat = W.flatten()
184        n_params = W_flat.size
185
186        # Adaptive finite difference step
187        h_vec = self.eps * np.maximum(1.0, np.abs(W_flat))
188        eye = np.eye(n_params)
189
190        loss_plus = np.zeros(n_params)
191        loss_minus = np.zeros(n_params)
192
193        for i in range(n_params):
194            h_i = h_vec[i]
195            Wp = W_flat.copy()
196            Wp[i] += h_i
197            Wm = W_flat.copy()
198            Wm[i] -= h_i
199
200            self.model.W_ = Wp.reshape(shape)
201            loss_plus[i] = self._loss(X, y)
202
203            self.model.W_ = Wm.reshape(shape)
204            loss_minus[i] = self._loss(X, y)
205
206        grad = ((loss_plus - loss_minus) / (2 * h_vec)).reshape(shape)
207
208        # Add elastic net gradient
209        l1_grad = self.alpha * self.l1_ratio * np.sign(W)
210        l2_grad = self.alpha * (1 - self.l1_ratio) * W
211        grad += l1_grad + l2_grad
212
213        self.model.W_ = W  # restore original
214        return grad
215
216    def fit(
217        self,
218        X,
219        y,
220        epochs=10,
221        verbose=True,
222        show_progress=True,
223        sample_weight=None,
224        **kwargs,
225    ):
226        """
227        Fit the model using finite difference optimization.
228
229        Parameters
230        ----------
231
232        X : array-like of shape (n_samples, n_features)
233            Training data.
234
235        y : array-like of shape (n_samples,)
236            Target values.
237
238        epochs : int, optional
239            Number of optimization steps (default=10).
240
241        verbose : bool, optional
242            Whether to print progress messages (default=True).
243
244        show_progress : bool, optional
245            Whether to show tqdm progress bar (default=True).
246
247        sample_weight : array-like, optional
248            Sample weights.
249
250        **kwargs
251            Additional keyword arguments.
252
253        Returns
254        -------
255
256        self : object
257            Returns self.
258        """
259
260        self.model.fit(X, y)
261
262        iterator = tqdm(range(epochs)) if show_progress else range(epochs)
263
264        for epoch in iterator:
265            grad = self._compute_grad(X, y)
266
267            if self.optimizer == "gd":
268                self.model.W_ -= self.lr * grad
269                self.model.W_ = np.clip(self.model.W_, 0, 1)
270                # print("self.model.W_", self.model.W_)
271
272            elif self.optimizer == "sgd":
273                # Sample a mini-batch for stochastic gradient
274                n_samples = X.shape[0]
275                idxs = np.random.choice(
276                    n_samples, self.batch_size, replace=False
277                )
278                if isinstance(X, pd.DataFrame):
279                    X_batch = X.iloc[idxs, :]
280                else:
281                    X_batch = X[idxs, :]
282                y_batch = y[idxs]
283                grad = self._compute_grad(X_batch, y_batch)
284
285                self.model.W_ -= self.lr * grad
286                self.model.W_ = np.clip(self.model.W_, 0, 1)
287
288            elif self.optimizer == "adam":
289                if self.opt_state is None:
290                    self.opt_state = {
291                        "m": np.zeros_like(grad),
292                        "v": np.zeros_like(grad),
293                        "t": 0,
294                    }
295                beta1, beta2, eps = 0.9, 0.999, 1e-8
296                self.opt_state["t"] += 1
297                self.opt_state["m"] = (
298                    beta1 * self.opt_state["m"] + (1 - beta1) * grad
299                )
300                self.opt_state["v"] = beta2 * self.opt_state["v"] + (
301                    1 - beta2
302                ) * (grad**2)
303                m_hat = self.opt_state["m"] / (1 - beta1 ** self.opt_state["t"])
304                v_hat = self.opt_state["v"] / (1 - beta2 ** self.opt_state["t"])
305
306                self.model.W_ -= self.lr * m_hat / (np.sqrt(v_hat) + eps)
307                self.model.W_ = np.clip(self.model.W_, 0, 1)
308                # print("self.model.W_", self.model.W_)
309
310            elif self.optimizer == "cd":  # coordinate descent
311                W_shape = self.model.W_.shape
312                W_flat_size = self.model.W_.size
313                W_flat = self.model.W_.flatten()
314                grad_flat = grad.flatten()
315
316                # Update only one coordinate per epoch (cyclic)
317                idx = self._cd_index % W_flat_size
318                W_flat[idx] -= self.lr * grad_flat[idx]
319                # Clip the updated value
320                W_flat[idx] = np.clip(W_flat[idx], 0, 1)
321
322                # Restore W_
323                self.model.W_ = W_flat.reshape(W_shape)
324
325                self._cd_index += 1
326
327            else:
328                raise ValueError(f"Unsupported optimizer: {self.optimizer}")
329
330            loss = self._loss(X, y)
331            self.loss_history_.append(loss)
332
333            if verbose:
334                print(f"Epoch {epoch+1}: Loss = {loss:.6f}")
335
336        # if sample_weights, else: (must use self.row_index)
337        if sample_weight is not None:
338            self.model.fit(
339                X,
340                y,
341                sample_weight=sample_weight[self.index_row_].ravel(),
342                **kwargs,
343            )
344
345            return self
346
347        return self
348
349    def predict(self, X, level=95, method="splitconformal", **kwargs):
350        """
351        Predict using the trained model.
352
353        Parameters
354        ----------
355
356        X : array-like of shape (n_samples, n_features)
357            Input data.
358
359        level : int, optional
360            Level of confidence for prediction intervals (default=95).
361
362        method : {'splitconformal', 'localconformal'}, optional
363            Method for conformal prediction (default='splitconformal').
364
365        **kwargs
366            Additional keyword arguments. Use `return_pi=True` for prediction intervals,
367            or `return_std=True` for standard deviation estimates.
368
369        Returns
370        -------
371
372        array or tuple
373            Model predictions, or a tuple with prediction intervals or standard deviations if requested.
374        """
375        if "return_std" in kwargs:
376            alpha = 100 - level
377            pi_multiplier = norm.ppf(1 - alpha / 200)
378
379            if len(X.shape) == 1:
380                n_features = X.shape[0]
381                new_X = mo.rbind(
382                    X.reshape(1, n_features),
383                    np.ones(n_features).reshape(1, n_features),
384                )
385
386                mean_, std_ = self.model.predict(new_X, return_std=True)[0]
387
388                preds = mean_
389                lower = mean_ - pi_multiplier * std_
390                upper = mean_ + pi_multiplier * std_
391
392                DescribeResults = namedtuple(
393                    "DescribeResults", ["mean", "std", "lower", "upper"]
394                )
395
396                return DescribeResults(preds, std_, lower, upper)
397
398            # len(X.shape) > 1
399            mean_, std_ = self.model.predict(X, return_std=True)
400
401            preds = mean_
402            lower = mean_ - pi_multiplier * std_
403            upper = mean_ + pi_multiplier * std_
404
405            DescribeResults = namedtuple(
406                "DescribeResults", ["mean", "std", "lower", "upper"]
407            )
408
409            return DescribeResults(preds, std_, lower, upper)
410
411        if "return_pi" in kwargs:
412            assert method in (
413                "splitconformal",
414                "localconformal",
415            ), "method must be in ('splitconformal', 'localconformal')"
416            self.pi = ns.PredictionInterval(
417                obj=self,
418                method=method,
419                level=level,
420                type_pi=self.type_pi,
421                replications=self.replications,
422                kernel=self.kernel,
423            )
424
425            if len(self.X_.shape) == 1:
426                if isinstance(X, pd.DataFrame):
427                    self.X_ = pd.DataFrame(
428                        self.X_.values.reshape(1, -1), columns=self.X_.columns
429                    )
430                else:
431                    self.X_ = self.X_.reshape(1, -1)
432                self.y_ = np.array([self.y_])
433
434            self.pi.fit(self.X_, self.y_)
435            # self.X_ = None # consumes memory to keep, dangerous to delete (side effect)
436            # self.y_ = None # consumes memory to keep, dangerous to delete (side effect)
437            preds = self.pi.predict(X, return_pi=True)
438            return preds
439
440        # "return_std" not in kwargs
441        if len(X.shape) == 1:
442            n_features = X.shape[0]
443            new_X = mo.rbind(
444                X.reshape(1, n_features),
445                np.ones(n_features).reshape(1, n_features),
446            )
447
448            return (0 + self.model.predict(new_X, **kwargs))[0]
449
450        # len(X.shape) > 1
451        return self.model.predict(X, **kwargs)

Finite difference trainer for nnetsauce models.

Parameters

base_model : str The name of the base model (e.g., 'RidgeCV').

type_grad : {'finitediff', 'autodiff'}, optional Type of gradient computation to use (default='finitediff').

lr : float, optional Learning rate for optimization (default=1e-4).

optimizer : {'gd', 'sgd', 'adam', 'cd'}, optional Optimization algorithm: gradient descent ('gd'), stochastic gradient descent ('sgd'), Adam ('adam'), or coordinate descent ('cd'). Default is 'gd'.

eps : float, optional Scaling factor for adaptive finite difference step size (default=1e-3).

batch_size : int, optional Batch size for 'sgd' optimizer (default=32).

alpha : float, optional Elastic net penalty strength (default=0.0).

l1_ratio : float, optional Elastic net mixing parameter (0 = Ridge, 1 = Lasso, default=0.0).

type_loss : {'mse', 'quantile'}, optional Type of loss function to use (default='mse').

q : float, optional Quantile for quantile loss (default=0.5).

**kwargs Additional parameters to pass to the scikit-learn model.
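A hedged usage sketch follows. Note an assumption: although the docstring describes `base_model` as a string (e.g., 'RidgeCV'), the constructor forwards it directly to `ns.CustomRegressor` as the base learner, so an estimator instance is used here; the dataset is illustrative:

```python
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.linear_model import RidgeCV
from sklearn.model_selection import train_test_split

# illustrative data (an assumption, not from the docs above)
X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123)

reg = ns.CustomBackPropRegressor(RidgeCV(), lr=1e-4, optimizer="gd")
reg.fit(X_train, y_train, epochs=5, verbose=False)
print(reg.predict(X_test)[:5])
print(reg.loss_history_)  # penalized loss recorded after each epoch
```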

def fit( self, X, y, epochs=10, verbose=True, show_progress=True, sample_weight=None, **kwargs):
216    def fit(
217        self,
218        X,
219        y,
220        epochs=10,
221        verbose=True,
222        show_progress=True,
223        sample_weight=None,
224        **kwargs,
225    ):
226        """
227        Fit the model using finite difference optimization.
228
229        Parameters
230        ----------
231
232        X : array-like of shape (n_samples, n_features)
233            Training data.
234
235        y : array-like of shape (n_samples,)
236            Target values.
237
238        epochs : int, optional
239            Number of optimization steps (default=10).
240
241        verbose : bool, optional
242            Whether to print progress messages (default=True).
243
244        show_progress : bool, optional
245            Whether to show tqdm progress bar (default=True).
246
247        sample_weight : array-like, optional
248            Sample weights.
249
250        **kwargs
251            Additional keyword arguments.
252
253        Returns
254        -------
255
256        self : object
257            Returns self.
258        """
259
260        self.model.fit(X, y)
261
262        iterator = tqdm(range(epochs)) if show_progress else range(epochs)
263
264        for epoch in iterator:
265            grad = self._compute_grad(X, y)
266
267            if self.optimizer == "gd":
268                self.model.W_ -= self.lr * grad
269                self.model.W_ = np.clip(self.model.W_, 0, 1)
270                # print("self.model.W_", self.model.W_)
271
272            elif self.optimizer == "sgd":
273                # Sample a mini-batch for stochastic gradient
274                n_samples = X.shape[0]
275                idxs = np.random.choice(
276                    n_samples, self.batch_size, replace=False
277                )
278                if isinstance(X, pd.DataFrame):
279                    X_batch = X.iloc[idxs, :]
280                else:
281                    X_batch = X[idxs, :]
282                y_batch = y[idxs]
283                grad = self._compute_grad(X_batch, y_batch)
284
285                self.model.W_ -= self.lr * grad
286                self.model.W_ = np.clip(self.model.W_, 0, 1)
287
288            elif self.optimizer == "adam":
289                if self.opt_state is None:
290                    self.opt_state = {
291                        "m": np.zeros_like(grad),
292                        "v": np.zeros_like(grad),
293                        "t": 0,
294                    }
295                beta1, beta2, eps = 0.9, 0.999, 1e-8
296                self.opt_state["t"] += 1
297                self.opt_state["m"] = (
298                    beta1 * self.opt_state["m"] + (1 - beta1) * grad
299                )
300                self.opt_state["v"] = beta2 * self.opt_state["v"] + (
301                    1 - beta2
302                ) * (grad**2)
303                m_hat = self.opt_state["m"] / (1 - beta1 ** self.opt_state["t"])
304                v_hat = self.opt_state["v"] / (1 - beta2 ** self.opt_state["t"])
305
306                self.model.W_ -= self.lr * m_hat / (np.sqrt(v_hat) + eps)
307                self.model.W_ = np.clip(self.model.W_, 0, 1)
308                # print("self.model.W_", self.model.W_)
309
310            elif self.optimizer == "cd":  # coordinate descent
311                W_shape = self.model.W_.shape
312                W_flat_size = self.model.W_.size
313                W_flat = self.model.W_.flatten()
314                grad_flat = grad.flatten()
315
316                # Update only one coordinate per epoch (cyclic)
317                idx = self._cd_index % W_flat_size
318                W_flat[idx] -= self.lr * grad_flat[idx]
319                # Clip the updated value
320                W_flat[idx] = np.clip(W_flat[idx], 0, 1)
321
322                # Restore W_
323                self.model.W_ = W_flat.reshape(W_shape)
324
325                self._cd_index += 1
326
327            else:
328                raise ValueError(f"Unsupported optimizer: {self.optimizer}")
329
330            loss = self._loss(X, y)
331            self.loss_history_.append(loss)
332
333            if verbose:
334                print(f"Epoch {epoch+1}: Loss = {loss:.6f}")
335
336        # if sample_weights, else: (must use self.row_index)
337        if sample_weight is not None:
338            self.model.fit(
339                X,
340                y,
341                sample_weight=sample_weight[self.index_row_].ravel(),
342                **kwargs,
343            )
344
345            return self
346
347        return self

Fit the model using finite difference optimization.

Parameters

X : array-like of shape (n_samples, n_features) Training data.

y : array-like of shape (n_samples,) Target values.

epochs : int, optional Number of optimization steps (default=10).

verbose : bool, optional Whether to print progress messages (default=True).

show_progress : bool, optional Whether to show tqdm progress bar (default=True).

sample_weight : array-like, optional Sample weights.

**kwargs Additional keyword arguments.

Returns

self : object Returns self.
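To illustrate the optimizer and penalty options (a sketch under the same assumptions as the example above; 'sgd' recomputes the finite-difference gradient on a mini-batch of `batch_size` rows at each epoch, and `alpha`/`l1_ratio` add an elastic net penalty on the hidden-layer weights `W_`):

```python
# continuing the CustomBackPropRegressor sketch above (names are assumptions)
reg_sgd = ns.CustomBackPropRegressor(
    RidgeCV(),
    optimizer="sgd",   # mini-batch finite-difference updates
    batch_size=32,
    lr=1e-4,
    alpha=0.1,         # elastic net penalty strength
    l1_ratio=0.5,      # mix between L1 (1.0) and L2 (0.0)
)
reg_sgd.fit(X_train, y_train, epochs=5, verbose=False)
print(reg_sgd.loss_history_)  # one penalized loss value per epoch
```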

def predict(self, X, level=95, method='splitconformal', **kwargs):
349    def predict(self, X, level=95, method="splitconformal", **kwargs):
350        """
351        Predict using the trained model.
352
353        Parameters
354        ----------
355
356        X : array-like of shape (n_samples, n_features)
357            Input data.
358
359        level : int, optional
360            Level of confidence for prediction intervals (default=95).
361
362        method : {'splitconformal', 'localconformal'}, optional
363            Method for conformal prediction (default='splitconformal').
364
365        **kwargs
366            Additional keyword arguments. Use `return_pi=True` for prediction intervals,
367            or `return_std=True` for standard deviation estimates.
368
369        Returns
370        -------
371
372        array or tuple
373            Model predictions, or a tuple with prediction intervals or standard deviations if requested.
374        """
375        if "return_std" in kwargs:
376            alpha = 100 - level
377            pi_multiplier = norm.ppf(1 - alpha / 200)
378
379            if len(X.shape) == 1:
380                n_features = X.shape[0]
381                new_X = mo.rbind(
382                    X.reshape(1, n_features),
383                    np.ones(n_features).reshape(1, n_features),
384                )
385
386                mean_, std_ = self.model.predict(new_X, return_std=True)[0]
387
388                preds = mean_
389                lower = mean_ - pi_multiplier * std_
390                upper = mean_ + pi_multiplier * std_
391
392                DescribeResults = namedtuple(
393                    "DescribeResults", ["mean", "std", "lower", "upper"]
394                )
395
396                return DescribeResults(preds, std_, lower, upper)
397
398            # len(X.shape) > 1
399            mean_, std_ = self.model.predict(X, return_std=True)
400
401            preds = mean_
402            lower = mean_ - pi_multiplier * std_
403            upper = mean_ + pi_multiplier * std_
404
405            DescribeResults = namedtuple(
406                "DescribeResults", ["mean", "std", "lower", "upper"]
407            )
408
409            return DescribeResults(preds, std_, lower, upper)
410
411        if "return_pi" in kwargs:
412            assert method in (
413                "splitconformal",
414                "localconformal",
415            ), "method must be in ('splitconformal', 'localconformal')"
416            self.pi = ns.PredictionInterval(
417                obj=self,
418                method=method,
419                level=level,
420                type_pi=self.type_pi,
421                replications=self.replications,
422                kernel=self.kernel,
423            )
424
425            if len(self.X_.shape) == 1:
426                if isinstance(X, pd.DataFrame):
427                    self.X_ = pd.DataFrame(
428                        self.X_.values.reshape(1, -1), columns=self.X_.columns
429                    )
430                else:
431                    self.X_ = self.X_.reshape(1, -1)
432                self.y_ = np.array([self.y_])
433
434            self.pi.fit(self.X_, self.y_)
435            # self.X_ = None # consumes memory to keep, dangerous to delete (side effect)
436            # self.y_ = None # consumes memory to keep, dangerous to delete (side effect)
437            preds = self.pi.predict(X, return_pi=True)
438            return preds
439
440        # "return_std" not in kwargs
441        if len(X.shape) == 1:
442            n_features = X.shape[0]
443            new_X = mo.rbind(
444                X.reshape(1, n_features),
445                np.ones(n_features).reshape(1, n_features),
446            )
447
448            return (0 + self.model.predict(new_X, **kwargs))[0]
449
450        # len(X.shape) > 1
451        return self.model.predict(X, **kwargs)

Predict using the trained model.

Parameters

X : array-like of shape (n_samples, n_features) Input data.

level : int, optional Level of confidence for prediction intervals (default=95).

method : {'splitconformal', 'localconformal'}, optional Method for conformal prediction (default='splitconformal').

**kwargs Additional keyword arguments. Use return_pi=True for prediction intervals, or return_std=True for standard deviation estimates.

Returns

array or tuple Model predictions, or a tuple with prediction intervals or standard deviations if requested.

class DeepClassifier(nnetsauce.CustomClassifier, sklearn.base.ClassifierMixin):
 36class DeepClassifier(CustomClassifier, ClassifierMixin):
 37    """
 38    Deep Classifier
 39
 40    Parameters:
 41
 42        obj: an object
 43            A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification
 44
 45        n_layers: int (default=3)
 46            Number of layers. `n_layers = 1` is a simple `CustomClassifier`
 47
 48        verbose : int, optional (default=0)
 49            Monitor progress when fitting.
 50
 51        All other parameters are those of nnetsauce's `CustomClassifier`
 52
 53    Examples:
 54
 55        ```python
 56        import nnetsauce as ns
 57        from sklearn.datasets import load_breast_cancer
 58        from sklearn.model_selection import train_test_split
 59        from sklearn.linear_model import LogisticRegressionCV
 60        data = load_breast_cancer()
 61        X = data.data
 62        y = data.target
 63        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
 64        obj = LogisticRegressionCV()
 65        clf = ns.DeepClassifier(obj)
 66        clf.fit(X_train, y_train)
 67        print(clf.score(X_test, y_test))
 68        ```
 69    """
 70
 71    _estimator_type = "classifier"
 72
 73    def __init__(
 74        self,
 75        obj,
 76        # Defining depth
 77        n_layers=3,
 78        verbose=0,
 79        # CustomClassifier attributes
 80        n_hidden_features=5,
 81        activation_name="relu",
 82        a=0.01,
 83        nodes_sim="sobol",
 84        bias=True,
 85        dropout=0,
 86        direct_link=True,
 87        n_clusters=2,
 88        cluster_encode=True,
 89        type_clust="kmeans",
 90        type_scaling=("std", "std", "std"),
 91        col_sample=1,
 92        row_sample=1,
 93        cv_calibration=2,
 94        calibration_method="sigmoid",
 95        seed=123,
 96        backend="cpu",
 97    ):
 98        super().__init__(
 99            obj=obj,
100            n_hidden_features=n_hidden_features,
101            activation_name=activation_name,
102            a=a,
103            nodes_sim=nodes_sim,
104            bias=bias,
105            dropout=dropout,
106            direct_link=direct_link,
107            n_clusters=n_clusters,
108            cluster_encode=cluster_encode,
109            type_clust=type_clust,
110            type_scaling=type_scaling,
111            col_sample=col_sample,
112            row_sample=row_sample,
113            seed=seed,
114            backend=backend,
115        )
116        self.coef_ = None
117        self.intercept_ = None
118        self.type_fit = "classification"
119        self.cv_calibration = cv_calibration
120        self.calibration_method = calibration_method
121
122        # Only wrap in CalibratedClassifierCV if not already wrapped
123        # if not isinstance(obj, CalibratedClassifierCV):
124        #     self.obj = CalibratedClassifierCV(
125        #         self.obj,
126        #         cv=self.cv_calibration,
127        #         method=self.calibration_method
128        #     )
129        # else:
130        self.coef_ = None
131        self.intercept_ = None
132        self.type_fit = "classification"
133        self.cv_calibration = cv_calibration
134        self.calibration_method = calibration_method
135        self.obj = obj
136
137        assert n_layers >= 1, "must have n_layers >= 1"
138        self.stacked_obj = obj
139        self.verbose = verbose
140        self.n_layers = n_layers
141        self.classes_ = None
142        self.n_classes_ = None
143
144    def fit(self, X, y, **kwargs):
145        """Fit Classification algorithms to X and y.
146        Parameters
147        ----------
148        X : array-like,
149            Training vectors, where rows is the number of samples
150            and columns is the number of features.
151        y : array-like,
152            Target values (class labels), with one entry
153            per training sample.
154        **kwargs: dict
155            Additional parameters to be passed to the fit method
156            of the base learner. For example, `sample_weight`.
157
158        Returns
159        -------
160        A fitted object
161        """
162
163        self.classes_ = np.unique(y)
164        self.n_classes_ = len(
165            self.classes_
166        )  # for compatibility with scikit-learn
167
168        if isinstance(X, np.ndarray):
169            X = pd.DataFrame(X)
170
171        # init layer
172        self.stacked_obj = CustomClassifier(
173            obj=self.stacked_obj,
174            n_hidden_features=self.n_hidden_features,
175            activation_name=self.activation_name,
176            a=self.a,
177            nodes_sim=self.nodes_sim,
178            bias=self.bias,
179            dropout=self.dropout,
180            direct_link=self.direct_link,
181            n_clusters=self.n_clusters,
182            cluster_encode=self.cluster_encode,
183            type_clust=self.type_clust,
184            type_scaling=self.type_scaling,
185            col_sample=self.col_sample,
186            row_sample=self.row_sample,
187            cv_calibration=None,
188            calibration_method=None,
189            seed=self.seed,
190            backend=self.backend,
191        )
192
193        if self.verbose > 0:
194            iterator = tqdm(range(self.n_layers - 1))
195        else:
196            iterator = range(self.n_layers - 1)
197
198        for _ in iterator:
199            self.stacked_obj = deepcopy(
200                CustomClassifier(
201                    obj=self.stacked_obj,
202                    n_hidden_features=self.n_hidden_features,
203                    activation_name=self.activation_name,
204                    a=self.a,
205                    nodes_sim=self.nodes_sim,
206                    bias=self.bias,
207                    dropout=self.dropout,
208                    direct_link=self.direct_link,
209                    n_clusters=self.n_clusters,
210                    cluster_encode=self.cluster_encode,
211                    type_clust=self.type_clust,
212                    type_scaling=self.type_scaling,
213                    col_sample=self.col_sample,
214                    row_sample=self.row_sample,
215                    cv_calibration=None,
216                    calibration_method=None,
217                    seed=self.seed,
218                    backend=self.backend,
219                )
220            )
221            self.stacked_obj.fit(X, y, **kwargs)
222
223        return self
224
225    def partial_fit(self, X, y, **kwargs):
226        """Partially fit Classification algorithms to X and y.
227        Parameters
228        ----------
229        X : array-like,
230            Training vectors, where rows is the number of samples
231            and columns is the number of features.
232        y : array-like,
233            Target values (class labels), where rows is
234            the number of samples.
235        **kwargs: dict
236            Additional parameters to be passed to the fit method
237            of the base learner. For example, `sample_weight`.
238        Returns
239        -------
240        A fitted object
241        """
242        assert hasattr(self, "stacked_obj"), "model must be fitted first"
243        current_obj = self.stacked_obj
244        for _ in range(self.n_layers):
245            try:
246                input_X = current_obj.obj.cook_test_set(X)
247                current_obj.obj.partial_fit(input_X, y, **kwargs)
248                try:
249                    current_obj = current_obj.obj
250                except AttributeError:
251                    pass
252            except ValueError:
253                pass
254        return self
255
256    def predict(self, X):
257        return self.stacked_obj.predict(X)
258
259    def predict_proba(self, X):
260        return self.stacked_obj.predict_proba(X)
261
262    def score(self, X, y, scoring=None):
263        return self.stacked_obj.score(X, y, scoring)
264
265    def cross_val_optim(
266        self,
267        X_train,
268        y_train,
269        X_test=None,
270        y_test=None,
271        scoring="accuracy",
272        surrogate_obj=None,
273        cv=5,
274        n_jobs=None,
275        n_init=10,
276        n_iter=190,
277        abs_tol=1e-3,
278        verbose=2,
279        seed=123,
280        **kwargs,
281    ):
282        """Cross-validation function and hyperparameters' search
283
284        Parameters:
285
286            X_train: array-like,
287                Training vectors, where rows is the number of samples
288                and columns is the number of features.
289
290            y_train: array-like,
291                Training vectors, where rows is the number of samples
292                and columns is the number of features.
293
294            X_test: array-like,
295                Testing vectors, where rows is the number of samples
296                and columns is the number of features.
297
298            y_test: array-like,
299                Testing vectors, where rows is the number of samples
300                and columns is the number of features.
301
302            scoring: str
303                scoring metric; see https://scikit-learn.org/stable/modules/model_evaluation.html#the-scoring-parameter-defining-model-evaluation-rules
304
305            surrogate_obj: an object;
306                An ML model for estimating the uncertainty around the objective function
307
308            cv: int;
309                number of cross-validation folds
310
311            n_jobs: int;
312                number of jobs for parallel execution
313
314            n_init: an integer;
315                number of points in the initial setting, when `x_init` and `y_init` are not provided
316
317            n_iter: an integer;
318                number of iterations of the minimization algorithm
319
320            abs_tol: a float;
321                tolerance for convergence of the optimizer (early stopping based on acquisition function)
322
323            verbose: int
324                controls verbosity
325
326            seed: int
327                reproducibility seed
328
329            **kwargs: dict
330                additional parameters to be passed to the estimator
331
332        Examples:
333
334            ```python
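            # A minimal sketch (illustrative data, estimator and a smaller
            # search budget than the defaults):
            import nnetsauce as ns
            from sklearn.datasets import load_breast_cancer
            from sklearn.model_selection import train_test_split
            from sklearn.linear_model import LogisticRegression

            X, y = load_breast_cancer(return_X_y=True)
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=0.2, random_state=123
            )

            clf = ns.DeepClassifier(LogisticRegression(), n_layers=2)
            # Bayesian optimization over the depth and CustomClassifier hyperparameters
            res = clf.cross_val_optim(
                X_train, y_train, X_test=X_test, y_test=y_test,
                scoring="accuracy", cv=5, n_init=5, n_iter=20, verbose=1
            )
            print(res.best_params)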
335            ```
336        """
337
338        num_to_activation_name = {1: "relu", 2: "sigmoid", 3: "tanh"}
339        num_to_nodes_sim = {1: "sobol", 2: "uniform", 3: "hammersley"}
340        num_to_type_clust = {1: "kmeans", 2: "gmm"}
341
342        def deepclassifier_cv(
343            X_train,
344            y_train,
345            # Defining depth
346            n_layers=3,
347            # CustomClassifier attributes
348            n_hidden_features=5,
349            activation_name="relu",
350            nodes_sim="sobol",
351            dropout=0,
352            n_clusters=2,
353            type_clust="kmeans",
354            cv=5,
355            n_jobs=None,
356            scoring="accuracy",
357            seed=123,
358        ):
359            self.set_params(
360                **{
361                    "n_layers": n_layers,
362                    # CustomClassifier attributes
363                    "n_hidden_features": n_hidden_features,
364                    "activation_name": activation_name,
365                    "nodes_sim": nodes_sim,
366                    "dropout": dropout,
367                    "n_clusters": n_clusters,
368                    "type_clust": type_clust,
369                    **kwargs,
370                }
371            )
372            return -cross_val_score(
373                estimator=self,
374                X=X_train,
375                y=y_train,
376                scoring=scoring,
377                cv=cv,
378                n_jobs=n_jobs,
379                verbose=0,
380            ).mean()
381
382        # objective function for hyperparams tuning
383        def crossval_objective(xx):
384            return deepclassifier_cv(
385                X_train=X_train,
386                y_train=y_train,
387                # Defining depth
388                n_layers=int(np.ceil(xx[0])),
389                # CustomClassifier attributes
390                n_hidden_features=int(np.ceil(xx[1])),
391                activation_name=num_to_activation_name[int(np.ceil(xx[2]))],
392                nodes_sim=num_to_nodes_sim[int(np.ceil(xx[3]))],
393                dropout=xx[4],
394                n_clusters=int(np.ceil(xx[5])),
395                type_clust=num_to_type_clust[int(np.ceil(xx[6]))],
396                cv=cv,
397                n_jobs=n_jobs,
398                scoring=scoring,
399                seed=seed,
400            )
401
402        if surrogate_obj is None:
403            gp_opt = gp.GPOpt(
404                objective_func=crossval_objective,
405                lower_bound=np.array([0, 3, 0, 0, 0.0, 0, 0]),
406                upper_bound=np.array([5, 100, 3, 3, 0.4, 5, 2]),
407                params_names=[
408                    "n_layers",
409                    # CustomClassifier attributes
410                    "n_hidden_features",
411                    "activation_name",
412                    "nodes_sim",
413                    "dropout",
414                    "n_clusters",
415                    "type_clust",
416                ],
417                method="bayesian",
418                n_init=n_init,
419                n_iter=n_iter,
420                seed=seed,
421            )
422        else:
423            gp_opt = gp.GPOpt(
424                objective_func=crossval_objective,
425                lower_bound=np.array([0, 3, 0, 0, 0.0, 0, 0]),
426                upper_bound=np.array([5, 100, 3, 3, 0.4, 5, 2]),
427                params_names=[
428                    "n_layers",
429                    # CustomClassifier attributes
430                    "n_hidden_features",
431                    "activation_name",
432                    "nodes_sim",
433                    "dropout",
434                    "n_clusters",
435                    "type_clust",
436                ],
437                acquisition="ucb",
438                method="splitconformal",
439                surrogate_obj=ns.PredictionInterval(
440                    obj=surrogate_obj, method="splitconformal"
441                ),
442                n_init=n_init,
443                n_iter=n_iter,
444                seed=seed,
445            )
446
447        res = gp_opt.optimize(verbose=verbose, abs_tol=abs_tol)
448        res.best_params["n_layers"] = int(np.ceil(res.best_params["n_layers"]))
449        res.best_params["n_hidden_features"] = int(
450            np.ceil(res.best_params["n_hidden_features"])
451        )
452        res.best_params["activation_name"] = num_to_activation_name[
453            int(np.ceil(res.best_params["activation_name"]))
454        ]
455        res.best_params["nodes_sim"] = num_to_nodes_sim[
456            int(np.ceil(res.best_params["nodes_sim"]))
457        ]
458        res.best_params["dropout"] = res.best_params["dropout"]
459        res.best_params["n_clusters"] = int(
460            np.ceil(res.best_params["n_clusters"])
461        )
462        res.best_params["type_clust"] = num_to_type_clust[
463            int(np.ceil(res.best_params["type_clust"]))
464        ]
465
466        # out-of-sample error
467        if X_test is not None and y_test is not None:
468            self.set_params(**res.best_params, verbose=0, seed=seed)
469            preds = self.fit(X_train, y_train).predict(X_test)
470            # check error on y_test
471            oos_err = getattr(metrics, scoring + "_score")(
472                y_true=y_test, y_pred=preds
473            )
474            result = namedtuple("result", res._fields + ("test_" + scoring,))
475            return result(*res, oos_err)
476        else:
477            return res
478
479    def lazy_cross_val_optim(
480        self,
481        X_train,
482        y_train,
483        X_test=None,
484        y_test=None,
485        scoring="accuracy",
486        surrogate_objs=None,
487        customize=False,
488        cv=5,
489        n_jobs=None,
490        n_init=10,
491        n_iter=190,
492        abs_tol=1e-3,
493        verbose=1,
494        seed=123,
495    ):
496        """Automated Cross-validation function and hyperparameters' search using multiple surrogates
497
498        Parameters:
499
500            X_train: array-like,
501                Training vectors, where rows is the number of samples
502                and columns is the number of features.
503
504            y_train: array-like,
505                Training vectors, where rows is the number of samples
506                and columns is the number of features.
507
508            X_test: array-like,
509                Testing vectors, where rows is the number of samples
510                and columns is the number of features.
511
512            y_test: array-like,
513                Testing vectors, where rows is the number of samples
514                and columns is the number of features.
515
516            scoring: str
517                scoring metric; see https://scikit-learn.org/stable/modules/model_evaluation.html#the-scoring-parameter-defining-model-evaluation-rules
518
519            surrogate_objs: object names as a list of strings;
520                ML models for estimating the uncertainty around the objective function
521
522            customize: boolean
523                if True, the surrogate is transformed into a quasi-randomized network (default is False)
524
525            cv: int;
526                number of cross-validation folds
527
528            n_jobs: int;
529                number of jobs for parallel execution
530
531            n_init: an integer;
532                number of points in the initial setting, when `x_init` and `y_init` are not provided
533
534            n_iter: an integer;
535                number of iterations of the minimization algorithm
536
537            abs_tol: a float;
538                tolerance for convergence of the optimizer (early stopping based on acquisition function)
539
540            verbose: int
541                controls verbosity
542
543            seed: int
544                reproducibility seed
545
546        Examples:
547
548            ```python
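            # A minimal sketch (illustrative data; a short list of scikit-learn
            # regressors is used as surrogate models):
            import nnetsauce as ns
            from sklearn.datasets import load_breast_cancer
            from sklearn.model_selection import train_test_split
            from sklearn.linear_model import LogisticRegression

            X, y = load_breast_cancer(return_X_y=True)
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=0.2, random_state=123
            )

            clf = ns.DeepClassifier(LogisticRegression(), n_layers=2)
            results = clf.lazy_cross_val_optim(
                X_train, y_train, X_test=X_test, y_test=y_test,
                surrogate_objs=["RidgeCV", "KNeighborsRegressor"],
                scoring="accuracy", cv=5, n_init=5, n_iter=20, verbose=0
            )
            print(results)  # list of (surrogate name, optimization result) tuples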
549            ```
550        """
551
552        removed_regressors = [
553            "TheilSenRegressor",
554            "ARDRegression",
555            "CCA",
556            "GaussianProcessRegressor",
557            "GradientBoostingRegressor",
558            "HistGradientBoostingRegressor",
559            "IsotonicRegression",
560            "MultiOutputRegressor",
561            "MultiTaskElasticNet",
562            "MultiTaskElasticNetCV",
563            "MultiTaskLasso",
564            "MultiTaskLassoCV",
565            "OrthogonalMatchingPursuit",
566            "OrthogonalMatchingPursuitCV",
567            "PLSCanonical",
568            "PLSRegression",
569            "RadiusNeighborsRegressor",
570            "RegressorChain",
571            "StackingRegressor",
572            "VotingRegressor",
573        ]
574
575        results = []
576
577        for est in all_estimators():
578            if surrogate_objs is None:
579                if issubclass(est[1], RegressorMixin) and (
580                    est[0] not in removed_regressors
581                ):
582                    try:
583                        if customize == True:
584                            surr_obj = ns.CustomClassifier(obj=est[1]())
585                        else:
586                            surr_obj = est[1]()
587                        res = self.cross_val_optim(
588                            X_train=X_train,
589                            y_train=y_train,
590                            X_test=X_test,
591                            y_test=y_test,
592                            surrogate_obj=surr_obj,
593                            cv=cv,
594                            n_jobs=n_jobs,
595                            scoring=scoring,
596                            n_init=n_init,
597                            n_iter=n_iter,
598                            abs_tol=abs_tol,
599                            verbose=verbose,
600                            seed=seed,
601                        )
602                        if customize == True:
603                            results.append((f"CustomClassifier({est[0]})", res))
604                        else:
605                            results.append((est[0], res))
606                    except:
607                        pass
608
609            else:
610                if (
611                    issubclass(est[1], RegressorMixin)
612                    and (est[0] not in removed_regressors)
613                    and est[0] in surrogate_objs
614                ):
615                    try:
616                        if customize == True:
617                            surr_obj = ns.CustomClassifier(obj=est[1]())
618                        else:
619                            surr_obj = est[1]()
620                        res = self.cross_val_optim(
621                            X_train=X_train,
622                            y_train=y_train,
623                            X_test=X_test,
624                            y_test=y_test,
625                            surrogate_obj=surr_obj,
626                            cv=cv,
627                            n_jobs=n_jobs,
628                            scoring=scoring,
629                            n_init=n_init,
630                            n_iter=n_iter,
631                            abs_tol=abs_tol,
632                            verbose=verbose,
633                            seed=seed,
634                        )
635                        if customize == True:
636                            results.append((f"CustomClassifier({est[0]})", res))
637                        else:
638                            results.append((est[0], res))
639                    except:
640                        pass
641
642        return results
643
644    @property
645    def _estimator_type(self):
646        return "classifier"

Deep Classifier

Parameters:

obj: an object
    A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification

n_layers: int (default=3)
    Number of layers. `n_layers = 1` is a simple `CustomClassifier`

verbose : int, optional (default=0)
    Monitor progress when fitting.

All the other parameters are nnetsauce `CustomClassifier`'s

Examples:

```python
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegressionCV

data = load_breast_cancer()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
obj = LogisticRegressionCV()
clf = ns.DeepClassifier(obj)
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))
```

def fit(self, X, y, **kwargs):
144    def fit(self, X, y, **kwargs):
145        """Fit Classification algorithms to X and y.
146        Parameters
147        ----------
148        X : array-like,
149            Training vectors, where rows is the number of samples
150            and columns is the number of features.
151        y : array-like,
152            Target values (class labels), where rows is
153            the number of samples.
154        **kwargs: dict
155            Additional parameters to be passed to the fit method
156            of the base learner. For example, `sample_weight`.
157
158        Returns
159        -------
160        A fitted object
161        """
162
163        self.classes_ = np.unique(y)
164        self.n_classes_ = len(
165            self.classes_
166        )  # for compatibility with scikit-learn
167
168        if isinstance(X, np.ndarray):
169            X = pd.DataFrame(X)
170
171        # init layer
172        self.stacked_obj = CustomClassifier(
173            obj=self.stacked_obj,
174            n_hidden_features=self.n_hidden_features,
175            activation_name=self.activation_name,
176            a=self.a,
177            nodes_sim=self.nodes_sim,
178            bias=self.bias,
179            dropout=self.dropout,
180            direct_link=self.direct_link,
181            n_clusters=self.n_clusters,
182            cluster_encode=self.cluster_encode,
183            type_clust=self.type_clust,
184            type_scaling=self.type_scaling,
185            col_sample=self.col_sample,
186            row_sample=self.row_sample,
187            cv_calibration=None,
188            calibration_method=None,
189            seed=self.seed,
190            backend=self.backend,
191        )
192
193        if self.verbose > 0:
194            iterator = tqdm(range(self.n_layers - 1))
195        else:
196            iterator = range(self.n_layers - 1)
197
198        for _ in iterator:
199            self.stacked_obj = deepcopy(
200                CustomClassifier(
201                    obj=self.stacked_obj,
202                    n_hidden_features=self.n_hidden_features,
203                    activation_name=self.activation_name,
204                    a=self.a,
205                    nodes_sim=self.nodes_sim,
206                    bias=self.bias,
207                    dropout=self.dropout,
208                    direct_link=self.direct_link,
209                    n_clusters=self.n_clusters,
210                    cluster_encode=self.cluster_encode,
211                    type_clust=self.type_clust,
212                    type_scaling=self.type_scaling,
213                    col_sample=self.col_sample,
214                    row_sample=self.row_sample,
215                    cv_calibration=None,
216                    calibration_method=None,
217                    seed=self.seed,
218                    backend=self.backend,
219                )
220            )
221            self.stacked_obj.fit(X, y, **kwargs)
222
223        return self

Fit Classification algorithms to X and y.

Parameters

X : array-like
    Training vectors, where rows is the number of samples
    and columns is the number of features.

y : array-like
    Target values, where rows is the number of samples.

**kwargs: dict
    Additional parameters to be passed to the fit method
    of the base learner. For example, `sample_weight`.

Returns

A fitted object

def predict(self, X):
256    def predict(self, X):
257        return self.stacked_obj.predict(X)

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
def predict_proba(self, X):
259    def predict_proba(self, X):
260        return self.stacked_obj.predict_proba(X)

Predict probabilities for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

probability estimates for test data: {array-like}
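For instance, continuing the breast cancer example above (a minimal sketch; the output shape follows the usual scikit-learn convention):

```python
proba = clf.predict_proba(X_test)  # expected shape: (n_samples, n_classes)
print(proba[:5])
```
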
def score(self, X, y, scoring=None):
262    def score(self, X, y, scoring=None):
263        return self.stacked_obj.score(X, y, scoring)

Scoring function for classification.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

scoring: str
    scoring method (default is accuracy)

Returns:

score: float
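A short usage sketch, continuing the same example (accuracy is the default metric):

```python
print(clf.score(X_test, y_test))
```
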
class DeepRegressor(nnetsauce.CustomRegressor, sklearn.base.RegressorMixin):
 13class DeepRegressor(CustomRegressor, RegressorMixin):
 14    """
 15    Deep Regressor
 16
 17    Parameters:
 18
 19        obj: an object
 20            A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification
 21
 22        verbose : int, optional (default=0)
 23            Monitor progress when fitting.
 24
 25        n_layers: int (default=2)
 26            Number of layers. `n_layers = 1` is a simple `CustomRegressor`
 27
 28        All the other parameters are nnetsauce `CustomRegressor`'s
 29
 30    Examples:
 31
 32        ```python
 33        import nnetsauce as ns
 34        from sklearn.datasets import load_diabetes
 35        from sklearn.model_selection import train_test_split
 36        from sklearn.linear_model import RidgeCV
 37        data = load_diabetes()
 38        X = data.data
 39        y= data.target
 40        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
 41        obj = RidgeCV()
 42        clf = ns.DeepRegressor(obj)
 43        clf.fit(X_train, y_train)
 44        print(clf.score(X_test, y_test))
 45        ```
 46
 47    """
 48
 49    def __init__(
 50        self,
 51        obj,
 52        # Defining depth
 53        n_layers=2,
 54        verbose=0,
 55        # CustomRegressor attributes
 56        n_hidden_features=5,
 57        activation_name="relu",
 58        a=0.01,
 59        nodes_sim="sobol",
 60        bias=True,
 61        dropout=0,
 62        direct_link=True,
 63        n_clusters=2,
 64        cluster_encode=True,
 65        type_clust="kmeans",
 66        type_scaling=("std", "std", "std"),
 67        col_sample=1,
 68        row_sample=1,
 69        level=None,
 70        pi_method="splitconformal",
 71        seed=123,
 72        backend="cpu",
 73    ):
 74        super().__init__(
 75            obj=obj,
 76            n_hidden_features=n_hidden_features,
 77            activation_name=activation_name,
 78            a=a,
 79            nodes_sim=nodes_sim,
 80            bias=bias,
 81            dropout=dropout,
 82            direct_link=direct_link,
 83            n_clusters=n_clusters,
 84            cluster_encode=cluster_encode,
 85            type_clust=type_clust,
 86            type_scaling=type_scaling,
 87            col_sample=col_sample,
 88            row_sample=row_sample,
 89            level=level,
 90            pi_method=pi_method,
 91            seed=seed,
 92            backend=backend,
 93        )
 94
 95        assert n_layers >= 1, "must have n_layers >= 1"
 96
 97        self.stacked_obj = deepcopy(obj)
 98        self.verbose = verbose
 99        self.n_layers = n_layers
100        self.level = level
101        self.pi_method = pi_method
102        self.coef_ = None
103
104    def fit(self, X, y, **kwargs):
105        """Fit Regression algorithms to X and y.
106        Parameters
107        ----------
108        X : array-like,
109            Training vectors, where rows is the number of samples
110            and columns is the number of features.
111        y : array-like,
112            Target values, where rows is the
113            number of samples.
114        **kwargs: dict
115            Additional parameters to be passed to the fit method
116            of the base learner. For example, `sample_weight`.
117        Returns
118        -------
119        A fitted object
120        """
121
122        if isinstance(X, np.ndarray):
123            X = pd.DataFrame(X)
124
125        # init layer
126        self.stacked_obj = CustomRegressor(
127            obj=self.stacked_obj,
128            n_hidden_features=self.n_hidden_features,
129            activation_name=self.activation_name,
130            a=self.a,
131            nodes_sim=self.nodes_sim,
132            bias=self.bias,
133            dropout=self.dropout,
134            direct_link=self.direct_link,
135            n_clusters=self.n_clusters,
136            cluster_encode=self.cluster_encode,
137            type_clust=self.type_clust,
138            type_scaling=self.type_scaling,
139            col_sample=self.col_sample,
140            row_sample=self.row_sample,
141            seed=self.seed,
142            backend=self.backend,
143        )
144
145        if self.verbose > 0:
146            iterator = tqdm(range(self.n_layers - 1))
147        else:
148            iterator = range(self.n_layers - 1)
149
150        for _ in iterator:
151            self.stacked_obj = deepcopy(
152                CustomRegressor(
153                    obj=self.stacked_obj,
154                    n_hidden_features=self.n_hidden_features,
155                    activation_name=self.activation_name,
156                    a=self.a,
157                    nodes_sim=self.nodes_sim,
158                    bias=self.bias,
159                    dropout=self.dropout,
160                    direct_link=self.direct_link,
161                    n_clusters=self.n_clusters,
162                    cluster_encode=self.cluster_encode,
163                    type_clust=self.type_clust,
164                    type_scaling=self.type_scaling,
165                    col_sample=self.col_sample,
166                    row_sample=self.row_sample,
167                    seed=self.seed,
168                    backend=self.backend,
169                )
170            )
171
172        self.stacked_obj.fit(X, y, **kwargs)
173
174        if self.level is not None:
175            self.stacked_obj = PredictionInterval(
176                obj=self.stacked_obj, method=self.pi_method, level=self.level
177            )
178
179        if hasattr(self.stacked_obj, "clustering_obj_"):
180            self.clustering_obj_ = self.stacked_obj.clustering_obj_
181
182        if hasattr(self.stacked_obj, "coef_"):
183            self.coef_ = self.stacked_obj.coef_
184
185        if hasattr(self.stacked_obj, "scaler_"):
186            self.scaler_ = self.stacked_obj.scaler_
187
188        if hasattr(self.stacked_obj, "nn_scaler_"):
189            self.nn_scaler_ = self.stacked_obj.nn_scaler_
190
191        if hasattr(self.stacked_obj, "clustering_scaler_"):
192            self.clustering_scaler_ = self.stacked_obj.clustering_scaler_
193
194        return self
195
196    def partial_fit(self, X, y, **kwargs):
197        """Partially fit Regression algorithms to X and y.
198        Parameters
199        ----------
200        X : array-like,
201            Training vectors, where rows is the number of samples
202            and columns is the number of features.
203        y : array-like,
204            Target values, where rows is the
205            number of samples.
206        **kwargs: dict
207            Additional parameters to be passed to the fit method
208            of the base learner. For example, `sample_weight`.
209        Returns
210        -------
211        A fitted object
212        """
213        assert hasattr(self, "stacked_obj"), "model must be fitted first"
214        current_obj = self.stacked_obj
215        for _ in range(self.n_layers):
216            try:
217                input_X = current_obj.obj.cook_test_set(X)
218                current_obj.obj.partial_fit(input_X, y, **kwargs)
219                try:
220                    current_obj = current_obj.obj
221                except AttributeError:
222                    pass
223            except ValueError as e:
224                print(e)
225                pass
226        return self
227
228    def predict(self, X, **kwargs):
229        if self.level is not None:
230            return self.stacked_obj.predict(X, return_pi=True)
231        return self.stacked_obj.predict(X, **kwargs)
232
233    def score(self, X, y, scoring=None):
234        return self.stacked_obj.score(X, y, scoring)

Deep Regressor

Parameters:

obj: an object
    A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification

verbose : int, optional (default=0)
    Monitor progress when fitting.

n_layers: int (default=2)
    Number of layers. `n_layers = 1` is a simple `CustomRegressor`

All the other parameters are nnetsauce `CustomRegressor`'s

Examples:

```python
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.linear_model import RidgeCV

data = load_diabetes()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
obj = RidgeCV()
clf = ns.DeepRegressor(obj)
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))
```

def fit(self, X, y, **kwargs):
104    def fit(self, X, y, **kwargs):
105        """Fit Regression algorithms to X and y.
106        Parameters
107        ----------
108        X : array-like,
109            Training vectors, where rows is the number of samples
110            and columns is the number of features.
111        y : array-like,
112            Target values, where rows is the
113            number of samples.
114        **kwargs: dict
115            Additional parameters to be passed to the fit method
116            of the base learner. For example, `sample_weight`.
117        Returns
118        -------
119        A fitted object
120        """
121
122        if isinstance(X, np.ndarray):
123            X = pd.DataFrame(X)
124
125        # init layer
126        self.stacked_obj = CustomRegressor(
127            obj=self.stacked_obj,
128            n_hidden_features=self.n_hidden_features,
129            activation_name=self.activation_name,
130            a=self.a,
131            nodes_sim=self.nodes_sim,
132            bias=self.bias,
133            dropout=self.dropout,
134            direct_link=self.direct_link,
135            n_clusters=self.n_clusters,
136            cluster_encode=self.cluster_encode,
137            type_clust=self.type_clust,
138            type_scaling=self.type_scaling,
139            col_sample=self.col_sample,
140            row_sample=self.row_sample,
141            seed=self.seed,
142            backend=self.backend,
143        )
144
145        if self.verbose > 0:
146            iterator = tqdm(range(self.n_layers - 1))
147        else:
148            iterator = range(self.n_layers - 1)
149
150        for _ in iterator:
151            self.stacked_obj = deepcopy(
152                CustomRegressor(
153                    obj=self.stacked_obj,
154                    n_hidden_features=self.n_hidden_features,
155                    activation_name=self.activation_name,
156                    a=self.a,
157                    nodes_sim=self.nodes_sim,
158                    bias=self.bias,
159                    dropout=self.dropout,
160                    direct_link=self.direct_link,
161                    n_clusters=self.n_clusters,
162                    cluster_encode=self.cluster_encode,
163                    type_clust=self.type_clust,
164                    type_scaling=self.type_scaling,
165                    col_sample=self.col_sample,
166                    row_sample=self.row_sample,
167                    seed=self.seed,
168                    backend=self.backend,
169                )
170            )
171
172        self.stacked_obj.fit(X, y, **kwargs)
173
174        if self.level is not None:
175            self.stacked_obj = PredictionInterval(
176                obj=self.stacked_obj, method=self.pi_method, level=self.level
177            )
178
179        if hasattr(self.stacked_obj, "clustering_obj_"):
180            self.clustering_obj_ = self.stacked_obj.clustering_obj_
181
182        if hasattr(self.stacked_obj, "coef_"):
183            self.coef_ = self.stacked_obj.coef_
184
185        if hasattr(self.stacked_obj, "scaler_"):
186            self.scaler_ = self.stacked_obj.scaler_
187
188        if hasattr(self.stacked_obj, "nn_scaler_"):
189            self.nn_scaler_ = self.stacked_obj.nn_scaler_
190
191        if hasattr(self.stacked_obj, "clustering_scaler_"):
192            self.clustering_scaler_ = self.stacked_obj.clustering_scaler_
193
194        return self

Fit Regression algorithms to X and y.

Parameters

X : array-like
    Training vectors, where rows is the number of samples
    and columns is the number of features.

y : array-like
    Target values, where rows is the number of samples.

**kwargs: dict
    Additional parameters to be passed to the fit method
    of the base learner. For example, `sample_weight`.

Returns

A fitted object

def predict(self, X, **kwargs):
228    def predict(self, X, **kwargs):
229        if self.level is not None:
230            return self.stacked_obj.predict(X, return_pi=True)
231        return self.stacked_obj.predict(X, **kwargs)

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

level: int
    Level of confidence (default = 95)

method: str
    'splitconformal' or 'localconformal' prediction intervals
    (if you specify `return_pi = True`)

**kwargs: additional parameters
        `return_pi = True` for conformal prediction,
        with `method` in ('splitconformal', 'localconformal'),
        or `return_std = True` for `self.obj` in
        (`sklearn.linear_model.BayesianRidge`,
        `sklearn.linear_model.ARDRegression`,
        `sklearn.gaussian_process.GaussianProcessRegressor`)

Returns:

model predictions:
    an array if uncertainty quantification is not requested,
    or a tuple (mean, standard deviation, lower and upper
    prediction interval) if `return_std = True`, or prediction
    intervals and simulations if `return_pi = True`
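
A minimal sketch of requesting prediction intervals, reusing the diabetes example above (`level` and `pi_method` are constructor arguments shown in the source listing):

```python
reg = ns.DeepRegressor(RidgeCV(), n_layers=2, level=95, pi_method="splitconformal")
reg.fit(X_train, y_train)
preds = reg.predict(X_test)  # with `level` set, prediction intervals are returned as well
```
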
def score(self, X, y, scoring=None):
233    def score(self, X, y, scoring=None):
234        return self.stacked_obj.score(X, y, scoring)

Compute the score of the model.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

scoring: str
    scoring method

Returns:

score: float
class DeepMTS(nnetsauce.MTS):
 11class DeepMTS(MTS):
 12    """Univariate and multivariate time series (DeepMTS) forecasting with Quasi-Randomized networks (Work in progress)
 13
 14    Parameters:
 15
 16        obj: object.
 17            any object containing a method fit (obj.fit()) and a method predict
 18            (obj.predict()).
 19
 20        n_layers: int.
 21            number of layers in the neural network.
 22
 23        n_hidden_features: int.
 24            number of nodes in the hidden layer.
 25
 26        activation_name: str.
 27            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'.
 28
 29        a: float.
 30            hyperparameter for 'prelu' or 'elu' activation function.
 31
 32        nodes_sim: str.
 33            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 34            'uniform'.
 35
 36        bias: boolean.
 37            indicates if the hidden layer contains a bias term (True) or not
 38            (False).
 39
 40        dropout: float.
 41            regularization parameter; (random) percentage of nodes dropped out
 42            of the training.
 43
 44        direct_link: boolean.
 45            indicates if the original predictors are included (True) in model's fitting or not (False).
 46
 47        n_clusters: int.
 48            number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering).
 49
 50        cluster_encode: bool.
 51            defines how the variable containing clusters is treated (default is one-hot)
 52            if `False`, then labels are used, without one-hot encoding.
 53
 54        type_clust: str.
 55            type of clustering method: currently k-means ('kmeans') or Gaussian
 56            Mixture Model ('gmm').
 57
 58        type_scaling: a tuple of 3 strings.
 59            scaling methods for inputs, hidden layer, and clustering respectively
 60            (and when relevant).
 61            Currently available: standardization ('std') or MinMax scaling ('minmax').
 62
 63        lags: int.
 64            number of lags used for each time series.
 65
 66        type_pi: str.
 67            type of prediction interval; currently:
 68            - "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case
 69            - "kde": based on Kernel Density Estimation of in-sample residuals
 70            - "bootstrap": based on independent bootstrap of in-sample residuals
 71            - "block-bootstrap": based on basic block bootstrap of in-sample residuals
 72            - "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals
 73            - "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals
 74            - "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals
 75            - "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals
 76            - "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals
 77            - "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals
 78
 79        block_size: int.
 80            size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap").
 81            Default is round(3.15*(n_residuals^(1/3)))
 82
 83        replications: int.
 84            number of replications (if needed, for predictive simulation). Default is 'None'.
 85
 86        kernel: str.
 87            the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'.
 88
 89        agg: str.
 90            either "mean" or "median" for simulation of bootstrap aggregating
 91
 92        seed: int.
 93            reproducibility seed for nodes_sim=='uniform' or predictive simulation.
 94
 95        backend: str.
 96            "cpu" or "gpu" or "tpu".
 97
 98        verbose: int.
 99            0: not printing; 1: printing
100
101        show_progress: bool.
102            True: progress bar when fitting each series; False: no progress bar when fitting each series
103
104    Attributes:
105
106        fit_objs_: dict
107            objects adjusted to each individual time series
108
109        y_: {array-like}
110            DeepMTS responses (most recent observations first)
111
112        X_: {array-like}
113            DeepMTS lags
114
115        xreg_: {array-like}
116            external regressors
117
118        y_means_: dict
119            a dictionary of each series mean values
120
121        preds_: {array-like}
122            successive model predictions
123
124        preds_std_: {array-like}
125            standard deviation around the predictions
126
127        return_std_: boolean
128            return uncertainty or not (set in predict)
129
130        df_: data frame
131            the input data frame, in case a data.frame is provided to `fit`
132
133    Examples:
134
135    Example 1:
136
137        ```python
138        import nnetsauce as ns
139        import numpy as np
140        from sklearn import linear_model
141        np.random.seed(123)
142
143        M = np.random.rand(10, 3)
144        M[:,0] = 10*M[:,0]
145        M[:,2] = 25*M[:,2]
146        print(M)
147
148        # Adjust Bayesian Ridge
149        regr4 = linear_model.BayesianRidge()
150        obj_DeepMTS = ns.DeepMTS(regr4, lags = 1, n_hidden_features=5)
151        obj_DeepMTS.fit(M)
152        print(obj_DeepMTS.predict())
153
154        # with credible intervals
155        print(obj_DeepMTS.predict(return_std=True, level=80))
156
157        print(obj_DeepMTS.predict(return_std=True, level=95))
158        ```
159
160    Example 2:
161
162        ```python
163        import nnetsauce as ns
164        import numpy as np
165        from sklearn import linear_model
166
167        dataset = {
168        'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
169        'series1' : [34, 30, 35.6, 33.3, 38.1],
170        'series2' : [4, 5.5, 5.6, 6.3, 5.1],
171        'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
172        df = pd.DataFrame(dataset).set_index('date')
173        print(df)
174
175        # Adjust Bayesian Ridge
176        regr5 = linear_model.BayesianRidge()
177        obj_DeepMTS = ns.DeepMTS(regr5, lags = 1, n_hidden_features=5)
178        obj_DeepMTS.fit(df)
179        print(obj_DeepMTS.predict())
180
181        # with credible intervals
182        print(obj_DeepMTS.predict(return_std=True, level=80))
183
184        print(obj_DeepMTS.predict(return_std=True, level=95))
185        ```
186
187    """
188
189    # construct the object -----
190
191    def __init__(
192        self,
193        obj,
194        n_layers=3,
195        n_hidden_features=5,
196        activation_name="relu",
197        a=0.01,
198        nodes_sim="sobol",
199        bias=True,
200        dropout=0,
201        direct_link=True,
202        n_clusters=2,
203        cluster_encode=True,
204        type_clust="kmeans",
205        type_scaling=("std", "std", "std"),
206        lags=1,
207        type_pi="kde",
208        block_size=None,
209        replications=None,
210        kernel=None,
211        agg="mean",
212        seed=123,
213        backend="cpu",
214        verbose=0,
215        show_progress=True,
216    ):
217        assert int(lags) == lags, "parameter 'lags' should be an integer"
218        assert n_layers >= 1, "must have n_layers >= 1"
219        self.n_layers = int(n_layers)
220
221        if self.n_layers > 1:
222            for _ in range(self.n_layers - 1):
223                obj = CustomRegressor(
224                    obj=deepcopy(obj),
225                    n_hidden_features=n_hidden_features,
226                    activation_name=activation_name,
227                    a=a,
228                    nodes_sim=nodes_sim,
229                    bias=bias,
230                    dropout=dropout,
231                    direct_link=direct_link,
232                    n_clusters=n_clusters,
233                    cluster_encode=cluster_encode,
234                    type_clust=type_clust,
235                    type_scaling=type_scaling,
236                    seed=seed,
237                    backend=backend,
238                )
239
240        self.obj = deepcopy(obj)
241        super().__init__(
242            obj=self.obj,
243            n_hidden_features=n_hidden_features,
244            activation_name=activation_name,
245            a=a,
246            nodes_sim=nodes_sim,
247            bias=bias,
248            dropout=dropout,
249            direct_link=direct_link,
250            n_clusters=n_clusters,
251            cluster_encode=cluster_encode,
252            type_clust=type_clust,
253            type_scaling=type_scaling,
254            lags=lags,
255            type_pi=type_pi,
256            block_size=block_size,
257            replications=replications,
258            kernel=kernel,
259            agg=agg,
260            seed=seed,
261            backend=backend,
262            verbose=verbose,
263            show_progress=show_progress,
264        )

Univariate and multivariate time series (DeepMTS) forecasting with Quasi-Randomized networks (Work in progress)

Parameters:

obj: object.
    any object containing a method fit (obj.fit()) and a method predict
    (obj.predict()).

n_layers: int.
    number of layers in the neural network.

n_hidden_features: int.
    number of nodes in the hidden layer.

activation_name: str.
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'.

a: float.
    hyperparameter for 'prelu' or 'elu' activation function.

nodes_sim: str.
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'.

bias: boolean.
    indicates if the hidden layer contains a bias term (True) or not
    (False).

dropout: float.
    regularization parameter; (random) percentage of nodes dropped out
    of the training.

direct_link: boolean.
    indicates if the original predictors are included (True) in model's fitting or not (False).

n_clusters: int.
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering).

cluster_encode: bool.
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding.

type_clust: str.
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm').

type_scaling: a tuple of 3 strings.
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax').

lags: int.
    number of lags used for each time series.

type_pi: str.
    type of prediction interval; currently:
    - "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case
    - "kde": based on Kernel Density Estimation of in-sample residuals
    - "bootstrap": based on independent bootstrap of in-sample residuals
    - "block-bootstrap": based on basic block bootstrap of in-sample residuals
    - "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals
    - "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals
    - "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals
    - "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals
    - "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals
    - "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals

block_size: int.
    size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap").
    Default is round(3.15*(n_residuals^(1/3)))

replications: int.
    number of replications (if needed, for predictive simulation). Default is 'None'.

kernel: str.
    the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'.

agg: str.
    either "mean" or "median" for simulation of bootstrap aggregating

seed: int.
    reproducibility seed for nodes_sim=='uniform' or predictive simulation.

backend: str.
    "cpu" or "gpu" or "tpu".

verbose: int.
    0: not printing; 1: printing

show_progress: bool.
    True: progress bar when fitting each series; False: no progress bar when fitting each series

Attributes:

fit_objs_: dict
    objects adjusted to each individual time series

y_: {array-like}
    DeepMTS responses (most recent observations first)

X_: {array-like}
    DeepMTS lags

xreg_: {array-like}
    external regressors

y_means_: dict
    a dictionary of each series mean values

preds_: {array-like}
    successive model predictions

preds_std_: {array-like}
    standard deviation around the predictions

return_std_: boolean
    return uncertainty or not (set in predict)

df_: data frame
    the input data frame, in case a data.frame is provided to `fit`

Examples:

Example 1:

```python
import nnetsauce as ns
import numpy as np
from sklearn import linear_model
np.random.seed(123)

M = np.random.rand(10, 3)
M[:,0] = 10*M[:,0]
M[:,2] = 25*M[:,2]
print(M)

# Adjust Bayesian Ridge
regr4 = linear_model.BayesianRidge()
obj_DeepMTS = ns.DeepMTS(regr4, lags = 1, n_hidden_features=5)
obj_DeepMTS.fit(M)
print(obj_DeepMTS.predict())

# with credible intervals
print(obj_DeepMTS.predict(return_std=True, level=80))

print(obj_DeepMTS.predict(return_std=True, level=95))
```

Example 2:

```python
import nnetsauce as ns
import numpy as np
import pandas as pd
from sklearn import linear_model

dataset = {
'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
'series1' : [34, 30, 35.6, 33.3, 38.1],
'series2' : [4, 5.5, 5.6, 6.3, 5.1],
'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
df = pd.DataFrame(dataset).set_index('date')
print(df)

# Adjust Bayesian Ridge
regr5 = linear_model.BayesianRidge()
obj_DeepMTS = ns.DeepMTS(regr5, lags = 1, n_hidden_features=5)
obj_DeepMTS.fit(df)
print(obj_DeepMTS.predict())

# with credible intervals
print(obj_DeepMTS.predict(return_std=True, level=80))

print(obj_DeepMTS.predict(return_std=True, level=95))
```
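
A further minimal sketch with illustrative constructor settings for the bootstrap-based `type_pi` options (the basic `predict()` call follows Example 2; `df` is the data frame defined there):

```python
import nnetsauce as ns
from sklearn import linear_model

regr = linear_model.BayesianRidge()
obj = ns.DeepMTS(regr, lags=1, n_hidden_features=5,
                 type_pi="block-bootstrap", replications=100, seed=123)
obj.fit(df)
print(obj.predict())
```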

class Downloader:
 6class Downloader:
 7    """Download datasets from data sources (R-universe for now)"""
 8
 9    def __init__(self):
10        self.pkgname = None
11        self.dataset = None
12        self.source = None
13        self.url = None
14        self.request = None
15
16    def download(
17        self,
18        pkgname="MASS",
19        dataset="Boston",
20        source="https://cran.r-universe.dev/",
21        **kwargs
22    ):
23        """Download datasets from data sources (R-universe for now)
24
25        Examples:
26
27        ```python
28        import nnetsauce as ns
29
30        downloader = ns.Downloader()
31        df = downloader.download(pkgname="MASS", dataset="Boston")
32        ```
33
34        """
35        self.pkgname = pkgname
36        self.dataset = dataset
37        self.source = source
38        self.url = source + pkgname + "/data/" + dataset + "/json"
39        self.request = requests.get(self.url)
40        return pd.DataFrame(self.request.json(), **kwargs)

Download datasets from data sources (R-universe for now)

def download( self, pkgname='MASS', dataset='Boston', source='https://cran.r-universe.dev/', **kwargs):
16    def download(
17        self,
18        pkgname="MASS",
19        dataset="Boston",
20        source="https://cran.r-universe.dev/",
21        **kwargs
22    ):
23        """Download datasets from data sources (R-universe for now)
24
25        Examples:
26
27        ```python
28        import nnetsauce as ns
29
30        downloader = ns.Downloader()
31        df = downloader.download(pkgname="MASS", dataset="Boston")
32        ```
33
34        """
35        self.pkgname = pkgname
36        self.dataset = dataset
37        self.source = source
38        self.url = source + pkgname + "/data/" + dataset + "/json"
39        self.request = requests.get(self.url)
40        return pd.DataFrame(self.request.json(), **kwargs)

Download datasets from data sources (R-universe for now)

Examples:

```python
import nnetsauce as ns

downloader = ns.Downloader()
df = downloader.download(pkgname="MASS", dataset="Boston")
```
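
With the defaults shown above, `download` builds its endpoint by simple string concatenation (see the source listing), for example:

```python
url = "https://cran.r-universe.dev/" + "MASS" + "/data/" + "Boston" + "/json"
# i.e. "https://cran.r-universe.dev/MASS/data/Boston/json"
```
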
class GLMClassifier(nnetsauce.glm.glm.GLM, sklearn.base.ClassifierMixin):
 21class GLMClassifier(GLM, ClassifierMixin):
 22    """Generalized 'linear' models using quasi-randomized networks (classification)
 23
 24    Parameters:
 25
 26        n_hidden_features: int
 27            number of nodes in the hidden layer
 28
 29        lambda1: float
 30            regularization parameter for GLM coefficients on original features
 31
 32        alpha1: float
 33            controls compromise between l1 and l2 norm of GLM coefficients on original features
 34
 35        lambda2: float
 36            regularization parameter for GLM coefficients on nonlinear features
 37
 38        alpha2: float
 39            controls compromise between l1 and l2 norm of GLM coefficients on nonlinear features
 40
 41        activation_name: str
 42            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 43
 44        a: float
 45            hyperparameter for 'prelu' or 'elu' activation function
 46
 47        nodes_sim: str
 48            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 49            'uniform'
 50
 51        bias: boolean
 52            indicates if the hidden layer contains a bias term (True) or not
 53            (False)
 54
 55        dropout: float
 56            regularization parameter; (random) percentage of nodes dropped out
 57            of the training
 58
 59        direct_link: boolean
 60            indicates if the original predictors are included (True) in model's
 61            fitting or not (False)
 62
 63        n_clusters: int
 64            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 65                no clustering)
 66
 67        cluster_encode: bool
 68            defines how the variable containing clusters is treated (default is one-hot)
 69            if `False`, then labels are used, without one-hot encoding
 70
 71        type_clust: str
 72            type of clustering method: currently k-means ('kmeans') or Gaussian
 73            Mixture Model ('gmm')
 74
 75        type_scaling: a tuple of 3 strings
 76            scaling methods for inputs, hidden layer, and clustering respectively
 77            (and when relevant).
 78            Currently available: standardization ('std') or MinMax scaling ('minmax')
 79
 80        optimizer: object
 81            optimizer, from class nnetsauce.Optimizer
 82
 83        backend: str.
 84            "cpu" or "gpu" or "tpu".
 85
 86        seed: int
 87            reproducibility seed for nodes_sim=='uniform'
 88
 89    Attributes:
 90
 91        beta_: vector
 92            regression coefficients
 93
 94    Examples:
 95
 96    See [https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_classification.py)
 97
 98    """
 99
100    # construct the object -----
101    _estimator_type = "classifier"
102
103    def __init__(
104        self,
105        n_hidden_features=5,
106        lambda1=0.01,
107        alpha1=0.5,
108        lambda2=0.01,
109        alpha2=0.5,
110        family="expit",
111        activation_name="relu",
112        a=0.01,
113        nodes_sim="sobol",
114        bias=True,
115        dropout=0,
116        direct_link=True,
117        n_clusters=2,
118        cluster_encode=True,
119        type_clust="kmeans",
120        type_scaling=("std", "std", "std"),
121        optimizer=Optimizer(),
122        backend="cpu",
123        seed=123,
124    ):
125        super().__init__(
126            n_hidden_features=n_hidden_features,
127            lambda1=lambda1,
128            alpha1=alpha1,
129            lambda2=lambda2,
130            alpha2=alpha2,
131            activation_name=activation_name,
132            a=a,
133            nodes_sim=nodes_sim,
134            bias=bias,
135            dropout=dropout,
136            direct_link=direct_link,
137            n_clusters=n_clusters,
138            cluster_encode=cluster_encode,
139            type_clust=type_clust,
140            type_scaling=type_scaling,
141            optimizer=optimizer,
142            backend=backend,
143            seed=seed,
144        )
145
146        self.family = family
147
148    def logit_loss(self, Y, row_index, XB):
149        self.n_classes = Y.shape[1]  # len(np.unique(y))
150        # Y = mo.one_hot_encode2(y, self.n_classes)
151        # Y = self.optimizer.one_hot_encode(y, self.n_classes)
152
153        # max_double = 709.0 # only if softmax
154        # XB[XB > max_double] = max_double
155        XB[XB > 709.0] = 709.0
156
157        if row_index is None:
158            return -np.mean(np.sum(Y * XB, axis=1) - logsumexp(XB))
159
160        return -np.mean(np.sum(Y[row_index, :] * XB, axis=1) - logsumexp(XB))
161
162    def expit_erf_loss(self, Y, row_index, XB):
163        # self.n_classes = len(np.unique(y))
164        # Y = mo.one_hot_encode2(y, self.n_classes)
165        # Y = self.optimizer.one_hot_encode(y, self.n_classes)
166        self.n_classes = Y.shape[1]
167
168        if row_index is None:
169            return -np.mean(np.sum(Y * XB, axis=1) - logsumexp(XB))
170
171        return -np.mean(np.sum(Y[row_index, :] * XB, axis=1) - logsumexp(XB))
172
173    def loss_func(
174        self,
175        beta,
176        group_index,
177        X,
178        Y,
179        y,
180        row_index=None,
181        type_loss="logit",
182        **kwargs
183    ):
184        res = {
185            "logit": self.logit_loss,
186            "expit": self.expit_erf_loss,
187            "erf": self.expit_erf_loss,
188        }
189
190        if row_index is None:
191            row_index = range(len(y))
192            XB = self.compute_XB(
193                X,
194                beta=np.reshape(beta, (X.shape[1], self.n_classes), order="F"),
195            )
196
197            return res[type_loss](Y, row_index, XB) + self.compute_penalty(
198                group_index=group_index, beta=beta
199            )
200
201        XB = self.compute_XB(
202            X,
203            beta=np.reshape(beta, (X.shape[1], self.n_classes), order="F"),
204            row_index=row_index,
205        )
206
207        return res[type_loss](Y, row_index, XB) + self.compute_penalty(
208            group_index=group_index, beta=beta
209        )
210
211    def fit(self, X, y, **kwargs):
212        """Fit GLM model to training data (X, y).
213
214        Args:
215
216            X: {array-like}, shape = [n_samples, n_features]
217                Training vectors, where n_samples is the number
218                of samples and n_features is the number of features.
219
220            y: array-like, shape = [n_samples]
221                Target values.
222
223            **kwargs: additional parameters to be passed to
224                    self.cook_training_set or self.obj.fit
225
226        Returns:
227
228            self: object
229
230        """
231
232        assert mx.is_factor(
233            y
234        ), "y must contain only integers"  # change is_factor and subsampling everywhere
235
236        self.classes_ = np.unique(y)  # for compatibility with sklearn
237        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
238
239        self.beta_ = None
240
241        n, p = X.shape
242
243        self.group_index = n * X.shape[1]
244
245        self.n_classes = len(np.unique(y))
246
247        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
248
249        # Y = mo.one_hot_encode2(output_y, self.n_classes)
250        Y = self.optimizer.one_hot_encode(output_y, self.n_classes)
251
252        # initialization
253        if self.backend == "cpu":
254            beta_ = np.linalg.lstsq(scaled_Z, Y, rcond=None)[0]
255        else:
256            beta_ = jnp.linalg.lstsq(scaled_Z, Y, rcond=None)[0]
257
258        # optimization
259        # fit(self, loss_func, response, x0, **kwargs):
260        # loss_func(self, beta, group_index, X, y,
261        #          row_index=None, type_loss="gaussian",
262        #          **kwargs)
263        self.optimizer.fit(
264            self.loss_func,
265            response=y,
266            x0=beta_.flatten(order="F"),
267            group_index=self.group_index,
268            X=scaled_Z,
269            Y=Y,
270            y=y,
271            type_loss=self.family,
272        )
273
274        self.beta_ = self.optimizer.results[0]
275        self.classes_ = np.unique(y)
276
277        return self
278
279    def predict(self, X, **kwargs):
280        """Predict test data X.
281
282        Args:
283
284            X: {array-like}, shape = [n_samples, n_features]
285                Training vectors, where n_samples is the number
286                of samples and n_features is the number of features.
287
288            **kwargs: additional parameters to be passed to
289                    self.cook_test_set
290
291        Returns:
292
293            model predictions: {array-like}
294
295        """
296
297        return np.argmax(self.predict_proba(X, **kwargs), axis=1)
298
299    def predict_proba(self, X, **kwargs):
300        """Predict probabilities for test data X.
301
302        Args:
303
304            X: {array-like}, shape = [n_samples, n_features]
305                Training vectors, where n_samples is the number
306                of samples and n_features is the number of features.
307
308            **kwargs: additional parameters to be passed to
309                    self.cook_test_set
310
311        Returns:
312
313            probability estimates for test data: {array-like}
314
315        """
316        if len(X.shape) == 1:
317            n_features = X.shape[0]
318            new_X = mo.rbind(
319                X.reshape(1, n_features),
320                np.ones(n_features).reshape(1, n_features),
321            )
322
323            Z = self.cook_test_set(new_X, **kwargs)
324
325        else:
326            Z = self.cook_test_set(X, **kwargs)
327
328        ZB = mo.safe_sparse_dot(
329            Z,
330            self.beta_.reshape(
331                self.n_classes,
332                X.shape[1] + self.n_hidden_features + self.n_clusters,
333            ).T,
334        )
335
336        if self.family == "logit":
337            exp_ZB = np.exp(ZB)
338
339            return exp_ZB / exp_ZB.sum(axis=1)[:, None]
340
341        if self.family == "expit":
342            exp_ZB = expit(ZB)
343
344            return exp_ZB / exp_ZB.sum(axis=1)[:, None]
345
346        if self.family == "erf":
347            exp_ZB = 0.5 * (1 + erf(ZB))
348
349            return exp_ZB / exp_ZB.sum(axis=1)[:, None]
350
351    def score(self, X, y, scoring=None):
352        """Scoring function for classification.
353
354        Args:
355
356            X: {array-like}, shape = [n_samples, n_features]
357                Training vectors, where n_samples is the number
358                of samples and n_features is the number of features.
359
360            y: array-like, shape = [n_samples]
361                Target values.
362
363            scoring: str
364                scoring method (default is accuracy)
365
366        Returns:
367
368            score: float
369        """
370
371        if scoring is None:
372            scoring = "accuracy"
373
374        if scoring == "accuracy":
375            return skm2.accuracy_score(y, self.predict(X))
376
377        if scoring == "f1":
378            return skm2.f1_score(y, self.predict(X))
379
380        if scoring == "precision":
381            return skm2.precision_score(y, self.predict(X))
382
383        if scoring == "recall":
384            return skm2.recall_score(y, self.predict(X))
385
386        if scoring == "roc_auc":
387            return skm2.roc_auc_score(y, self.predict(X))
388
389        if scoring == "log_loss":
390            return skm2.log_loss(y, self.predict_proba(X))
391
392        if scoring == "balanced_accuracy":
393            return skm2.balanced_accuracy_score(y, self.predict(X))
394
395        if scoring == "average_precision":
396            return skm2.average_precision_score(y, self.predict(X))
397
398        if scoring == "neg_brier_score":
399            return -skm2.brier_score_loss(y, self.predict_proba(X))
400
401        if scoring == "neg_log_loss":
402            return -skm2.log_loss(y, self.predict_proba(X))
403
404    @property
405    def _estimator_type(self):
406        return "classifier"

Generalized 'linear' models using quasi-randomized networks (classification)

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

lambda1: float
    regularization parameter for GLM coefficients on original features

alpha1: float
    controls the compromise between the l1 and l2 norms of GLM coefficients on original features

lambda2: float
    regularization parameter for GLM coefficients on nonlinear features

alpha2: float
    controls the compromise between the l1 and l2 norms of GLM coefficients on nonlinear features

family: str
    link/loss used by the model: 'expit' (default), 'logit' or 'erf'

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

optimizer: object
    optimizer, from class nnetsauce.Optimizer

backend: str.
    "cpu" or "gpu" or "tpu".

seed: int
    reproducibility seed for nodes_sim=='uniform'

Attributes:

beta_: vector
    regression coefficients

Examples:

See https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_classification.py
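
A minimal usage sketch to complement the linked script (the dataset, split and hyperparameters are illustrative choices, not taken from the nnetsauce examples):

import numpy as np
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)   # binary, integer-coded target
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123)

clf = ns.GLMClassifier(n_hidden_features=5, family="expit")
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))   # accuracy by default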

def fit(self, X, y, **kwargs):
211    def fit(self, X, y, **kwargs):
212        """Fit GLM model to training data (X, y).
213
214        Args:
215
216            X: {array-like}, shape = [n_samples, n_features]
217                Training vectors, where n_samples is the number
218                of samples and n_features is the number of features.
219
220            y: array-like, shape = [n_samples]
221                Target values.
222
223            **kwargs: additional parameters to be passed to
224                    self.cook_training_set or self.obj.fit
225
226        Returns:
227
228            self: object
229
230        """
231
232        assert mx.is_factor(
233            y
234        ), "y must contain only integers"  # change is_factor and subsampling everywhere
235
236        self.classes_ = np.unique(y)  # for compatibility with sklearn
237        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
238
239        self.beta_ = None
240
241        n, p = X.shape
242
243        self.group_index = n * X.shape[1]
244
245        self.n_classes = len(np.unique(y))
246
247        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
248
249        # Y = mo.one_hot_encode2(output_y, self.n_classes)
250        Y = self.optimizer.one_hot_encode(output_y, self.n_classes)
251
252        # initialization
253        if self.backend == "cpu":
254            beta_ = np.linalg.lstsq(scaled_Z, Y, rcond=None)[0]
255        else:
256            beta_ = jnp.linalg.lstsq(scaled_Z, Y, rcond=None)[0]
257
258        # optimization
259        # fit(self, loss_func, response, x0, **kwargs):
260        # loss_func(self, beta, group_index, X, y,
261        #          row_index=None, type_loss="gaussian",
262        #          **kwargs)
263        self.optimizer.fit(
264            self.loss_func,
265            response=y,
266            x0=beta_.flatten(order="F"),
267            group_index=self.group_index,
268            X=scaled_Z,
269            Y=Y,
270            y=y,
271            type_loss=self.family,
272        )
273
274        self.beta_ = self.optimizer.results[0]
275        self.classes_ = np.unique(y)
276
277        return self

Fit GLM model to training data (X, y).

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
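
Note the is_factor assertion in the source above: fit expects integer-coded class labels. A small sketch, assuming labels arrive as strings (y_raw is a hypothetical array; LabelEncoder is one common way to obtain integer codes, not something GLMClassifier does for you):

import numpy as np
from sklearn.preprocessing import LabelEncoder

y_raw = np.array(["spam", "ham", "spam", "ham", "ham"])  # hypothetical string labels
y = LabelEncoder().fit_transform(y_raw)                  # array([1, 0, 1, 0, 0])
# y can now be passed to GLMClassifier().fit(X, y)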
def predict(self, X, **kwargs):
279    def predict(self, X, **kwargs):
280        """Predict test data X.
281
282        Args:
283
284            X: {array-like}, shape = [n_samples, n_features]
285                Training vectors, where n_samples is the number
286                of samples and n_features is the number of features.
287
288            **kwargs: additional parameters to be passed to
289                    self.cook_test_set
290
291        Returns:
292
293            model predictions: {array-like}
294
295        """
296
297        return np.argmax(self.predict_proba(X, **kwargs), axis=1)

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
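
As the source shows, predict takes the argmax over predict_proba, so it returns column indices 0, ..., n_classes - 1 rather than entries of classes_. With labels already coded 0, ..., n_classes - 1 (as in the sketch above) the two coincide; otherwise map back explicitly:

preds = clf.predict(X_test)    # column indices into the probability matrix
labels = clf.classes_[preds]   # explicit mapping back to the original integer labels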
def predict_proba(self, X, **kwargs):
299    def predict_proba(self, X, **kwargs):
300        """Predict probabilities for test data X.
301
302        Args:
303
304            X: {array-like}, shape = [n_samples, n_features]
305                Training vectors, where n_samples is the number
306                of samples and n_features is the number of features.
307
308            **kwargs: additional parameters to be passed to
309                    self.cook_test_set
310
311        Returns:
312
313            probability estimates for test data: {array-like}
314
315        """
316        if len(X.shape) == 1:
317            n_features = X.shape[0]
318            new_X = mo.rbind(
319                X.reshape(1, n_features),
320                np.ones(n_features).reshape(1, n_features),
321            )
322
323            Z = self.cook_test_set(new_X, **kwargs)
324
325        else:
326            Z = self.cook_test_set(X, **kwargs)
327
328        ZB = mo.safe_sparse_dot(
329            Z,
330            self.beta_.reshape(
331                self.n_classes,
332                X.shape[1] + self.n_hidden_features + self.n_clusters,
333            ).T,
334        )
335
336        if self.family == "logit":
337            exp_ZB = np.exp(ZB)
338
339            return exp_ZB / exp_ZB.sum(axis=1)[:, None]
340
341        if self.family == "expit":
342            exp_ZB = expit(ZB)
343
344            return exp_ZB / exp_ZB.sum(axis=1)[:, None]
345
346        if self.family == "erf":
347            exp_ZB = 0.5 * (1 + erf(ZB))
348
349            return exp_ZB / exp_ZB.sum(axis=1)[:, None]

Predict probabilities for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

probability estimates for test data: {array-like}
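
Each family branch above divides by the row sums, so the returned matrix has one row per sample and rows summing to 1. Continuing the sketch:

probs = clf.predict_proba(X_test)
assert probs.shape == (X_test.shape[0], len(clf.classes_))
assert np.allclose(probs.sum(axis=1), 1.0)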
def score(self, X, y, scoring=None):
351    def score(self, X, y, scoring=None):
352        """Scoring function for classification.
353
354        Args:
355
356            X: {array-like}, shape = [n_samples, n_features]
357                Training vectors, where n_samples is the number
358                of samples and n_features is the number of features.
359
360            y: array-like, shape = [n_samples]
361                Target values.
362
363            scoring: str
364                scoring method (default is accuracy)
365
366        Returns:
367
368            score: float
369        """
370
371        if scoring is None:
372            scoring = "accuracy"
373
374        if scoring == "accuracy":
375            return skm2.accuracy_score(y, self.predict(X))
376
377        if scoring == "f1":
378            return skm2.f1_score(y, self.predict(X))
379
380        if scoring == "precision":
381            return skm2.precision_score(y, self.predict(X))
382
383        if scoring == "recall":
384            return skm2.recall_score(y, self.predict(X))
385
386        if scoring == "roc_auc":
387            return skm2.roc_auc_score(y, self.predict(X))
388
389        if scoring == "log_loss":
390            return skm2.log_loss(y, self.predict_proba(X))
391
392        if scoring == "balanced_accuracy":
393            return skm2.balanced_accuracy_score(y, self.predict(X))
394
395        if scoring == "average_precision":
396            return skm2.average_precision_score(y, self.predict(X))
397
398        if scoring == "neg_brier_score":
399            return -skm2.brier_score_loss(y, self.predict_proba(X))
400
401        if scoring == "neg_log_loss":
402            return -skm2.log_loss(y, self.predict_proba(X))

Scoring function for classification.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

scoring: str
    scoring method (default is accuracy)

Returns:

score: float
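
Continuing the sketch above; note that 'f1', 'precision', 'recall' and 'roc_auc' call the sklearn metrics with their binary defaults, so they are only meaningful for two-class targets:

print(clf.score(X_test, y_test))                               # accuracy (default)
print(clf.score(X_test, y_test, scoring="balanced_accuracy"))
print(clf.score(X_test, y_test, scoring="roc_auc"))            # binary target here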
class GLMRegressor(nnetsauce.glm.glm.GLM, sklearn.base.RegressorMixin):
 20class GLMRegressor(GLM, RegressorMixin):
 21    """Generalized 'linear' models using quasi-randomized networks (regression)
 22
 23    Parameters:
 24
 25        n_hidden_features: int
 26            number of nodes in the hidden layer
 27
 28        lambda1: float
 29            regularization parameter for GLM coefficients on original features
 30
 31        alpha1: float
 32            controls the compromise between the l1 and l2 norms of GLM coefficients on original features
 33
 34        lambda2: float
 35            regularization parameter for GLM coefficients on nonlinear features
 36
 37        alpha2: float
 38            controls the compromise between the l1 and l2 norms of GLM coefficients on nonlinear features
 39
 40        family: str
 41            "gaussian", "laplace", "poisson", or "quantile" (for now)
 42
 43        level: int, default=50
 44            The level of the quantiles to compute for family = "quantile".
 45            Default is the median.
 46
 47        activation_name: str
 48            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 49
 50        a: float
 51            hyperparameter for 'prelu' or 'elu' activation function
 52
 53        nodes_sim: str
 54            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 55            'uniform'
 56
 57        bias: boolean
 58            indicates if the hidden layer contains a bias term (True) or not
 59            (False)
 60
 61        dropout: float
 62            regularization parameter; (random) percentage of nodes dropped out
 63            of the training
 64
 65        direct_link: boolean
 66            indicates if the original predictors are included (True) in model's
 67            fitting or not (False)
 68
 69        n_clusters: int
 70            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 71                no clustering)
 72
 73        cluster_encode: bool
 74            defines how the variable containing clusters is treated (default is one-hot)
 75            if `False`, then labels are used, without one-hot encoding
 76
 77        type_clust: str
 78            type of clustering method: currently k-means ('kmeans') or Gaussian
 79            Mixture Model ('gmm')
 80
 81        type_scaling: a tuple of 3 strings
 82            scaling methods for inputs, hidden layer, and clustering respectively
 83            (and when relevant).
 84            Currently available: standardization ('std') or MinMax scaling ('minmax')
 85
 86        optimizer: object
 87            optimizer, from class nnetsauce.Optimizer
 88
 89        backend: str.
 90            "cpu" or "gpu" or "tpu".
 91
 92        seed: int
 93            reproducibility seed for nodes_sim=='uniform'
 94
 95        backend: str
 96            "cpu", "gpu", "tpu"
 97
 98    Attributes:
 99
100        beta_: vector
101            regression coefficients
102
103    Examples:
104
105        See [https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py)
106
107    """
108
109    # construct the object -----
110
111    def __init__(
112        self,
113        n_hidden_features=5,
114        lambda1=0.01,
115        alpha1=0.5,
116        lambda2=0.01,
117        alpha2=0.5,
118        family="gaussian",
119        level=50,
120        activation_name="relu",
121        a=0.01,
122        nodes_sim="sobol",
123        bias=True,
124        dropout=0,
125        direct_link=True,
126        n_clusters=2,
127        cluster_encode=True,
128        type_clust="kmeans",
129        type_scaling=("std", "std", "std"),
130        optimizer=Optimizer(),
131        backend="cpu",
132        seed=123,
133    ):
134        super().__init__(
135            n_hidden_features=n_hidden_features,
136            lambda1=lambda1,
137            alpha1=alpha1,
138            lambda2=lambda2,
139            alpha2=alpha2,
140            activation_name=activation_name,
141            a=a,
142            nodes_sim=nodes_sim,
143            bias=bias,
144            dropout=dropout,
145            direct_link=direct_link,
146            n_clusters=n_clusters,
147            cluster_encode=cluster_encode,
148            type_clust=type_clust,
149            type_scaling=type_scaling,
150            optimizer=optimizer,
151            backend=backend,
152            seed=seed,
153        )
154
155        self.family = family
156        self.level = level
157        self.q = self.level / 100
158
159    def gaussian_loss(self, y, row_index, XB):
160        return 0.5 * np.mean(np.square(y[row_index] - XB))
161
162    def laplace_loss(self, y, row_index, XB):
163        return 0.5 * np.mean(np.abs(y[row_index] - XB))
164
165    def poisson_loss(self, y, row_index, XB):
166        return -np.mean(y[row_index] * XB - np.exp(XB))
167
168    def pinball_loss(self, y, row_index, XB, tau=0.5):
169        y = np.array(y[row_index])
170        y_pred = np.array(XB)
171        return mean_pinball_loss(y, y_pred, alpha=tau)
172        # return np.mean(np.maximum(tau * residuals, (tau - 1) * residuals))
173
174    def loss_func(
175        self,
176        beta,
177        group_index,
178        X,
179        y,
180        row_index=None,
181        type_loss="gaussian",
182        **kwargs
183    ):
184        res = {
185            "gaussian": self.gaussian_loss,
186            "laplace": self.laplace_loss,
187            "poisson": self.poisson_loss,
188            "quantile": self.pinball_loss,
189        }
190
191        if type_loss != "quantile":
192            if row_index is None:
193                row_index = range(len(y))
194                XB = self.compute_XB(X, beta=beta)
195
196                return res[type_loss](y, row_index, XB) + self.compute_penalty(
197                    group_index=group_index, beta=beta
198                )
199
200            XB = self.compute_XB(X, beta=beta, row_index=row_index)
201
202            return res[type_loss](y, row_index, XB) + self.compute_penalty(
203                group_index=group_index, beta=beta
204            )
205
206        else:  # quantile
207            assert (
208                self.q > 0 and self.q < 1
 209            ), "'tau' must satisfy 0 < tau < 1"
210
211            if row_index is None:
212                row_index = range(len(y))
213                XB = self.compute_XB(X, beta=beta)
214                return res[type_loss](y, row_index, XB, self.q)
215
216            XB = self.compute_XB(X, beta=beta, row_index=row_index)
217            return res[type_loss](y, row_index, XB, self.q)
218
219    def fit(self, X, y, **kwargs):
220        """Fit GLM model to training data (X, y).
221
222        Args:
223
224            X: {array-like}, shape = [n_samples, n_features]
225                Training vectors, where n_samples is the number
226                of samples and n_features is the number of features.
227
228            y: array-like, shape = [n_samples]
229                Target values.
230
231            **kwargs: additional parameters to be passed to
232                    self.cook_training_set or self.obj.fit
233
234        Returns:
235
236            self: object
237
238        """
239        self.beta_ = None
240        self.n_iter = 0
241
242        _, self.group_index = X.shape
243
244        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
245        # initialization
246        if self.backend == "cpu":
247            beta_ = np.linalg.lstsq(scaled_Z, centered_y, rcond=None)[0]
248        else:
249            beta_ = jnp.linalg.lstsq(scaled_Z, centered_y, rcond=None)[0]
250        # optimization
251        # fit(self, loss_func, response, x0, **kwargs):
252        # loss_func(self, beta, group_index, X, y,
253        #          row_index=None, type_loss="gaussian",
254        #          **kwargs)
255        self.optimizer.fit(
256            self.loss_func,
257            response=centered_y,
258            x0=beta_,
259            group_index=self.group_index,
260            X=scaled_Z,
261            y=centered_y,
262            type_loss=self.family,
263            **kwargs
264        )
265
266        self.beta_ = self.optimizer.results[0]
267
268        return self
269
270    def predict(self, X, **kwargs):
271        """Predict test data X.
272
273        Args:
274
275            X: {array-like}, shape = [n_samples, n_features]
276                Training vectors, where n_samples is the number
277                of samples and n_features is the number of features.
278
279            **kwargs: additional parameters to be passed to
280                    self.cook_test_set
281
282        Returns:
283
284            model predictions: {array-like}
285
286        """
287
288        if len(X.shape) == 1:
289            n_features = X.shape[0]
290            new_X = mo.rbind(
291                X.reshape(1, n_features),
292                np.ones(n_features).reshape(1, n_features),
293            )
294
295            return (
296                self.y_mean_
297                + np.dot(self.cook_test_set(new_X, **kwargs), self.beta_)
298            )[0]
299
300        return self.y_mean_ + np.dot(
301            self.cook_test_set(X, **kwargs), self.beta_
302        )
303
304    def score(self, X, y, scoring=None):
305        """Compute the score of the model.
306
307        Parameters:
308
309            X: {array-like}, shape = [n_samples, n_features]
310                Training vectors, where n_samples is the number
311                of samples and n_features is the number of features.
312
313            y: array-like, shape = [n_samples]
314                Target values.
315
316            scoring: str
317                scoring method
318
319        Returns:
320
321            score: float
322
323        """
324
325        if scoring is None:
326            return np.sqrt(np.mean((self.predict(X) - y) ** 2))
327
328        return skm2.get_scorer(scoring)(self, X, y)

Generalized 'linear' models using quasi-randomized networks (regression)

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

lambda1: float
    regularization parameter for GLM coefficients on original features

alpha1: float
    controls the compromise between the l1 and l2 norms of GLM coefficients on original features

lambda2: float
    regularization parameter for GLM coefficients on nonlinear features

alpha2: float
    controls the compromise between the l1 and l2 norms of GLM coefficients on nonlinear features

family: str
    "gaussian", "laplace", "poisson", or "quantile" (for now)

level: int, default=50
    The level of the quantiles to compute for family = "quantile".
    Default is the median.

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

optimizer: object
    optimizer, from class nnetsauce.Optimizer

backend: str.
    "cpu" or "gpu" or "tpu".

seed: int
    reproducibility seed for nodes_sim=='uniform'

Attributes:

beta_: vector
    regression coefficients

Examples:

See [https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py)
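
A minimal regression sketch to complement the linked script (the dataset and split are illustrative; family and level follow the parameter list above):

import numpy as np
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123)

reg = ns.GLMRegressor(n_hidden_features=5, family="gaussian")
reg.fit(X_train, y_train)
print(reg.score(X_test, y_test))   # RMSE by default

# family="quantile" with level=90 targets the 90th percentile (q = level / 100)
reg_q90 = ns.GLMRegressor(family="quantile", level=90)
reg_q90.fit(X_train, y_train)
upper = reg_q90.predict(X_test)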
def fit(self, X, y, **kwargs):
219    def fit(self, X, y, **kwargs):
220        """Fit GLM model to training data (X, y).
221
222        Args:
223
224            X: {array-like}, shape = [n_samples, n_features]
225                Training vectors, where n_samples is the number
226                of samples and n_features is the number of features.
227
228            y: array-like, shape = [n_samples]
229                Target values.
230
231            **kwargs: additional parameters to be passed to
232                    self.cook_training_set or self.obj.fit
233
234        Returns:
235
236            self: object
237
238        """
239        self.beta_ = None
240        self.n_iter = 0
241
242        _, self.group_index = X.shape
243
244        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
245        # initialization
246        if self.backend == "cpu":
247            beta_ = np.linalg.lstsq(scaled_Z, centered_y, rcond=None)[0]
248        else:
249            beta_ = jnp.linalg.lstsq(scaled_Z, centered_y, rcond=None)[0]
250        # optimization
251        # fit(self, loss_func, response, x0, **kwargs):
252        # loss_func(self, beta, group_index, X, y,
253        #          row_index=None, type_loss="gaussian",
254        #          **kwargs)
255        self.optimizer.fit(
256            self.loss_func,
257            response=centered_y,
258            x0=beta_,
259            group_index=self.group_index,
260            X=scaled_Z,
261            y=centered_y,
262            type_loss=self.family,
263            **kwargs
264        )
265
266        self.beta_ = self.optimizer.results[0]
267
268        return self

Fit GLM model to training data (X, y).

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
def predict(self, X, **kwargs):
270    def predict(self, X, **kwargs):
271        """Predict test data X.
272
273        Args:
274
275            X: {array-like}, shape = [n_samples, n_features]
276                Training vectors, where n_samples is the number
277                of samples and n_features is the number of features.
278
279            **kwargs: additional parameters to be passed to
280                    self.cook_test_set
281
282        Returns:
283
284            model predictions: {array-like}
285
286        """
287
288        if len(X.shape) == 1:
289            n_features = X.shape[0]
290            new_X = mo.rbind(
291                X.reshape(1, n_features),
292                np.ones(n_features).reshape(1, n_features),
293            )
294
295            return (
296                self.y_mean_
297                + np.dot(self.cook_test_set(new_X, **kwargs), self.beta_)
298            )[0]
299
300        return self.y_mean_ + np.dot(
301            self.cook_test_set(X, **kwargs), self.beta_
302        )

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
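
A single observation can be passed as a 1-D array; the branch above reshapes it internally and returns a scalar. Continuing the regression sketch:

one = reg.predict(X_test[0])   # scalar prediction for a single row
many = reg.predict(X_test)     # vector of predictions, with y_mean_ added back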
def score(self, X, y, scoring=None):
304    def score(self, X, y, scoring=None):
305        """Compute the score of the model.
306
307        Parameters:
308
309            X: {array-like}, shape = [n_samples, n_features]
310                Training vectors, where n_samples is the number
311                of samples and n_features is the number of features.
312
313            y: array-like, shape = [n_samples]
314                Target values.
315
316            scoring: str
317                scoring method
318
319        Returns:
320
321            score: float
322
323        """
324
325        if scoring is None:
326            return np.sqrt(np.mean((self.predict(X) - y) ** 2))
327
328        return skm2.get_scorer(scoring)(self, X, y)

Compute the score of the model.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

scoring: str
    scoring method

Returns:

score: float
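
With scoring=None the method returns the RMSE; otherwise any sklearn scorer name is accepted, since the string is resolved through get_scorer. Continuing the sketch:

print(reg.score(X_test, y_test))                                  # RMSE
print(reg.score(X_test, y_test, scoring="r2"))
print(reg.score(X_test, y_test, scoring="neg_mean_absolute_error"))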
class KernelRidge(sklearn.base.BaseEstimator, sklearn.base.RegressorMixin):
 18class KernelRidge(BaseEstimator, RegressorMixin):
 19    """
 20    Kernel Ridge Regression with optional GPU support, Matérn kernels, and automatic input standardization.
 21
 22    Parameters:
 23    - alpha: float
 24        Regularization parameter.
 25    - kernel: str
 26        Kernel type ("linear", "rbf", or "matern").
 27    - gamma: float
 28        Kernel coefficient for "rbf". Ignored for other kernels.
 29    - nu: float
 30        Smoothness parameter for the Matérn kernel. Default is 1.5.
 31    - length_scale: float
 32        Length scale parameter for the Matérn kernel. Default is 1.0.
 33    - backend: str
 34        "cpu" or "gpu" (uses JAX if "gpu").
 35    """
 36
 37    def __init__(
 38        self,
 39        alpha=1.0,
 40        kernel="rbf",
 41        gamma=None,
 42        nu=1.5,
 43        length_scale=1.0,
 44        backend="cpu",
 45    ):
 46        self.alpha = alpha
 47        self.alpha_ = alpha
 48        self.kernel = kernel
 49        self.gamma = gamma
 50        self.nu = nu
 51        self.length_scale = length_scale
 52        self.backend = backend
 53        self.scaler = StandardScaler()
 54
 55        if backend == "gpu" and not JAX_AVAILABLE:
 56            raise ImportError(
 57                "JAX is not installed. Please install JAX to use the GPU backend."
 58            )
 59
 60    def _linear_kernel(self, X, Y):
 61        return jnp.dot(X, Y.T) if self.backend == "gpu" else np.dot(X, Y.T)
 62
 63    def _rbf_kernel(self, X, Y):
 64        if self.gamma is None:
 65            self.gamma = 1.0 / X.shape[1]
 66        if self.backend == "gpu":
 67            sq_dists = (
 68                jnp.sum(X**2, axis=1)[:, None]
 69                + jnp.sum(Y**2, axis=1)
 70                - 2 * jnp.dot(X, Y.T)
 71            )
 72            return jnp.exp(-self.gamma * sq_dists)
 73        else:
 74            sq_dists = (
 75                np.sum(X**2, axis=1)[:, None]
 76                + np.sum(Y**2, axis=1)
 77                - 2 * np.dot(X, Y.T)
 78            )
 79            return np.exp(-self.gamma * sq_dists)
 80
 81    def _matern_kernel(self, X, Y):
 82        """
 83        Compute the Matérn kernel using JAX for GPU or NumPy for CPU.
 84
 85        Parameters:
 86        - X: array-like, shape (n_samples_X, n_features)
 87        - Y: array-like, shape (n_samples_Y, n_features)
 88
 89        Returns:
 90        - Kernel matrix, shape (n_samples_X, n_samples_Y)
 91        """
 92        if self.backend == "gpu":
 93            # Compute pairwise distances
 94            dists = jnp.sqrt(
 95                jnp.sum((X[:, None, :] - Y[None, :, :]) ** 2, axis=2)
 96            )
 97            scaled_dists = jnp.sqrt(2 * self.nu) * dists / self.length_scale
 98
 99            # Matérn kernel formula
100            coeff = (2 ** (1 - self.nu)) / jnp.exp(gammaln(self.nu))
101            matern_kernel = (
102                coeff * (scaled_dists**self.nu) * kv(self.nu, scaled_dists)
103            )
104            matern_kernel = jnp.where(
105                dists == 0, 1.0, matern_kernel
106            )  # Handle the case where distance is 0
107            return matern_kernel
108        else:
109            # Use NumPy for CPU
110            from scipy.special import (
111                gammaln,
112                kv,
113            )  # Ensure scipy.special is used for CPU
114
115            dists = np.sqrt(
116                np.sum((X[:, None, :] - Y[None, :, :]) ** 2, axis=2)
117            )
118            scaled_dists = np.sqrt(2 * self.nu) * dists / self.length_scale
119
120            # Matérn kernel formula
121            coeff = (2 ** (1 - self.nu)) / np.exp(gammaln(self.nu))
122            matern_kernel = (
123                coeff * (scaled_dists**self.nu) * kv(self.nu, scaled_dists)
124            )
125            matern_kernel = np.where(
126                dists == 0, 1.0, matern_kernel
127            )  # Handle the case where distance is 0
128            return matern_kernel
129
130    def _get_kernel(self, X, Y):
131        if self.kernel == "linear":
132            return self._linear_kernel(X, Y)
133        elif self.kernel == "rbf":
134            return self._rbf_kernel(X, Y)
135        elif self.kernel == "matern":
136            return self._matern_kernel(X, Y)
137        else:
138            raise ValueError(f"Unsupported kernel: {self.kernel}")
139
140    def fit(self, X, y):
141        """
142        Fit the Kernel Ridge Regression model.
143
144        Parameters:
145        - X: array-like, shape (n_samples, n_features)
146            Training data.
147        - y: array-like, shape (n_samples,)
148            Target values.
149        """
150        # Standardize the inputs
151        X = self.scaler.fit_transform(X)
152        self.X_fit_ = X
153
154        # Center the response
155        self.y_mean_ = np.mean(y)
156        y_centered = y - self.y_mean_
157
158        n_samples = X.shape[0]
159
160        # Compute the kernel matrix
161        K = self._get_kernel(X, X)
162        self.K_ = K
163        self.y_fit_ = y_centered
164
165        if isinstance(self.alpha, (list, np.ndarray)):
166            # If alpha is a list or array, compute LOOE for each alpha
167            self.alphas_ = self.alpha  # Store the list of alphas
168            self.dual_coefs_ = []  # Store dual coefficients for each alpha
169            self.looe_ = []  # Store LOOE for each alpha
170
171            for alpha in self.alpha:
172                G = K + alpha * np.eye(n_samples)
173                G_inv = np.linalg.inv(G)
174                diag_G_inv = np.diag(G_inv)
175                dual_coef = np.linalg.solve(G, y_centered)
176                looe = np.sum((dual_coef / diag_G_inv) ** 2)  # Compute LOOE
177                self.dual_coefs_.append(dual_coef)
178                self.looe_.append(looe)
179
180            # Select the best alpha based on the smallest LOOE
181            best_index = np.argmin(self.looe_)
182            self.alpha_ = self.alpha[best_index]
183            self.dual_coef_ = self.dual_coefs_[best_index]
184        else:
185            # If alpha is a single value, proceed as usual
186            if self.backend == "gpu":
187                self.dual_coef_ = jnp.linalg.solve(
188                    K + self.alpha * jnp.eye(n_samples), y_centered
189                )
190            else:
191                self.dual_coef_ = np.linalg.solve(
192                    K + self.alpha * np.eye(n_samples), y_centered
193                )
194
195        return self
196
197    def predict(self, X, probs=False):
198        """
199        Predict using the Kernel Ridge Regression model.
200
201        Parameters:
202        - X: array-like, shape (n_samples, n_features)
203            Test data.
204
205        Returns:
206        - Predicted values, shape (n_samples,).
207        """
208        # Standardize the inputs
209        X = self.scaler.transform(X)
210        K = self._get_kernel(X, self.X_fit_)
211        if self.backend == "gpu":
212            preds = jnp.dot(K, self.dual_coef_) + self.y_mean_
213            if probs:
214                # Compute similarity to self.X_fit_
215                similarities = jnp.dot(
216                    preds, self.X_fit_.T
217                )  # Shape: (n_samples, n_fit_)
218                # Apply softmax to get probabilities
219                return jaxsoftmax(similarities, axis=1)
220            return preds
221        else:
222            preds = np.dot(K, self.dual_coef_) + self.y_mean_
223            if probs:
224                # Compute similarity to self.X_fit_
225                similarities = np.dot(
226                    preds, self.X_fit_.T
227                )  # Shape: (n_samples, n_fit_)
228                # Apply softmax to get probabilities
229                return softmax(similarities, axis=1)
230            return preds
231
232    def partial_fit(self, X, y):
233        """
234        Incrementally fit the Kernel Ridge Regression model with new data using a recursive approach.
235
236        Parameters:
237        - X: array-like, shape (n_samples, n_features)
238            New training data.
239        - y: array-like, shape (n_samples,)
240            New target values.
241
242        Returns:
243        - self: object
244            The updated model.
245        """
246        # Standardize the inputs
247        X = (
248            self.scaler.fit_transform(X)
249            if not hasattr(self, "X_fit_")
250            else self.scaler.transform(X)
251        )
252
253        if not hasattr(self, "X_fit_"):
254            # Initialize with the first batch of data
255            self.X_fit_ = X
256
257            # Center the response
258            self.y_mean_ = np.mean(y)
259            y_centered = y - self.y_mean_
260            self.y_fit_ = y_centered
261
262            n_samples = X.shape[0]
263
264            # Compute the kernel matrix for the initial data
265            self.K_ = self._get_kernel(X, X)
266
267            # Initialize dual coefficients for each alpha
268            if isinstance(self.alpha, (list, np.ndarray)):
269                self.dual_coefs_ = [np.zeros(n_samples) for _ in self.alpha]
270            else:
271                self.dual_coef_ = np.zeros(n_samples)
272        else:
273            # Incrementally update with new data
274            y_centered = y - self.y_mean_  # Center the new batch of responses
275            for x_new, y_new in zip(X, y_centered):
276                x_new = x_new.reshape(1, -1)  # Ensure x_new is 2D
277                k_new = self._get_kernel(self.X_fit_, x_new).flatten()
278
279                # Compute the kernel value for the new data point
280                k_self = self._get_kernel(x_new, x_new).item()
281
282                if isinstance(self.alpha, (list, np.ndarray)):
283                    # Update dual coefficients for each alpha
284                    for idx, alpha in enumerate(self.alpha):
285                        gamma_new = 1 / (k_self + alpha)
286                        residual = y_new - np.dot(self.dual_coefs_[idx], k_new)
287                        self.dual_coefs_[idx] = np.append(
288                            self.dual_coefs_[idx], gamma_new * residual
289                        )
290                else:
291                    # Update dual coefficients for a single alpha
292                    gamma_new = 1 / (k_self + self.alpha)
293                    residual = y_new - np.dot(self.dual_coef_, k_new)
294                    self.dual_coef_ = np.append(
295                        self.dual_coef_, gamma_new * residual
296                    )
297
298                # Update the kernel matrix
299                self.K_ = np.block(
300                    [
301                        [self.K_, k_new[:, None]],
302                        [k_new[None, :], np.array([[k_self]])],
303                    ]
304                )
305
306                # Update the stored data
307                self.X_fit_ = np.vstack([self.X_fit_, x_new])
308                self.y_fit_ = np.append(self.y_fit_, y_new)
309
310        # Select the best alpha based on LOOE after the batch
311        if isinstance(self.alpha, (list, np.ndarray)):
312            self.looe_ = []
313            for idx, alpha in enumerate(self.alpha):
314                G = self.K_ + alpha * np.eye(self.K_.shape[0])
315                G_inv = np.linalg.inv(G)
316                diag_G_inv = np.diag(G_inv)
317                looe = np.sum((self.dual_coefs_[idx] / diag_G_inv) ** 2)
318                self.looe_.append(looe)
319
320            # Select the best alpha
321            best_index = np.argmin(self.looe_)
322            self.alpha_ = self.alpha[best_index]
323            self.dual_coef_ = self.dual_coefs_[best_index]
324
325        return self

Kernel Ridge Regression with optional GPU support, Matérn kernels, and automatic input standardization.

Parameters:

  • alpha: float Regularization parameter.
  • kernel: str Kernel type ("linear", "rbf", or "matern").
  • gamma: float Kernel coefficient for "rbf". Ignored for other kernels.
  • nu: float Smoothness parameter for the Matérn kernel. Default is 1.5.
  • length_scale: float Length scale parameter for the Matérn kernel. Default is 1.0.
  • backend: str "cpu" or "gpu" (uses JAX if "gpu").
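
A minimal usage sketch (the dataset and split are illustrative; the constructor arguments follow the parameter list above). Passing a list or array of alpha values makes fit select the one with the smallest leave-one-out error, exposed afterwards as alpha_:

import numpy as np
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123)

krr = ns.KernelRidge(alpha=1.0, kernel="rbf")
krr.fit(X_train, y_train)
preds = krr.predict(X_test)

krr_grid = ns.KernelRidge(alpha=np.logspace(-3, 3, 7), kernel="rbf")
krr_grid.fit(X_train, y_train)
print(krr_grid.alpha_)   # alpha with the smallest leave-one-out error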
def fit(self, X, y):
140    def fit(self, X, y):
141        """
142        Fit the Kernel Ridge Regression model.
143
144        Parameters:
145        - X: array-like, shape (n_samples, n_features)
146            Training data.
147        - y: array-like, shape (n_samples,)
148            Target values.
149        """
150        # Standardize the inputs
151        X = self.scaler.fit_transform(X)
152        self.X_fit_ = X
153
154        # Center the response
155        self.y_mean_ = np.mean(y)
156        y_centered = y - self.y_mean_
157
158        n_samples = X.shape[0]
159
160        # Compute the kernel matrix
161        K = self._get_kernel(X, X)
162        self.K_ = K
163        self.y_fit_ = y_centered
164
165        if isinstance(self.alpha, (list, np.ndarray)):
166            # If alpha is a list or array, compute LOOE for each alpha
167            self.alphas_ = self.alpha  # Store the list of alphas
168            self.dual_coefs_ = []  # Store dual coefficients for each alpha
169            self.looe_ = []  # Store LOOE for each alpha
170
171            for alpha in self.alpha:
172                G = K + alpha * np.eye(n_samples)
173                G_inv = np.linalg.inv(G)
174                diag_G_inv = np.diag(G_inv)
175                dual_coef = np.linalg.solve(G, y_centered)
176                looe = np.sum((dual_coef / diag_G_inv) ** 2)  # Compute LOOE
177                self.dual_coefs_.append(dual_coef)
178                self.looe_.append(looe)
179
180            # Select the best alpha based on the smallest LOOE
181            best_index = np.argmin(self.looe_)
182            self.alpha_ = self.alpha[best_index]
183            self.dual_coef_ = self.dual_coefs_[best_index]
184        else:
185            # If alpha is a single value, proceed as usual
186            if self.backend == "gpu":
187                self.dual_coef_ = jnp.linalg.solve(
188                    K + self.alpha * jnp.eye(n_samples), y_centered
189                )
190            else:
191                self.dual_coef_ = np.linalg.solve(
192                    K + self.alpha * np.eye(n_samples), y_centered
193                )
194
195        return self

Fit the Kernel Ridge Regression model.

Parameters:

  • X: array-like, shape (n_samples, n_features) Training data.
  • y: array-like, shape (n_samples,) Target values.
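
The per-alpha loop in fit relies on the closed-form leave-one-out residuals of (kernel) ridge, so no model is refit for each held-out point: with G = K + alpha*I and dual coefficients c = G^{-1} y, the i-th leave-one-out residual equals c[i] / (G^{-1})[i, i]. A standalone sketch of the quantity accumulated above:

import numpy as np

def looe_ridge(K, y, alpha):
    # sum of squared leave-one-out residuals for kernel ridge,
    # matching the looe computed inside KernelRidge.fit
    G_inv = np.linalg.inv(K + alpha * np.eye(K.shape[0]))
    c = G_inv @ y
    return float(np.sum((c / np.diag(G_inv)) ** 2))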
def predict(self, X, probs=False):
197    def predict(self, X, probs=False):
198        """
199        Predict using the Kernel Ridge Regression model.
200
201        Parameters:
202        - X: array-like, shape (n_samples, n_features)
203            Test data.
204
205        Returns:
206        - Predicted values, shape (n_samples,).
207        """
208        # Standardize the inputs
209        X = self.scaler.transform(X)
210        K = self._get_kernel(X, self.X_fit_)
211        if self.backend == "gpu":
212            preds = jnp.dot(K, self.dual_coef_) + self.y_mean_
213            if probs:
214                # Compute similarity to self.X_fit_
215                similarities = jnp.dot(
216                    preds, self.X_fit_.T
217                )  # Shape: (n_samples, n_fit_)
218                # Apply softmax to get probabilities
219                return jaxsoftmax(similarities, axis=1)
220            return preds
221        else:
222            preds = np.dot(K, self.dual_coef_) + self.y_mean_
223            if probs:
224                # Compute similarity to self.X_fit_
225                similarities = np.dot(
226                    preds, self.X_fit_.T
227                )  # Shape: (n_samples, n_fit_)
228                # Apply softmax to get probabilities
229                return softmax(similarities, axis=1)
230            return preds

Predict using the Kernel Ridge Regression model.

Parameters:

  • X: array-like, shape (n_samples, n_features) Test data.

Returns:

  • Predicted values, shape (n_samples,).
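
partial_fit, shown in the class source above, grows the model one observation at a time, so it can be fed mini-batches. A sketch reusing the arrays from the kernel ridge example above (the number of batches is arbitrary):

krr_inc = ns.KernelRidge(alpha=1.0, kernel="rbf")
for X_batch, y_batch in zip(np.array_split(X_train, 5), np.array_split(y_train, 5)):
    krr_inc.partial_fit(X_batch, y_batch)
preds_inc = krr_inc.predict(X_test)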
class LazyClassifier(nnetsauce.LazyDeepClassifier):
757class LazyClassifier(LazyDeepClassifier):
758    """
759        Fitting -- almost -- all the classification algorithms with
760        nnetsauce's CustomClassifier and returning their scores (no layers).
761
762    Parameters:
763
764        verbose: int, optional (default=0)
765            Any positive number for verbosity.
766
767        ignore_warnings: bool, optional (default=True)
 768            When set to True, warnings related to algorithms that are not able to run are ignored.
769
770        custom_metric: function, optional (default=None)
771            When function is provided, models are evaluated based on the custom evaluation metric provided.
772
773        predictions: bool, optional (default=False)
 774            When set to True, the predictions of all the models are returned as a dataframe.
775
776        sort_by: string, optional (default='Accuracy')
777            Sort models by a metric. Available options are 'Accuracy', 'Balanced Accuracy', 'ROC AUC', 'F1 Score'
778            or a custom metric identified by its name and provided by custom_metric.
779
780        random_state: int, optional (default=42)
 781            Reproducibility seed.
782
783        estimators: list, optional (default='all')
 784            list of estimator names, or just 'all'
785
786        preprocess: bool
787            preprocessing is done when set to True
788
 789        n_jobs: int, optional (default=None)
 790            number of jobs to run in parallel; for now, only used by individual models that support it.
791
792        All the other parameters are the same as CustomClassifier's.
793
794    Attributes:
795
796        models_: dict-object
797            Returns a dictionary with each model pipeline as value
798            with key as name of models.
799
800        best_model_: object
801            Returns the best model pipeline based on the sort_by metric.
802
803    Examples:
804
805        import nnetsauce as ns
806        import numpy as np
807        from sklearn import datasets
808        from sklearn.utils import shuffle
809
810        dataset = datasets.load_iris()
811        X = dataset.data
812        y = dataset.target
813        X, y = shuffle(X, y, random_state=123)
814        X = X.astype(np.float32)
815        y = y.astype(np.float32)
816        X_train, X_test = X[:100], X[100:]
817        y_train, y_test = y[:100], y[100:]
818
819        clf = ns.LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
820        models, predictions = clf.fit(X_train, X_test, y_train, y_test)
821        model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test)
822        print(models)
823
824    """
825
826    def __init__(
827        self,
828        verbose=0,
829        ignore_warnings=True,
830        custom_metric=None,
831        predictions=False,
832        sort_by="Accuracy",
833        random_state=42,
834        estimators="all",
835        preprocess=False,
836        n_jobs=None,
837        # CustomClassifier attributes
838        obj=None,
839        n_hidden_features=5,
840        activation_name="relu",
841        a=0.01,
842        nodes_sim="sobol",
843        bias=True,
844        dropout=0,
845        direct_link=True,
846        n_clusters=2,
847        cluster_encode=True,
848        type_clust="kmeans",
849        type_scaling=("std", "std", "std"),
850        col_sample=1,
851        row_sample=1,
852        seed=123,
853        backend="cpu",
854    ):
855        super().__init__(
856            verbose=verbose,
857            ignore_warnings=ignore_warnings,
858            custom_metric=custom_metric,
859            predictions=predictions,
860            sort_by=sort_by,
861            random_state=random_state,
862            estimators=estimators,
863            preprocess=preprocess,
864            n_jobs=n_jobs,
865            n_layers=1,
866            obj=obj,
867            n_hidden_features=n_hidden_features,
868            activation_name=activation_name,
869            a=a,
870            nodes_sim=nodes_sim,
871            bias=bias,
872            dropout=dropout,
873            direct_link=direct_link,
874            n_clusters=n_clusters,
875            cluster_encode=cluster_encode,
876            type_clust=type_clust,
877            type_scaling=type_scaling,
878            col_sample=col_sample,
879            row_sample=row_sample,
880            seed=seed,
881            backend=backend,
882        )

Fitting -- almost -- all the classification algorithms with nnetsauce's CustomClassifier and returning their scores (no layers).

Parameters:

verbose: int, optional (default=0)
    Any positive number for verbosity.

ignore_warnings: bool, optional (default=True)
    When set to True, warnings related to algorithms that are not able to run are ignored.

custom_metric: function, optional (default=None)
    When function is provided, models are evaluated based on the custom evaluation metric provided.

predictions: bool, optional (default=False)
    When set to True, the predictions of all the models are returned as a data frame.

sort_by: string, optional (default='Accuracy')
    Sort models by a metric. Available options are 'Accuracy', 'Balanced Accuracy', 'ROC AUC', 'F1 Score'
    or a custom metric identified by its name and provided by custom_metric.

random_state: int, optional (default=42)
    Reproducibility seed.

estimators: list, optional (default='all')
    list of Estimators names or just 'all' (default='all')

preprocess: bool
    preprocessing is done when set to True

n_jobs: int, optional (default=None)
    When possible, run models in parallel. For now, only used by individual models that support it.

All the other parameters are the same as CustomClassifier's.

Attributes:

models_: dict-object
    Returns a dictionary with each model pipeline as value
    and the model name as key.

best_model_: object
    Returns the best model pipeline based on the sort_by metric.

Examples:

import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle

dataset = datasets.load_iris()
X = dataset.data
y = dataset.target
X, y = shuffle(X, y, random_state=123)
X = X.astype(np.float32)
y = y.astype(np.float32)
X_train, X_test = X[:100], X[100:]
y_train, y_test = y[:100], y[100:]

clf = ns.LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)
model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test)
print(models)
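
A minimal sketch (not part of the original docstring) showing two of the options described above: restricting the run to a named subset of estimators and adding a custom metric. The estimator names and the Cohen's kappa metric are illustrative assumptions; the custom metric receives (y_true, y_pred), matching the call self.custom_metric(y_test, y_pred) in fit().

```python
# A minimal sketch: LazyClassifier on a named subset of estimators,
# with an extra custom metric and per-model predictions returned.
import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.metrics import cohen_kappa_score
from sklearn.utils import shuffle

dataset = datasets.load_iris()
X, y = shuffle(dataset.data, dataset.target, random_state=123)
X = X.astype(np.float32)
X_train, X_test = X[:100], X[100:]
y_train, y_test = y[:100], y[100:]

def kappa(y_true, y_pred):
    # custom metrics take (y_true, y_pred) and add a column to the scores
    return cohen_kappa_score(y_true, y_pred)

clf = ns.LazyClassifier(
    verbose=0,
    ignore_warnings=True,
    custom_metric=kappa,
    estimators=["LogisticRegression", "ExtraTreesClassifier"],  # instead of 'all'
    predictions=True,  # also return the per-model predictions
)
scores, preds = clf.fit(X_train, X_test, y_train, y_test)
print(scores)
```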
class LazyRegressor(nnetsauce.LazyDeepRegressor):
654class LazyRegressor(LazyDeepRegressor):
655    """
656        Fitting -- almost -- all the regression algorithms with
657        nnetsauce's CustomRegressor and returning their scores.
658
659    Parameters:
660
661        verbose: int, optional (default=0)
662            Any positive number for verbosity.
663
664        ignore_warnings: bool, optional (default=True)
665            When set to True, warnings related to algorithms that are not able to run are ignored.
666
667        custom_metric: function, optional (default=None)
668            When a function is provided, models are also evaluated using this custom metric.
669
670        predictions: bool, optional (default=False)
671            When set to True, the predictions of all the models are returned as a data frame.
672
673        sort_by: string, optional (default='RMSE')
674            Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken' and 'Custom Metric',
675            or a custom metric identified by its name and provided by custom_metric.
676
677        random_state: int, optional (default=42)
678            Reproducibility seed.
679
680        estimators: list, optional (default='all')
681            list of Estimators names or just 'all' (default='all')
682
683        preprocess: bool
684            preprocessing is done when set to True
685
686        n_jobs: int, optional (default=None)
687            When possible, run models in parallel. For now, only used by individual models that support it.
688
689        All the other parameters are the same as CustomRegressor's.
690
691    Attributes:
692
693        models_: dict-object
694            Returns a dictionary with each model pipeline as value
695            and the model name as key.
696
697        best_model_: object
698            Returns the best model pipeline based on the sort_by metric.
699
700    Examples:
701
702        import nnetsauce as ns
703        import numpy as np
704        from sklearn import datasets
705        from sklearn.utils import shuffle
706
707        diabetes = datasets.load_diabetes()
708        X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
709        X = X.astype(np.float32)
710
711        offset = int(X.shape[0] * 0.9)
712        X_train, y_train = X[:offset], y[:offset]
713        X_test, y_test = X[offset:], y[offset:]
714
715        reg = ns.LazyRegressor(verbose=0, ignore_warnings=False,
716                            custom_metric=None)
717        models, predictions = reg.fit(X_train, X_test, y_train, y_test)
718        print(models)
719
720    """
721
722    def __init__(
723        self,
724        verbose=0,
725        ignore_warnings=True,
726        custom_metric=None,
727        predictions=False,
728        sort_by="RMSE",
729        random_state=42,
730        estimators="all",
731        preprocess=False,
732        n_jobs=None,
733        # CustomRegressor attributes
734        obj=None,
735        n_hidden_features=5,
736        activation_name="relu",
737        a=0.01,
738        nodes_sim="sobol",
739        bias=True,
740        dropout=0,
741        direct_link=True,
742        n_clusters=2,
743        cluster_encode=True,
744        type_clust="kmeans",
745        type_scaling=("std", "std", "std"),
746        col_sample=1,
747        row_sample=1,
748        seed=123,
749        backend="cpu",
750    ):
751        super().__init__(
752            verbose=verbose,
753            ignore_warnings=ignore_warnings,
754            custom_metric=custom_metric,
755            predictions=predictions,
756            sort_by=sort_by,
757            random_state=random_state,
758            estimators=estimators,
759            preprocess=preprocess,
760            n_jobs=n_jobs,
761            n_layers=1,
762            obj=obj,
763            n_hidden_features=n_hidden_features,
764            activation_name=activation_name,
765            a=a,
766            nodes_sim=nodes_sim,
767            bias=bias,
768            dropout=dropout,
769            direct_link=direct_link,
770            n_clusters=n_clusters,
771            cluster_encode=cluster_encode,
772            type_clust=type_clust,
773            type_scaling=type_scaling,
774            col_sample=col_sample,
775            row_sample=row_sample,
776            seed=seed,
777            backend=backend,
778        )

Fitting -- almost -- all the regression algorithms with nnetsauce's CustomRegressor and returning their scores.

Parameters:

verbose: int, optional (default=0)
    Any positive number for verbosity.

ignore_warnings: bool, optional (default=True)
    When set to True, warnings related to algorithms that are not able to run are ignored.

custom_metric: function, optional (default=None)
    When a function is provided, models are also evaluated using this custom metric.

predictions: bool, optional (default=False)
    When set to True, the predictions of all the models are returned as a data frame.

sort_by: string, optional (default='RMSE')
    Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken' and 'Custom Metric',
    or a custom metric identified by its name and provided by custom_metric.

random_state: int, optional (default=42)
    Reproducibility seed.

estimators: list, optional (default='all')
    list of Estimators names or just 'all' (default='all')

preprocess: bool
    preprocessing is done when set to True

n_jobs: int, optional (default=None)
    When possible, run models in parallel. For now, only used by individual models that support it.

All the other parameters are the same as CustomRegressor's.

Attributes:

models_: dict-object
    Returns a dictionary with each model pipeline as value
    and the model name as key.

best_model_: object
    Returns the best model pipeline based on the sort_by metric.

Examples:

import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle

diabetes = datasets.load_diabetes()
X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
X = X.astype(np.float32)

offset = int(X.shape[0] * 0.9)
X_train, y_train = X[:offset], y[:offset]
X_test, y_test = X[offset:], y[offset:]

reg = ns.LazyRegressor(verbose=0, ignore_warnings=False,
                    custom_metric=None)
models, predictions = reg.fit(X_train, X_test, y_train, y_test)
print(models)
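
A minimal sketch (not part of the original docstring) showing how the best_model_ attribute documented above can be reused after fit() for predictions on new data; predictions=True is passed so that fit() returns both the scores and the per-model predictions.

```python
# A minimal sketch: fit LazyRegressor, then reuse the top-ranked pipeline
# stored in the documented best_model_ attribute like any fitted estimator.
import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle

diabetes = datasets.load_diabetes()
X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
X = X.astype(np.float32)

offset = int(X.shape[0] * 0.9)
X_train, y_train = X[:offset], y[:offset]
X_test, y_test = X[offset:], y[offset:]

reg = ns.LazyRegressor(verbose=0, ignore_warnings=True, predictions=True)
scores, preds = reg.fit(X_train, X_test, y_train, y_test)
print(scores.head())

best = reg.best_model_         # pipeline ranked first on the sort_by metric ('RMSE' by default)
y_pred = best.predict(X_test)  # reuse it as a regular fitted estimator
```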
class LazyDeepClassifier(nnetsauce.custom.custom.Custom, sklearn.base.ClassifierMixin):
 94class LazyDeepClassifier(Custom, ClassifierMixin):
 95    """
 96
 97    Fitting -- almost -- all the classification algorithms with layers of
 98    nnetsauce's CustomClassifier and returning their scores.
 99
100    Parameters:
101
102        verbose: int, optional (default=0)
103            Any positive number for verbosity.
104
105        ignore_warnings: bool, optional (default=True)
106            When set to True, warnings related to algorithms that are not
107            able to run are ignored.
108
109        custom_metric: function, optional (default=None)
110            When a function is provided, models are also evaluated
111            using this custom metric.
112
113        predictions: bool, optional (default=False)
114            When set to True, the predictions of all the models are
115            returned as a data frame.
116
117        sort_by: string, optional (default='Accuracy')
118            Sort models by a metric. Available options are 'Accuracy',
119            'Balanced Accuracy', 'ROC AUC', 'F1 Score' or a custom metric
120            identified by its name and provided by custom_metric.
121
122        random_state: int, optional (default=42)
123            Reproducibility seed.
124
125        estimators: list, optional (default='all')
126            list of Estimators names or just 'all' for > 90 classifiers
127            (default='all')
128
129        preprocess: bool, preprocessing is done when set to True
130
131        n_jobs: int, optional (default=None)
132            When possible, run models in parallel. For now, only used by individual models that support it.
133
134        n_layers: int, optional (default=3)
135            Number of layers of CustomClassifiers to be used.
136
137        All the other parameters are the same as CustomClassifier's.
138
139    Attributes:
140
141        models_: dict-object
142            Returns a dictionary with each model pipeline as value
143            and the model name as key.
144
145        best_model_: object
146            Returns the best model pipeline.
147
148    Examples
149
150        ```python
151        import nnetsauce as ns
152        from sklearn.datasets import load_breast_cancer
153        from sklearn.model_selection import train_test_split
154        data = load_breast_cancer()
155        X = data.data
156        y= data.target
157        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2,
158            random_state=123)
159        clf = ns.LazyDeepClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
160        models, predictions = clf.fit(X_train, X_test, y_train, y_test)
161        model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test)
162        print(models)
163        ```
164
165    """
166
167    def __init__(
168        self,
169        verbose=0,
170        ignore_warnings=True,
171        custom_metric=None,
172        predictions=False,
173        sort_by="Accuracy",
174        random_state=42,
175        estimators="all",
176        preprocess=False,
177        n_jobs=None,
178        # Defining depth
179        n_layers=3,
180        # CustomClassifier attributes
181        obj=None,
182        n_hidden_features=5,
183        activation_name="relu",
184        a=0.01,
185        nodes_sim="sobol",
186        bias=True,
187        dropout=0,
188        direct_link=True,
189        n_clusters=2,
190        cluster_encode=True,
191        type_clust="kmeans",
192        type_scaling=("std", "std", "std"),
193        col_sample=1,
194        row_sample=1,
195        seed=123,
196        backend="cpu",
197    ):
198        self.verbose = verbose
199        self.ignore_warnings = ignore_warnings
200        self.custom_metric = custom_metric
201        self.predictions = predictions
202        self.sort_by = sort_by
203        self.models_ = {}
204        self.best_model_ = None
205        self.random_state = random_state
206        self.estimators = estimators
207        self.preprocess = preprocess
208        self.n_layers = n_layers - 1
209        self.n_jobs = n_jobs
210        super().__init__(
211            obj=obj,
212            n_hidden_features=n_hidden_features,
213            activation_name=activation_name,
214            a=a,
215            nodes_sim=nodes_sim,
216            bias=bias,
217            dropout=dropout,
218            direct_link=direct_link,
219            n_clusters=n_clusters,
220            cluster_encode=cluster_encode,
221            type_clust=type_clust,
222            type_scaling=type_scaling,
223            col_sample=col_sample,
224            row_sample=row_sample,
225            seed=seed,
226            backend=backend,
227        )
228
229    def fit(self, X_train, X_test, y_train, y_test):
230        """Fit classifiers to X_train and y_train, predict and score on X_test,
231        y_test.
232
233        Parameters:
234
235            X_train: array-like,
236                Training vectors, where rows is the number of samples
237                and columns is the number of features.
238
239            X_test: array-like,
240                Testing vectors, where rows is the number of samples
241                and columns is the number of features.
242
243            y_train: array-like,
244                Training target values, where rows is the
245                number of samples.
246
247            y_test: array-like,
248                Testing target values, where rows is the
249                number of samples.
250
251        Returns:
252
253            scores: Pandas DataFrame
254                Returns metrics of all the models in a Pandas DataFrame.
255
256            predictions: Pandas DataFrame
257                Returns predictions of all the models in a Pandas DataFrame.
258        """
259        Accuracy = []
260        B_Accuracy = []
261        ROC_AUC = []
262        F1 = []
263        names = []
264        TIME = []
265        predictions = {}
266
267        if self.custom_metric is not None:
268            CUSTOM_METRIC = []
269
270        if isinstance(X_train, np.ndarray):
271            X_train = pd.DataFrame(X_train)
272            X_test = pd.DataFrame(X_test)
273
274        numeric_features = X_train.select_dtypes(include=[np.number]).columns
275        categorical_features = X_train.select_dtypes(include=["object"]).columns
276
277        categorical_low, categorical_high = get_card_split(
278            X_train, categorical_features
279        )
280
281        if self.preprocess is True:
282            preprocessor = ColumnTransformer(
283                transformers=[
284                    ("numeric", numeric_transformer, numeric_features),
285                    (
286                        "categorical_low",
287                        categorical_transformer_low,
288                        categorical_low,
289                    ),
290                    (
291                        "categorical_high",
292                        categorical_transformer_high,
293                        categorical_high,
294                    ),
295                ]
296            )
297
298        # baseline models
299        try:
300            baseline_names = ["RandomForestClassifier", "XGBClassifier"]
301            baseline_models = [RandomForestClassifier(), xgb.XGBClassifier()]
302        except Exception as exception:
303            baseline_names = ["RandomForestClassifier"]
304            baseline_models = [RandomForestClassifier()]
305
306        for name, model in zip(baseline_names, baseline_models):
307            start = time.time()
308            try:
309                model.fit(X_train, y_train)
310                self.models_[name] = model
311                y_pred = model.predict(X_test)
312                accuracy = accuracy_score(y_test, y_pred, normalize=True)
313                b_accuracy = balanced_accuracy_score(y_test, y_pred)
314                f1 = f1_score(y_test, y_pred, average="weighted")
315                try:
316                    roc_auc = roc_auc_score(y_test, y_pred)
317                except Exception as exception:
318                    roc_auc = None
319                    if self.ignore_warnings is False:
320                        print("ROC AUC couldn't be calculated for " + name)
321                        print(exception)
322                names.append(name)
323                Accuracy.append(accuracy)
324                B_Accuracy.append(b_accuracy)
325                ROC_AUC.append(roc_auc)
326                F1.append(f1)
327                TIME.append(time.time() - start)
328                if self.custom_metric is not None:
329                    custom_metric = self.custom_metric(y_test, y_pred)
330                    CUSTOM_METRIC.append(custom_metric)
331                if self.verbose > 0:
332                    if self.custom_metric is not None:
333                        print(
334                            {
335                                "Model": name,
336                                "Accuracy": accuracy,
337                                "Balanced Accuracy": b_accuracy,
338                                "ROC AUC": roc_auc,
339                                "F1 Score": f1,
340                                self.custom_metric.__name__: custom_metric,
341                                "Time taken": time.time() - start,
342                            }
343                        )
344                    else:
345                        print(
346                            {
347                                "Model": name,
348                                "Accuracy": accuracy,
349                                "Balanced Accuracy": b_accuracy,
350                                "ROC AUC": roc_auc,
351                                "F1 Score": f1,
352                                "Time taken": time.time() - start,
353                            }
354                        )
355                if self.predictions:
356                    predictions[name] = y_pred
357            except Exception as exception:
358                if self.ignore_warnings is False:
359                    print(name + " model failed to execute")
360                    print(exception)
361
362        if self.estimators == "all":
363            self.classifiers = [
364                item
365                for sublist in [
366                    DEEPCLASSIFIERS,
367                    DEEPMULTITASKCLASSIFIERS,
368                    DEEPSIMPLEMULTITASKCLASSIFIERS,
369                ]
370                for item in sublist
371            ]
372        else:
373            self.classifiers = (
374                [
375                    ("DeepCustomClassifier(" + est[0] + ")", est[1])
376                    for est in all_estimators()
377                    if (
378                        issubclass(est[1], ClassifierMixin)
379                        and (est[0] in self.estimators)
380                    )
381                ]
382                + [
383                    (
384                        "DeepMultitaskClassifier(" + est[0] + ")",
385                        partial(MultitaskClassifier, obj=est[1]()),
386                    )
387                    for est in all_estimators()
388                    if (
389                        issubclass(est[1], RegressorMixin)
390                        and (est[0] in self.estimators)
391                    )
392                ]
393                + [
394                    (
395                        "DeepSimpleMultitaskClassifier(" + est[0] + ")",
396                        partial(SimpleMultitaskClassifier, obj=est[1]()),
397                    )
398                    for est in all_estimators()
399                    if (
400                        issubclass(est[1], RegressorMixin)
401                        and (est[0] in self.estimators)
402                    )
403                ]
404            )
405
406        if self.preprocess is True:
407            for name, model in tqdm(self.classifiers):  # do parallel exec
408                other_args = (
409                    {}
410                )  # use this trick for `random_state` too --> refactor
411                try:
412                    if (
413                        "n_jobs" in model().get_params().keys()
414                        and name.find("LogisticRegression") == -1
415                    ):
416                        other_args["n_jobs"] = self.n_jobs
417                except Exception:
418                    pass
419
420                start = time.time()
421
422                try:
423                    if "random_state" in model().get_params().keys():
424                        layer_clf = CustomClassifier(
425                            obj=model(random_state=self.random_state),
426                            n_hidden_features=self.n_hidden_features,
427                            activation_name=self.activation_name,
428                            a=self.a,
429                            nodes_sim=self.nodes_sim,
430                            bias=self.bias,
431                            dropout=self.dropout,
432                            direct_link=self.direct_link,
433                            n_clusters=self.n_clusters,
434                            cluster_encode=self.cluster_encode,
435                            type_clust=self.type_clust,
436                            type_scaling=self.type_scaling,
437                            col_sample=self.col_sample,
438                            row_sample=self.row_sample,
439                            seed=self.seed,
440                            backend=self.backend,
441                            cv_calibration=None,
442                        )
443
444                    else:
445                        layer_clf = CustomClassifier(
446                            obj=model(),
447                            n_hidden_features=self.n_hidden_features,
448                            activation_name=self.activation_name,
449                            a=self.a,
450                            nodes_sim=self.nodes_sim,
451                            bias=self.bias,
452                            dropout=self.dropout,
453                            direct_link=self.direct_link,
454                            n_clusters=self.n_clusters,
455                            cluster_encode=self.cluster_encode,
456                            type_clust=self.type_clust,
457                            type_scaling=self.type_scaling,
458                            col_sample=self.col_sample,
459                            row_sample=self.row_sample,
460                            seed=self.seed,
461                            backend=self.backend,
462                            cv_calibration=None,
463                        )
464
465                    layer_clf.fit(X_train, y_train)
466
467                    for _ in range(self.n_layers):
468                        layer_clf = deepcopy(
469                            CustomClassifier(
470                                obj=layer_clf,
471                                n_hidden_features=self.n_hidden_features,
472                                activation_name=self.activation_name,
473                                a=self.a,
474                                nodes_sim=self.nodes_sim,
475                                bias=self.bias,
476                                dropout=self.dropout,
477                                direct_link=self.direct_link,
478                                n_clusters=self.n_clusters,
479                                cluster_encode=self.cluster_encode,
480                                type_clust=self.type_clust,
481                                type_scaling=self.type_scaling,
482                                col_sample=self.col_sample,
483                                row_sample=self.row_sample,
484                                seed=self.seed,
485                                backend=self.backend,
486                                cv_calibration=None,
487                            )
488                        )
489
490                    pipe = Pipeline(
491                        [
492                            ("preprocessor", preprocessor),
493                            ("classifier", layer_clf),
494                        ]
495                    )
496
497                    pipe.fit(X_train, y_train)
498                    self.models_[name] = pipe
499                    y_pred = pipe.predict(X_test)
500                    accuracy = accuracy_score(y_test, y_pred, normalize=True)
501                    b_accuracy = balanced_accuracy_score(y_test, y_pred)
502                    f1 = f1_score(y_test, y_pred, average="weighted")
503                    try:
504                        roc_auc = roc_auc_score(y_test, y_pred)
505                    except Exception as exception:
506                        roc_auc = None
507                        if self.ignore_warnings is False:
508                            print("ROC AUC couldn't be calculated for " + name)
509                            print(exception)
510                    names.append(name)
511                    Accuracy.append(accuracy)
512                    B_Accuracy.append(b_accuracy)
513                    ROC_AUC.append(roc_auc)
514                    F1.append(f1)
515                    TIME.append(time.time() - start)
516                    if self.custom_metric is not None:
517                        custom_metric = self.custom_metric(y_test, y_pred)
518                        CUSTOM_METRIC.append(custom_metric)
519                    if self.verbose > 0:
520                        if self.custom_metric is not None:
521                            print(
522                                {
523                                    "Model": name,
524                                    "Accuracy": accuracy,
525                                    "Balanced Accuracy": b_accuracy,
526                                    "ROC AUC": roc_auc,
527                                    "F1 Score": f1,
528                                    self.custom_metric.__name__: custom_metric,
529                                    "Time taken": time.time() - start,
530                                }
531                            )
532                        else:
533                            print(
534                                {
535                                    "Model": name,
536                                    "Accuracy": accuracy,
537                                    "Balanced Accuracy": b_accuracy,
538                                    "ROC AUC": roc_auc,
539                                    "F1 Score": f1,
540                                    "Time taken": time.time() - start,
541                                }
542                            )
543                    if self.predictions:
544                        predictions[name] = y_pred
545                except Exception as exception:
546                    if self.ignore_warnings is False:
547                        print(name + " model failed to execute")
548                        print(exception)
549
550        else:  # no preprocessing
551            for name, model in tqdm(self.classifiers):  # do parallel exec
552                start = time.time()
553                try:
554                    if "random_state" in model().get_params().keys():
555                        layer_clf = CustomClassifier(
556                            obj=model(random_state=self.random_state),
557                            n_hidden_features=self.n_hidden_features,
558                            activation_name=self.activation_name,
559                            a=self.a,
560                            nodes_sim=self.nodes_sim,
561                            bias=self.bias,
562                            dropout=self.dropout,
563                            direct_link=self.direct_link,
564                            n_clusters=self.n_clusters,
565                            cluster_encode=self.cluster_encode,
566                            type_clust=self.type_clust,
567                            type_scaling=self.type_scaling,
568                            col_sample=self.col_sample,
569                            row_sample=self.row_sample,
570                            seed=self.seed,
571                            backend=self.backend,
572                            cv_calibration=None,
573                        )
574
575                    else:
576                        layer_clf = CustomClassifier(
577                            obj=model(),
578                            n_hidden_features=self.n_hidden_features,
579                            activation_name=self.activation_name,
580                            a=self.a,
581                            nodes_sim=self.nodes_sim,
582                            bias=self.bias,
583                            dropout=self.dropout,
584                            direct_link=self.direct_link,
585                            n_clusters=self.n_clusters,
586                            cluster_encode=self.cluster_encode,
587                            type_clust=self.type_clust,
588                            type_scaling=self.type_scaling,
589                            col_sample=self.col_sample,
590                            row_sample=self.row_sample,
591                            seed=self.seed,
592                            backend=self.backend,
593                            cv_calibration=None,
594                        )
595
596                    layer_clf.fit(X_train, y_train)
597
598                    for _ in range(self.n_layers):
599                        layer_clf = deepcopy(
600                            CustomClassifier(
601                                obj=layer_clf,
602                                n_hidden_features=self.n_hidden_features,
603                                activation_name=self.activation_name,
604                                a=self.a,
605                                nodes_sim=self.nodes_sim,
606                                bias=self.bias,
607                                dropout=self.dropout,
608                                direct_link=self.direct_link,
609                                n_clusters=self.n_clusters,
610                                cluster_encode=self.cluster_encode,
611                                type_clust=self.type_clust,
612                                type_scaling=self.type_scaling,
613                                col_sample=self.col_sample,
614                                row_sample=self.row_sample,
615                                seed=self.seed,
616                                backend=self.backend,
617                                cv_calibration=None,
618                            )
619                        )
620
621                        # layer_clf.fit(X_train, y_train)
622
623                    layer_clf.fit(X_train, y_train)
624
625                    self.models_[name] = layer_clf
626                    y_pred = layer_clf.predict(X_test)
627                    accuracy = accuracy_score(y_test, y_pred, normalize=True)
628                    b_accuracy = balanced_accuracy_score(y_test, y_pred)
629                    f1 = f1_score(y_test, y_pred, average="weighted")
630                    try:
631                        roc_auc = roc_auc_score(y_test, y_pred)
632                    except Exception as exception:
633                        roc_auc = None
634                        if self.ignore_warnings is False:
635                            print("ROC AUC couldn't be calculated for " + name)
636                            print(exception)
637                    names.append(name)
638                    Accuracy.append(accuracy)
639                    B_Accuracy.append(b_accuracy)
640                    ROC_AUC.append(roc_auc)
641                    F1.append(f1)
642                    TIME.append(time.time() - start)
643                    if self.custom_metric is not None:
644                        custom_metric = self.custom_metric(y_test, y_pred)
645                        CUSTOM_METRIC.append(custom_metric)
646                    if self.verbose > 0:
647                        if self.custom_metric is not None:
648                            print(
649                                {
650                                    "Model": name,
651                                    "Accuracy": accuracy,
652                                    "Balanced Accuracy": b_accuracy,
653                                    "ROC AUC": roc_auc,
654                                    "F1 Score": f1,
655                                    self.custom_metric.__name__: custom_metric,
656                                    "Time taken": time.time() - start,
657                                }
658                            )
659                        else:
660                            print(
661                                {
662                                    "Model": name,
663                                    "Accuracy": accuracy,
664                                    "Balanced Accuracy": b_accuracy,
665                                    "ROC AUC": roc_auc,
666                                    "F1 Score": f1,
667                                    "Time taken": time.time() - start,
668                                }
669                            )
670                    if self.predictions:
671                        predictions[name] = y_pred
672                except Exception as exception:
673                    if self.ignore_warnings is False:
674                        print(name + " model failed to execute")
675                        print(exception)
676
677        if self.custom_metric is None:
678            scores = pd.DataFrame(
679                {
680                    "Model": names,
681                    "Accuracy": Accuracy,
682                    "Balanced Accuracy": B_Accuracy,
683                    "ROC AUC": ROC_AUC,
684                    "F1 Score": F1,
685                    "Time Taken": TIME,
686                }
687            )
688        else:
689            scores = pd.DataFrame(
690                {
691                    "Model": names,
692                    "Accuracy": Accuracy,
693                    "Balanced Accuracy": B_Accuracy,
694                    "ROC AUC": ROC_AUC,
695                    "F1 Score": F1,
696                    "Custom metric": CUSTOM_METRIC,
697                    "Time Taken": TIME,
698                }
699            )
700        scores = scores.sort_values(by=self.sort_by, ascending=False).set_index(
701            "Model"
702        )
703
704        self.best_model_ = self.models_[scores.index[0]]
705
706        if self.predictions is True:
707            return scores, predictions
708
709        return scores
710
711    def get_best_model(self):
712        """
713        This function returns the best model pipeline based on the sort_by metric.
714
715        Returns:
716
717            best_model: object,
718                Returns the best model pipeline based on the sort_by metric.
719
720        """
721        return self.best_model_
722
723    def provide_models(self, X_train, X_test, y_train, y_test):
724        """Returns all the model objects trained. If fit hasn't been called yet,
725        then it's called to return the models.
726
727        Parameters:
728
729        X_train: array-like,
730            Training vectors, where rows is the number of samples
731            and columns is the number of features.
732
733        X_test: array-like,
734            Testing vectors, where rows is the number of samples
735            and columns is the number of features.
736
737        y_train: array-like,
738            Training target values, where rows is the
739            number of samples.
740
741        y_test: array-like,
742            Testing target values, where rows is the
743            number of samples.
744
745        Returns:
746
747            models: dict-object,
748                Returns a dictionary with each model's pipeline as value
749                and the model name as key.
750        """
751        if len(self.models_.keys()) == 0:
752            self.fit(X_train, X_test, y_train, y_test)
753
754        return self.models_

Fitting -- almost -- all the classification algorithms with layers of nnetsauce's CustomClassifier and returning their scores.

Parameters:

verbose: int, optional (default=0)
    Any positive number for verbosity.

ignore_warnings: bool, optional (default=True)
    When set to True, warnings related to algorithms that are not
    able to run are ignored.

custom_metric: function, optional (default=None)
    When a function is provided, models are also evaluated
    using this custom metric.

predictions: bool, optional (default=False)
    When set to True, the predictions of all the models are
    returned as a data frame.

sort_by: string, optional (default='Accuracy')
    Sort models by a metric. Available options are 'Accuracy',
    'Balanced Accuracy', 'ROC AUC', 'F1 Score' or a custom metric
    identified by its name and provided by custom_metric.

random_state: int, optional (default=42)
    Reproducibility seed.

estimators: list, optional (default='all')
    list of Estimators names or just 'all' for > 90 classifiers
    (default='all')

preprocess: bool, preprocessing is done when set to True

n_jobs: int, optional (default=None)
    When possible, run models in parallel. For now, only used by individual models that support it.

n_layers: int, optional (default=3)
    Number of layers of CustomClassifiers to be used.

All the other parameters are the same as CustomClassifier's.

Attributes:

models_: dict-object
    Returns a dictionary with each model pipeline as value
    and the model name as key.

best_model_: object
    Returns the best model pipeline.

Examples

import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

data = load_breast_cancer()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2,
    random_state=123)
clf = ns.LazyDeepClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)
model_dictionary = clf.provide_models(X_train, X_test, y_train, y_test)
print(models)

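A minimal sketch (not part of the original example) exercising the n_layers parameter documented above; each additional layer nests the previous CustomClassifier as its obj, as shown in the fit() source that follows, and get_best_model() then returns the top-ranked pipeline.

```python
# A minimal sketch: a two-layer LazyDeepClassifier run, then retrieval of the
# best pipeline. n_hidden_features is passed through to every CustomClassifier layer.
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

data = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, test_size=0.2, random_state=123
)

clf = ns.LazyDeepClassifier(
    n_layers=2,            # depth of the CustomClassifier stack
    n_hidden_features=10,  # hidden features in each layer
    verbose=0,
    ignore_warnings=True,
    predictions=True,      # also return the per-model predictions
)
scores, preds = clf.fit(X_train, X_test, y_train, y_test)
print(scores)

best = clf.get_best_model()  # same object as clf.best_model_
```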
def fit(self, X_train, X_test, y_train, y_test):
229    def fit(self, X_train, X_test, y_train, y_test):
230        """Fit classifiers to X_train and y_train, predict and score on X_test,
231        y_test.
232
233        Parameters:
234
235            X_train: array-like,
236                Training vectors, where rows is the number of samples
237                and columns is the number of features.
238
239            X_test: array-like,
240                Testing vectors, where rows is the number of samples
241                and columns is the number of features.
242
243            y_train: array-like,
244                Training target values, where rows is the
245                number of samples.
246
247            y_test: array-like,
248                Testing target values, where rows is the
249                number of samples.
250
251        Returns:
252
253            scores: Pandas DataFrame
254                Returns metrics of all the models in a Pandas DataFrame.
255
256            predictions: Pandas DataFrame
257                Returns predictions of all the models in a Pandas DataFrame.
258        """
259        Accuracy = []
260        B_Accuracy = []
261        ROC_AUC = []
262        F1 = []
263        names = []
264        TIME = []
265        predictions = {}
266
267        if self.custom_metric is not None:
268            CUSTOM_METRIC = []
269
270        if isinstance(X_train, np.ndarray):
271            X_train = pd.DataFrame(X_train)
272            X_test = pd.DataFrame(X_test)
273
274        numeric_features = X_train.select_dtypes(include=[np.number]).columns
275        categorical_features = X_train.select_dtypes(include=["object"]).columns
276
277        categorical_low, categorical_high = get_card_split(
278            X_train, categorical_features
279        )
280
281        if self.preprocess is True:
282            preprocessor = ColumnTransformer(
283                transformers=[
284                    ("numeric", numeric_transformer, numeric_features),
285                    (
286                        "categorical_low",
287                        categorical_transformer_low,
288                        categorical_low,
289                    ),
290                    (
291                        "categorical_high",
292                        categorical_transformer_high,
293                        categorical_high,
294                    ),
295                ]
296            )
297
298        # baseline models
299        try:
300            baseline_names = ["RandomForestClassifier", "XGBClassifier"]
301            baseline_models = [RandomForestClassifier(), xgb.XGBClassifier()]
302        except Exception as exception:
303            baseline_names = ["RandomForestClassifier"]
304            baseline_models = [RandomForestClassifier()]
305
306        for name, model in zip(baseline_names, baseline_models):
307            start = time.time()
308            try:
309                model.fit(X_train, y_train)
310                self.models_[name] = model
311                y_pred = model.predict(X_test)
312                accuracy = accuracy_score(y_test, y_pred, normalize=True)
313                b_accuracy = balanced_accuracy_score(y_test, y_pred)
314                f1 = f1_score(y_test, y_pred, average="weighted")
315                try:
316                    roc_auc = roc_auc_score(y_test, y_pred)
317                except Exception as exception:
318                    roc_auc = None
319                    if self.ignore_warnings is False:
320                        print("ROC AUC couldn't be calculated for " + name)
321                        print(exception)
322                names.append(name)
323                Accuracy.append(accuracy)
324                B_Accuracy.append(b_accuracy)
325                ROC_AUC.append(roc_auc)
326                F1.append(f1)
327                TIME.append(time.time() - start)
328                if self.custom_metric is not None:
329                    custom_metric = self.custom_metric(y_test, y_pred)
330                    CUSTOM_METRIC.append(custom_metric)
331                if self.verbose > 0:
332                    if self.custom_metric is not None:
333                        print(
334                            {
335                                "Model": name,
336                                "Accuracy": accuracy,
337                                "Balanced Accuracy": b_accuracy,
338                                "ROC AUC": roc_auc,
339                                "F1 Score": f1,
340                                self.custom_metric.__name__: custom_metric,
341                                "Time taken": time.time() - start,
342                            }
343                        )
344                    else:
345                        print(
346                            {
347                                "Model": name,
348                                "Accuracy": accuracy,
349                                "Balanced Accuracy": b_accuracy,
350                                "ROC AUC": roc_auc,
351                                "F1 Score": f1,
352                                "Time taken": time.time() - start,
353                            }
354                        )
355                if self.predictions:
356                    predictions[name] = y_pred
357            except Exception as exception:
358                if self.ignore_warnings is False:
359                    print(name + " model failed to execute")
360                    print(exception)
361
362        if self.estimators == "all":
363            self.classifiers = [
364                item
365                for sublist in [
366                    DEEPCLASSIFIERS,
367                    DEEPMULTITASKCLASSIFIERS,
368                    DEEPSIMPLEMULTITASKCLASSIFIERS,
369                ]
370                for item in sublist
371            ]
372        else:
373            self.classifiers = (
374                [
375                    ("DeepCustomClassifier(" + est[0] + ")", est[1])
376                    for est in all_estimators()
377                    if (
378                        issubclass(est[1], ClassifierMixin)
379                        and (est[0] in self.estimators)
380                    )
381                ]
382                + [
383                    (
384                        "DeepMultitaskClassifier(" + est[0] + ")",
385                        partial(MultitaskClassifier, obj=est[1]()),
386                    )
387                    for est in all_estimators()
388                    if (
389                        issubclass(est[1], RegressorMixin)
390                        and (est[0] in self.estimators)
391                    )
392                ]
393                + [
394                    (
395                        "DeepSimpleMultitaskClassifier(" + est[0] + ")",
396                        partial(SimpleMultitaskClassifier, obj=est[1]()),
397                    )
398                    for est in all_estimators()
399                    if (
400                        issubclass(est[1], RegressorMixin)
401                        and (est[0] in self.estimators)
402                    )
403                ]
404            )
405
406        if self.preprocess is True:
407            for name, model in tqdm(self.classifiers):  # do parallel exec
408                other_args = (
409                    {}
410                )  # use this trick for `random_state` too --> refactor
411                try:
412                    if (
413                        "n_jobs" in model().get_params().keys()
414                        and name.find("LogisticRegression") == -1
415                    ):
416                        other_args["n_jobs"] = self.n_jobs
417                except Exception:
418                    pass
419
420                start = time.time()
421
422                try:
423                    if "random_state" in model().get_params().keys():
424                        layer_clf = CustomClassifier(
425                            obj=model(random_state=self.random_state),
426                            n_hidden_features=self.n_hidden_features,
427                            activation_name=self.activation_name,
428                            a=self.a,
429                            nodes_sim=self.nodes_sim,
430                            bias=self.bias,
431                            dropout=self.dropout,
432                            direct_link=self.direct_link,
433                            n_clusters=self.n_clusters,
434                            cluster_encode=self.cluster_encode,
435                            type_clust=self.type_clust,
436                            type_scaling=self.type_scaling,
437                            col_sample=self.col_sample,
438                            row_sample=self.row_sample,
439                            seed=self.seed,
440                            backend=self.backend,
441                            cv_calibration=None,
442                        )
443
444                    else:
445                        layer_clf = CustomClassifier(
446                            obj=model(),
447                            n_hidden_features=self.n_hidden_features,
448                            activation_name=self.activation_name,
449                            a=self.a,
450                            nodes_sim=self.nodes_sim,
451                            bias=self.bias,
452                            dropout=self.dropout,
453                            direct_link=self.direct_link,
454                            n_clusters=self.n_clusters,
455                            cluster_encode=self.cluster_encode,
456                            type_clust=self.type_clust,
457                            type_scaling=self.type_scaling,
458                            col_sample=self.col_sample,
459                            row_sample=self.row_sample,
460                            seed=self.seed,
461                            backend=self.backend,
462                            cv_calibration=None,
463                        )
464
465                    layer_clf.fit(X_train, y_train)
466
467                    for _ in range(self.n_layers):
468                        layer_clf = deepcopy(
469                            CustomClassifier(
470                                obj=layer_clf,
471                                n_hidden_features=self.n_hidden_features,
472                                activation_name=self.activation_name,
473                                a=self.a,
474                                nodes_sim=self.nodes_sim,
475                                bias=self.bias,
476                                dropout=self.dropout,
477                                direct_link=self.direct_link,
478                                n_clusters=self.n_clusters,
479                                cluster_encode=self.cluster_encode,
480                                type_clust=self.type_clust,
481                                type_scaling=self.type_scaling,
482                                col_sample=self.col_sample,
483                                row_sample=self.row_sample,
484                                seed=self.seed,
485                                backend=self.backend,
486                                cv_calibration=None,
487                            )
488                        )
489
490                    pipe = Pipeline(
491                        [
492                            ("preprocessor", preprocessor),
493                            ("classifier", layer_clf),
494                        ]
495                    )
496
497                    pipe.fit(X_train, y_train)
498                    self.models_[name] = pipe
499                    y_pred = pipe.predict(X_test)
500                    accuracy = accuracy_score(y_test, y_pred, normalize=True)
501                    b_accuracy = balanced_accuracy_score(y_test, y_pred)
502                    f1 = f1_score(y_test, y_pred, average="weighted")
503                    try:
504                        roc_auc = roc_auc_score(y_test, y_pred)
505                    except Exception as exception:
506                        roc_auc = None
507                        if self.ignore_warnings is False:
508                            print("ROC AUC couldn't be calculated for " + name)
509                            print(exception)
510                    names.append(name)
511                    Accuracy.append(accuracy)
512                    B_Accuracy.append(b_accuracy)
513                    ROC_AUC.append(roc_auc)
514                    F1.append(f1)
515                    TIME.append(time.time() - start)
516                    if self.custom_metric is not None:
517                        custom_metric = self.custom_metric(y_test, y_pred)
518                        CUSTOM_METRIC.append(custom_metric)
519                    if self.verbose > 0:
520                        if self.custom_metric is not None:
521                            print(
522                                {
523                                    "Model": name,
524                                    "Accuracy": accuracy,
525                                    "Balanced Accuracy": b_accuracy,
526                                    "ROC AUC": roc_auc,
527                                    "F1 Score": f1,
528                                    self.custom_metric.__name__: custom_metric,
529                                    "Time taken": time.time() - start,
530                                }
531                            )
532                        else:
533                            print(
534                                {
535                                    "Model": name,
536                                    "Accuracy": accuracy,
537                                    "Balanced Accuracy": b_accuracy,
538                                    "ROC AUC": roc_auc,
539                                    "F1 Score": f1,
540                                    "Time taken": time.time() - start,
541                                }
542                            )
543                    if self.predictions:
544                        predictions[name] = y_pred
545                except Exception as exception:
546                    if self.ignore_warnings is False:
547                        print(name + " model failed to execute")
548                        print(exception)
549
550        else:  # no preprocessing
551            for name, model in tqdm(self.classifiers):  # do parallel exec
552                start = time.time()
553                try:
554                    if "random_state" in model().get_params().keys():
555                        layer_clf = CustomClassifier(
556                            obj=model(random_state=self.random_state),
557                            n_hidden_features=self.n_hidden_features,
558                            activation_name=self.activation_name,
559                            a=self.a,
560                            nodes_sim=self.nodes_sim,
561                            bias=self.bias,
562                            dropout=self.dropout,
563                            direct_link=self.direct_link,
564                            n_clusters=self.n_clusters,
565                            cluster_encode=self.cluster_encode,
566                            type_clust=self.type_clust,
567                            type_scaling=self.type_scaling,
568                            col_sample=self.col_sample,
569                            row_sample=self.row_sample,
570                            seed=self.seed,
571                            backend=self.backend,
572                            cv_calibration=None,
573                        )
574
575                    else:
576                        layer_clf = CustomClassifier(
577                            obj=model(),
578                            n_hidden_features=self.n_hidden_features,
579                            activation_name=self.activation_name,
580                            a=self.a,
581                            nodes_sim=self.nodes_sim,
582                            bias=self.bias,
583                            dropout=self.dropout,
584                            direct_link=self.direct_link,
585                            n_clusters=self.n_clusters,
586                            cluster_encode=self.cluster_encode,
587                            type_clust=self.type_clust,
588                            type_scaling=self.type_scaling,
589                            col_sample=self.col_sample,
590                            row_sample=self.row_sample,
591                            seed=self.seed,
592                            backend=self.backend,
593                            cv_calibration=None,
594                        )
595
596                    layer_clf.fit(X_train, y_train)
597
598                    for _ in range(self.n_layers):
599                        layer_clf = deepcopy(
600                            CustomClassifier(
601                                obj=layer_clf,
602                                n_hidden_features=self.n_hidden_features,
603                                activation_name=self.activation_name,
604                                a=self.a,
605                                nodes_sim=self.nodes_sim,
606                                bias=self.bias,
607                                dropout=self.dropout,
608                                direct_link=self.direct_link,
609                                n_clusters=self.n_clusters,
610                                cluster_encode=self.cluster_encode,
611                                type_clust=self.type_clust,
612                                type_scaling=self.type_scaling,
613                                col_sample=self.col_sample,
614                                row_sample=self.row_sample,
615                                seed=self.seed,
616                                backend=self.backend,
617                                cv_calibration=None,
618                            )
619                        )
620
621                        # layer_clf.fit(X_train, y_train)
622
623                    layer_clf.fit(X_train, y_train)
624
625                    self.models_[name] = layer_clf
626                    y_pred = layer_clf.predict(X_test)
627                    accuracy = accuracy_score(y_test, y_pred, normalize=True)
628                    b_accuracy = balanced_accuracy_score(y_test, y_pred)
629                    f1 = f1_score(y_test, y_pred, average="weighted")
630                    try:
631                        roc_auc = roc_auc_score(y_test, y_pred)
632                    except Exception as exception:
633                        roc_auc = None
634                        if self.ignore_warnings is False:
635                            print("ROC AUC couldn't be calculated for " + name)
636                            print(exception)
637                    names.append(name)
638                    Accuracy.append(accuracy)
639                    B_Accuracy.append(b_accuracy)
640                    ROC_AUC.append(roc_auc)
641                    F1.append(f1)
642                    TIME.append(time.time() - start)
643                    if self.custom_metric is not None:
644                        custom_metric = self.custom_metric(y_test, y_pred)
645                        CUSTOM_METRIC.append(custom_metric)
646                    if self.verbose > 0:
647                        if self.custom_metric is not None:
648                            print(
649                                {
650                                    "Model": name,
651                                    "Accuracy": accuracy,
652                                    "Balanced Accuracy": b_accuracy,
653                                    "ROC AUC": roc_auc,
654                                    "F1 Score": f1,
655                                    self.custom_metric.__name__: custom_metric,
656                                    "Time taken": time.time() - start,
657                                }
658                            )
659                        else:
660                            print(
661                                {
662                                    "Model": name,
663                                    "Accuracy": accuracy,
664                                    "Balanced Accuracy": b_accuracy,
665                                    "ROC AUC": roc_auc,
666                                    "F1 Score": f1,
667                                    "Time taken": time.time() - start,
668                                }
669                            )
670                    if self.predictions:
671                        predictions[name] = y_pred
672                except Exception as exception:
673                    if self.ignore_warnings is False:
674                        print(name + " model failed to execute")
675                        print(exception)
676
677        if self.custom_metric is None:
678            scores = pd.DataFrame(
679                {
680                    "Model": names,
681                    "Accuracy": Accuracy,
682                    "Balanced Accuracy": B_Accuracy,
683                    "ROC AUC": ROC_AUC,
684                    "F1 Score": F1,
685                    "Time Taken": TIME,
686                }
687            )
688        else:
689            scores = pd.DataFrame(
690                {
691                    "Model": names,
692                    "Accuracy": Accuracy,
693                    "Balanced Accuracy": B_Accuracy,
694                    "ROC AUC": ROC_AUC,
695                    "F1 Score": F1,
696                    "Custom metric": CUSTOM_METRIC,
697                    "Time Taken": TIME,
698                }
699            )
700        scores = scores.sort_values(by=self.sort_by, ascending=False).set_index(
701            "Model"
702        )
703
704        self.best_model_ = self.models_[scores.index[0]]
705
706        if self.predictions is True:
707            return scores, predictions
708
709        return scores

Fit classifiers to X_train and y_train, predict and score on X_test, y_test.

Parameters:

X_train: array-like,
    Training vectors, where the number of rows is the number of samples
    and the number of columns is the number of features.

X_test: array-like,
    Testing vectors, where the number of rows is the number of samples
    and the number of columns is the number of features.

y_train: array-like,
    Training target values, one per training sample.

y_test: array-like,
    Testing target values, one per testing sample.

Returns:

scores: Pandas DataFrame
    Metrics of all the models, one row per model.

predictions: dict
    Test-set predictions of all the models, keyed by model name
    (returned only when predictions=True).
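A minimal usage sketch for this fit method (assuming it belongs to nnetsauce's LazyDeepClassifier; the dataset, split and variable names below are illustrative, not taken from the documentation above):

import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

# predictions=True makes fit return a (scores, predictions) tuple instead of scores only
clf = ns.LazyDeepClassifier(verbose=0, ignore_warnings=True, predictions=True)
scores, preds = clf.fit(X_train, X_test, y_train, y_test)
print(scores.head())           # leaderboard, one row per model, sorted by the sort_by metric
print(list(preds.keys())[:3])  # test-set predictions are keyed by model name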
def provide_models(self, X_train, X_test, y_train, y_test):
723    def provide_models(self, X_train, X_test, y_train, y_test):
724        """Returns all the model objects trained. If fit hasn't been called yet,
725        then it's called to return the models.
726
727        Parameters:
728
729        X_train: array-like,
730            Training vectors, where rows is the number of samples
731            and columns is the number of features.
732
733        X_test: array-like,
734            Testing vectors, where rows is the number of samples
735            and columns is the number of features.
736
737        y_train: array-like,
738            Training target values (one value per training sample),
739            used to fit the classifiers.
740
741        y_test: array-like,
742            Testing target values (one value per testing sample),
743            used to score the fitted classifiers.
744
745        Returns:
746
747            models: dict-object,
748                Returns a dictionary with each model's pipeline as value
749                and key = name of the model.
750        """
751        if len(self.models_.keys()) == 0:
752            self.fit(X_train, X_test, y_train, y_test)
753
754        return self.models_

Returns all the model objects trained. If fit hasn't been called yet, then it's called to return the models.

Parameters:

X_train: array-like, Training vectors, where the number of rows is the number of samples and the number of columns is the number of features.

X_test: array-like, Testing vectors, where the number of rows is the number of samples and the number of columns is the number of features.

y_train: array-like, Training target values, one per training sample.

y_test: array-like, Testing target values, one per testing sample.

Returns:

models: dict-object,
    Returns a dictionary with each model's pipeline as value
    and key = name of the model.
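Continuing the classifier sketch above, provide_models can then be used to retrieve the fitted pipelines (variable names are illustrative):

models = clf.provide_models(X_train, X_test, y_train, y_test)
first_name = list(models.keys())[0]
print(first_name, "->", models[first_name])  # fitted model object for that name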
class LazyDeepRegressor(nnetsauce.custom.custom.Custom, sklearn.base.RegressorMixin):
 90class LazyDeepRegressor(Custom, RegressorMixin):
 91    """
 92        Fitting -- almost -- all the regression algorithms with layers of
 93        nnetsauce's CustomRegressor and returning their scores.
 94
 95    Parameters:
 96
 97        verbose: int, optional (default=0)
 98            Any positive number for verbosity.
 99
100        ignore_warnings: bool, optional (default=True)
101            When set to True, warnings related to algorithms that fail to run are ignored.
102
103        custom_metric: function, optional (default=None)
104            When a function is provided, models are evaluated based on the custom evaluation metric provided.
105
106        predictions: bool, optional (default=False)
107            When set to True, the predictions of all the models are returned as a dataframe.
108
109        sort_by: string, optional (default='RMSE')
110            Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken',
111            or a custom metric identified by its name and provided by custom_metric.
112
113        random_state: int, optional (default=42)
114            Reproducibility seed.
115
116        estimators: list, optional (default='all')
117            List of estimator names, or just 'all' (default='all').
118
119        preprocess: bool
120            Preprocessing is done when set to True.
121
122        n_jobs : int, optional (default=None)
123            Number of jobs to run in parallel, when possible; for now, only used by individual models that support it.
124
125        n_layers: int, optional (default=3)
126            Number of layers of CustomRegressors to be used.
127
128        All the other parameters are the same as CustomRegressor's.
129
130    Attributes:
131
132        models_: dict-object
133            Returns a dictionary with each model pipeline as value
134            with key as name of models.
135
136        best_model_: object
137            Returns the best model pipeline based on the sort_by metric.
138
139    Examples:
140
141        import nnetsauce as ns
142        import numpy as np
143        from sklearn import datasets
144        from sklearn.utils import shuffle
145
146        diabetes = datasets.load_diabetes()
147        X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
148        X = X.astype(np.float32)
149
150        offset = int(X.shape[0] * 0.9)
151        X_train, y_train = X[:offset], y[:offset]
152        X_test, y_test = X[offset:], y[offset:]
153
154        reg = ns.LazyDeepRegressor(verbose=0, ignore_warnings=False, custom_metric=None, predictions=True)
155        models, predictions = reg.fit(X_train, X_test, y_train, y_test)
156        print(models)
157
158    """
159
160    def __init__(
161        self,
162        verbose=0,
163        ignore_warnings=True,
164        custom_metric=None,
165        predictions=False,
166        sort_by="RMSE",
167        random_state=42,
168        estimators="all",
169        preprocess=False,
170        n_jobs=None,
171        # Defining depth
172        n_layers=3,
173        # CustomRegressor attributes
174        obj=None,
175        n_hidden_features=5,
176        activation_name="relu",
177        a=0.01,
178        nodes_sim="sobol",
179        bias=True,
180        dropout=0,
181        direct_link=True,
182        n_clusters=2,
183        cluster_encode=True,
184        type_clust="kmeans",
185        type_scaling=("std", "std", "std"),
186        col_sample=1,
187        row_sample=1,
188        seed=123,
189        backend="cpu",
190    ):
191        self.verbose = verbose
192        self.ignore_warnings = ignore_warnings
193        self.custom_metric = custom_metric
194        self.predictions = predictions
195        self.sort_by = sort_by
196        self.models_ = {}
197        self.best_model_ = None
198        self.random_state = random_state
199        self.estimators = estimators
200        self.preprocess = preprocess
201        self.n_layers = n_layers - 1
202        self.n_jobs = n_jobs
203        super().__init__(
204            obj=obj,
205            n_hidden_features=n_hidden_features,
206            activation_name=activation_name,
207            a=a,
208            nodes_sim=nodes_sim,
209            bias=bias,
210            dropout=dropout,
211            direct_link=direct_link,
212            n_clusters=n_clusters,
213            cluster_encode=cluster_encode,
214            type_clust=type_clust,
215            type_scaling=type_scaling,
216            col_sample=col_sample,
217            row_sample=row_sample,
218            seed=seed,
219            backend=backend,
220        )
221
222    def fit(self, X_train, X_test, y_train, y_test):
223        """Fit Regression algorithms to X_train and y_train, predict and score on X_test, y_test.
224
225        Parameters:
226
227            X_train : array-like,
228                Training vectors, where rows is the number of samples
229                and columns is the number of features.
230
231            X_test : array-like,
232                Testing vectors, where rows is the number of samples
233                and columns is the number of features.
234
235            y_train : array-like,
236                Training target values (one value per training sample),
237                used to fit the regressors.
238
239            y_test : array-like,
240                Testing target values (one value per testing sample),
241                used to score the fitted regressors.
242
243        Returns:
244        -------
245        scores:  Pandas DataFrame
246            Returns metrics of all the models in a Pandas DataFrame.
247
248        predictions : Pandas DataFrame
249            Returns predictions of all the models in a Pandas DataFrame.
250
251        """
252        R2 = []
253        ADJR2 = []
254        RMSE = []
255        # WIN = []
256        names = []
257        TIME = []
258        predictions = {}
259
260        if self.custom_metric:
261            CUSTOM_METRIC = []
262
263        if isinstance(X_train, np.ndarray):
264            X_train = pd.DataFrame(X_train)
265            X_test = pd.DataFrame(X_test)
266
267        numeric_features = X_train.select_dtypes(include=[np.number]).columns
268        categorical_features = X_train.select_dtypes(include=["object"]).columns
269
270        categorical_low, categorical_high = get_card_split(
271            X_train, categorical_features
272        )
273
274        if self.preprocess is True:
275            preprocessor = ColumnTransformer(
276                transformers=[
277                    ("numeric", numeric_transformer, numeric_features),
278                    (
279                        "categorical_low",
280                        categorical_transformer_low,
281                        categorical_low,
282                    ),
283                    (
284                        "categorical_high",
285                        categorical_transformer_high,
286                        categorical_high,
287                    ),
288                ]
289            )
290
291        # base models
292        try:
293            baseline_names = ["RandomForestRegressor", "XGBRegressor"]
294            baseline_models = [RandomForestRegressor(), xgb.XGBRegressor()]
295        except Exception as exception:
296            baseline_names = ["RandomForestRegressor"]
297            baseline_models = [RandomForestRegressor()]
298
299        for name, model in zip(baseline_names, baseline_models):
300            start = time.time()
301            try:
302                model.fit(X_train, y_train)
303                self.models_[name] = model
304                y_pred = model.predict(X_test)
305                r_squared = r2_score(y_test, y_pred)
306                adj_rsquared = adjusted_rsquared(
307                    r_squared, X_test.shape[0], X_test.shape[1]
308                )
309                rmse = np.sqrt(np.mean((y_test - y_pred) ** 2))
310
311                names.append(name)
312                R2.append(r_squared)
313                ADJR2.append(adj_rsquared)
314                RMSE.append(rmse)
315                TIME.append(time.time() - start)
316
317                if self.custom_metric:
318                    custom_metric = self.custom_metric(y_test, y_pred)
319                    CUSTOM_METRIC.append(custom_metric)
320
321                if self.verbose > 0:
322                    scores_verbose = {
323                        "Model": name,
324                        "R-Squared": r_squared,
325                        "Adjusted R-Squared": adj_rsquared,
326                        "RMSE": rmse,
327                        "Time taken": time.time() - start,
328                    }
329
330                    if self.custom_metric:
331                        scores_verbose[
332                            self.custom_metric.__name__
333                        ] = custom_metric
334
335                    print(scores_verbose)
336                if self.predictions:
337                    predictions[name] = y_pred
338            except Exception as exception:
339                if self.ignore_warnings is False:
340                    print(name + " model failed to execute")
341                    print(exception)
342
343        if self.estimators == "all":
344            self.regressors = DEEPREGRESSORS
345        else:
346            self.regressors = [
347                ("DeepCustomRegressor(" + est[0] + ")", est[1])
348                for est in all_estimators()
349                if (
350                    issubclass(est[1], RegressorMixin)
351                    and (est[0] in self.estimators)
352                )
353            ]
354
355        if self.preprocess is True:
356            for name, model in tqdm(self.regressors):  # do parallel exec
357                start = time.time()
358                try:
359                    if "random_state" in model().get_params().keys():
360                        layer_regr = CustomRegressor(
361                            obj=model(random_state=self.random_state),
362                            n_hidden_features=self.n_hidden_features,
363                            activation_name=self.activation_name,
364                            a=self.a,
365                            nodes_sim=self.nodes_sim,
366                            bias=self.bias,
367                            dropout=self.dropout,
368                            direct_link=self.direct_link,
369                            n_clusters=self.n_clusters,
370                            cluster_encode=self.cluster_encode,
371                            type_clust=self.type_clust,
372                            type_scaling=self.type_scaling,
373                            col_sample=self.col_sample,
374                            row_sample=self.row_sample,
375                            seed=self.seed,
376                            backend=self.backend,
377                        )
378                    else:
379                        layer_regr = CustomRegressor(
380                            obj=model(),
381                            n_hidden_features=self.n_hidden_features,
382                            activation_name=self.activation_name,
383                            a=self.a,
384                            nodes_sim=self.nodes_sim,
385                            bias=self.bias,
386                            dropout=self.dropout,
387                            direct_link=self.direct_link,
388                            n_clusters=self.n_clusters,
389                            cluster_encode=self.cluster_encode,
390                            type_clust=self.type_clust,
391                            type_scaling=self.type_scaling,
392                            col_sample=self.col_sample,
393                            row_sample=self.row_sample,
394                            seed=self.seed,
395                            backend=self.backend,
396                        )
397
398                    for _ in range(self.n_layers):
399                        layer_regr = deepcopy(
400                            CustomRegressor(
401                                obj=layer_regr,
402                                n_hidden_features=self.n_hidden_features,
403                                activation_name=self.activation_name,
404                                a=self.a,
405                                nodes_sim=self.nodes_sim,
406                                bias=self.bias,
407                                dropout=self.dropout,
408                                direct_link=self.direct_link,
409                                n_clusters=self.n_clusters,
410                                cluster_encode=self.cluster_encode,
411                                type_clust=self.type_clust,
412                                type_scaling=self.type_scaling,
413                                col_sample=self.col_sample,
414                                row_sample=self.row_sample,
415                                seed=self.seed,
416                                backend=self.backend,
417                            )
418                        )
419
420                    layer_regr.fit(X_train, y_train)
421
422                    pipe = Pipeline(
423                        steps=[
424                            ("preprocessor", preprocessor),
425                            ("regressor", layer_regr),
426                        ]
427                    )
428
429                    pipe.fit(X_train, y_train)
430
431                    self.models_[name] = pipe
432                    y_pred = pipe.predict(X_test)
433                    r_squared = r2_score(y_test, y_pred)
434                    adj_rsquared = adjusted_rsquared(
435                        r_squared, X_test.shape[0], X_test.shape[1]
436                    )
437                    rmse = np.sqrt(np.mean((y_test - y_pred) ** 2))
438
439                    names.append(name)
440                    R2.append(r_squared)
441                    ADJR2.append(adj_rsquared)
442                    RMSE.append(rmse)
443                    TIME.append(time.time() - start)
444
445                    if self.custom_metric:
446                        custom_metric = self.custom_metric(y_test, y_pred)
447                        CUSTOM_METRIC.append(custom_metric)
448
449                    if self.verbose > 0:
450                        scores_verbose = {
451                            "Model": name,
452                            "R-Squared": r_squared,
453                            "Adjusted R-Squared": adj_rsquared,
454                            "RMSE": rmse,
455                            "Time taken": time.time() - start,
456                        }
457
458                        if self.custom_metric:
459                            scores_verbose[
460                                self.custom_metric.__name__
461                            ] = custom_metric
462
463                        print(scores_verbose)
464                    if self.predictions:
465                        predictions[name] = y_pred
466                except Exception as exception:
467                    if self.ignore_warnings is False:
468                        print(name + " model failed to execute")
469                        print(exception)
470
471        else:  # no preprocessing
472            for name, model in tqdm(self.regressors):  # do parallel exec
473                start = time.time()
474                try:
475                    if "random_state" in model().get_params().keys():
476                        layer_regr = CustomRegressor(
477                            obj=model(random_state=self.random_state),
478                            n_hidden_features=self.n_hidden_features,
479                            activation_name=self.activation_name,
480                            a=self.a,
481                            nodes_sim=self.nodes_sim,
482                            bias=self.bias,
483                            dropout=self.dropout,
484                            direct_link=self.direct_link,
485                            n_clusters=self.n_clusters,
486                            cluster_encode=self.cluster_encode,
487                            type_clust=self.type_clust,
488                            type_scaling=self.type_scaling,
489                            col_sample=self.col_sample,
490                            row_sample=self.row_sample,
491                            seed=self.seed,
492                            backend=self.backend,
493                        )
494                    else:
495                        layer_regr = CustomRegressor(
496                            obj=model(),
497                            n_hidden_features=self.n_hidden_features,
498                            activation_name=self.activation_name,
499                            a=self.a,
500                            nodes_sim=self.nodes_sim,
501                            bias=self.bias,
502                            dropout=self.dropout,
503                            direct_link=self.direct_link,
504                            n_clusters=self.n_clusters,
505                            cluster_encode=self.cluster_encode,
506                            type_clust=self.type_clust,
507                            type_scaling=self.type_scaling,
508                            col_sample=self.col_sample,
509                            row_sample=self.row_sample,
510                            seed=self.seed,
511                            backend=self.backend,
512                        )
513
514                    layer_regr.fit(X_train, y_train)
515
516                    for _ in range(self.n_layers):
517                        layer_regr = deepcopy(
518                            CustomRegressor(
519                                obj=layer_regr,
520                                n_hidden_features=self.n_hidden_features,
521                                activation_name=self.activation_name,
522                                a=self.a,
523                                nodes_sim=self.nodes_sim,
524                                bias=self.bias,
525                                dropout=self.dropout,
526                                direct_link=self.direct_link,
527                                n_clusters=self.n_clusters,
528                                cluster_encode=self.cluster_encode,
529                                type_clust=self.type_clust,
530                                type_scaling=self.type_scaling,
531                                col_sample=self.col_sample,
532                                row_sample=self.row_sample,
533                                seed=self.seed,
534                                backend=self.backend,
535                            )
536                        )
537
538                        # layer_regr.fit(X_train, y_train)
539
540                    layer_regr.fit(X_train, y_train)
541
542                    self.models_[name] = layer_regr
543                    y_pred = layer_regr.predict(X_test)
544
545                    r_squared = r2_score(y_test, y_pred)
546                    adj_rsquared = adjusted_rsquared(
547                        r_squared, X_test.shape[0], X_test.shape[1]
548                    )
549                    rmse = np.sqrt(np.mean((y_test - y_pred) ** 2))
550
551                    names.append(name)
552                    R2.append(r_squared)
553                    ADJR2.append(adj_rsquared)
554                    RMSE.append(rmse)
555                    TIME.append(time.time() - start)
556
557                    if self.custom_metric:
558                        custom_metric = self.custom_metric(y_test, y_pred)
559                        CUSTOM_METRIC.append(custom_metric)
560
561                    if self.verbose > 0:
562                        scores_verbose = {
563                            "Model": name,
564                            "R-Squared": r_squared,
565                            "Adjusted R-Squared": adj_rsquared,
566                            "RMSE": rmse,
567                            "Time taken": time.time() - start,
568                        }
569
570                        if self.custom_metric:
571                            scores_verbose[
572                                self.custom_metric.__name__
573                            ] = custom_metric
574
575                        print(scores_verbose)
576                    if self.predictions:
577                        predictions[name] = y_pred
578                except Exception as exception:
579                    if self.ignore_warnings is False:
580                        print(name + " model failed to execute")
581                        print(exception)
582
583        scores = {
584            "Model": names,
585            "Adjusted R-Squared": ADJR2,
586            "R-Squared": R2,
587            "RMSE": RMSE,
588            "Time Taken": TIME,
589        }
590
591        if self.custom_metric:
592            scores["Custom metric"] = CUSTOM_METRIC
593
594        scores = pd.DataFrame(scores)
595        scores = scores.sort_values(by=self.sort_by, ascending=True).set_index(
596            "Model"
597        )
598
599        self.best_model_ = self.models_[scores.index[0]]
600
601        if self.predictions is True:
602            return scores, predictions
603
604        return scores
605
606    def get_best_model(self):
607        """
608        This function returns the best model pipeline based on the sort_by metric.
609
610        Returns:
611
612            best_model: object,
613                Returns the best model pipeline based on the sort_by metric.
614
615        """
616        return self.best_model_
617
618    def provide_models(self, X_train, X_test, y_train, y_test):
619        """
620        Returns all the model objects trained by the fit method.
621        If fit has not been called yet, it is called before the models are returned.
622
623        Parameters:
624
625            X_train : array-like,
626                Training vectors, where rows is the number of samples
627                and columns is the number of features.
628
629            X_test : array-like,
630                Testing vectors, where rows is the number of samples
631                and columns is the number of features.
632
633            y_train : array-like,
634                Training target values (one value per training sample),
635                used to fit the regressors.
636
637            y_test : array-like,
638                Testing target values (one value per testing sample),
639                used to score the fitted regressors.
640
641        Returns:
642
643            models: dict-object,
644                Returns a dictionary with each model pipeline as value
645                with key as name of models.
646
647        """
648        if len(self.models_.keys()) == 0:
649            self.fit(X_train, X_test, y_train, y_test)
650
651        return self.models_

Fitting -- almost -- all the regression algorithms with layers of nnetsauce's CustomRegressor and returning their scores.

Parameters:

verbose: int, optional (default=0)
    Any positive number for verbosity.

ignore_warnings: bool, optional (default=True)
    When set to True, warnings related to algorithms that fail to run are ignored.

custom_metric: function, optional (default=None)
    When a function is provided, models are evaluated based on the custom evaluation metric provided.

predictions: bool, optional (default=False)
    When set to True, the predictions of all the models are returned as a dataframe.

sort_by: string, optional (default='RMSE')
    Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken',
    or a custom metric identified by its name and provided by custom_metric.

random_state: int, optional (default=42)
    Reproducibility seed.

estimators: list, optional (default='all')
    List of estimator names, or just 'all' (default='all').

preprocess: bool
    Preprocessing is done when set to True.

n_jobs : int, optional (default=None)
    Number of jobs to run in parallel, when possible; for now, only used by individual models that support it.

n_layers: int, optional (default=3)
    Number of layers of CustomRegressors to be used.

All the other parameters are the same as CustomRegressor's.

Attributes:

models_: dict-object
    A dictionary with each fitted model pipeline as value
    and the model name as key.

best_model_: object
    Returns the best model pipeline based on the sort_by metric.

Examples:

import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle

diabetes = datasets.load_diabetes()
X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
X = X.astype(np.float32)

offset = int(X.shape[0] * 0.9)
X_train, y_train = X[:offset], y[:offset]
X_test, y_test = X[offset:], y[offset:]

reg = ns.LazyDeepRegressor(verbose=0, ignore_warnings=False, custom_metric=None, predictions=True)
models, predictions = reg.fit(X_train, X_test, y_train, y_test)
print(models)
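Continuing the example above (a sketch using get_best_model and provide_models, both documented further down this page):

best = reg.get_best_model()   # pipeline ranked first by the sort_by metric (lowest RMSE by default)
print(best)

all_models = reg.provide_models(X_train, X_test, y_train, y_test)
print(list(all_models.keys()))  # names of every fitted model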
def fit(self, X_train, X_test, y_train, y_test):
222    def fit(self, X_train, X_test, y_train, y_test):
223        """Fit Regression algorithms to X_train and y_train, predict and score on X_test, y_test.
224
225        Parameters:
226
227            X_train : array-like,
228                Training vectors, where rows is the number of samples
229                and columns is the number of features.
230
231            X_test : array-like,
232                Testing vectors, where rows is the number of samples
233                and columns is the number of features.
234
235            y_train : array-like,
236                Training target values (one value per training sample),
237                used to fit the regressors.
238
239            y_test : array-like,
240                Testing target values (one value per testing sample),
241                used to score the fitted regressors.
242
243        Returns:
244        -------
245        scores:  Pandas DataFrame
246            Returns metrics of all the models in a Pandas DataFrame.
247
248        predictions : Pandas DataFrame
249            Returns predictions of all the models in a Pandas DataFrame.
250
251        """
252        R2 = []
253        ADJR2 = []
254        RMSE = []
255        # WIN = []
256        names = []
257        TIME = []
258        predictions = {}
259
260        if self.custom_metric:
261            CUSTOM_METRIC = []
262
263        if isinstance(X_train, np.ndarray):
264            X_train = pd.DataFrame(X_train)
265            X_test = pd.DataFrame(X_test)
266
267        numeric_features = X_train.select_dtypes(include=[np.number]).columns
268        categorical_features = X_train.select_dtypes(include=["object"]).columns
269
270        categorical_low, categorical_high = get_card_split(
271            X_train, categorical_features
272        )
273
274        if self.preprocess is True:
275            preprocessor = ColumnTransformer(
276                transformers=[
277                    ("numeric", numeric_transformer, numeric_features),
278                    (
279                        "categorical_low",
280                        categorical_transformer_low,
281                        categorical_low,
282                    ),
283                    (
284                        "categorical_high",
285                        categorical_transformer_high,
286                        categorical_high,
287                    ),
288                ]
289            )
290
291        # base models
292        try:
293            baseline_names = ["RandomForestRegressor", "XGBRegressor"]
294            baseline_models = [RandomForestRegressor(), xgb.XGBRegressor()]
295        except Exception as exception:
296            baseline_names = ["RandomForestRegressor"]
297            baseline_models = [RandomForestRegressor()]
298
299        for name, model in zip(baseline_names, baseline_models):
300            start = time.time()
301            try:
302                model.fit(X_train, y_train)
303                self.models_[name] = model
304                y_pred = model.predict(X_test)
305                r_squared = r2_score(y_test, y_pred)
306                adj_rsquared = adjusted_rsquared(
307                    r_squared, X_test.shape[0], X_test.shape[1]
308                )
309                rmse = np.sqrt(np.mean((y_test - y_pred) ** 2))
310
311                names.append(name)
312                R2.append(r_squared)
313                ADJR2.append(adj_rsquared)
314                RMSE.append(rmse)
315                TIME.append(time.time() - start)
316
317                if self.custom_metric:
318                    custom_metric = self.custom_metric(y_test, y_pred)
319                    CUSTOM_METRIC.append(custom_metric)
320
321                if self.verbose > 0:
322                    scores_verbose = {
323                        "Model": name,
324                        "R-Squared": r_squared,
325                        "Adjusted R-Squared": adj_rsquared,
326                        "RMSE": rmse,
327                        "Time taken": time.time() - start,
328                    }
329
330                    if self.custom_metric:
331                        scores_verbose[
332                            self.custom_metric.__name__
333                        ] = custom_metric
334
335                    print(scores_verbose)
336                if self.predictions:
337                    predictions[name] = y_pred
338            except Exception as exception:
339                if self.ignore_warnings is False:
340                    print(name + " model failed to execute")
341                    print(exception)
342
343        if self.estimators == "all":
344            self.regressors = DEEPREGRESSORS
345        else:
346            self.regressors = [
347                ("DeepCustomRegressor(" + est[0] + ")", est[1])
348                for est in all_estimators()
349                if (
350                    issubclass(est[1], RegressorMixin)
351                    and (est[0] in self.estimators)
352                )
353            ]
354
355        if self.preprocess is True:
356            for name, model in tqdm(self.regressors):  # do parallel exec
357                start = time.time()
358                try:
359                    if "random_state" in model().get_params().keys():
360                        layer_regr = CustomRegressor(
361                            obj=model(random_state=self.random_state),
362                            n_hidden_features=self.n_hidden_features,
363                            activation_name=self.activation_name,
364                            a=self.a,
365                            nodes_sim=self.nodes_sim,
366                            bias=self.bias,
367                            dropout=self.dropout,
368                            direct_link=self.direct_link,
369                            n_clusters=self.n_clusters,
370                            cluster_encode=self.cluster_encode,
371                            type_clust=self.type_clust,
372                            type_scaling=self.type_scaling,
373                            col_sample=self.col_sample,
374                            row_sample=self.row_sample,
375                            seed=self.seed,
376                            backend=self.backend,
377                        )
378                    else:
379                        layer_regr = CustomRegressor(
380                            obj=model(),
381                            n_hidden_features=self.n_hidden_features,
382                            activation_name=self.activation_name,
383                            a=self.a,
384                            nodes_sim=self.nodes_sim,
385                            bias=self.bias,
386                            dropout=self.dropout,
387                            direct_link=self.direct_link,
388                            n_clusters=self.n_clusters,
389                            cluster_encode=self.cluster_encode,
390                            type_clust=self.type_clust,
391                            type_scaling=self.type_scaling,
392                            col_sample=self.col_sample,
393                            row_sample=self.row_sample,
394                            seed=self.seed,
395                            backend=self.backend,
396                        )
397
398                    for _ in range(self.n_layers):
399                        layer_regr = deepcopy(
400                            CustomRegressor(
401                                obj=layer_regr,
402                                n_hidden_features=self.n_hidden_features,
403                                activation_name=self.activation_name,
404                                a=self.a,
405                                nodes_sim=self.nodes_sim,
406                                bias=self.bias,
407                                dropout=self.dropout,
408                                direct_link=self.direct_link,
409                                n_clusters=self.n_clusters,
410                                cluster_encode=self.cluster_encode,
411                                type_clust=self.type_clust,
412                                type_scaling=self.type_scaling,
413                                col_sample=self.col_sample,
414                                row_sample=self.row_sample,
415                                seed=self.seed,
416                                backend=self.backend,
417                            )
418                        )
419
420                    layer_regr.fit(X_train, y_train)
421
422                    pipe = Pipeline(
423                        steps=[
424                            ("preprocessor", preprocessor),
425                            ("regressor", layer_regr),
426                        ]
427                    )
428
429                    pipe.fit(X_train, y_train)
430
431                    self.models_[name] = pipe
432                    y_pred = pipe.predict(X_test)
433                    r_squared = r2_score(y_test, y_pred)
434                    adj_rsquared = adjusted_rsquared(
435                        r_squared, X_test.shape[0], X_test.shape[1]
436                    )
437                    rmse = np.sqrt(np.mean((y_test - y_pred) ** 2))
438
439                    names.append(name)
440                    R2.append(r_squared)
441                    ADJR2.append(adj_rsquared)
442                    RMSE.append(rmse)
443                    TIME.append(time.time() - start)
444
445                    if self.custom_metric:
446                        custom_metric = self.custom_metric(y_test, y_pred)
447                        CUSTOM_METRIC.append(custom_metric)
448
449                    if self.verbose > 0:
450                        scores_verbose = {
451                            "Model": name,
452                            "R-Squared": r_squared,
453                            "Adjusted R-Squared": adj_rsquared,
454                            "RMSE": rmse,
455                            "Time taken": time.time() - start,
456                        }
457
458                        if self.custom_metric:
459                            scores_verbose[
460                                self.custom_metric.__name__
461                            ] = custom_metric
462
463                        print(scores_verbose)
464                    if self.predictions:
465                        predictions[name] = y_pred
466                except Exception as exception:
467                    if self.ignore_warnings is False:
468                        print(name + " model failed to execute")
469                        print(exception)
470
471        else:  # no preprocessing
472            for name, model in tqdm(self.regressors):  # do parallel exec
473                start = time.time()
474                try:
475                    if "random_state" in model().get_params().keys():
476                        layer_regr = CustomRegressor(
477                            obj=model(random_state=self.random_state),
478                            n_hidden_features=self.n_hidden_features,
479                            activation_name=self.activation_name,
480                            a=self.a,
481                            nodes_sim=self.nodes_sim,
482                            bias=self.bias,
483                            dropout=self.dropout,
484                            direct_link=self.direct_link,
485                            n_clusters=self.n_clusters,
486                            cluster_encode=self.cluster_encode,
487                            type_clust=self.type_clust,
488                            type_scaling=self.type_scaling,
489                            col_sample=self.col_sample,
490                            row_sample=self.row_sample,
491                            seed=self.seed,
492                            backend=self.backend,
493                        )
494                    else:
495                        layer_regr = CustomRegressor(
496                            obj=model(),
497                            n_hidden_features=self.n_hidden_features,
498                            activation_name=self.activation_name,
499                            a=self.a,
500                            nodes_sim=self.nodes_sim,
501                            bias=self.bias,
502                            dropout=self.dropout,
503                            direct_link=self.direct_link,
504                            n_clusters=self.n_clusters,
505                            cluster_encode=self.cluster_encode,
506                            type_clust=self.type_clust,
507                            type_scaling=self.type_scaling,
508                            col_sample=self.col_sample,
509                            row_sample=self.row_sample,
510                            seed=self.seed,
511                            backend=self.backend,
512                        )
513
514                    layer_regr.fit(X_train, y_train)
515
516                    for _ in range(self.n_layers):
517                        layer_regr = deepcopy(
518                            CustomRegressor(
519                                obj=layer_regr,
520                                n_hidden_features=self.n_hidden_features,
521                                activation_name=self.activation_name,
522                                a=self.a,
523                                nodes_sim=self.nodes_sim,
524                                bias=self.bias,
525                                dropout=self.dropout,
526                                direct_link=self.direct_link,
527                                n_clusters=self.n_clusters,
528                                cluster_encode=self.cluster_encode,
529                                type_clust=self.type_clust,
530                                type_scaling=self.type_scaling,
531                                col_sample=self.col_sample,
532                                row_sample=self.row_sample,
533                                seed=self.seed,
534                                backend=self.backend,
535                            )
536                        )
537
538                        # layer_regr.fit(X_train, y_train)
539
540                    layer_regr.fit(X_train, y_train)
541
542                    self.models_[name] = layer_regr
543                    y_pred = layer_regr.predict(X_test)
544
545                    r_squared = r2_score(y_test, y_pred)
546                    adj_rsquared = adjusted_rsquared(
547                        r_squared, X_test.shape[0], X_test.shape[1]
548                    )
549                    rmse = np.sqrt(np.mean((y_test - y_pred) ** 2))
550
551                    names.append(name)
552                    R2.append(r_squared)
553                    ADJR2.append(adj_rsquared)
554                    RMSE.append(rmse)
555                    TIME.append(time.time() - start)
556
557                    if self.custom_metric:
558                        custom_metric = self.custom_metric(y_test, y_pred)
559                        CUSTOM_METRIC.append(custom_metric)
560
561                    if self.verbose > 0:
562                        scores_verbose = {
563                            "Model": name,
564                            "R-Squared": r_squared,
565                            "Adjusted R-Squared": adj_rsquared,
566                            "RMSE": rmse,
567                            "Time taken": time.time() - start,
568                        }
569
570                        if self.custom_metric:
571                            scores_verbose[
572                                self.custom_metric.__name__
573                            ] = custom_metric
574
575                        print(scores_verbose)
576                    if self.predictions:
577                        predictions[name] = y_pred
578                except Exception as exception:
579                    if self.ignore_warnings is False:
580                        print(name + " model failed to execute")
581                        print(exception)
582
583        scores = {
584            "Model": names,
585            "Adjusted R-Squared": ADJR2,
586            "R-Squared": R2,
587            "RMSE": RMSE,
588            "Time Taken": TIME,
589        }
590
591        if self.custom_metric:
592            scores["Custom metric"] = CUSTOM_METRIC
593
594        scores = pd.DataFrame(scores)
595        scores = scores.sort_values(by=self.sort_by, ascending=True).set_index(
596            "Model"
597        )
598
599        self.best_model_ = self.models_[scores.index[0]]
600
601        if self.predictions is True:
602            return scores, predictions
603
604        return scores

Fit Regression algorithms to X_train and y_train, predict and score on X_test, y_test.

Parameters:

X_train : array-like,
    Training vectors, where the number of rows is the number of samples
    and the number of columns is the number of features.

X_test : array-like,
    Testing vectors, where the number of rows is the number of samples
    and the number of columns is the number of features.

y_train : array-like,
    Training target values, one per training sample.

y_test : array-like,
    Testing target values, one per testing sample.

Returns:

scores: Pandas DataFrame
    Metrics of all the models, one row per model.

predictions: dict
    Test-set predictions of all the models, keyed by model name
    (returned only when predictions=True).
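Because the return value depends on the predictions flag, both call patterns are sketched below (variable names are illustrative and reuse the train/test split from the class example above):

reg_scores_only = ns.LazyDeepRegressor()  # predictions=False (default): fit returns the scores DataFrame only
scores = reg_scores_only.fit(X_train, X_test, y_train, y_test)

reg_with_preds = ns.LazyDeepRegressor(predictions=True)  # fit also returns per-model test-set predictions
scores, preds = reg_with_preds.fit(X_train, X_test, y_train, y_test)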

def provide_models(self, X_train, X_test, y_train, y_test):
618    def provide_models(self, X_train, X_test, y_train, y_test):
619        """
620        Returns all the model objects trained by the fit method.
621        If fit has not been called yet, it is called before the models are returned.
622
623        Parameters:
624
625            X_train : array-like,
626                Training vectors, where rows is the number of samples
627                and columns is the number of features.
628
629            X_test : array-like,
630                Testing vectors, where rows is the number of samples
631                and columns is the number of features.
632
633            y_train : array-like,
634                Training target values (one value per training sample),
635                used to fit the regressors.
636
637            y_test : array-like,
638                Testing target values (one value per testing sample),
639                used to score the fitted regressors.
640
641        Returns:
642
643            models: dict-object,
644                Returns a dictionary with each model pipeline as value
645                with key as name of models.
646
647        """
648        if len(self.models_.keys()) == 0:
649            self.fit(X_train, X_test, y_train, y_test)
650
651        return self.models_

Returns all the model objects trained by the fit method. If fit has not been called yet, it is called before the models are returned.

Parameters:

X_train : array-like,
    Training vectors, where the number of rows is the number of samples
    and the number of columns is the number of features.

X_test : array-like,
    Testing vectors, where the number of rows is the number of samples
    and the number of columns is the number of features.

y_train : array-like,
    Training target values, one per training sample.

y_test : array-like,
    Testing target values, one per testing sample.

Returns:

models: dict-object,
    A dictionary with each fitted model pipeline as value
    and the model name as key.
class LazyMTS(nnetsauce.LazyDeepMTS):
 998class LazyMTS(LazyDeepMTS):
 999    """
1000    Fitting -- almost -- all the regression algorithms to multivariate time series
1001    and returning their scores (no layers).
1002
1003    Parameters:
1004
1005        verbose: int, optional (default=0)
1006            Any positive number for verbosity.
1007
1008        ignore_warnings: bool, optional (default=True)
1009            When set to True, warnings related to algorithms that are not
1010            able to run are ignored.
1011
1012        custom_metric: function, optional (default=None)
1013            When a function is provided, models are evaluated based on the custom
1014            evaluation metric provided.
1015
1016        predictions: bool, optional (default=False)
1017            When set to True, the predictions of all the models are returned as a dataframe.
1018
1019        sort_by: string, optional (default='RMSE')
1020            Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE',
1021            'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and
1022            provided by custom_metric.
1023
1024        random_state: int, optional (default=42)
1025            Reproducibility seed.
1026
1027        estimators: list, optional (default='all')
1028            List of estimator (regression algorithm) names, or just 'all' (default='all').
1029
1030        preprocess: bool, preprocessing is done when set to True
1031
1032        h: int, optional (default=None)
1033            Number of steps ahead to predict (when used, must be > 0 and < X_test.shape[0]).
1034
1035        All the other parameters are the same as MTS's.
1036
1037    Attributes:
1038
1039        models_: dict-object
1040            Returns a dictionary with each model pipeline as value
1041            with key as name of models.
1042
1043        best_model_: object
1044            Returns the best model pipeline based on the sort_by metric.
1045
1046    Examples:
1047
1048        See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict
1049
1050    """
1051
1052    def __init__(
1053        self,
1054        verbose=0,
1055        ignore_warnings=True,
1056        custom_metric=None,
1057        predictions=False,
1058        sort_by=None,  # leave it as is
1059        random_state=42,
1060        estimators="all",
1061        preprocess=False,
1062        h=None,
1063        # MTS attributes
1064        obj=None,
1065        n_hidden_features=5,
1066        activation_name="relu",
1067        a=0.01,
1068        nodes_sim="sobol",
1069        bias=True,
1070        dropout=0,
1071        direct_link=True,
1072        n_clusters=2,
1073        cluster_encode=True,
1074        type_clust="kmeans",
1075        type_scaling=("std", "std", "std"),
1076        lags=15,
1077        type_pi="scp2-kde",
1078        block_size=None,
1079        replications=None,
1080        kernel=None,
1081        agg="mean",
1082        seed=123,
1083        backend="cpu",
1084        show_progress=False,
1085    ):
1086        super().__init__(
1087            verbose=verbose,
1088            ignore_warnings=ignore_warnings,
1089            custom_metric=custom_metric,
1090            predictions=predictions,
1091            sort_by=sort_by,
1092            random_state=random_state,
1093            estimators=estimators,
1094            preprocess=preprocess,
1095            n_layers=1,
1096            h=h,
1097            obj=obj,
1098            n_hidden_features=n_hidden_features,
1099            activation_name=activation_name,
1100            a=a,
1101            nodes_sim=nodes_sim,
1102            bias=bias,
1103            dropout=dropout,
1104            direct_link=direct_link,
1105            n_clusters=n_clusters,
1106            cluster_encode=cluster_encode,
1107            type_clust=type_clust,
1108            type_scaling=type_scaling,
1109            lags=lags,
1110            type_pi=type_pi,
1111            block_size=block_size,
1112            replications=replications,
1113            kernel=kernel,
1114            agg=agg,
1115            seed=seed,
1116            backend=backend,
1117            show_progress=show_progress,
1118        )

Fitting -- almost -- all the regression algorithms to multivariate time series and returning their scores (no layers).

Parameters:

verbose: int, optional (default=0)
    Any positive number for verbosity.

ignore_warnings: bool, optional (default=True)
    When set to True, warnings related to algorithms that are not
    able to run are ignored.

custom_metric: function, optional (default=None)
    When a function is provided, models are also evaluated based on this
    custom evaluation metric.

predictions: bool, optional (default=False)
    When set to True, the predictions of all the models are returned as a data frame.

sort_by: string, optional (default='RMSE')
    Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE',
    'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and
    provided by custom_metric. Defaults to 'WINKLERSCORE' when prediction intervals
    are requested (replications is not None or type_pi == "gaussian").

random_state: int, optional (default=42)
    Reproducibility seed.

estimators: list, optional (default='all')
    List of estimator (regression algorithm) names to try, or just 'all'.

preprocess: bool, optional (default=False)
    When set to True, preprocessing is applied to the data.

h: int, optional (default=None)
    Number of steps ahead to predict (when used, must be > 0 and < X_test.shape[0]).

All the other parameters are the same as MTS's.

Attributes:

models_: dict-object
    A dictionary of fitted model pipelines, keyed by model name.

best_model_: object
    Returns the best model pipeline based on the sort_by metric.

Examples:

See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict
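
Below is a minimal, untested usage sketch (not taken from the linked post; the toy data, the train/test split, and the parameter values are illustrative placeholders):

    import numpy as np
    import pandas as pd
    import nnetsauce as ns

    # toy multivariate time series: two random-walk series, 100 time steps (placeholder data)
    rng = np.random.default_rng(42)
    df = pd.DataFrame(rng.normal(size=(100, 2)).cumsum(axis=0),
                      columns=["series1", "series2"])
    X_train, X_test = df.iloc[:80, :], df.iloc[80:, :]

    # fit -- almost -- all regression algorithms as MTS forecasters (no layers)
    regr_mts = ns.LazyMTS(verbose=0, ignore_warnings=True, lags=10)
    scores = regr_mts.fit(X_train, X_test)
    print(scores)                     # one row of metrics per model, sorted by sort_by
    print(regr_mts.get_best_model())  # best pipeline according to sort_by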
class LazyDeepMTS(nnetsauce.MTS):
104class LazyDeepMTS(MTS):
105    """
106
107    Fitting -- almost -- all the regression algorithms with layers of
108    nnetsauce's CustomRegressor to multivariate time series
109    and returning their scores.
110
111    Parameters:
112
113        verbose: int, optional (default=0)
114            Any positive number for verbosity.
115
116        ignore_warnings: bool, optional (default=True)
117            When set to True, warnings related to algorithms that are not
118            able to run are ignored.
119
120        custom_metric: function, optional (default=None)
121            When a function is provided, models are also evaluated based on this
122            custom evaluation metric.
123
124        predictions: bool, optional (default=False)
125            When set to True, the predictions of all the models are returned as a data frame.
126
127        sort_by: string, optional (default='RMSE')
128            Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE',
129            'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and
130            provided by custom_metric.
131
132        random_state: int, optional (default=42)
133            Reproducibility seed.
134
135        estimators: list, optional (default='all')
136            List of estimator (regression algorithm) names to try, or just 'all'.
137
138        preprocess: bool, optional (default=False). When set to True, preprocessing is applied to the data.
139
140        n_layers: int, optional (default=1)
141            Number of layers in the network. When set to 1, the model is equivalent to an MTS.
142
143        h: int, optional (default=None)
144            Number of steps ahead to predict (when used, must be > 0 and < X_test.shape[0]).
145
146        All the other parameters are the same as MTS's.
147
148    Attributes:
149
150        models_: dict-object
151            Returns a dictionary with each model pipeline as value
152            with key as name of models.
153
154        best_model_: object
155            Returns the best model pipeline based on the sort_by metric.
156
157    Examples:
158
159        See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict
160
161    """
162
163    def __init__(
164        self,
165        verbose=0,
166        ignore_warnings=True,
167        custom_metric=None,
168        predictions=False,
169        sort_by=None,  # leave it as is
170        random_state=42,
171        estimators="all",
172        preprocess=False,
173        n_layers=1,
174        h=None,
175        # MTS attributes
176        obj=None,
177        n_hidden_features=5,
178        activation_name="relu",
179        a=0.01,
180        nodes_sim="sobol",
181        bias=True,
182        dropout=0,
183        direct_link=True,
184        n_clusters=2,
185        cluster_encode=True,
186        type_clust="kmeans",
187        type_scaling=("std", "std", "std"),
188        lags=15,
189        type_pi="scp2-kde",
190        block_size=None,
191        replications=None,
192        kernel=None,
193        agg="mean",
194        seed=123,
195        backend="cpu",
196        show_progress=False,
197    ):
198        self.verbose = verbose
199        self.ignore_warnings = ignore_warnings
200        self.custom_metric = custom_metric
201        self.predictions = predictions
202        self.sort_by = sort_by
203        self.models_ = {}
204        self.best_model_ = None
205        self.random_state = random_state
206        self.estimators = estimators
207        self.preprocess = preprocess
208        self.n_layers = n_layers
209        self.h = h
210        super().__init__(
211            obj=obj,
212            n_hidden_features=n_hidden_features,
213            activation_name=activation_name,
214            a=a,
215            nodes_sim=nodes_sim,
216            bias=bias,
217            dropout=dropout,
218            direct_link=direct_link,
219            n_clusters=n_clusters,
220            cluster_encode=cluster_encode,
221            type_clust=type_clust,
222            type_scaling=type_scaling,
223            seed=seed,
224            backend=backend,
225            lags=lags,
226            type_pi=type_pi,
227            block_size=block_size,
228            replications=replications,
229            kernel=kernel,
230            agg=agg,
231            verbose=verbose,
232            show_progress=show_progress,
233        )
234        if self.replications is not None or self.type_pi == "gaussian":
235            if self.sort_by is None:
236                self.sort_by = "WINKLERSCORE"
237        else:
238            if self.sort_by is None:
239                self.sort_by = "RMSE"
240
241    def fit(self, X_train, X_test, xreg=None, per_series=False, **kwargs):
242        """Fit Regression algorithms to X_train, predict and score on X_test.
243
244        Parameters:
245
246            X_train: array-like or data frame,
247                Training vectors, where rows is the number of samples
248                and columns is the number of features.
249
250            X_test: array-like or data frame,
251                Testing vectors, where rows is the number of samples
252                and columns is the number of features.
253
254            xreg: array-like, optional (default=None)
255                Additional (external) regressors to be passed to self.obj
256                xreg must be in 'increasing' order (most recent observations last)
257
258            per_series: bool, optional (default=False)
259                When set to True, the metrics are computed series by series.
260
261            **kwargs: dict, optional (default=None)
262                Additional parameters to be passed to `fit` method of `obj`.
263
264        Returns:
265
266            scores: Pandas DataFrame
267                Returns metrics of all the models in a Pandas DataFrame.
268
269            predictions: Pandas DataFrame
270                Returns predictions of all the models in a Pandas DataFrame.
271
272        """
273        R2 = []
274        ADJR2 = []
275        ME = []
276        MPL = []
277        RMSE = []
278        MAE = []
279        MPE = []
280        MAPE = []
281        WINKLERSCORE = []
282        COVERAGE = []
283
284        # WIN = []
285        names = []
286        TIME = []
287        predictions = {}
288
289        if self.custom_metric is not None:
290            CUSTOM_METRIC = []
291
292        if self.h is None:
293            assert X_test is not None, "If h is None, X_test must be provided."
294
295        if isinstance(X_train, np.ndarray):
296            X_train = pd.DataFrame(X_train)
297            X_test = pd.DataFrame(X_test)
298
299        self.series_names = X_train.columns.tolist()
300
301        X_train = convert_df_to_numeric(X_train)
302        X_test = convert_df_to_numeric(X_test)
303
304        numeric_features = X_train.select_dtypes(include=[np.number]).columns
305        categorical_features = X_train.select_dtypes(include=["object"]).columns
306
307        categorical_low, categorical_high = get_card_split(
308            X_train, categorical_features
309        )
310
311        if self.preprocess:
312            preprocessor = ColumnTransformer(
313                transformers=[
314                    ("numeric", numeric_transformer, numeric_features),
315                    (
316                        "categorical_low",
317                        categorical_transformer_low,
318                        categorical_low,
319                    ),
320                    (
321                        "categorical_high",
322                        categorical_transformer_high,
323                        categorical_high,
324                    ),
325                ]
326            )
327
328        # baselines (Classical MTS) ----
329        for i, name in enumerate(["ARIMA", "ETS", "Theta", "VAR", "VECM"]):
330            try:
331                start = time.time()
332                regr = ClassicalMTS(model=name)
333                regr.fit(X_train, **kwargs)
334                self.models_[name] = regr
335                if self.h is None:
336                    X_pred = regr.predict(h=X_test.shape[0], **kwargs)
337                else:
338                    assert self.h > 0, "h must be > 0"
339                    X_pred = regr.predict(h=self.h, **kwargs)
340                    try:
341                        X_test = X_test[0: self.h, :]
342                    except Exception as e:
343                        X_test = X_test.iloc[0: self.h, :]
344
345                rmse = mean_errors(
346                    actual=X_test,
347                    pred=X_pred,
348                    scoring="root_mean_squared_error",
349                    per_series=per_series,
350                )
351                mae = mean_errors(
352                    actual=X_test,
353                    pred=X_pred,
354                    scoring="mean_absolute_error",
355                    per_series=per_series,
356                )
357                mpl = mean_errors(
358                    actual=X_test,
359                    pred=X_pred,
360                    scoring="mean_pinball_loss",
361                    per_series=per_series,
362                )
363            except Exception:
364                continue
365
366            names.append(name)
367            RMSE.append(rmse)
368            MAE.append(mae)
369            MPL.append(mpl)
370
371            if self.custom_metric is not None:
372                try:
373                    if self.h is None:
374                        custom_metric = self.custom_metric(X_test, X_pred)
375                    else:
376                        custom_metric = self.custom_metric(X_test, X_pred)  # X_test was truncated to h rows above
377                    CUSTOM_METRIC.append(custom_metric)
378                except Exception as e:
379                    custom_metric = np.iinfo(np.float32).max
380                    CUSTOM_METRIC.append(np.iinfo(np.float32).max)
381
382            if (self.replications is not None) or (self.type_pi == "gaussian"):
383                if not per_series:
384                    winklerscore = winkler_score(
385                        obj=X_pred, actual=X_test, level=95
386                    )
387                    coveragecalc = coverage(X_pred, X_test, level=95)
388                else:
389                    winklerscore = winkler_score(
390                        obj=X_pred, actual=X_test, level=95, per_series=True
391                    )
392                    coveragecalc = coverage(
393                        X_pred, X_test, level=95, per_series=True
394                    )
395                WINKLERSCORE.append(winklerscore)
396                COVERAGE.append(coveragecalc)
397            TIME.append(time.time() - start)
398
399        if self.estimators == "all":
400            if self.n_layers <= 1:
401                self.regressors = REGRESSORSMTS
402            else:
403                self.regressors = DEEPREGRESSORSMTS
404        else:
405            if self.n_layers <= 1:
406                self.regressors = [
407                    ("MTS(" + est[0] + ")", est[1])
408                    for est in all_estimators()
409                    if (
410                        issubclass(est[1], RegressorMixin)
411                        and (est[0] in self.estimators)
412                    )
413                ]
414            else:  # self.n_layers > 1
415                self.regressors = [
416                    ("DeepMTS(" + est[0] + ")", est[1])
417                    for est in all_estimators()
418                    if (
419                        issubclass(est[1], RegressorMixin)
420                        and (est[0] in self.estimators)
421                    )
422                ]
423
424        if self.preprocess is True:
425            for name, model in tqdm(self.regressors):  # do parallel exec
426                start = time.time()
427                try:
428                    if "random_state" in model().get_params().keys():
429                        pipe = Pipeline(
430                            steps=[
431                                ("preprocessor", preprocessor),
432                                (
433                                    "regressor",
434                                    DeepMTS(
435                                        obj=model(
436                                            random_state=self.random_state,
437                                            **kwargs,
438                                        ),
439                                        n_layers=self.n_layers,
440                                        n_hidden_features=self.n_hidden_features,
441                                        activation_name=self.activation_name,
442                                        a=self.a,
443                                        nodes_sim=self.nodes_sim,
444                                        bias=self.bias,
445                                        dropout=self.dropout,
446                                        direct_link=self.direct_link,
447                                        n_clusters=self.n_clusters,
448                                        cluster_encode=self.cluster_encode,
449                                        type_clust=self.type_clust,
450                                        type_scaling=self.type_scaling,
451                                        lags=self.lags,
452                                        type_pi=self.type_pi,
453                                        block_size=self.block_size,
454                                        replications=self.replications,
455                                        kernel=self.kernel,
456                                        agg=self.agg,
457                                        seed=self.seed,
458                                        backend=self.backend,
459                                        show_progress=self.show_progress,
460                                    ),
461                                ),
462                            ]
463                        )
464                    else:  # "random_state" not in model().get_params().keys()
465                        pipe = Pipeline(
466                            steps=[
467                                ("preprocessor", preprocessor),
468                                (
469                                    "regressor",
470                                    DeepMTS(
471                                        obj=model(**kwargs),
472                                        n_layers=self.n_layers,
473                                        n_hidden_features=self.n_hidden_features,
474                                        activation_name=self.activation_name,
475                                        a=self.a,
476                                        nodes_sim=self.nodes_sim,
477                                        bias=self.bias,
478                                        dropout=self.dropout,
479                                        direct_link=self.direct_link,
480                                        n_clusters=self.n_clusters,
481                                        cluster_encode=self.cluster_encode,
482                                        type_clust=self.type_clust,
483                                        type_scaling=self.type_scaling,
484                                        lags=self.lags,
485                                        type_pi=self.type_pi,
486                                        block_size=self.block_size,
487                                        replications=self.replications,
488                                        kernel=self.kernel,
489                                        agg=self.agg,
490                                        seed=self.seed,
491                                        backend=self.backend,
492                                        show_progress=self.show_progress,
493                                    ),
494                                ),
495                            ]
496                        )
497
498                    pipe.fit(X_train, **kwargs)
499                    # pipe.fit(X_train, xreg=xreg)
500
501                    self.models_[name] = pipe
502
503                    if self.h is None:
504                        X_pred = pipe["regressor"].predict(h=X_test.shape[0], **kwargs)
505                    else:
506                        assert self.h > 0, "h must be > 0"
507                        X_pred = pipe["regressor"].predict(h=self.h, **kwargs)
508
509                    if (self.replications is not None) or (
510                        self.type_pi == "gaussian"
511                    ):
512                        rmse = mean_errors(
513                            actual=X_test,
514                            pred=X_pred,
515                            scoring="root_mean_squared_error",
516                            per_series=per_series,
517                        )
518                        mae = mean_errors(
519                            actual=X_test,
520                            pred=X_pred,
521                            scoring="mean_absolute_error",
522                            per_series=per_series,
523                        )
524                        mpl = mean_errors(
525                            actual=X_test,
526                            pred=X_pred,
527                            scoring="mean_pinball_loss",
528                            per_series=per_series,
529                        )
530                        winklerscore = winkler_score(
531                            obj=X_pred,
532                            actual=X_test,
533                            level=95,
534                            per_series=per_series,
535                        )
536                        coveragecalc = coverage(
537                            X_pred, X_test, level=95, per_series=per_series
538                        )
539                    else:
540                        rmse = mean_errors(
541                            actual=X_test,
542                            pred=X_pred,
543                            scoring="root_mean_squared_error",
544                            per_series=per_series,
545                        )
546                        mae = mean_errors(
547                            actual=X_test,
548                            pred=X_pred,
549                            scoring="mean_absolute_error",
550                            per_series=per_series,
551                        )
552                        mpl = mean_errors(
553                            actual=X_test,
554                            pred=X_pred,
555                            scoring="mean_pinball_loss",
556                            per_series=per_series,
557                        )
558
559                    names.append(name)
560                    RMSE.append(rmse)
561                    MAE.append(mae)
562                    MPL.append(mpl)
563
564                    if (self.replications is not None) or (
565                        self.type_pi == "gaussian"
566                    ):
567                        WINKLERSCORE.append(winklerscore)
568                        COVERAGE.append(coveragecalc)
569                    TIME.append(time.time() - start)
570
571                    if self.custom_metric is not None:
572                        try:
573                            custom_metric = self.custom_metric(X_test, X_pred)
574                            CUSTOM_METRIC.append(custom_metric)
575                        except Exception as e:
576                            custom_metric = np.iinfo(np.float32).max
577                            CUSTOM_METRIC.append(custom_metric)
578
579                    if self.verbose > 0:
580                        if (self.replications is not None) or (
581                            self.type_pi == "gaussian"
582                        ):
583                            scores_verbose = {
584                                "Model": name,
585                                "RMSE": rmse,
586                                "MAE": mae,
587                                "MPL": mpl,
588                                "WINKLERSCORE": winklerscore,
589                                "COVERAGE": coveragecalc,
590                                "Time taken": time.time() - start,
591                            }
592                        else:
593                            scores_verbose = {
594                                "Model": name,
595                                "RMSE": rmse,
596                                "MAE": mae,
597                                "MPL": mpl,
598                                "Time taken": time.time() - start,
599                            }
600
601                        if self.custom_metric is not None:
602                            scores_verbose["Custom metric"] = custom_metric
603
604                    if self.predictions:
605                        predictions[name] = X_pred
606                except Exception as exception:
607                    if self.ignore_warnings is False:
608                        print(name + " model failed to execute")
609                        print(exception)
610
611        else:  # no preprocessing
612            for name, model in tqdm(self.regressors):  # do parallel exec
613                start = time.time()
614                try:
615                    if "random_state" in model().get_params().keys():
616                        pipe = DeepMTS(
617                            obj=model(random_state=self.random_state, **kwargs),
618                            n_layers=self.n_layers,
619                            n_hidden_features=self.n_hidden_features,
620                            activation_name=self.activation_name,
621                            a=self.a,
622                            nodes_sim=self.nodes_sim,
623                            bias=self.bias,
624                            dropout=self.dropout,
625                            direct_link=self.direct_link,
626                            n_clusters=self.n_clusters,
627                            cluster_encode=self.cluster_encode,
628                            type_clust=self.type_clust,
629                            type_scaling=self.type_scaling,
630                            lags=self.lags,
631                            type_pi=self.type_pi,
632                            block_size=self.block_size,
633                            replications=self.replications,
634                            kernel=self.kernel,
635                            agg=self.agg,
636                            seed=self.seed,
637                            backend=self.backend,
638                            show_progress=self.show_progress,
639                        )
640                    else:
641                        pipe = DeepMTS(
642                            obj=model(**kwargs),
643                            n_layers=self.n_layers,
644                            n_hidden_features=self.n_hidden_features,
645                            activation_name=self.activation_name,
646                            a=self.a,
647                            nodes_sim=self.nodes_sim,
648                            bias=self.bias,
649                            dropout=self.dropout,
650                            direct_link=self.direct_link,
651                            n_clusters=self.n_clusters,
652                            cluster_encode=self.cluster_encode,
653                            type_clust=self.type_clust,
654                            type_scaling=self.type_scaling,
655                            lags=self.lags,
656                            type_pi=self.type_pi,
657                            block_size=self.block_size,
658                            replications=self.replications,
659                            kernel=self.kernel,
660                            agg=self.agg,
661                            seed=self.seed,
662                            backend=self.backend,
663                            show_progress=self.show_progress,
664                        )
665
666                    pipe.fit(X_train, xreg, **kwargs)
667                    # pipe.fit(X_train, xreg=xreg) # DO xreg like in `ahead`
668
669                    self.models_[name] = pipe
670
671                    if self.preprocess is True:
672                        if self.h is None:
673                            X_pred = pipe["regressor"].predict(
674                                h=X_test.shape[0], **kwargs
675                            )
676                        else:
677                            assert (
678                                self.h > 0 and self.h <= X_test.shape[0]
679                            ), "h must be > 0 and <= X_test.shape[0]"
680                            X_pred = pipe["regressor"].predict(
681                                h=self.h, **kwargs
682                            )
683
684                    else:
685                        if self.h is None:
686                            X_pred = pipe.predict(
687                                h=X_test.shape[0],
688                                **kwargs,
689                                # X_pred = pipe.predict(h=X_test.shape[0], new_xreg=new_xreg) ## DO xreg like in `ahead`
690                            )
691                        else:
692                            assert (
693                                self.h > 0 and self.h <= X_test.shape[0]
694                            ), "h must be > 0 and <= X_test.shape[0]"
695                            X_pred = pipe.predict(h=self.h, **kwargs)
696
697                    if self.h is None:
698                        if (self.replications is not None) or (
699                            self.type_pi == "gaussian"
700                        ):
701                            rmse = mean_errors(
702                                actual=X_test,
703                                pred=X_pred.mean,
704                                scoring="root_mean_squared_error",
705                                per_series=per_series,
706                            )
707                            mae = mean_errors(
708                                actual=X_test,
709                                pred=X_pred.mean,
710                                scoring="mean_absolute_error",
711                                per_series=per_series,
712                            )
713                            mpl = mean_errors(
714                                actual=X_test,
715                                pred=X_pred.mean,
716                                scoring="mean_pinball_loss",
717                                per_series=per_series,
718                            )
719                            winklerscore = winkler_score(
720                                obj=X_pred,
721                                actual=X_test,
722                                level=95,
723                                per_series=per_series,
724                            )
725                            coveragecalc = coverage(
726                                X_pred, X_test, level=95, per_series=per_series
727                            )
728                        else:  # no prediction interval
729                            rmse = mean_errors(
730                                actual=X_test,
731                                pred=X_pred,
732                                scoring="root_mean_squared_error",
733                                per_series=per_series,
734                            )
735                            mae = mean_errors(
736                                actual=X_test,
737                                pred=X_pred,
738                                scoring="mean_absolute_error",
739                                per_series=per_series,
740                            )
741                            mpl = mean_errors(
742                                actual=X_test,
743                                pred=X_pred,
744                                scoring="mean_pinball_loss",
745                                per_series=per_series,
746                            )
747                    else:  # self.h is not None
748                        if (self.replications is not None) or (
749                            self.type_pi == "gaussian"
750                        ):
751                            if isinstance(X_test, pd.DataFrame):
752                                X_test_h = X_test.iloc[0: self.h, :]
753                                rmse = mean_errors(
754                                    actual=X_test_h,
755                                    pred=X_pred,
756                                    scoring="root_mean_squared_error",
757                                    per_series=per_series,
758                                )
759                                mae = mean_errors(
760                                    actual=X_test_h,
761                                    pred=X_pred,
762                                    scoring="mean_absolute_error",
763                                    per_series=per_series,
764                                )
765                                mpl = mean_errors(
766                                    actual=X_test_h,
767                                    pred=X_pred,
768                                    scoring="mean_pinball_loss",
769                                    per_series=per_series,
770                                )
771                                winklerscore = winkler_score(
772                                    obj=X_pred,
773                                    actual=X_test_h,
774                                    level=95,
775                                    per_series=per_series,
776                                )
777                                coveragecalc = coverage(
778                                    X_pred,
779                                    X_test_h,
780                                    level=95,
781                                    per_series=per_series,
782                                )
783                            else:
784                                X_test_h = X_test[0: self.h, :]
785                                rmse = mean_errors(
786                                    actual=X_test_h,
787                                    pred=X_pred,
788                                    scoring="root_mean_squared_error",
789                                    per_series=per_series,
790                                )
791                                mae = mean_errors(
792                                    actual=X_test_h,
793                                    pred=X_pred,
794                                    scoring="mean_absolute_error",
795                                    per_series=per_series,
796                                )
797                                mpl = mean_errors(
798                                    actual=X_test_h,
799                                    pred=X_pred,
800                                    scoring="mean_pinball_loss",
801                                    per_series=per_series,
802                                )
803                                winklerscore = winkler_score(
804                                    obj=X_pred,
805                                    actual=X_test_h,
806                                    level=95,
807                                    per_series=per_series,
808                                )
809                                coveragecalc = coverage(
810                                    X_pred,
811                                    X_test_h,
812                                    level=95,
813                                    per_series=per_series,
814                                )
815                        else:  # no prediction interval
816                            if isinstance(X_test, pd.DataFrame):
817                                X_test_h = X_test.iloc[0: self.h, :]
818                                rmse = mean_errors(
819                                    actual=X_test_h,
820                                    pred=X_pred,
821                                    scoring="root_mean_squared_error",
822                                    per_series=per_series,
823                                )
824                                mae = mean_errors(
825                                    actual=X_test_h,
826                                    pred=X_pred,
827                                    scoring="mean_absolute_error",
828                                    per_series=per_series,
829                                )
830                                mpl = mean_errors(
831                                    actual=X_test_h,
832                                    pred=X_pred,
833                                    scoring="mean_pinball_loss",
834                                    per_series=per_series,
835                                )
836                            else:
837                                X_test_h = X_test[0: self.h, :]
838                                rmse = mean_errors(
839                                    actual=X_test_h,
840                                    pred=X_pred,
841                                    scoring="root_mean_squared_error",
842                                    per_series=per_series,
843                                )
844                                mae = mean_errors(
845                                    actual=X_test_h, pred=X_pred,
846                                    scoring="mean_absolute_error", per_series=per_series,
847                                )
848                                mpl = mean_errors(  # also compute MPL, as in the DataFrame branch above
849                                    actual=X_test_h, pred=X_pred,
850                                    scoring="mean_pinball_loss", per_series=per_series)
851                    names.append(name)
852                    RMSE.append(rmse)
853                    MAE.append(mae)
854                    MPL.append(mpl)
855                    if (self.replications is not None) or (
856                        self.type_pi == "gaussian"
857                    ):
858                        WINKLERSCORE.append(winklerscore)
859                        COVERAGE.append(coveragecalc)
860                    TIME.append(time.time() - start)
861
862                    if self.custom_metric is not None:
863                        try:
864                            if self.h is None:
865                                custom_metric = self.custom_metric(
866                                    X_test, X_pred
867                                )
868                            else:
869                                custom_metric = self.custom_metric(
870                                    X_test_h, X_pred
871                                )
872                            CUSTOM_METRIC.append(custom_metric)
873                        except Exception as e:
874                            custom_metric = np.iinfo(np.float32).max
875                            CUSTOM_METRIC.append(np.iinfo(np.float32).max)
876
877                    if self.verbose > 0:
878                        if (self.replications is not None) or (
879                            self.type_pi == "gaussian"
880                        ):
881                            scores_verbose = {
882                                "Model": name,
883                                "RMSE": rmse,
884                                "MAE": mae,
885                                "MPL": mpl,
886                                "WINKLERSCORE": winklerscore,
887                                "COVERAGE": coveragecalc,
888                                "Time taken": time.time() - start,
889                            }
890                        else:
891                            scores_verbose = {
892                                "Model": name,
893                                "RMSE": rmse,
894                                "MAE": mae,
895                                "MPL": mpl,
896                                "Time taken": time.time() - start,
897                            }
898
899                        if self.custom_metric is not None:
900                            scores_verbose["Custom metric"] = custom_metric
901
902                    if self.predictions:
903                        predictions[name] = X_pred
904
905                except Exception as exception:
906                    if self.ignore_warnings is False:
907                        print(name + " model failed to execute")
908                        print(exception)
909
910        if (self.replications is not None) or (self.type_pi == "gaussian"):
911            scores = {
912                "Model": names,
913                "RMSE": RMSE,
914                "MAE": MAE,
915                "MPL": MPL,
916                "WINKLERSCORE": WINKLERSCORE,
917                "COVERAGE": COVERAGE,
918                "Time Taken": TIME,
919            }
920        else:
921            scores = {
922                "Model": names,
923                "RMSE": RMSE,
924                "MAE": MAE,
925                "MPL": MPL,
926                "Time Taken": TIME,
927            }
928
929        if self.custom_metric is not None:
930            scores["Custom metric"] = CUSTOM_METRIC
931
932        if per_series:
933            scores = dict_to_dataframe_series(scores, self.series_names)
934        else:
935            scores = pd.DataFrame(scores)
936
937        try:  # case per_series, can't be sorted
938            scores = scores.sort_values(
939                by=self.sort_by, ascending=True
940            ).set_index("Model")
941
942            self.best_model_ = self.models_[scores.index[0]]
943        except Exception as e:
944            pass
945
946        if self.predictions is True:
947            return scores, predictions
948
949        return scores
950
951    def get_best_model(self):
952        """
953        This function returns the best model pipeline based on the sort_by metric.
954
955        Returns:
956
957            best_model: object,
958                Returns the best model pipeline based on the sort_by metric.
959
960        """
961        return self.best_model_
962
963    def provide_models(self, X_train, X_test):
964        """
965        This function returns all the model objects trained in fit function.
966        If fit is not called already, then we call fit and then return the models.
967
968        Parameters:
969
970            X_train : array-like,
971                Training vectors, where rows is the number of samples
972                and columns is the number of features.
973
974            X_test : array-like,
975                Testing vectors, where rows is the number of samples
976                and columns is the number of features.
977
978        Returns:
979
980            models: dict-object,
981                Returns a dictionary with each model pipeline as value
982                with key as name of models.
983
984        """
985        if self.h is None:
986            if len(self.models_.keys()) == 0:
987                self.fit(X_train, X_test)
988        else:
989            if len(self.models_.keys()) == 0:
990                if isinstance(X_test, pd.DataFrame):
991                    self.fit(X_train, X_test.iloc[0: self.h, :])
992                else:
993                    self.fit(X_train, X_test[0: self.h, :])
994
995        return self.models_

Fitting -- almost -- all the regression algorithms with layers of nnetsauce's CustomRegressor to multivariate time series and returning their scores.

Parameters:

verbose: int, optional (default=0)
    Any positive number for verbosity.

ignore_warnings: bool, optional (default=True)
    When set to True, warnings related to algorithms that are not
    able to run are ignored.

custom_metric: function, optional (default=None)
    When a function is provided, models are also evaluated based on this
    custom evaluation metric.

predictions: bool, optional (default=False)
    When set to True, the predictions of all the models are returned as a data frame.

sort_by: string, optional (default='RMSE')
    Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE',
    'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and
    provided by custom_metric. Defaults to 'WINKLERSCORE' when prediction intervals
    are requested (replications is not None or type_pi == "gaussian").

random_state: int, optional (default=42)
    Reproducibility seed.

estimators: list, optional (default='all')
    List of estimator (regression algorithm) names to try, or just 'all'.

preprocess: bool, optional (default=False)
    When set to True, preprocessing is applied to the data.

n_layers: int, optional (default=1)
    Number of layers in the network. When set to 1, the model is equivalent to an MTS.

h: int, optional (default=None)
    Number of steps ahead to predict (when used, must be > 0 and < X_test.shape[0]).

All the other parameters are the same as MTS's.

Attributes:

models_: dict-object
    A dictionary of fitted model pipelines, keyed by model name.

best_model_: object
    Returns the best model pipeline based on the sort_by metric.

Examples:

See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict
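
For the deep variant, a minimal, untested sketch of calling fit directly (again with placeholder data and illustrative parameter values):

    import numpy as np
    import pandas as pd
    import nnetsauce as ns

    rng = np.random.default_rng(123)
    df = pd.DataFrame(rng.normal(size=(120, 3)).cumsum(axis=0),
                      columns=["x1", "x2", "x3"])
    X_train, X_test = df.iloc[:100, :], df.iloc[100:, :]

    # two layers of CustomRegressor around each base regressor
    regr = ns.LazyDeepMTS(n_layers=2, lags=12, ignore_warnings=True, predictions=True)
    scores, preds = regr.fit(X_train, X_test)      # predictions=True -> forecasts are returned too
    models = regr.provide_models(X_train, X_test)  # dict of fitted pipelines, keyed by model name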
def fit(self, X_train, X_test, xreg=None, per_series=False, **kwargs):
241    def fit(self, X_train, X_test, xreg=None, per_series=False, **kwargs):
242        """Fit Regression algorithms to X_train, predict and score on X_test.
243
244        Parameters:
245
246            X_train: array-like or data frame,
247                Training vectors, where rows is the number of samples
248                and columns is the number of features.
249
250            X_test: array-like or data frame,
251                Testing vectors, where rows is the number of samples
252                and columns is the number of features.
253
254            xreg: array-like, optional (default=None)
255                Additional (external) regressors to be passed to self.obj
256                xreg must be in 'increasing' order (most recent observations last)
257
258            per_series: bool, optional (default=False)
259                When set to True, the metrics are computed series by series.
260
261            **kwargs: dict, optional (default=None)
262                Additional parameters to be passed to `fit` method of `obj`.
263
264        Returns:
265
266            scores: Pandas DataFrame
267                Returns metrics of all the models in a Pandas DataFrame.
268
269            predictions: Pandas DataFrame
270                Returns predictions of all the models in a Pandas DataFrame.
271
272        """
273        R2 = []
274        ADJR2 = []
275        ME = []
276        MPL = []
277        RMSE = []
278        MAE = []
279        MPE = []
280        MAPE = []
281        WINKLERSCORE = []
282        COVERAGE = []
283
284        # WIN = []
285        names = []
286        TIME = []
287        predictions = {}
288
289        if self.custom_metric is not None:
290            CUSTOM_METRIC = []
291
292        if self.h is None:
293            assert X_test is not None, "If h is None, X_test must be provided."
294
295        if isinstance(X_train, np.ndarray):
296            X_train = pd.DataFrame(X_train)
297            X_test = pd.DataFrame(X_test)
298
299        self.series_names = X_train.columns.tolist()
300
301        X_train = convert_df_to_numeric(X_train)
302        X_test = convert_df_to_numeric(X_test)
303
304        numeric_features = X_train.select_dtypes(include=[np.number]).columns
305        categorical_features = X_train.select_dtypes(include=["object"]).columns
306
307        categorical_low, categorical_high = get_card_split(
308            X_train, categorical_features
309        )
310
311        if self.preprocess:
312            preprocessor = ColumnTransformer(
313                transformers=[
314                    ("numeric", numeric_transformer, numeric_features),
315                    (
316                        "categorical_low",
317                        categorical_transformer_low,
318                        categorical_low,
319                    ),
320                    (
321                        "categorical_high",
322                        categorical_transformer_high,
323                        categorical_high,
324                    ),
325                ]
326            )
327
328        # baselines (Classical MTS) ----
329        for i, name in enumerate(["ARIMA", "ETS", "Theta", "VAR", "VECM"]):
330            try:
331                start = time.time()
332                regr = ClassicalMTS(model=name)
333                regr.fit(X_train, **kwargs)
334                self.models_[name] = regr
335                if self.h is None:
336                    X_pred = regr.predict(h=X_test.shape[0], **kwargs)
337                else:
338                    assert self.h > 0, "h must be > 0"
339                    X_pred = regr.predict(h=self.h, **kwargs)
340                    try:
341                        X_test = X_test[0: self.h, :]
342                    except Exception as e:
343                        X_test = X_test.iloc[0: self.h, :]
344
345                rmse = mean_errors(
346                    actual=X_test,
347                    pred=X_pred,
348                    scoring="root_mean_squared_error",
349                    per_series=per_series,
350                )
351                mae = mean_errors(
352                    actual=X_test,
353                    pred=X_pred,
354                    scoring="mean_absolute_error",
355                    per_series=per_series,
356                )
357                mpl = mean_errors(
358                    actual=X_test,
359                    pred=X_pred,
360                    scoring="mean_pinball_loss",
361                    per_series=per_series,
362                )
363            except Exception:
364                continue
365
366            names.append(name)
367            RMSE.append(rmse)
368            MAE.append(mae)
369            MPL.append(mpl)
370
371            if self.custom_metric is not None:
372                try:
373                    if self.h is None:
374                        custom_metric = self.custom_metric(X_test, X_pred)
375                    else:
376                        custom_metric = self.custom_metric(X_test, X_pred)  # X_test was truncated to h rows above
377                    CUSTOM_METRIC.append(custom_metric)
378                except Exception as e:
379                    custom_metric = np.iinfo(np.float32).max
380                    CUSTOM_METRIC.append(np.iinfo(np.float32).max)
381
382            if (self.replications is not None) or (self.type_pi == "gaussian"):
383                if not per_series:
384                    winklerscore = winkler_score(
385                        obj=X_pred, actual=X_test, level=95
386                    )
387                    coveragecalc = coverage(X_pred, X_test, level=95)
388                else:
389                    winklerscore = winkler_score(
390                        obj=X_pred, actual=X_test, level=95, per_series=True
391                    )
392                    coveragecalc = coverage(
393                        X_pred, X_test, level=95, per_series=True
394                    )
395                WINKLERSCORE.append(winklerscore)
396                COVERAGE.append(coveragecalc)
397            TIME.append(time.time() - start)
398
399        if self.estimators == "all":
400            if self.n_layers <= 1:
401                self.regressors = REGRESSORSMTS
402            else:
403                self.regressors = DEEPREGRESSORSMTS
404        else:
405            if self.n_layers <= 1:
406                self.regressors = [
407                    ("MTS(" + est[0] + ")", est[1])
408                    for est in all_estimators()
409                    if (
410                        issubclass(est[1], RegressorMixin)
411                        and (est[0] in self.estimators)
412                    )
413                ]
414            else:  # self.n_layers > 1
415                self.regressors = [
416                    ("DeepMTS(" + est[0] + ")", est[1])
417                    for est in all_estimators()
418                    if (
419                        issubclass(est[1], RegressorMixin)
420                        and (est[0] in self.estimators)
421                    )
422                ]
423
424        if self.preprocess is True:
425            for name, model in tqdm(self.regressors):  # do parallel exec
426                start = time.time()
427                try:
428                    if "random_state" in model().get_params().keys():
429                        pipe = Pipeline(
430                            steps=[
431                                ("preprocessor", preprocessor),
432                                (
433                                    "regressor",
434                                    DeepMTS(
435                                        obj=model(
436                                            random_state=self.random_state,
437                                            **kwargs,
438                                        ),
439                                        n_layers=self.n_layers,
440                                        n_hidden_features=self.n_hidden_features,
441                                        activation_name=self.activation_name,
442                                        a=self.a,
443                                        nodes_sim=self.nodes_sim,
444                                        bias=self.bias,
445                                        dropout=self.dropout,
446                                        direct_link=self.direct_link,
447                                        n_clusters=self.n_clusters,
448                                        cluster_encode=self.cluster_encode,
449                                        type_clust=self.type_clust,
450                                        type_scaling=self.type_scaling,
451                                        lags=self.lags,
452                                        type_pi=self.type_pi,
453                                        block_size=self.block_size,
454                                        replications=self.replications,
455                                        kernel=self.kernel,
456                                        agg=self.agg,
457                                        seed=self.seed,
458                                        backend=self.backend,
459                                        show_progress=self.show_progress,
460                                    ),
461                                ),
462                            ]
463                        )
464                    else:  # "random_state" not in model().get_params().keys()
465                        pipe = Pipeline(
466                            steps=[
467                                ("preprocessor", preprocessor),
468                                (
469                                    "regressor",
470                                    DeepMTS(
471                                        obj=model(**kwargs),
472                                        n_layers=self.n_layers,
473                                        n_hidden_features=self.n_hidden_features,
474                                        activation_name=self.activation_name,
475                                        a=self.a,
476                                        nodes_sim=self.nodes_sim,
477                                        bias=self.bias,
478                                        dropout=self.dropout,
479                                        direct_link=self.direct_link,
480                                        n_clusters=self.n_clusters,
481                                        cluster_encode=self.cluster_encode,
482                                        type_clust=self.type_clust,
483                                        type_scaling=self.type_scaling,
484                                        lags=self.lags,
485                                        type_pi=self.type_pi,
486                                        block_size=self.block_size,
487                                        replications=self.replications,
488                                        kernel=self.kernel,
489                                        agg=self.agg,
490                                        seed=self.seed,
491                                        backend=self.backend,
492                                        show_progress=self.show_progress,
493                                    ),
494                                ),
495                            ]
496                        )
497
498                    pipe.fit(X_train, **kwargs)
499                    # pipe.fit(X_train, xreg=xreg)
500
501                    self.models_[name] = pipe
502
503                    if self.h is None:
504                        X_pred = pipe["regressor"].predict(h=X_test.shape[0], **kwargs)
505                    else:
506                        assert self.h > 0, "h must be > 0"
507                        X_pred = pipe["regressor"].predict(h=self.h, **kwargs)
508
509                    if (self.replications is not None) or (
510                        self.type_pi == "gaussian"
511                    ):
512                        rmse = mean_errors(
513                            actual=X_test,
514                            pred=X_pred,
515                            scoring="root_mean_squared_error",
516                            per_series=per_series,
517                        )
518                        mae = mean_errors(
519                            actual=X_test,
520                            pred=X_pred,
521                            scoring="mean_absolute_error",
522                            per_series=per_series,
523                        )
524                        mpl = mean_errors(
525                            actual=X_test,
526                            pred=X_pred,
527                            scoring="mean_pinball_loss",
528                            per_series=per_series,
529                        )
530                        winklerscore = winkler_score(
531                            obj=X_pred,
532                            actual=X_test,
533                            level=95,
534                            per_series=per_series,
535                        )
536                        coveragecalc = coverage(
537                            X_pred, X_test, level=95, per_series=per_series
538                        )
539                    else:
540                        rmse = mean_errors(
541                            actual=X_test,
542                            pred=X_pred,
543                            scoring="root_mean_squared_error",
544                            per_series=per_series,
545                        )
546                        mae = mean_errors(
547                            actual=X_test,
548                            pred=X_pred,
549                            scoring="mean_absolute_error",
550                            per_series=per_series,
551                        )
552                        mpl = mean_errors(
553                            actual=X_test,
554                            pred=X_pred,
555                            scoring="mean_pinball_loss",
556                            per_series=per_series,
557                        )
558
559                    names.append(name)
560                    RMSE.append(rmse)
561                    MAE.append(mae)
562                    MPL.append(mpl)
563
564                    if (self.replications is not None) or (
565                        self.type_pi == "gaussian"
566                    ):
567                        WINKLERSCORE.append(winklerscore)
568                        COVERAGE.append(coveragecalc)
569                    TIME.append(time.time() - start)
570
571                    if self.custom_metric is not None:
572                        try:
573                            custom_metric = self.custom_metric(X_test, X_pred)
574                            CUSTOM_METRIC.append(custom_metric)
575                        except Exception:
576                            custom_metric = np.finfo(np.float32).max  # finfo, not iinfo, for a float dtype
577                            CUSTOM_METRIC.append(custom_metric)
578
579                    if self.verbose > 0:
580                        if (self.replications is not None) or (
581                            self.type_pi == "gaussian"
582                        ):
583                            scores_verbose = {
584                                "Model": name,
585                                "RMSE": rmse,
586                                "MAE": mae,
587                                "MPL": mpl,
588                                "WINKLERSCORE": winklerscore,
589                                "COVERAGE": coveragecalc,
590                                "Time taken": time.time() - start,
591                            }
592                        else:
593                            scores_verbose = {
594                                "Model": name,
595                                "RMSE": rmse,
596                                "MAE": mae,
597                                "MPL": mpl,
598                                "Time taken": time.time() - start,
599                            }
600
601                        if self.custom_metric is not None:
602                            scores_verbose["Custom metric"] = custom_metric
603                        print(scores_verbose)
604                    if self.predictions:
605                        predictions[name] = X_pred
606                except Exception as exception:
607                    if self.ignore_warnings is False:
608                        print(name + " model failed to execute")
609                        print(exception)
610
611        else:  # no preprocessing
612            for name, model in tqdm(self.regressors):  # do parallel exec
613                start = time.time()
614                try:
615                    if "random_state" in model().get_params().keys():
616                        pipe = DeepMTS(
617                            obj=model(random_state=self.random_state, **kwargs),
618                            n_layers=self.n_layers,
619                            n_hidden_features=self.n_hidden_features,
620                            activation_name=self.activation_name,
621                            a=self.a,
622                            nodes_sim=self.nodes_sim,
623                            bias=self.bias,
624                            dropout=self.dropout,
625                            direct_link=self.direct_link,
626                            n_clusters=self.n_clusters,
627                            cluster_encode=self.cluster_encode,
628                            type_clust=self.type_clust,
629                            type_scaling=self.type_scaling,
630                            lags=self.lags,
631                            type_pi=self.type_pi,
632                            block_size=self.block_size,
633                            replications=self.replications,
634                            kernel=self.kernel,
635                            agg=self.agg,
636                            seed=self.seed,
637                            backend=self.backend,
638                            show_progress=self.show_progress,
639                        )
640                    else:
641                        pipe = DeepMTS(
642                            obj=model(**kwargs),
643                            n_layers=self.n_layers,
644                            n_hidden_features=self.n_hidden_features,
645                            activation_name=self.activation_name,
646                            a=self.a,
647                            nodes_sim=self.nodes_sim,
648                            bias=self.bias,
649                            dropout=self.dropout,
650                            direct_link=self.direct_link,
651                            n_clusters=self.n_clusters,
652                            cluster_encode=self.cluster_encode,
653                            type_clust=self.type_clust,
654                            type_scaling=self.type_scaling,
655                            lags=self.lags,
656                            type_pi=self.type_pi,
657                            block_size=self.block_size,
658                            replications=self.replications,
659                            kernel=self.kernel,
660                            agg=self.agg,
661                            seed=self.seed,
662                            backend=self.backend,
663                            show_progress=self.show_progress,
664                        )
665
666                    pipe.fit(X_train, xreg, **kwargs)
667                    # pipe.fit(X_train, xreg=xreg) # DO xreg like in `ahead`
668
669                    self.models_[name] = pipe
670
671                    if self.preprocess is True:
672                        if self.h is None:
673                            X_pred = pipe["regressor"].predict(
674                                h=X_test.shape[0], **kwargs
675                            )
676                        else:
677                            assert (
678                                self.h > 0 and self.h <= X_test.shape[0]
679                            ), "h must be > 0 and <= X_test.shape[0]"
680                            X_pred = pipe["regressor"].predict(
681                                h=self.h, **kwargs
682                            )
683
684                    else:
685                        if self.h is None:
686                            X_pred = pipe.predict(
687                                h=X_test.shape[0],
688                                **kwargs,
689                                # X_pred = pipe.predict(h=X_test.shape[0], new_xreg=new_xreg) ## DO xreg like in `ahead`
690                            )
691                        else:
692                            assert (
693                                self.h > 0 and self.h <= X_test.shape[0]
694                            ), "h must be > 0 and <= X_test.shape[0]"
695                            X_pred = pipe.predict(h=self.h, **kwargs)
696
697                    if self.h is None:
698                        if (self.replications is not None) or (
699                            self.type_pi == "gaussian"
700                        ):
701                            rmse = mean_errors(
702                                actual=X_test,
703                                pred=X_pred.mean,
704                                scoring="root_mean_squared_error",
705                                per_series=per_series,
706                            )
707                            mae = mean_errors(
708                                actual=X_test,
709                                pred=X_pred.mean,
710                                scoring="mean_absolute_error",
711                                per_series=per_series,
712                            )
713                            mpl = mean_errors(
714                                actual=X_test,
715                                pred=X_pred.mean,
716                                scoring="mean_pinball_loss",
717                                per_series=per_series,
718                            )
719                            winklerscore = winkler_score(
720                                obj=X_pred,
721                                actual=X_test,
722                                level=95,
723                                per_series=per_series,
724                            )
725                            coveragecalc = coverage(
726                                X_pred, X_test, level=95, per_series=per_series
727                            )
728                        else:  # no prediction interval
729                            rmse = mean_errors(
730                                actual=X_test,
731                                pred=X_pred,
732                                scoring="root_mean_squared_error",
733                                per_series=per_series,
734                            )
735                            mae = mean_errors(
736                                actual=X_test,
737                                pred=X_pred,
738                                scoring="mean_absolute_error",
739                                per_series=per_series,
740                            )
741                            mpl = mean_errors(
742                                actual=X_test,
743                                pred=X_pred,
744                                scoring="mean_pinball_loss",
745                                per_series=per_series,
746                            )
747                    else:  # self.h is not None
748                        if (self.replications is not None) or (
749                            self.type_pi == "gaussian"
750                        ):
751                            if isinstance(X_test, pd.DataFrame):
752                                X_test_h = X_test.iloc[0: self.h, :]
753                                rmse = mean_errors(
754                                    actual=X_test_h,
755                                    pred=X_pred,
756                                    scoring="root_mean_squared_error",
757                                    per_series=per_series,
758                                )
759                                mae = mean_errors(
760                                    actual=X_test_h,
761                                    pred=X_pred,
762                                    scoring="mean_absolute_error",
763                                    per_series=per_series,
764                                )
765                                mpl = mean_errors(
766                                    actual=X_test_h,
767                                    pred=X_pred,
768                                    scoring="mean_pinball_loss",
769                                    per_series=per_series,
770                                )
771                                winklerscore = winkler_score(
772                                    obj=X_pred,
773                                    actual=X_test_h,
774                                    level=95,
775                                    per_series=per_series,
776                                )
777                                coveragecalc = coverage(
778                                    X_pred,
779                                    X_test_h,
780                                    level=95,
781                                    per_series=per_series,
782                                )
783                            else:
784                                X_test_h = X_test[0: self.h, :]
785                                rmse = mean_errors(
786                                    actual=X_test_h,
787                                    pred=X_pred,
788                                    scoring="root_mean_squared_error",
789                                    per_series=per_series,
790                                )
791                                mae = mean_errors(
792                                    actual=X_test_h,
793                                    pred=X_pred,
794                                    scoring="mean_absolute_error",
795                                    per_series=per_series,
796                                )
797                                mpl = mean_errors(
798                                    actual=X_test_h,
799                                    pred=X_pred,
800                                    scoring="mean_pinball_loss",
801                                    per_series=per_series,
802                                )
803                                winklerscore = winkler_score(
804                                    obj=X_pred,
805                                    actual=X_test_h,
806                                    level=95,
807                                    per_series=per_series,
808                                )
809                                coveragecalc = coverage(
810                                    X_pred,
811                                    X_test_h,
812                                    level=95,
813                                    per_series=per_series,
814                                )
815                        else:  # no prediction interval
816                            if isinstance(X_test, pd.DataFrame):
817                                X_test_h = X_test.iloc[0: self.h, :]
818                                rmse = mean_errors(
819                                    actual=X_test_h,
820                                    pred=X_pred,
821                                    scoring="root_mean_squared_error",
822                                    per_series=per_series,
823                                )
824                                mae = mean_errors(
825                                    actual=X_test_h,
826                                    pred=X_pred,
827                                    scoring="mean_absolute_error",
828                                    per_series=per_series,
829                                )
830                                mpl = mean_errors(
831                                    actual=X_test_h,
832                                    pred=X_pred,
833                                    scoring="mean_pinball_loss",
834                                    per_series=per_series,
835                                )
836                            else:
837                                X_test_h = X_test[0: self.h, :]
838                                rmse = mean_errors(
839                                    actual=X_test_h,
840                                    pred=X_pred,
841                                    scoring="root_mean_squared_error",
842                                    per_series=per_series,
843                                )
844                                mae = mean_errors(
845                                    actual=X_test_h,
846                                    pred=X_pred,
847                                    scoring="mean_absolute_error",
848                                    per_series=per_series,
849                                )
850                                mpl = mean_errors(actual=X_test_h, pred=X_pred, scoring="mean_pinball_loss", per_series=per_series)
851                    names.append(name)
852                    RMSE.append(rmse)
853                    MAE.append(mae)
854                    MPL.append(mpl)
855                    if (self.replications is not None) or (
856                        self.type_pi == "gaussian"
857                    ):
858                        WINKLERSCORE.append(winklerscore)
859                        COVERAGE.append(coveragecalc)
860                    TIME.append(time.time() - start)
861
862                    if self.custom_metric is not None:
863                        try:
864                            if self.h is None:
865                                custom_metric = self.custom_metric(
866                                    X_test, X_pred
867                                )
868                            else:
869                                custom_metric = self.custom_metric(
870                                    X_test_h, X_pred
871                                )
872                            CUSTOM_METRIC.append(custom_metric)
873                        except Exception:
874                            custom_metric = np.finfo(np.float32).max  # finfo, not iinfo, for a float dtype
875                            CUSTOM_METRIC.append(custom_metric)
876
877                    if self.verbose > 0:
878                        if (self.replications is not None) or (
879                            self.type_pi == "gaussian"
880                        ):
881                            scores_verbose = {
882                                "Model": name,
883                                "RMSE": rmse,
884                                "MAE": mae,
885                                "MPL": mpl,
886                                "WINKLERSCORE": winklerscore,
887                                "COVERAGE": coveragecalc,
888                                "Time taken": time.time() - start,
889                            }
890                        else:
891                            scores_verbose = {
892                                "Model": name,
893                                "RMSE": rmse,
894                                "MAE": mae,
895                                "MPL": mpl,
896                                "Time taken": time.time() - start,
897                            }
898
899                        if self.custom_metric is not None:
900                            scores_verbose["Custom metric"] = custom_metric
901                        print(scores_verbose)
902                    if self.predictions:
903                        predictions[name] = X_pred
904
905                except Exception as exception:
906                    if self.ignore_warnings is False:
907                        print(name + " model failed to execute")
908                        print(exception)
909
910        if (self.replications is not None) or (self.type_pi == "gaussian"):
911            scores = {
912                "Model": names,
913                "RMSE": RMSE,
914                "MAE": MAE,
915                "MPL": MPL,
916                "WINKLERSCORE": WINKLERSCORE,
917                "COVERAGE": COVERAGE,
918                "Time Taken": TIME,
919            }
920        else:
921            scores = {
922                "Model": names,
923                "RMSE": RMSE,
924                "MAE": MAE,
925                "MPL": MPL,
926                "Time Taken": TIME,
927            }
928
929        if self.custom_metric is not None:
930            scores["Custom metric"] = CUSTOM_METRIC
931
932        if per_series:
933            scores = dict_to_dataframe_series(scores, self.series_names)
934        else:
935            scores = pd.DataFrame(scores)
936
937        try:  # case per_series, can't be sorted
938            scores = scores.sort_values(
939                by=self.sort_by, ascending=True
940            ).set_index("Model")
941
942            self.best_model_ = self.models_[scores.index[0]]
943        except Exception as e:
944            pass
945
946        if self.predictions is True:
947            return scores, predictions
948
949        return scores

Fit Regression algorithms to X_train, predict and score on X_test.

Parameters:

X_train: array-like or data frame,
    Training vectors, where rows are samples
    and columns are features.

X_test: array-like or data frame,
    Testing vectors, where rows are samples
    and columns are features.

xreg: array-like, optional (default=None)
    Additional (external) regressors to be passed to self.obj
    xreg must be in 'increasing' order (most recent observations last)

per_series: bool, optional (default=False)
    When set to True, the metrics are computed series by series.

**kwargs: dict, optional (default=None)
    Additional parameters to be passed to the `fit` method of `obj`.

Returns:

scores: Pandas DataFrame
    Returns metrics of all the models in a Pandas DataFrame.

predictions: Pandas DataFrame
    Returns predictions of all the models in a Pandas DataFrame.
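A minimal usage sketch of this `fit` method, assuming it belongs to the `LazyDeepMTS` estimator and that `verbose`, `ignore_warnings` and `predictions` are constructor arguments (as the source above suggests); the synthetic data is purely illustrative:

```python
# Hedged sketch, not taken from the official docs.
import numpy as np
import nnetsauce as ns

np.random.seed(123)
M = np.random.rand(100, 3)               # 3 illustrative series, 100 observations
X_train, X_test = M[:80, :], M[80:, :]

regr = ns.LazyDeepMTS(verbose=0, ignore_warnings=True, predictions=True)
scores, predictions = regr.fit(X_train, X_test)  # scores: one row per candidate model
print(scores.head())
```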
def provide_models(self, X_train, X_test):
963    def provide_models(self, X_train, X_test):
964        """
965        This function returns all the model objects trained in fit function.
966        If fit is not called already, then we call fit and then return the models.
967
968        Parameters:
969
970            X_train : array-like,
971                Training vectors, where rows is the number of samples
972                and columns is the number of features.
973
974            X_test : array-like,
975                Testing vectors, where rows is the number of samples
976                and columns is the number of features.
977
978        Returns:
979
980            models: dict-object,
981                Returns a dictionary with each model pipeline as value
982                with key as name of models.
983
984        """
985        if self.h is None:
986            if len(self.models_.keys()) == 0:
987                self.fit(X_train, X_test)
988        else:
989            if len(self.models_.keys()) == 0:
990                if isinstance(X_test, pd.DataFrame):
991                    self.fit(X_train, X_test.iloc[0: self.h, :])
992                else:
993                    self.fit(X_train, X_test[0: self.h, :])
994
995        return self.models_

This function returns all the model objects trained in the fit function. If fit has not been called yet, it is called first and the models are then returned.

Parameters:

X_train : array-like,
    Training vectors, where rows are samples
    and columns are features.

X_test : array-like,
    Testing vectors, where rows are samples
    and columns are features.

Returns:

models: dict-object,
    Returns a dictionary with each model pipeline as value
    with key as name of models.
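Continuing the sketch above, `provide_models` gives access to the fitted pipelines once (or instead of) `fit` has been called; the key names below are illustrative:

```python
# Hedged sketch: retrieve the fitted pipelines after scoring.
models = regr.provide_models(X_train, X_test)  # calls fit first if nothing was fitted yet
print(list(models.keys())[:5])                 # names of the underlying base regressors
first_pipe = models[list(models.keys())[0]]    # each value is a fitted (Deep)MTS pipeline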
class MLARCH:
 10class MLARCH:
 11    """Machine Learning-agnostic ARCH for nearly-stationary time series (e.g., returns)
 12
 13    Parameters
 14    ----------
 15    model_mean : object
 16        Model for mean component
 17    model_sigma : object
 18        Model for volatility component (sklearn regressor)
 19    model_residuals : object
 20        Model for standardized residuals
 21    lags_vol : int, default=10
 22        Number of lags for squared residuals in volatility model
 23    """
 24
 25    def __init__(self, model_mean, model_sigma, model_residuals, lags_vol=10):
 26        self.model_mean = model_mean
 27        self.model_sigma = model_sigma
 28        self.model_residuals = model_residuals
 29        self.lags_vol = lags_vol
 30
 31    def _create_lags(self, y, lags):
 32        """Create lagged feature matrix"""
 33        n = len(y)
 34        if n <= lags:
 35            raise ValueError(f"Series length {n} must be > lags {lags}")
 36        X = np.zeros((n - lags, lags))
 37        for i in range(lags):
 38            X[:, i] = y[i: (n - lags + i)]
 39        return X
 40
 41    def fit(self, y, **kwargs):
 42        """Fit the MLARCH model
 43
 44        Parameters
 45        ----------
 46        y : array-like
 47            Target time series (should be stationary, e.g., returns)
 48
 49        Returns
 50        -------
 51        self
 52        """
 53        # Format input
 54        if isinstance(y, (pd.Series, pd.DataFrame)):
 55            y = y.values
 56        y = y.ravel()
 57
 58        if len(y) < self.lags_vol + 20:
 59            raise ValueError(f"Need at least {self.lags_vol + 20} observations")
 60
 61        # Step 1: Fit mean model
 62        self.model_mean.fit(y.reshape(-1, 1))
 63        mean_residuals = self.model_mean.residuals_.ravel()
 64
 65        # Step 2: Fit ARCH volatility model on lagged squared residuals
 66        resid_squared = mean_residuals**2
 67        X_vol = self._create_lags(resid_squared, self.lags_vol)
 68        y_vol = np.log(resid_squared[self.lags_vol:] + 1e-8)
 69
 70        self.model_sigma.fit(X_vol, y_vol)
 71
 72        # Get fitted volatility
 73        fitted_log_sigma = self.model_sigma.predict(X_vol)
 74        fitted_sigma = np.exp(fitted_log_sigma)
 75
 76        # Step 3: Compute standardized residuals with proper scaling
 77        standardized_residuals = mean_residuals[self.lags_vol:] / np.sqrt(
 78            fitted_sigma
 79        )
 80
 81        # Enforce zero mean and unit variance
 82        self.z_mean_ = np.mean(standardized_residuals)
 83        self.z_std_ = np.std(standardized_residuals)
 84        standardized_residuals = (
 85            standardized_residuals - self.z_mean_
 86        ) / self.z_std_
 87
 88        # Step 4: Fit residuals model
 89        self.model_residuals.fit(standardized_residuals.reshape(-1, 1))
 90
 91        # Store for prediction
 92        self.last_residuals_squared_ = resid_squared[-self.lags_vol:]
 93
 94        # Store diagnostics
 95        self.fitted_volatility_mean_ = np.mean(np.sqrt(fitted_sigma))
 96        self.fitted_volatility_std_ = np.std(np.sqrt(fitted_sigma))
 97
 98        return self
 99
100    def predict(self, h=5, level=95, return_sims=False):
101        """Predict future values
102
103        Parameters
104        ----------
105        h : int
106            Forecast horizon
107        level : int
108            Confidence level for prediction intervals
109        return_sims : bool
110            If True, return full simulation paths
111
112        Returns
113        -------
114        DescribeResult
115            Named tuple with mean, sims, lower, upper
116        """
117        DescribeResult = namedtuple(
118            "DescribeResult", ("mean", "sims", "lower", "upper")
119        )
120
121        # Get mean forecast
122        mean_forecast = self.model_mean.predict(h=h).values.ravel()
123
124        # Recursive ARCH volatility forecasting
125        sigma_forecast = np.zeros(h)
126        current_lags = self.last_residuals_squared_.copy()
127
128        for i in range(h):
129            X_t = current_lags.reshape(1, -1)
130            log_sigma_t = self.model_sigma.predict(X_t)[0]
131            sigma_forecast[i] = np.exp(log_sigma_t)
132            # Update lags with predicted variance
133            current_lags = np.append(current_lags[1:], sigma_forecast[i])
134
135        # Predict standardized residuals and rescale
136        z_forecast_normalized = self.model_residuals.predict(h=h).values.ravel()
137        z_forecast = z_forecast_normalized * self.z_std_ + self.z_mean_
138
139        # Combine: μ + z × σ
140        point_forecast = mean_forecast + z_forecast * np.sqrt(sigma_forecast)
141
142        # Generate prediction intervals
143        sims = None
144        if return_sims:
145            preds_z_for_sims = self.model_residuals.predict(h=h)
146            if hasattr(preds_z_for_sims, "sims") and isinstance(
147                preds_z_for_sims.sims, pd.DataFrame
148            ):
149                sims_z_normalized = preds_z_for_sims.sims
150                n_sims = sims_z_normalized.shape[1]
151
152                sims = np.zeros((h, n_sims))
153                for sim_idx in range(n_sims):
154                    # Rescale simulations
155                    z_sim = (
156                        sims_z_normalized.iloc[:, sim_idx].values * self.z_std_
157                        + self.z_mean_
158                    )
159                    sims[:, sim_idx] = mean_forecast + z_sim * np.sqrt(
160                        sigma_forecast
161                    )
162
163                alpha = 1 - level / 100
164                lower_bound = np.quantile(sims, alpha / 2, axis=1)
165                upper_bound = np.quantile(sims, 1 - alpha / 2, axis=1)
166            else:
167                # Fallback to Gaussian
168                z_score = norm.ppf(1 - (1 - level / 100) / 2)
169                margin = z_score * np.sqrt(sigma_forecast) * self.z_std_
170                lower_bound = point_forecast - margin
171                upper_bound = point_forecast + margin
172        else:
173            # Gaussian intervals with proper scaling
174            z_score = norm.ppf(1 - (1 - level / 100) / 2)
175            margin = z_score * np.sqrt(sigma_forecast) * self.z_std_
176            lower_bound = point_forecast - margin
177            upper_bound = point_forecast + margin
178
179        return DescribeResult(point_forecast, sims, lower_bound, upper_bound)

Machine Learning-agnostic ARCH for nearly-stationary time series (e.g., returns)

Parameters

model_mean : object
    Model for mean component

model_sigma : object
    Model for volatility component (sklearn regressor)

model_residuals : object
    Model for standardized residuals

lags_vol : int, default=10
    Number of lags for squared residuals in volatility model

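A hypothetical wiring of MLARCH, sketched under the assumption that `model_mean` and `model_residuals` behave like nnetsauce's `MTS` (expose a `residuals_` attribute and a DataFrame-returning `predict(h=...)`) while `model_sigma` is any scikit-learn regressor; the component choices below are illustrative, not prescriptive:

```python
# Hedged sketch: component choices are assumptions, not recommendations.
import numpy as np
import nnetsauce as ns
from sklearn.linear_model import Ridge, RidgeCV

np.random.seed(42)
returns = np.random.randn(300) * 0.01            # synthetic, nearly-stationary "returns"

mlarch = ns.MLARCH(
    model_mean=ns.MTS(Ridge(), lags=2),          # conditional mean (must expose residuals_)
    model_sigma=RidgeCV(),                       # log-variance regression on lagged squared residuals
    model_residuals=ns.MTS(Ridge(), lags=2),     # model for standardized residuals
    lags_vol=10,
)
mlarch.fit(returns)
res = mlarch.predict(h=5, level=95)              # DescribeResult(mean, sims, lower, upper)
print(res.mean, res.lower, res.upper)
```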
def fit(self, y, **kwargs):
41    def fit(self, y, **kwargs):
42        """Fit the MLARCH model
43
44        Parameters
45        ----------
46        y : array-like
47            Target time series (should be stationary, e.g., returns)
48
49        Returns
50        -------
51        self
52        """
53        # Format input
54        if isinstance(y, (pd.Series, pd.DataFrame)):
55            y = y.values
56        y = y.ravel()
57
58        if len(y) < self.lags_vol + 20:
59            raise ValueError(f"Need at least {self.lags_vol + 20} observations")
60
61        # Step 1: Fit mean model
62        self.model_mean.fit(y.reshape(-1, 1))
63        mean_residuals = self.model_mean.residuals_.ravel()
64
65        # Step 2: Fit ARCH volatility model on lagged squared residuals
66        resid_squared = mean_residuals**2
67        X_vol = self._create_lags(resid_squared, self.lags_vol)
68        y_vol = np.log(resid_squared[self.lags_vol:] + 1e-8)
69
70        self.model_sigma.fit(X_vol, y_vol)
71
72        # Get fitted volatility
73        fitted_log_sigma = self.model_sigma.predict(X_vol)
74        fitted_sigma = np.exp(fitted_log_sigma)
75
76        # Step 3: Compute standardized residuals with proper scaling
77        standardized_residuals = mean_residuals[self.lags_vol:] / np.sqrt(
78            fitted_sigma
79        )
80
81        # Enforce zero mean and unit variance
82        self.z_mean_ = np.mean(standardized_residuals)
83        self.z_std_ = np.std(standardized_residuals)
84        standardized_residuals = (
85            standardized_residuals - self.z_mean_
86        ) / self.z_std_
87
88        # Step 4: Fit residuals model
89        self.model_residuals.fit(standardized_residuals.reshape(-1, 1))
90
91        # Store for prediction
92        self.last_residuals_squared_ = resid_squared[-self.lags_vol:]
93
94        # Store diagnostics
95        self.fitted_volatility_mean_ = np.mean(np.sqrt(fitted_sigma))
96        self.fitted_volatility_std_ = np.std(np.sqrt(fitted_sigma))
97
98        return self

Fit the MLARCH model

Parameters

y : array-like
    Target time series (should be stationary, e.g., returns)

Returns

self

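To make the volatility step concrete, here is a small self-contained sketch of Step 2 above (regressing the log of squared residuals on their lags, then standardizing); the Ridge regressor and synthetic residuals are assumptions for illustration only:

```python
# Hedged sketch of the log-variance regression used for the volatility component.
import numpy as np
from sklearn.linear_model import Ridge

rng = np.random.default_rng(0)
mean_residuals = rng.normal(scale=0.01, size=200)   # stand-in for model_mean residuals
lags_vol = 10

resid_squared = mean_residuals ** 2
n = len(resid_squared)
# lagged design matrix, mirroring _create_lags: column i holds resid_squared[i : n - lags_vol + i]
X_vol = np.column_stack([resid_squared[i:(n - lags_vol + i)] for i in range(lags_vol)])
y_vol = np.log(resid_squared[lags_vol:] + 1e-8)     # log-variance target

model_sigma = Ridge().fit(X_vol, y_vol)
fitted_sigma = np.exp(model_sigma.predict(X_vol))   # fitted conditional variances
standardized = mean_residuals[lags_vol:] / np.sqrt(fitted_sigma)
```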
def predict(self, h=5, level=95, return_sims=False):
100    def predict(self, h=5, level=95, return_sims=False):
101        """Predict future values
102
103        Parameters
104        ----------
105        h : int
106            Forecast horizon
107        level : int
108            Confidence level for prediction intervals
109        return_sims : bool
110            If True, return full simulation paths
111
112        Returns
113        -------
114        DescribeResult
115            Named tuple with mean, sims, lower, upper
116        """
117        DescribeResult = namedtuple(
118            "DescribeResult", ("mean", "sims", "lower", "upper")
119        )
120
121        # Get mean forecast
122        mean_forecast = self.model_mean.predict(h=h).values.ravel()
123
124        # Recursive ARCH volatility forecasting
125        sigma_forecast = np.zeros(h)
126        current_lags = self.last_residuals_squared_.copy()
127
128        for i in range(h):
129            X_t = current_lags.reshape(1, -1)
130            log_sigma_t = self.model_sigma.predict(X_t)[0]
131            sigma_forecast[i] = np.exp(log_sigma_t)
132            # Update lags with predicted variance
133            current_lags = np.append(current_lags[1:], sigma_forecast[i])
134
135        # Predict standardized residuals and rescale
136        z_forecast_normalized = self.model_residuals.predict(h=h).values.ravel()
137        z_forecast = z_forecast_normalized * self.z_std_ + self.z_mean_
138
139        # Combine: μ + z × σ
140        point_forecast = mean_forecast + z_forecast * np.sqrt(sigma_forecast)
141
142        # Generate prediction intervals
143        sims = None
144        if return_sims:
145            preds_z_for_sims = self.model_residuals.predict(h=h)
146            if hasattr(preds_z_for_sims, "sims") and isinstance(
147                preds_z_for_sims.sims, pd.DataFrame
148            ):
149                sims_z_normalized = preds_z_for_sims.sims
150                n_sims = sims_z_normalized.shape[1]
151
152                sims = np.zeros((h, n_sims))
153                for sim_idx in range(n_sims):
154                    # Rescale simulations
155                    z_sim = (
156                        sims_z_normalized.iloc[:, sim_idx].values * self.z_std_
157                        + self.z_mean_
158                    )
159                    sims[:, sim_idx] = mean_forecast + z_sim * np.sqrt(
160                        sigma_forecast
161                    )
162
163                alpha = 1 - level / 100
164                lower_bound = np.quantile(sims, alpha / 2, axis=1)
165                upper_bound = np.quantile(sims, 1 - alpha / 2, axis=1)
166            else:
167                # Fallback to Gaussian
168                z_score = norm.ppf(1 - (1 - level / 100) / 2)
169                margin = z_score * np.sqrt(sigma_forecast) * self.z_std_
170                lower_bound = point_forecast - margin
171                upper_bound = point_forecast + margin
172        else:
173            # Gaussian intervals with proper scaling
174            z_score = norm.ppf(1 - (1 - level / 100) / 2)
175            margin = z_score * np.sqrt(sigma_forecast) * self.z_std_
176            lower_bound = point_forecast - margin
177            upper_bound = point_forecast + margin
178
179        return DescribeResult(point_forecast, sims, lower_bound, upper_bound)

Predict future values

Parameters

h : int
    Forecast horizon

level : int
    Confidence level for prediction intervals

return_sims : bool
    If True, return full simulation paths

Returns

DescribeResult
    Named tuple with mean, sims, lower, upper

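For clarity, a tiny numeric sketch of the Gaussian-fallback interval arithmetic used in `predict` (the values are made up; `z_std` stands in for the stored `z_std_`):

```python
# Illustrative values only; mirrors the Gaussian fallback branch of predict().
import numpy as np
from scipy.stats import norm

level = 95
mean_forecast = np.array([0.001, 0.002])       # μ from model_mean
sigma_forecast = np.array([1e-4, 1.2e-4])      # predicted variances from model_sigma
z_forecast = np.array([0.1, -0.2])             # rescaled standardized residuals
z_std = 1.0                                    # stored self.z_std_

point = mean_forecast + z_forecast * np.sqrt(sigma_forecast)
z_score = norm.ppf(1 - (1 - level / 100) / 2)
margin = z_score * np.sqrt(sigma_forecast) * z_std
lower, upper = point - margin, point + margin
```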
class MedianVotingRegressor(sklearn.ensemble._voting.VotingRegressor):
 6class MedianVotingRegressor(VotingRegressor):
 7    def predict(self, X):
 8        """
 9        Predict using the median of the base regressors' predictions.
10
11        Parameters:
12        X (array-like): Feature matrix for predictions.
13
14        Returns:
15        y_pred (array): Median of predictions from the base regressors.
16        """
17        predictions = np.asarray(
18            [regressor.predict(X) for regressor in self.estimators_]
19        )
20        return np.median(predictions, axis=0)

Prediction voting regressor for unfitted estimators.

A voting regressor is an ensemble meta-estimator that fits several base regressors, each on the whole dataset. Then it averages the individual predictions to form a final prediction.

Read more in the scikit-learn User Guide (voting_regressor).

New in version 0.21.

Parameters

estimators : list of (str, estimator) tuples
    Invoking the fit method on the VotingRegressor will fit clones of those
    original estimators that will be stored in the class attribute
    self.estimators_. An estimator can be set to 'drop' using set_params().

*Changed in version 0.21:*
``'drop'`` is accepted. Using None was deprecated in 0.22 and
support was removed in 0.24.

weights : array-like of shape (n_regressors,), default=None
    Sequence of weights (float or int) to weight the occurrences of predicted
    values before averaging. Uses uniform weights if None.

n_jobs : int, default=None
    The number of jobs to run in parallel for fit. None means 1 unless in a
    joblib.parallel_backend context. -1 means using all processors. See the
    Glossary entry for n_jobs for more details.

verbose : bool, default=False
    If True, the time elapsed while fitting will be printed as it is completed.

*New in version 0.23.*

Attributes

estimators_ : list of regressors
    The collection of fitted sub-estimators as defined in estimators that are
    not 'drop'.

named_estimators_ : sklearn.utils.Bunch
    Attribute to access any fitted sub-estimators by name.

*New in version 0.20.*

n_features_in_ : int
    Number of features seen during fit. Only defined if the underlying
    regressor exposes such an attribute when fit.

*New in version 0.24.*

feature_names_in_ : ndarray of shape (n_features_in_,)
    Names of features seen during fit. Only defined if the underlying
    estimators expose such an attribute when fit.

*New in version 1.0.*

See Also

VotingClassifier : Soft Voting/Majority Rule classifier.

Examples

>>> import numpy as np
>>> from sklearn.linear_model import LinearRegression
>>> from sklearn.ensemble import RandomForestRegressor
>>> from sklearn.ensemble import VotingRegressor
>>> from sklearn.neighbors import KNeighborsRegressor
>>> r1 = LinearRegression()
>>> r2 = RandomForestRegressor(n_estimators=10, random_state=1)
>>> r3 = KNeighborsRegressor()
>>> X = np.array([[1, 1], [2, 4], [3, 9], [4, 16], [5, 25], [6, 36]])
>>> y = np.array([2, 6, 12, 20, 30, 42])
>>> er = VotingRegressor([('lr', r1), ('rf', r2), ('r3', r3)])
>>> print(er.fit(X, y).predict(X))
[ 6.8...  8.4... 12.5... 17.8... 26...  34...]

In the following example, we drop the 'lr' estimator with VotingRegressor.set_params() and fit the remaining two estimators:

>>> er = er.set_params(lr='drop')
>>> er = er.fit(X, y)
>>> len(er.estimators_)
2
def predict(self, X):
 7    def predict(self, X):
 8        """
 9        Predict using the median of the base regressors' predictions.
10
11        Parameters:
12        X (array-like): Feature matrix for predictions.
13
14        Returns:
15        y_pred (array): Median of predictions from the base regressors.
16        """
17        predictions = np.asarray(
18            [regressor.predict(X) for regressor in self.estimators_]
19        )
20        return np.median(predictions, axis=0)

Predict using the median of the base regressors' predictions.

Parameters:

X (array-like): Feature matrix for predictions.

Returns:

y_pred (array): Median of predictions from the base regressors.

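A hedged usage sketch: since MedianVotingRegressor only overrides `predict`, it is constructed and fitted exactly like sklearn's VotingRegressor shown above; the toy data mirrors that example and the base estimators are arbitrary choices:

```python
# Hedged sketch: constructor and fit are inherited from sklearn's VotingRegressor.
import numpy as np
import nnetsauce as ns
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor

X = np.array([[1, 1], [2, 4], [3, 9], [4, 16], [5, 25], [6, 36]])
y = np.array([2, 6, 12, 20, 30, 42])

er = ns.MedianVotingRegressor(
    [("lr", LinearRegression()),
     ("knn", KNeighborsRegressor(n_neighbors=2)),
     ("dt", DecisionTreeRegressor(random_state=0))]
)
er.fit(X, y)
print(er.predict(X))   # element-wise median of the three base regressors' predictions
```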
class MTS(nnetsauce.Base):
  31class MTS(Base):
  32    """Univariate and multivariate time series (MTS) forecasting with Quasi-Randomized networks
  33
  34    Parameters:
  35
  36        obj: object.
  37            any object containing a method fit (obj.fit()) and a method predict
  38            (obj.predict()).
  39
  40        n_hidden_features: int.
  41            number of nodes in the hidden layer.
  42
  43        activation_name: str.
  44            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'.
  45
  46        a: float.
  47            hyperparameter for 'prelu' or 'elu' activation function.
  48
  49        nodes_sim: str.
  50            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
  51            'uniform'.
  52
  53        bias: boolean.
  54            indicates if the hidden layer contains a bias term (True) or not
  55            (False).
  56
  57        dropout: float.
  58            regularization parameter; (random) percentage of nodes dropped out
  59            of the training.
  60
  61        direct_link: boolean.
  62            indicates if the original predictors are included (True) in model's fitting or not (False).
  63
  64        n_clusters: int.
  65            number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering).
  66
  67        cluster_encode: bool.
  68            defines how the variable containing clusters is treated (default is one-hot)
  69            if `False`, then labels are used, without one-hot encoding.
  70
  71        type_clust: str.
  72            type of clustering method: currently k-means ('kmeans') or Gaussian
  73            Mixture Model ('gmm').
  74
  75        type_scaling: a tuple of 3 strings.
  76            scaling methods for inputs, hidden layer, and clustering respectively
  77            (and when relevant).
  78            Currently available: standardization ('std') or MinMax scaling ('minmax').
  79
  80        lags: int.
  81            number of lags used for each time series.
  82            If string, lags must be one of 'AIC', 'AICc', or 'BIC'.
  83
  84        type_pi: str.
  85            type of prediction interval; currently:
  86            - "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case
  87            - "quantile": use model-agnostic quantile regression under the hood
  88            - "kde": based on Kernel Density Estimation of in-sample residuals
  89            - "bootstrap": based on independent bootstrap of in-sample residuals
  90            - "block-bootstrap": based on basic block bootstrap of in-sample residuals
  91            - "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals
  92            - "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals
  93            - "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals
  94            - "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals
  95            - "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals
  96            - "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals
  97            - based on copulas of in-sample residuals: 'vine-tll', 'vine-bb1', 'vine-bb6', 'vine-bb7', 'vine-bb8', 'vine-clayton',
  98            'vine-frank', 'vine-gaussian', 'vine-gumbel', 'vine-indep', 'vine-joe', 'vine-student'
  99            - 'scp-vine-tll', 'scp-vine-bb1', 'scp-vine-bb6', 'scp-vine-bb7', 'scp-vine-bb8', 'scp-vine-clayton',
 100            'scp-vine-frank', 'scp-vine-gaussian', 'scp-vine-gumbel', 'scp-vine-indep', 'scp-vine-joe', 'scp-vine-student'
 101            - 'scp2-vine-tll', 'scp2-vine-bb1', 'scp2-vine-bb6', 'scp2-vine-bb7', 'scp2-vine-bb8', 'scp2-vine-clayton',
 102            'scp2-vine-frank', 'scp2-vine-gaussian', 'scp2-vine-gumbel', 'scp2-vine-indep', 'scp2-vine-joe', 'scp2-vine-student'
 103
 104        level: int.
 105            level of confidence for `type_pi == 'quantile'` (default is `95`)
 106
 107        block_size: int.
 108            size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap").
 109            Default is round(3.15*(n_residuals^(1/3)))
 110
 111        replications: int.
 112            number of replications (if needed, for predictive simulation). Default is 'None'.
 113
 114        kernel: str.
 115            the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'.
 116
 117        agg: str.
 118            either "mean" or "median" for simulation of bootstrap aggregating
 119
 120        seed: int.
 121            reproducibility seed for nodes_sim=='uniform' or predictive simulation.
 122
 123        backend: str.
 124            "cpu" or "gpu" or "tpu".
 125
 126        verbose: int.
 127            0: not printing; 1: printing
 128
 129        show_progress: bool.
 130            True: progress bar when fitting each series; False: no progress bar when fitting each series
 131
 132    Attributes:
 133
 134        fit_objs_: dict
 135            objects adjusted to each individual time series
 136
 137        y_: {array-like}
 138            MTS responses (most recent observations first)
 139
 140        X_: {array-like}
 141            MTS lags
 142
 143        xreg_: {array-like}
 144            external regressors
 145
 146        y_means_: dict
 147            a dictionary of each series mean values
 148
 149        preds_: {array-like}
 150            successive model predictions
 151
 152        preds_std_: {array-like}
 153            standard deviation around the predictions for Bayesian base learners (`obj`)
 154
 155        gaussian_preds_std_: {array-like}
 156            standard deviation around the predictions for `type_pi='gaussian'`
 157
 158        return_std_: boolean
 159            return uncertainty or not (set in predict)
 160
 161        df_: data frame
 162            the input data frame, in case a data.frame is provided to `fit`
 163
 164        n_obs_: int
 165            number of time series observations (number of rows for multivariate)
 166
 167        level_: int
 168            level of confidence for prediction intervals (default is 95)
 169
 170        residuals_: {array-like}
 171            in-sample residuals (for `type_pi` not conformal prediction) or calibrated residuals
 172            (for `type_pi` in conformal prediction)
 173
 174        residuals_sims_: tuple of {array-like}
 175            simulations of in-sample residuals (for `type_pi` not conformal prediction) or
 176            calibrated residuals (for `type_pi` in conformal prediction)
 177
 178        kde_: A scikit-learn object, see https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KernelDensity.html
 179
 180        residuals_std_dev_: residuals standard deviation
 181
 182    Examples:
 183
 184    Example 1:
 185
 186    ```python
 187    import nnetsauce as ns
 188    import numpy as np
 189    from sklearn import linear_model
 190    np.random.seed(123)
 191
 192    M = np.random.rand(10, 3)
 193    M[:,0] = 10*M[:,0]
 194    M[:,2] = 25*M[:,2]
 195    print(M)
 196
 197    # Adjust Bayesian Ridge
 198    regr4 = linear_model.BayesianRidge()
 199    obj_MTS = ns.MTS(regr4, lags = 1, n_hidden_features=5)
 200    obj_MTS.fit(M)
 201    print(obj_MTS.predict())
 202
 203    # with credible intervals
 204    print(obj_MTS.predict(return_std=True, level=80))
 205
 206    print(obj_MTS.predict(return_std=True, level=95))
 207    ```
 208
 209    Example 2:
 210
 211    ```python
 212    import nnetsauce as ns
 213    import numpy as np
 214    from sklearn import linear_model
 215    import pandas as pd
 216    dataset = {
 217    'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
 218    'series1' : [34, 30, 35.6, 33.3, 38.1],
 219    'series2' : [4, 5.5, 5.6, 6.3, 5.1],
 220    'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
 221    df = pd.DataFrame(dataset).set_index('date')
 222    print(df)
 223
 224    # Adjust Bayesian Ridge
 225    regr5 = linear_model.BayesianRidge()
 226    obj_MTS = ns.MTS(regr5, lags = 1, n_hidden_features=5)
 227    obj_MTS.fit(df)
 228    print(obj_MTS.predict())
 229
 230    # with credible intervals
 231    print(obj_MTS.predict(return_std=True, level=80))
 232
 233    print(obj_MTS.predict(return_std=True, level=95))
 234    ```
 235    """
 236
 237    # construct the object -----
 238
 239    def __init__(
 240        self,
 241        obj,
 242        n_hidden_features=5,
 243        activation_name="relu",
 244        a=0.01,
 245        nodes_sim="sobol",
 246        bias=True,
 247        dropout=0,
 248        direct_link=True,
 249        n_clusters=2,
 250        cluster_encode=True,
 251        type_clust="kmeans",
 252        type_scaling=("std", "std", "std"),
 253        lags=1,
 254        type_pi="kde",
 255        level=95,
 256        block_size=None,
 257        replications=None,
 258        kernel="gaussian",
 259        agg="mean",
 260        seed=123,
 261        backend="cpu",
 262        verbose=0,
 263        show_progress=True,
 264    ):
 265        super().__init__(
 266            n_hidden_features=n_hidden_features,
 267            activation_name=activation_name,
 268            a=a,
 269            nodes_sim=nodes_sim,
 270            bias=bias,
 271            dropout=dropout,
 272            direct_link=direct_link,
 273            n_clusters=n_clusters,
 274            cluster_encode=cluster_encode,
 275            type_clust=type_clust,
 276            type_scaling=type_scaling,
 277            seed=seed,
 278            backend=backend,
 279        )
 280
 281        # Add validation for lags parameter
 282        if isinstance(lags, str):
 283            assert lags in (
 284                "AIC",
 285                "AICc",
 286                "BIC",
 287            ), "if string, lags must be one of 'AIC', 'AICc', or 'BIC'"
 288        else:
 289            assert (
 290                int(lags) == lags
 291            ), "if numeric, lags parameter should be an integer"
 292
 293        self.obj = obj
 294        self.n_series = None
 295        self.lags = lags
 296        self.type_pi = type_pi
 297        self.level = level
 298        if self.type_pi == "quantile":
 299            self.obj = QuantileRegressor(
 300                self.obj, level=self.level, scoring="conformal"
 301            )
 302        self.block_size = block_size
 303        self.replications = replications
 304        self.kernel = kernel
 305        self.agg = agg
 306        self.verbose = verbose
 307        self.show_progress = show_progress
 308        self.series_names = ["series0"]
 309        self.input_dates = None
 310        self.quantiles = None
 311        self.fit_objs_ = {}
 312        self.y_ = None  # MTS responses (most recent observations first)
 313        self.X_ = None  # MTS lags
 314        self.xreg_ = None
 315        self.y_means_ = {}
 316        self.mean_ = None
 317        self.median_ = None
 318        self.upper_ = None
 319        self.lower_ = None
 320        self.output_dates_ = None
 321        self.preds_std_ = []
 322        self.gaussian_preds_std_ = None
 323        self.alpha_ = None
 324        self.return_std_ = None
 325        self.df_ = None
 326        self.residuals_ = []
 327        self.abs_calib_residuals_ = None
 328        self.calib_residuals_quantile_ = None
 329        self.residuals_sims_ = None
 330        self.kde_ = None
 331        self.sims_ = None
 332        self.residuals_std_dev_ = None
 333        self.n_obs_ = None
 334        self.level_ = None
 335        self.init_n_series_ = None
 336
 337    def fit(self, X, xreg=None, **kwargs):
 338        """Fit MTS model to training data X, with optional regressors xreg
 339
 340        Parameters:
 341
 342        X: {array-like}, shape = [n_samples, n_features]
 343            Training time series, where n_samples is the number
 344            of samples and n_features is the number of features;
 345            X must be in increasing order (most recent observations last)
 346
 347        xreg: {array-like}, shape = [n_samples, n_features_xreg]
 348            Additional (external) regressors to be passed to self.obj
 349            xreg must be in 'increasing' order (most recent observations last)
 350
 351        **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)
 352
 353        Returns:
 354
 355        self: object
 356        """
 357        try:
 358            self.init_n_series_ = X.shape[1]
 359        except IndexError as e:
 360            self.init_n_series_ = 1
 361
 362        # Automatic lag selection if requested
 363        if isinstance(self.lags, str):
 364            max_lags = min(25, X.shape[0] // 4)
 365            best_ic = float("inf")
 366            best_lags = 1
 367
 368            if self.verbose:
 369                print(
 370                    f"\nSelecting optimal number of lags using {self.lags}..."
 371                )
 372                iterator = tqdm(range(1, max_lags + 1))
 373            else:
 374                iterator = range(1, max_lags + 1)
 375
 376            for lag in iterator:
 377                # Convert DataFrame to numpy array before reversing
 378                if isinstance(X, pd.DataFrame):
 379                    X_values = X.values[::-1]
 380                else:
 381                    X_values = X[::-1]
 382
 383                # Try current lag value
 384                if self.init_n_series_ > 1:
 385                    mts_input = ts.create_train_inputs(X_values, lag)
 386                else:
 387                    mts_input = ts.create_train_inputs(
 388                        X_values.reshape(-1, 1), lag
 389                    )
 390
 391                # Cook training set and fit model
 392                dummy_y, scaled_Z = self.cook_training_set(
 393                    y=np.ones(mts_input[0].shape[0]), X=mts_input[1]
 394                )
 395                residuals_ = []
 396
 397                for i in range(self.init_n_series_):
 398                    y_mean = np.mean(mts_input[0][:, i])
 399                    centered_y_i = mts_input[0][:, i] - y_mean
 400                    self.obj.fit(X=scaled_Z, y=centered_y_i)
 401                    residuals_.append(
 402                        (centered_y_i - self.obj.predict(scaled_Z)).tolist()
 403                    )
 404
 405                self.residuals_ = np.asarray(residuals_).T
 406                ic = self._compute_information_criterion(
 407                    curr_lags=lag, criterion=self.lags
 408                )
 409
 410                if self.verbose:
 411                    print(f"Trying lags={lag}, {self.lags}={ic:.2f}")
 412
 413                if ic < best_ic:
 414                    best_ic = ic
 415                    best_lags = lag
 416
 417            if self.verbose:
 418                print(
 419                    f"\nSelected {best_lags} lags with {self.lags}={best_ic:.2f}"
 420                )
 421
 422            self.lags = best_lags
 423
 424        self.input_dates = None
 425        self.df_ = None
 426
 427        if isinstance(X, pd.DataFrame) is False:
 428            # input data set is a numpy array
 429            if xreg is None:
 430                X = pd.DataFrame(X)
 431                self.series_names = [
 432                    "series" + str(i) for i in range(X.shape[1])
 433                ]
 434            else:
 435                # xreg is not None
 436                X = mo.cbind(X, xreg)
 437                self.xreg_ = xreg
 438
 439        else:  # input data set is a DataFrame with column names
 440            X_index = None
 441            if X.index is not None:
 442                X_index = X.index
 443            if xreg is None:
 444                X = copy.deepcopy(mo.convert_df_to_numeric(X))
 445            else:
 446                X = copy.deepcopy(mo.cbind(mo.convert_df_to_numeric(X), xreg))
 447                self.xreg_ = xreg
 448            if X_index is not None:
 449                X.index = X_index
 450            self.series_names = X.columns.tolist()
 451
 452        if isinstance(X, pd.DataFrame):
 453            if self.df_ is None:
 454                self.df_ = X
 455                X = X.values
 456            else:
 457                input_dates_prev = pd.DatetimeIndex(self.df_.index.values)
 458                frequency = pd.infer_freq(input_dates_prev)
 459                self.df_ = pd.concat([self.df_, X], axis=0)
 460                self.input_dates = pd.date_range(
 461                    start=input_dates_prev[0],
 462                    periods=len(input_dates_prev) + X.shape[0],
 463                    freq=frequency,
 464                ).values.tolist()
 465                self.df_.index = self.input_dates
 466                X = self.df_.values
 467            self.df_.columns = self.series_names
 468        else:
 469            if self.df_ is None:
 470                self.df_ = pd.DataFrame(X, columns=self.series_names)
 471            else:
 472                self.df_ = pd.concat(
 473                    [self.df_, pd.DataFrame(X, columns=self.series_names)],
 474                    axis=0,
 475                )
 476
 477        self.input_dates = ts.compute_input_dates(self.df_)
 478
 479        try:
 480            # multivariate time series
 481            n, p = X.shape
  482        except ValueError:
 483            # univariate time series
 484            n = X.shape[0]
 485            p = 1
 486        self.n_obs_ = n
 487
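             # dummy response of ones: cook_training_set is used here only to build the
             # scaled feature matrix scaled_Z; each series is then fitted on its centered values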
 488        rep_1_n = np.repeat(1, n)
 489
 490        self.y_ = None
 491        self.X_ = None
 492        self.n_series = p
 493        self.fit_objs_.clear()
 494        self.y_means_.clear()
 495        residuals_ = []
 496        self.residuals_ = None
 497        self.residuals_sims_ = None
 498        self.kde_ = None
 499        self.sims_ = None
 500        self.scaled_Z_ = None
 501        self.centered_y_is_ = []
 502
 503        if self.init_n_series_ > 1:
 504            # multivariate time series
 505            mts_input = ts.create_train_inputs(X[::-1], self.lags)
 506        else:
 507            # univariate time series
 508            mts_input = ts.create_train_inputs(
 509                X.reshape(-1, 1)[::-1], self.lags
 510            )
 511
 512        self.y_ = mts_input[0]
 513
 514        self.X_ = mts_input[1]
 515
 516        dummy_y, scaled_Z = self.cook_training_set(y=rep_1_n, X=self.X_)
 517
 518        self.scaled_Z_ = scaled_Z
 519
 520        # loop on all the time series and adjust self.obj.fit
 521        if self.verbose > 0:
 522            print(
 523                f"\n Adjusting {type(self.obj).__name__} to multivariate time series... \n"
 524            )
 525
  526        if self.show_progress:
 527            iterator = tqdm(range(self.init_n_series_))
 528        else:
 529            iterator = range(self.init_n_series_)
 530
 531        if self.type_pi in (
 532            "gaussian",
 533            "kde",
 534            "bootstrap",
 535            "block-bootstrap",
 536        ) or self.type_pi.startswith("vine"):
 537            for i in iterator:
 538                y_mean = np.mean(self.y_[:, i])
 539                self.y_means_[i] = y_mean
 540                centered_y_i = self.y_[:, i] - y_mean
 541                self.centered_y_is_.append(centered_y_i)
 542                self.obj.fit(X=scaled_Z, y=centered_y_i)
 543                self.fit_objs_[i] = deepcopy(self.obj)
 544                residuals_.append(
 545                    (
 546                        centered_y_i - self.fit_objs_[i].predict(scaled_Z)
 547                    ).tolist()
 548                )
 549
 550        if self.type_pi == "quantile":
 551            for i in iterator:
 552                y_mean = np.mean(self.y_[:, i])
 553                self.y_means_[i] = y_mean
 554                centered_y_i = self.y_[:, i] - y_mean
 555                self.centered_y_is_.append(centered_y_i)
 556                self.obj.fit(X=scaled_Z, y=centered_y_i)
 557                self.fit_objs_[i] = deepcopy(self.obj)
 558
 559        if self.type_pi.startswith("scp"):
 560            # split conformal prediction
 561            for i in iterator:
 562                n_y = self.y_.shape[0]
 563                n_y_half = n_y // 2
 564                first_half_idx = range(0, n_y_half)
 565                second_half_idx = range(n_y_half, n_y)
 566                y_mean_temp = np.mean(self.y_[first_half_idx, i])
 567                centered_y_i_temp = self.y_[first_half_idx, i] - y_mean_temp
 568                self.obj.fit(X=scaled_Z[first_half_idx, :], y=centered_y_i_temp)
 569                # calibrated residuals actually
 570                residuals_.append(
 571                    (
 572                        self.y_[second_half_idx, i]
 573                        - (
 574                            y_mean_temp
 575                            + self.obj.predict(scaled_Z[second_half_idx, :])
 576                        )
 577                    ).tolist()
 578                )
 579                # fit on the second half
 580                y_mean = np.mean(self.y_[second_half_idx, i])
 581                self.y_means_[i] = y_mean
 582                centered_y_i = self.y_[second_half_idx, i] - y_mean
 583                self.obj.fit(X=scaled_Z[second_half_idx, :], y=centered_y_i)
 584                self.fit_objs_[i] = deepcopy(self.obj)
 585
 586        self.residuals_ = np.asarray(residuals_).T
 587
 588        if self.type_pi == "gaussian":
 589            self.gaussian_preds_std_ = np.std(self.residuals_, axis=0)
 590
 591        if self.type_pi.startswith("scp2"):
 592            # Calculate mean and standard deviation for each column
 593            data_mean = np.mean(self.residuals_, axis=0)
 594            self.residuals_std_dev_ = np.std(self.residuals_, axis=0)
 595            # Center and scale the array using broadcasting
 596            self.residuals_ = (
 597                self.residuals_ - data_mean[np.newaxis, :]
 598            ) / self.residuals_std_dev_[np.newaxis, :]
 599
  600        if self.replications is not None and "kde" in self.type_pi:
 601            if self.verbose > 0:
 602                print(f"\n Simulate residuals using {self.kernel} kernel... \n")
 603            assert self.kernel in (
 604                "gaussian",
 605                "tophat",
 606            ), "currently, 'kernel' must be either 'gaussian' or 'tophat'"
 607            kernel_bandwidths = {"bandwidth": np.logspace(-6, 6, 150)}
 608            grid = GridSearchCV(
 609                KernelDensity(kernel=self.kernel, **kwargs),
 610                param_grid=kernel_bandwidths,
 611            )
 612            grid.fit(self.residuals_)
 613
 614            if self.verbose > 0:
 615                print(
 616                    f"\n Best parameters for {self.kernel} kernel: {grid.best_params_} \n"
 617                )
 618
 619            self.kde_ = grid.best_estimator_
 620
 621        return self
 622
 623    def partial_fit(self, X, xreg=None, **kwargs):
  624        """Update (partial fit) the MTS model with training data X, and optional regressors xreg
 625
 626        Parameters:
 627
 628        X: {array-like}, shape = [n_samples, n_features]
 629            Training time series, where n_samples is the number
 630            of samples and n_features is the number of features;
 631            X must be in increasing order (most recent observations last)
 632
 633        xreg: {array-like}, shape = [n_samples, n_features_xreg]
 634            Additional (external) regressors to be passed to self.obj
 635            xreg must be in 'increasing' order (most recent observations last)
 636
  637        **kwargs: for now, additional parameters to be passed for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)
 638
 639        Returns:
 640
 641        self: object
 642        """
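             # A hedged usage sketch (hypothetical data and base learner, not from the
             # original source); the base learner must itself implement `partial_fit`:
             #
             #     import numpy as np
             #     from sklearn.linear_model import SGDRegressor
             #     import nnetsauce as ns
             #
             #     X_old = np.random.rand(60, 2)   # initial history, 2 series
             #     X_new = np.random.rand(5, 2)    # newly observed rows
             #     model = ns.MTS(SGDRegressor(), lags=2)
             #     model.fit(X_old)
             #     model.partial_fit(X_new)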
 643        try:
 644            self.init_n_series_ = X.shape[1]
 645        except IndexError as e:
 646            self.init_n_series_ = 1
 647
 648        # Automatic lag selection if requested
 649        if isinstance(self.lags, str):
 650            max_lags = min(25, X.shape[0] // 4)
 651            best_ic = float("inf")
 652            best_lags = 1
 653
 654            if self.verbose:
 655                print(
 656                    f"\nSelecting optimal number of lags using {self.lags}..."
 657                )
 658                iterator = tqdm(range(1, max_lags + 1))
 659            else:
 660                iterator = range(1, max_lags + 1)
 661
 662            for lag in iterator:
 663                # Convert DataFrame to numpy array before reversing
 664                if isinstance(X, pd.DataFrame):
 665                    X_values = X.values[::-1]
 666                else:
 667                    X_values = X[::-1]
 668
 669                # Try current lag value
 670                if self.init_n_series_ > 1:
 671                    mts_input = ts.create_train_inputs(X_values, lag)
 672                else:
 673                    mts_input = ts.create_train_inputs(
 674                        X_values.reshape(-1, 1), lag
 675                    )
 676
 677                # Cook training set and partial_fit model
 678                dummy_y, scaled_Z = self.cook_training_set(
 679                    y=np.ones(mts_input[0].shape[0]), X=mts_input[1]
 680                )
 681                residuals_ = []
 682
 683                for i in range(self.init_n_series_):
 684                    y_mean = np.mean(mts_input[0][:, i])
 685                    centered_y_i = mts_input[0][:, i] - y_mean
 686                    self.obj.partial_fit(X=scaled_Z, y=centered_y_i)
 687                    residuals_.append(
 688                        (centered_y_i - self.obj.predict(scaled_Z)).tolist()
 689                    )
 690
 691                self.residuals_ = np.asarray(residuals_).T
 692                ic = self._compute_information_criterion(
 693                    curr_lags=lag, criterion=self.lags
 694                )
 695
 696                if self.verbose:
 697                    print(f"Trying lags={lag}, {self.lags}={ic:.2f}")
 698
 699                if ic < best_ic:
 700                    best_ic = ic
 701                    best_lags = lag
 702
 703            if self.verbose:
 704                print(
 705                    f"\nSelected {best_lags} lags with {self.lags}={best_ic:.2f}"
 706                )
 707
 708            self.lags = best_lags
 709
 710        self.input_dates = None
 711        self.df_ = None
 712
  713        if not isinstance(X, pd.DataFrame):
 714            # input data set is a numpy array
 715            if xreg is None:
 716                X = pd.DataFrame(X)
 717                if len(X.shape) > 1:
 718                    self.series_names = [
 719                        "series" + str(i) for i in range(X.shape[1])
 720                    ]
 721                else:
 722                    self.series_names = ["series0"]
 723            else:
 724                # xreg is not None
 725                X = mo.cbind(X, xreg)
 726                self.xreg_ = xreg
 727
 728        else:  # input data set is a DataFrame with column names
 729            X_index = None
 730            if X.index is not None:
 731                X_index = X.index
 732            if xreg is None:
 733                X = copy.deepcopy(mo.convert_df_to_numeric(X))
 734            else:
 735                X = copy.deepcopy(mo.cbind(mo.convert_df_to_numeric(X), xreg))
 736                self.xreg_ = xreg
 737            if X_index is not None:
 738                X.index = X_index
 739            self.series_names = X.columns.tolist()
 740
 741        if isinstance(X, pd.DataFrame):
 742            if self.df_ is None:
 743                self.df_ = X
 744                X = X.values
 745            else:
 746                input_dates_prev = pd.DatetimeIndex(self.df_.index.values)
 747                frequency = pd.infer_freq(input_dates_prev)
 748                self.df_ = pd.concat([self.df_, X], axis=0)
 749                self.input_dates = pd.date_range(
 750                    start=input_dates_prev[0],
 751                    periods=len(input_dates_prev) + X.shape[0],
 752                    freq=frequency,
 753                ).values.tolist()
 754                self.df_.index = self.input_dates
 755                X = self.df_.values
 756            self.df_.columns = self.series_names
 757        else:
 758            if self.df_ is None:
 759                self.df_ = pd.DataFrame(X, columns=self.series_names)
 760            else:
 761                self.df_ = pd.concat(
 762                    [self.df_, pd.DataFrame(X, columns=self.series_names)],
 763                    axis=0,
 764                )
 765
 766        self.input_dates = ts.compute_input_dates(self.df_)
 767
 768        try:
 769            # multivariate time series
 770            n, p = X.shape
  771        except ValueError:
 772            # univariate time series
 773            n = X.shape[0]
 774            p = 1
 775        self.n_obs_ = n
 776
 777        rep_1_n = np.repeat(1, n)
 778
 779        self.y_ = None
 780        self.X_ = None
 781        self.n_series = p
 782        self.fit_objs_.clear()
 783        self.y_means_.clear()
 784        residuals_ = []
 785        self.residuals_ = None
 786        self.residuals_sims_ = None
 787        self.kde_ = None
 788        self.sims_ = None
 789        self.scaled_Z_ = None
 790        self.centered_y_is_ = []
 791
 792        if self.init_n_series_ > 1:
 793            # multivariate time series
 794            mts_input = ts.create_train_inputs(X[::-1], self.lags)
 795        else:
 796            # univariate time series
 797            mts_input = ts.create_train_inputs(
 798                X.reshape(-1, 1)[::-1], self.lags
 799            )
 800
 801        self.y_ = mts_input[0]
 802
 803        self.X_ = mts_input[1]
 804
 805        dummy_y, scaled_Z = self.cook_training_set(y=rep_1_n, X=self.X_)
 806
 807        self.scaled_Z_ = scaled_Z
 808
 809        # loop on all the time series and adjust self.obj.partial_fit
 810        if self.verbose > 0:
 811            print(
 812                f"\n Adjusting {type(self.obj).__name__} to multivariate time series... \n"
 813            )
 814
  815        if self.show_progress:
 816            iterator = tqdm(range(self.init_n_series_))
 817        else:
 818            iterator = range(self.init_n_series_)
 819
 820        if self.type_pi in (
 821            "gaussian",
 822            "kde",
 823            "bootstrap",
 824            "block-bootstrap",
 825        ) or self.type_pi.startswith("vine"):
 826            for i in iterator:
 827                y_mean = np.mean(self.y_[:, i])
 828                self.y_means_[i] = y_mean
 829                centered_y_i = self.y_[:, i] - y_mean
 830                self.centered_y_is_.append(centered_y_i)
 831                self.obj.partial_fit(X=scaled_Z, y=centered_y_i)
 832                self.fit_objs_[i] = deepcopy(self.obj)
 833                residuals_.append(
 834                    (
 835                        centered_y_i - self.fit_objs_[i].predict(scaled_Z)
 836                    ).tolist()
 837                )
 838
 839        if self.type_pi == "quantile":
 840            for i in iterator:
 841                y_mean = np.mean(self.y_[:, i])
 842                self.y_means_[i] = y_mean
 843                centered_y_i = self.y_[:, i] - y_mean
 844                self.centered_y_is_.append(centered_y_i)
 845                self.obj.partial_fit(X=scaled_Z, y=centered_y_i)
 846                self.fit_objs_[i] = deepcopy(self.obj)
 847
 848        if self.type_pi.startswith("scp"):
 849            # split conformal prediction
 850            for i in iterator:
 851                n_y = self.y_.shape[0]
 852                n_y_half = n_y // 2
 853                first_half_idx = range(0, n_y_half)
 854                second_half_idx = range(n_y_half, n_y)
 855                y_mean_temp = np.mean(self.y_[first_half_idx, i])
 856                centered_y_i_temp = self.y_[first_half_idx, i] - y_mean_temp
 857                self.obj.partial_fit(
 858                    X=scaled_Z[first_half_idx, :], y=centered_y_i_temp
 859                )
 860                # calibrated residuals actually
 861                residuals_.append(
 862                    (
 863                        self.y_[second_half_idx, i]
 864                        - (
 865                            y_mean_temp
 866                            + self.obj.predict(scaled_Z[second_half_idx, :])
 867                        )
 868                    ).tolist()
 869                )
 870                # partial_fit on the second half
 871                y_mean = np.mean(self.y_[second_half_idx, i])
 872                self.y_means_[i] = y_mean
 873                centered_y_i = self.y_[second_half_idx, i] - y_mean
 874                self.obj.partial_fit(
 875                    X=scaled_Z[second_half_idx, :], y=centered_y_i
 876                )
 877                self.fit_objs_[i] = deepcopy(self.obj)
 878
 879        self.residuals_ = np.asarray(residuals_).T
 880
 881        if self.type_pi == "gaussian":
 882            self.gaussian_preds_std_ = np.std(self.residuals_, axis=0)
 883
 884        if self.type_pi.startswith("scp2"):
 885            # Calculate mean and standard deviation for each column
 886            data_mean = np.mean(self.residuals_, axis=0)
 887            self.residuals_std_dev_ = np.std(self.residuals_, axis=0)
 888            # Center and scale the array using broadcasting
 889            self.residuals_ = (
 890                self.residuals_ - data_mean[np.newaxis, :]
 891            ) / self.residuals_std_dev_[np.newaxis, :]
 892
  893        if self.replications is not None and "kde" in self.type_pi:
 894            if self.verbose > 0:
 895                print(f"\n Simulate residuals using {self.kernel} kernel... \n")
 896            assert self.kernel in (
 897                "gaussian",
 898                "tophat",
 899            ), "currently, 'kernel' must be either 'gaussian' or 'tophat'"
 900            kernel_bandwidths = {"bandwidth": np.logspace(-6, 6, 150)}
 901            grid = GridSearchCV(
 902                KernelDensity(kernel=self.kernel, **kwargs),
 903                param_grid=kernel_bandwidths,
 904            )
 905            grid.fit(self.residuals_)
 906
 907            if self.verbose > 0:
 908                print(
 909                    f"\n Best parameters for {self.kernel} kernel: {grid.best_params_} \n"
 910                )
 911
 912            self.kde_ = grid.best_estimator_
 913
 914        return self
 915
 916    def _predict_quantiles(self, h, quantiles, **kwargs):
 917        """Predict arbitrary quantiles from simulated paths."""
 918        # Ensure output dates are set
 919        self.output_dates_, _ = ts.compute_output_dates(self.df_, h)
 920
 921        # Trigger full prediction to generate self.sims_
 922        if not hasattr(self, "sims_") or self.sims_ is None:
 923            _ = self.predict(h=h, level=95, **kwargs)  # Any level triggers sim
 924
 925        result_dict = {}
 926
 927        # Stack simulations: (R, h, n_series)
 928        sims_array = np.stack([sim.values for sim in self.sims_], axis=0)
 929
 930        # Compute quantiles over replication axis
 931        q_values = np.quantile(
 932            sims_array, quantiles, axis=0
 933        )  # (n_q, h, n_series)
 934
 935        for i, q in enumerate(quantiles):
 936            # Clean label: 0.05 → "05", 0.1 → "10", 0.95 → "95"
 937            q_label = (
 938                f"{int(q * 100):02d}"
 939                if (q * 100).is_integer()
 940                else f"{q:.3f}".replace(".", "_")
 941            )
 942            for series_id in range(self.init_n_series_):
 943                series_name = self.series_names[series_id]
 944                col_name = f"quantile_{q_label}_{series_name}"
 945                result_dict[col_name] = q_values[i, :, series_id]
 946
 947        df_return_quantiles = pd.DataFrame(
 948            result_dict, index=self.output_dates_
 949        )
 950
 951        return df_return_quantiles
 952
 953    def predict(self, h=5, level=95, quantiles=None, **kwargs):
  954        """Forecast all the time series, h steps ahead

             Parameters:

             h: {integer}
                 Forecasting horizon

             level: {integer} or {array-like}
                 Confidence level(s), in percent, for prediction intervals;
                 a list or array of levels returns a DataFrame of lower/upper bounds

             quantiles: {array-like} or None
                 Quantile levels in (0, 1); when provided, quantile forecasts are
                 returned instead of level-based prediction intervals

             **kwargs: additional parameters to be passed to the base learner's
                 `predict` method (e.g. `return_std` or `return_pi`)

             Returns:

             Point forecasts, plus prediction intervals or simulations, depending
             on `type_pi` and the arguments above
             """
 955
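             # A hedged sketch of the calling conventions (assuming a fitted model `model`):
             #     model.predict(h=10, level=95)                   # mean forecast, plus bounds depending on type_pi
             #     model.predict(h=10, level=[80, 95])             # DataFrame of lower_/upper_ columns per level
             #     model.predict(h=10, quantiles=[0.1, 0.5, 0.9])  # DataFrame of quantile_* columns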
 956        if quantiles is not None:
 957            # Validate
 958            quantiles = np.asarray(quantiles)
 959            if not ((quantiles > 0) & (quantiles < 1)).all():
 960                raise ValueError("quantiles must be between 0 and 1.")
 961            # Delegate to dedicated method
 962            return self._predict_quantiles(h=h, quantiles=quantiles, **kwargs)
 963
 964        if isinstance(level, list) or isinstance(level, np.ndarray):
 965            # Store results
 966            result_dict = {}
  967            # Loop over the requested confidence levels and collect lower/upper bounds
  968            # e.g. level=[80, 90, 95]
 969            for lev in level:
 970                # Get the forecast for this alpha
 971                res = self.predict(h=h, level=lev, **kwargs)
 972                # Adjust index and collect lower/upper bounds
 973                res.lower.index = pd.to_datetime(res.lower.index)
 974                res.upper.index = pd.to_datetime(res.upper.index)
 975                # Loop over each time series (multivariate) and flatten results
  976                    # one pair of lower/upper columns per series
                         for series in res.lower.columns:
 981                    ):  # Assumes 'lower' and 'upper' have multiple series
 982                        result_dict[f"lower_{lev}_{series}"] = (
 983                            res.lower[series].to_numpy().flatten()
 984                        )
 985                        result_dict[f"upper_{lev}_{series}"] = (
 986                            res.upper[series].to_numpy().flatten()
 987                        )
 988                else:
  989                    for series_id in range(self.n_series):  # one lower/upper pair per series
 992                        result_dict[f"lower_{lev}_{series_id}"] = (
 993                            res.lower[series_id, :].to_numpy().flatten()
 994                        )
 995                        result_dict[f"upper_{lev}_{series_id}"] = (
 996                            res.upper[series_id, :].to_numpy().flatten()
 997                        )
 998            return pd.DataFrame(result_dict, index=self.output_dates_)
 999
1000        # only one prediction interval
1001        self.output_dates_, frequency = ts.compute_output_dates(self.df_, h)
1002
1003        self.level_ = level
1004
1005        self.return_std_ = False  # do not remove (/!\)
1006
1007        self.mean_ = None  # do not remove (/!\)
1008
1009        self.mean_ = deepcopy(self.y_)  # do not remove (/!\)
1010
1011        self.lower_ = None  # do not remove (/!\)
1012
1013        self.upper_ = None  # do not remove (/!\)
1014
1015        self.sims_ = None  # do not remove (/!\)
1016
1017        y_means_ = np.asarray(
1018            [self.y_means_[i] for i in range(self.init_n_series_)]
1019        )
1020
1021        n_features = self.init_n_series_ * self.lags
1022
1023        self.alpha_ = 100 - level
1024
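             # two-sided Gaussian multiplier; e.g. level=95 -> alpha_=5 -> norm.ppf(0.975) ~= 1.96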
1025        pi_multiplier = norm.ppf(1 - self.alpha_ / 200)
1026
1027        if "return_std" in kwargs:  # bayesian forecasting
1028            self.return_std_ = True
1029            self.preds_std_ = []
1030            DescribeResult = namedtuple(
1031                "DescribeResult", ("mean", "lower", "upper")
1032            )  # to be updated
1033
1034        if "return_pi" in kwargs:  # split conformal, without simulation
1035            mean_pi_ = []
1036            lower_pi_ = []
1037            upper_pi_ = []
1038            median_pi_ = []
1039            DescribeResult = namedtuple(
1040                "DescribeResult", ("mean", "lower", "upper")
1041            )  # to be updated
1042
 1043        if self.kde_ is not None and "kde" in self.type_pi:  # kde
1044            target_cols = self.df_.columns[
1045                : self.init_n_series_
1046            ]  # Get target column names
1047            if self.verbose == 1:
1048                self.residuals_sims_ = tuple(
1049                    self.kde_.sample(
1050                        n_samples=h, random_state=self.seed + 100 * i
1051                    )  # Keep full sample
1052                    for i in tqdm(range(self.replications))
1053                )
1054            elif self.verbose == 0:
1055                self.residuals_sims_ = tuple(
1056                    self.kde_.sample(
1057                        n_samples=h, random_state=self.seed + 100 * i
1058                    )  # Keep full sample
1059                    for i in range(self.replications)
1060                )
1061
1062            # Convert to DataFrames after sampling
1063            self.residuals_sims_ = tuple(
1064                pd.DataFrame(
1065                    sim,  # Keep all columns
1066                    columns=target_cols,  # Use original target column names
1067                    index=self.output_dates_,
1068                )
1069                for sim in self.residuals_sims_
1070            )
1071
1072        if self.type_pi in ("bootstrap", "scp-bootstrap", "scp2-bootstrap"):
1073            assert self.replications is not None and isinstance(
1074                self.replications, int
1075            ), "'replications' must be provided and be an integer"
1076            if self.verbose == 1:
1077                self.residuals_sims_ = tuple(
1078                    ts.bootstrap(
1079                        self.residuals_,
1080                        h=h,
1081                        block_size=None,
1082                        seed=self.seed + 100 * i,
1083                    )
1084                    for i in tqdm(range(self.replications))
1085                )
1086            elif self.verbose == 0:
1087                self.residuals_sims_ = tuple(
1088                    ts.bootstrap(
1089                        self.residuals_,
1090                        h=h,
1091                        block_size=None,
1092                        seed=self.seed + 100 * i,
1093                    )
1094                    for i in range(self.replications)
1095                )
1096
1097        if self.type_pi in (
1098            "block-bootstrap",
1099            "scp-block-bootstrap",
1100            "scp2-block-bootstrap",
1101        ):
1102            if self.block_size is None:
1103                self.block_size = int(
1104                    np.ceil(3.15 * (self.residuals_.shape[0] ** (1 / 3)))
1105                )
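                     # heuristic block length ~ 3.15 * n**(1/3); e.g. n = 100 residual rows -> ceil(14.6) = 15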
1106
1107            assert self.replications is not None and isinstance(
1108                self.replications, int
1109            ), "'replications' must be provided and be an integer"
1110            if self.verbose == 1:
1111                self.residuals_sims_ = tuple(
1112                    ts.bootstrap(
1113                        self.residuals_,
1114                        h=h,
1115                        block_size=self.block_size,
1116                        seed=self.seed + 100 * i,
1117                    )
1118                    for i in tqdm(range(self.replications))
1119                )
1120            elif self.verbose == 0:
1121                self.residuals_sims_ = tuple(
1122                    ts.bootstrap(
1123                        self.residuals_,
1124                        h=h,
1125                        block_size=self.block_size,
1126                        seed=self.seed + 100 * i,
1127                    )
1128                    for i in range(self.replications)
1129                )
1130
1131        if "vine" in self.type_pi:
1132            if self.verbose == 1:
1133                self.residuals_sims_ = tuple(
1134                    vinecopula_sample(
1135                        x=self.residuals_,
1136                        n_samples=h,
1137                        method=self.type_pi,
1138                        random_state=self.seed + 100 * i,
1139                    )
1140                    for i in tqdm(range(self.replications))
1141                )
1142            elif self.verbose == 0:
1143                self.residuals_sims_ = tuple(
1144                    vinecopula_sample(
1145                        x=self.residuals_,
1146                        n_samples=h,
1147                        method=self.type_pi,
1148                        random_state=self.seed + 100 * i,
1149                    )
1150                    for i in range(self.replications)
1151                )
1152
1153        mean_ = deepcopy(self.mean_)
1154
1155        for i in range(h):
1156            new_obs = ts.reformat_response(mean_, self.lags)
1157            new_X = new_obs.reshape(1, -1)
1158            cooked_new_X = self.cook_test_set(new_X, **kwargs)
1159
1160            if "return_std" in kwargs:
1161                self.preds_std_.append(
1162                    [
1163                        np.asarray(
1164                            self.fit_objs_[i].predict(
1165                                cooked_new_X, return_std=True
1166                            )[1]
1167                        ).item()
1168                        for i in range(self.n_series)
1169                    ]
1170                )
1171
1172            if "return_pi" in kwargs:
 1173                for j in range(self.n_series):  # j, not i: avoid shadowing the horizon index
 1174                    preds_pi = self.fit_objs_[j].predict(cooked_new_X, **kwargs)
1175                    mean_pi_.append(preds_pi.mean[0])
1176                    lower_pi_.append(preds_pi.lower[0])
1177                    upper_pi_.append(preds_pi.upper[0])
1178
1179            if self.type_pi != "quantile":
1180                predicted_cooked_new_X = np.asarray(
1181                    [
1182                        np.asarray(
1183                            self.fit_objs_[i].predict(cooked_new_X)
1184                        ).item()
1185                        for i in range(self.init_n_series_)
1186                    ]
1187                )
1188            else:
1189                predicted_cooked_new_X = np.asarray(
1190                    [
1191                        np.asarray(
1192                            self.fit_objs_[i]
1193                            .predict(cooked_new_X, return_pi=True)
1194                            .upper
1195                        ).item()
1196                        for i in range(self.init_n_series_)
1197                    ]
1198                )
1199
1200            preds = np.asarray(y_means_ + predicted_cooked_new_X)
1201
1202            # Create full row with both predictions and external regressors
1203            if self.xreg_ is not None and "xreg" in kwargs:
1204                next_xreg = kwargs["xreg"].iloc[i: i + 1].values.flatten()
1205                full_row = np.concatenate([preds, next_xreg])
1206            else:
1207                full_row = preds
1208
1209            # Create a new row with same number of columns as mean_
1210            new_row = np.zeros((1, mean_.shape[1]))
1211            new_row[0, : full_row.shape[0]] = full_row
1212
1213            # Maintain the full dimensionality by using vstack instead of rbind
1214            mean_ = np.vstack([new_row, mean_[:-1]])
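                 # recursive forecasting: the newest forecast row is prepended (most recent first)
                 # and the oldest row is dropped, keeping a fixed-size window for the next step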
1215
1216        # Final output should only include the target columns
1217        self.mean_ = pd.DataFrame(
1218            mean_[0: min(h, self.n_obs_ - self.lags), : self.init_n_series_][
1219                ::-1
1220            ],
1221            columns=self.df_.columns[: self.init_n_series_],
1222            index=self.output_dates_,
1223        )
1224
1225        # function's return ----------------------------------------------------------------------
1226        if (
1227            (("return_std" not in kwargs) and ("return_pi" not in kwargs))
1228            and (self.type_pi not in ("gaussian", "scp"))
1229        ) or ("vine" in self.type_pi):
1230            if self.replications is None:
1231                return self.mean_.iloc[:, : self.init_n_series_]
1232
1233            # if "return_std" not in kwargs and self.replications is not None
1234            meanf = []
1235            medianf = []
1236            lower = []
1237            upper = []
1238
1239            if "scp2" in self.type_pi:
1240                if self.verbose == 1:
1241                    self.sims_ = tuple(
1242                        (
1243                            self.mean_
1244                            + self.residuals_sims_[i]
1245                            * self.residuals_std_dev_[np.newaxis, :]
1246                            for i in tqdm(range(self.replications))
1247                        )
1248                    )
1249                elif self.verbose == 0:
1250                    self.sims_ = tuple(
1251                        (
1252                            self.mean_
1253                            + self.residuals_sims_[i]
1254                            * self.residuals_std_dev_[np.newaxis, :]
1255                            for i in range(self.replications)
1256                        )
1257                    )
1258            else:
1259                if self.verbose == 1:
1260                    self.sims_ = tuple(
1261                        (
1262                            self.mean_ + self.residuals_sims_[i]
1263                            for i in tqdm(range(self.replications))
1264                        )
1265                    )
1266                elif self.verbose == 0:
1267                    self.sims_ = tuple(
1268                        (
1269                            self.mean_ + self.residuals_sims_[i]
1270                            for i in range(self.replications)
1271                        )
1272                    )
1273
1274            DescribeResult = namedtuple(
1275                "DescribeResult", ("mean", "sims", "lower", "upper")
1276            )
1277            for ix in range(self.init_n_series_):
1278                sims_ix = getsims(self.sims_, ix)
1279                if self.agg == "mean":
1280                    meanf.append(np.mean(sims_ix, axis=1))
1281                else:
1282                    medianf.append(np.median(sims_ix, axis=1))
1283                lower.append(np.quantile(sims_ix, q=self.alpha_ / 200, axis=1))
1284                upper.append(
1285                    np.quantile(sims_ix, q=1 - self.alpha_ / 200, axis=1)
1286                )
1287            self.mean_ = pd.DataFrame(
1288                np.asarray(meanf).T,
1289                columns=self.series_names[
1290                    : self.init_n_series_
1291                ],  # self.df_.columns,
1292                index=self.output_dates_,
1293            )
1294
1295            self.lower_ = pd.DataFrame(
1296                np.asarray(lower).T,
1297                columns=self.series_names[
1298                    : self.init_n_series_
1299                ],  # self.df_.columns,
1300                index=self.output_dates_,
1301            )
1302
1303            self.upper_ = pd.DataFrame(
1304                np.asarray(upper).T,
1305                columns=self.series_names[
1306                    : self.init_n_series_
1307                ],  # self.df_.columns,
1308                index=self.output_dates_,
1309            )
1310
1311            try:
1312                self.median_ = pd.DataFrame(
1313                    np.asarray(medianf).T,
1314                    columns=self.series_names[
1315                        : self.init_n_series_
1316                    ],  # self.df_.columns,
1317                    index=self.output_dates_,
1318                )
 1319            except Exception:
 1320                pass  # no median available when agg == "mean" (medianf stays empty)
1321
1322            return DescribeResult(
1323                self.mean_, self.sims_, self.lower_, self.upper_
1324            )
1325
1326        if (
1327            (("return_std" in kwargs) or ("return_pi" in kwargs))
1328            and (self.type_pi not in ("gaussian", "scp"))
1329        ) or "vine" in self.type_pi:
1330            DescribeResult = namedtuple(
1331                "DescribeResult", ("mean", "lower", "upper")
1332            )
1333
1334            self.mean_ = pd.DataFrame(
1335                np.asarray(self.mean_),
1336                columns=self.series_names,  # self.df_.columns,
1337                index=self.output_dates_,
1338            )
1339
1340            if "return_std" in kwargs:
1341                self.preds_std_ = np.asarray(self.preds_std_)
1342
1343                self.lower_ = pd.DataFrame(
1344                    self.mean_.values - pi_multiplier * self.preds_std_,
1345                    columns=self.series_names,  # self.df_.columns,
1346                    index=self.output_dates_,
1347                )
1348
1349                self.upper_ = pd.DataFrame(
1350                    self.mean_.values + pi_multiplier * self.preds_std_,
1351                    columns=self.series_names,  # self.df_.columns,
1352                    index=self.output_dates_,
1353                )
1354
1355            if "return_pi" in kwargs:
1356                self.lower_ = pd.DataFrame(
1357                    np.asarray(lower_pi_).reshape(h, self.n_series)
1358                    + y_means_[np.newaxis, :],
1359                    columns=self.series_names,  # self.df_.columns,
1360                    index=self.output_dates_,
1361                )
1362
1363                self.upper_ = pd.DataFrame(
1364                    np.asarray(upper_pi_).reshape(h, self.n_series)
1365                    + y_means_[np.newaxis, :],
1366                    columns=self.series_names,  # self.df_.columns,
1367                    index=self.output_dates_,
1368                )
1369
1370            res = DescribeResult(self.mean_, self.lower_, self.upper_)
1371
1372            if self.xreg_ is not None:
1373                if len(self.xreg_.shape) > 1:
1374                    res2 = mx.tuple_map(
1375                        res,
1376                        lambda x: mo.delete_last_columns(
1377                            x, num_columns=self.xreg_.shape[1]
1378                        ),
1379                    )
1380                else:
1381                    res2 = mx.tuple_map(
1382                        res, lambda x: mo.delete_last_columns(x, num_columns=1)
1383                    )
1384                return DescribeResult(res2[0], res2[1], res2[2])
1385
1386            return res
1387
1388        if self.type_pi == "gaussian":
1389            DescribeResult = namedtuple(
1390                "DescribeResult", ("mean", "lower", "upper")
1391            )
1392
1393            self.mean_ = pd.DataFrame(
1394                np.asarray(self.mean_),
1395                columns=self.series_names,  # self.df_.columns,
1396                index=self.output_dates_,
1397            )
1398
1399            # Use Bayesian std if available, otherwise use gaussian residual std
1400            if "return_std" in kwargs and len(self.preds_std_) > 0:
1401                preds_std_to_use = np.asarray(self.preds_std_)
1402            else:
1403                preds_std_to_use = self.gaussian_preds_std_
1404
1405            self.lower_ = pd.DataFrame(
1406                self.mean_.values - pi_multiplier * preds_std_to_use,
1407                columns=self.series_names,  # self.df_.columns,
1408                index=self.output_dates_,
1409            )
1410
1411            self.upper_ = pd.DataFrame(
1412                self.mean_.values + pi_multiplier * preds_std_to_use,
1413                columns=self.series_names,  # self.df_.columns,
1414                index=self.output_dates_,
1415            )
1416
1417            res = DescribeResult(self.mean_, self.lower_, self.upper_)
1418
1419            if self.xreg_ is not None:
1420                if len(self.xreg_.shape) > 1:
1421                    res2 = mx.tuple_map(
1422                        res,
1423                        lambda x: mo.delete_last_columns(
1424                            x, num_columns=self.xreg_.shape[1]
1425                        ),
1426                    )
1427                else:
1428                    res2 = mx.tuple_map(
1429                        res, lambda x: mo.delete_last_columns(x, num_columns=1)
1430                    )
1431                return DescribeResult(res2[0], res2[1], res2[2])
1432
1433            return res
1434
1435        if self.type_pi == "quantile":
 1436            DescribeResult = namedtuple("DescribeResult", ("mean",))
1437
1438            self.mean_ = pd.DataFrame(
1439                np.asarray(self.mean_),
1440                columns=self.series_names,  # self.df_.columns,
1441                index=self.output_dates_,
1442            )
1443
1444            res = DescribeResult(self.mean_)
1445
1446            if self.xreg_ is not None:
1447                if len(self.xreg_.shape) > 1:
1448                    res2 = mx.tuple_map(
1449                        res,
1450                        lambda x: mo.delete_last_columns(
1451                            x, num_columns=self.xreg_.shape[1]
1452                        ),
1453                    )
1454                else:
1455                    res2 = mx.tuple_map(
1456                        res, lambda x: mo.delete_last_columns(x, num_columns=1)
1457                    )
1458                return DescribeResult(res2[0])
1459
1460            return res
1461
1462        # After prediction loop, ensure sims only contain target columns
1463        if self.sims_ is not None:
1464            if self.verbose == 1:
1465                self.sims_ = tuple(
 1466                    sim[:h,]  # keep only the first h rows
1467                    for sim in tqdm(self.sims_)
1468                )
1469            elif self.verbose == 0:
1470                self.sims_ = tuple(
 1471                    sim[:h,]  # keep only the first h rows
1472                    for sim in self.sims_
1473                )
1474
1475            # Convert numpy arrays to DataFrames with proper columns
1476            self.sims_ = tuple(
1477                pd.DataFrame(
1478                    sim,
1479                    columns=self.df_.columns[: self.init_n_series_],
1480                    index=self.output_dates_,
1481                )
1482                for sim in self.sims_
1483            )
1484
1485        if self.type_pi in (
1486            "kde",
1487            "bootstrap",
1488            "block-bootstrap",
1489            "vine-copula",
1490        ):
1491            if self.xreg_ is not None:
1492                # Use getsimsxreg when external regressors are present
1493                target_cols = self.df_.columns[: self.init_n_series_]
1494                self.sims_ = getsimsxreg(
1495                    self.sims_, self.output_dates_, target_cols
1496                )
1497            else:
1498                # Use original getsims for backward compatibility
1499                self.sims_ = getsims(self.sims_)
1500
1501    def _crps_ensemble(self, y_true, simulations, axis=0):
1502        """
1503        Compute the Continuous Ranked Probability Score (CRPS) for an ensemble of simulations.
1504
1505        The CRPS is a measure of the distance between the cumulative distribution
1506        function (CDF) of a forecast and the CDF of the observed value. This method
1507        computes the CRPS in a vectorized form for an ensemble of simulations, efficiently
1508        handling the case where there is only one simulation.
1509
1510        Parameters
1511        ----------
1512        y_true : array_like, shape (n,)
1513            A 1D array of true values (observations).
1514            Each element represents the true value for a given sample.
1515
1516        simulations : array_like, shape (n, R)
1517            A 2D array of simulated values. Each row corresponds to a different sample
1518            and each column corresponds to a different simulation of that sample.
1519
1520        axis : int, optional, default=0
1521            Axis along which to transpose the simulations if needed.
1522            If axis=0, the simulations are transposed to shape (R, n).
1523
1524        Returns
1525        -------
1526        crps : ndarray, shape (n,)
1527            A 1D array of CRPS scores, one for each sample.
1528
1529        Notes
1530        -----
1531        The CRPS score is computed as:
1532
1533        CRPS(y_true, simulations) = E[|X - y|] - 0.5 * E[|X - X'|]
1534
1535        Where:
1536        - `X` is the ensemble of simulations.
1537        - `y` is the true value.
1538        - `X'` is a second independent sample from the ensemble.
1539
1540        The calculation is vectorized to optimize performance for large datasets.
1541
1542        The edge case where `R=1` (only one simulation) is handled by returning
1543        only `term1` (i.e., no ensemble spread).
1544        """
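             # Tiny traced example (default axis=0, R=2 simulations, one observation):
             #   y_true = np.array([0.0]); simulations = np.array([[1.0], [3.0]])
             #   term1 = mean(|[1, 3] - 0|) = 2.0
             #   coefficients = [-0.5, 0.5]; term2 = dot([1, 3], [-0.5, 0.5]) = 1.0
             #   CRPS = term1 - 0.5 * term2 = 1.5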
1545        sims = np.asarray(simulations)  # Convert simulations to numpy array
1546        if axis == 0:
1547            sims = sims.T  # Transpose if the axis is 0
1548        n, R = sims.shape  # n = number of samples, R = number of simulations
1549        # Term 1: E|X - y|, average absolute difference between simulations and true value
1550        term1 = np.mean(np.abs(sims - y_true[:, np.newaxis]), axis=1)
1551        # Handle edge case: if R == 1, return term1 (no spread in ensemble)
1552        if R == 1:
1553            return term1
1554        # Term 2: 0.5 * E|X - X'|, using efficient sorted formula
1555        sims_sorted = np.sort(sims, axis=1)  # Sort simulations along each row
1556        # Correct coefficients for efficient calculation
1557        j = np.arange(R)  # 0-indexed positions in the sorted simulations
1558        coefficients = (2 * (j + 1) - R - 1) / (
1559            R * (R - 1)
1560        )  # Efficient coefficient calculation
1561        # Dot product along the second axis (over the simulations)
1562        term2 = np.dot(sims_sorted, coefficients)
1563        # Return CRPS score: term1 - 0.5 * term2
1564        return term1 - 0.5 * term2
1565
1566    def score(
1567        self,
1568        X,
1569        training_index,
1570        testing_index,
1571        scoring=None,
1572        alpha=0.5,
1573        **kwargs,
1574    ):
 1575        """Train the model on training_index, then score its forecasts on testing_index.

             Parameters:

             X: {array-like}, shape = [n_samples, n_features]
                 Full time series data set

             training_index: {array-like} of integers
                 Indices of the training window

             testing_index: {array-like} of integers
                 Indices of the testing window; must not overlap training_index,
                 and its length defines the forecasting horizon

             scoring: str or None
                 'pinball', 'crps', or one of the regression metrics accepted below;
                 defaults to 'neg_root_mean_squared_error'

             alpha: float
                 Quantile level in (0, 1) used when scoring='pinball'

             **kwargs: additional parameters to be passed to `fit` and `predict`

             Returns:

             The score computed on the testing window
             """
1576
 1577        assert not set(training_index).intersection(
 1578            set(testing_index)
 1579        ), "Non-overlapping 'training_index' and 'testing_index' required"
1580
1581        # Dimensions
1582        try:
1583            # multivariate time series
1584            n, p = X.shape
 1585        except ValueError:
1586            # univariate time series
1587            n = X.shape[0]
1588            p = 1
1589
1590        # Training and testing sets
1591        if p > 1:
1592            X_train = X[training_index, :]
1593            X_test = X[testing_index, :]
1594        else:
1595            X_train = X[training_index]
1596            X_test = X[testing_index]
1597
1598        # Horizon
1599        h = len(testing_index)
1600        assert (
1601            len(training_index) + h
1602        ) <= n, "Please check lengths of training and testing windows"
1603
1604        # Fit and predict
1605        self.fit(X_train, **kwargs)
1606        preds = self.predict(h=h, **kwargs)
1607
1608        if scoring is None:
1609            scoring = "neg_root_mean_squared_error"
1610
1611        if scoring == "pinball":
1612            # Predict requested quantile
1613            q_pred = self.predict(h=h, quantiles=[alpha], **kwargs)
1614            # Handle multivariate
1615            scores = []
1616            for j in range(p):
1617                series_name = getattr(self, "series_names", [f"Series_{j}"])[j]
1618                q_label = (
1619                    f"{int(alpha * 100):02d}"
1620                    if (alpha * 100).is_integer()
1621                    else f"{alpha:.3f}".replace(".", "_")
1622                )
1623                col = f"quantile_{q_label}_{series_name}"
1624                if col not in q_pred.columns:
1625                    raise ValueError(
1626                        f"Column '{col}' not found in quantile forecast output."
1627                    )
1628                y_true_j = X_test[:, j]
1629                y_pred_j = q_pred[col].values
1630                # Compute pinball loss for this series
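                 # mean_pinball_loss: mean of alpha*(y - q) when y >= q, (1 - alpha)*(q - y) otherwise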
1631                loss = mean_pinball_loss(y_true_j, y_pred_j, alpha=alpha)
1632                scores.append(loss)
1633            # Return average over series
1634            return np.mean(scores)
1635
1636        if scoring == "crps":
1637            # Ensure simulations exist
1638            preds = self.predict(h=h, **kwargs)  # triggers self.sims_
1639            # Extract simulations: list of DataFrames → (R, h, p)
1640            sims_vals = np.stack(
1641                [sim.values for sim in self.sims_], axis=0
1642            )  # (R, h, p)
1643            crps_scores = []
1644            for j in range(p):
1645                y_true_j = X_test[:, j]
1646                sims_j = sims_vals[:, :, j]  # (R, h)
1647                crps_j = self._crps_ensemble(np.asarray(y_true_j), sims_j)
1648                crps_scores.append(np.mean(crps_j))  # average over horizon
1649            return np.mean(crps_scores)  # average over series
1650
1651        # check inputs
1652        assert scoring in (
1653            "explained_variance",
1654            "neg_mean_absolute_error",
1655            "neg_mean_squared_error",
1656            "neg_root_mean_squared_error",
1657            "neg_mean_squared_log_error",
1658            "neg_median_absolute_error",
1659            "r2",
 1660        ), ("'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', "
 1661            "'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', "
 1662            "'neg_median_absolute_error', 'r2')")
1663
1664        scoring_options = {
1665            "explained_variance": skm2.explained_variance_score,
1666            "neg_mean_absolute_error": skm2.mean_absolute_error,
1667            "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2),
1668            "neg_root_mean_squared_error": lambda x, y: np.sqrt(
1669                np.mean((x - y) ** 2)
1670            ),
1671            "neg_mean_squared_log_error": skm2.mean_squared_log_error,
1672            "neg_median_absolute_error": skm2.median_absolute_error,
1673            "r2": skm2.r2_score,
1674        }
1675
1676        return scoring_options[scoring](X_test, preds)
1677
1678    def plot(self, series=None, type_axis="dates", type_plot="pi"):
1679        """Plot time series forecast
1680
1681        Parameters:
1682
1683        series: {integer} or {string}
1684            series index or name
1685
 1686        type_axis: {string}
                 "dates" (default) to use the input/output dates on the x-axis, or "numeric"

             type_plot: {string}
                 "pi" (default) for prediction intervals, or "spaghetti" for
                 simulated paths (requires `replications`)

             """
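             # A hedged usage sketch (assuming `predict` has already been called):
             #     model.plot("series0", type_plot="pi")        # intervals for one series
             #     model.plot(0, type_plot="spaghetti")         # simulated paths (needs `replications`)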
1687
1688        assert all(
1689            [
1690                self.mean_ is not None,
1691                self.lower_ is not None,
1692                self.upper_ is not None,
1693                self.output_dates_ is not None,
1694            ]
1695        ), "model forecasting must be obtained first (with predict)"
1696
1697        if series is None:
1698            # assert (
1699            #    self.init_n_series_ == 1
1700            # ), "please specify series index or name (n_series > 1)"
1701            series = 0
1702
1703        if isinstance(series, str):
1704            assert (
1705                series in self.series_names
1706            ), f"series {series} doesn't exist in the input dataset"
1707            series_idx = self.df_.columns.get_loc(series)
1708        else:
1709            assert isinstance(series, int) and (
1710                0 <= series < self.n_series
1711            ), f"check series index (< {self.n_series})"
1712            series_idx = series
1713
1714        y_all = list(self.df_.iloc[:, series_idx]) + list(
1715            self.mean_.iloc[:, series_idx]
1716        )
1717        y_test = list(self.mean_.iloc[:, series_idx])
1718        n_points_all = len(y_all)
1719        n_points_train = self.df_.shape[0]
1720
1721        if type_axis == "numeric":
 1722            x_all = list(range(n_points_all))
 1723            x_test = list(range(n_points_train, n_points_all))
1724
1725        if type_axis == "dates":  # use dates
1726            x_all = np.concatenate(
1727                (self.input_dates.values, self.output_dates_.values), axis=None
1728            )
1729            x_test = self.output_dates_.values
1730
1731        if type_plot == "pi":
1732            fig, ax = plt.subplots()
1733            ax.plot(x_all, y_all, "-")
1734            ax.plot(x_test, y_test, "-", color="orange")
1735            ax.fill_between(
1736                x_test,
1737                self.lower_.iloc[:, series_idx],
1738                self.upper_.iloc[:, series_idx],
1739                alpha=0.2,
1740                color="orange",
1741            )
1742            if self.replications is None:
1743                if self.n_series > 1:
1744                    plt.title(
1745                        f"prediction intervals for {series}",
1746                        loc="left",
1747                        fontsize=12,
1748                        fontweight=0,
1749                        color="black",
1750                    )
1751                else:
1752                    plt.title(
 1753                        "prediction intervals for input time series",
1754                        loc="left",
1755                        fontsize=12,
1756                        fontweight=0,
1757                        color="black",
1758                    )
1759                plt.show()
1760            else:  # self.replications is not None
1761                if self.n_series > 1:
1762                    plt.title(
1763                        f"prediction intervals for {self.replications} simulations of {series}",
1764                        loc="left",
1765                        fontsize=12,
1766                        fontweight=0,
1767                        color="black",
1768                    )
1769                else:
1770                    plt.title(
1771                        f"prediction intervals for {self.replications} simulations of input time series",
1772                        loc="left",
1773                        fontsize=12,
1774                        fontweight=0,
1775                        color="black",
1776                    )
1777                plt.show()
1778
1779        if type_plot == "spaghetti":
1780            palette = plt.get_cmap("Set1")
1781            sims_ix = getsims(self.sims_, series_idx)
1782            plt.plot(x_all, y_all, "-")
1783            for col_ix in range(
1784                sims_ix.shape[1]
1785            ):  # avoid this when there are thousands of simulations
1786                plt.plot(
1787                    x_test,
1788                    sims_ix[:, col_ix],
1789                    "-",
1790                    color=palette(col_ix),
1791                    linewidth=1,
1792                    alpha=0.9,
1793                )
1794            plt.plot(x_all, y_all, "-", color="black")
1795            plt.plot(x_test, y_test, "-", color="blue")
1796            # Add titles
1797            if self.n_series > 1:
1798                plt.title(
1799                    f"{self.replications} simulations of {series}",
1800                    loc="left",
1801                    fontsize=12,
1802                    fontweight=0,
1803                    color="black",
1804                )
1805            else:
1806                plt.title(
1807                    f"{self.replications} simulations of input time series",
1808                    loc="left",
1809                    fontsize=12,
1810                    fontweight=0,
1811                    color="black",
1812                )
1813            plt.xlabel("Time")
1814            plt.ylabel("Values")
1815            # Show the graph
1816            plt.show()
1817
1818    def cross_val_score(
1819        self,
1820        X,
1821        scoring="root_mean_squared_error",
1822        n_jobs=None,
1823        verbose=0,
1824        xreg=None,
1825        initial_window=5,
1826        horizon=3,
1827        fixed_window=False,
1828        show_progress=True,
1829        level=95,
1830        alpha=0.5,
1831        **kwargs,
1832    ):
1833        """Evaluate a score by time series cross-validation.
1834
1835        Parameters:
1836
1837            X: {array-like, sparse matrix} of shape (n_samples, n_features)
1838                The data to fit.
1839
1840            scoring: str or a function
 1841                A str in ('pinball', 'crps', 'root_mean_squared_error', 'mean_squared_error',
 1842                'mean_error', 'mean_absolute_error', 'mean_percentage_error',
 1843                'mean_absolute_percentage_error', 'winkler_score', 'coverage'),
 1844                or a function, such as `coverage` and `winkler_score` defined in `utils.timeseries`
1845
1846            n_jobs: int, default=None
1847                Number of jobs to run in parallel.
1848
1849            verbose: int, default=0
1850                The verbosity level.
1851
1852            xreg: array-like, optional (default=None)
1853                Additional (external) regressors to be passed to `fit`
1854                xreg must be in 'increasing' order (most recent observations last)
1855
1856            initial_window: int
1857                initial number of consecutive values in each training set sample
1858
1859            horizon: int
1860                number of consecutive values in test set sample
1861
1862            fixed_window: boolean
1863                if False, all training samples start at index 0, and the training
1864                window's size is increasing.
1865                if True, the training window's size is fixed, and the window is
1866                rolling forward
1867
1868            show_progress: boolean
1869                if True, a progress bar is printed
1870
1871            level: int
1872                confidence level for prediction intervals
1873
1874            alpha: float
1875                quantile level for pinball loss if scoring='pinball'
1876                0 < alpha < 1
1877
1878            **kwargs: dict
1879                additional parameters to be passed to `fit` and `predict`
1880
1881        Returns:
1882
1883            A tuple: raw errors for each split and descriptive statistics of these errors
1884
1885        """
1886        tscv = TimeSeriesSplit()
1887
1888        tscv_obj = tscv.split(
1889            X,
1890            initial_window=initial_window,
1891            horizon=horizon,
1892            fixed_window=fixed_window,
1893        )
1894
1895        if isinstance(scoring, str):
1896            assert scoring in (
1897                "pinball",
1898                "crps",
1899                "root_mean_squared_error",
1900                "mean_squared_error",
1901                "mean_error",
1902                "mean_absolute_error",
1903                "mean_percentage_error",
1904                "mean_absolute_percentage_error",
1905                "winkler_score",
1906                "coverage",
1907            ), "must have scoring in ('pinball', 'crps', 'root_mean_squared_error', 'mean_squared_error', 'mean_error', 'mean_absolute_error', 'mean_percentage_error', 'mean_absolute_percentage_error', 'winkler_score', 'coverage')"
1908
1909            def err_func(X_test, X_pred, scoring, alpha=0.5):
1910                if (self.replications is not None) or (
1911                    self.type_pi == "gaussian"
1912                ):  # probabilistic
1913                    if scoring == "pinball":
1914                        # Predict requested quantile
1915                        q_pred = self.predict(
1916                            h=len(X_test), quantiles=[alpha], **kwargs
1917                        )
1918                        # Handle multivariate
1919                        scores = []
1920                        p = X_test.shape[1] if len(X_test.shape) > 1 else 1
1921                        for j in range(p):
1922                            series_name = getattr(
1923                                self, "series_names", [f"Series_{j}"]
1924                            )[j]
1925                            q_label = (
1926                                f"{int(alpha * 100):02d}"
1927                                if (alpha * 100).is_integer()
1928                                else f"{alpha:.3f}".replace(".", "_")
1929                            )
1930                            col = f"quantile_{q_label}_{series_name}"
1931                            if col not in q_pred.columns:
1932                                raise ValueError(
1933                                    f"Column '{col}' not found in quantile forecast output."
1934                                )
1935                            try:
1936                                y_true_j = X_test[:, j] if p > 1 else X_test
1937                            except:
1938                                y_true_j = (
1939                                    X_test.iloc[:, j]
1940                                    if p > 1
1941                                    else X_test.values
1942                                )
1943                            y_pred_j = q_pred[col].values
1944                            # Compute pinball loss for this series
1945                            loss = mean_pinball_loss(
1946                                y_true_j, y_pred_j, alpha=alpha
1947                            )
1948                            scores.append(loss)
1949                        # Return average over series
1950                        return np.mean(scores)
1951                    elif scoring == "crps":
1952                        # Ensure simulations exist
1953                        _ = self.predict(
1954                            h=len(X_test), **kwargs
1955                        )  # triggers self.sims_
1956                        # Extract simulations: list of DataFrames → (R, h, p)
1957                        sims_vals = np.stack(
1958                            [sim.values for sim in self.sims_], axis=0
1959                        )  # (R, h, p)
1960                        crps_scores = []
1961                        p = X_test.shape[1] if len(X_test.shape) > 1 else 1
1962                        for j in range(p):
1963                            try:
1964                                y_true_j = X_test[:, j] if p > 1 else X_test
1965                            except Exception as e:
1966                                y_true_j = (
1967                                    X_test.iloc[:, j]
1968                                    if p > 1
1969                                    else X_test.values
1970                                )
1971                            sims_j = sims_vals[:, :, j]  # (R, h)
1972                            crps_j = self._crps_ensemble(
1973                                np.asarray(y_true_j), sims_j
1974                            )
1975                            crps_scores.append(
1976                                np.mean(crps_j)
1977                            )  # average over horizon
1978                        return np.mean(crps_scores)  # average over series
1979                    if scoring == "winkler_score":
1980                        return winkler_score(X_pred, X_test, level=level)
1981                    elif scoring == "coverage":
1982                        return coverage(X_pred, X_test, level=level)
1983                    else:
1984                        return mean_errors(
1985                            pred=X_pred.mean, actual=X_test, scoring=scoring
1986                        )
1987                else:  # not probabilistic
1988                    return mean_errors(
1989                        pred=X_pred, actual=X_test, scoring=scoring
1990                    )
1991
1992        else:  # isinstance(scoring, str) = False
1993            err_func = scoring
1994
1995        errors = []
1996
1997        train_indices = []
1998
1999        test_indices = []
2000
2001        for train_index, test_index in tscv_obj:
2002            train_indices.append(train_index)
2003            test_indices.append(test_index)
2004
2005        if show_progress is True:
2006            iterator = tqdm(
2007                zip(train_indices, test_indices), total=len(train_indices)
2008            )
2009        else:
2010            iterator = zip(train_indices, test_indices)
2011
2012        for train_index, test_index in iterator:
2013            if verbose == 1:
2014                print(f"TRAIN: {train_index}")
2015                print(f"TEST: {test_index}")
2016
2017            if isinstance(X, pd.DataFrame):
2018                self.fit(X.iloc[train_index, :], xreg=xreg, **kwargs)
2019                X_test = X.iloc[test_index, :]
2020            else:
2021                self.fit(X[train_index, :], xreg=xreg, **kwargs)
2022                X_test = X[test_index, :]
2023            X_pred = self.predict(h=int(len(test_index)), level=level, **kwargs)
2024
2025            errors.append(err_func(X_test, X_pred, scoring, alpha=alpha))
2026
2027        res = np.asarray(errors)
2028
2029        return res, describe(res)
2030
2031    def _compute_information_criterion(self, curr_lags, criterion="AIC"):
2032        """Compute information criterion using existing residuals
2033
2034        Parameters
2035        ----------
2036        curr_lags : int
2037            Current number of lags being evaluated
2038        criterion : str
2039            One of 'AIC', 'AICc', or 'BIC'
2040
2041        Returns
2042        -------
2043        float
2044            Information criterion value or inf if parameters exceed observations
2045        """
2046        # Get dimensions
2047        n_obs = self.residuals_.shape[0]
2048        n_features = int(self.init_n_series_ * curr_lags)
2049        n_hidden = int(self.n_hidden_features)
2050        # Calculate number of parameters
2051        term1 = int(n_features * n_hidden)
2052        term2 = int(n_hidden * self.init_n_series_)
2053        n_params = term1 + term2
2054        # Check if we have enough observations for the number of parameters
2055        if n_obs <= n_params + 1:
2056            return float("inf")  # Return infinity if too many parameters
2057        # Compute RSS using existing residuals
2058        rss = np.sum(self.residuals_**2)
2059        # Compute criterion
2060        if criterion == "AIC":
2061            ic = n_obs * np.log(rss / n_obs) + 2 * n_params
2062        elif criterion == "AICc":
2063            ic = n_obs * np.log(rss / n_obs) + 2 * n_params * (
2064                n_obs / (n_obs - n_params - 1)
2065            )
2066        else:  # BIC
2067            ic = n_obs * np.log(rss / n_obs) + n_params * np.log(n_obs)
2068
2069        return ic
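
A minimal usage sketch for the `cross_val_score` method above (illustrative data and hyperparameters; any scikit-learn-like regressor can serve as the base learner):

import nnetsauce as ns
import numpy as np
from sklearn.linear_model import Ridge

np.random.seed(123)
X = np.random.rand(60, 3)  # illustrative multivariate series, most recent observations last

obj_MTS = ns.MTS(Ridge(), lags=2, n_hidden_features=5)
# expanding-window time series cross-validation: each split trains on at least
# 'initial_window' observations and scores the next 'horizon' observations
errors, stats = obj_MTS.cross_val_score(
    X,
    scoring="root_mean_squared_error",
    initial_window=30,
    horizon=5,
    fixed_window=False,
    show_progress=False,
)
print(errors)  # raw error for each split
print(stats)   # descriptive statistics of these errors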

Univariate and multivariate time series (MTS) forecasting with Quasi-Randomized networks

Parameters:

obj: object.
    any object containing a method fit (obj.fit()) and a method predict
    (obj.predict()).

n_hidden_features: int.
    number of nodes in the hidden layer.

activation_name: str.
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'.

a: float.
    hyperparameter for 'prelu' or 'elu' activation function.

nodes_sim: str.
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'.

bias: boolean.
    indicates if the hidden layer contains a bias term (True) or not
    (False).

dropout: float.
    regularization parameter; (random) percentage of nodes dropped out
    of the training.

direct_link: boolean.
    indicates if the original predictors are included (True) in the model's fitting or not (False).

n_clusters: int.
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering).

cluster_encode: bool.
    defines how the variable containing clusters is treated (default is one-hot);
    if `False`, then labels are used, without one-hot encoding.

type_clust: str.
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm').

type_scaling: a tuple of 3 strings.
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax').

lags: int or str.
    number of lags used for each time series. If a string, it must be one of
    'AIC', 'AICc', or 'BIC'; the number of lags is then selected automatically
    by minimizing that information criterion.

type_pi: str.
    type of prediction interval; currently:
    - "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case
    - "quantile": use model-agnostic quantile regression under the hood
    - "kde": based on Kernel Density Estimation of in-sample residuals
    - "bootstrap": based on independent bootstrap of in-sample residuals
    - "block-bootstrap": based on basic block bootstrap of in-sample residuals
    - "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals
    - "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals
    - "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals
    - "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals
    - "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals
    - "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals
    - based on copulas of in-sample residuals: 'vine-tll', 'vine-bb1', 'vine-bb6', 'vine-bb7', 'vine-bb8', 'vine-clayton',
    'vine-frank', 'vine-gaussian', 'vine-gumbel', 'vine-indep', 'vine-joe', 'vine-student'
    - 'scp-vine-tll', 'scp-vine-bb1', 'scp-vine-bb6', 'scp-vine-bb7', 'scp-vine-bb8', 'scp-vine-clayton',
    'scp-vine-frank', 'scp-vine-gaussian', 'scp-vine-gumbel', 'scp-vine-indep', 'scp-vine-joe', 'scp-vine-student'
    - 'scp2-vine-tll', 'scp2-vine-bb1', 'scp2-vine-bb6', 'scp2-vine-bb7', 'scp2-vine-bb8', 'scp2-vine-clayton',
    'scp2-vine-frank', 'scp2-vine-gaussian', 'scp2-vine-gumbel', 'scp2-vine-indep', 'scp2-vine-joe', 'scp2-vine-student'

level: int.
    level of confidence for `type_pi == 'quantile'` (default is `95`)

block_size: int.
    size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap").
    Default is ceil(3.15 * n_residuals^(1/3))

replications: int.
    number of replications (if needed, for predictive simulation). Default is 'None'.

kernel: str.
    the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'.

agg: str.
    either "mean" or "median" for simulation of bootstrap aggregating

seed: int.
    reproducibility seed for nodes_sim=='uniform' or predictive simulation.

backend: str.
    "cpu" or "gpu" or "tpu".

verbose: int.
    0: not printing; 1: printing

show_progress: bool.
    True: progress bar when fitting each series; False: no progress bar when fitting each series

Attributes:

fit_objs_: dict
    objects adjusted to each individual time series

y_: {array-like}
    MTS responses (most recent observations first)

X_: {array-like}
    MTS lags

xreg_: {array-like}
    external regressors

y_means_: dict
    a dictionary of each series mean values

preds_: {array-like}
    successive model predictions

preds_std_: {array-like}
    standard deviation around the predictions for Bayesian base learners (`obj`)

gaussian_preds_std_: {array-like}
    standard deviation around the predictions for `type_pi='gaussian'`

return_std_: boolean
    return uncertainty or not (set in predict)

df_: data frame
    the input data frame, in case a data.frame is provided to `fit`

n_obs_: int
    number of time series observations (number of rows for multivariate)

level_: int
    level of confidence for prediction intervals (default is 95)

residuals_: {array-like}
    in-sample residuals (for `type_pi` not conformal prediction) or calibrated residuals
    (for `type_pi` in conformal prediction)

residuals_sims_: tuple of {array-like}
    simulations of in-sample residuals (for `type_pi` not conformal prediction) or
    calibrated residuals (for `type_pi` in conformal prediction)

kde_: A scikit-learn object, see https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KernelDensity.html

residuals_std_dev_: residuals standard deviation

Examples:

Example 1:

import nnetsauce as ns
import numpy as np
from sklearn import linear_model
np.random.seed(123)

M = np.random.rand(10, 3)
M[:,0] = 10*M[:,0]
M[:,2] = 25*M[:,2]
print(M)

# Adjust Bayesian Ridge
regr4 = linear_model.BayesianRidge()
obj_MTS = ns.MTS(regr4, lags = 1, n_hidden_features=5)
obj_MTS.fit(M)
print(obj_MTS.predict())

# with credible intervals
print(obj_MTS.predict(return_std=True, level=80))

print(obj_MTS.predict(return_std=True, level=95))

Example 2:

import nnetsauce as ns
import numpy as np
import pandas as pd
from sklearn import linear_model

dataset = {
'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
'series1' : [34, 30, 35.6, 33.3, 38.1],
'series2' : [4, 5.5, 5.6, 6.3, 5.1],
'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
df = pd.DataFrame(dataset).set_index('date')
print(df)

# Adjust Bayesian Ridge
regr5 = linear_model.BayesianRidge()
obj_MTS = ns.MTS(regr5, lags = 1, n_hidden_features=5)
obj_MTS.fit(df)
print(obj_MTS.predict())

# with credible intervals
print(obj_MTS.predict(return_std=True, level=80))

print(obj_MTS.predict(return_std=True, level=95))
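
Example 3 (a minimal sketch of predictive simulation; the data and hyperparameters below are illustrative):

import nnetsauce as ns
import numpy as np
from sklearn import linear_model
np.random.seed(123)

M = np.random.rand(50, 3)

# 'kde' prediction intervals require 'replications' simulations of the in-sample residuals
regr6 = linear_model.Ridge()
obj_MTS = ns.MTS(regr6, lags = 1, n_hidden_features=5,
                 type_pi="kde", replications=100, kernel="gaussian")
obj_MTS.fit(M)
res = obj_MTS.predict(h=5, level=95)
print(res.mean)   # point forecasts
print(res.lower)  # lower bounds of the prediction intervals
print(res.upper)  # upper bounds of the prediction intervals
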
def fit(self, X, xreg=None, **kwargs):
337    def fit(self, X, xreg=None, **kwargs):
338        """Fit MTS model to training data X, with optional regressors xreg
339
340        Parameters:
341
342        X: {array-like}, shape = [n_samples, n_features]
343            Training time series, where n_samples is the number
344            of samples and n_features is the number of features;
345            X must be in increasing order (most recent observations last)
346
347        xreg: {array-like}, shape = [n_samples, n_features_xreg]
348            Additional (external) regressors to be passed to self.obj
349            xreg must be in 'increasing' order (most recent observations last)
350
351        **kwargs: for now, additional parameters to be passed for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)
352
353        Returns:
354
355        self: object
356        """
357        try:
358            self.init_n_series_ = X.shape[1]
359        except IndexError as e:
360            self.init_n_series_ = 1
361
362        # Automatic lag selection if requested
363        if isinstance(self.lags, str):
364            max_lags = min(25, X.shape[0] // 4)
365            best_ic = float("inf")
366            best_lags = 1
367
368            if self.verbose:
369                print(
370                    f"\nSelecting optimal number of lags using {self.lags}..."
371                )
372                iterator = tqdm(range(1, max_lags + 1))
373            else:
374                iterator = range(1, max_lags + 1)
375
376            for lag in iterator:
377                # Convert DataFrame to numpy array before reversing
378                if isinstance(X, pd.DataFrame):
379                    X_values = X.values[::-1]
380                else:
381                    X_values = X[::-1]
382
383                # Try current lag value
384                if self.init_n_series_ > 1:
385                    mts_input = ts.create_train_inputs(X_values, lag)
386                else:
387                    mts_input = ts.create_train_inputs(
388                        X_values.reshape(-1, 1), lag
389                    )
390
391                # Cook training set and fit model
392                dummy_y, scaled_Z = self.cook_training_set(
393                    y=np.ones(mts_input[0].shape[0]), X=mts_input[1]
394                )
395                residuals_ = []
396
397                for i in range(self.init_n_series_):
398                    y_mean = np.mean(mts_input[0][:, i])
399                    centered_y_i = mts_input[0][:, i] - y_mean
400                    self.obj.fit(X=scaled_Z, y=centered_y_i)
401                    residuals_.append(
402                        (centered_y_i - self.obj.predict(scaled_Z)).tolist()
403                    )
404
405                self.residuals_ = np.asarray(residuals_).T
406                ic = self._compute_information_criterion(
407                    curr_lags=lag, criterion=self.lags
408                )
409
410                if self.verbose:
411                    print(f"Trying lags={lag}, {self.lags}={ic:.2f}")
412
413                if ic < best_ic:
414                    best_ic = ic
415                    best_lags = lag
416
417            if self.verbose:
418                print(
419                    f"\nSelected {best_lags} lags with {self.lags}={best_ic:.2f}"
420                )
421
422            self.lags = best_lags
423
424        self.input_dates = None
425        self.df_ = None
426
427        if isinstance(X, pd.DataFrame) is False:
428            # input data set is a numpy array
429            if xreg is None:
430                X = pd.DataFrame(X)
431                self.series_names = [
432                    "series" + str(i) for i in range(X.shape[1])
433                ]
434            else:
435                # xreg is not None
436                X = mo.cbind(X, xreg)
437                self.xreg_ = xreg
438
439        else:  # input data set is a DataFrame with column names
440            X_index = None
441            if X.index is not None:
442                X_index = X.index
443            if xreg is None:
444                X = copy.deepcopy(mo.convert_df_to_numeric(X))
445            else:
446                X = copy.deepcopy(mo.cbind(mo.convert_df_to_numeric(X), xreg))
447                self.xreg_ = xreg
448            if X_index is not None:
449                X.index = X_index
450            self.series_names = X.columns.tolist()
451
452        if isinstance(X, pd.DataFrame):
453            if self.df_ is None:
454                self.df_ = X
455                X = X.values
456            else:
457                input_dates_prev = pd.DatetimeIndex(self.df_.index.values)
458                frequency = pd.infer_freq(input_dates_prev)
459                self.df_ = pd.concat([self.df_, X], axis=0)
460                self.input_dates = pd.date_range(
461                    start=input_dates_prev[0],
462                    periods=len(input_dates_prev) + X.shape[0],
463                    freq=frequency,
464                ).values.tolist()
465                self.df_.index = self.input_dates
466                X = self.df_.values
467            self.df_.columns = self.series_names
468        else:
469            if self.df_ is None:
470                self.df_ = pd.DataFrame(X, columns=self.series_names)
471            else:
472                self.df_ = pd.concat(
473                    [self.df_, pd.DataFrame(X, columns=self.series_names)],
474                    axis=0,
475                )
476
477        self.input_dates = ts.compute_input_dates(self.df_)
478
479        try:
480            # multivariate time series
481            n, p = X.shape
482        except:
483            # univariate time series
484            n = X.shape[0]
485            p = 1
486        self.n_obs_ = n
487
488        rep_1_n = np.repeat(1, n)
489
490        self.y_ = None
491        self.X_ = None
492        self.n_series = p
493        self.fit_objs_.clear()
494        self.y_means_.clear()
495        residuals_ = []
496        self.residuals_ = None
497        self.residuals_sims_ = None
498        self.kde_ = None
499        self.sims_ = None
500        self.scaled_Z_ = None
501        self.centered_y_is_ = []
502
503        if self.init_n_series_ > 1:
504            # multivariate time series
505            mts_input = ts.create_train_inputs(X[::-1], self.lags)
506        else:
507            # univariate time series
508            mts_input = ts.create_train_inputs(
509                X.reshape(-1, 1)[::-1], self.lags
510            )
511
512        self.y_ = mts_input[0]
513
514        self.X_ = mts_input[1]
515
516        dummy_y, scaled_Z = self.cook_training_set(y=rep_1_n, X=self.X_)
517
518        self.scaled_Z_ = scaled_Z
519
520        # loop on all the time series and adjust self.obj.fit
521        if self.verbose > 0:
522            print(
523                f"\n Adjusting {type(self.obj).__name__} to multivariate time series... \n"
524            )
525
526        if self.show_progress is True:
527            iterator = tqdm(range(self.init_n_series_))
528        else:
529            iterator = range(self.init_n_series_)
530
531        if self.type_pi in (
532            "gaussian",
533            "kde",
534            "bootstrap",
535            "block-bootstrap",
536        ) or self.type_pi.startswith("vine"):
537            for i in iterator:
538                y_mean = np.mean(self.y_[:, i])
539                self.y_means_[i] = y_mean
540                centered_y_i = self.y_[:, i] - y_mean
541                self.centered_y_is_.append(centered_y_i)
542                self.obj.fit(X=scaled_Z, y=centered_y_i)
543                self.fit_objs_[i] = deepcopy(self.obj)
544                residuals_.append(
545                    (
546                        centered_y_i - self.fit_objs_[i].predict(scaled_Z)
547                    ).tolist()
548                )
549
550        if self.type_pi == "quantile":
551            for i in iterator:
552                y_mean = np.mean(self.y_[:, i])
553                self.y_means_[i] = y_mean
554                centered_y_i = self.y_[:, i] - y_mean
555                self.centered_y_is_.append(centered_y_i)
556                self.obj.fit(X=scaled_Z, y=centered_y_i)
557                self.fit_objs_[i] = deepcopy(self.obj)
558
559        if self.type_pi.startswith("scp"):
560            # split conformal prediction
561            for i in iterator:
562                n_y = self.y_.shape[0]
563                n_y_half = n_y // 2
564                first_half_idx = range(0, n_y_half)
565                second_half_idx = range(n_y_half, n_y)
566                y_mean_temp = np.mean(self.y_[first_half_idx, i])
567                centered_y_i_temp = self.y_[first_half_idx, i] - y_mean_temp
568                self.obj.fit(X=scaled_Z[first_half_idx, :], y=centered_y_i_temp)
569                # calibrated residuals actually
570                residuals_.append(
571                    (
572                        self.y_[second_half_idx, i]
573                        - (
574                            y_mean_temp
575                            + self.obj.predict(scaled_Z[second_half_idx, :])
576                        )
577                    ).tolist()
578                )
579                # fit on the second half
580                y_mean = np.mean(self.y_[second_half_idx, i])
581                self.y_means_[i] = y_mean
582                centered_y_i = self.y_[second_half_idx, i] - y_mean
583                self.obj.fit(X=scaled_Z[second_half_idx, :], y=centered_y_i)
584                self.fit_objs_[i] = deepcopy(self.obj)
585
586        self.residuals_ = np.asarray(residuals_).T
587
588        if self.type_pi == "gaussian":
589            self.gaussian_preds_std_ = np.std(self.residuals_, axis=0)
590
591        if self.type_pi.startswith("scp2"):
592            # Calculate mean and standard deviation for each column
593            data_mean = np.mean(self.residuals_, axis=0)
594            self.residuals_std_dev_ = np.std(self.residuals_, axis=0)
595            # Center and scale the array using broadcasting
596            self.residuals_ = (
597                self.residuals_ - data_mean[np.newaxis, :]
598            ) / self.residuals_std_dev_[np.newaxis, :]
599
600        if self.replications != None and "kde" in self.type_pi:
601            if self.verbose > 0:
602                print(f"\n Simulate residuals using {self.kernel} kernel... \n")
603            assert self.kernel in (
604                "gaussian",
605                "tophat",
606            ), "currently, 'kernel' must be either 'gaussian' or 'tophat'"
607            kernel_bandwidths = {"bandwidth": np.logspace(-6, 6, 150)}
608            grid = GridSearchCV(
609                KernelDensity(kernel=self.kernel, **kwargs),
610                param_grid=kernel_bandwidths,
611            )
612            grid.fit(self.residuals_)
613
614            if self.verbose > 0:
615                print(
616                    f"\n Best parameters for {self.kernel} kernel: {grid.best_params_} \n"
617                )
618
619            self.kde_ = grid.best_estimator_
620
621        return self

Fit MTS model to training data X, with optional regressors xreg

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training time series, where n_samples is the number of samples and
    n_features is the number of features; X must be in increasing order
    (most recent observations last)

xreg: {array-like}, shape = [n_samples, n_features_xreg]
    Additional (external) regressors to be passed to self.obj;
    xreg must be in 'increasing' order (most recent observations last)

**kwargs: for now, additional parameters to be passed for kernel density
    estimation, when needed (see sklearn.neighbors.KernelDensity)

Returns:

self: object
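
A minimal sketch of automatic lag selection at fit time (illustrative data; when `lags` is 'AIC', 'AICc' or 'BIC', the number of lags is chosen by minimizing that information criterion, as in the source above):

import nnetsauce as ns
import numpy as np
from sklearn.linear_model import Ridge

np.random.seed(123)
X = np.random.rand(40, 2)  # most recent observations last

obj_MTS = ns.MTS(Ridge(), lags="AIC", n_hidden_features=5, verbose=1)
obj_MTS.fit(X)       # prints the criterion value for each candidate number of lags
print(obj_MTS.lags)  # number of lags selected by the criterion
print(obj_MTS.predict(h=5))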

def predict(self, h=5, level=95, quantiles=None, **kwargs):
 953    def predict(self, h=5, level=95, quantiles=None, **kwargs):
 954        """Forecast all the time series, h steps ahead"""
 955
 956        if quantiles is not None:
 957            # Validate
 958            quantiles = np.asarray(quantiles)
 959            if not ((quantiles > 0) & (quantiles < 1)).all():
 960                raise ValueError("quantiles must be between 0 and 1.")
 961            # Delegate to dedicated method
 962            return self._predict_quantiles(h=h, quantiles=quantiles, **kwargs)
 963
 964        if isinstance(level, list) or isinstance(level, np.ndarray):
 965            # Store results
 966            result_dict = {}
 967            # Loop through alphas and calculate lower/upper for each alpha level
 968            # E.g [0.5, 2.5, 5, 16.5, 25, 50]
 969            for lev in level:
 970                # Get the forecast for this alpha
 971                res = self.predict(h=h, level=lev, **kwargs)
 972                # Adjust index and collect lower/upper bounds
 973                res.lower.index = pd.to_datetime(res.lower.index)
 974                res.upper.index = pd.to_datetime(res.upper.index)
 975                # Loop over each time series (multivariate) and flatten results
 976                if isinstance(res.lower, pd.DataFrame):
 977                    for (
 978                        series
 979                    ) in (
 980                        res.lower.columns
 981                    ):  # Assumes 'lower' and 'upper' have multiple series
 982                        result_dict[f"lower_{lev}_{series}"] = (
 983                            res.lower[series].to_numpy().flatten()
 984                        )
 985                        result_dict[f"upper_{lev}_{series}"] = (
 986                            res.upper[series].to_numpy().flatten()
 987                        )
 988                else:
 989                    for series_id in range(
 990                        self.n_series
 991                    ):  # Assumes 'lower' and 'upper' have multiple series
 992                        result_dict[f"lower_{lev}_{series_id}"] = (
 993                            res.lower[series_id, :].to_numpy().flatten()
 994                        )
 995                        result_dict[f"upper_{lev}_{series_id}"] = (
 996                            res.upper[series_id, :].to_numpy().flatten()
 997                        )
 998            return pd.DataFrame(result_dict, index=self.output_dates_)
 999
1000        # only one prediction interval
1001        self.output_dates_, frequency = ts.compute_output_dates(self.df_, h)
1002
1003        self.level_ = level
1004
1005        self.return_std_ = False  # do not remove (/!\)
1006
1007        self.mean_ = None  # do not remove (/!\)
1008
1009        self.mean_ = deepcopy(self.y_)  # do not remove (/!\)
1010
1011        self.lower_ = None  # do not remove (/!\)
1012
1013        self.upper_ = None  # do not remove (/!\)
1014
1015        self.sims_ = None  # do not remove (/!\)
1016
1017        y_means_ = np.asarray(
1018            [self.y_means_[i] for i in range(self.init_n_series_)]
1019        )
1020
1021        n_features = self.init_n_series_ * self.lags
1022
1023        self.alpha_ = 100 - level
1024
1025        pi_multiplier = norm.ppf(1 - self.alpha_ / 200)
1026
1027        if "return_std" in kwargs:  # bayesian forecasting
1028            self.return_std_ = True
1029            self.preds_std_ = []
1030            DescribeResult = namedtuple(
1031                "DescribeResult", ("mean", "lower", "upper")
1032            )  # to be updated
1033
1034        if "return_pi" in kwargs:  # split conformal, without simulation
1035            mean_pi_ = []
1036            lower_pi_ = []
1037            upper_pi_ = []
1038            median_pi_ = []
1039            DescribeResult = namedtuple(
1040                "DescribeResult", ("mean", "lower", "upper")
1041            )  # to be updated
1042
1043        if self.kde_ != None and "kde" in self.type_pi:  # kde
1044            target_cols = self.df_.columns[
1045                : self.init_n_series_
1046            ]  # Get target column names
1047            if self.verbose == 1:
1048                self.residuals_sims_ = tuple(
1049                    self.kde_.sample(
1050                        n_samples=h, random_state=self.seed + 100 * i
1051                    )  # Keep full sample
1052                    for i in tqdm(range(self.replications))
1053                )
1054            elif self.verbose == 0:
1055                self.residuals_sims_ = tuple(
1056                    self.kde_.sample(
1057                        n_samples=h, random_state=self.seed + 100 * i
1058                    )  # Keep full sample
1059                    for i in range(self.replications)
1060                )
1061
1062            # Convert to DataFrames after sampling
1063            self.residuals_sims_ = tuple(
1064                pd.DataFrame(
1065                    sim,  # Keep all columns
1066                    columns=target_cols,  # Use original target column names
1067                    index=self.output_dates_,
1068                )
1069                for sim in self.residuals_sims_
1070            )
1071
1072        if self.type_pi in ("bootstrap", "scp-bootstrap", "scp2-bootstrap"):
1073            assert self.replications is not None and isinstance(
1074                self.replications, int
1075            ), "'replications' must be provided and be an integer"
1076            if self.verbose == 1:
1077                self.residuals_sims_ = tuple(
1078                    ts.bootstrap(
1079                        self.residuals_,
1080                        h=h,
1081                        block_size=None,
1082                        seed=self.seed + 100 * i,
1083                    )
1084                    for i in tqdm(range(self.replications))
1085                )
1086            elif self.verbose == 0:
1087                self.residuals_sims_ = tuple(
1088                    ts.bootstrap(
1089                        self.residuals_,
1090                        h=h,
1091                        block_size=None,
1092                        seed=self.seed + 100 * i,
1093                    )
1094                    for i in range(self.replications)
1095                )
1096
1097        if self.type_pi in (
1098            "block-bootstrap",
1099            "scp-block-bootstrap",
1100            "scp2-block-bootstrap",
1101        ):
1102            if self.block_size is None:
1103                self.block_size = int(
1104                    np.ceil(3.15 * (self.residuals_.shape[0] ** (1 / 3)))
1105                )
1106
1107            assert self.replications is not None and isinstance(
1108                self.replications, int
1109            ), "'replications' must be provided and be an integer"
1110            if self.verbose == 1:
1111                self.residuals_sims_ = tuple(
1112                    ts.bootstrap(
1113                        self.residuals_,
1114                        h=h,
1115                        block_size=self.block_size,
1116                        seed=self.seed + 100 * i,
1117                    )
1118                    for i in tqdm(range(self.replications))
1119                )
1120            elif self.verbose == 0:
1121                self.residuals_sims_ = tuple(
1122                    ts.bootstrap(
1123                        self.residuals_,
1124                        h=h,
1125                        block_size=self.block_size,
1126                        seed=self.seed + 100 * i,
1127                    )
1128                    for i in range(self.replications)
1129                )
1130
1131        if "vine" in self.type_pi:
1132            if self.verbose == 1:
1133                self.residuals_sims_ = tuple(
1134                    vinecopula_sample(
1135                        x=self.residuals_,
1136                        n_samples=h,
1137                        method=self.type_pi,
1138                        random_state=self.seed + 100 * i,
1139                    )
1140                    for i in tqdm(range(self.replications))
1141                )
1142            elif self.verbose == 0:
1143                self.residuals_sims_ = tuple(
1144                    vinecopula_sample(
1145                        x=self.residuals_,
1146                        n_samples=h,
1147                        method=self.type_pi,
1148                        random_state=self.seed + 100 * i,
1149                    )
1150                    for i in range(self.replications)
1151                )
1152
1153        mean_ = deepcopy(self.mean_)
1154
1155        for i in range(h):
1156            new_obs = ts.reformat_response(mean_, self.lags)
1157            new_X = new_obs.reshape(1, -1)
1158            cooked_new_X = self.cook_test_set(new_X, **kwargs)
1159
1160            if "return_std" in kwargs:
1161                self.preds_std_.append(
1162                    [
1163                        np.asarray(
1164                            self.fit_objs_[i].predict(
1165                                cooked_new_X, return_std=True
1166                            )[1]
1167                        ).item()
1168                        for i in range(self.n_series)
1169                    ]
1170                )
1171
1172            if "return_pi" in kwargs:
1173                for i in range(self.n_series):
1174                    preds_pi = self.fit_objs_[i].predict(cooked_new_X, **kwargs)
1175                    mean_pi_.append(preds_pi.mean[0])
1176                    lower_pi_.append(preds_pi.lower[0])
1177                    upper_pi_.append(preds_pi.upper[0])
1178
1179            if self.type_pi != "quantile":
1180                predicted_cooked_new_X = np.asarray(
1181                    [
1182                        np.asarray(
1183                            self.fit_objs_[i].predict(cooked_new_X)
1184                        ).item()
1185                        for i in range(self.init_n_series_)
1186                    ]
1187                )
1188            else:
1189                predicted_cooked_new_X = np.asarray(
1190                    [
1191                        np.asarray(
1192                            self.fit_objs_[i]
1193                            .predict(cooked_new_X, return_pi=True)
1194                            .upper
1195                        ).item()
1196                        for i in range(self.init_n_series_)
1197                    ]
1198                )
1199
1200            preds = np.asarray(y_means_ + predicted_cooked_new_X)
1201
1202            # Create full row with both predictions and external regressors
1203            if self.xreg_ is not None and "xreg" in kwargs:
1204                next_xreg = kwargs["xreg"].iloc[i: i + 1].values.flatten()
1205                full_row = np.concatenate([preds, next_xreg])
1206            else:
1207                full_row = preds
1208
1209            # Create a new row with same number of columns as mean_
1210            new_row = np.zeros((1, mean_.shape[1]))
1211            new_row[0, : full_row.shape[0]] = full_row
1212
1213            # Maintain the full dimensionality by using vstack instead of rbind
1214            mean_ = np.vstack([new_row, mean_[:-1]])
1215
1216        # Final output should only include the target columns
1217        self.mean_ = pd.DataFrame(
1218            mean_[0: min(h, self.n_obs_ - self.lags), : self.init_n_series_][
1219                ::-1
1220            ],
1221            columns=self.df_.columns[: self.init_n_series_],
1222            index=self.output_dates_,
1223        )
1224
1225        # function's return ----------------------------------------------------------------------
1226        if (
1227            (("return_std" not in kwargs) and ("return_pi" not in kwargs))
1228            and (self.type_pi not in ("gaussian", "scp"))
1229        ) or ("vine" in self.type_pi):
1230            if self.replications is None:
1231                return self.mean_.iloc[:, : self.init_n_series_]
1232
1233            # if "return_std" not in kwargs and self.replications is not None
1234            meanf = []
1235            medianf = []
1236            lower = []
1237            upper = []
1238
1239            if "scp2" in self.type_pi:
1240                if self.verbose == 1:
1241                    self.sims_ = tuple(
1242                        (
1243                            self.mean_
1244                            + self.residuals_sims_[i]
1245                            * self.residuals_std_dev_[np.newaxis, :]
1246                            for i in tqdm(range(self.replications))
1247                        )
1248                    )
1249                elif self.verbose == 0:
1250                    self.sims_ = tuple(
1251                        (
1252                            self.mean_
1253                            + self.residuals_sims_[i]
1254                            * self.residuals_std_dev_[np.newaxis, :]
1255                            for i in range(self.replications)
1256                        )
1257                    )
1258            else:
1259                if self.verbose == 1:
1260                    self.sims_ = tuple(
1261                        (
1262                            self.mean_ + self.residuals_sims_[i]
1263                            for i in tqdm(range(self.replications))
1264                        )
1265                    )
1266                elif self.verbose == 0:
1267                    self.sims_ = tuple(
1268                        (
1269                            self.mean_ + self.residuals_sims_[i]
1270                            for i in range(self.replications)
1271                        )
1272                    )
1273
1274            DescribeResult = namedtuple(
1275                "DescribeResult", ("mean", "sims", "lower", "upper")
1276            )
1277            for ix in range(self.init_n_series_):
1278                sims_ix = getsims(self.sims_, ix)
1279                if self.agg == "mean":
1280                    meanf.append(np.mean(sims_ix, axis=1))
1281                else:
1282                    medianf.append(np.median(sims_ix, axis=1))
1283                lower.append(np.quantile(sims_ix, q=self.alpha_ / 200, axis=1))
1284                upper.append(
1285                    np.quantile(sims_ix, q=1 - self.alpha_ / 200, axis=1)
1286                )
1287            self.mean_ = pd.DataFrame(
1288                np.asarray(meanf).T,
1289                columns=self.series_names[
1290                    : self.init_n_series_
1291                ],  # self.df_.columns,
1292                index=self.output_dates_,
1293            )
1294
1295            self.lower_ = pd.DataFrame(
1296                np.asarray(lower).T,
1297                columns=self.series_names[
1298                    : self.init_n_series_
1299                ],  # self.df_.columns,
1300                index=self.output_dates_,
1301            )
1302
1303            self.upper_ = pd.DataFrame(
1304                np.asarray(upper).T,
1305                columns=self.series_names[
1306                    : self.init_n_series_
1307                ],  # self.df_.columns,
1308                index=self.output_dates_,
1309            )
1310
1311            try:
1312                self.median_ = pd.DataFrame(
1313                    np.asarray(medianf).T,
1314                    columns=self.series_names[
1315                        : self.init_n_series_
1316                    ],  # self.df_.columns,
1317                    index=self.output_dates_,
1318                )
1319            except Exception as e:
1320                pass
1321
1322            return DescribeResult(
1323                self.mean_, self.sims_, self.lower_, self.upper_
1324            )
1325
1326        if (
1327            (("return_std" in kwargs) or ("return_pi" in kwargs))
1328            and (self.type_pi not in ("gaussian", "scp"))
1329        ) or "vine" in self.type_pi:
1330            DescribeResult = namedtuple(
1331                "DescribeResult", ("mean", "lower", "upper")
1332            )
1333
1334            self.mean_ = pd.DataFrame(
1335                np.asarray(self.mean_),
1336                columns=self.series_names,  # self.df_.columns,
1337                index=self.output_dates_,
1338            )
1339
1340            if "return_std" in kwargs:
1341                self.preds_std_ = np.asarray(self.preds_std_)
1342
1343                self.lower_ = pd.DataFrame(
1344                    self.mean_.values - pi_multiplier * self.preds_std_,
1345                    columns=self.series_names,  # self.df_.columns,
1346                    index=self.output_dates_,
1347                )
1348
1349                self.upper_ = pd.DataFrame(
1350                    self.mean_.values + pi_multiplier * self.preds_std_,
1351                    columns=self.series_names,  # self.df_.columns,
1352                    index=self.output_dates_,
1353                )
1354
1355            if "return_pi" in kwargs:
1356                self.lower_ = pd.DataFrame(
1357                    np.asarray(lower_pi_).reshape(h, self.n_series)
1358                    + y_means_[np.newaxis, :],
1359                    columns=self.series_names,  # self.df_.columns,
1360                    index=self.output_dates_,
1361                )
1362
1363                self.upper_ = pd.DataFrame(
1364                    np.asarray(upper_pi_).reshape(h, self.n_series)
1365                    + y_means_[np.newaxis, :],
1366                    columns=self.series_names,  # self.df_.columns,
1367                    index=self.output_dates_,
1368                )
1369
1370            res = DescribeResult(self.mean_, self.lower_, self.upper_)
1371
1372            if self.xreg_ is not None:
1373                if len(self.xreg_.shape) > 1:
1374                    res2 = mx.tuple_map(
1375                        res,
1376                        lambda x: mo.delete_last_columns(
1377                            x, num_columns=self.xreg_.shape[1]
1378                        ),
1379                    )
1380                else:
1381                    res2 = mx.tuple_map(
1382                        res, lambda x: mo.delete_last_columns(x, num_columns=1)
1383                    )
1384                return DescribeResult(res2[0], res2[1], res2[2])
1385
1386            return res
1387
1388        if self.type_pi == "gaussian":
1389            DescribeResult = namedtuple(
1390                "DescribeResult", ("mean", "lower", "upper")
1391            )
1392
1393            self.mean_ = pd.DataFrame(
1394                np.asarray(self.mean_),
1395                columns=self.series_names,  # self.df_.columns,
1396                index=self.output_dates_,
1397            )
1398
1399            # Use Bayesian std if available, otherwise use gaussian residual std
1400            if "return_std" in kwargs and len(self.preds_std_) > 0:
1401                preds_std_to_use = np.asarray(self.preds_std_)
1402            else:
1403                preds_std_to_use = self.gaussian_preds_std_
1404
1405            self.lower_ = pd.DataFrame(
1406                self.mean_.values - pi_multiplier * preds_std_to_use,
1407                columns=self.series_names,  # self.df_.columns,
1408                index=self.output_dates_,
1409            )
1410
1411            self.upper_ = pd.DataFrame(
1412                self.mean_.values + pi_multiplier * preds_std_to_use,
1413                columns=self.series_names,  # self.df_.columns,
1414                index=self.output_dates_,
1415            )
1416
1417            res = DescribeResult(self.mean_, self.lower_, self.upper_)
1418
1419            if self.xreg_ is not None:
1420                if len(self.xreg_.shape) > 1:
1421                    res2 = mx.tuple_map(
1422                        res,
1423                        lambda x: mo.delete_last_columns(
1424                            x, num_columns=self.xreg_.shape[1]
1425                        ),
1426                    )
1427                else:
1428                    res2 = mx.tuple_map(
1429                        res, lambda x: mo.delete_last_columns(x, num_columns=1)
1430                    )
1431                return DescribeResult(res2[0], res2[1], res2[2])
1432
1433            return res
1434
1435        if self.type_pi == "quantile":
1436            DescribeResult = namedtuple("DescribeResult", ("mean"))
1437
1438            self.mean_ = pd.DataFrame(
1439                np.asarray(self.mean_),
1440                columns=self.series_names,  # self.df_.columns,
1441                index=self.output_dates_,
1442            )
1443
1444            res = DescribeResult(self.mean_)
1445
1446            if self.xreg_ is not None:
1447                if len(self.xreg_.shape) > 1:
1448                    res2 = mx.tuple_map(
1449                        res,
1450                        lambda x: mo.delete_last_columns(
1451                            x, num_columns=self.xreg_.shape[1]
1452                        ),
1453                    )
1454                else:
1455                    res2 = mx.tuple_map(
1456                        res, lambda x: mo.delete_last_columns(x, num_columns=1)
1457                    )
1458                return DescribeResult(res2[0])
1459
1460            return res
1461
1462        # After prediction loop, ensure sims only contain target columns
1463        if self.sims_ is not None:
1464            if self.verbose == 1:
1465                self.sims_ = tuple(
1466                    sim[:h,]  # Only keep target columns and h rows
1467                    for sim in tqdm(self.sims_)
1468                )
1469            elif self.verbose == 0:
1470                self.sims_ = tuple(
1471                    sim[:h,]  # Only keep target columns and h rows
1472                    for sim in self.sims_
1473                )
1474
1475            # Convert numpy arrays to DataFrames with proper columns
1476            self.sims_ = tuple(
1477                pd.DataFrame(
1478                    sim,
1479                    columns=self.df_.columns[: self.init_n_series_],
1480                    index=self.output_dates_,
1481                )
1482                for sim in self.sims_
1483            )
1484
1485        if self.type_pi in (
1486            "kde",
1487            "bootstrap",
1488            "block-bootstrap",
1489            "vine-copula",
1490        ):
1491            if self.xreg_ is not None:
1492                # Use getsimsxreg when external regressors are present
1493                target_cols = self.df_.columns[: self.init_n_series_]
1494                self.sims_ = getsimsxreg(
1495                    self.sims_, self.output_dates_, target_cols
1496                )
1497            else:
1498                # Use original getsims for backward compatibility
1499                self.sims_ = getsims(self.sims_)

Forecast all the time series, h steps ahead
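
A minimal sketch of the `predict` options documented above (illustrative model and data; `level` may be a single confidence level or a list, and `quantiles` delegates to quantile forecasting):

import nnetsauce as ns
import numpy as np
from sklearn.linear_model import Ridge

np.random.seed(123)
M = np.random.rand(50, 3)

obj_MTS = ns.MTS(Ridge(), lags=2, n_hidden_features=5,
                 type_pi="kde", replications=100)
obj_MTS.fit(M)

# single confidence level: mean, sims, lower and upper bounds
res = obj_MTS.predict(h=5, level=95)

# several confidence levels at once: a DataFrame with
# lower_{level}_{series} and upper_{level}_{series} columns
df_levels = obj_MTS.predict(h=5, level=[80, 95])

# quantile forecasts (each quantile strictly between 0 and 1)
df_quantiles = obj_MTS.predict(h=5, quantiles=[0.1, 0.5, 0.9])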

def score(self, X, training_index, testing_index, scoring=None, alpha=0.5, **kwargs):
1566    def score(
1567        self,
1568        X,
1569        training_index,
1570        testing_index,
1571        scoring=None,
1572        alpha=0.5,
1573        **kwargs,
1574    ):
1575        """Train on training_index, score on testing_index."""
1576
1577        assert (
1578            bool(set(training_index).intersection(set(testing_index))) == False
1579        ), "Non-overlapping 'training_index' and 'testing_index' required"
1580
1581        # Dimensions
1582        try:
1583            # multivariate time series
1584            n, p = X.shape
1585        except:
1586            # univariate time series
1587            n = X.shape[0]
1588            p = 1
1589
1590        # Training and testing sets
1591        if p > 1:
1592            X_train = X[training_index, :]
1593            X_test = X[testing_index, :]
1594        else:
1595            X_train = X[training_index]
1596            X_test = X[testing_index]
1597
1598        # Horizon
1599        h = len(testing_index)
1600        assert (
1601            len(training_index) + h
1602        ) <= n, "Please check lengths of training and testing windows"
1603
1604        # Fit and predict
1605        self.fit(X_train, **kwargs)
1606        preds = self.predict(h=h, **kwargs)
1607
1608        if scoring is None:
1609            scoring = "neg_root_mean_squared_error"
1610
1611        if scoring == "pinball":
1612            # Predict requested quantile
1613            q_pred = self.predict(h=h, quantiles=[alpha], **kwargs)
1614            # Handle multivariate
1615            scores = []
1616            for j in range(p):
1617                series_name = getattr(self, "series_names", [f"Series_{j}"])[j]
1618                q_label = (
1619                    f"{int(alpha * 100):02d}"
1620                    if (alpha * 100).is_integer()
1621                    else f"{alpha:.3f}".replace(".", "_")
1622                )
1623                col = f"quantile_{q_label}_{series_name}"
1624                if col not in q_pred.columns:
1625                    raise ValueError(
1626                        f"Column '{col}' not found in quantile forecast output."
1627                    )
1628                y_true_j = X_test[:, j]
1629                y_pred_j = q_pred[col].values
1630                # Compute pinball loss for this series
1631                loss = mean_pinball_loss(y_true_j, y_pred_j, alpha=alpha)
1632                scores.append(loss)
1633            # Return average over series
1634            return np.mean(scores)
1635
1636        if scoring == "crps":
1637            # Ensure simulations exist
1638            preds = self.predict(h=h, **kwargs)  # triggers self.sims_
1639            # Extract simulations: list of DataFrames → (R, h, p)
1640            sims_vals = np.stack(
1641                [sim.values for sim in self.sims_], axis=0
1642            )  # (R, h, p)
1643            crps_scores = []
1644            for j in range(p):
1645                y_true_j = X_test[:, j]
1646                sims_j = sims_vals[:, :, j]  # (R, h)
1647                crps_j = self._crps_ensemble(np.asarray(y_true_j), sims_j)
1648                crps_scores.append(np.mean(crps_j))  # average over horizon
1649            return np.mean(crps_scores)  # average over series
1650
1651        # check inputs
1652        assert scoring in (
1653            "explained_variance",
1654            "neg_mean_absolute_error",
1655            "neg_mean_squared_error",
1656            "neg_root_mean_squared_error",
1657            "neg_mean_squared_log_error",
1658            "neg_median_absolute_error",
1659            "r2",
1660        ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \
1661                               'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \
1662                               'neg_median_absolute_error', 'r2')"
1663
1664        scoring_options = {
1665            "explained_variance": skm2.explained_variance_score,
1666            "neg_mean_absolute_error": skm2.mean_absolute_error,
1667            "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2),
1668            "neg_root_mean_squared_error": lambda x, y: np.sqrt(
1669                np.mean((x - y) ** 2)
1670            ),
1671            "neg_mean_squared_log_error": skm2.mean_squared_log_error,
1672            "neg_median_absolute_error": skm2.median_absolute_error,
1673            "r2": skm2.r2_score,
1674        }
1675
1676        return scoring_options[scoring](X_test, preds)

Train on training_index, score on testing_index.
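
A minimal sketch of `score` with non-overlapping training and testing indices (illustrative data; the default scoring is 'neg_root_mean_squared_error'):

import nnetsauce as ns
import numpy as np
from sklearn.linear_model import Ridge

np.random.seed(123)
X = np.random.rand(50, 3)

obj_MTS = ns.MTS(Ridge(), lags=2, n_hidden_features=5)
# train on the first 40 observations, score on the last 10
print(obj_MTS.score(
    X,
    training_index=np.arange(0, 40),
    testing_index=np.arange(40, 50),
    scoring="neg_root_mean_squared_error",
))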

class MTSStacker(nnetsauce.MTS):
 12class MTSStacker(MTS):
 13    """
 14    Sequential stacking for time series with unified strategy.
 15
 16    Core Strategy:
 17    1. Split data: half1 (base models) | half2 (meta-model)
 18    2. Train base models on half1, predict half2
 19    3. Create augmented dataset: [original_series | base_pred_1 | base_pred_2 | ...]
 20       Stack as additional time series, extract target series
 21    4. Train meta-MTS on half2 with augmented data
 22    5. Retrain base models on half2 for temporal alignment
 23    6. At prediction: base models forecast → augment → meta-model predicts
 24    """
 25
 26    def __init__(
 27        self,
 28        base_models,
 29        meta_model,
 30        split_ratio=0.5,
 31    ):
 32        """
 33        Parameters
 34        ----------
 35        base_models : list of sklearn-compatible models
 36            Base models (e.g., Ridge, Lasso, RandomForest)
 37        meta_model : nnetsauce.MTS instance
 38            MTS with type_pi='scp2-kde' or similar
 39        split_ratio : float
 40            Proportion for half1 (default: 0.5)
 41        """
 42        self.base_models = base_models
 43        self.meta_model = meta_model
 44        self.split_ratio = split_ratio
 45        self.fitted_base_models_ = []
 46        self.split_idx_ = None
 47        self.mean_ = None
 48        self.lower_ = None
 49        self.upper_ = None
 50        self.sims_ = None
 51        self.output_dates_ = None
 52
 53    def fit(self, X, xreg=None, **kwargs):
 54        """
 55        Fit MTSStacker using sequential stacking strategy.
 56
 57        Parameters
 58        ----------
 59        X : array-like or DataFrame, shape (n_samples, n_features)
 60            Training time series (most recent observations last)
 61        xreg : array-like, optional
 62            External regressors
 63        **kwargs : dict
 64            Additional parameters for base and meta models
 65
 66        Returns
 67        -------
 68        self : object
 69        """
 70        # 1. Store attributes and convert to DataFrame if needed
 71        if isinstance(X, pd.DataFrame):
 72            self.df_ = X.copy()
 73            X_array = X.values
 74            self.series_names = X.columns.tolist()
 75        else:
 76            X_array = np.asarray(X)
 77            self.df_ = pd.DataFrame(X_array)
 78            self.series_names = [f"series{i}" for i in range(X_array.shape[1])]
 79
 80        n_samples = X_array.shape[0]
 81        self.n_series_ = X_array.shape[1] if X_array.ndim > 1 else 1
 82
 83        # 2. Split data into half1 and half2
 84        split_idx = int(n_samples * self.split_ratio)
 85        self.split_idx_ = split_idx
 86
 87        if split_idx < self.meta_model.lags:
 88            raise ValueError(
 89                f"Split creates insufficient data: split_idx={split_idx} < "
 90                f"lags={self.meta_model.lags}. Reduce split_ratio or use fewer lags."
 91            )
 92
 93        half1 = X_array[:split_idx]
 94        half2 = X_array[split_idx:]
 95
 96        # 3. Train base models on half1 and predict half2
 97        base_preds = []
 98        temp_base_models = []
 99
100        for base_model in self.base_models:
101            # Wrap in MTS with same config as meta_model
102            base_mts = MTS(
103                obj=clone(base_model),
104                lags=self.meta_model.lags,
105                n_hidden_features=self.meta_model.n_hidden_features,
106                replications=self.meta_model.replications,
107                kernel=self.meta_model.kernel,
108                type_pi=None,  # No prediction intervals for base models
109            )
110            base_mts.fit(half1)
111
112            # Predict half2
113            pred = base_mts.predict(h=len(half2))
114
115            # Handle different return types
116            if isinstance(pred, pd.DataFrame):
117                base_preds.append(pred.values)
118            elif isinstance(pred, np.ndarray):
119                base_preds.append(pred)
120            elif hasattr(pred, "mean"):
121                # Named tuple with mean attribute
122                mean_pred = pred.mean
123                base_preds.append(
124                    mean_pred.values
125                    if isinstance(mean_pred, pd.DataFrame)
126                    else mean_pred
127                )
128            else:
129                raise ValueError(f"Unexpected prediction type: {type(pred)}")
130
131            temp_base_models.append(base_mts)
132
133        # 4. Create augmented dataset: [original | base_pred_1 | base_pred_2 | ...]
134        base_preds_array = np.hstack(
135            base_preds
136        )  # shape: (len(half2), n_series * n_base_models)
137
138        if isinstance(X, pd.DataFrame):
139            half2_df = pd.DataFrame(
140                half2,
141                index=self.df_.index[split_idx:],
142                columns=self.series_names,
143            )
144            base_preds_df = pd.DataFrame(
145                base_preds_array,
146                index=self.df_.index[split_idx:],
147                columns=[
148                    f"base_{i}_{j}"
149                    for i in range(len(self.base_models))
150                    for j in range(self.n_series_)
151                ],
152            )
153            augmented = pd.concat([half2_df, base_preds_df], axis=1)
154        else:
155            augmented = np.hstack([half2, base_preds_array])
156
157        # 5. Train meta-model on augmented half2
158        self.meta_model.fit(augmented, xreg=xreg, **kwargs)
159
160        # Store meta-model attributes
161        self.output_dates_ = self.meta_model.output_dates_
162        self.fit_objs_ = self.meta_model.fit_objs_
163        self.y_ = self.meta_model.y_
164        self.X_ = self.meta_model.X_
165        self.xreg_ = self.meta_model.xreg_
166        self.y_means_ = self.meta_model.y_means_
167        self.residuals_ = self.meta_model.residuals_
168
169        # 6. FIXED: Retrain base models on half2 for temporal alignment
170        self.fitted_base_models_ = []
171        for i, base_model in enumerate(self.base_models):
172            base_mts_final = MTS(
173                obj=clone(base_model),
174                lags=self.meta_model.lags,
175                n_hidden_features=self.meta_model.n_hidden_features,
176                replications=self.meta_model.replications,
177                kernel=self.meta_model.kernel,
178                type_pi=None,
179            )
180            base_mts_final.fit(half2)
181            self.fitted_base_models_.append(base_mts_final)
182
183        return self
184
185    def predict(self, h=5, level=95, **kwargs):
186        """
187        Forecast h steps ahead using stacked predictions.
188
189        FIXED: Now properly generates base model forecasts and uses them
190        to create augmented features for the meta-model.
191
192        Parameters
193        ----------
194        h : int
195            Forecast horizon
196        level : int
197            Confidence level for prediction intervals
198        **kwargs : dict
199            Additional parameters for prediction
200
201        Returns
202        -------
203        DescribeResult or DataFrame
204            Predictions with optional intervals/simulations
205        """
206        # Step 1: Generate base model forecasts for horizon h
207        base_forecasts = []
208
209        for base_mts in self.fitted_base_models_:
210            # Each base model forecasts h steps ahead
211            forecast = base_mts.predict(h=h)
212
213            # Extract mean prediction
214            if isinstance(forecast, pd.DataFrame):
215                base_forecasts.append(forecast.values)
216            elif isinstance(forecast, np.ndarray):
217                base_forecasts.append(forecast)
218            elif hasattr(forecast, "mean"):
219                mean_pred = forecast.mean
220                base_forecasts.append(
221                    mean_pred.values
222                    if isinstance(mean_pred, pd.DataFrame)
223                    else mean_pred
224                )
225            else:
226                raise ValueError(f"Unexpected forecast type: {type(forecast)}")
227
228        # Step 2: Stack base forecasts into augmented features
229        base_forecasts_array = np.hstack(
230            base_forecasts
231        )  # shape: (h, n_series * n_base)
232
233        # Step 3: Create augmented input for meta-model
234        # The meta-model needs the original series structure + base predictions
235        # We use recursive forecasting: predict one step, update history, repeat
236
237        # Get last window of data from training
238        last_window = self.df_.iloc[-self.meta_model.lags:].values
239
240        # Initialize containers for results
241        all_forecasts = []
242        all_lowers = [] if level is not None else None
243        all_uppers = [] if level is not None else None
244        all_sims = (
245            []
246            if hasattr(self.meta_model, "type_pi") and self.meta_model.type_pi
247            else None
248        )
249
250        # Recursive forecasting
251        current_window = last_window.copy()
252
253        for step in range(h):
254            # Create augmented input: [current_window_last_row | base_forecast_step]
255            # Note: meta-model was trained on [original | base_preds]
256            # For prediction, we need to simulate this structure
257
258            # Use the base forecast for this step
259            base_forecast_step = base_forecasts_array[
260                step: step + 1, :
261            ]  # shape: (1, n_base_features)
262
263            # Create a dummy augmented dataset for this step
264            # Combine last observed values with base predictions
265            last_obs = current_window[-1:, :]  # shape: (1, n_series)
266            augmented_step = np.hstack([last_obs, base_forecast_step])
267
268            # Convert to DataFrame if needed
269            if isinstance(self.df_, pd.DataFrame):
270                augmented_df = pd.DataFrame(
271                    augmented_step,
272                    columns=(
273                        self.series_names
274                        + [
275                            f"base_{i}_{j}"
276                            for i in range(len(self.base_models))
277                            for j in range(self.n_series_)
278                        ]
279                    ),
280                )
281            else:
282                augmented_df = augmented_step
283
284            # Predict one step with meta-model
285            # This is tricky: we need to use meta-model's internal predict
286            # but with our augmented data structure
287
288            # For now, use the standard predict and extract one step
289            step_result = self.meta_model.predict(h=1, level=level, **kwargs)
290
291            # Extract forecasts
292            if isinstance(step_result, pd.DataFrame):
293                forecast_step = step_result.iloc[0, : self.n_series_].values
294                all_forecasts.append(forecast_step)
295            elif isinstance(step_result, np.ndarray):
296                forecast_step = step_result[0, : self.n_series_]
297                all_forecasts.append(forecast_step)
298            elif hasattr(step_result, "mean"):
299                mean_pred = step_result.mean
300                if isinstance(mean_pred, pd.DataFrame):
301                    forecast_step = mean_pred.iloc[0, : self.n_series_].values
302                else:
303                    forecast_step = mean_pred[0, : self.n_series_]
304                all_forecasts.append(forecast_step)
305
306                # Extract intervals if available
307                if hasattr(step_result, "lower") and all_lowers is not None:
308                    lower_pred = step_result.lower
309                    if isinstance(lower_pred, pd.DataFrame):
310                        all_lowers.append(
311                            lower_pred.iloc[0, : self.n_series_].values
312                        )
313                    else:
314                        all_lowers.append(lower_pred[0, : self.n_series_])
315
316                if hasattr(step_result, "upper") and all_uppers is not None:
317                    upper_pred = step_result.upper
318                    if isinstance(upper_pred, pd.DataFrame):
319                        all_uppers.append(
320                            upper_pred.iloc[0, : self.n_series_].values
321                        )
322                    else:
323                        all_uppers.append(upper_pred[0, : self.n_series_])
324
325                # Extract simulations if available
326                if hasattr(step_result, "sims") and all_sims is not None:
327                    all_sims.append(step_result.sims)
328
329            # Update window for next iteration
330            current_window = np.vstack(
331                [current_window[1:], forecast_step.reshape(1, -1)]
332            )
333
334        # Combine all forecasts
335        forecasts_array = np.array(all_forecasts)
336
337        # Create output dates
338        if hasattr(self.df_, "index") and isinstance(
339            self.df_.index, pd.DatetimeIndex
340        ):
341            last_date = self.df_.index[-1]
342            freq = pd.infer_freq(self.df_.index)
343            if freq:
344                output_dates = pd.date_range(
345                    start=last_date, periods=h + 1, freq=freq
346                )[1:]
347            else:
348                output_dates = pd.RangeIndex(
349                    start=len(self.df_), stop=len(self.df_) + h
350                )
351        else:
352            output_dates = pd.RangeIndex(
353                start=len(self.df_), stop=len(self.df_) + h
354            )
355
356        self.output_dates_ = output_dates
357
358        # Format output
359        mean_df = pd.DataFrame(
360            forecasts_array,
361            index=output_dates,
362            columns=self.series_names[: self.n_series_],
363        )
364        self.mean_ = mean_df
365
366        # Return based on what was computed
367        if all_lowers and all_uppers:
368            lowers_array = np.array(all_lowers)
369            uppers_array = np.array(all_uppers)
370
371            lower_df = pd.DataFrame(
372                lowers_array,
373                index=output_dates,
374                columns=self.series_names[: self.n_series_],
375            )
376            upper_df = pd.DataFrame(
377                uppers_array,
378                index=output_dates,
379                columns=self.series_names[: self.n_series_],
380            )
381
382            self.lower_ = lower_df
383            self.upper_ = upper_df
384
385            if all_sims:
386                self.sims_ = tuple(all_sims)
387                DescribeResult = namedtuple(
388                    "DescribeResult", ("mean", "sims", "lower", "upper")
389                )
390                return DescribeResult(mean_df, self.sims_, lower_df, upper_df)
391            else:
392                DescribeResult = namedtuple(
393                    "DescribeResult", ("mean", "lower", "upper")
394                )
395                return DescribeResult(mean_df, lower_df, upper_df)
396        else:
397            return mean_df
398
399    def plot(self, series=None, **kwargs):
400        """
401        Plot the time series with forecasts and prediction intervals.
402
403        Parameters
404        ----------
405        series : str or int, optional
406            Name or index of the series to plot (default: 0)
407        **kwargs : dict
408            Additional parameters for plotting
409        """
410        # Ensure we have predictions
411        if self.mean_ is None:
412            raise ValueError(
413                "Model forecasting must be obtained first (call predict)"
414            )
415
416        # Convert series name to index if needed
417        if isinstance(series, str):
418            if series in self.series_names:
419                series_idx = self.series_names.index(series)
420            else:
421                raise ValueError(
422                    f"Series '{series}' doesn't exist in the input dataset"
423                )
424        else:
425            series_idx = series if series is not None else 0
426
427        # Check bounds
428        if series_idx < 0 or series_idx >= self.n_series_:
429            raise ValueError(
430                f"Series index {series_idx} is out of bounds (0 to {self.n_series_ - 1})"
431            )
432
433        # Prepare data for plotting
434        import matplotlib.pyplot as plt
435        import matplotlib.dates as mdates
436
437        # Get historical data
438        historical_data = self.df_.iloc[:, series_idx]
439        forecast_data = self.mean_.iloc[:, series_idx]
440
441        # Get prediction intervals if available
442        has_intervals = self.lower_ is not None and self.upper_ is not None
443        if has_intervals:
444            lower_data = self.lower_.iloc[:, series_idx]
445            upper_data = self.upper_.iloc[:, series_idx]
446
447        # Create figure
448        fig, ax = plt.subplots(figsize=(12, 6))
449
450        # Plot historical data
451        if isinstance(self.df_.index, pd.DatetimeIndex):
452            hist_index = self.df_.index
453            ax.plot(
454                hist_index,
455                historical_data,
456                "-",
457                label="Historical",
458                color="blue",
459                linewidth=1.5,
460            )
461
462            # Plot forecast
463            forecast_index = self.mean_.index
464            ax.plot(
465                forecast_index,
466                forecast_data,
467                "-",
468                label="Forecast",
469                color="red",
470                linewidth=1.5,
471            )
472
473            # Plot prediction intervals
474            if has_intervals:
475                ax.fill_between(
476                    forecast_index,
477                    lower_data,
478                    upper_data,
479                    alpha=0.3,
480                    color="red",
481                    label="Prediction Interval",
482                )
483
484            # Add vertical line at the split point
485            if self.split_idx_ is not None:
486                split_date = hist_index[self.split_idx_]
487                ax.axvline(
488                    x=split_date,
489                    color="gray",
490                    linestyle="--",
491                    alpha=0.5,
492                    label="Train Split",
493                )
494
495            # Format x-axis for dates
496            ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m-%d"))
497            fig.autofmt_xdate()
498        else:
499            # Numeric indices
500            n_points_train = len(self.df_)
501            n_points_forecast = len(self.mean_)
502
503            x_hist = np.arange(n_points_train)
504            x_forecast = np.arange(
505                n_points_train, n_points_train + n_points_forecast
506            )
507
508            ax.plot(
509                x_hist,
510                historical_data,
511                "-",
512                label="Historical",
513                color="blue",
514                linewidth=1.5,
515            )
516            ax.plot(
517                x_forecast,
518                forecast_data,
519                "-",
520                label="Forecast",
521                color="red",
522                linewidth=1.5,
523            )
524
525            if has_intervals:
526                ax.fill_between(
527                    x_forecast,
528                    lower_data,
529                    upper_data,
530                    alpha=0.3,
531                    color="red",
532                    label="Prediction Interval",
533                )
534
535            if self.split_idx_ is not None:
536                ax.axvline(
537                    x=self.split_idx_,
538                    color="gray",
539                    linestyle="--",
540                    alpha=0.5,
541                    label="Train Split",
542                )
543
544        # Set title and labels
545        series_name = (
546            self.series_names[series_idx]
547            if series_idx < len(self.series_names)
548            else f"Series {series_idx}"
549        )
550        plt.title(f"Forecast for {series_name}", fontsize=14, fontweight="bold")
551        plt.xlabel("Time")
552        plt.ylabel("Value")
553        plt.legend()
554        plt.grid(True, alpha=0.3)
555        plt.tight_layout()
556        plt.show()

Sequential stacking for time series with unified strategy.

Core Strategy:

  1. Split data: half1 (base models) | half2 (meta-model)
  2. Train base models on half1, predict half2
  3. Create augmented dataset: [original_series | base_pred_1 | base_pred_2 | ...], stacking base predictions as additional time series and extracting the target series
  4. Train meta-MTS on half2 with augmented data
  5. Retrain base models on half2 for temporal alignment
  6. At prediction: base models forecast → augment → meta-model predicts (see the usage sketch below)
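
A minimal usage sketch of this strategy, assuming a pandas DataFrame of two series and illustrative hyperparameters; the meta-model is an MTS carrying the attributes MTSStacker reads from it (lags, n_hidden_features, replications, kernel, type_pi), and all values and data below are made up for the example.

```python
import numpy as np
import pandas as pd
import nnetsauce as ns
from sklearn.linear_model import Lasso, Ridge

rng = np.random.default_rng(1)
idx = pd.date_range("2020-01-01", periods=200, freq="D")
X = pd.DataFrame(rng.normal(size=(200, 2)).cumsum(axis=0),
                 index=idx, columns=["series0", "series1"])

# Meta-model: an MTS with simulation-based prediction intervals (per the docstring above)
meta = ns.MTS(obj=Ridge(), lags=5, n_hidden_features=5,
              replications=100, kernel="gaussian", type_pi="scp2-kde")

stacker = ns.MTSStacker(base_models=[Ridge(), Lasso()],
                        meta_model=meta, split_ratio=0.5)
stacker.fit(X)
res = stacker.predict(h=10, level=95)   # DescribeResult(mean, [sims,] lower, upper) or DataFrame
stacker.plot(series="series0")
```
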
def fit(self, X, xreg=None, **kwargs):
 53    def fit(self, X, xreg=None, **kwargs):
 54        """
 55        Fit MTSStacker using sequential stacking strategy.
 56
 57        Parameters
 58        ----------
 59        X : array-like or DataFrame, shape (n_samples, n_features)
 60            Training time series (most recent observations last)
 61        xreg : array-like, optional
 62            External regressors
 63        **kwargs : dict
 64            Additional parameters for base and meta models
 65
 66        Returns
 67        -------
 68        self : object
 69        """
 70        # 1. Store attributes and convert to DataFrame if needed
 71        if isinstance(X, pd.DataFrame):
 72            self.df_ = X.copy()
 73            X_array = X.values
 74            self.series_names = X.columns.tolist()
 75        else:
 76            X_array = np.asarray(X)
 77            self.df_ = pd.DataFrame(X_array)
 78            self.series_names = [f"series{i}" for i in range(X_array.shape[1])]
 79
 80        n_samples = X_array.shape[0]
 81        self.n_series_ = X_array.shape[1] if X_array.ndim > 1 else 1
 82
 83        # 2. Split data into half1 and half2
 84        split_idx = int(n_samples * self.split_ratio)
 85        self.split_idx_ = split_idx
 86
 87        if split_idx < self.meta_model.lags:
 88            raise ValueError(
 89                f"Split creates insufficient data: split_idx={split_idx} < "
 90                f"lags={self.meta_model.lags}. Reduce split_ratio or use fewer lags."
 91            )
 92
 93        half1 = X_array[:split_idx]
 94        half2 = X_array[split_idx:]
 95
 96        # 3. Train base models on half1 and predict half2
 97        base_preds = []
 98        temp_base_models = []
 99
100        for base_model in self.base_models:
101            # Wrap in MTS with same config as meta_model
102            base_mts = MTS(
103                obj=clone(base_model),
104                lags=self.meta_model.lags,
105                n_hidden_features=self.meta_model.n_hidden_features,
106                replications=self.meta_model.replications,
107                kernel=self.meta_model.kernel,
108                type_pi=None,  # No prediction intervals for base models
109            )
110            base_mts.fit(half1)
111
112            # Predict half2
113            pred = base_mts.predict(h=len(half2))
114
115            # Handle different return types
116            if isinstance(pred, pd.DataFrame):
117                base_preds.append(pred.values)
118            elif isinstance(pred, np.ndarray):
119                base_preds.append(pred)
120            elif hasattr(pred, "mean"):
121                # Named tuple with mean attribute
122                mean_pred = pred.mean
123                base_preds.append(
124                    mean_pred.values
125                    if isinstance(mean_pred, pd.DataFrame)
126                    else mean_pred
127                )
128            else:
129                raise ValueError(f"Unexpected prediction type: {type(pred)}")
130
131            temp_base_models.append(base_mts)
132
133        # 4. Create augmented dataset: [original | base_pred_1 | base_pred_2 | ...]
134        base_preds_array = np.hstack(
135            base_preds
136        )  # shape: (len(half2), n_series * n_base_models)
137
138        if isinstance(X, pd.DataFrame):
139            half2_df = pd.DataFrame(
140                half2,
141                index=self.df_.index[split_idx:],
142                columns=self.series_names,
143            )
144            base_preds_df = pd.DataFrame(
145                base_preds_array,
146                index=self.df_.index[split_idx:],
147                columns=[
148                    f"base_{i}_{j}"
149                    for i in range(len(self.base_models))
150                    for j in range(self.n_series_)
151                ],
152            )
153            augmented = pd.concat([half2_df, base_preds_df], axis=1)
154        else:
155            augmented = np.hstack([half2, base_preds_array])
156
157        # 5. Train meta-model on augmented half2
158        self.meta_model.fit(augmented, xreg=xreg, **kwargs)
159
160        # Store meta-model attributes
161        self.output_dates_ = self.meta_model.output_dates_
162        self.fit_objs_ = self.meta_model.fit_objs_
163        self.y_ = self.meta_model.y_
164        self.X_ = self.meta_model.X_
165        self.xreg_ = self.meta_model.xreg_
166        self.y_means_ = self.meta_model.y_means_
167        self.residuals_ = self.meta_model.residuals_
168
169        # 6. FIXED: Retrain base models on half2 for temporal alignment
170        self.fitted_base_models_ = []
171        for i, base_model in enumerate(self.base_models):
172            base_mts_final = MTS(
173                obj=clone(base_model),
174                lags=self.meta_model.lags,
175                n_hidden_features=self.meta_model.n_hidden_features,
176                replications=self.meta_model.replications,
177                kernel=self.meta_model.kernel,
178                type_pi=None,
179            )
180            base_mts_final.fit(half2)
181            self.fitted_base_models_.append(base_mts_final)
182
183        return self

Fit MTSStacker using sequential stacking strategy.

Parameters

X : array-like or DataFrame, shape (n_samples, n_features)
    Training time series (most recent observations last)
xreg : array-like, optional
    External regressors
**kwargs : dict
    Additional parameters for base and meta models

Returns

self : object
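
To make the augmented dataset built in step 4 of `fit` concrete, this schematic shows the column layout the meta-model is trained on; the shapes and the `base_{i}_{j}` naming follow the source above, and the zero-filled data is only a placeholder.

```python
import numpy as np
import pandas as pd

n_half2, n_series, n_base = 50, 2, 2
half2 = np.zeros((n_half2, n_series))                       # second half of the original series
base_preds_array = np.zeros((n_half2, n_series * n_base))   # hstacked base-model predictions of half2

cols = (["series0", "series1"]
        + [f"base_{i}_{j}" for i in range(n_base) for j in range(n_series)])
augmented = pd.DataFrame(np.hstack([half2, base_preds_array]), columns=cols)
print(augmented.shape, augmented.columns.tolist())
# (50, 6) ['series0', 'series1', 'base_0_0', 'base_0_1', 'base_1_0', 'base_1_1']
```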

def predict(self, h=5, level=95, **kwargs):
185    def predict(self, h=5, level=95, **kwargs):
186        """
187        Forecast h steps ahead using stacked predictions.
188
189        FIXED: Now properly generates base model forecasts and uses them
190        to create augmented features for the meta-model.
191
192        Parameters
193        ----------
194        h : int
195            Forecast horizon
196        level : int
197            Confidence level for prediction intervals
198        **kwargs : dict
199            Additional parameters for prediction
200
201        Returns
202        -------
203        DescribeResult or DataFrame
204            Predictions with optional intervals/simulations
205        """
206        # Step 1: Generate base model forecasts for horizon h
207        base_forecasts = []
208
209        for base_mts in self.fitted_base_models_:
210            # Each base model forecasts h steps ahead
211            forecast = base_mts.predict(h=h)
212
213            # Extract mean prediction
214            if isinstance(forecast, pd.DataFrame):
215                base_forecasts.append(forecast.values)
216            elif isinstance(forecast, np.ndarray):
217                base_forecasts.append(forecast)
218            elif hasattr(forecast, "mean"):
219                mean_pred = forecast.mean
220                base_forecasts.append(
221                    mean_pred.values
222                    if isinstance(mean_pred, pd.DataFrame)
223                    else mean_pred
224                )
225            else:
226                raise ValueError(f"Unexpected forecast type: {type(forecast)}")
227
228        # Step 2: Stack base forecasts into augmented features
229        base_forecasts_array = np.hstack(
230            base_forecasts
231        )  # shape: (h, n_series * n_base)
232
233        # Step 3: Create augmented input for meta-model
234        # The meta-model needs the original series structure + base predictions
235        # We use recursive forecasting: predict one step, update history, repeat
236
237        # Get last window of data from training
238        last_window = self.df_.iloc[-self.meta_model.lags:].values
239
240        # Initialize containers for results
241        all_forecasts = []
242        all_lowers = [] if level is not None else None
243        all_uppers = [] if level is not None else None
244        all_sims = (
245            []
246            if hasattr(self.meta_model, "type_pi") and self.meta_model.type_pi
247            else None
248        )
249
250        # Recursive forecasting
251        current_window = last_window.copy()
252
253        for step in range(h):
254            # Create augmented input: [current_window_last_row | base_forecast_step]
255            # Note: meta-model was trained on [original | base_preds]
256            # For prediction, we need to simulate this structure
257
258            # Use the base forecast for this step
259            base_forecast_step = base_forecasts_array[
260                step: step + 1, :
261            ]  # shape: (1, n_base_features)
262
263            # Create a dummy augmented dataset for this step
264            # Combine last observed values with base predictions
265            last_obs = current_window[-1:, :]  # shape: (1, n_series)
266            augmented_step = np.hstack([last_obs, base_forecast_step])
267
268            # Convert to DataFrame if needed
269            if isinstance(self.df_, pd.DataFrame):
270                augmented_df = pd.DataFrame(
271                    augmented_step,
272                    columns=(
273                        self.series_names
274                        + [
275                            f"base_{i}_{j}"
276                            for i in range(len(self.base_models))
277                            for j in range(self.n_series_)
278                        ]
279                    ),
280                )
281            else:
282                augmented_df = augmented_step
283
284            # Predict one step with meta-model
285            # This is tricky: we need to use meta-model's internal predict
286            # but with our augmented data structure
287
288            # For now, use the standard predict and extract one step
289            step_result = self.meta_model.predict(h=1, level=level, **kwargs)
290
291            # Extract forecasts
292            if isinstance(step_result, pd.DataFrame):
293                forecast_step = step_result.iloc[0, : self.n_series_].values
294                all_forecasts.append(forecast_step)
295            elif isinstance(step_result, np.ndarray):
296                forecast_step = step_result[0, : self.n_series_]
297                all_forecasts.append(forecast_step)
298            elif hasattr(step_result, "mean"):
299                mean_pred = step_result.mean
300                if isinstance(mean_pred, pd.DataFrame):
301                    forecast_step = mean_pred.iloc[0, : self.n_series_].values
302                else:
303                    forecast_step = mean_pred[0, : self.n_series_]
304                all_forecasts.append(forecast_step)
305
306                # Extract intervals if available
307                if hasattr(step_result, "lower") and all_lowers is not None:
308                    lower_pred = step_result.lower
309                    if isinstance(lower_pred, pd.DataFrame):
310                        all_lowers.append(
311                            lower_pred.iloc[0, : self.n_series_].values
312                        )
313                    else:
314                        all_lowers.append(lower_pred[0, : self.n_series_])
315
316                if hasattr(step_result, "upper") and all_uppers is not None:
317                    upper_pred = step_result.upper
318                    if isinstance(upper_pred, pd.DataFrame):
319                        all_uppers.append(
320                            upper_pred.iloc[0, : self.n_series_].values
321                        )
322                    else:
323                        all_uppers.append(upper_pred[0, : self.n_series_])
324
325                # Extract simulations if available
326                if hasattr(step_result, "sims") and all_sims is not None:
327                    all_sims.append(step_result.sims)
328
329            # Update window for next iteration
330            current_window = np.vstack(
331                [current_window[1:], forecast_step.reshape(1, -1)]
332            )
333
334        # Combine all forecasts
335        forecasts_array = np.array(all_forecasts)
336
337        # Create output dates
338        if hasattr(self.df_, "index") and isinstance(
339            self.df_.index, pd.DatetimeIndex
340        ):
341            last_date = self.df_.index[-1]
342            freq = pd.infer_freq(self.df_.index)
343            if freq:
344                output_dates = pd.date_range(
345                    start=last_date, periods=h + 1, freq=freq
346                )[1:]
347            else:
348                output_dates = pd.RangeIndex(
349                    start=len(self.df_), stop=len(self.df_) + h
350                )
351        else:
352            output_dates = pd.RangeIndex(
353                start=len(self.df_), stop=len(self.df_) + h
354            )
355
356        self.output_dates_ = output_dates
357
358        # Format output
359        mean_df = pd.DataFrame(
360            forecasts_array,
361            index=output_dates,
362            columns=self.series_names[: self.n_series_],
363        )
364        self.mean_ = mean_df
365
366        # Return based on what was computed
367        if all_lowers and all_uppers:
368            lowers_array = np.array(all_lowers)
369            uppers_array = np.array(all_uppers)
370
371            lower_df = pd.DataFrame(
372                lowers_array,
373                index=output_dates,
374                columns=self.series_names[: self.n_series_],
375            )
376            upper_df = pd.DataFrame(
377                uppers_array,
378                index=output_dates,
379                columns=self.series_names[: self.n_series_],
380            )
381
382            self.lower_ = lower_df
383            self.upper_ = upper_df
384
385            if all_sims:
386                self.sims_ = tuple(all_sims)
387                DescribeResult = namedtuple(
388                    "DescribeResult", ("mean", "sims", "lower", "upper")
389                )
390                return DescribeResult(mean_df, self.sims_, lower_df, upper_df)
391            else:
392                DescribeResult = namedtuple(
393                    "DescribeResult", ("mean", "lower", "upper")
394                )
395                return DescribeResult(mean_df, lower_df, upper_df)
396        else:
397            return mean_df

Forecast h steps ahead using stacked predictions.

FIXED: Now properly generates base model forecasts and uses them to create augmented features for the meta-model.

Parameters

h : int
    Forecast horizon
level : int
    Confidence level for prediction intervals
**kwargs : dict
    Additional parameters for prediction

Returns

DescribeResult or DataFrame
    Predictions with optional intervals/simulations
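
A short, hedged sketch of consuming this return value, continuing the illustrative `stacker` from the class-level example above; the attribute names follow the DescribeResult namedtuples constructed in the source.

```python
# Continues the illustrative `stacker` fitted in the class-level sketch above.
res = stacker.predict(h=10, level=95)

if isinstance(res, tuple):               # DescribeResult namedtuple with intervals
    mean_df, lower_df, upper_df = res.mean, res.lower, res.upper
    sims = getattr(res, "sims", None)    # present only when simulations were collected
else:                                    # plain DataFrame of point forecasts
    mean_df = res

print(mean_df.tail())
```
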

class MultitaskClassifier(nnetsauce.Base, sklearn.base.ClassifierMixin):
 16class MultitaskClassifier(Base, ClassifierMixin):
 17    """Multitask Classification model based on regression models, with shared covariates
 18
 19    Parameters:
 20
 21        obj: object
 22            any object (must be a regression model) containing a method fit (obj.fit())
 23            and a method predict (obj.predict())
 24
 25        n_hidden_features: int
 26            number of nodes in the hidden layer
 27
 28        activation_name: str
 29            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 30
 31        a: float
 32            hyperparameter for 'prelu' or 'elu' activation function
 33
 34        nodes_sim: str
 35            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 36            'uniform'
 37
 38        bias: boolean
 39            indicates if the hidden layer contains a bias term (True) or not
 40            (False)
 41
 42        dropout: float
 43            regularization parameter; (random) percentage of nodes dropped out
 44            of the training
 45
 46        direct_link: boolean
 47            indicates if the original predictors are included (True) in model's
 48            fitting or not (False)
 49
 50        n_clusters: int
 51            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 52                no clustering)
 53
 54        cluster_encode: bool
 55            defines how the variable containing clusters is treated (default is one-hot)
 56            if `False`, then labels are used, without one-hot encoding
 57
 58        type_clust: str
 59            type of clustering method: currently k-means ('kmeans') or Gaussian
 60            Mixture Model ('gmm')
 61
 62        type_scaling: a tuple of 3 strings
 63            scaling methods for inputs, hidden layer, and clustering respectively
 64            (and when relevant).
 65            Currently available: standardization ('std') or MinMax scaling ('minmax')
 66
 67        col_sample: float
 68            percentage of covariates randomly chosen for training
 69
 70        row_sample: float
 71            percentage of rows chosen for training, by stratified bootstrapping
 72
 73        seed: int
 74            reproducibility seed for nodes_sim=='uniform'
 75
 76        backend: str
 77            "cpu" or "gpu" or "tpu"
 78
 79    Attributes:
 80
 81        fit_objs_: dict
 82            objects adjusted to each class (one fitted regressor per class)
 83
 84        n_classes_: int
 85            number of classes for the classifier
 86
 87    Examples:
 88
 89    See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/mtask_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/mtask_classification.py)
 90
 91    ```python
 92    import nnetsauce as ns
 93    import numpy as np
 94    from sklearn.datasets import load_breast_cancer
 95    from sklearn.linear_model import LinearRegression
 96    from sklearn.model_selection import train_test_split
 97    from sklearn import metrics
 98    from time import time
 99
100    breast_cancer = load_breast_cancer()
101    Z = breast_cancer.data
102    t = breast_cancer.target
103
104    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
105                                                        random_state=123+2*10)
106
107    # Linear Regression is used
108    regr = LinearRegression()
109    fit_obj = ns.MultitaskClassifier(regr, n_hidden_features=5,
110                                n_clusters=2, type_clust="gmm")
111
112    start = time()
113    fit_obj.fit(X_train, y_train)
114    print(f"Elapsed {time() - start}")
115
116    print(fit_obj.score(X_test, y_test))
117    print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
118
119    start = time()
120    preds = fit_obj.predict(X_test)
121    print(f"Elapsed {time() - start}")
122    print(metrics.classification_report(preds, y_test))
123    ```
124
125    """
126
127    # construct the object -----
128    _estimator_type = "classifier"
129
130    def __init__(
131        self,
132        obj,
133        n_hidden_features=5,
134        activation_name="relu",
135        a=0.01,
136        nodes_sim="sobol",
137        bias=True,
138        dropout=0,
139        direct_link=True,
140        n_clusters=2,
141        cluster_encode=True,
142        type_clust="kmeans",
143        type_scaling=("std", "std", "std"),
144        col_sample=1,
145        row_sample=1,
146        seed=123,
147        backend="cpu",
148    ):
149        super().__init__(
150            n_hidden_features=n_hidden_features,
151            activation_name=activation_name,
152            a=a,
153            nodes_sim=nodes_sim,
154            bias=bias,
155            dropout=dropout,
156            direct_link=direct_link,
157            n_clusters=n_clusters,
158            cluster_encode=cluster_encode,
159            type_clust=type_clust,
160            type_scaling=type_scaling,
161            col_sample=col_sample,
162            row_sample=row_sample,
163            seed=seed,
164            backend=backend,
165        )
166
167        self.type_fit = "classification"
168        self.obj = obj
169        self.fit_objs_ = {}
170
171    def fit(self, X, y, sample_weight=None, **kwargs):
172        """Fit MultitaskClassifier to training data (X, y).
173
174        Args:
175
176            X: {array-like}, shape = [n_samples, n_features]
177                Training vectors, where n_samples is the number
178                of samples and n_features is the number of features.
179
180            y: array-like, shape = [n_samples]
181                Target values.
182
183            **kwargs: additional parameters to be passed to
184                    self.cook_training_set or self.obj.fit
185
186        Returns:
187
188            self: object
189
190        """
191
192        assert mx.is_factor(y), "y must contain only integers"
193
194        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
195
196        self.classes_ = np.unique(y)  # for compatibility with sklearn
197        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
198
199        # multitask response
200        Y = mo.one_hot_encode2(output_y, self.n_classes_)
201
202        # if sample_weight is None:
203        for i in range(self.n_classes_):
204            self.fit_objs_[i] = deepcopy(
205                self.obj.fit(scaled_Z, Y[:, i], **kwargs)
206            )
207
208        self.classes_ = np.unique(y)
209        return self
210
211    def predict(self, X, **kwargs):
212        """Predict test data X.
213
214        Args:
215
216            X: {array-like}, shape = [n_samples, n_features]
217                Training vectors, where n_samples is the number
218                of samples and n_features is the number of features.
219
220            **kwargs: additional parameters to be passed to
221                    self.cook_test_set
222
223        Returns:
224
225            model predictions: {array-like}
226
227        """
228        return np.argmax(self.predict_proba(X, **kwargs), axis=1)
229
230    def predict_proba(self, X, **kwargs):
231        """Predict probabilities for test data X.
232
233        Args:
234
235            X: {array-like}, shape = [n_samples, n_features]
236                Training vectors, where n_samples is the number
237                of samples and n_features is the number of features.
238
239            **kwargs: additional parameters to be passed to
240                    self.cook_test_set
241
242        Returns:
243
244            probability estimates for test data: {array-like}
245
246        """
247
248        shape_X = X.shape
249
250        probs = np.zeros((shape_X[0], self.n_classes_))
251
252        if len(shape_X) == 1:
253            n_features = shape_X[0]
254
255            new_X = mo.rbind(
256                X.reshape(1, n_features),
257                np.ones(n_features).reshape(1, n_features),
258            )
259
260            Z = self.cook_test_set(new_X, **kwargs)
261
262            # loop on all the classes
263            for i in range(self.n_classes_):
264                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0]
265
266        else:
267            Z = self.cook_test_set(X, **kwargs)
268
269            # loop on all the classes
270            for i in range(self.n_classes_):
271                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)
272
273        expit_raw_probs = expit(probs)
274
275        return expit_raw_probs / expit_raw_probs.sum(axis=1)[:, None]
276
277    def decision_function(self, X, **kwargs):
278        """Compute the decision function of X.
279
280        Parameters:
281            X: {array-like}, shape = [n_samples, n_features]
282                Samples to compute decision function for.
283
284            **kwargs: additional parameters to be passed to
285                    self.cook_test_set
286
287        Returns:
288            array-like of shape (n_samples,) or (n_samples, n_classes)
289            Decision function of the input samples. The order of outputs is the same
290            as that of the classes passed to fit.
291        """
292        if not hasattr(self.obj, "decision_function"):
293            # If base classifier doesn't have decision_function, use predict_proba
294            proba = self.predict_proba(X, **kwargs)
295            if proba.shape[1] == 2:
296                return proba[:, 1]  # For binary classification
297            return proba  # For multiclass
298
299        if len(X.shape) == 1:
300            n_features = X.shape[0]
301            new_X = mo.rbind(
302                X.reshape(1, n_features),
303                np.ones(n_features).reshape(1, n_features),
304            )
305
306            return (
307                self.obj.decision_function(
308                    self.cook_test_set(new_X, **kwargs), **kwargs
309                )
310            )[0]
311
312        return self.obj.decision_function(
313            self.cook_test_set(X, **kwargs), **kwargs
314        )
315
316    @property
317    def _estimator_type(self):
318        return "classifier"

Multitask Classification model based on regression models, with shared covariates

Parameters:

obj: object
    any object (must be a regression model) containing a method fit (obj.fit())
    and a method predict (obj.predict())

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

col_sample: float
    percentage of covariates randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

seed: int
    reproducibility seed for nodes_sim=='uniform'

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

fit_objs_: dict
    objects adjusted to each class (one fitted regressor per class)

n_classes_: int
    number of classes for the classifier

Examples:

See also https://github.com/Techtonique/nnetsauce/blob/master/examples/mtask_classification.py

import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time

breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target

X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
                                                    random_state=123+2*10)

# Linear Regression is used
regr = LinearRegression()
fit_obj = ns.MultitaskClassifier(regr, n_hidden_features=5,
                            n_clusters=2, type_clust="gmm")

start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")

print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))

start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
def fit(self, X, y, sample_weight=None, **kwargs):
171    def fit(self, X, y, sample_weight=None, **kwargs):
172        """Fit MultitaskClassifier to training data (X, y).
173
174        Args:
175
176            X: {array-like}, shape = [n_samples, n_features]
177                Training vectors, where n_samples is the number
178                of samples and n_features is the number of features.
179
180            y: array-like, shape = [n_samples]
181                Target values.
182
183            **kwargs: additional parameters to be passed to
184                    self.cook_training_set or self.obj.fit
185
186        Returns:
187
188            self: object
189
190        """
191
192        assert mx.is_factor(y), "y must contain only integers"
193
194        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
195
196        self.classes_ = np.unique(y)  # for compatibility with sklearn
197        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
198
199        # multitask response
200        Y = mo.one_hot_encode2(output_y, self.n_classes_)
201
202        # if sample_weight is None:
203        for i in range(self.n_classes_):
204            self.fit_objs_[i] = deepcopy(
205                self.obj.fit(scaled_Z, Y[:, i], **kwargs)
206            )
207
208        self.classes_ = np.unique(y)
209        return self

Fit MultitaskClassifier to training data (X, y).

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
def predict(self, X, **kwargs):
211    def predict(self, X, **kwargs):
212        """Predict test data X.
213
214        Args:
215
216            X: {array-like}, shape = [n_samples, n_features]
217                Training vectors, where n_samples is the number
218                of samples and n_features is the number of features.
219
220            **kwargs: additional parameters to be passed to
221                    self.cook_test_set
222
223        Returns:
224
225            model predictions: {array-like}
226
227        """
228        return np.argmax(self.predict_proba(X, **kwargs), axis=1)

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
def predict_proba(self, X, **kwargs):
230    def predict_proba(self, X, **kwargs):
231        """Predict probabilities for test data X.
232
233        Args:
234
235            X: {array-like}, shape = [n_samples, n_features]
236                Training vectors, where n_samples is the number
237                of samples and n_features is the number of features.
238
239            **kwargs: additional parameters to be passed to
240                    self.cook_test_set
241
242        Returns:
243
244            probability estimates for test data: {array-like}
245
246        """
247
248        shape_X = X.shape
249
250        probs = np.zeros((shape_X[0], self.n_classes_))
251
252        if len(shape_X) == 1:
253            n_features = shape_X[0]
254
255            new_X = mo.rbind(
256                X.reshape(1, n_features),
257                np.ones(n_features).reshape(1, n_features),
258            )
259
260            Z = self.cook_test_set(new_X, **kwargs)
261
262            # loop on all the classes
263            for i in range(self.n_classes_):
264                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0]
265
266        else:
267            Z = self.cook_test_set(X, **kwargs)
268
269            # loop on all the classes
270            for i in range(self.n_classes_):
271                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)
272
273        expit_raw_probs = expit(probs)
274
275        return expit_raw_probs / expit_raw_probs.sum(axis=1)[:, None]

Predict probabilities for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

probability estimates for test data: {array-like}
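
The returned probabilities are the per-class regression outputs pushed through the logistic function and renormalized row-wise, as in this tiny numerical illustration (not library code):

```python
import numpy as np
from scipy.special import expit

raw = np.array([[ 1.2, -0.3, 0.1],       # per-class regression outputs, 2 samples x 3 classes
                [-0.5,  0.8, 0.0]])
probs = expit(raw)                         # squash each output into (0, 1)
probs /= probs.sum(axis=1, keepdims=True)  # rows now sum to 1
print(probs, probs.sum(axis=1))
```
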
class NeuralNetRegressor(sklearn.base.BaseEstimator, sklearn.base.RegressorMixin):
110class NeuralNetRegressor(BaseEstimator, RegressorMixin):
111    """
112    (Pretrained) Neural Network Regressor.
113
114    Parameters:
115
116        hidden_layer_sizes : tuple, default=(100,)
117            The number of neurons in each hidden layer.
118        max_iter : int, default=100
119            The maximum number of iterations to train the model.
120        learning_rate : float, default=0.01
121            The learning rate for the optimizer.
122        l1_ratio : float, default=0.5
123            The ratio of L1 regularization.
124        alpha : float, default=1e-6
125            The regularization parameter.
126        activation_name : str, default="relu"
127            The activation function to use.
128        dropout : float, default=0.0
129            The dropout rate.
130        random_state : int, default=None
131            The random state for the random number generator.
132        weights : list, default=None
133            The weights to initialize the model with.
134
135    Attributes:
136
137        weights : list
138            The weights of the model.
139        params : list
140            The parameters of the model.
141        scaler_ : sklearn.preprocessing.StandardScaler
142            The scaler used to standardize the input features.
143        y_mean_ : float
144            The mean of the target variable.
145
146    Methods:
147
148        fit(X, y)
149            Fit the model to the data.
150        predict(X)
151            Predict the target variable.
152        get_weights()
153            Get the weights of the model.
154        set_weights(weights)
155            Set the weights of the model.
156    """
157
158    def __init__(
159        self,
160        hidden_layer_sizes=None,
161        max_iter=100,
162        learning_rate=0.01,
163        l1_ratio=0.5,
164        alpha=1e-6,
165        activation_name="relu",
166        dropout=0,
167        weights=None,
168        random_state=None,
169    ):
170        if weights is None and hidden_layer_sizes is None:
171            hidden_layer_sizes = (100,)  # default value if neither is provided
172        self.hidden_layer_sizes = hidden_layer_sizes
173        self.max_iter = max_iter
174        self.learning_rate = learning_rate
175        self.l1_ratio = l1_ratio
176        self.alpha = alpha
177        self.activation_name = activation_name
178        self.dropout = dropout
179        self.weights = weights
180        self.random_state = random_state
181        self.params = None
182        self.scaler_ = StandardScaler()
183        self.y_mean_ = None
184
185    def _validate_weights(self, input_dim):
186        """Validate that weights dimensions are coherent."""
187        if not self.weights:
188            return False
189
190        try:
191            # Check each layer's weights and biases
192            prev_dim = input_dim
193            for W, b in self.weights:
194                # Check weight matrix dimensions
195                if W.shape[0] != prev_dim:
196                    raise ValueError(
197                        f"Weight matrix input dimension {W.shape[0]} does not match previous layer output dimension {prev_dim}"
198                    )
199                # Check bias dimension matches weight matrix output
200                if W.shape[1] != b.shape[0]:
201                    raise ValueError(
202                        f"Bias dimension {b.shape[0]} does not match weight matrix output dimension {W.shape[1]}"
203                    )
204                prev_dim = W.shape[1]
205
206            # Check final output dimension is 1 for regression
207            if prev_dim != 1:
208                raise ValueError(
209                    f"Final layer output dimension {prev_dim} must be 1 for regression"
210                )
211
212            return True
213        except (AttributeError, IndexError):
214            raise ValueError(
215                "Weights format is invalid. Expected list of (weight, bias) tuples"
216            )
217
218    def fit(self, X, y):
219        # Standardize the input features
220        X = self.scaler_.fit_transform(X)
221        # Ensure y is 2D for consistency
222        y = y.reshape(-1, 1)
223        self.y_mean_ = jnp.mean(y)
224        y = y - self.y_mean_
225        # Validate or initialize weights
226        if self.weights is not None:
227            if self._validate_weights(X.shape[1]):
228                self.params = self.weights
229        else:
230            if self.hidden_layer_sizes is None:
231                raise ValueError(
232                    "Either weights or hidden_layer_sizes must be provided"
233                )
234            self.params = initialize_params(
235                X.shape[1], self.hidden_layer_sizes, self.random_state
236            )
237        loss_fn = partial(loss, l1_ratio=self.l1_ratio, alpha=self.alpha)
238        grad_loss = jit(grad(loss_fn))  # compiled gradient evaluation function
239        perex_grads = jit(
240            vmap(grad_loss, in_axes=(None, 0, 0))
241        )  # fast per-example grads
242        # Training loop
243        for _ in range(self.max_iter):
244            grads = perex_grads(self.params, X, y)
245            # Average gradients across examples
246            grads = jax.tree_util.tree_map(lambda g: jnp.mean(g, axis=0), grads)
247            # Update parameters
248            self.params = [
249                (W - self.learning_rate * dW, b - self.learning_rate * db)
250                for (W, b), (dW, db) in zip(self.params, grads)
251            ]
252        # Store final weights
253        self.weights = self.params
254        return self
255
256    def get_weights(self):
257        """Return the current weights of the model."""
258        if self.weights is None:
259            raise ValueError(
260                "No weights available. Model has not been fitted yet."
261            )
262        return self.weights
263
264    def set_weights(self, weights):
265        """Set the weights of the model manually."""
266        self.weights = weights
267        self.params = weights
268
269    def predict(self, X):
270        X = self.scaler_.transform(X)
271        if self.params is None:
272            raise ValueError("Model has not been fitted yet.")
273        predictions = predict_internal(
274            self.params,
275            X,
276            activation_func=self.activation_name,
277            dropout=self.dropout,
278            seed=self.random_state,
279        )
280        return predictions.reshape(-1) + self.y_mean_

(Pretrained) Neural Network Regressor.

Parameters:

hidden_layer_sizes : tuple, default=None
    The number of neurons in each hidden layer; (100,) is used when
    no pretrained weights are given.
max_iter : int, default=100
    The maximum number of iterations to train the model.
learning_rate : float, default=0.01
    The learning rate for the optimizer.
l1_ratio : float, default=0.5
    The ratio of L1 regularization.
alpha : float, default=1e-6
    The regularization parameter.
activation_name : str, default="relu"
    The activation function to use.
dropout : float, default=0.0
    The dropout rate.
random_state : int, default=None
    The random state for the random number generator.
weights : list, default=None
    The weights to initialize the model with.

Attributes:

weights : list
    The weights of the model.
params : list
    The parameters of the model.
scaler_ : sklearn.preprocessing.StandardScaler
    The scaler used to standardize the input features.
y_mean_ : float
    The mean of the target variable.

Methods:

fit(X, y)
    Fit the model to the data.
predict(X)
    Predict the target variable.
get_weights()
    Get the weights of the model.
set_weights(weights)
    Set the weights of the model.
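When supplying pretrained weights, `_validate_weights` expects a list of `(weight_matrix, bias_vector)` pairs whose shapes chain together and end in a single output. A minimal sketch of that format (the layer sizes below are purely illustrative):

```python
import numpy as np

rng = np.random.default_rng(0)

# Illustrative shapes: 4 input features -> 3 hidden units -> 1 output.
# Each entry is a (weight_matrix, bias_vector) pair; W.shape[0] must match the
# previous layer's output size, W.shape[1] must match b.shape[0], and the last
# layer must produce a single output for regression.
pretrained_weights = [
    (rng.normal(size=(4, 3)), np.zeros(3)),
    (rng.normal(size=(3, 1)), np.zeros(1)),
]
```

Such a list can then be passed as `weights=pretrained_weights` and is checked at fit time.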
def fit(self, X, y):
218    def fit(self, X, y):
219        # Standardize the input features
220        X = self.scaler_.fit_transform(X)
221        # Ensure y is 2D for consistency
222        y = y.reshape(-1, 1)
223        self.y_mean_ = jnp.mean(y)
224        y = y - self.y_mean_
225        # Validate or initialize weights
226        if self.weights is not None:
227            if self._validate_weights(X.shape[1]):
228                self.params = self.weights
229        else:
230            if self.hidden_layer_sizes is None:
231                raise ValueError(
232                    "Either weights or hidden_layer_sizes must be provided"
233                )
234            self.params = initialize_params(
235                X.shape[1], self.hidden_layer_sizes, self.random_state
236            )
237        loss_fn = partial(loss, l1_ratio=self.l1_ratio, alpha=self.alpha)
238        grad_loss = jit(grad(loss_fn))  # compiled gradient evaluation function
239        perex_grads = jit(
240            vmap(grad_loss, in_axes=(None, 0, 0))
241        )  # fast per-example grads
242        # Training loop
243        for _ in range(self.max_iter):
244            grads = perex_grads(self.params, X, y)
245            # Average gradients across examples
246            grads = jax.tree_util.tree_map(lambda g: jnp.mean(g, axis=0), grads)
247            # Update parameters
248            self.params = [
249                (W - self.learning_rate * dW, b - self.learning_rate * db)
250                for (W, b), (dW, db) in zip(self.params, grads)
251            ]
252        # Store final weights
253        self.weights = self.params
254        return self
def predict(self, X):
269    def predict(self, X):
270        X = self.scaler_.transform(X)
271        if self.params is None:
272            raise ValueError("Model has not been fitted yet.")
273        predictions = predict_internal(
274            self.params,
275            X,
276            activation_func=self.activation_name,
277            dropout=self.dropout,
278            seed=self.random_state,
279        )
280        return predictions.reshape(-1) + self.y_mean_
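A minimal usage sketch for `NeuralNetRegressor` (the dataset, layer sizes and hyperparameters are illustrative; JAX must be available, since the training loop relies on it):

```python
import numpy as np
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123)

regr = ns.NeuralNetRegressor(
    hidden_layer_sizes=(50,), max_iter=200, learning_rate=0.01, random_state=123
)
regr.fit(X_train, y_train)      # standardizes X and centers y internally
preds = regr.predict(X_test)
print(np.sqrt(np.mean((preds - y_test) ** 2)))  # RMSE on the held-out set
```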
class NeuralNetClassifier(sklearn.base.BaseEstimator, sklearn.base.ClassifierMixin):
 10class NeuralNetClassifier(BaseEstimator, ClassifierMixin):
 11    """
 12    (Pretrained) Neural Network Classifier.
 13
 14    Parameters:
 15
 16        hidden_layer_sizes : tuple, default=(100,)
 17            The number of neurons in each hidden layer.
 18        max_iter : int, default=100
 19            The maximum number of iterations to train the model.
 20        learning_rate : float, default=0.01
 21            The learning rate for the optimizer.
 22        l1_ratio : float, default=0.5
 23            The ratio of L1 regularization.
 24        alpha : float, default=1e-6
 25            The regularization parameter.
 26        activation_name : str, default="relu"
 27            The activation function to use.
 28        dropout : float, default=0.0
 29            The dropout rate.
 30        random_state : int, default=None
 31            The random state for the random number generator.
 32        weights : list, default=None
 33            The weights to initialize the model with.
 34
 35    Attributes:
 36
 37        regr : SimpleMultitaskClassifier
 38            The underlying multitask classifier wrapping a NeuralNetRegressor.
 39        classes_ : array-like
 40            The classes seen during fit.
 41        n_classes_ : int
 42            The number of classes.
 43        n_features_in_ : int
 44            The number of features seen during fit.
 45
 46    Methods:
 47
 48        fit(X, y)
 49            Fit the model to the data.
 50        predict(X)
 51            Predict the target variable.
 52        predict_proba(X)
 53            Predict the probability of the target variable.
 54        get_weights()
 55            Get the weights of the model.
 56        set_weights(weights)
 57            Set the weights of the model.
 58    """
 59
 60    _estimator_type = "classifier"
 61
 62    def __init__(
 63        self,
 64        hidden_layer_sizes=(100,),
 65        max_iter=100,
 66        learning_rate=0.01,
 67        weights=None,
 68        l1_ratio=0.5,
 69        alpha=1e-6,
 70        activation_name="relu",
 71        dropout=0.0,
 72        random_state=None,
 73    ):
 74        self.hidden_layer_sizes = hidden_layer_sizes
 75        self.max_iter = max_iter
 76        self.learning_rate = learning_rate
 77        self.weights = weights
 78        self.l1_ratio = l1_ratio
 79        self.alpha = alpha
 80        self.activation_name = activation_name
 81        self.dropout = dropout
 82        self.random_state = random_state
 83        self.regr = None
 84
 85    def fit(self, X, y):
 86        """Fit the model to the data.
 87
 88        Parameters:
 89
 90            X: {array-like}, shape = [n_samples, n_features]
 91                Training vectors, where n_samples is the number of samples and
 92                n_features is the number of features.
 93            y: array-like, shape = [n_samples]
 94                Target values.
 95        """
 96        regressor = NeuralNetRegressor(
 97            hidden_layer_sizes=self.hidden_layer_sizes,
 98            max_iter=self.max_iter,
 99            learning_rate=self.learning_rate,
100            weights=self.weights,
101            l1_ratio=self.l1_ratio,
102            alpha=self.alpha,
103            activation_name=self.activation_name,
104            dropout=self.dropout,
105            random_state=self.random_state,
106        )
107        self.regr = SimpleMultitaskClassifier(regressor)
108        self.regr.fit(X, y)
109        self.classes_ = np.unique(y)
110        self.n_classes_ = len(self.classes_)
111        self.n_tasks_ = 1
112        self.n_features_in_ = X.shape[1]
113        self.n_outputs_ = 1
114        self.n_samples_fit_ = X.shape[0]
115        self.n_samples_test_ = X.shape[0]
116        self.n_features_out_ = 1
117        self.n_outputs_ = 1
118        self.n_features_in_ = X.shape[1]
119        self.n_features_out_ = 1
120        self.n_outputs_ = 1
121        return self
122
123    def predict_proba(self, X):
124        """Predict the probability of the target variable.
125
126        Parameters:
127
128            X: {array-like}, shape = [n_samples, n_features]
129                Training vectors, where n_samples is the number of samples and
130                n_features is the number of features.
131        """
132        return self.regr.predict_proba(X)
133
134    def predict(self, X):
135        """Predict the target variable.
136
137        Parameters:
138
139            X: {array-like}, shape = [n_samples, n_features]
140                Training vectors, where n_samples is the number of samples and
141                n_features is the number of features.
142        """
143        return self.regr.predict(X)
144
145    @property
146    def _estimator_type(self):
147        return "classifier"

(Pretrained) Neural Network Classifier.

Parameters:

hidden_layer_sizes : tuple, default=(100,)
    The number of neurons in each hidden layer.
max_iter : int, default=100
    The maximum number of iterations to train the model.
learning_rate : float, default=0.01
    The learning rate for the optimizer.
l1_ratio : float, default=0.5
    The ratio of L1 regularization.
alpha : float, default=1e-6
    The regularization parameter.
activation_name : str, default="relu"
    The activation function to use.
dropout : float, default=0.0
    The dropout rate.
random_state : int, default=None
    The random state for the random number generator.
weights : list, default=None
    The weights to initialize the model with.

Attributes:

regr : SimpleMultitaskClassifier
    The underlying multitask classifier wrapping a NeuralNetRegressor.
classes_ : array-like
    The classes seen during fit.
n_classes_ : int
    The number of classes.
n_features_in_ : int
    The number of features seen during fit.

Methods:

fit(X, y)
    Fit the model to the data.
predict(X)
    Predict the target variable.
predict_proba(X)
    Predict the probability of the target variable.
get_weights()
    Get the weights of the model.
set_weights(weights)
    Set the weights of the model.
def fit(self, X, y):
 85    def fit(self, X, y):
 86        """Fit the model to the data.
 87
 88        Parameters:
 89
 90            X: {array-like}, shape = [n_samples, n_features]
 91                Training vectors, where n_samples is the number of samples and
 92                n_features is the number of features.
 93            y: array-like, shape = [n_samples]
 94                Target values.
 95        """
 96        regressor = NeuralNetRegressor(
 97            hidden_layer_sizes=self.hidden_layer_sizes,
 98            max_iter=self.max_iter,
 99            learning_rate=self.learning_rate,
100            weights=self.weights,
101            l1_ratio=self.l1_ratio,
102            alpha=self.alpha,
103            activation_name=self.activation_name,
104            dropout=self.dropout,
105            random_state=self.random_state,
106        )
107        self.regr = SimpleMultitaskClassifier(regressor)
108        self.regr.fit(X, y)
109        self.classes_ = np.unique(y)
110        self.n_classes_ = len(self.classes_)
111        self.n_tasks_ = 1
112        self.n_features_in_ = X.shape[1]
113        self.n_outputs_ = 1
114        self.n_samples_fit_ = X.shape[0]
115        self.n_samples_test_ = X.shape[0]
116        self.n_features_out_ = 1
117        self.n_outputs_ = 1
118        self.n_features_in_ = X.shape[1]
119        self.n_features_out_ = 1
120        self.n_outputs_ = 1
121        return self

Fit the model to the data.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number of samples and
    n_features is the number of features.
y: array-like, shape = [n_samples]
    Target values.
def predict_proba(self, X):
123    def predict_proba(self, X):
124        """Predict the probability of the target variable.
125
126        Parameters:
127
128            X: {array-like}, shape = [n_samples, n_features]
129                Training vectors, where n_samples is the number of samples and
130                n_features is the number of features.
131        """
132        return self.regr.predict_proba(X)

Predict the probability of the target variable.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number of samples and
    n_features is the number of features.
def predict(self, X):
134    def predict(self, X):
135        """Predict the target variable.
136
137        Parameters:
138
139            X: {array-like}, shape = [n_samples, n_features]
140                Training vectors, where n_samples is the number of samples and
141                n_features is the number of features.
142        """
143        return self.regr.predict(X)

Predict the target variable.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number of samples and
    n_features is the number of features.
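A minimal usage sketch for `NeuralNetClassifier` (dataset and settings are illustrative); the labels must be integers, since fitting delegates to `SimpleMultitaskClassifier`:

```python
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)   # y holds integer class labels
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123)

clf = ns.NeuralNetClassifier(hidden_layer_sizes=(50,), max_iter=100, random_state=123)
clf.fit(X_train, y_train)           # wraps a NeuralNetRegressor in a SimpleMultitaskClassifier
probs = clf.predict_proba(X_test)   # one column per class, rows sum to 1
print(accuracy_score(y_test, clf.predict(X_test)))
```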
class PredictionInterval(sklearn.base.BaseEstimator, sklearn.base.RegressorMixin):
 20class PredictionInterval(BaseEstimator, RegressorMixin):
 21    """Class PredictionInterval: Obtain prediction intervals.
 22
 23    Attributes:
 24
 25        obj: an object;
 26            fitted object containing methods `fit` and `predict`
 27
 28        method: a string;
 29            method for constructing the prediction intervals.
 30            Currently "splitconformal" (default) and "localconformal"
 31
 32        level: a float;
 33            Confidence level for prediction intervals. Default is 95,
 34            equivalent to a miscoverage error of 5 (%)
 35
 36        replications: an integer;
 37            Number of replications for simulated conformal (default is `None`)
 38
 39        type_pi: a string;
 40            type of prediction interval: currently `None`
 41            (split conformal without simulation)
 42            for type_pi in:
 43                - 'bootstrap': Bootstrap resampling.
 44                - 'kde': Kernel Density Estimation.
 45
 46        type_split: a string;
 47            "random" (random split of data) or "sequential" (sequential split of data)
 48
 49        seed: an integer;
 50            Reproducibility of fit (there's a random split between fitting and calibration data)
 51    """
 52
 53    def __init__(
 54        self,
 55        obj,
 56        method="splitconformal",
 57        level=95,
 58        type_pi=None,
 59        type_split="random",
 60        replications=None,
 61        kernel=None,
 62        agg="mean",
 63        seed=123,
 64    ):
 65        self.obj = obj
 66        self.method = method
 67        self.level = level
 68        self.type_pi = type_pi
 69        self.type_split = type_split
 70        self.replications = replications
 71        self.kernel = kernel
 72        self.agg = agg
 73        self.seed = seed
 74        self.alpha_ = 1 - self.level / 100
 75        self.quantile_ = None
 76        self.icp_ = None
 77        self.calibrated_residuals_ = None
 78        self.scaled_calibrated_residuals_ = None
 79        self.calibrated_residuals_scaler_ = None
 80        self.kde_ = None
 81        self.aic_ = None
 82        self.aicc_ = None
 83        self.bic_ = None
 84        self.sse_ = None
 85
 86    def fit(self, X, y, sample_weight=None, **kwargs):
 87        """Fit the `method` to training data (X, y).
 88
 89        Args:
 90
 91            X: array-like, shape = [n_samples, n_features];
 92                Training set vectors, where n_samples is the number
 93                of samples and n_features is the number of features.
 94
 95            y: array-like, shape = [n_samples, ]; Target values.
 96
 97            sample_weight: array-like, shape = [n_samples]
 98                Sample weights.
 99
100        """
101
102        if self.type_split == "random":
103            X_train, X_calibration, y_train, y_calibration = train_test_split(
104                X, y, test_size=0.5, random_state=self.seed
105            )
106
107        elif self.type_split == "sequential":
108            n_x = X.shape[0]
109            n_x_half = n_x // 2
110            first_half_idx = range(0, n_x_half)
111            second_half_idx = range(n_x_half, n_x)
112            X_train = X[first_half_idx, :]
113            X_calibration = X[second_half_idx, :]
114            y_train = y[first_half_idx]
115            y_calibration = y[second_half_idx]
116
117        if self.method == "splitconformal":
118            self.obj.fit(X_train, y_train)
119            preds_calibration = self.obj.predict(X_calibration)
120            self.calibrated_residuals_ = y_calibration - preds_calibration
121            absolute_residuals = np.abs(self.calibrated_residuals_)
122            self.calibrated_residuals_scaler_ = StandardScaler(
123                with_mean=True, with_std=True
124            )
125            self.scaled_calibrated_residuals_ = (
126                self.calibrated_residuals_scaler_.fit_transform(
127                    self.calibrated_residuals_.reshape(-1, 1)
128                ).ravel()
129            )
130            try:
131                # numpy version >= 1.22
132                self.quantile_ = np.quantile(
133                    a=absolute_residuals, q=self.level / 100, method="higher"
134                )
135            except Exception:
136                # numpy version < 1.22
137                self.quantile_ = np.quantile(
138                    a=absolute_residuals,
139                    q=self.level / 100,
140                    interpolation="higher",
141                )
142
143        if self.method == "localconformal":
144            mad_estimator = ExtraTreesRegressor()
145            normalizer = RegressorNormalizer(
146                self.obj, mad_estimator, AbsErrorErrFunc()
147            )
148            nc = RegressorNc(self.obj, AbsErrorErrFunc(), normalizer)
149            self.icp_ = IcpRegressor(nc)
150            self.icp_.fit(X_train, y_train)
151            self.icp_.calibrate(X_calibration, y_calibration)
152
153        # Calculate AIC
154        # Get predictions
155        preds = self.obj.predict(X_calibration)
156
157        # Calculate SSE
158        self.sse_ = np.sum((y_calibration - preds) ** 2)
159
160        # Get number of parameters from the base model
161        n_params = (
162            getattr(self.obj, "n_hidden_features", 0) + X_calibration.shape[1]
163        )
164
165        # Calculate AIC
166        n_samples = len(y_calibration)
167        temp = n_samples * np.log(self.sse_ / n_samples)
168        self.aic_ = temp + 2 * n_params
169        self.bic_ = temp + np.log(n_samples) * n_params
170
171        return self
172
173    def predict(self, X, return_pi=False):
174        """Obtain predictions and prediction intervals
175
176        Args:
177
178            X: array-like, shape = [n_samples, n_features];
179                Testing set vectors, where n_samples is the number
180                of samples and n_features is the number of features.
181
182            return_pi: boolean
183                Whether the prediction interval is returned or not.
184                Default is False, for compatibility with other _estimators_.
185                If True, a tuple containing the predictions + lower and upper
186                bounds is returned.
187
188        """
189
190        if self.method == "splitconformal":
191            pred = self.obj.predict(X)
192
193        if self.method == "localconformal":
194            pred = self.icp_.predict(X)
195
196        if self.method == "splitconformal":
197            if (
198                self.replications is None and self.type_pi is None
199            ):  # type_pi is not used here, no bootstrap or kde
200                if return_pi:
201                    DescribeResult = namedtuple(
202                        "DescribeResult", ("mean", "lower", "upper")
203                    )
204                    return DescribeResult(
205                        pred, pred - self.quantile_, pred + self.quantile_
206                    )
207
208                else:
209                    return pred
210
211            else:  # self.method == "splitconformal" and if self.replications is not None, type_pi must be used
212                raise NotImplementedError  # simulation-based intervals are disabled here; the code below is unreachable
213
214                if self.type_pi is None:
215                    self.type_pi = "kde"
216                    raise Warning("type_pi must be set, setting to 'kde'")
217
218                if self.replications is None:
219                    self.replications = 100
220                    raise Warning("replications must be set, setting to 100")
221
222                assert self.type_pi in (
223                    "bootstrap",
224                    "kde",
225                    "normal",
226                    "ecdf",
227                    "permutation",
228                    "smooth-bootstrap",
229                ), "`self.type_pi` must be in ('bootstrap', 'kde', 'normal', 'ecdf', 'permutation', 'smooth-bootstrap')"
230
231                if self.type_pi == "bootstrap":
232                    np.random.seed(self.seed)
233                    self.residuals_sims_ = np.asarray(
234                        [
235                            np.random.choice(
236                                a=self.scaled_calibrated_residuals_,
237                                size=X.shape[0],
238                            )
239                            for _ in range(self.replications)
240                        ]
241                    ).T
242                    self.sims_ = np.asarray(
243                        [
244                            pred
245                            + self.calibrated_residuals_scaler_.scale_[0]
246                            * self.residuals_sims_[:, i].ravel()
247                            for i in range(self.replications)
248                        ]
249                    ).T
250                elif self.type_pi == "kde":
251                    self.kde_ = gaussian_kde(
252                        dataset=self.scaled_calibrated_residuals_
253                    )
254                    self.sims_ = np.asarray(
255                        [
256                            pred
257                            + self.calibrated_residuals_scaler_.scale_[0]
258                            * self.kde_.resample(
259                                size=X.shape[0], seed=self.seed + i
260                            ).ravel()
261                            for i in range(self.replications)
262                        ]
263                    ).T
264                else:  # self.type_pi == "normal" or "ecdf" or "permutation" or "smooth-bootstrap"
265                    self.residuals_sims_ = np.asarray(
266                        simulate_replications(
267                            data=self.scaled_calibrated_residuals_,
268                            method=self.type_pi,
269                            num_replications=self.replications,
270                            n_obs=X.shape[0],
271                            seed=self.seed,
272                        )
273                    ).T
274                    self.sims_ = np.asarray(
275                        [
276                            pred
277                            + self.calibrated_residuals_scaler_.scale_[0]
278                            * self.residuals_sims_[:, i].ravel()
279                            for i in range(self.replications)
280                        ]
281                    ).T
282
283                self.mean_ = np.mean(self.sims_, axis=1)
284                self.lower_ = np.quantile(
285                    self.sims_, q=self.alpha_ / 200, axis=1
286                )
287                self.upper_ = np.quantile(
288                    self.sims_, q=1 - self.alpha_ / 200, axis=1
289                )
290
291                DescribeResult = namedtuple(
292                    "DescribeResult", ("mean", "sims", "lower", "upper")
293                )
294
295                return DescribeResult(
296                    self.mean_, self.sims_, self.lower_, self.upper_
297                )
298
299        if self.method == "localconformal":
300            if self.replications is None:
301                if return_pi:
302                    predictions_bounds = self.icp_.predict(
303                        X, significance=self.alpha_
304                    )
305                    DescribeResult = namedtuple(
306                        "DescribeResult", ("mean", "lower", "upper")
307                    )
308                    return DescribeResult(
309                        pred, predictions_bounds[:, 0], predictions_bounds[:, 1]
310                    )
311
312                else:
313                    return pred
314
315            else:  # (self.method == "localconformal") and if self.replications is not None
316                raise NotImplementedError(
317                    "When self.method == 'localconformal', there are no simulations"
318                )

Class PredictionInterval: Obtain prediction intervals.

Attributes:

obj: an object;
    fitted object containing methods `fit` and `predict`

method: a string;
    method for constructing the prediction intervals.
    Currently "splitconformal" (default) and "localconformal"

level: a float;
    Confidence level for prediction intervals. Default is 95,
    equivalent to a miscoverage error of 5 (%)

replications: an integer;
    Number of replications for simulated conformal (default is `None`)

type_pi: a string;
    type of prediction interval: currently `None`
    (split conformal without simulation)
    for type_pi in:
        - 'bootstrap': Bootstrap resampling.
        - 'kde': Kernel Density Estimation.

type_split: a string;
    "random" (random split of data) or "sequential" (sequential split of data)

seed: an integer;
    Reproducibility of fit (there's a random split between fitting and calibration data)
def fit(self, X, y, sample_weight=None, **kwargs):
 86    def fit(self, X, y, sample_weight=None, **kwargs):
 87        """Fit the `method` to training data (X, y).
 88
 89        Args:
 90
 91            X: array-like, shape = [n_samples, n_features];
 92                Training set vectors, where n_samples is the number
 93                of samples and n_features is the number of features.
 94
 95            y: array-like, shape = [n_samples, ]; Target values.
 96
 97            sample_weight: array-like, shape = [n_samples]
 98                Sample weights.
 99
100        """
101
102        if self.type_split == "random":
103            X_train, X_calibration, y_train, y_calibration = train_test_split(
104                X, y, test_size=0.5, random_state=self.seed
105            )
106
107        elif self.type_split == "sequential":
108            n_x = X.shape[0]
109            n_x_half = n_x // 2
110            first_half_idx = range(0, n_x_half)
111            second_half_idx = range(n_x_half, n_x)
112            X_train = X[first_half_idx, :]
113            X_calibration = X[second_half_idx, :]
114            y_train = y[first_half_idx]
115            y_calibration = y[second_half_idx]
116
117        if self.method == "splitconformal":
118            self.obj.fit(X_train, y_train)
119            preds_calibration = self.obj.predict(X_calibration)
120            self.calibrated_residuals_ = y_calibration - preds_calibration
121            absolute_residuals = np.abs(self.calibrated_residuals_)
122            self.calibrated_residuals_scaler_ = StandardScaler(
123                with_mean=True, with_std=True
124            )
125            self.scaled_calibrated_residuals_ = (
126                self.calibrated_residuals_scaler_.fit_transform(
127                    self.calibrated_residuals_.reshape(-1, 1)
128                ).ravel()
129            )
130            try:
131                # numpy version >= 1.22
132                self.quantile_ = np.quantile(
133                    a=absolute_residuals, q=self.level / 100, method="higher"
134                )
135            except Exception:
136                # numpy version < 1.22
137                self.quantile_ = np.quantile(
138                    a=absolute_residuals,
139                    q=self.level / 100,
140                    interpolation="higher",
141                )
142
143        if self.method == "localconformal":
144            mad_estimator = ExtraTreesRegressor()
145            normalizer = RegressorNormalizer(
146                self.obj, mad_estimator, AbsErrorErrFunc()
147            )
148            nc = RegressorNc(self.obj, AbsErrorErrFunc(), normalizer)
149            self.icp_ = IcpRegressor(nc)
150            self.icp_.fit(X_train, y_train)
151            self.icp_.calibrate(X_calibration, y_calibration)
152
153        # Calculate AIC
154        # Get predictions
155        preds = self.obj.predict(X_calibration)
156
157        # Calculate SSE
158        self.sse_ = np.sum((y_calibration - preds) ** 2)
159
160        # Get number of parameters from the base model
161        n_params = (
162            getattr(self.obj, "n_hidden_features", 0) + X_calibration.shape[1]
163        )
164
165        # Calculate AIC
166        n_samples = len(y_calibration)
167        temp = n_samples * np.log(self.sse_ / n_samples)
168        self.aic_ = temp + 2 * n_params
169        self.bic_ = temp + np.log(n_samples) * n_params
170
171        return self

Fit the method to training data (X, y).

Args:

X: array-like, shape = [n_samples, n_features];
    Training set vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples, ]; Target values.

sample_weight: array-like, shape = [n_samples]
    Sample weights.
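For reference, the information criteria stored by `fit` follow the Gaussian sum-of-squares form used in the code above; the helper below merely restates those formulas and is hypothetical (not part of nnetsauce):

```python
import numpy as np

def information_criteria(sse, n_samples, n_params):
    # Same expressions as in PredictionInterval.fit:
    #   aic_ = n * log(SSE / n) + 2 * k
    #   bic_ = n * log(SSE / n) + log(n) * k
    base = n_samples * np.log(sse / n_samples)
    return base + 2 * n_params, base + np.log(n_samples) * n_params
```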
def predict(self, X, return_pi=False):
173    def predict(self, X, return_pi=False):
174        """Obtain predictions and prediction intervals
175
176        Args:
177
178            X: array-like, shape = [n_samples, n_features];
179                Testing set vectors, where n_samples is the number
180                of samples and n_features is the number of features.
181
182            return_pi: boolean
183                Whether the prediction interval is returned or not.
184                Default is False, for compatibility with other _estimators_.
185                If True, a tuple containing the predictions + lower and upper
186                bounds is returned.
187
188        """
189
190        if self.method == "splitconformal":
191            pred = self.obj.predict(X)
192
193        if self.method == "localconformal":
194            pred = self.icp_.predict(X)
195
196        if self.method == "splitconformal":
197            if (
198                self.replications is None and self.type_pi is None
199            ):  # type_pi is not used here, no bootstrap or kde
200                if return_pi:
201                    DescribeResult = namedtuple(
202                        "DescribeResult", ("mean", "lower", "upper")
203                    )
204                    return DescribeResult(
205                        pred, pred - self.quantile_, pred + self.quantile_
206                    )
207
208                else:
209                    return pred
210
211            else:  # self.method == "splitconformal" and if self.replications is not None, type_pi must be used
212                raise NotImplementedError  # simulation-based intervals are disabled here; the code below is unreachable
213
214                if self.type_pi is None:
215                    self.type_pi = "kde"
216                    raise Warning("type_pi must be set, setting to 'kde'")
217
218                if self.replications is None:
219                    self.replications = 100
220                    raise Warning("replications must be set, setting to 100")
221
222                assert self.type_pi in (
223                    "bootstrap",
224                    "kde",
225                    "normal",
226                    "ecdf",
227                    "permutation",
228                    "smooth-bootstrap",
229                ), "`self.type_pi` must be in ('bootstrap', 'kde', 'normal', 'ecdf', 'permutation', 'smooth-bootstrap')"
230
231                if self.type_pi == "bootstrap":
232                    np.random.seed(self.seed)
233                    self.residuals_sims_ = np.asarray(
234                        [
235                            np.random.choice(
236                                a=self.scaled_calibrated_residuals_,
237                                size=X.shape[0],
238                            )
239                            for _ in range(self.replications)
240                        ]
241                    ).T
242                    self.sims_ = np.asarray(
243                        [
244                            pred
245                            + self.calibrated_residuals_scaler_.scale_[0]
246                            * self.residuals_sims_[:, i].ravel()
247                            for i in range(self.replications)
248                        ]
249                    ).T
250                elif self.type_pi == "kde":
251                    self.kde_ = gaussian_kde(
252                        dataset=self.scaled_calibrated_residuals_
253                    )
254                    self.sims_ = np.asarray(
255                        [
256                            pred
257                            + self.calibrated_residuals_scaler_.scale_[0]
258                            * self.kde_.resample(
259                                size=X.shape[0], seed=self.seed + i
260                            ).ravel()
261                            for i in range(self.replications)
262                        ]
263                    ).T
264                else:  # self.type_pi == "normal" or "ecdf" or "permutation" or "smooth-bootstrap"
265                    self.residuals_sims_ = np.asarray(
266                        simulate_replications(
267                            data=self.scaled_calibrated_residuals_,
268                            method=self.type_pi,
269                            num_replications=self.replications,
270                            n_obs=X.shape[0],
271                            seed=self.seed,
272                        )
273                    ).T
274                    self.sims_ = np.asarray(
275                        [
276                            pred
277                            + self.calibrated_residuals_scaler_.scale_[0]
278                            * self.residuals_sims_[:, i].ravel()
279                            for i in range(self.replications)
280                        ]
281                    ).T
282
283                self.mean_ = np.mean(self.sims_, axis=1)
284                self.lower_ = np.quantile(
285                    self.sims_, q=self.alpha_ / 200, axis=1
286                )
287                self.upper_ = np.quantile(
288                    self.sims_, q=1 - self.alpha_ / 200, axis=1
289                )
290
291                DescribeResult = namedtuple(
292                    "DescribeResult", ("mean", "sims", "lower", "upper")
293                )
294
295                return DescribeResult(
296                    self.mean_, self.sims_, self.lower_, self.upper_
297                )
298
299        if self.method == "localconformal":
300            if self.replications is None:
301                if return_pi:
302                    predictions_bounds = self.icp_.predict(
303                        X, significance=self.alpha_
304                    )
305                    DescribeResult = namedtuple(
306                        "DescribeResult", ("mean", "lower", "upper")
307                    )
308                    return DescribeResult(
309                        pred, predictions_bounds[:, 0], predictions_bounds[:, 1]
310                    )
311
312                else:
313                    return pred
314
315            else:  # (self.method == "localconformal") and if self.replications is not None
316                raise NotImplementedError(
317                    "When self.method == 'localconformal', there are no simulations"
318                )

Obtain predictions and prediction intervals

Args:

X: array-like, shape = [n_samples, n_features];
    Testing set vectors, where n_samples is the number
    of samples and n_features is the number of features.

return_pi: boolean
    Whether the prediction interval is returned or not.
    Default is False, for compatibility with other _estimators_.
    If True, a tuple containing the predictions + lower and upper
    bounds is returned.
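A minimal split-conformal sketch (base estimator and data are illustrative): with the default `method="splitconformal"` and no `replications`, `predict(X, return_pi=True)` returns a namedtuple with `mean`, `lower` and `upper` fields.

```python
import numpy as np
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123)

pi = ns.PredictionInterval(obj=Ridge(), method="splitconformal", level=95, seed=123)
pi.fit(X_train, y_train)                  # internally splits into fitting and calibration halves
res = pi.predict(X_test, return_pi=True)  # DescribeResult(mean, lower, upper)
coverage = np.mean((y_test >= res.lower) & (y_test <= res.upper))
print(coverage)                           # empirical coverage, expected near 0.95
```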
class PredictionSet(sklearn.base.BaseEstimator, sklearn.base.ClassifierMixin):
 20class PredictionSet(BaseEstimator, ClassifierMixin):
 21    """Class PredictionSet: Obtain prediction sets.
 22
 23    Attributes:
 24
 25        obj: an object;
 26            fitted object containing methods `fit` and `predict`
 27
 28        method: a string;
 29            method for constructing the prediction sets.
 30            Currently "icp" (default, inductive conformal) and "tcp" (transductive conformal)
 31
 32        level: a float;
 33            Confidence level for prediction sets. Default is None,
 34            95 is equivalent to a miscoverage error of 5 (%)
 35
 36        seed: an integer;
 37            Reproducibility of fit (there's a random split between fitting and calibration data)
 38    """
 39
 40    def __init__(
 41        self,
 42        obj,
 43        method="icp",
 44        level=None,
 45        seed=123,
 46    ):
 47        self.obj = obj
 48        self.method = method
 49        self.level = level
 50        self.seed = seed
 51        if self.level is not None:
 52            self.alpha_ = 1 - self.level / 100
 53        self.quantile_ = None
 54        self.icp_ = None
 55        self.tcp_ = None
 56
 57        if self.method == "icp":
 58            self.icp_ = IcpClassifier(
 59                ClassifierNc(ClassifierAdapter(self.obj), MarginErrFunc()),
 60            )
 61        elif self.method == "tcp":
 62            self.tcp_ = TcpClassifier(
 63                ClassifierNc(ClassifierAdapter(self.obj), MarginErrFunc()),
 64            )
 65        else:
 66            raise ValueError("`self.method` must be in ('icp', 'tcp')")
 67
 68    def fit(self, X, y, sample_weight=None, **kwargs):
 69        """Fit the `method` to training data (X, y).
 70
 71        Args:
 72
 73            X: array-like, shape = [n_samples, n_features];
 74                Training set vectors, where n_samples is the number
 75                of samples and n_features is the number of features.
 76
 77            y: array-like, shape = [n_samples, ]; Target values.
 78
 79            sample_weight: array-like, shape = [n_samples]
 80                Sample weights.
 81
 82        """
 83        if self.method == "icp":
 84            X_train, X_calibration, y_train, y_calibration = train_test_split(
 85                X, y, test_size=0.5, random_state=self.seed
 86            )
 87            self.icp_.fit(X_train, y_train)
 88            self.icp_.calibrate(X_calibration, y_calibration)
 89
 90        elif self.method == "tcp":
 91            self.tcp_.fit(X, y)
 92
 93        return self
 94
 95    def predict(self, X, **kwargs):
 96        """Obtain predictions and prediction sets
 97
 98        Args:
 99
100            X: array-like, shape = [n_samples, n_features];
101                Testing set vectors, where n_samples is the number
102                of samples and n_features is the number of features.
103
104        """
105
106        if self.method == "icp":
107            return self.icp_.predict(X, significance=self.alpha_, **kwargs)
108
109        elif self.method == "tcp":
110            return self.tcp_.predict(X, significance=self.alpha_, **kwargs)
111
112        else:
113            raise ValueError("`self.method` must be in ('icp', 'tcp')")
114
115    def predict_proba(self, X):
116        predictions = self.predict(X)
117        return np.eye(len(np.unique(predictions)))[predictions]

Class PredictionSet: Obtain prediction sets.

Attributes:

obj: an object;
    fitted object containing methods `fit` and `predict`

method: a string;
    method for constructing the prediction sets.
    Currently "icp" (default, inductive conformal) and "tcp" (transductive conformal)

level: a float;
    Confidence level for prediction sets. Default is None,
    95 is equivalent to a miscoverage error of 5 (%)

seed: an integer;
    Reproducibility of fit (there's a random split between fitting and calibration data)
def fit(self, X, y, sample_weight=None, **kwargs):
68    def fit(self, X, y, sample_weight=None, **kwargs):
69        """Fit the `method` to training data (X, y).
70
71        Args:
72
73            X: array-like, shape = [n_samples, n_features];
74                Training set vectors, where n_samples is the number
75                of samples and n_features is the number of features.
76
77            y: array-like, shape = [n_samples, ]; Target values.
78
79            sample_weight: array-like, shape = [n_samples]
80                Sample weights.
81
82        """
83        if self.method == "icp":
84            X_train, X_calibration, y_train, y_calibration = train_test_split(
85                X, y, test_size=0.5, random_state=self.seed
86            )
87            self.icp_.fit(X_train, y_train)
88            self.icp_.calibrate(X_calibration, y_calibration)
89
90        elif self.method == "tcp":
91            self.tcp_.fit(X, y)
92
93        return self

Fit the method to training data (X, y).

Args:

X: array-like, shape = [n_samples, n_features];
    Training set vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples, ]; Target values.

sample_weight: array-like, shape = [n_samples]
    Sample weights.
def predict(self, X, **kwargs):
 95    def predict(self, X, **kwargs):
 96        """Obtain predictions and prediction sets
 97
 98        Args:
 99
100            X: array-like, shape = [n_samples, n_features];
101                Testing set vectors, where n_samples is the number
102                of samples and n_features is the number of features.
103
104        """
105
106        if self.method == "icp":
107            return self.icp_.predict(X, significance=self.alpha_, **kwargs)
108
109        elif self.method == "tcp":
110            return self.tcp_.predict(X, significance=self.alpha_, **kwargs)
111
112        else:
113            raise ValueError("`self.method` must be in ('icp', 'tcp')")

Obtain predictions and prediction sets

Args:

X: array-like, shape = [n_samples, n_features];
    Testing set vectors, where n_samples is the number
    of samples and n_features is the number of features.
def predict_proba(self, X):
115    def predict_proba(self, X):
116        predictions = self.predict(X)
117        return np.eye(len(np.unique(predictions)))[predictions]
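A minimal inductive-conformal sketch (classifier and data are illustrative; `level` should be set explicitly, since `alpha_` is only computed when a level is provided). The structure of the returned prediction sets follows the underlying conformal classifier.

```python
import nnetsauce as ns
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123)

ps = ns.PredictionSet(obj=LogisticRegression(max_iter=1000), method="icp", level=95, seed=123)
ps.fit(X_train, y_train)      # fits on one half, calibrates on the other
sets = ps.predict(X_test)     # prediction sets at the 95% level
print(sets[:5])
```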
class SimpleMultitaskClassifier(nnetsauce.Base, sklearn.base.ClassifierMixin):
 18class SimpleMultitaskClassifier(Base, ClassifierMixin):
 19    """Multitask Classification model based on regression models, with shared covariates
 20
 21    Parameters:
 22
 23        obj: object
 24            any object (must be a regression model) containing a method fit (obj.fit())
 25            and a method predict (obj.predict())
 26
 27        seed: int
 28            reproducibility seed
 29
 30    Attributes:
 31
 32        fit_objs_: dict
 33            fitted copies of `obj`, one per class
 34
 35        n_classes_: int
 36            number of classes for the classifier
 37
 38    Examples:
 39
 40    ```python
 41    import nnetsauce as ns
 42    import numpy as np
 43    from sklearn.datasets import load_breast_cancer
 44    from sklearn.linear_model import LinearRegression
 45    from sklearn.model_selection import train_test_split
 46    from sklearn import metrics
 47    from time import time
 48
 49    breast_cancer = load_breast_cancer()
 50    Z = breast_cancer.data
 51    t = breast_cancer.target
 52
 53    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
 54                                                        random_state=123+2*10)
 55
 56    # Linear Regression is used
 57    regr = LinearRegression()
 58    fit_obj = ns.SimpleMultitaskClassifier(regr)
 59
 60    start = time()
 61    fit_obj.fit(X_train, y_train)
 62    print(f"Elapsed {time() - start}")
 63
 64    print(fit_obj.score(X_test, y_test))
 65    print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
 66
 67    start = time()
 68    preds = fit_obj.predict(X_test)
 69    print(f"Elapsed {time() - start}")
 70    print(metrics.classification_report(preds, y_test))
 71    ```
 72
 73    """
 74
 75    # construct the object -----
 76    _estimator_type = "classifier"
 77
 78    def __init__(
 79        self,
 80        obj,
 81    ):
 82        self.type_fit = "classification"
 83        self.obj = obj
 84        self.fit_objs_ = {}
 85        self.X_scaler_ = StandardScaler()
 86        self.scaled_X_ = None
 87
 88    def fit(self, X, y, sample_weight=None, **kwargs):
 89        """Fit SimpleMultitaskClassifier to training data (X, y).
 90
 91        Args:
 92
 93            X: {array-like}, shape = [n_samples, n_features]
 94                Training vectors, where n_samples is the number
 95                of samples and n_features is the number of features.
 96
 97            y: array-like, shape = [n_samples]
 98                Target values.
 99
100            **kwargs: additional parameters to be passed to
101                    self.cook_training_set or self.obj.fit
102
103        Returns:
104
105            self: object
106
107        """
108
109        assert mx.is_factor(y), "y must contain only integers"
110
111        self.classes_ = np.unique(y)  # for compatibility with sklearn
112        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
113
114        self.scaled_X_ = self.X_scaler_.fit_transform(X)
115
116        # multitask response
117        Y = mo.one_hot_encode2(y, self.n_classes_)
118
119        try:
120            for i in range(self.n_classes_):
121                self.fit_objs_[i] = deepcopy(
122                    self.obj.fit(
123                        self.scaled_X_,
124                        Y[:, i],
125                        sample_weight=sample_weight,
126                        **kwargs
127                    )
128                )
129        except Exception as e:
130            for i in range(self.n_classes_):
131                self.fit_objs_[i] = deepcopy(
132                    self.obj.fit(self.scaled_X_, Y[:, i], **kwargs)
133                )
134        return self
135
136    def predict(self, X, **kwargs):
137        """Predict test data X.
138
139        Args:
140
141            X: {array-like}, shape = [n_samples, n_features]
142                Training vectors, where n_samples is the number
143                of samples and n_features is the number of features.
144
145            **kwargs: additional parameters
146
147        Returns:
148
149            model predictions: {array-like}
150
151        """
152        return np.argmax(self.predict_proba(X, **kwargs), axis=1)
153
154    def predict_proba(self, X, **kwargs):
155        """Predict probabilities for test data X.
156
157        Args:
158
159            X: {array-like}, shape = [n_samples, n_features]
160                Training vectors, where n_samples is the number
161                of samples and n_features is the number of features.
162
163            **kwargs: additional parameters
164
165        Returns:
166
167            probability estimates for test data: {array-like}
168
169        """
170
171        shape_X = X.shape
172
173        probs = np.zeros((shape_X[0], self.n_classes_))
174
175        if len(shape_X) == 1:  # one example
176            n_features = shape_X[0]
177
178            new_X = mo.rbind(
179                X.reshape(1, n_features),
180                np.ones(n_features).reshape(1, n_features),
181            )
182
183            Z = self.X_scaler_.transform(new_X, **kwargs)
184
185            # Fallback to standard model
186            for i in range(self.n_classes_):
187                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0]
188
189        else:  # multiple rows
190            Z = self.X_scaler_.transform(X, **kwargs)
191
192            # Fallback to standard model
193            for i in range(self.n_classes_):
194                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)
195
196        expit_raw_probs = expit(probs)
197
198        # Clip row sums away from zero to avoid division by zero
199        row_sums = expit_raw_probs.sum(axis=1)[:, None]
200        row_sums[row_sums < 1e-10] = 1e-10
201
202        return expit_raw_probs / row_sums
203
204    def decision_function(self, X, **kwargs):
205        """Compute the decision function of X.
206
207        Parameters:
208            X: {array-like}, shape = [n_samples, n_features]
209                Samples to compute decision function for.
210
211            **kwargs: additional parameters to be passed to
212                    self.cook_test_set
213
214        Returns:
215            array-like of shape (n_samples,) or (n_samples, n_classes)
216            Decision function of the input samples. The order of outputs is the same
217            as that of the classes passed to fit.
218        """
219        if not hasattr(self.obj, "decision_function"):
220            # If base classifier doesn't have decision_function, use predict_proba
221            proba = self.predict_proba(X, **kwargs)
222            if proba.shape[1] == 2:
223                return proba[:, 1]  # For binary classification
224            return proba  # For multiclass
225
226        if len(X.shape) == 1:
227            n_features = X.shape[0]
228            new_X = mo.rbind(
229                X.reshape(1, n_features),
230                np.ones(n_features).reshape(1, n_features),
231            )
232
233            return (
234                self.obj.decision_function(
235                    self.cook_test_set(new_X, **kwargs), **kwargs
236                )
237            )[0]
238
239        return self.obj.decision_function(
240            self.cook_test_set(X, **kwargs), **kwargs
241        )
242
243    @property
244    def _estimator_type(self):
245        return "classifier"

Multitask Classification model based on regression models, with shared covariates

Parameters:

obj: object
    any object (must be a regression model) containing a method fit (obj.fit())
    and a method predict (obj.predict())

seed: int
    reproducibility seed

Attributes:

fit_objs_: dict
    fitted copies of `obj`, one per class

n_classes_: int
    number of classes for the classifier

Examples:

```python
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time

breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target

X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
                                                    random_state=123+2*10)

# Linear Regression is used
regr = LinearRegression()
fit_obj = ns.SimpleMultitaskClassifier(regr)

start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")

print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))

start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
```
def fit(self, X, y, sample_weight=None, **kwargs):
 88    def fit(self, X, y, sample_weight=None, **kwargs):
 89        """Fit SimpleMultitaskClassifier to training data (X, y).
 90
 91        Args:
 92
 93            X: {array-like}, shape = [n_samples, n_features]
 94                Training vectors, where n_samples is the number
 95                of samples and n_features is the number of features.
 96
 97            y: array-like, shape = [n_samples]
 98                Target values.
 99
100            **kwargs: additional parameters to be passed to
101                    self.cook_training_set or self.obj.fit
102
103        Returns:
104
105            self: object
106
107        """
108
109        assert mx.is_factor(y), "y must contain only integers"
110
111        self.classes_ = np.unique(y)  # for compatibility with sklearn
112        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
113
114        self.scaled_X_ = self.X_scaler_.fit_transform(X)
115
116        # multitask response
117        Y = mo.one_hot_encode2(y, self.n_classes_)
118
119        try:
120            for i in range(self.n_classes_):
121                self.fit_objs_[i] = deepcopy(
122                    self.obj.fit(
123                        self.scaled_X_,
124                        Y[:, i],
125                        sample_weight=sample_weight,
126                        **kwargs
127                    )
128                )
129        except Exception as e:
130            for i in range(self.n_classes_):
131                self.fit_objs_[i] = deepcopy(
132                    self.obj.fit(self.scaled_X_, Y[:, i], **kwargs)
133                )
134        return self

Fit SimpleMultitaskClassifier to training data (X, y).

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
def predict(self, X, **kwargs):
136    def predict(self, X, **kwargs):
137        """Predict test data X.
138
139        Args:
140
141            X: {array-like}, shape = [n_samples, n_features]
142                Training vectors, where n_samples is the number
143                of samples and n_features is the number of features.
144
145            **kwargs: additional parameters
146
147        Returns:
148
149            model predictions: {array-like}
150
151        """
152        return np.argmax(self.predict_proba(X, **kwargs), axis=1)

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters

Returns:

model predictions: {array-like}
def predict_proba(self, X, **kwargs):
154    def predict_proba(self, X, **kwargs):
155        """Predict probabilities for test data X.
156
157        Args:
158
159            X: {array-like}, shape = [n_samples, n_features]
160                Training vectors, where n_samples is the number
161                of samples and n_features is the number of features.
162
163            **kwargs: additional parameters
164
165        Returns:
166
167            probability estimates for test data: {array-like}
168
169        """
170
171        shape_X = X.shape
172
173        probs = np.zeros((shape_X[0], self.n_classes_))
174
175        if len(shape_X) == 1:  # one example
176            n_features = shape_X[0]
177
178            new_X = mo.rbind(
179                X.reshape(1, n_features),
180                np.ones(n_features).reshape(1, n_features),
181            )
182
183            Z = self.X_scaler_.transform(new_X, **kwargs)
184
185            # Fallback to standard model
186            for i in range(self.n_classes_):
187                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0]
188
189        else:  # multiple rows
190            Z = self.X_scaler_.transform(X, **kwargs)
191
192            # Fallback to standard model
193            for i in range(self.n_classes_):
194                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)
195
196        expit_raw_probs = expit(probs)
197
198        # Add small epsilon to avoid division by zero
199        row_sums = expit_raw_probs.sum(axis=1)[:, None]
200        row_sums[row_sums < 1e-10] = 1e-10
201
202        return expit_raw_probs / row_sums

Predict probabilities for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters

Returns:

probability estimates for test data: {array-like}
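
A small self-contained sketch of the probability construction used in `predict_proba` above: the per-class regression outputs are passed through the logistic (expit) function and each row is normalized to sum to one, with a small floor to avoid division by zero. The raw scores below are made up for illustration.

```python
import numpy as np
from scipy.special import expit

# made-up raw per-class regression scores (3 samples, 2 classes)
raw_scores = np.array([[0.9, 0.1],
                       [-0.2, 0.7],
                       [0.5, 0.4]])

probs = expit(raw_scores)                 # squash scores into (0, 1)
row_sums = probs.sum(axis=1)[:, None]
row_sums[row_sums < 1e-10] = 1e-10        # guard against division by zero
probs = probs / row_sums                  # each row now sums to 1
print(probs)
```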
class Optimizer:
  9class Optimizer:
 10    """Optimizer class
 11
 12    Attributes:
 13
 14        type_optim: str
 15            type of optimizer, (currently) either 'sgd' (stochastic minibatch gradient descent)
 16            or 'scd' (stochastic minibatch coordinate descent)
 17
 18        num_iters: int
 19            number of iterations of the optimizer
 20
 21        learning_rate: float
 22            step size
 23
 24        batch_prop: float
 25            proportion of the initial data used at each optimization step
 26
 27        learning_method: str
 28            "poly" - learning rate decreasing as a polynomial function
 29            of # of iterations
 30            "exp" - learning rate decreasing as an exponential function
 31            of # of iterations
 32            "momentum" - gradient descent using momentum (default)
 33
 34        randomization: str
 35            type of randomization applied at each step
 36            "strat" - stratified subsampling (default)
 37            "shuffle" - random subsampling
 38
 39        mass: float
 40            mass on velocity, for `method` == "momentum"
 41
 42        decay: float
 43            coefficient of decrease of the learning rate for
 44            `method` == "poly" and `method` == "exp"
 45
 46        tolerance: float
 47            early stopping parameter (convergence of loss function)
 48
 49        verbose: int
 50            controls verbosity of gradient descent
 51            0 - nothing is printed
 52            1 - a progress bar is printed
 53            2 - successive loss function values are printed
 54
 55    """
 56
 57    # construct the object -----
 58
 59    def __init__(
 60        self,
 61        type_optim="sgd",
 62        num_iters=100,
 63        learning_rate=0.01,
 64        batch_prop=1.0,
 65        learning_method="momentum",
 66        randomization="strat",
 67        mass=0.9,
 68        decay=0.1,
 69        tolerance=1e-3,
 70        verbose=1,
 71    ):
 72        self.type_optim = type_optim
 73        self.num_iters = num_iters
 74        self.learning_rate = learning_rate
 75        self.batch_prop = batch_prop
 76        self.learning_method = learning_method
 77        self.randomization = randomization
 78        self.mass = mass
 79        self.decay = decay
 80        self.tolerance = tolerance
 81        self.verbose = verbose
 82        self.opt = None
 83
 84    def fit(self, loss_func, response, x0, q=None, **kwargs):
 85        """Fit GLM model to training data (X, y).
 86
 87        Args:
 88
 89            loss_func: loss function
 90
 91            response: array-like, shape = [n_samples]
 92            target variable (used for subsampling)
 93
 94            x0: array-like, shape = [n_features]
 95                initial value provided to the optimizer
 96
 97            **kwargs: additional parameters to be passed to
 98                    loss function
 99
100        Returns:
101
102            self: object
103
104        """
105
106        if self.type_optim == "scd":
107            self.results = scd(
108                loss_func,
109                response=response,
110                x=x0,
111                num_iters=self.num_iters,
112                batch_prop=self.batch_prop,
113                learning_rate=self.learning_rate,
114                learning_method=self.learning_method,
115                mass=self.mass,
116                decay=self.decay,
117                randomization=self.randomization,
118                tolerance=self.tolerance,
119                verbose=self.verbose,
120                **kwargs
121            )
122
123        if self.type_optim == "sgd":
124            self.results = sgd(
125                loss_func,
126                response=response,
127                x=x0,
128                num_iters=self.num_iters,
129                batch_prop=self.batch_prop,
130                learning_rate=self.learning_rate,
131                learning_method=self.learning_method,
132                mass=self.mass,
133                decay=self.decay,
134                randomization=self.randomization,
135                tolerance=self.tolerance,
136                verbose=self.verbose,
137                **kwargs
138            )
139
140        return self
141
142    def one_hot_encode(self, y, n_classes):
143        return one_hot_encode(y, n_classes)

Optimizer class

Attributes:

type_optim: str
    type of optimizer, (currently) either 'sgd' (stochastic minibatch gradient descent)
    or 'scd' (stochastic minibatch coordinate descent)

num_iters: int
    number of iterations of the optimizer

learning_rate: float
    step size

batch_prop: float
    proportion of the initial data used at each optimization step

learning_method: str
    "poly" - learning rate decreasing as a polynomial function
    of # of iterations
    "exp" - learning rate decreasing as an exponential function
    of # of iterations
    "momentum" - gradient descent using momentum (default)

randomization: str
    type of randomization applied at each step
    "strat" - stratified subsampling (default)
    "shuffle" - random subsampling

mass: float
    mass on velocity, for `method` == "momentum"

decay: float
    coefficient of decrease of the learning rate for
    `method` == "poly" and `method` == "exp"

tolerance: float
    early stopping parameter (convergence of loss function)

verbose: int
    controls verbosity of gradient descent
    0 - nothing is printed
    1 - a progress bar is printed
    2 - successive loss function values are printed
def fit(self, loss_func, response, x0, q=None, **kwargs):
 84    def fit(self, loss_func, response, x0, q=None, **kwargs):
 85        """Fit GLM model to training data (X, y).
 86
 87        Args:
 88
 89            loss_func: loss function
 90
 91            response: array-like, shape = [n_samples]
 92            target variable (used for subsampling)
 93
 94            x0: array-like, shape = [n_features]
 95                initial value provided to the optimizer
 96
 97            **kwargs: additional parameters to be passed to
 98                    loss function
 99
100        Returns:
101
102            self: object
103
104        """
105
106        if self.type_optim == "scd":
107            self.results = scd(
108                loss_func,
109                response=response,
110                x=x0,
111                num_iters=self.num_iters,
112                batch_prop=self.batch_prop,
113                learning_rate=self.learning_rate,
114                learning_method=self.learning_method,
115                mass=self.mass,
116                decay=self.decay,
117                randomization=self.randomization,
118                tolerance=self.tolerance,
119                verbose=self.verbose,
120                **kwargs
121            )
122
123        if self.type_optim == "sgd":
124            self.results = sgd(
125                loss_func,
126                response=response,
127                x=x0,
128                num_iters=self.num_iters,
129                batch_prop=self.batch_prop,
130                learning_rate=self.learning_rate,
131                learning_method=self.learning_method,
132                mass=self.mass,
133                decay=self.decay,
134                randomization=self.randomization,
135                tolerance=self.tolerance,
136                verbose=self.verbose,
137                **kwargs
138            )
139
140        return self

Run the optimizer on a loss function, starting from x0.

Args:

loss_func: loss function

response: array-like, shape = [n_samples]
    target variable (used for subsampling)

x0: array-like, shape = [n_features]
    initial value provided to the optimizer

**kwargs: additional parameters to be passed to
        loss function

Returns:

self: object
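
A minimal construction sketch (the parameter values below are illustrative, not recommendations); `fit` then expects a loss function, the response used for subsampling, and a starting point `x0`:

```python
import nnetsauce as ns

# illustrative settings; the defaults are those shown in __init__ above
opt = ns.Optimizer(
    type_optim="sgd",        # or "scd" for stochastic coordinate descent
    num_iters=200,
    learning_rate=0.05,
    batch_prop=0.8,          # use 80% of the rows at each step
    learning_method="poly",
    randomization="strat",
    tolerance=1e-4,
    verbose=0,
)
# opt.fit(loss_func, response, x0) runs the chosen stochastic optimizer;
# the optimization results are stored in opt.results
```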
class QuantileRegressor(sklearn.base.BaseEstimator, sklearn.base.RegressorMixin):
 37class QuantileRegressor(BaseEstimator, RegressorMixin):
 38    """
 39    Quantile Regressor.
 40
 41    Parameters:
 42
 43        obj: base model (regression model)
 44            The base regressor from which to build a
 45            quantile regressor.
 46
 47        level: int, default=95
 48            The level of the quantiles to compute.
 49
 50        scoring: str, default="predictions"
 51            The scoring to use for the optimization and constructing
 52            prediction intervals (predictions, residuals, conformal,
 53              studentized, conformal-studentized).
 54
 55    Attributes:
 56
 57        obj_ : base model (regression model)
 58            The base regressor from which to build a
 59            quantile regressor.
 60
 61        offset_multipliers_ : list
 62            The multipliers for the offset.
 63
 64        scoring_residuals_ : list
 65            The residuals for the scoring.
 66
 67        student_multiplier_ : float
 68            The multiplier for the student.
 69
 70    """
 71
 72    def __init__(self, obj, level=95, scoring="predictions"):
 73        assert scoring in (
 74            "predictions",
 75            "residuals",
 76            "conformal",
 77            "studentized",
 78            "conformal-studentized",
 79        ), "scoring must be 'predictions' or 'residuals'"
 80        self.obj = obj
 81        low_risk_level = (1 - level / 100) / 2
 82        self.quantiles = [low_risk_level, 0.5, 1 - low_risk_level]
 83        self.scoring = scoring
 84        self.offset_multipliers_ = None
 85        self.obj_ = None
 86        self.scoring_residuals_ = None
 87        self.student_multiplier_ = None
 88
 89    def _compute_quantile_loss(self, residuals, quantile):
 90        """
 91        Compute the quantile loss for a given set of residuals and quantile.
 92        """
 93        return np.mean(
 94            residuals
 95            * (quantile * (residuals >= 0) + (quantile - 1) * (residuals < 0))
 96        )
 97
 98    def _optimize_multiplier(
 99        self,
100        y,
101        base_predictions,
102        prev_predictions,
103        scoring_residuals=None,
104        quantile=0.5,
105    ):
106        """
107        Optimize the multiplier for a given quantile.
108        """
109        if not 0 < quantile < 1:
110            raise ValueError("Quantile should be between 0 and 1.")
111
112        n = len(y)
113
114        def objective(log_multiplier):
115            """
116            Objective function for optimization.
117            """
118            # Convert to positive multiplier using exp
119            multiplier = np.exp(log_multiplier[0])
120            if self.scoring == "predictions":
121                assert (
122                    base_predictions is not None
123                ), "base_predictions must be not None"
124                # Calculate predictions
125                if prev_predictions is None:
126                    # For first quantile, subtract from conditional expectation
127                    predictions = base_predictions - multiplier * np.abs(
128                        base_predictions
129                    )
130                else:
131                    # For other quantiles, add to previous quantile
132                    offset = multiplier * np.abs(base_predictions)
133                    predictions = prev_predictions + offset
134            elif self.scoring in ("residuals", "conformal"):
135                assert (
136                    scoring_residuals is not None
137                ), "scoring_residuals must be not None"
138                # print("scoring_residuals", scoring_residuals)
139                # Calculate predictions
140                if prev_predictions is None:
141                    # For first quantile, subtract from conditional expectation
142                    predictions = base_predictions - multiplier * np.std(
143                        scoring_residuals
144                    ) / np.sqrt(len(scoring_residuals))
145                    # print("predictions", predictions)
146                else:
147                    # For other quantiles, add to previous quantile
148                    offset = (
149                        multiplier
150                        * np.std(scoring_residuals)
151                        / np.sqrt(len(scoring_residuals))
152                    )
153                    predictions = prev_predictions + offset
154            elif self.scoring in ("studentized", "conformal-studentized"):
155                assert (
156                    scoring_residuals is not None
157                ), "scoring_residuals must be not None"
158                # Calculate predictions
159                if prev_predictions is None:
160                    # For first quantile, subtract from conditional expectation
161                    predictions = (
162                        base_predictions - multiplier * self.student_multiplier_
163                    )
164                    # print("predictions", predictions)
165                else:
166                    # For other quantiles, add to previous quantile
167                    offset = multiplier * self.student_multiplier_
168                    predictions = prev_predictions + offset
169            else:
170                raise ValueError("Invalid argument 'scoring'")
171
172            return self._compute_quantile_loss(y - predictions, quantile)
173
174        # Optimize in log space for numerical stability
175        # bounds = [(-10, 10)]  # log space bounds
176        bounds = [(-100, 100)]  # log space bounds
177        result = differential_evolution(
178            objective,
179            bounds,
180            # popsize=15,
181            # maxiter=100,
182            # tol=1e-4,
183            popsize=25,
184            maxiter=200,
185            tol=1e-6,
186            disp=False,
187        )
188
189        return np.exp(result.x[0])
190
191    def fit(self, X, y):
192        """Fit the model to the data.
193
194        Parameters:
195
196            X: {array-like}, shape = [n_samples, n_features]
197                Training vectors, where n_samples is the number of samples and
198                n_features is the number of features.
199            y: array-like, shape = [n_samples]
200                Target values.
201        """
202        self.obj_ = clone(self.obj)
203
204        if self.scoring in ("predictions", "residuals"):
205            self.obj_.fit(X, y)
206            base_predictions = self.obj_.predict(X)
207            scoring_residuals = y - base_predictions
208            self.scoring_residuals_ = scoring_residuals
209
210        elif self.scoring == "conformal":
211            X_train, X_calib, y_train, y_calib = train_test_split(
212                X, y, test_size=0.5, random_state=42
213            )
214            self.obj_.fit(X_train, y_train)
215            scoring_residuals = y_calib - self.obj_.predict(
216                X_calib
217            )  # These are the calibration residuals
218            self.scoring_residuals_ = scoring_residuals
219            # Refit on the calibration set and compute base_predictions on it for the optimization
220            self.obj_.fit(X_calib, y_calib)
221            base_predictions = self.obj_.predict(X_calib)
222
223        elif self.scoring in ("studentized", "conformal-studentized"):
224            # Calculate student multiplier
225            if self.scoring == "conformal-studentized":
226                X_train, X_calib, y_train, y_calib = train_test_split(
227                    X, y, test_size=0.5, random_state=42
228                )
229                self.obj_.fit(X_train, y_train)
230                scoring_residuals = y_calib - self.obj_.predict(X_calib)
231                # Calculate studentized multiplier using calibration data
232                self.student_multiplier_ = np.std(y_calib, ddof=1) / np.sqrt(
233                    len(y_calib) - 1
234                )
235                self.obj_.fit(X_calib, y_calib)
236                base_predictions = self.obj_.predict(X_calib)
237            else:  # regular studentized
238                self.obj_.fit(X, y)
239                base_predictions = self.obj_.predict(X)
240                scoring_residuals = y - base_predictions
241                self.student_multiplier_ = np.std(y, ddof=1) / np.sqrt(
242                    len(y) - 1
243                )
244
245        # Initialize storage for multipliers
246        self.offset_multipliers_ = []
247        # Keep track of current predictions for each quantile
248        current_predictions = None
249
250        # Fit each quantile sequentially
251        for i, quantile in enumerate(self.quantiles):
252            if self.scoring == "predictions":
253                multiplier = self._optimize_multiplier(
254                    y=y,
255                    base_predictions=base_predictions,
256                    prev_predictions=current_predictions,
257                    quantile=quantile,
258                )
259
260                self.offset_multipliers_.append(multiplier)
261
262                # Update current predictions
263                if current_predictions is None:
264                    # First quantile (lowest)
265                    current_predictions = (
266                        base_predictions - multiplier * np.abs(base_predictions)
267                    )
268                else:
269                    # Subsequent quantiles
270                    offset = multiplier * np.abs(base_predictions)
271                    current_predictions = current_predictions + offset
272
273            elif self.scoring == "residuals":
274                multiplier = self._optimize_multiplier(
275                    y=y,
276                    base_predictions=base_predictions,
277                    scoring_residuals=scoring_residuals,
278                    prev_predictions=current_predictions,
279                    quantile=quantile,
280                )
281
282                self.offset_multipliers_.append(multiplier)
283
284                # Update current predictions
285                if current_predictions is None:
286                    # First quantile (lowest)
287                    current_predictions = (
288                        base_predictions
289                        - multiplier
290                        * np.std(scoring_residuals)
291                        / np.sqrt(len(scoring_residuals))
292                    )
293                else:
294                    # Subsequent quantiles
295                    offset = (
296                        multiplier
297                        * np.std(scoring_residuals)
298                        / np.sqrt(len(scoring_residuals))
299                    )
300                    current_predictions = current_predictions + offset
301
302            elif self.scoring == "conformal":
303                multiplier = self._optimize_multiplier(
304                    y=y_calib,
305                    base_predictions=base_predictions,
306                    scoring_residuals=scoring_residuals,
307                    prev_predictions=current_predictions,
308                    quantile=quantile,
309                )
310
311                self.offset_multipliers_.append(multiplier)
312
313                # Update current predictions
314                if current_predictions is None:
315                    # First quantile (lowest)
316                    current_predictions = (
317                        base_predictions
318                        - multiplier
319                        * np.std(scoring_residuals)
320                        / np.sqrt(len(scoring_residuals))
321                    )
322                else:
323                    # Subsequent quantiles
324                    offset = (
325                        multiplier
326                        * np.std(scoring_residuals)
327                        / np.sqrt(len(scoring_residuals))
328                    )
329                    current_predictions = current_predictions + offset
330
331            elif self.scoring in ("studentized", "conformal-studentized"):
332                multiplier = self._optimize_multiplier(
333                    y=y_calib if self.scoring == "conformal-studentized" else y,
334                    base_predictions=base_predictions,
335                    scoring_residuals=scoring_residuals,
336                    prev_predictions=current_predictions,
337                    quantile=quantile,
338                )
339
340                self.offset_multipliers_.append(multiplier)
341
342                # Update current predictions
343                if current_predictions is None:
344                    current_predictions = (
345                        base_predictions - multiplier * self.student_multiplier_
346                    )
347                else:
348                    offset = multiplier * self.student_multiplier_
349                    current_predictions = current_predictions + offset
350
351        return self
352
353    def predict(self, X, return_pi=False):
354        """Predict the target variable.
355
356        Parameters:
357
358            X: {array-like}, shape = [n_samples, n_features]
359                Training vectors, where n_samples is the number of samples and
360                n_features is the number of features.
361
362            return_pi: bool, default=False
363                Whether to return the prediction intervals.
364        """
365        if self.obj_ is None or self.offset_multipliers_ is None:
366            raise ValueError("Model not fitted yet.")
367
368        base_predictions = self.obj_.predict(X)
369        all_predictions = []
370
371        if self.scoring == "predictions":
372            # Generate first quantile
373            current_predictions = base_predictions - self.offset_multipliers_[
374                0
375            ] * np.abs(base_predictions)
376            all_predictions.append(current_predictions)
377
378            # Generate remaining quantiles
379            for multiplier in self.offset_multipliers_[1:]:
380                offset = multiplier * np.abs(base_predictions)
381                current_predictions = current_predictions + offset
382                all_predictions.append(current_predictions)
383
384        elif self.scoring in ("residuals", "conformal"):
385            # Generate first quantile
386            current_predictions = base_predictions - self.offset_multipliers_[
387                0
388            ] * np.std(self.scoring_residuals_) / np.sqrt(
389                len(self.scoring_residuals_)
390            )
391            all_predictions.append(current_predictions)
392
393            # Generate remaining quantiles
394            for multiplier in self.offset_multipliers_[1:]:
395                offset = (
396                    multiplier
397                    * np.std(self.scoring_residuals_)
398                    / np.sqrt(len(self.scoring_residuals_))
399                )
400                current_predictions = current_predictions + offset
401                all_predictions.append(current_predictions)
402
403        elif self.scoring in ("studentized", "conformal-studentized"):
404            # Generate first quantile
405            current_predictions = (
406                base_predictions
407                - self.offset_multipliers_[0] * self.student_multiplier_
408            )
409            all_predictions.append(current_predictions)
410
411            # Generate remaining quantiles
412            for multiplier in self.offset_multipliers_[1:]:
413                offset = multiplier * self.student_multiplier_
414                current_predictions = current_predictions + offset
415                all_predictions.append(current_predictions)
416
417        if not return_pi:
418            return np.asarray(all_predictions[1])
419
420        DescribeResult = namedtuple(
421            "DescribeResult", ["mean", "lower", "upper", "median"]
422        )
423
424        # return an instance carrying the mean and the three quantile predictions
425        return DescribeResult(
426            mean=base_predictions,
427            lower=np.asarray(all_predictions[0]),
428            median=np.asarray(all_predictions[1]),
429            upper=np.asarray(all_predictions[2]),
430        )

Quantile Regressor.

Parameters:

obj: base model (regression model)
    The base regressor from which to build a
    quantile regressor.

level: int, default=95
    The level of the quantiles to compute.

scoring: str, default="predictions"
    The scoring to use for the optimization and constructing
    prediction intervals (predictions, residuals, conformal,
      studentized, conformal-studentized).

Attributes:

obj_ : base model (regression model)
    The base regressor from which to build a
    quantile regressor.

offset_multipliers_ : list
    The multipliers for the offset.

scoring_residuals_ : list
    The residuals for the scoring.

student_multiplier_ : float
    The multiplier for the student.
def fit(self, X, y):
191    def fit(self, X, y):
192        """Fit the model to the data.
193
194        Parameters:
195
196            X: {array-like}, shape = [n_samples, n_features]
197                Training vectors, where n_samples is the number of samples and
198                n_features is the number of features.
199            y: array-like, shape = [n_samples]
200                Target values.
201        """
202        self.obj_ = clone(self.obj)
203
204        if self.scoring in ("predictions", "residuals"):
205            self.obj_.fit(X, y)
206            base_predictions = self.obj_.predict(X)
207            scoring_residuals = y - base_predictions
208            self.scoring_residuals_ = scoring_residuals
209
210        elif self.scoring == "conformal":
211            X_train, X_calib, y_train, y_calib = train_test_split(
212                X, y, test_size=0.5, random_state=42
213            )
214            self.obj_.fit(X_train, y_train)
215            scoring_residuals = y_calib - self.obj_.predict(
216                X_calib
217            )  # These are the calibration residuals
218            self.scoring_residuals_ = scoring_residuals
219            # Refit on the calibration set and compute base_predictions on it for the optimization
220            self.obj_.fit(X_calib, y_calib)
221            base_predictions = self.obj_.predict(X_calib)
222
223        elif self.scoring in ("studentized", "conformal-studentized"):
224            # Calculate student multiplier
225            if self.scoring == "conformal-studentized":
226                X_train, X_calib, y_train, y_calib = train_test_split(
227                    X, y, test_size=0.5, random_state=42
228                )
229                self.obj_.fit(X_train, y_train)
230                scoring_residuals = y_calib - self.obj_.predict(X_calib)
231                # Calculate studentized multiplier using calibration data
232                self.student_multiplier_ = np.std(y_calib, ddof=1) / np.sqrt(
233                    len(y_calib) - 1
234                )
235                self.obj_.fit(X_calib, y_calib)
236                base_predictions = self.obj_.predict(X_calib)
237            else:  # regular studentized
238                self.obj_.fit(X, y)
239                base_predictions = self.obj_.predict(X)
240                scoring_residuals = y - base_predictions
241                self.student_multiplier_ = np.std(y, ddof=1) / np.sqrt(
242                    len(y) - 1
243                )
244
245        # Initialize storage for multipliers
246        self.offset_multipliers_ = []
247        # Keep track of current predictions for each quantile
248        current_predictions = None
249
250        # Fit each quantile sequentially
251        for i, quantile in enumerate(self.quantiles):
252            if self.scoring == "predictions":
253                multiplier = self._optimize_multiplier(
254                    y=y,
255                    base_predictions=base_predictions,
256                    prev_predictions=current_predictions,
257                    quantile=quantile,
258                )
259
260                self.offset_multipliers_.append(multiplier)
261
262                # Update current predictions
263                if current_predictions is None:
264                    # First quantile (lowest)
265                    current_predictions = (
266                        base_predictions - multiplier * np.abs(base_predictions)
267                    )
268                else:
269                    # Subsequent quantiles
270                    offset = multiplier * np.abs(base_predictions)
271                    current_predictions = current_predictions + offset
272
273            elif self.scoring == "residuals":
274                multiplier = self._optimize_multiplier(
275                    y=y,
276                    base_predictions=base_predictions,
277                    scoring_residuals=scoring_residuals,
278                    prev_predictions=current_predictions,
279                    quantile=quantile,
280                )
281
282                self.offset_multipliers_.append(multiplier)
283
284                # Update current predictions
285                if current_predictions is None:
286                    # First quantile (lowest)
287                    current_predictions = (
288                        base_predictions
289                        - multiplier
290                        * np.std(scoring_residuals)
291                        / np.sqrt(len(scoring_residuals))
292                    )
293                else:
294                    # Subsequent quantiles
295                    offset = (
296                        multiplier
297                        * np.std(scoring_residuals)
298                        / np.sqrt(len(scoring_residuals))
299                    )
300                    current_predictions = current_predictions + offset
301
302            elif self.scoring == "conformal":
303                multiplier = self._optimize_multiplier(
304                    y=y_calib,
305                    base_predictions=base_predictions,
306                    scoring_residuals=scoring_residuals,
307                    prev_predictions=current_predictions,
308                    quantile=quantile,
309                )
310
311                self.offset_multipliers_.append(multiplier)
312
313                # Update current predictions
314                if current_predictions is None:
315                    # First quantile (lowest)
316                    current_predictions = (
317                        base_predictions
318                        - multiplier
319                        * np.std(scoring_residuals)
320                        / np.sqrt(len(scoring_residuals))
321                    )
322                else:
323                    # Subsequent quantiles
324                    offset = (
325                        multiplier
326                        * np.std(scoring_residuals)
327                        / np.sqrt(len(scoring_residuals))
328                    )
329                    current_predictions = current_predictions + offset
330
331            elif self.scoring in ("studentized", "conformal-studentized"):
332                multiplier = self._optimize_multiplier(
333                    y=y_calib if self.scoring == "conformal-studentized" else y,
334                    base_predictions=base_predictions,
335                    scoring_residuals=scoring_residuals,
336                    prev_predictions=current_predictions,
337                    quantile=quantile,
338                )
339
340                self.offset_multipliers_.append(multiplier)
341
342                # Update current predictions
343                if current_predictions is None:
344                    current_predictions = (
345                        base_predictions - multiplier * self.student_multiplier_
346                    )
347                else:
348                    offset = multiplier * self.student_multiplier_
349                    current_predictions = current_predictions + offset
350
351        return self

Fit the model to the data.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number of samples and
    n_features is the number of features.
y: array-like, shape = [n_samples]
    Target values.
def predict(self, X, return_pi=False):
353    def predict(self, X, return_pi=False):
354        """Predict the target variable.
355
356        Parameters:
357
358            X: {array-like}, shape = [n_samples, n_features]
359                Training vectors, where n_samples is the number of samples and
360                n_features is the number of features.
361
362            return_pi: bool, default=False
363                Whether to return the prediction intervals.
364        """
365        if self.obj_ is None or self.offset_multipliers_ is None:
366            raise ValueError("Model not fitted yet.")
367
368        base_predictions = self.obj_.predict(X)
369        all_predictions = []
370
371        if self.scoring == "predictions":
372            # Generate first quantile
373            current_predictions = base_predictions - self.offset_multipliers_[
374                0
375            ] * np.abs(base_predictions)
376            all_predictions.append(current_predictions)
377
378            # Generate remaining quantiles
379            for multiplier in self.offset_multipliers_[1:]:
380                offset = multiplier * np.abs(base_predictions)
381                current_predictions = current_predictions + offset
382                all_predictions.append(current_predictions)
383
384        elif self.scoring in ("residuals", "conformal"):
385            # Generate first quantile
386            current_predictions = base_predictions - self.offset_multipliers_[
387                0
388            ] * np.std(self.scoring_residuals_) / np.sqrt(
389                len(self.scoring_residuals_)
390            )
391            all_predictions.append(current_predictions)
392
393            # Generate remaining quantiles
394            for multiplier in self.offset_multipliers_[1:]:
395                offset = (
396                    multiplier
397                    * np.std(self.scoring_residuals_)
398                    / np.sqrt(len(self.scoring_residuals_))
399                )
400                current_predictions = current_predictions + offset
401                all_predictions.append(current_predictions)
402
403        elif self.scoring in ("studentized", "conformal-studentized"):
404            # Generate first quantile
405            current_predictions = (
406                base_predictions
407                - self.offset_multipliers_[0] * self.student_multiplier_
408            )
409            all_predictions.append(current_predictions)
410
411            # Generate remaining quantiles
412            for multiplier in self.offset_multipliers_[1:]:
413                offset = multiplier * self.student_multiplier_
414                current_predictions = current_predictions + offset
415                all_predictions.append(current_predictions)
416
417        if not return_pi:
418            return np.asarray(all_predictions[1])
419
420        DescribeResult = namedtuple(
421            "DescribeResult", ["mean", "lower", "upper", "median"]
422        )
423
424        # return an instance carrying the mean and the three quantile predictions
425        return DescribeResult(
426            mean=base_predictions,
427            lower=np.asarray(all_predictions[0]),
428            median=np.asarray(all_predictions[1]),
429            upper=np.asarray(all_predictions[2]),
430        )

Predict the target variable.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number of samples and
    n_features is the number of features.

return_pi: bool, default=False
    Whether to return the prediction intervals.
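
The class docstring has no Examples section; the following usage sketch (not part of the original documentation) shows a typical call, with `return_pi=True` to retrieve the lower, median and upper quantile predictions:

```python
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=13)

# wrap any scikit-learn regressor; scoring="residuals" is one of the documented options
qr = ns.QuantileRegressor(Ridge(), level=95, scoring="residuals")
qr.fit(X_train, y_train)

res = qr.predict(X_test, return_pi=True)
print(res.lower[:5])   # lower quantile
print(res.median[:5])  # median
print(res.upper[:5])   # upper quantile
```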
class QuantileClassifier(sklearn.base.BaseEstimator, sklearn.base.ClassifierMixin):
 43class QuantileClassifier(BaseEstimator, ClassifierMixin):
 44    """
 45    Quantile Classifier.
 46
 47    Parameters:
 48
 49        obj: base model (regression model)
 50            The base regressor from which to build a
 51            quantile classifier.
 52
 53        level: int, default=95
 54            The level of the quantiles to compute.
 55
 56        scoring: str, default="predictions"
 57            The scoring to use for the optimization and constructing
 58            prediction intervals (predictions, residuals, conformal,
 59              studentized, conformal-studentized).
 60
 61    Attributes:
 62
 63        obj_ : base model (classification model)
 64            The base classifier from which to build a
 65            quantile classifier.
 66
 67        offset_multipliers_ : list
 68            The multipliers for the offset.
 69
 70        scoring_residuals_ : list
 71            The residuals for the scoring.
 72
 73        student_multiplier_ : float
 74            The multiplier for the student.
 75
 76
 77    """
 78
 79    def __init__(self, obj, level=95, scoring="predictions"):
 80        assert scoring in (
 81            "predictions",
 82            "residuals",
 83            "conformal",
 84            "studentized",
 85            "conformal-studentized",
 86        ), "scoring must be 'predictions' or 'residuals'"
 87        self.obj = obj
 88        quantileregressor = QuantileRegressor(self.obj)
 89        quantileregressor.predict = partial(
 90            quantileregressor.predict, return_pi=False
 91        )
 92        self.obj_ = SimpleMultitaskClassifier(quantileregressor)
 93
 94    def fit(self, X, y, **kwargs):
 95        self.obj_.fit(X, y, **kwargs)
 96
 97    def predict(self, X, **kwargs):
 98        return self.obj_.predict(X, **kwargs)
 99
100    def predict_proba(self, X, **kwargs):
101        return self.obj_.predict_proba(X, **kwargs)

Quantile Classifier.

Parameters:

obj: base model (regression model)
    The base regressor from which to build a quantile classifier
    (it is wrapped in a QuantileRegressor and a SimpleMultitaskClassifier).

level: int, default=95
    The level of the quantiles to compute.

scoring: str, default="predictions"
    The scoring to use for the optimization and constructing
    prediction intervals (predictions, residuals, conformal,
      studentized, conformal-studentized).

Attributes:

obj_ : base model (classification model)
    The base classifier from which to build a
    quantile classifier.

offset_multipliers_ : list
    The multipliers for the offset.

scoring_residuals_ : list
    The residuals for the scoring.

student_multiplier_ : float
    The multiplier for the student.
def fit(self, X, y, **kwargs):
94    def fit(self, X, y, **kwargs):
95        self.obj_.fit(X, y, **kwargs)
def predict(self, X, **kwargs):
97    def predict(self, X, **kwargs):
98        return self.obj_.predict(X, **kwargs)
def predict_proba(self, X, **kwargs):
100    def predict_proba(self, X, **kwargs):
101        return self.obj_.predict_proba(X, **kwargs)
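
An illustrative usage sketch (not from the original documentation); since the constructor wraps `obj` in a `QuantileRegressor` and a `SimpleMultitaskClassifier`, a regression model is passed as the base learner:

```python
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=123)

# Ridge is the regression base learner used for the quantile decomposition
clf = ns.QuantileClassifier(Ridge())
clf.fit(X_train, y_train)
print(clf.predict(X_test)[:10])
print(clf.predict_proba(X_test)[:3])
```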
class RandomBagRegressor(nnetsauce.randombag.bag.RandomBag, sklearn.base.RegressorMixin):
 18class RandomBagRegressor(RandomBag, RegressorMixin):
 19    """Randomized 'Bagging' Regression model
 20
 21    Parameters:
 22
 23        obj: object
 24            any object containing a method fit (obj.fit()) and a method predict
 25            (obj.predict())
 26
 27        n_estimators: int
 28            number of boosting iterations
 29
 30        n_hidden_features: int
 31            number of nodes in the hidden layer
 32
 33        activation_name: str
 34            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 35
 36        a: float
 37            hyperparameter for 'prelu' or 'elu' activation function
 38
 39        nodes_sim: str
 40            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 41            'uniform'
 42
 43        bias: boolean
 44            indicates if the hidden layer contains a bias term (True) or not
 45            (False)
 46
 47        dropout: float
 48            regularization parameter; (random) percentage of nodes dropped out
 49            of the training
 50
 51        direct_link: boolean
 52            indicates if the original predictors are included (True) in model's
 53            fitting or not (False)
 54
 55        n_clusters: int
 56            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 57                no clustering)
 58
 59        cluster_encode: bool
 60            defines how the variable containing clusters is treated (default is one-hot)
 61            if `False`, then labels are used, without one-hot encoding
 62
 63        type_clust: str
 64            type of clustering method: currently k-means ('kmeans') or Gaussian
 65            Mixture Model ('gmm')
 66
 67        type_scaling: a tuple of 3 strings
 68            scaling methods for inputs, hidden layer, and clustering respectively
 69            (and when relevant).
 70            Currently available: standardization ('std') or MinMax scaling ('minmax')
 71
 72        col_sample: float
 73            percentage of covariates randomly chosen for training
 74
 75        row_sample: float
 76            percentage of rows chosen for training, by stratified bootstrapping
 77
 78        seed: int
 79            reproducibility seed for nodes_sim=='uniform'
 80
 81        backend: str
 82            "cpu" or "gpu" or "tpu"
 83
 84    Attributes:
 85
 86        voter_: dict
 87            dictionary containing all the fitted base-learners
 88
 89
 90    Examples:
 91
 92    ```python
 93    import numpy as np
 94    import nnetsauce as ns
 95    from sklearn.datasets import fetch_california_housing
 96    from sklearn.tree import DecisionTreeRegressor
 97    from sklearn.model_selection import train_test_split
 98
 99    X, y = fetch_california_housing(return_X_y=True, as_frame=False)
100
101    # split data into training set and test set
102    X_train, X_test, y_train, y_test = train_test_split(X, y,
103                                                        test_size=0.2, random_state=13)
104
105    # Requires further tuning
106    obj = DecisionTreeRegressor(max_depth=3, random_state=123)
107    obj2 = ns.RandomBagRegressor(obj=obj, direct_link=False,
108                                n_estimators=50,
109                                col_sample=0.9, row_sample=0.9,
110                                dropout=0, n_clusters=0, verbose=1)
111
112    obj2.fit(X_train, y_train)
113
114    print(np.sqrt(obj2.score(X_test, y_test))) # RMSE
115
116    ```
117
118    """
119
120    # construct the object -----
121
122    def __init__(
123        self,
124        obj,
125        n_estimators=10,
126        n_hidden_features=1,
127        activation_name="relu",
128        a=0.01,
129        nodes_sim="sobol",
130        bias=True,
131        dropout=0,
132        direct_link=False,
133        n_clusters=2,
134        cluster_encode=True,
135        type_clust="kmeans",
136        type_scaling=("std", "std", "std"),
137        col_sample=1,
138        row_sample=1,
139        n_jobs=None,
140        seed=123,
141        verbose=1,
142        backend="cpu",
143    ):
144        super().__init__(
145            obj=obj,
146            n_estimators=n_estimators,
147            n_hidden_features=n_hidden_features,
148            activation_name=activation_name,
149            a=a,
150            nodes_sim=nodes_sim,
151            bias=bias,
152            dropout=dropout,
153            direct_link=direct_link,
154            n_clusters=n_clusters,
155            cluster_encode=cluster_encode,
156            type_clust=type_clust,
157            type_scaling=type_scaling,
158            col_sample=col_sample,
159            row_sample=row_sample,
160            seed=seed,
161            backend=backend,
162        )
163
164        self.type_fit = "regression"
165        self.verbose = verbose
166        self.n_jobs = n_jobs
167        self.voter_ = {}
168
169    def fit(self, X, y, **kwargs):
170        """Fit Random 'Bagging' model to training data (X, y).
171
172        Args:
173
174            X: {array-like}, shape = [n_samples, n_features]
175                Training vectors, where n_samples is the number
176                of samples and n_features is the number of features.
177
178            y: array-like, shape = [n_samples]
179                Target values.
180
181            **kwargs: additional parameters to be passed to
182                    self.cook_training_set or self.obj.fit
183
184        Returns:
185
186            self: object
187
188        """
189
190        base_learner = CustomRegressor(
191            self.obj,
192            n_hidden_features=self.n_hidden_features,
193            activation_name=self.activation_name,
194            a=self.a,
195            nodes_sim=self.nodes_sim,
196            bias=self.bias,
197            dropout=self.dropout,
198            direct_link=self.direct_link,
199            n_clusters=self.n_clusters,
200            type_clust=self.type_clust,
201            type_scaling=self.type_scaling,
202            col_sample=self.col_sample,
203            row_sample=self.row_sample,
204            seed=self.seed,
205        )
206
207        # 1 - Sequential training -----
208
209        if self.n_jobs is None:
210            self.voter_ = rbagloop_regression(
211                base_learner, X, y, self.n_estimators, self.verbose, self.seed
212            )
213
214            self.n_estimators = len(self.voter_)
215
216            return self
217
218        # 2 - Parallel training -----
219        # buggy
220        # if self.n_jobs is not None:
221        def fit_estimators(m):
222            base_learner__ = deepcopy(base_learner)
223            base_learner__.set_params(seed=self.seed + m * 1000)
224            base_learner__.fit(X, y, **kwargs)
225            return base_learner__
226
227        if self.verbose == 1:
228            voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
229                delayed(fit_estimators)(m)
230                for m in tqdm(range(self.n_estimators))
231            )
232        else:
233            voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
234                delayed(fit_estimators)(m) for m in range(self.n_estimators)
235            )
236
237        self.voter_ = {i: elt for i, elt in enumerate(voters_list)}
238
239        self.n_estimators = len(self.voter_)
240
241        return self
242
243    def predict(self, X, weights=None, **kwargs):
244        """Predict for test data X.
245
246        Args:
247
248            X: {array-like}, shape = [n_samples, n_features]
249                Training vectors, where n_samples is the number
250                of samples and n_features is the number of features.
251
252            **kwargs: additional parameters to be passed to
253                    self.cook_test_set
254
255        Returns:
256
257            estimates for test data: {array-like}
258
259        """
260
261        def calculate_preds(voter, weights=None):
262            ensemble_preds = 0
263
264            n_iter = len(voter)
265
266            assert n_iter > 0, "no estimator found in `RandomBag` ensemble"
267
268            if weights is None:
269                for idx, elt in voter.items():
270                    ensemble_preds += elt.predict(X)
271
272                return ensemble_preds / n_iter
273
274            # if weights is not None:
275            for idx, elt in voter.items():
276                ensemble_preds += weights[idx] * elt.predict(X)
277
278            return ensemble_preds
279
280        # end calculate_preds ----
281
282        if weights is None:
283            return calculate_preds(self.voter_)
284
285        # if weights is not None:
286        self.weights = weights
287
288        return calculate_preds(self.voter_, weights)

Randomized 'Bagging' Regression model

Parameters:

obj: object
    any object containing a method fit (obj.fit()) and a method predict
    (obj.predict())

n_estimators: int
    number of bagging iterations (base learners in the ensemble)

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

col_sample: float
    percentage of covariates randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

seed: int
    reproducibility seed for nodes_sim=='uniform'

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

voter_: dict
    dictionary containing all the fitted base-learners

Examples:

import numpy as np
import nnetsauce as ns
from sklearn.datasets import fetch_california_housing
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split

X, y = fetch_california_housing(return_X_y=True, as_frame=False)

# split data into training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.2, random_state=13)

# Requires further tuning
obj = DecisionTreeRegressor(max_depth=3, random_state=123)
obj2 = ns.RandomBagRegressor(obj=obj, direct_link=False,
                            n_estimators=50,
                            col_sample=0.9, row_sample=0.9,
                            dropout=0, n_clusters=0, verbose=1)

obj2.fit(X_train, y_train)

print(np.sqrt(obj2.score(X_test, y_test))) # RMSE
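
Continuing the example above, the fitted ensemble can be inspected through the `voter_` attribute, a dictionary of fitted base learners (after fitting, `n_estimators` is reset to `len(voter_)`):

```python
# number of fitted base learners in the ensemble
print(len(obj2.voter_))
# each value is one fitted base learner built around the DecisionTreeRegressor
print(type(next(iter(obj2.voter_.values()))))
```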
def fit(self, X, y, **kwargs):
169    def fit(self, X, y, **kwargs):
170        """Fit Random 'Bagging' model to training data (X, y).
171
172        Args:
173
174            X: {array-like}, shape = [n_samples, n_features]
175                Training vectors, where n_samples is the number
176                of samples and n_features is the number of features.
177
178            y: array-like, shape = [n_samples]
179                Target values.
180
181            **kwargs: additional parameters to be passed to
182                    self.cook_training_set or self.obj.fit
183
184        Returns:
185
186            self: object
187
188        """
189
190        base_learner = CustomRegressor(
191            self.obj,
192            n_hidden_features=self.n_hidden_features,
193            activation_name=self.activation_name,
194            a=self.a,
195            nodes_sim=self.nodes_sim,
196            bias=self.bias,
197            dropout=self.dropout,
198            direct_link=self.direct_link,
199            n_clusters=self.n_clusters,
200            type_clust=self.type_clust,
201            type_scaling=self.type_scaling,
202            col_sample=self.col_sample,
203            row_sample=self.row_sample,
204            seed=self.seed,
205        )
206
207        # 1 - Sequential training -----
208
209        if self.n_jobs is None:
210            self.voter_ = rbagloop_regression(
211                base_learner, X, y, self.n_estimators, self.verbose, self.seed
212            )
213
214            self.n_estimators = len(self.voter_)
215
216            return self
217
218        # 2 - Parallel training -----
219        # buggy
220        # if self.n_jobs is not None:
221        def fit_estimators(m):
222            base_learner__ = deepcopy(base_learner)
223            base_learner__.set_params(seed=self.seed + m * 1000)
224            base_learner__.fit(X, y, **kwargs)
225            return base_learner__
226
227        if self.verbose == 1:
228            voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
229                delayed(fit_estimators)(m)
230                for m in tqdm(range(self.n_estimators))
231            )
232        else:
233            voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
234                delayed(fit_estimators)(m) for m in range(self.n_estimators)
235            )
236
237        self.voter_ = {i: elt for i, elt in enumerate(voters_list)}
238
239        self.n_estimators = len(self.voter_)
240
241        return self

Fit Random 'Bagging' model to training data (X, y).

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
def predict(self, X, weights=None, **kwargs):
243    def predict(self, X, weights=None, **kwargs):
244        """Predict for test data X.
245
246        Args:
247
248            X: {array-like}, shape = [n_samples, n_features]
249                Training vectors, where n_samples is the number
250                of samples and n_features is the number of features.
251
252            **kwargs: additional parameters to be passed to
253                    self.cook_test_set
254
255        Returns:
256
257            estimates for test data: {array-like}
258
259        """
260
261        def calculate_preds(voter, weights=None):
262            ensemble_preds = 0
263
264            n_iter = len(voter)
265
266            assert n_iter > 0, "no estimator found in `RandomBag` ensemble"
267
268            if weights is None:
269                for idx, elt in voter.items():
270                    ensemble_preds += elt.predict(X)
271
272                return ensemble_preds / n_iter
273
274            # if weights is not None:
275            for idx, elt in voter.items():
276                ensemble_preds += weights[idx] * elt.predict(X)
277
278            return ensemble_preds
279
280        # end calculate_preds ----
281
282        if weights is None:
283            return calculate_preds(self.voter_)
284
285        # if weights is not None:
286        self.weights = weights
287
288        return calculate_preds(self.voter_, weights)

Predict for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

estimates for test data: {array-like}
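
A minimal usage sketch (illustrative only, not one of the library's shipped examples): the constructor arguments below mirror those documented for `RandomBagClassifier` further down and should be treated as assumptions for `RandomBagRegressor`. When `weights` is passed to `predict`, each base learner's prediction is multiplied by its weight, so weights summing to 1 keep the output on the scale of a plain average.

```python
# Hedged sketch (assumed API, not a shipped example): fit a RandomBagRegressor
# on a toy regression task and predict with and without per-estimator weights.
import numpy as np
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

reg = ns.RandomBagRegressor(
    DecisionTreeRegressor(max_depth=3, random_state=123),
    n_estimators=25,
    n_hidden_features=5,
    col_sample=0.9,
    row_sample=0.9,
)
reg.fit(X_train, y_train)

preds = reg.predict(X_test)  # unweighted average of the base learners

# one weight per fitted base learner; equal weights that sum to 1
# reproduce the unweighted average
w = np.repeat(1.0 / reg.n_estimators, reg.n_estimators)
weighted_preds = reg.predict(X_test, weights=w)

print(np.sqrt(np.mean((y_test - preds) ** 2)))  # test RMSE
```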
class RandomBagClassifier(nnetsauce.randombag.bag.RandomBag, sklearn.base.ClassifierMixin):
 18class RandomBagClassifier(RandomBag, ClassifierMixin):
 19    """Randomized 'Bagging' Classification model
 20
 21    Parameters:
 22
 23        obj: object
 24            any object containing a method fit (obj.fit()) and a method predict
 25            (obj.predict())
 26
 27        n_estimators: int
 28            number of boosting iterations
 29
 30        n_hidden_features: int
 31            number of nodes in the hidden layer
 32
 33        activation_name: str
 34            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 35
 36        a: float
 37            hyperparameter for 'prelu' or 'elu' activation function
 38
 39        nodes_sim: str
 40            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 41            'uniform'
 42
 43        bias: boolean
 44            indicates if the hidden layer contains a bias term (True) or not
 45            (False)
 46
 47        dropout: float
 48            regularization parameter; (random) percentage of nodes dropped out
 49            of the training
 50
 51        direct_link: boolean
 52            indicates if the original predictors are included (True) in model's
 53            fitting or not (False)
 54
 55        n_clusters: int
 56            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 57                no clustering)
 58
 59        cluster_encode: bool
 60            defines how the variable containing clusters is treated (default is one-hot)
 61            if `False`, then labels are used, without one-hot encoding
 62
 63        type_clust: str
 64            type of clustering method: currently k-means ('kmeans') or Gaussian
 65            Mixture Model ('gmm')
 66
 67        type_scaling: a tuple of 3 strings
 68            scaling methods for inputs, hidden layer, and clustering respectively
 69            (and when relevant).
 70            Currently available: standardization ('std') or MinMax scaling ('minmax')
 71
 72        col_sample: float
 73            percentage of covariates randomly chosen for training
 74
 75        row_sample: float
 76            percentage of rows chosen for training, by stratified bootstrapping
 77
 78        seed: int
 79            reproducibility seed for nodes_sim=='uniform'
 80
 81        backend: str
 82            "cpu" or "gpu" or "tpu"
 83
 84    Attributes:
 85
 86        voter_: dict
 87            dictionary containing all the fitted base-learners
 88
 89
 90    Examples:
 91
 92    See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/randombag_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/randombag_classification.py)
 93
 94    ```python
 95    import nnetsauce as ns
 96    from sklearn.datasets import load_breast_cancer
 97    from sklearn.tree import DecisionTreeClassifier
 98    from sklearn.model_selection import train_test_split
 99    from sklearn import metrics
100    from time import time
101
102
103    breast_cancer = load_breast_cancer()
104    Z = breast_cancer.data
105    t = breast_cancer.target
106    np.random.seed(123)
107    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)
108
109    # decision tree
110    clf = DecisionTreeClassifier(max_depth=2, random_state=123)
111    fit_obj = ns.RandomBagClassifier(clf, n_hidden_features=2,
112                                    direct_link=True,
113                                    n_estimators=100,
114                                    col_sample=0.9, row_sample=0.9,
115                                    dropout=0.3, n_clusters=0, verbose=1)
116
117    start = time()
118    fit_obj.fit(X_train, y_train)
119    print(f"Elapsed {time() - start}")
120
121    print(fit_obj.score(X_test, y_test))
122    print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
123
124    start = time()
125    preds = fit_obj.predict(X_test)
126    print(f"Elapsed {time() - start}")
127    print(metrics.classification_report(preds, y_test))
128    ```
129
130    """
131
132    # construct the object -----
133    _estimator_type = "classifier"
134
135    def __init__(
136        self,
137        obj,
138        n_estimators=10,
139        n_hidden_features=1,
140        activation_name="relu",
141        a=0.01,
142        nodes_sim="sobol",
143        bias=True,
144        dropout=0,
145        direct_link=False,
146        n_clusters=2,
147        cluster_encode=True,
148        type_clust="kmeans",
149        type_scaling=("std", "std", "std"),
150        col_sample=1,
151        row_sample=1,
152        n_jobs=None,
153        seed=123,
154        verbose=1,
155        backend="cpu",
156    ):
157        super().__init__(
158            obj=obj,
159            n_estimators=n_estimators,
160            n_hidden_features=n_hidden_features,
161            activation_name=activation_name,
162            a=a,
163            nodes_sim=nodes_sim,
164            bias=bias,
165            dropout=dropout,
166            direct_link=direct_link,
167            n_clusters=n_clusters,
168            cluster_encode=cluster_encode,
169            type_clust=type_clust,
170            type_scaling=type_scaling,
171            col_sample=col_sample,
172            row_sample=row_sample,
173            seed=seed,
174            backend=backend,
175        )
176
177        self.type_fit = "classification"
178        self.verbose = verbose
179        self.n_jobs = n_jobs
180        self.voter_ = {}
181
182    def fit(self, X, y, **kwargs):
183        """Fit Random 'Bagging' model to training data (X, y).
184
185        Args:
186
187            X: {array-like}, shape = [n_samples, n_features]
188                Training vectors, where n_samples is the number
189                of samples and n_features is the number of features.
190
191            y: array-like, shape = [n_samples]
192                Target values.
193
194            **kwargs: additional parameters to be passed to
195                    self.cook_training_set or self.obj.fit
196
197        Returns:
198
199            self: object
200
201        """
202
203        assert mx.is_factor(y), "y must contain only integers"
204
205        self.n_classes_ = len(np.unique(y))  # for compatibility with sklearn
206
207        # training
208        self.n_classes = len(np.unique(y))
209
210        base_learner = CustomClassifier(
211            self.obj,
212            n_hidden_features=self.n_hidden_features,
213            activation_name=self.activation_name,
214            a=self.a,
215            nodes_sim=self.nodes_sim,
216            bias=self.bias,
217            dropout=self.dropout,
218            direct_link=self.direct_link,
219            n_clusters=self.n_clusters,
220            type_clust=self.type_clust,
221            type_scaling=self.type_scaling,
222            col_sample=self.col_sample,
223            row_sample=self.row_sample,
224            seed=self.seed,
225            cv_calibration=None,
226        )
227
228        # 1 - Sequential training -----
229
230        if self.n_jobs is None:
231            self.voter_ = rbagloop_classification(
232                base_learner, X, y, self.n_estimators, self.verbose, self.seed
233            )
234
235            self.n_estimators = len(self.voter_)
236
237            return self
238
239        # 2 - Parallel training -----
240        # buggy
241        # if self.n_jobs is not None:
242        def fit_estimators(m):
243            base_learner__ = deepcopy(base_learner)
244            base_learner__.set_params(seed=self.seed + m * 1000)
245            base_learner__.fit(X, y, **kwargs)
246            return base_learner__
247
248        if self.verbose == 1:
249            voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
250                delayed(fit_estimators)(m)
251                for m in tqdm(range(self.n_estimators))
252            )
253        else:
254            voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
255                delayed(fit_estimators)(m) for m in range(self.n_estimators)
256            )
257
258        self.voter_ = {idx: elt for idx, elt in enumerate(voters_list)}
259
260        self.n_estimators = len(self.voter_)
261        self.classes_ = np.unique(y)
262        return self
263
264    def predict(self, X, weights=None, **kwargs):
265        """Predict test data X.
266
267        Args:
268
269            X: {array-like}, shape = [n_samples, n_features]
270                Training vectors, where n_samples is the number
271                of samples and n_features is the number of features.
272
273            **kwargs: additional parameters to be passed to
274                    self.cook_test_set
275
276        Returns:
277
278            model predictions: {array-like}
279
280        """
281        return self.predict_proba(X, weights, **kwargs).argmax(axis=1)
282
283    def predict_proba(self, X, weights=None, **kwargs):
284        """Predict probabilities for test data X.
285
286        Args:
287
288            X: {array-like}, shape = [n_samples, n_features]
289                Training vectors, where n_samples is the number
290                of samples and n_features is the number of features.
291
292            **kwargs: additional parameters to be passed to
293                    self.cook_test_set
294
295        Returns:
296
297            probability estimates for test data: {array-like}
298
299        """
300
301        def calculate_probas(voter, weights=None, verbose=None):
302            ensemble_proba = 0
303
304            n_iter = len(voter)
305
306            assert n_iter > 0, "no estimator found in `RandomBag` ensemble"
307
308            if weights is None:
309                for idx, elt in voter.items():
310                    try:
311                        ensemble_proba += elt.predict_proba(X)
312
313                        # if verbose == 1:
314                        #    pbar.update(idx)
315
316                    except:
317                        continue
318
319                # if verbose == 1:
320                #    pbar.update(n_iter)
321
322                return ensemble_proba / n_iter
323
324            # if weights is not None:
325            for idx, elt in voter.items():
326                ensemble_proba += weights[idx] * elt.predict_proba(X)
327
328                # if verbose == 1:
329                #    pbar.update(idx)
330
331            # if verbose == 1:
332            #    pbar.update(n_iter)
333
334            return ensemble_proba
335
336        # end calculate_probas ----
337
338        if self.n_jobs is None:
339            # if self.verbose == 1:
340            #    pbar = Progbar(self.n_estimators)
341
342            if weights is None:
343                return calculate_probas(self.voter_, verbose=self.verbose)
344
345            # if weights is not None:
346            self.weights = weights
347
348            return calculate_probas(self.voter_, weights, verbose=self.verbose)
349
350        # if self.n_jobs is not None:
351        def predict_estimator(m):
352            try:
353                return self.voter_[m].predict_proba(X)
354            except:
355                pass
356
357        if self.verbose == 1:
358            preds = Parallel(n_jobs=self.n_jobs, prefer="threads")(
359                delayed(predict_estimator)(m)
360                for m in tqdm(range(self.n_estimators))
361            )
362
363        else:
364            preds = Parallel(n_jobs=self.n_jobs, prefer="threads")(
365                delayed(predict_estimator)(m) for m in range(self.n_estimators)
366            )
367
368        ensemble_proba = 0
369
370        if weights is None:
371            for i in range(self.n_estimators):
372                ensemble_proba += preds[i]
373
374            return ensemble_proba / self.n_estimators
375
376        for i in range(self.n_estimators):
377            ensemble_proba += weights[i] * preds[i]
378
379        return ensemble_proba
380
381    @property
382    def _estimator_type(self):
383        return "classifier"

Randomized 'Bagging' Classification model

Parameters:

obj: object
    any object containing a method fit (obj.fit()) and a method predict
    (obj.predict())

n_estimators: int
    number of base learners in the bagging ensemble

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

col_sample: float
    percentage of covariates randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

seed: int
    reproducibility seed for nodes_sim=='uniform'

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

voter_: dict
    dictionary containing all the fitted base-learners

Examples:

See also https://github.com/Techtonique/nnetsauce/blob/master/examples/randombag_classification.py

import numpy as np
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time


breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)

# decision tree
clf = DecisionTreeClassifier(max_depth=2, random_state=123)
fit_obj = ns.RandomBagClassifier(clf, n_hidden_features=2,
                                direct_link=True,
                                n_estimators=100,
                                col_sample=0.9, row_sample=0.9,
                                dropout=0.3, n_clusters=0, verbose=1)

start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")

print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))

start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
def fit(self, X, y, **kwargs):
182    def fit(self, X, y, **kwargs):
183        """Fit Random 'Bagging' model to training data (X, y).
184
185        Args:
186
187            X: {array-like}, shape = [n_samples, n_features]
188                Training vectors, where n_samples is the number
189                of samples and n_features is the number of features.
190
191            y: array-like, shape = [n_samples]
192                Target values.
193
194            **kwargs: additional parameters to be passed to
195                    self.cook_training_set or self.obj.fit
196
197        Returns:
198
199            self: object
200
201        """
202
203        assert mx.is_factor(y), "y must contain only integers"
204
205        self.n_classes_ = len(np.unique(y))  # for compatibility with sklearn
206
207        # training
208        self.n_classes = len(np.unique(y))
209
210        base_learner = CustomClassifier(
211            self.obj,
212            n_hidden_features=self.n_hidden_features,
213            activation_name=self.activation_name,
214            a=self.a,
215            nodes_sim=self.nodes_sim,
216            bias=self.bias,
217            dropout=self.dropout,
218            direct_link=self.direct_link,
219            n_clusters=self.n_clusters,
220            type_clust=self.type_clust,
221            type_scaling=self.type_scaling,
222            col_sample=self.col_sample,
223            row_sample=self.row_sample,
224            seed=self.seed,
225            cv_calibration=None,
226        )
227
228        # 1 - Sequential training -----
229
230        if self.n_jobs is None:
231            self.voter_ = rbagloop_classification(
232                base_learner, X, y, self.n_estimators, self.verbose, self.seed
233            )
234
235            self.n_estimators = len(self.voter_)
236
237            return self
238
239        # 2 - Parallel training -----
240        # buggy
241        # if self.n_jobs is not None:
242        def fit_estimators(m):
243            base_learner__ = deepcopy(base_learner)
244            base_learner__.set_params(seed=self.seed + m * 1000)
245            base_learner__.fit(X, y, **kwargs)
246            return base_learner__
247
248        if self.verbose == 1:
249            voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
250                delayed(fit_estimators)(m)
251                for m in tqdm(range(self.n_estimators))
252            )
253        else:
254            voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
255                delayed(fit_estimators)(m) for m in range(self.n_estimators)
256            )
257
258        self.voter_ = {idx: elt for idx, elt in enumerate(voters_list)}
259
260        self.n_estimators = len(self.voter_)
261        self.classes_ = np.unique(y)
262        return self

Fit Random 'Bagging' model to training data (X, y).

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
def predict(self, X, weights=None, **kwargs):
264    def predict(self, X, weights=None, **kwargs):
265        """Predict test data X.
266
267        Args:
268
269            X: {array-like}, shape = [n_samples, n_features]
270                Training vectors, where n_samples is the number
271                of samples and n_features is the number of features.
272
273            **kwargs: additional parameters to be passed to
274                    self.cook_test_set
275
276        Returns:
277
278            model predictions: {array-like}
279
280        """
281        return self.predict_proba(X, weights, **kwargs).argmax(axis=1)

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
def predict_proba(self, X, weights=None, **kwargs):
283    def predict_proba(self, X, weights=None, **kwargs):
284        """Predict probabilities for test data X.
285
286        Args:
287
288            X: {array-like}, shape = [n_samples, n_features]
289                Training vectors, where n_samples is the number
290                of samples and n_features is the number of features.
291
292            **kwargs: additional parameters to be passed to
293                    self.cook_test_set
294
295        Returns:
296
297            probability estimates for test data: {array-like}
298
299        """
300
301        def calculate_probas(voter, weights=None, verbose=None):
302            ensemble_proba = 0
303
304            n_iter = len(voter)
305
306            assert n_iter > 0, "no estimator found in `RandomBag` ensemble"
307
308            if weights is None:
309                for idx, elt in voter.items():
310                    try:
311                        ensemble_proba += elt.predict_proba(X)
312
313                        # if verbose == 1:
314                        #    pbar.update(idx)
315
316                    except:
317                        continue
318
319                # if verbose == 1:
320                #    pbar.update(n_iter)
321
322                return ensemble_proba / n_iter
323
324            # if weights is not None:
325            for idx, elt in voter.items():
326                ensemble_proba += weights[idx] * elt.predict_proba(X)
327
328                # if verbose == 1:
329                #    pbar.update(idx)
330
331            # if verbose == 1:
332            #    pbar.update(n_iter)
333
334            return ensemble_proba
335
336        # end calculate_probas ----
337
338        if self.n_jobs is None:
339            # if self.verbose == 1:
340            #    pbar = Progbar(self.n_estimators)
341
342            if weights is None:
343                return calculate_probas(self.voter_, verbose=self.verbose)
344
345            # if weights is not None:
346            self.weights = weights
347
348            return calculate_probas(self.voter_, weights, verbose=self.verbose)
349
350        # if self.n_jobs is not None:
351        def predict_estimator(m):
352            try:
353                return self.voter_[m].predict_proba(X)
354            except:
355                pass
356
357        if self.verbose == 1:
358            preds = Parallel(n_jobs=self.n_jobs, prefer="threads")(
359                delayed(predict_estimator)(m)
360                for m in tqdm(range(self.n_estimators))
361            )
362
363        else:
364            preds = Parallel(n_jobs=self.n_jobs, prefer="threads")(
365                delayed(predict_estimator)(m) for m in range(self.n_estimators)
366            )
367
368        ensemble_proba = 0
369
370        if weights is None:
371            for i in range(self.n_estimators):
372                ensemble_proba += preds[i]
373
374            return ensemble_proba / self.n_estimators
375
376        for i in range(self.n_estimators):
377            ensemble_proba += weights[i] * preds[i]
378
379        return ensemble_proba

Predict probabilities for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

probability estimates for test data: {array-like}
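
As a hedged illustration of the `weights` argument (the dataset and base learner are arbitrary choices): `weights` must provide one entry per fitted base learner, indexed like `voter_`, and is applied multiplicatively, so weights summing to 1 keep the output a proper probability distribution.

```python
# Hedged sketch: weighted probability averaging with RandomBagClassifier.
import numpy as np
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression

X, y = load_breast_cancer(return_X_y=True)

clf = ns.RandomBagClassifier(LogisticRegression(max_iter=1000),
                             n_estimators=5, n_clusters=0, verbose=0)
clf.fit(X, y)

# one weight per fitted base learner; equal weights reproduce the plain average
w = np.repeat(1.0 / clf.n_estimators, clf.n_estimators)
probs = clf.predict_proba(X, weights=w)   # rows sum to 1 when the weights sum to 1
labels = clf.predict(X, weights=w)        # argmax over the weighted probabilities
print(probs.shape, labels[:5])
```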
class RandomFourierEstimator(sklearn.base.BaseEstimator):
 23class RandomFourierEstimator(BaseEstimator):
 24    def __init__(
 25        self, estimator, n_components=100, gamma=1.0, random_state=None
 26    ):
 27        """
 28        Random Fourier Features transformation with a given estimator.
 29
 30        Parameters:
 31        - estimator: A scikit-learn estimator (classifier, regressor, etc.).
 32        - n_components: Number of random Fourier features.
 33        - gamma: Hyperparameter for RBF kernel approximation.
 34        - random_state: Random state for reproducibility.
 35        """
 36        self.estimator = estimator
 37        self.n_components = n_components
 38        self.gamma = gamma
 39        self.random_state = random_state
 40
 41        # Dynamically set the estimator type and appropriate mixin
 42        estimator_type = _get_estimator_type(estimator)
 43        if estimator_type == "classifier":
 44            self._estimator_type = "classifier"
 45            # Add ClassifierMixin to the class hierarchy
 46            if not isinstance(self, ClassifierMixin):
 47                self.__class__ = type(
 48                    self.__class__.__name__,
 49                    (self.__class__, ClassifierMixin),
 50                    dict(self.__class__.__dict__),
 51                )
 52        elif estimator_type == "regressor":
 53            self._estimator_type = "regressor"
 54            # Add RegressorMixin to the class hierarchy
 55            if not isinstance(self, RegressorMixin):
 56                self.__class__ = type(
 57                    self.__class__.__name__,
 58                    (self.__class__, RegressorMixin),
 59                    dict(self.__class__.__dict__),
 60                )
 61
 62    def fit(self, X, y=None):
 63        """
 64        Fit the Random Fourier feature transformer and the estimator.
 65        """
 66        X = check_array(X)
 67
 68        # Initialize and fit the Random Fourier Feature transformer
 69        self.rff_ = RBFSampler(
 70            n_components=self.n_components,
 71            gamma=self.gamma,
 72            random_state=self.random_state,
 73        )
 74        X_transformed = self.rff_.fit_transform(X)
 75
 76        # Fit the underlying estimator on the transformed data
 77        self.estimator.fit(X_transformed, y)
 78
 79        return self
 80
 81    def partial_fit(self, X, y, classes=None):
 82        """
 83        Incrementally fit the Random Fourier feature transformer and the estimator.
 84        """
 85        X = check_array(X)
 86
 87        # Check if RFF transformer is already fitted
 88        if not hasattr(self, "rff_"):
 89            # First call - fit the transformer
 90            self.rff_ = RBFSampler(
 91                n_components=self.n_components,
 92                gamma=self.gamma,
 93                random_state=self.random_state,
 94            )
 95            X_transformed = self.rff_.fit_transform(X)
 96        else:
 97            # Subsequent calls - only transform
 98            X_transformed = self.rff_.transform(X)
 99
100        # If estimator supports partial_fit, we use it, otherwise raise an error
101        if hasattr(self.estimator, "partial_fit"):
102            self.estimator.partial_fit(X_transformed, y, classes=classes)
103        else:
104            raise ValueError(
105                f"The estimator {type(self.estimator).__name__} does not support partial_fit method."
106            )
107
108        return self
109
110    def predict(self, X):
111        """
112        Predict using the Random Fourier transformed data.
113        """
114        check_is_fitted(self, ["rff_"])
115        X = check_array(X)
116
117        # Transform the input data
118        X_transformed = self.rff_.transform(X)
119
120        # Predict using the underlying estimator
121        return self.estimator.predict(X_transformed)
122
123    def predict_proba(self, X):
124        """
125        Predict class probabilities (only for classifiers).
126        """
127        if (
128            not hasattr(self, "_estimator_type")
129            or self._estimator_type != "classifier"
130        ):
131            raise AttributeError(
132                "predict_proba is not available for this estimator type."
133            )
134
135        check_is_fitted(self, ["rff_"])
136        X = check_array(X)
137
138        if not hasattr(self.estimator, "predict_proba"):
139            raise ValueError(
140                f"The estimator {type(self.estimator).__name__} does not support predict_proba."
141            )
142
143        # Transform the input data
144        X_transformed = self.rff_.transform(X)
145
146        # Predict probabilities using the underlying estimator
147        return self.estimator.predict_proba(X_transformed)
148
149    def predict_log_proba(self, X):
150        """
151        Predict class log probabilities (only for classifiers).
152        """
153        if (
154            not hasattr(self, "_estimator_type")
155            or self._estimator_type != "classifier"
156        ):
157            raise AttributeError(
158                "predict_log_proba is not available for this estimator type."
159            )
160
161        check_is_fitted(self, ["rff_"])
162        X = check_array(X)
163
164        if not hasattr(self.estimator, "predict_log_proba"):
165            raise ValueError(
166                f"The estimator {type(self.estimator).__name__} does not support predict_log_proba."
167            )
168
169        # Transform the input data
170        X_transformed = self.rff_.transform(X)
171
172        return self.estimator.predict_log_proba(X_transformed)
173
174    def decision_function(self, X):
175        """
176        Decision function (only for classifiers).
177        """
178        if (
179            not hasattr(self, "_estimator_type")
180            or self._estimator_type != "classifier"
181        ):
182            raise AttributeError(
183                "decision_function is not available for this estimator type."
184            )
185
186        check_is_fitted(self, ["rff_"])
187        X = check_array(X)
188
189        if not hasattr(self.estimator, "decision_function"):
190            raise ValueError(
191                f"The estimator {type(self.estimator).__name__} does not support decision_function."
192            )
193
194        # Transform the input data
195        X_transformed = self.rff_.transform(X)
196
197        return self.estimator.decision_function(X_transformed)
198
199    def score(self, X, y):
200        """
201        Evaluate the model performance.
202        """
203        check_is_fitted(self, ["rff_"])
204        X = check_array(X)
205
206        # Transform the input data
207        X_transformed = self.rff_.transform(X)
208
209        # Evaluate using the underlying estimator's score method
210        return self.estimator.score(X_transformed, y)
211
212    @property
213    def classes_(self):
214        """Classes labels (only for classifiers)."""
215        if (
216            hasattr(self, "_estimator_type")
217            and self._estimator_type == "classifier"
218        ):
219            return getattr(self.estimator, "classes_", None)
220        else:
221            raise AttributeError(
222                "classes_ is not available for this estimator type."
223            )
224
225    def get_params(self, deep=True):
226        """
227        Get parameters for this estimator.
228        """
229        params = {}
230
231        # Get estimator parameters with proper prefixing
232        if deep:
233            estimator_params = self.estimator.get_params(deep=True)
234            for key, value in estimator_params.items():
235                params[f"estimator__{key}"] = value
236
237        # Add our own parameters
238        params.update(
239            {
240                "estimator": self.estimator,
241                "n_components": self.n_components,
242                "gamma": self.gamma,
243                "random_state": self.random_state,
244            }
245        )
246
247        return params
248
249    def set_params(self, **params):
250        """
251        Set the parameters of this estimator.
252        """
253        # Separate our parameters from estimator parameters
254        our_params = {}
255        estimator_params = {}
256
257        for param, value in params.items():
258            if param.startswith("estimator__"):
259                # Remove the 'estimator__' prefix
260                estimator_params[param[11:]] = value
261            elif param in [
262                "estimator",
263                "n_components",
264                "gamma",
265                "random_state",
266            ]:
267                our_params[param] = value
268            else:
269                # Assume it's an estimator parameter without prefix
270                estimator_params[param] = value
271
272        # Set our parameters
273        for param, value in our_params.items():
274            setattr(self, param, value)
275
276        # If estimator changed, update the estimator type
277        if "estimator" in our_params:
278            self.__init__(
279                self.estimator, self.n_components, self.gamma, self.random_state
280            )
281
282        # Set estimator parameters
283        if estimator_params:
284            self.estimator.set_params(**estimator_params)
285
286        # If RFF parameters changed and model is fitted, we need to refit
287        if hasattr(self, "rff_") and (
288            "n_components" in our_params
289            or "gamma" in our_params
290            or "random_state" in our_params
291        ):
292            # Remove the fitted transformer so it gets recreated on next fit
293            delattr(self, "rff_")
294
295        return self

Base class for all estimators in scikit-learn.

Inheriting from this class provides default implementations of:

  • setting and getting parameters used by GridSearchCV and friends;
  • textual and HTML representation displayed in terminals and IDEs;
  • estimator serialization;
  • parameters validation;
  • data validation;
  • feature names validation.

Read more in the scikit-learn User Guide, section "Rolling your own estimator".

Notes

All estimators should specify all the parameters that can be set at the class level in their __init__ as explicit keyword arguments (no *args or **kwargs).

Examples

>>> import numpy as np
>>> from sklearn.base import BaseEstimator
>>> class MyEstimator(BaseEstimator):
...     def __init__(self, *, param=1):
...         self.param = param
...     def fit(self, X, y=None):
...         self.is_fitted_ = True
...         return self
...     def predict(self, X):
...         return np.full(shape=X.shape[0], fill_value=self.param)
>>> estimator = MyEstimator(param=2)
>>> estimator.get_params()
{'param': 2}
>>> X = np.array([[1, 2], [2, 3], [3, 4]])
>>> y = np.array([1, 0, 1])
>>> estimator.fit(X, y).predict(X)
array([2, 2, 2])
>>> estimator.set_params(param=3).fit(X, y).predict(X)
array([3, 3, 3])
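
`RandomFourierEstimator` itself fits an `RBFSampler` on `fit` and delegates everything else to the wrapped scikit-learn estimator on the transformed features. A minimal usage sketch (dataset, base estimator and hyperparameter values are illustrative assumptions, not library defaults to rely on):

```python
# Hedged usage sketch for RandomFourierEstimator wrapping a scikit-learn classifier.
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

rfe = ns.RandomFourierEstimator(
    estimator=LogisticRegression(max_iter=1000),
    n_components=200,   # number of random Fourier features
    gamma=0.001,        # RBF kernel approximation parameter (illustrative value)
    random_state=0,
)
rfe.fit(X_train, y_train)
print(rfe.score(X_test, y_test))        # delegates to LogisticRegression.score
print(rfe.predict_proba(X_test)[:3])    # available because the base estimator is a classifier
```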
def fit(self, X, y=None):
62    def fit(self, X, y=None):
63        """
64        Fit the Random Fourier feature transformer and the estimator.
65        """
66        X = check_array(X)
67
68        # Initialize and fit the Random Fourier Feature transformer
69        self.rff_ = RBFSampler(
70            n_components=self.n_components,
71            gamma=self.gamma,
72            random_state=self.random_state,
73        )
74        X_transformed = self.rff_.fit_transform(X)
75
76        # Fit the underlying estimator on the transformed data
77        self.estimator.fit(X_transformed, y)
78
79        return self

Fit the Random Fourier feature transformer and the estimator.

def predict(self, X):
110    def predict(self, X):
111        """
112        Predict using the Random Fourier transformed data.
113        """
114        check_is_fitted(self, ["rff_"])
115        X = check_array(X)
116
117        # Transform the input data
118        X_transformed = self.rff_.transform(X)
119
120        # Predict using the underlying estimator
121        return self.estimator.predict(X_transformed)

Predict using the Random Fourier transformed data.

def predict_proba(self, X):
123    def predict_proba(self, X):
124        """
125        Predict class probabilities (only for classifiers).
126        """
127        if (
128            not hasattr(self, "_estimator_type")
129            or self._estimator_type != "classifier"
130        ):
131            raise AttributeError(
132                "predict_proba is not available for this estimator type."
133            )
134
135        check_is_fitted(self, ["rff_"])
136        X = check_array(X)
137
138        if not hasattr(self.estimator, "predict_proba"):
139            raise ValueError(
140                f"The estimator {type(self.estimator).__name__} does not support predict_proba."
141            )
142
143        # Transform the input data
144        X_transformed = self.rff_.transform(X)
145
146        # Predict probabilities using the underlying estimator
147        return self.estimator.predict_proba(X_transformed)

Predict class probabilities (only for classifiers).

def score(self, X, y):
199    def score(self, X, y):
200        """
201        Evaluate the model performance.
202        """
203        check_is_fitted(self, ["rff_"])
204        X = check_array(X)
205
206        # Transform the input data
207        X_transformed = self.rff_.transform(X)
208
209        # Evaluate using the underlying estimator's score method
210        return self.estimator.score(X_transformed, y)

Evaluate the model performance.
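
`partial_fit` only works when the wrapped estimator itself implements `partial_fit`; the first call fits the `RBFSampler`, subsequent calls reuse the same random features. A hedged sketch with `SGDClassifier` (an arbitrary choice of incremental learner):

```python
# Hedged sketch of incremental training via partial_fit.
import numpy as np
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import SGDClassifier

X, y = load_breast_cancer(return_X_y=True)
classes = np.unique(y)

rfe = ns.RandomFourierEstimator(SGDClassifier(random_state=0),
                                n_components=100, random_state=0)

# stream the data in two mini-batches; the RBFSampler is fitted on the first call
rfe.partial_fit(X[:300], y[:300], classes=classes)
rfe.partial_fit(X[300:], y[300:], classes=classes)
print(rfe.score(X, y))
```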

class RandomFourierFeaturesRidge(sklearn.base.BaseEstimator, sklearn.base.RegressorMixin):
 12class RandomFourierFeaturesRidge(BaseEstimator, RegressorMixin):
 13    """
 14    Random Fourier Features with Bayesian Ridge Regression.
 15
 16    Implements both standard (MLE) and Bayesian versions with uncertainty quantification.
 17    Uses data augmentation for L2 regularization via jnp.lstsq.
 18    """
 19
 20    def __init__(
 21        self,
 22        n_features: int = 100,
 23        gamma: float = 1.0,
 24        alpha: float = 1e-6,
 25        include_bias: bool = True,
 26        random_seed: int = 42,
 27    ):
 28        """
 29        Parameters:
 30        -----------
 31        n_features : int
 32            Number of random Fourier features (D)
 33        gamma : float
 34            RBF kernel parameter: k(x,y) = exp(-gamma * ||x-y||²)
 35        alpha : float
 36            Prior precision (inverse variance) for Bayesian version
 37            Equivalent to regularization strength: lambda = alpha / beta
 38        include_bias : bool
 39            Whether to include a bias term
 40        random_seed : int
 41            Random seed for reproducibility
 42        """
 43        self.n_features = n_features
 44        self.gamma = gamma
 45        self.alpha = alpha
 46        self.include_bias = include_bias
 47        self.key = random.PRNGKey(random_seed)
 48        self.is_fitted = False
 49
 50        # Bayesian parameters
 51        self.beta = None  # Noise precision (will be estimated from data)
 52        self.w_mean = None  # Posterior mean of weights
 53        self.w_cov = None  # Posterior covariance of weights
 54        self.S_N = None  # Posterior precision matrix
 55
 56    def _compute_random_features(
 57        self, X: jnp.ndarray, W: jnp.ndarray, b: jnp.ndarray
 58    ) -> jnp.ndarray:
 59        """Compute random Fourier features: sqrt(2/D) * cos(XW + b)"""
 60        projection = jnp.dot(X, W) + b  # Shape: (n_samples, n_features)
 61        features = jnp.sqrt(2.0 / self.n_features) * jnp.cos(projection)
 62
 63        if self.include_bias:
 64            features = jnp.concatenate(
 65                [jnp.ones((X.shape[0], 1)), features], axis=1
 66            )
 67
 68        return features
 69
 70    def _init_random_weights(
 71        self, input_dim: int
 72    ) -> Tuple[jnp.ndarray, jnp.ndarray]:
 73        """Initialize random weights and biases for RFF"""
 74        # Sample from Gaussian distribution for RBF kernel
 75        # Variance = 2 * gamma for RBF kernel
 76        self.key, subkey = random.split(self.key)
 77        W = random.normal(
 78            subkey, shape=(input_dim, self.n_features)
 79        ) * jnp.sqrt(2.0 * self.gamma)
 80
 81        self.key, subkey = random.split(self.key)
 82        b = random.uniform(
 83            subkey, shape=(1, self.n_features), minval=0, maxval=2 * jnp.pi
 84        )
 85
 86        return W, b
 87
 88    def fit(
 89        self,
 90        X: Union[jnp.ndarray, np.ndarray],
 91        y: Union[jnp.ndarray, np.ndarray],
 92        method: str = "bayesian",
 93        noise_variance: Optional[float] = None,
 94    ) -> "RandomFourierFeaturesRidge":
 95        """
 96        Fit the model using either standard or Bayesian ridge regression.
 97
 98        Parameters:
 99        -----------
100        X : array-like, shape (n_samples, n_features)
101            Training data
102        y : array-like, shape (n_samples,) or (n_samples, n_targets)
103            Target values
104        method : str, either "standard" or "bayesian"
105            "standard": Maximum likelihood estimation with L2 regularization
106            "bayesian": Full Bayesian inference with uncertainty quantification
107        noise_variance : float, optional
108            If provided, fixes the noise variance instead of estimating it
109        """
110        # Convert to JAX arrays if needed
111        X = jnp.asarray(X)
112        y = jnp.asarray(y)
113
114        if len(y.shape) == 1:
115            y = y.reshape(-1, 1)
116
117        n_samples, input_dim = X.shape
118
119        # Initialize random Fourier weights
120        self.W, self.b = self._init_random_weights(input_dim)
121
122        # Compute random Fourier features
123        Phi = self._compute_random_features(X, self.W, self.b)
124        n_basis = Phi.shape[1]  # D + 1 if bias included
125
126        # Store feature matrix and target values for Bayesian updates/likelihood computation
127        self.Phi_train = Phi
128        self.y_train = y  # Store y_train
129
130        if method == "standard":
131            # Standard ridge regression using data augmentation for regularization
132            self._fit_standard(Phi, y)
133        elif method == "bayesian":
134            # Bayesian ridge regression
135            self._fit_bayesian(Phi, y, noise_variance)
136        else:
137            raise ValueError("method must be 'standard' or 'bayesian'")
138
139        self.is_fitted = True
140        self.method = method
141        self.input_dim = input_dim
142
143        return self
144
145    def _fit_standard(self, Phi: jnp.ndarray, y: jnp.ndarray) -> None:
146        """Standard ridge regression using lstsq with data augmentation"""
147        n_samples, n_basis = Phi.shape
148
149        # Create augmented data for L2 regularization
150        # This is equivalent to adding sqrt(alpha) * I to the design matrix
151        sqrt_alpha = jnp.sqrt(self.alpha)
152        Phi_aug = jnp.vstack([Phi, sqrt_alpha * jnp.eye(n_basis)])
153        y_aug = jnp.vstack([y, jnp.zeros((n_basis, y.shape[1]))])
154
155        # Solve using least squares
156        # Note: jnp.linalg.lstsq is more stable than explicit normal equations
157        weights, residuals, rank, s = jnp.linalg.lstsq(
158            Phi_aug, y_aug, rcond=None
159        )
160
161        self.w_mean = weights
162        self.weights = weights  # For compatibility
163
164        # Estimate noise variance from residuals
165        residuals = y - Phi @ weights
166        self.beta = 1.0 / jnp.maximum(jnp.var(residuals), 1e-8)
167
168    def _fit_bayesian(
169        self,
170        Phi: jnp.ndarray,
171        y: jnp.ndarray,
172        noise_variance: Optional[float] = None,
173    ) -> None:
174        """Bayesian ridge regression with evidence approximation"""
175        n_samples, n_basis = Phi.shape
176
177        # Initialize precision parameters
178        if noise_variance is not None:
179            self.beta = 1.0 / noise_variance
180        else:
181            # Initial estimate of beta from data
182            self.beta = 1.0 / jnp.maximum(jnp.var(y), 1e-8)
183
184        # Posterior precision matrix: S_N⁻¹ = alpha * I + beta * ΦᵀΦ
185        I = jnp.eye(n_basis)
186        PhiT_Phi = Phi.T @ Phi
187
188        # Initialize with prior
189        S_N_inv = self.alpha * I
190
191        # Evidence approximation to optimize alpha, beta
192        for _ in range(10):  # Iterate to converge on alpha, beta
193            # Update posterior mean and covariance
194            S_N = jnp.linalg.inv(S_N_inv + self.beta * PhiT_Phi)
195            self.w_mean = self.beta * S_N @ Phi.T @ y
196
197            # Update gamma (effective number of parameters)
198            eigenvalues = jnp.linalg.eigvalsh(PhiT_Phi)
199            gamma_val = jnp.sum(eigenvalues / (self.alpha + eigenvalues))
200
201            # Update alpha and beta (MacKay's fixed point updates)
202            if self.alpha > 0:
203                self.alpha = gamma_val / jnp.sum(self.w_mean**2)
204
205            if noise_variance is None:
206                residuals = y - Phi @ self.w_mean
207                self.beta = (n_samples - gamma_val) / jnp.sum(residuals**2)
208
209            # Update precision matrix
210            S_N_inv = self.alpha * I
211
212        # Store final covariance
213        self.S_N = jnp.linalg.inv(self.alpha * I + self.beta * PhiT_Phi)
214        self.w_cov = self.S_N
215
216        # Also store for compatibility
217        self.weights = self.w_mean
218
219    def transform(self, X: Union[jnp.ndarray, np.ndarray]) -> jnp.ndarray:
220        """Transform input data to random Fourier feature space"""
221        if not self.is_fitted:
222            raise ValueError("Model must be fitted before transforming")
223
224        X = jnp.asarray(X)
225        return self._compute_random_features(X, self.W, self.b)
226
227    def predict(
228        self,
229        X: Union[jnp.ndarray, np.ndarray],
230        return_std: bool = False,
231        return_cov: bool = False,
232    ) -> Union[jnp.ndarray, Tuple[jnp.ndarray, jnp.ndarray]]:
233        """
234        Make predictions, optionally with uncertainty quantification.
235
236        Parameters:
237        -----------
238        X : array-like, shape (n_samples, n_features)
239            Input data
240        return_std : bool
241            If True, return standard deviation of predictive distribution
242        return_cov : bool
243            If True, return full covariance matrix of predictive distribution
244
245        Returns:
246        --------
247        y_pred : jnp.ndarray
248            Predictive mean
249        y_std or y_cov : jnp.ndarray, optional
250            Predictive standard deviation or covariance
251        """
252        if not self.is_fitted:
253            raise ValueError("Model must be fitted before prediction")
254
255        X = jnp.asarray(X)
256        Phi = self.transform(X)
257
258        # Predictive mean
259        y_pred = Phi @ self.w_mean
260
261        if not return_std and not return_cov:
262            return y_pred
263
264        if self.method != "bayesian":
265            raise ValueError(
266                "Uncertainty quantification only available for Bayesian method"
267            )
268
269        # Predictive variance
270        if return_cov:
271            # Full predictive covariance
272            # Σ_pred = (1/β) * I + Φ @ S_N @ Φᵀ
273            pred_cov = (1.0 / self.beta) * jnp.eye(
274                Phi.shape[0]
275            ) + Phi @ self.S_N @ Phi.T
276            return y_pred, pred_cov
277        else:
278            # Diagonal of predictive covariance (standard deviations)
279            # σ²_pred = (1/β) + diag(Φ @ S_N @ Φᵀ)
280            var_diag = (1.0 / self.beta) + jnp.sum(
281                (Phi @ self.S_N) * Phi, axis=1
282            )
283            y_std = jnp.sqrt(jnp.maximum(var_diag, 0.0)).reshape(-1, 1)
284            return y_pred, y_std
285
286    def sample_posterior(
287        self,
288        X: Union[jnp.ndarray, np.ndarray],
289        n_samples: int = 1,
290        key: Optional[jax.random.PRNGKey] = None,
291    ) -> jnp.ndarray:
292        """
293        Sample from the posterior predictive distribution.
294
295        Parameters:
296        -----------
297        X : array-like
298            Input data
299        n_samples : int
300            Number of samples to draw
301        key : PRNGKey, optional
302            Random key for sampling
303
304        Returns:
305        --------
306        samples : jnp.ndarray, shape (n_samples, n_test_samples)
307            Samples from posterior predictive distribution
308        """
309        if self.method != "bayesian":
310            raise ValueError("Sampling only available for Bayesian method")
311
312        if key is None:
313            key = self.key
314
315        X = jnp.asarray(X)
316        Phi = self.transform(X)
317        n_test = Phi.shape[0]
318
319        # Sample weights from posterior
320        key, subkey = random.split(key)
321        w_samples = random.multivariate_normal(
322            subkey, self.w_mean.flatten(), self.S_N, shape=(n_samples,)
323        )
324
325        # Generate predictions for each weight sample
326        samples = []
327        for i in range(n_samples):
328            w_sample = w_samples[i].reshape(-1, 1)
329            # Add noise variance
330            key, subkey1, subkey2 = random.split(key, 3)
331            pred_mean = Phi @ w_sample
332            noise = random.normal(subkey2, shape=pred_mean.shape) / jnp.sqrt(
333                self.beta
334            )
335            samples.append(pred_mean + noise)
336
337        return jnp.stack(samples, axis=0)
338
339    def log_marginal_likelihood(self) -> float:
340        """
341        Compute log marginal likelihood (evidence) for Bayesian model.
342
343        Returns:
344        --------
345        log_evidence : float
346            Log marginal likelihood p(y|X,α,β)
347        """
348        if self.method != "bayesian":
349            raise ValueError(
350                "Log marginal likelihood only available for Bayesian method"
351            )
352
353        n_samples = self.Phi_train.shape[0]
354        n_basis = self.Phi_train.shape[1]
355
356        # Log determinant term
357        I = jnp.eye(n_basis)
358        A = self.alpha * I + self.beta * self.Phi_train.T @ self.Phi_train
359        sign, logdet_A = jnp.linalg.slogdet(A)
360        logdet_term = 0.5 * (n_basis * jnp.log(self.alpha) - logdet_A)
361
362        # Data fit term
363        residuals = self.y_train - self.Phi_train @ self.w_mean
364        data_fit_term = -0.5 * self.beta * jnp.sum(residuals**2)
365
366        # Constant term
367        const_term = 0.5 * n_samples * jnp.log(self.beta / (2 * jnp.pi))
368
369        return float(logdet_term + data_fit_term + const_term)
370
371    def get_params(self) -> Dict:
372        """Get model parameters"""
373        return {
374            "n_features": self.n_features,
375            "gamma": self.gamma,
376            "alpha": self.alpha,
377            "beta": self.beta if self.beta is not None else None,
378            "method": self.method if hasattr(self, "method") else None,
379            "input_dim": self.input_dim if hasattr(self, "input_dim") else None,
380        }
381
382    def set_params(self, **params) -> "RandomFourierFeaturesRidge":
383        """Set model parameters"""
384        for key, value in params.items():
385            if hasattr(self, key):
386                setattr(self, key, value)
387        return self

Random Fourier Features with Bayesian Ridge Regression.

Implements both standard (MLE) and Bayesian versions with uncertainty quantification. Uses data augmentation for L2 regularization via jnp.linalg.lstsq.

def fit( self, X: Union[jax.Array, numpy.ndarray], y: Union[jax.Array, numpy.ndarray], method: str = 'bayesian', noise_variance: Optional[float] = None) -> RandomFourierFeaturesRidge:
 88    def fit(
 89        self,
 90        X: Union[jnp.ndarray, np.ndarray],
 91        y: Union[jnp.ndarray, np.ndarray],
 92        method: str = "bayesian",
 93        noise_variance: Optional[float] = None,
 94    ) -> "RandomFourierFeaturesRidge":
 95        """
 96        Fit the model using either standard or Bayesian ridge regression.
 97
 98        Parameters:
 99        -----------
100        X : array-like, shape (n_samples, n_features)
101            Training data
102        y : array-like, shape (n_samples,) or (n_samples, n_targets)
103            Target values
104        method : str, either "standard" or "bayesian"
105            "standard": Maximum likelihood estimation with L2 regularization
106            "bayesian": Full Bayesian inference with uncertainty quantification
107        noise_variance : float, optional
108            If provided, fixes the noise variance instead of estimating it
109        """
110        # Convert to JAX arrays if needed
111        X = jnp.asarray(X)
112        y = jnp.asarray(y)
113
114        if len(y.shape) == 1:
115            y = y.reshape(-1, 1)
116
117        n_samples, input_dim = X.shape
118
119        # Initialize random Fourier weights
120        self.W, self.b = self._init_random_weights(input_dim)
121
122        # Compute random Fourier features
123        Phi = self._compute_random_features(X, self.W, self.b)
124        n_basis = Phi.shape[1]  # D + 1 if bias included
125
126        # Store feature matrix and target values for Bayesian updates/likelihood computation
127        self.Phi_train = Phi
128        self.y_train = y  # Store y_train
129
130        if method == "standard":
131            # Standard ridge regression using data augmentation for regularization
132            self._fit_standard(Phi, y)
133        elif method == "bayesian":
134            # Bayesian ridge regression
135            self._fit_bayesian(Phi, y, noise_variance)
136        else:
137            raise ValueError("method must be 'standard' or 'bayesian'")
138
139        self.is_fitted = True
140        self.method = method
141        self.input_dim = input_dim
142
143        return self

Fit the model using either standard or Bayesian ridge regression.

Parameters:

X : array-like, shape (n_samples, n_features)
    Training data
y : array-like, shape (n_samples,) or (n_samples, n_targets)
    Target values
method : str, either "standard" or "bayesian"
    "standard": Maximum likelihood estimation with L2 regularization
    "bayesian": Full Bayesian inference with uncertainty quantification
noise_variance : float, optional
    If provided, fixes the noise variance instead of estimating it
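
A short sketch contrasting the two fitting modes on synthetic data (hyperparameter values are illustrative, not recommendations): "standard" solves an augmented least-squares ridge problem and only returns a point estimate, while "bayesian" additionally stores a posterior over the weights.

```python
# Hedged sketch: standard vs. Bayesian fitting of RandomFourierFeaturesRidge
# (a JAX-based model) on synthetic 1-D regression data.
import numpy as np
import nnetsauce as ns

rng = np.random.default_rng(0)
X = rng.uniform(-3, 3, size=(200, 1))
y = np.sin(X).ravel() + 0.1 * rng.normal(size=200)

model = ns.RandomFourierFeaturesRidge(n_features=100, gamma=1.0,
                                      alpha=1e-3, random_seed=42)

model.fit(X, y, method="standard")   # point estimate via augmented lstsq
y_hat = model.predict(X)

model.fit(X, y, method="bayesian")   # posterior mean + covariance over weights
y_mean = model.predict(X)
print(model.log_marginal_likelihood())   # evidence, only defined after a Bayesian fit
```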

def predict( self, X: Union[jax.Array, numpy.ndarray], return_std: bool = False, return_cov: bool = False) -> Union[jax.Array, Tuple[jax.Array, jax.Array]]:
227    def predict(
228        self,
229        X: Union[jnp.ndarray, np.ndarray],
230        return_std: bool = False,
231        return_cov: bool = False,
232    ) -> Union[jnp.ndarray, Tuple[jnp.ndarray, jnp.ndarray]]:
233        """
234        Make predictions, optionally with uncertainty quantification.
235
236        Parameters:
237        -----------
238        X : array-like, shape (n_samples, n_features)
239            Input data
240        return_std : bool
241            If True, return standard deviation of predictive distribution
242        return_cov : bool
243            If True, return full covariance matrix of predictive distribution
244
245        Returns:
246        --------
247        y_pred : jnp.ndarray
248            Predictive mean
249        y_std or y_cov : jnp.ndarray, optional
250            Predictive standard deviation or covariance
251        """
252        if not self.is_fitted:
253            raise ValueError("Model must be fitted before prediction")
254
255        X = jnp.asarray(X)
256        Phi = self.transform(X)
257
258        # Predictive mean
259        y_pred = Phi @ self.w_mean
260
261        if not return_std and not return_cov:
262            return y_pred
263
264        if self.method != "bayesian":
265            raise ValueError(
266                "Uncertainty quantification only available for Bayesian method"
267            )
268
269        # Predictive variance
270        if return_cov:
271            # Full predictive covariance
272            # Σ_pred = (1/β) * I + Φ @ S_N @ Φᵀ
273            pred_cov = (1.0 / self.beta) * jnp.eye(
274                Phi.shape[0]
275            ) + Phi @ self.S_N @ Phi.T
276            return y_pred, pred_cov
277        else:
278            # Diagonal of predictive covariance (standard deviations)
279            # σ²_pred = (1/β) + diag(Φ @ S_N @ Φᵀ)
280            var_diag = (1.0 / self.beta) + jnp.sum(
281                (Phi @ self.S_N) * Phi, axis=1
282            )
283            y_std = jnp.sqrt(jnp.maximum(var_diag, 0.0)).reshape(-1, 1)
284            return y_pred, y_std

Make predictions, optionally with uncertainty quantification.

Parameters:

X : array-like, shape (n_samples, n_features)
    Input data
return_std : bool
    If True, return standard deviation of predictive distribution
return_cov : bool
    If True, return full covariance matrix of predictive distribution

Returns:

y_pred : jnp.ndarray
    Predictive mean
y_std or y_cov : jnp.ndarray, optional
    Predictive standard deviation or covariance
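
A short sketch of the uncertainty-aware prediction path on synthetic data (same top-level import assumption as above); `return_std` and `return_cov` require a model fitted with `method="bayesian"`:

```python
import numpy as np
import nnetsauce as ns

rng = np.random.default_rng(1)
X = rng.normal(size=(150, 3))
y = X @ np.array([1.0, -2.0, 0.5]) + 0.1 * rng.normal(size=150)

model = ns.RandomFourierFeaturesRidge(n_features=200, gamma=0.5).fit(
    X, y, method="bayesian"
)
mean, std = model.predict(X, return_std=True)   # per-point predictive std, shape (n, 1)
mean, cov = model.predict(X, return_cov=True)   # full (n, n) predictive covariance
```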

class RandomFourierFeaturesRidgeGCV(nnetsauce.RandomFourierFeaturesRidge):
390class RandomFourierFeaturesRidgeGCV(RandomFourierFeaturesRidge):
391    """
392    Extends RandomFourierFeaturesRidge with GCV for automatic
393    regularization parameter selection.
394    """
395
396    def __init__(
397        self,
398        n_features: int = 100,
399        gamma: float = 1.0,
400        alpha: Optional[float] = None,
401        include_bias: bool = True,
402        random_seed: int = 42,
403    ):
404        super().__init__(n_features, gamma, alpha, include_bias, random_seed)
405        self.alpha_opt = None  # Stores the GCV-optimized alpha
406        self.gcv_score = None  # Stores the optimal GCV score
407
408    def _compute_gcv(
409        self,
410        alpha: float,
411        s_sq: jnp.ndarray,
412        U: jnp.ndarray,
413        y: jnp.ndarray,
414        n_samples: int,
415    ) -> float:
416        """
417        Compute GCV score for a given alpha.
418
419        Parameters:
420        -----------
421        alpha : float
422            Regularization parameter
423        s_sq : jnp.ndarray
424            Squared singular values of design matrix Φ
425        U : jnp.ndarray
426            Left singular vectors of Φ
427        y : jnp.ndarray
428            Target values
429        n_samples : int
430            Number of data points
431
432        Returns:
433        --------
434        gcv : float
435            GCV score for this alpha
436        """
437        # Degrees of freedom: df(α) = Σ(σ_j²/(σ_j² + α))
438        df = jnp.sum(s_sq / (s_sq + alpha))
439
440        # Compute residual sum of squares efficiently using SVD
441        # y_pred = U @ (S²/(S² + α)) @ (U.T @ y)
442        Uty = U.T @ y
443        shrinkage = s_sq / (s_sq + alpha)
444        y_pred = U @ (shrinkage * Uty)
445        residuals = y - y_pred
446        rss = jnp.sum(residuals**2)
447
448        # GCV formula
449        denom = (1.0 - df / n_samples) ** 2
450        gcv = (rss / n_samples) / denom
451
452        return float(gcv)
453
454    def fit_gcv(
455        self,
456        X: Union[jnp.ndarray, np.ndarray],
457        y: Union[jnp.ndarray, np.ndarray],
458        alpha_range: Tuple[float, float] = (1e-8, 1e4),
459        n_alphas: int = 50,
460        method: str = "standard",
461        optimize: bool = True,
462    ) -> "RandomFourierFeaturesRidgeGCV":
463        """
464        Fit model with GCV-optimized regularization parameter.
465
466        Parameters:
467        -----------
468        X : array-like
469            Training data
470        y : array-like
471            Target values
472        alpha_range : tuple
473            (min_alpha, max_alpha) range to search
474        n_alphas : int
475            Number of alpha values to try in initial grid search
476        method : str
477            "standard" or "bayesian"
478        optimize : bool
479            If True, perform fine optimization after grid search
480
481        Returns:
482        --------
483        self : fitted model
484        """
485        # Convert to JAX arrays
486        X = jnp.asarray(X)
487        y = jnp.asarray(y)
488
489        if len(y.shape) == 1:
490            y = y.reshape(-1, 1)
491
492        n_samples, input_dim = X.shape
493
494        # Initialize random Fourier weights
495        self.W, self.b = self._init_random_weights(input_dim)
496
497        # Compute random Fourier features
498        Phi = self._compute_random_features(X, self.W, self.b)
499
500        # Compute SVD of design matrix for efficient GCV computation
501        # Φ = U @ diag(S) @ V.T
502        U, S, Vt = jnp.linalg.svd(Phi, full_matrices=False)
503        s_sq = S**2  # Squared singular values
504
505        # Grid search on log scale
506        alphas_grid = jnp.logspace(
507            jnp.log10(alpha_range[0]), jnp.log10(alpha_range[1]), n_alphas
508        )
509
510        gcv_scores = []
511        for alpha in alphas_grid:
512            score = self._compute_gcv(float(alpha), s_sq, U, y, n_samples)
513            gcv_scores.append(score)
514
515        # Find best alpha from grid
516        best_idx = jnp.argmin(jnp.array(gcv_scores))
517        alpha_grid_opt = float(alphas_grid[best_idx])
518
519        # Fine optimization using Brent's method
520        if optimize:
521            # Define objective for scipy optimizer
522            def gcv_objective(log_alpha):
523                alpha = 10**log_alpha
524                return self._compute_gcv(alpha, s_sq, U, y, n_samples)
525
526            # Optimize in log space
527            result = minimize_scalar(
528                gcv_objective,
529                bounds=(jnp.log10(alpha_range[0]), jnp.log10(alpha_range[1])),
530                method="bounded",
531                options={"xatol": 0.1},  # Tolerance in log10 space
532            )
533
534            if result.success:
535                alpha_opt = 10**result.x
536                gcv_opt = result.fun
537            else:
538                alpha_opt = alpha_grid_opt
539                gcv_opt = gcv_scores[best_idx]
540        else:
541            alpha_opt = alpha_grid_opt
542            gcv_opt = gcv_scores[best_idx]
543
544        # Store optimized parameters
545        self.alpha_opt = alpha_opt
546        self.gcv_score = gcv_opt
547        self.alpha = alpha_opt  # Set as the model's alpha
548
549        # Fit final model with optimized alpha
550        if method == "standard":
551            self._fit_standard(Phi, y)
552        elif method == "bayesian":
553            # For Bayesian version, we can use alpha as prior precision
554            # Optionally optimize beta too
555            self._fit_bayesian(Phi, y)
556        else:
557            raise ValueError("method must be 'standard' or 'bayesian'")
558
559        self.is_fitted = True
560        self.method = method
561        self.input_dim = input_dim
562
563        return self
564
565    def fit_gcv_with_path(
566        self,
567        X: Union[jnp.ndarray, np.ndarray],
568        y: Union[jnp.ndarray, np.ndarray],
569        alpha_range: Tuple[float, float] = (1e-8, 1e4),
570        n_alphas: int = 100,
571        method: str = "standard",
572    ) -> dict:
573        """
574        Fit with GCV and return full regularization path.
575
576        Returns:
577        --------
578        path_info : dict
579            Dictionary with alpha values, GCV scores, and metrics
580        """
581        X = jnp.asarray(X)
582        y = jnp.asarray(y)
583
584        if len(y.shape) == 1:
585            y = y.reshape(-1, 1)
586
587        n_samples, input_dim = X.shape
588
589        # Initialize random features
590        self.W, self.b = self._init_random_weights(input_dim)
591        Phi = self._compute_random_features(X, self.W, self.b)
592
593        # Compute SVD
594        U, S, Vt = jnp.linalg.svd(Phi, full_matrices=False)
595        s_sq = S**2
596
597        # Compute GCV path
598        alphas = jnp.logspace(
599            jnp.log10(alpha_range[0]), jnp.log10(alpha_range[1]), n_alphas
600        )
601
602        gcv_scores = []
603        train_errors = []
604        effective_dof = []
605
606        for alpha in alphas:
607            alpha_val = float(alpha)
608
609            # GCV score
610            gcv = self._compute_gcv(alpha_val, s_sq, U, y, n_samples)
611            gcv_scores.append(gcv)
612
613            # Effective degrees of freedom
614            df = float(jnp.sum(s_sq / (s_sq + alpha_val)))
615            effective_dof.append(df)
616
617            # Training error for this alpha
618            # Compute weights: w = V @ (S/(S² + α)) @ (U.T @ y)
619            Uty = U.T @ y
620            shrinkage = S / (s_sq + alpha_val)
621            w_alpha = Vt.T @ (shrinkage.reshape(-1, 1) * Uty)
622            y_pred = Phi @ w_alpha
623            train_err = float(jnp.mean((y - y_pred) ** 2))
624            train_errors.append(train_err)
625
626        # Find optimal alpha
627        best_idx = jnp.argmin(jnp.array(gcv_scores))
628        alpha_opt = float(alphas[best_idx])
629
630        # Fit final model with optimal alpha
631        self.alpha = alpha_opt
632        if method == "standard":
633            self._fit_standard(Phi, y)
634        elif method == "bayesian":
635            self._fit_bayesian(Phi, y)
636
637        self.is_fitted = True
638        self.method = method
639        self.input_dim = input_dim
640        self.alpha_opt = alpha_opt
641        self.gcv_score = gcv_scores[best_idx]
642
643        # Return full path information
644        path_info = {
645            "alphas": np.array(alphas),
646            "gcv_scores": np.array(gcv_scores),
647            "train_errors": np.array(train_errors),
648            "effective_dof": np.array(effective_dof),
649            "alpha_opt": alpha_opt,
650            "gcv_opt": gcv_scores[best_idx],
651            "dof_opt": effective_dof[best_idx],
652        }
653
654        return path_info
655
656    def plot_gcv_path(self, path_info: dict, save_path: str = None):
657        """
658        Plot GCV regularization path.
659        """
660        import matplotlib.pyplot as plt
661
662        fig, axes = plt.subplots(2, 2, figsize=(12, 10))
663
664        # Plot 1: GCV score vs alpha
665        ax = axes[0, 0]
666        ax.semilogx(
667            path_info["alphas"], path_info["gcv_scores"], "b-", linewidth=2
668        )
669        ax.axvline(
670            path_info["alpha_opt"],
671            color="r",
672            linestyle="--",
673            label=f'Optimal α = {path_info["alpha_opt"]:.2e}',
674        )
675        ax.set_xlabel("Regularization α")
676        ax.set_ylabel("GCV Score")
677        ax.set_title("GCV Score vs Regularization")
678        ax.legend()
679        ax.grid(True, alpha=0.3)
680
681        # Plot 2: Training error vs alpha
682        ax = axes[0, 1]
683        ax.loglog(
684            path_info["alphas"], path_info["train_errors"], "g-", linewidth=2
685        )
686        ax.axvline(path_info["alpha_opt"], color="r", linestyle="--")
687        ax.set_xlabel("Regularization α")
688        ax.set_ylabel("Training MSE")
689        ax.set_title("Training Error vs Regularization")
690        ax.grid(True, alpha=0.3)
691
692        # Plot 3: Effective DOF vs alpha
693        ax = axes[1, 0]
694        ax.semilogx(
695            path_info["alphas"], path_info["effective_dof"], "m-", linewidth=2
696        )
697        ax.axvline(path_info["alpha_opt"], color="r", linestyle="--")
698        ax.axhline(
699            path_info["dof_opt"],
700            color="r",
701            linestyle=":",
702            label=f'DOF at optimum = {path_info["dof_opt"]:.1f}',
703        )
704        ax.set_xlabel("Regularization α")
705        ax.set_ylabel("Effective Degrees of Freedom")
706        ax.set_title("Model Complexity vs Regularization")
707        ax.legend()
708        ax.grid(True, alpha=0.3)
709
710        # Plot 4: GCV vs DOF
711        ax = axes[1, 1]
712        ax.plot(
713            path_info["effective_dof"],
714            path_info["gcv_scores"],
715            "k-",
716            linewidth=2,
717        )
718        ax.axvline(path_info["dof_opt"], color="r", linestyle="--")
719        ax.set_xlabel("Effective Degrees of Freedom")
720        ax.set_ylabel("GCV Score")
721        ax.set_title("GCV vs Model Complexity")
722        ax.grid(True, alpha=0.3)
723
724        plt.suptitle(
725            "GCV Regularization Path Analysis", fontsize=14, fontweight="bold"
726        )
727        plt.tight_layout()
728
729        if save_path:
730            plt.savefig(save_path, dpi=150, bbox_inches="tight")
731
732        plt.show()

Extends RandomFourierFeaturesRidge with GCV for automatic regularization parameter selection.
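
A minimal sketch of the GCV workflow on synthetic data, assuming a top-level `ns.RandomFourierFeaturesRidgeGCV` export: `fit_gcv` runs the grid search plus bounded refinement, `fit_gcv_with_path` returns the full regularization path, and `plot_gcv_path` draws the diagnostic panels:

```python
import numpy as np
import nnetsauce as ns

rng = np.random.default_rng(2)
X = rng.normal(size=(300, 4))
y = np.cos(X[:, 1]) + 0.2 * rng.normal(size=300)

gcv_model = ns.RandomFourierFeaturesRidgeGCV(n_features=150, gamma=1.0)
gcv_model.fit_gcv(X, y, alpha_range=(1e-6, 1e3), n_alphas=40)
print(gcv_model.alpha_opt, gcv_model.gcv_score)   # GCV-selected alpha and its score

path = gcv_model.fit_gcv_with_path(X, y)          # refit and return the whole path
gcv_model.plot_gcv_path(path)                     # 4-panel diagnostic figure
```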

class RegressorUpdater(sklearn.base.BaseEstimator, sklearn.base.RegressorMixin):
 16class RegressorUpdater(BaseEstimator, RegressorMixin):
 17    """
 18    Update a regression model with new observations
 19
 20    Parameters
 21    ----------
 22    regr: object
 23        A regression model with a coef_ attribute
 24    alpha: float
 25        Updating factor's exponent
 26
 27    Attributes
 28    ----------
 29    n_obs_: int
 30        Number of observations
 31    coef_: np.ndarray
 32        Coefficients of the model
 33    updating_factor_: float
 34        Updating factor
 35
 36    """
 37
 38    def __init__(self, regr, alpha=0.5):
 39        self.regr = regr
 40        self.alpha = alpha
 41        self.n_obs_ = None
 42        self.coef_ = None
 43        self.updating_factor_ = None
 44        try:
 45            self.coef_ = self.regr.coef_
 46            if isinstance(self.regr, Base):
 47                self.n_obs_ = self.regr.scaler_.n_samples_seen_
 48        except AttributeError:
 49            pass
 50
 51    def fit(self, X, y, **kwargs):
 52        if isinstance(
 53            self.regr, CustomRegressor
 54        ):  # nnetsauce model not deep ---
 55            if check_is_fitted(self.regr) == False:
 56                self.regr.fit(X, y, **kwargs)
 57                self.n_obs_ = X.shape[0]
 58                if hasattr(self.regr, "coef_"):
 59                    self.coef_ = self.regr.coef_
 60                return self
 61            self.n_obs_ = self.regr.scaler_.n_samples_seen_
 62            if hasattr(self.regr, "coef_"):
 63                self.coef_ = self.regr.coef_
 64            return self
 65
 66        if (
 67            hasattr(self.regr, "coef_") == False
 68        ):  # sklearn model or CustomRegressor model ---
 69            self.regr.fit(X, y)  # fit the wrapped model once on this batch
 70            self.n_obs_ = X.shape[0]
 71
 72            if hasattr(self.regr, "stacked_obj"):
 73                self.coef_ = self.regr.stacked_obj.coef_
 74            else:
 75                self.coef_ = self.regr.coef_
 76            return self
 77        self.n_obs_ = X.shape[0]
 78        if hasattr(self.regr, "coef_"):
 79            self.coef_ = self.regr.coef_
 80        return self
 81
 82    def predict(self, X):
 83        # assert hasattr(self.regr, "coef_"), "model must have coef_ attribute"
 84        return self.regr.predict(X)
 85
 86    def partial_fit(self, X, y):
 87        assert hasattr(
 88            self.regr, "coef_"
 89        ), "model must be fitted first (i.e have 'coef_' attribute)"
 90        assert (
 91            self.n_obs_ is not None
 92        ), "model must be fitted first (i.e have 'n_obs_' attribute)"
 93
 94        if len(X.shape) == 1:
 95            X = X.reshape(1, -1)
 96
 97        assert X.shape[0] == 1, "X must have one row"
 98
 99        self.updating_factor_ = self.n_obs_ ** (-self.alpha)
100
101        if isinstance(self.regr, Base):  # nnetsauce model ---
102            newX = deepcopy(X)
103
104            if isinstance(
105                self.regr, CustomRegressor
106            ):  # other nnetsauce model (CustomRegressor) ---
107                newX = self.regr.cook_test_set(X=X)
108                if isinstance(X, pd.DataFrame):
109                    newx = newX.values.ravel()
110                else:
111                    newx = newX.ravel()
112
113        else:  # an sklearn model ---
114            if isinstance(X, pd.DataFrame):
115                newx = X.values.ravel()
116            else:
117                newx = X.ravel()
118
119        new_coef = self.regr.coef_ + self.updating_factor_ * np.dot(
120            newx, y - np.dot(newx, self.regr.coef_)
121        )
122        self.regr.coef_ = _update_mean(self.regr.coef_, self.n_obs_, new_coef)
123        self.coef_ = deepcopy(self.regr.coef_)
124        self.n_obs_ += 1
125        return self

Update a regression model with new observations

Parameters

regr: object
    A regression model with a coef_ attribute
alpha: float
    Updating factor's exponent

Attributes

n_obs_: int
    Number of observations
coef_: np.ndarray
    Coefficients of the model
updating_factor_: float
    Updating factor
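
A minimal updating sketch, assuming a scikit-learn linear model with a `coef_` attribute (here `sklearn.linear_model.Ridge`) and a top-level `ns.RegressorUpdater` export; `partial_fit` expects a single new observation:

```python
import numpy as np
from sklearn.linear_model import Ridge
import nnetsauce as ns

rng = np.random.default_rng(3)
beta = np.array([0.5, -1.0, 0.0, 2.0])
X = rng.normal(size=(100, 4))
y = X @ beta + 0.1 * rng.normal(size=100)

updater = ns.RegressorUpdater(Ridge(), alpha=0.5)
updater.fit(X, y)                   # fits the wrapped model, records n_obs_ and coef_

x_new = rng.normal(size=(1, 4))     # one new observation
y_new = float(x_new @ beta)
updater.partial_fit(x_new, y_new)   # coef_ nudged with factor n_obs_ ** (-alpha)
print(updater.n_obs_, updater.coef_)
```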

def fit(self, X, y, **kwargs):
51    def fit(self, X, y, **kwargs):
52        if isinstance(
53            self.regr, CustomRegressor
54        ):  # nnetsauce model not deep ---
55            if check_is_fitted(self.regr) == False:
56                self.regr.fit(X, y, **kwargs)
57                self.n_obs_ = X.shape[0]
58                if hasattr(self.regr, "coef_"):
59                    self.coef_ = self.regr.coef_
60                return self
61            self.n_obs_ = self.regr.scaler_.n_samples_seen_
62            if hasattr(self.regr, "coef_"):
63                self.coef_ = self.regr.coef_
64            return self
65
66        if (
67            hasattr(self.regr, "coef_") == False
68        ):  # sklearn model or CustomRegressor model ---
 69            self.regr.fit(X, y)  # fit the wrapped model once on this batch
 70            self.n_obs_ = X.shape[0]
 71
72            if hasattr(self.regr, "stacked_obj"):
73                self.coef_ = self.regr.stacked_obj.coef_
74            else:
75                self.coef_ = self.regr.coef_
76            return self
77        self.n_obs_ = X.shape[0]
78        if hasattr(self.regr, "coef_"):
79            self.coef_ = self.regr.coef_
80        return self
def predict(self, X):
82    def predict(self, X):
83        # assert hasattr(self.regr, "coef_"), "model must have coef_ attribute"
84        return self.regr.predict(X)
class ClassifierUpdater(sklearn.base.BaseEstimator, sklearn.base.ClassifierMixin):
 16class ClassifierUpdater(BaseEstimator, ClassifierMixin):
 17    """
 18    Update a classification model with new observations
 19
 20    Parameters
 21    ----------
 22    clf: object
 23        A classification model with a coef_ attribute
 24    alpha: float
 25        Updating factor's exponent
 26
 27    Attributes
 28    ----------
 29    n_obs_: int
 30        Number of observations
 31    coef_: np.ndarray
 32        Coefficients of the model
 33    updating_factor_: float
 34        Updating factor
 35
 36    """
 37
 38    _estimator_type = "classifier"
 39
 40    def __init__(self, clf, alpha=0.5):
 41        self.clf = clf
 42        self.alpha = alpha
 43        self.n_obs_ = None
 44        self.coef_ = None
 45        self.updating_factor_ = None
 46        try:
 47            self.coef_ = self.clf.coef_
 48            if isinstance(self.clf, Base):
 49                self.n_obs_ = self.clf.scaler_.n_samples_seen_
 50        except AttributeError:
 51            pass
 52
 53    def fit(self, X, y, **kwargs):
 54        raise NotImplementedError(
 55            "fit method is not implemented for ClassifierUpdater"
 56        )
 57
 58        if isinstance(
 59            self.clf, CustomClassifier
 60        ):  # nnetsauce model not deep ---
 61            if check_is_fitted(self.clf) == False:
 62                self.clf.fit(X, y, **kwargs)
 63                self.n_obs_ = X.shape[0]
 64                if hasattr(self.clf, "coef_"):
 65                    self.coef_ = self.clf.coef_
 66                return self
 67            self.n_obs_ = self.clf.scaler_.n_samples_seen_
 68            if hasattr(self.clf, "coef_"):
 69                self.coef_ = self.clf.coef_
 70            return self
 71
 72        if (
 73            hasattr(self.clf, "coef_") == False
 74        ):  # sklearn model or CustomClassifier model ---
 75            self.clf.fit(X, y)
 76            self.n_obs_ = X.shape[0]
 77            self.clf.fit(X, y)
 78            if hasattr(self.clf, "stacked_obj"):
 79                self.coef_ = self.clf.stacked_obj.coef_
 80            else:
 81                self.coef_ = self.clf.coef_
 82            return self
 83        self.n_obs_ = X.shape[0]
 84        if hasattr(self.clf, "coef_"):
 85            self.coef_ = self.clf.coef_
 86        return self
 87
 88    def predict(self, X):
 89        raise NotImplementedError(
 90            "predict method is not implemented for ClassifierUpdater"
 91        )
 92        # assert hasattr(self.clf, "coef_"), "model must have coef_ attribute"
 93        return self.clf.predict(X)
 94
 95    def partial_fit(self, X, y):
 96        raise NotImplementedError(
 97            "partial_fit method is not implemented for ClassifierUpdater"
 98        )
 99
100        assert hasattr(
101            self.clf, "coef_"
102        ), "model must be fitted first (i.e have 'coef_' attribute)"
103        assert (
104            self.n_obs_ is not None
105        ), "model must be fitted first (i.e have 'n_obs_' attribute)"
106
107        if len(X.shape) == 1:
108            X = X.reshape(1, -1)
109
110        assert X.shape[0] == 1, "X must have one row"
111
112        self.updating_factor_ = self.n_obs_ ** (-self.alpha)
113
114        if isinstance(self.clf, Base):  # nnetsauce model ---
115            newX = deepcopy(X)
116
117            if isinstance(
118                self.clf, CustomClassifier
119            ):  # other nnetsauce model (CustomClassifier) ---
120                newX = self.clf.cook_test_set(X=X)
121                if isinstance(X, pd.DataFrame):
122                    newx = newX.values.ravel()
123                else:
124                    newx = newX.ravel()
125
126        else:  # an sklearn model ---
127            if isinstance(X, pd.DataFrame):
128                newx = X.values.ravel()
129            else:
130                newx = X.ravel()
131
132        new_coef = self.clf.coef_ + self.updating_factor_ * np.dot(
133            newx, y - np.dot(newx, self.clf.coef_)
134        )
135        self.clf.coef_ = _update_mean(self.clf.coef_, self.n_obs_, new_coef)
136        self.coef_ = deepcopy(self.clf.coef_)
137        self.n_obs_ += 1
138        return self

Update a classification model with new observations

Parameters

clf: object
    A classification model with a coef_ attribute
alpha: float
    Updating factor's exponent

Attributes

n_obs_: int
    Number of observations
coef_: np.ndarray
    Coefficients of the model
updating_factor_: float
    Updating factor

def fit(self, X, y, **kwargs):
53    def fit(self, X, y, **kwargs):
54        raise NotImplementedError(
55            "fit method is not implemented for ClassifierUpdater"
56        )
57
58        if isinstance(
59            self.clf, CustomClassifier
60        ):  # nnetsauce model not deep ---
61            if check_is_fitted(self.clf) == False:
62                self.clf.fit(X, y, **kwargs)
63                self.n_obs_ = X.shape[0]
64                if hasattr(self.clf, "coef_"):
65                    self.coef_ = self.clf.coef_
66                return self
67            self.n_obs_ = self.clf.scaler_.n_samples_seen_
68            if hasattr(self.clf, "coef_"):
69                self.coef_ = self.clf.coef_
70            return self
71
72        if (
73            hasattr(self.clf, "coef_") == False
74        ):  # sklearn model or CustomClassifier model ---
75            self.clf.fit(X, y)
76            self.n_obs_ = X.shape[0]
77            self.clf.fit(X, y)
78            if hasattr(self.clf, "stacked_obj"):
79                self.coef_ = self.clf.stacked_obj.coef_
80            else:
81                self.coef_ = self.clf.coef_
82            return self
83        self.n_obs_ = X.shape[0]
84        if hasattr(self.clf, "coef_"):
85            self.coef_ = self.clf.coef_
86        return self
def predict(self, X):
88    def predict(self, X):
89        raise NotImplementedError(
90            "predict method is not implemented for ClassifierUpdater"
91        )
92        # assert hasattr(self.clf, "coef_"), "model must have coef_ attribute"
93        return self.clf.predict(X)
class RidgeRegressor(sklearn.base.BaseEstimator, sklearn.base.RegressorMixin):
 24class RidgeRegressor(BaseEstimator, RegressorMixin):
 25    """Ridge.
 26
 27    Attributes:
 28
 29        reg_lambda: float
 30            regularization parameter.
 31
 32        backend: str
 33            type of backend; must be in ('cpu', 'gpu', 'tpu')
 34
 35    """
 36
 37    def __init__(self, reg_lambda=0.1, backend="cpu"):
 38        assert backend in (
 39            "cpu",
 40            "gpu",
 41            "tpu",
 42        ), "`backend` must be in ('cpu', 'gpu', 'tpu')"
 43
 44        sys_platform = platform.system()
 45
 46        if (sys_platform == "Windows") and (backend in ("gpu", "tpu")):
 47            warnings.warn(
 48                "No GPU/TPU computing on Windows yet, backend set to 'cpu'"
 49            )
 50            backend = "cpu"
 51
 52        self.reg_lambda = reg_lambda
 53        self.backend = backend
 54        self.coef_ = None
 55
 56    def fit(self, X, y, **kwargs):
 57        """Fit the ridge regressor to training data (X, y)
 58
 59        Args:
 60
 61            X: {array-like}, shape = [n_samples, n_features]
 62                Training vectors, where n_samples is the number
 63                of samples and n_features is the number of features.
 64
 65            y: array-like, shape = [n_samples]
 66                Target values.
 67
 68            **kwargs: additional parameters to be passed to self.cook_training_set.
 69
 70        Returns:
 71
 72            self: object.
 73
 74        """
 75        self.ym, centered_y = mo.center_response(y)
 76        self.xm = X.mean(axis=0)
 77        self.xsd = X.std(axis=0)
 78        self.xsd[self.xsd == 0] = 1  # avoid division by zero
 79        X_ = (X - self.xm[None, :]) / self.xsd[None, :]
 80
 81        if self.backend == "cpu":
 82            if len(centered_y.shape) <= 1:
 83                eye_term = np.sqrt(self.reg_lambda) * np.eye(X.shape[1])
 84                X_ = np.row_stack((X_, eye_term))
 85                y_ = np.concatenate((centered_y, np.zeros(X.shape[1])))
 86                beta_info = get_beta(X_, y_)
 87                self.coef_ = beta_info[0]
 88            else:
 89                try:
 90                    eye_term = np.sqrt(self.reg_lambda) * np.eye(X.shape[1])
 91                    X_ = np.row_stack((X_, eye_term))
 92                    y_ = np.row_stack(
 93                        (
 94                            centered_y,
 95                            np.zeros((eye_term.shape[0], centered_y.shape[1])),
 96                        )
 97                    )
 98                    beta_info = get_beta(X_, y_)
 99                    self.coef_ = beta_info[0]
100                except Exception:
101                    x = inv(
102                        mo.crossprod(X_) + self.reg_lambda * np.eye(X_.shape[1])
103                    )
104                    hat_matrix = mo.tcrossprod(x, X_)
105                    self.coef_ = mo.safe_sparse_dot(hat_matrix, centered_y)
106            return self
107
108        x = jinv(
109            mo.crossprod(X_, backend=self.backend)
110            + self.reg_lambda * jnp.eye(X_.shape[1])
111        )
112
113        hat_matrix = mo.tcrossprod(x, X_, backend=self.backend)
114        self.coef_ = mo.safe_sparse_dot(
115            hat_matrix, centered_y, backend=self.backend
116        )
117        return self
118
119    def predict(self, X, **kwargs):
120        """Predict test data X.
121
122        Args:
123
124            X: {array-like}, shape = [n_samples, n_features]
125                Training vectors, where n_samples is the number
126                of samples and n_features is the number of features.
127
128            **kwargs: additional keyword parameters (currently unused by this method)
129
130        Returns:
131
132            model predictions: {array-like}
133
134        """
135        X_ = (X - self.xm[None, :]) / self.xsd[None, :]
136
137        if self.backend == "cpu":
138            if isinstance(self.ym, float):
139                return self.ym + mo.safe_sparse_dot(X_, self.coef_)
140            return self.ym[None, :] + mo.safe_sparse_dot(X_, self.coef_)
141
142        # if self.backend in ("gpu", "tpu"):
143        if isinstance(self.ym, float):
144            return self.ym + mo.safe_sparse_dot(
145                X_, self.coef_, backend=self.backend
146            )
147        return self.ym[None, :] + mo.safe_sparse_dot(
148            X_, self.coef_, backend=self.backend
149        )

Ridge.

Attributes:

reg_lambda: float
    regularization parameter.

backend: str
    type of backend; must be in ('cpu', 'gpu', 'tpu')
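
A minimal fit/predict sketch on synthetic data, assuming a top-level `ns.RidgeRegressor` export:

```python
import numpy as np
import nnetsauce as ns

rng = np.random.default_rng(4)
X = rng.normal(size=(120, 6))
y = X @ rng.normal(size=6) + 0.05 * rng.normal(size=120)

reg = ns.RidgeRegressor(reg_lambda=0.1, backend="cpu")
reg.fit(X, y)            # standardizes X, centers y, solves the augmented system
preds = reg.predict(X)
```
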
def fit(self, X, y, **kwargs):
 56    def fit(self, X, y, **kwargs):
 57        """Fit the ridge regressor to training data (X, y)
 58
 59        Args:
 60
 61            X: {array-like}, shape = [n_samples, n_features]
 62                Training vectors, where n_samples is the number
 63                of samples and n_features is the number of features.
 64
 65            y: array-like, shape = [n_samples]
 66                Target values.
 67
 68            **kwargs: additional parameters to be passed to self.cook_training_set.
 69
 70        Returns:
 71
 72            self: object.
 73
 74        """
 75        self.ym, centered_y = mo.center_response(y)
 76        self.xm = X.mean(axis=0)
 77        self.xsd = X.std(axis=0)
 78        self.xsd[self.xsd == 0] = 1  # avoid division by zero
 79        X_ = (X - self.xm[None, :]) / self.xsd[None, :]
 80
 81        if self.backend == "cpu":
 82            if len(centered_y.shape) <= 1:
 83                eye_term = np.sqrt(self.reg_lambda) * np.eye(X.shape[1])
 84                X_ = np.row_stack((X_, eye_term))
 85                y_ = np.concatenate((centered_y, np.zeros(X.shape[1])))
 86                beta_info = get_beta(X_, y_)
 87                self.coef_ = beta_info[0]
 88            else:
 89                try:
 90                    eye_term = np.sqrt(self.reg_lambda) * np.eye(X.shape[1])
 91                    X_ = np.row_stack((X_, eye_term))
 92                    y_ = np.row_stack(
 93                        (
 94                            centered_y,
 95                            np.zeros((eye_term.shape[0], centered_y.shape[1])),
 96                        )
 97                    )
 98                    beta_info = get_beta(X_, y_)
 99                    self.coef_ = beta_info[0]
100                except Exception:
101                    x = inv(
102                        mo.crossprod(X_) + self.reg_lambda * np.eye(X_.shape[1])
103                    )
104                    hat_matrix = mo.tcrossprod(x, X_)
105                    self.coef_ = mo.safe_sparse_dot(hat_matrix, centered_y)
106            return self
107
108        x = jinv(
109            mo.crossprod(X_, backend=self.backend)
110            + self.reg_lambda * jnp.eye(X_.shape[1])
111        )
112
113        hat_matrix = mo.tcrossprod(x, X_, backend=self.backend)
114        self.coef_ = mo.safe_sparse_dot(
115            hat_matrix, centered_y, backend=self.backend
116        )
117        return self

Fit the ridge regressor to training data (X, y)

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to self.cook_training_set.

Returns:

self: object.
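
The CPU branch of `fit` regularizes by data augmentation: it stacks `sqrt(reg_lambda) * I` under the standardized design matrix and zeros under the centered response, so an ordinary least-squares solve yields the ridge estimate. A small self-contained NumPy check of that identity (it does not use the class itself):

```python
import numpy as np

rng = np.random.default_rng(5)
X = rng.normal(size=(50, 3))
y = rng.normal(size=50)
lam = 0.1

# Least squares on the augmented system ...
X_aug = np.vstack([X, np.sqrt(lam) * np.eye(3)])
y_aug = np.concatenate([y, np.zeros(3)])
beta_aug, *_ = np.linalg.lstsq(X_aug, y_aug, rcond=None)

# ... equals the closed-form ridge solution (X'X + lam * I)^{-1} X'y
beta_ridge = np.linalg.solve(X.T @ X + lam * np.eye(3), X.T @ y)
assert np.allclose(beta_aug, beta_ridge)
```
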
def predict(self, X, **kwargs):
119    def predict(self, X, **kwargs):
120        """Predict test data X.
121
122        Args:
123
124            X: {array-like}, shape = [n_samples, n_features]
125                Training vectors, where n_samples is the number
126                of samples and n_features is the number of features.
127
128            **kwargs: additional keyword parameters (currently unused by this method)
129
130        Returns:
131
132            model predictions: {array-like}
133
134        """
135        X_ = (X - self.xm[None, :]) / self.xsd[None, :]
136
137        if self.backend == "cpu":
138            if isinstance(self.ym, float):
139                return self.ym + mo.safe_sparse_dot(X_, self.coef_)
140            return self.ym[None, :] + mo.safe_sparse_dot(X_, self.coef_)
141
142        # if self.backend in ("gpu", "tpu"):
143        if isinstance(self.ym, float):
144            return self.ym + mo.safe_sparse_dot(
145                X_, self.coef_, backend=self.backend
146            )
147        return self.ym[None, :] + mo.safe_sparse_dot(
148            X_, self.coef_, backend=self.backend
149        )

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional keyword parameters (currently unused by this method)

Returns:

model predictions: {array-like}
class Ridge2Regressor(nnetsauce.ridge2.ridge2.Ridge2, sklearn.base.RegressorMixin):
 23class Ridge2Regressor(Ridge2, RegressorMixin):
 24    """Ridge regression with 2 regularization parameters derived from class Ridge
 25
 26    Parameters:
 27
 28        n_hidden_features: int
 29            number of nodes in the hidden layer
 30
 31        activation_name: str
 32            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 33
 34        a: float
 35            hyperparameter for 'prelu' or 'elu' activation function
 36
 37        nodes_sim: str
 38            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 39            'uniform'
 40
 41        bias: boolean
 42            indicates if the hidden layer contains a bias term (True) or not
 43            (False)
 44
 45        dropout: float
 46            regularization parameter; (random) percentage of nodes dropped out
 47            of the training
 48
 49        n_clusters: int
 50            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 51                no clustering)
 52
 53        cluster_encode: bool
 54            defines how the variable containing clusters is treated (default is one-hot)
 55            if `False`, then labels are used, without one-hot encoding
 56
 57        type_clust: str
 58            type of clustering method: currently k-means ('kmeans') or Gaussian
 59            Mixture Model ('gmm')
 60
 61        type_scaling: a tuple of 3 strings
 62            scaling methods for inputs, hidden layer, and clustering respectively
 63            (and when relevant).
 64            Currently available: standardization ('std') or MinMax scaling ('minmax')
 65
 66        lambda1: float
 67            regularization parameter on direct link
 68
 69        lambda2: float
 70            regularization parameter on hidden layer
 71
 72        seed: int
 73            reproducibility seed for nodes_sim=='uniform'
 74
 75        backend: str
 76            'cpu' or 'gpu' or 'tpu'
 77
 78    Attributes:
 79
 80        beta_: {array-like}
 81            regression coefficients
 82
 83        coef_: {array-like}
 84            alias for `beta_`, regression coefficients
 85
 86        y_mean_: float
 87            average response
 88
 89    """
 90
 91    # construct the object -----
 92
 93    def __init__(
 94        self,
 95        n_hidden_features=5,
 96        activation_name="relu",
 97        a=0.01,
 98        nodes_sim="sobol",
 99        bias=True,
100        dropout=0,
101        n_clusters=2,
102        cluster_encode=True,
103        type_clust="kmeans",
104        type_scaling=("std", "std", "std"),
105        lambda1=0.1,
106        lambda2=0.1,
107        seed=123,
108        backend="cpu",
109    ):
110        super().__init__(
111            n_hidden_features=n_hidden_features,
112            activation_name=activation_name,
113            a=a,
114            nodes_sim=nodes_sim,
115            bias=bias,
116            dropout=dropout,
117            n_clusters=n_clusters,
118            cluster_encode=cluster_encode,
119            type_clust=type_clust,
120            type_scaling=type_scaling,
121            lambda1=lambda1,
122            lambda2=lambda2,
123            seed=seed,
124            backend=backend,
125        )
126
127        self.type_fit = "regression"
128        self.coef_ = None
129
130    def fit(self, X, y, **kwargs):
131        """Fit Ridge model to training data (X, y).
132
133        Args:
134
135            X: {array-like}, shape = [n_samples, n_features]
136                Training vectors, where n_samples is the number
137                of samples and n_features is the number of features.
138
139            y: array-like, shape = [n_samples]
140                Target values.
141
142            **kwargs: additional parameters to be passed to
143                    self.cook_training_set or self.obj.fit
144
145        Returns:
146
147            self: object
148
149        """
150
151        sys_platform = platform.system()
152
153        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
154
155        n_X, p_X = X.shape
156        n_Z, p_Z = scaled_Z.shape
157
158        if self.n_clusters > 0:
159            if self.encode_clusters == True:
160                n_features = p_X + self.n_clusters
161            else:
162                n_features = p_X + 1
163        else:
164            n_features = p_X
165
166        X_ = scaled_Z[:, 0:n_features]
167        Phi_X_ = scaled_Z[:, n_features:p_Z]
168
169        B = mo.crossprod(x=X_, backend=self.backend) + self.lambda1 * np.diag(
170            np.repeat(1, n_features)
171        )
172        C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend)
173        D = mo.crossprod(
174            x=Phi_X_, backend=self.backend
175        ) + self.lambda2 * np.diag(np.repeat(1, Phi_X_.shape[1]))
176
177        if sys_platform in ("Linux", "Darwin"):
178            B_inv = pinv(B) if self.backend == "cpu" else jpinv(B)
179        else:
180            B_inv = pinv(B)
181
182        W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend)
183        S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend)
184
185        if sys_platform in ("Linux", "Darwin"):
186            S_inv = pinv(S_mat) if self.backend == "cpu" else jpinv(S_mat)
187        else:
188            S_inv = pinv(S_mat)
189
190        Y = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend)
191        inv = mo.rbind(
192            mo.cbind(
193                x=B_inv + mo.crossprod(x=W, y=Y, backend=self.backend),
194                y=-np.transpose(Y),
195                backend=self.backend,
196            ),
197            mo.cbind(x=-Y, y=S_inv, backend=self.backend),
198            backend=self.backend,
199        )
200
201        self.beta_ = mo.safe_sparse_dot(
202            a=inv,
203            b=mo.crossprod(x=scaled_Z, y=centered_y, backend=self.backend),
204            backend=self.backend,
205        )
206
207        self.coef_ = self.beta_  # sklearn compatibility
208
209        return self
210
211    def predict(self, X, **kwargs):
212        """Predict test data X.
213
214        Args:
215
216            X: {array-like}, shape = [n_samples, n_features]
217                Training vectors, where n_samples is the number
218                of samples and n_features is the number of features.
219
220            **kwargs: additional parameters to be passed to
221                    self.cook_test_set
222
223        Returns:
224
225            model predictions: {array-like}
226
227        """
228
229        if len(X.shape) == 1:
230            n_features = X.shape[0]
231            new_X = mo.rbind(
232                x=X.reshape(1, n_features),
233                y=np.ones(n_features).reshape(1, n_features),
234                backend=self.backend,
235            )
236
237            return (
238                self.y_mean_
239                + mo.safe_sparse_dot(
240                    a=self.cook_test_set(new_X, **kwargs),
241                    b=self.beta_,
242                    backend=self.backend,
243                )
244            )[0]
245
246        return self.y_mean_ + mo.safe_sparse_dot(
247            a=self.cook_test_set(X, **kwargs),
248            b=self.beta_,
249            backend=self.backend,
250        )
251
252    def partial_fit(self, X, y, learning_rate=0.01, decay=0.001, **kwargs):
253        """Incrementally fit the Ridge model using SGD-style updates.
254
255        Uses the update rule: w_{n+1} = w_n + γ_n * x_n * [y_n - x_n^T * w_n] - γ_n * λ * w_n
256        for online learning with individual samples.
257
258        Args:
259            X: {array-like}, shape = [n_samples, n_features]
260                Training vectors for this batch
261
262            y: array-like, shape = [n_samples]
263                Target values for this batch
264
265            learning_rate: float, default=0.01
266                Initial learning rate for SGD updates
267
268            decay: float, default=0.001
269                Learning rate decay parameter
270
271            **kwargs: additional parameters to be passed to self.cook_training_set
272
273        Returns:
274            self: object
275        """
276
277        # Input validation
278        X = np.asarray(X)
279        y = np.asarray(y)
280
281        if X.shape[0] != y.shape[0]:
282            raise ValueError("X and y must have the same number of samples")
283
284        # Handle first call
285        if not self._is_fitted:
286            # Initialize learning parameters
287            self.initial_learning_rate = learning_rate
288            self.decay = decay
289            self._step_count = 0
290            self._is_fitted = True
291
292        # Process the batch
293        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
294
295        # Get dimensions
296        n_samples, n_features_total = scaled_Z.shape
297        n_original_features = X.shape[1]
298
299        # Determine feature dimensions for regularization
300        if self.n_clusters > 0:
301            if self.cluster_encode:
302                n_direct_features = n_original_features + self.n_clusters
303            else:
304                n_direct_features = n_original_features + 1
305        else:
306            n_direct_features = n_original_features
307
308        # Initialize beta_ if first time
309        if not hasattr(self, "beta_") or self.beta_ is None:
310            # For regression, beta_ is 1D (single output)
311            self.beta_ = np.zeros(n_features_total)
312
313        # Precompute indices for regularization
314        direct_indices = slice(0, n_direct_features)
315        hidden_indices = slice(n_direct_features, n_features_total)
316
317        # Process each sample with SGD
318        for i in range(n_samples):
319            self._step_count += 1
320
321            # Current learning rate with decay
322            current_lr = self.initial_learning_rate / (
323                1 + self.decay * self._step_count
324            )
325
326            # Current sample and target
327            x_i = scaled_Z[i, :]  # Feature vector
328            y_i = (
329                centered_y[i] if centered_y.ndim == 1 else centered_y[i, 0]
330            )  # Scalar target
331
332            # Prediction: x_i^T * beta
333            prediction = x_i @ self.beta_
334
335            # Error: y_i - prediction
336            error = y_i - prediction
337
338            # Gradient update: current_lr * x_i * error
339            gradient_update = current_lr * x_i * error
340
341            # Regularization terms (more efficient indexing)
342            reg_update = np.zeros_like(self.beta_)
343            reg_update[direct_indices] = (
344                current_lr * self.lambda1 * self.beta_[direct_indices]
345            )
346            reg_update[hidden_indices] = (
347                current_lr * self.lambda2 * self.beta_[hidden_indices]
348            )
349
350            # Combined update: beta = beta + gradient_update - reg_update
351            self.beta_ += gradient_update - reg_update
352
353        self.coef_ = self.beta_  # sklearn compatibility
354
355        return self

Ridge regression with 2 regularization parameters derived from class Ridge

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

lambda1: float
    regularization parameter on direct link

lambda2: float
    regularization parameter on hidden layer

seed: int
    reproducibility seed for nodes_sim=='uniform'

backend: str
    'cpu' or 'gpu' or 'tpu'

Attributes:

beta_: {array-like}
    regression coefficients

coef_: {array-like}
    alias for `beta_`, regression coefficients

y_mean_: float
    average response
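
A minimal fit/predict sketch on synthetic data, assuming a top-level `ns.Ridge2Regressor` export:

```python
import numpy as np
import nnetsauce as ns

rng = np.random.default_rng(6)
X = rng.normal(size=(200, 5))
y = X[:, 0] - 0.5 * X[:, 2] + 0.1 * rng.normal(size=200)

reg = ns.Ridge2Regressor(n_hidden_features=10, lambda1=0.1, lambda2=0.1, n_clusters=2)
reg.fit(X, y)
preds = reg.predict(X)
```
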
def fit(self, X, y, **kwargs):
130    def fit(self, X, y, **kwargs):
131        """Fit Ridge model to training data (X, y).
132
133        Args:
134
135            X: {array-like}, shape = [n_samples, n_features]
136                Training vectors, where n_samples is the number
137                of samples and n_features is the number of features.
138
139            y: array-like, shape = [n_samples]
140                Target values.
141
142            **kwargs: additional parameters to be passed to
143                    self.cook_training_set or self.obj.fit
144
145        Returns:
146
147            self: object
148
149        """
150
151        sys_platform = platform.system()
152
153        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
154
155        n_X, p_X = X.shape
156        n_Z, p_Z = scaled_Z.shape
157
158        if self.n_clusters > 0:
159            if self.encode_clusters == True:
160                n_features = p_X + self.n_clusters
161            else:
162                n_features = p_X + 1
163        else:
164            n_features = p_X
165
166        X_ = scaled_Z[:, 0:n_features]
167        Phi_X_ = scaled_Z[:, n_features:p_Z]
168
169        B = mo.crossprod(x=X_, backend=self.backend) + self.lambda1 * np.diag(
170            np.repeat(1, n_features)
171        )
172        C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend)
173        D = mo.crossprod(
174            x=Phi_X_, backend=self.backend
175        ) + self.lambda2 * np.diag(np.repeat(1, Phi_X_.shape[1]))
176
177        if sys_platform in ("Linux", "Darwin"):
178            B_inv = pinv(B) if self.backend == "cpu" else jpinv(B)
179        else:
180            B_inv = pinv(B)
181
182        W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend)
183        S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend)
184
185        if sys_platform in ("Linux", "Darwin"):
186            S_inv = pinv(S_mat) if self.backend == "cpu" else jpinv(S_mat)
187        else:
188            S_inv = pinv(S_mat)
189
190        Y = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend)
191        inv = mo.rbind(
192            mo.cbind(
193                x=B_inv + mo.crossprod(x=W, y=Y, backend=self.backend),
194                y=-np.transpose(Y),
195                backend=self.backend,
196            ),
197            mo.cbind(x=-Y, y=S_inv, backend=self.backend),
198            backend=self.backend,
199        )
200
201        self.beta_ = mo.safe_sparse_dot(
202            a=inv,
203            b=mo.crossprod(x=scaled_Z, y=centered_y, backend=self.backend),
204            backend=self.backend,
205        )
206
207        self.coef_ = self.beta_  # sklearn compatibility
208
209        return self

Fit Ridge model to training data (X, y).

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
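
`fit` solves the two-block ridge system in closed form through the partitioned (Schur-complement) inverse assembled from `B`, `C`, `D`, `W` and `S_mat`. A self-contained NumPy check of that identity on small random blocks (hypothetical stand-ins for the direct-link and hidden-layer parts of `scaled_Z`):

```python
import numpy as np

rng = np.random.default_rng(7)
A = rng.normal(size=(8, 3))     # stand-in for the direct-link block X_
H = rng.normal(size=(8, 4))     # stand-in for the hidden-layer block Phi_X_
lam1, lam2 = 0.1, 0.2

B = A.T @ A + lam1 * np.eye(3)
C = H.T @ A
D = H.T @ H + lam2 * np.eye(4)

B_inv = np.linalg.pinv(B)
W = C @ B_inv
S_inv = np.linalg.pinv(D - W @ C.T)          # inverse of the Schur complement
Y = S_inv @ W
block_inv = np.block([[B_inv + W.T @ Y, -Y.T],
                      [-Y, S_inv]])

M = np.block([[B, C.T], [C, D]])             # full regularized Gram matrix
assert np.allclose(block_inv, np.linalg.inv(M))
```
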
def predict(self, X, **kwargs):
211    def predict(self, X, **kwargs):
212        """Predict test data X.
213
214        Args:
215
216            X: {array-like}, shape = [n_samples, n_features]
217                Training vectors, where n_samples is the number
218                of samples and n_features is the number of features.
219
220            **kwargs: additional parameters to be passed to
221                    self.cook_test_set
222
223        Returns:
224
225            model predictions: {array-like}
226
227        """
228
229        if len(X.shape) == 1:
230            n_features = X.shape[0]
231            new_X = mo.rbind(
232                x=X.reshape(1, n_features),
233                y=np.ones(n_features).reshape(1, n_features),
234                backend=self.backend,
235            )
236
237            return (
238                self.y_mean_
239                + mo.safe_sparse_dot(
240                    a=self.cook_test_set(new_X, **kwargs),
241                    b=self.beta_,
242                    backend=self.backend,
243                )
244            )[0]
245
246        return self.y_mean_ + mo.safe_sparse_dot(
247            a=self.cook_test_set(X, **kwargs),
248            b=self.beta_,
249            backend=self.backend,
250        )

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
class Ridge2Classifier(nnetsauce.ridge2.ridge2.Ridge2, sklearn.base.ClassifierMixin):
 18class Ridge2Classifier(Ridge2, ClassifierMixin):
 19    """Multinomial logit classification with 2 regularization parameters
 20
 21    Parameters:
 22
 23        n_hidden_features: int
 24            number of nodes in the hidden layer
 25
 26        activation_name: str
 27            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 28
 29        a: float
 30            hyperparameter for 'prelu' or 'elu' activation function
 31
 32        nodes_sim: str
 33            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 34            'uniform'
 35
 36        bias: boolean
 37            indicates if the hidden layer contains a bias term (True) or not
 38            (False)
 39
 40        dropout: float
 41            regularization parameter; (random) percentage of nodes dropped out
 42            of the training
 43
 44        direct_link: boolean
 45            indicates if the original predictors are included (True) in model's
 46            fitting or not (False)
 47
 48        n_clusters: int
 49            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 50                no clustering)
 51
 52        cluster_encode: bool
 53            defines how the variable containing clusters is treated (default is one-hot)
 54            if `False`, then labels are used, without one-hot encoding
 55
 56        type_clust: str
 57            type of clustering method: currently k-means ('kmeans') or Gaussian
 58            Mixture Model ('gmm')
 59
 60        type_scaling: a tuple of 3 strings
 61            scaling methods for inputs, hidden layer, and clustering respectively
 62            (and when relevant).
 63            Currently available: standardization ('std') or MinMax scaling ('minmax')
 64
 65        lambda1: float
 66            regularization parameter on direct link
 67
 68        lambda2: float
 69            regularization parameter on hidden layer
 70
 71        solver: str
 72            optimization function "L-BFGS-B",  "Newton-CG",
 73            "trust-ncg", "L-BFGS-B-lstsq", "Newton-CG-lstsq",
 74            "trust-ncg-lstsq" (see scipy.optimize.minimize)
 75            When using "L-BFGS-B-lstsq", "Newton-CG-lstsq", or "trust-ncg-lstsq",
 76            the initial value for the optimization is set to the least squares solution
 77
 78        seed: int
 79            reproducibility seed for nodes_sim=='uniform'
 80
 81        backend: str
 82            "cpu" or "gpu" or "tpu"
 83
 84    Attributes:
 85
 86        beta_: {array-like}
 87            regression coefficients
 88
 89        classes_: {array-like}
 90            unique classes in the target variable
 91
 92        minloglik_: float
 93            minimum value of the negative log-likelihood
 94
 95    Examples:
 96
 97    See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/ridge_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/ridge_classification.py)
 98
 99    ```python
100    import nnetsauce as ns
101    import numpy as np
102    from sklearn.datasets import load_breast_cancer
103    from sklearn.model_selection import train_test_split
104    from time import time
105
106
107    breast_cancer = load_breast_cancer()
108    X = breast_cancer.data
109    y = breast_cancer.target
110
111    # split data into training test and test set
112    np.random.seed(123)
113    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
114
115    # create the model with nnetsauce
116    fit_obj = ns.Ridge2Classifier(lambda1 = 6.90185578e+04,
117                                lambda2 = 3.17392781e+02,
118                                n_hidden_features=95,
119                                n_clusters=2,
120                                dropout = 3.62817383e-01,
121                                type_clust = "gmm")
122
123    # fit the model on training set
124    start = time()
125    fit_obj.fit(X_train, y_train)
126    print(f"Elapsed {time() - start}")
127
128    # get the accuracy on test set
129    start = time()
130    print(fit_obj.score(X_test, y_test))
131    print(f"Elapsed {time() - start}")
132
133    # get area under the curve on test set (auc)
134    print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
135    ```
136
137
138    """
139
140    _estimator_type = "classifier"
141
142    # construct the object -----
143
144    def __init__(
145        self,
146        n_hidden_features=5,
147        activation_name="relu",
148        a=0.01,
149        nodes_sim="sobol",
150        bias=True,
151        dropout=0,
152        direct_link=True,
153        n_clusters=2,
154        cluster_encode=True,
155        type_clust="kmeans",
156        type_scaling=("std", "std", "std"),
157        lambda1=0.1,
158        lambda2=0.1,
159        solver="L-BFGS-B",
160        seed=123,
161        backend="cpu",
162    ):
163        super().__init__(
164            n_hidden_features=n_hidden_features,
165            activation_name=activation_name,
166            a=a,
167            nodes_sim=nodes_sim,
168            bias=bias,
169            dropout=dropout,
170            direct_link=direct_link,
171            n_clusters=n_clusters,
172            cluster_encode=cluster_encode,
173            type_clust=type_clust,
174            type_scaling=type_scaling,
175            lambda1=lambda1,
176            lambda2=lambda2,
177            seed=seed,
178            backend=backend,
179        )
180
181        self.type_fit = "classification"
182        self.solver = solver
183        self.beta_ = None
184        self.classes_ = None
185        self.minloglik_ = None
186        self.coef_ = None
187
188    def loglik(self, X, Y, **kwargs):
189        """Log-likelihood for training data (X, Y).
190
191        Args:
192
193            X: {array-like}, shape = [n_samples, n_features]
194                Training vectors, where n_samples is the number
195                of samples and n_features is the number of features.
196
197            Y: array-like, shape = [n_samples, n_classes]
198                One-hot encoded target values.
199
200            **kwargs: additional parameters to be passed to
201                    self.cook_training_set or self.obj.fit
202
203        Returns:
204            loglik_func, grad_func, hessian_func: callables evaluating the penalized negative log-likelihood, its gradient and its Hessian
205        """
206
207        def loglik_grad_hess(Y, X, B, XB, hessian=True, **kwargs):
208            # nobs, n_classes
209            n, K = Y.shape
210
211            # total number of covariates
212            p = X.shape[1]
213
214            # initial number of covariates
215            init_p = p - self.n_hidden_features
216
217            max_double = 709.0
218            XB[XB > max_double] = max_double
219            exp_XB = np.exp(XB)
220            probs = exp_XB / exp_XB.sum(axis=1)[:, None]
221
222            # gradient -----
223            # (Y - p) -> (n, K)
224            # X -> (n, p)
225            # (K, n) %*% (n, p) -> (K, p)
226            if hessian is False:
227                grad = (
228                    -mo.safe_sparse_dot(
229                        a=(Y - probs).T, b=X, backend=self.backend
230                    )
231                    / n
232                )
233                grad += self.lambda1 * B[0:init_p, :].sum(axis=0)[:, None]
234                grad += self.lambda2 * B[init_p:p, :].sum(axis=0)[:, None]
235
236                return grad.flatten()
237
238            # hessian -----
239            if hessian is True:
240                Kp = K * p
241                hess = np.zeros((Kp, Kp), float)
242                for k1 in range(K):
243                    x_index = range(k1 * p, (k1 + 1) * p)
244                    for k2 in range(k1, K):
245                        y_index = range(k2 * p, (k2 + 1) * p)
246                        H_sub = (
247                            -mo.safe_sparse_dot(
248                                a=X.T,
249                                b=(probs[:, k1] * probs[:, k2])[:, None] * X,
250                                backend=self.backend,
251                            )
252                            / n
253                        )  # do not store
254                        hess[np.ix_(x_index, y_index)] = hess[
255                            np.ix_(y_index, x_index)
256                        ] = H_sub
257
258                return hess + (self.lambda1 + self.lambda2) * np.identity(Kp)
259
260        # total number of covariates
261        p = X.shape[1]
262
263        # initial number of covariates
264        init_p = p - self.n_hidden_features
265
266        # log-likelihood (1st return)
267        def loglik_func(x):
268            # (p, K)
269            B = x.reshape(Y.shape[1], p).T
270
271            # (n, K)
272            XB = mo.safe_sparse_dot(X, B, backend=self.backend)
273
274            res = -(np.sum(Y * XB, axis=1) - logsumexp(XB, axis=1)).mean()
275
276            res += (
277                0.5
278                * self.lambda1
279                * mo.squared_norm(B[0:init_p, :], backend=self.backend)
280            )
281            res += (
282                0.5
283                * self.lambda2
284                * mo.squared_norm(B[init_p:p, :], backend=self.backend)
285            )
286
287            return res
288
289        # gradient of log-likelihood
290        def grad_func(x):
291            # (p, K)
292            B = x.reshape(Y.shape[1], p).T
293
294            return loglik_grad_hess(
295                Y=Y,
296                X=X,
297                B=B,
298                XB=mo.safe_sparse_dot(X, B, backend=self.backend),
299                hessian=False,
300                **kwargs
301            )
302
303        # hessian of log-likelihood
304        def hessian_func(x):
305            # (p, K)
306            B = x.reshape(Y.shape[1], p).T
307
308            return loglik_grad_hess(
309                Y=Y,
310                X=X,
311                B=B,
312                XB=mo.safe_sparse_dot(X, B, backend=self.backend),
313                hessian=True,
314                **kwargs
315            )
316
317        return loglik_func, grad_func, hessian_func
318
319    # newton-cg
320    # L-BFGS-B
321    def fit(self, X, y, **kwargs):
322        """Fit Ridge model to training data (X, y).
323
324        for beta: regression coeffs (beta11, ..., beta1p, ..., betaK1, ..., betaKp)
325        for K classes and p covariates.
326
327        Args:
328
329            X: {array-like}, shape = [n_samples, n_features]
330                Training vectors, where n_samples is the number
331                of samples and n_features is the number of features.
332
333            y: array-like, shape = [n_samples]
334                Target values.
335
336            **kwargs: additional parameters to be passed to
337                    self.cook_training_set or self.obj.fit
338
339        Returns:
340
341            self: object
342
343        """
344
345        assert mx.is_factor(y), "y must contain only integers"
346
347        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
348
349        self.n_classes = len(np.unique(y))
350        self.classes_ = np.unique(y)  # for compatibility with sklearn
351        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
352
353        Y = mo.one_hot_encode2(output_y, self.n_classes)
354
355        # optimize for beta, minimize self.loglik (maximize loglik) -----
356        loglik_func, grad_func, hessian_func = self.loglik(X=scaled_Z, Y=Y)
357
358        if self.solver == "L-BFGS-B":
359            opt = minimize(
360                fun=loglik_func,
361                x0=np.zeros(scaled_Z.shape[1] * self.n_classes),
362                jac=grad_func,
363                method=self.solver,
364            )
365            self.beta_ = opt.x
366            self.minloglik_ = opt.fun
367
368        if self.solver in ("Newton-CG", "trust-ncg"):
369            opt = minimize(
370                fun=loglik_func,
371                x0=np.zeros(scaled_Z.shape[1] * self.n_classes),
372                jac=grad_func,
373                hess=hessian_func,
374                method=self.solver,
375            )
376            self.beta_ = opt.x
377            self.minloglik_ = opt.fun
378
379        if self.solver == "L-BFGS-B-lstsq":
380            opt = minimize(
381                fun=loglik_func,
382                x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten(
383                    order="F"
384                ),
385                jac=grad_func,
386                method="L-BFGS-B",
387            )
388            self.beta_ = opt.x
389            self.minloglik_ = opt.fun
390
391        if self.solver == "Newton-CG-lstsq":
392            opt = minimize(
393                fun=loglik_func,
394                x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten(
395                    order="F"
396                ),
397                jac=grad_func,
398                hess=hessian_func,
399                method="Newton-CG",
400            )
401            self.beta_ = opt.x
402            self.minloglik_ = opt.fun
403
404        if self.solver == "trust-ncg-lstsq":
405            opt = minimize(
406                fun=loglik_func,
407                x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten(
408                    order="F"
409                ),
410                jac=grad_func,
411                hess=hessian_func,
412                method="trust-ncg",
413            )
414            self.beta_ = opt.x
415            self.minloglik_ = opt.fun
416
417        self.coef_ = self.beta_
418
419        self.classes_ = np.unique(y)
420
421        return self
422
423    def predict(self, X, **kwargs):
424        """Predict test data X.
425
426        Args:
427
428            X: {array-like}, shape = [n_samples, n_features]
429                Training vectors, where n_samples is the number
430                of samples and n_features is the number of features.
431
432            **kwargs: additional parameters to be passed to
433                    self.cook_test_set
434
435        Returns:
436
437            model predictions: {array-like}
438        """
439
440        return np.argmax(self.predict_proba(X, **kwargs), axis=1)
441
442    def predict_proba(self, X, **kwargs):
443        """Predict probabilities for test data X.
444
445        Args:
446
447            X: {array-like}, shape = [n_samples, n_features]
448                Training vectors, where n_samples is the number
449                of samples and n_features is the number of features.
450
451            **kwargs: additional parameters to be passed to
452                    self.cook_test_set
453
454        Returns:
455
456            probability estimates for test data: {array-like}
457
458        """
459        if len(X.shape) == 1:
460            n_features = X.shape[0]
461            new_X = mo.rbind(
462                X.reshape(1, n_features),
463                np.ones(n_features).reshape(1, n_features),
464            )
465
466            Z = self.cook_test_set(new_X, **kwargs)
467
468        else:
469            Z = self.cook_test_set(X, **kwargs)
470
471        ZB = mo.safe_sparse_dot(
472            a=Z,
473            b=self.beta_.reshape(
474                self.n_classes,
475                X.shape[1] + self.n_hidden_features + self.n_clusters,
476            ).T,
477            backend=self.backend,
478        )
479
480        exp_ZB = np.exp(ZB)
481
482        return exp_ZB / exp_ZB.sum(axis=1)[:, None]
483
484    @property
485    def _estimator_type(self):
486        return "classifier"

Multinomial logit classification with 2 regularization parameters

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

lambda1: float
    regularization parameter on direct link

lambda2: float
    regularization parameter on hidden layer

solver: str
    optimization function "L-BFGS-B",  "Newton-CG",
    "trust-ncg", "L-BFGS-B-lstsq", "Newton-CG-lstsq",
    "trust-ncg-lstsq" (see scipy.optimize.minimize)
    When using "L-BFGS-B-lstsq", "Newton-CG-lstsq", or "trust-ncg-lstsq",
    the initial value for the optimization is set to the least squares solution

seed: int
    reproducibility seed for nodes_sim=='uniform'

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

beta_: {array-like}
    regression coefficients

classes_: {array-like}
    unique classes in the target variable

minloglik_: float
    minimum value of the negative log-likelihood

Examples:

See also https://github.com/Techtonique/nnetsauce/blob/master/examples/ridge_classification.py

import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from time import time


breast_cancer = load_breast_cancer()
X = breast_cancer.data
y = breast_cancer.target

# split data into training set and test set
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# create the model with nnetsauce
fit_obj = ns.Ridge2Classifier(lambda1 = 6.90185578e+04,
                            lambda2 = 3.17392781e+02,
                            n_hidden_features=95,
                            n_clusters=2,
                            dropout = 3.62817383e-01,
                            type_clust = "gmm")

# fit the model on training set
start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")

# get the accuracy on test set
start = time()
print(fit_obj.score(X_test, y_test))
print(f"Elapsed {time() - start}")

# get area under the curve on test set (auc)
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
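
The `*-lstsq` solvers differ from their plain counterparts only in their starting point: the optimization is warm-started from the least squares solution instead of a vector of zeros. A minimal sketch, reusing the `X_train`/`y_train` split from the example above (the hyperparameter values are illustrative, not tuned):

```python
import nnetsauce as ns

# warm-started L-BFGS-B: same objective as solver="L-BFGS-B",
# but the initial coefficients come from np.linalg.lstsq
clf = ns.Ridge2Classifier(solver="L-BFGS-B-lstsq", lambda1=0.1, lambda2=0.1)
clf.fit(X_train, y_train)
print(clf.minloglik_)  # minimum of the negative log-likelihood objective
```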
def fit(self, X, y, **kwargs):
321    def fit(self, X, y, **kwargs):
322        """Fit Ridge model to training data (X, y).
323
324        for beta: regression coeffs (beta11, ..., beta1p, ..., betaK1, ..., betaKp)
325        for K classes and p covariates.
326
327        Args:
328
329            X: {array-like}, shape = [n_samples, n_features]
330                Training vectors, where n_samples is the number
331                of samples and n_features is the number of features.
332
333            y: array-like, shape = [n_samples]
334                Target values.
335
336            **kwargs: additional parameters to be passed to
337                    self.cook_training_set or self.obj.fit
338
339        Returns:
340
341            self: object
342
343        """
344
345        assert mx.is_factor(y), "y must contain only integers"
346
347        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
348
349        self.n_classes = len(np.unique(y))
350        self.classes_ = np.unique(y)  # for compatibility with sklearn
351        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
352
353        Y = mo.one_hot_encode2(output_y, self.n_classes)
354
355        # optimize for beta, minimize self.loglik (maximize loglik) -----
356        loglik_func, grad_func, hessian_func = self.loglik(X=scaled_Z, Y=Y)
357
358        if self.solver == "L-BFGS-B":
359            opt = minimize(
360                fun=loglik_func,
361                x0=np.zeros(scaled_Z.shape[1] * self.n_classes),
362                jac=grad_func,
363                method=self.solver,
364            )
365            self.beta_ = opt.x
366            self.minloglik_ = opt.fun
367
368        if self.solver in ("Newton-CG", "trust-ncg"):
369            opt = minimize(
370                fun=loglik_func,
371                x0=np.zeros(scaled_Z.shape[1] * self.n_classes),
372                jac=grad_func,
373                hess=hessian_func,
374                method=self.solver,
375            )
376            self.beta_ = opt.x
377            self.minloglik_ = opt.fun
378
379        if self.solver == "L-BFGS-B-lstsq":
380            opt = minimize(
381                fun=loglik_func,
382                x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten(
383                    order="F"
384                ),
385                jac=grad_func,
386                method="L-BFGS-B",
387            )
388            self.beta_ = opt.x
389            self.minloglik_ = opt.fun
390
391        if self.solver == "Newton-CG-lstsq":
392            opt = minimize(
393                fun=loglik_func,
394                x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten(
395                    order="F"
396                ),
397                jac=grad_func,
398                hess=hessian_func,
399                method="Newton-CG",
400            )
401            self.beta_ = opt.x
402            self.minloglik_ = opt.fun
403
404        if self.solver == "trust-ncg-lstsq":
405            opt = minimize(
406                fun=loglik_func,
407                x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten(
408                    order="F"
409                ),
410                jac=grad_func,
411                hess=hessian_func,
412                method="trust-ncg",
413            )
414            self.beta_ = opt.x
415            self.minloglik_ = opt.fun
416
417        self.coef_ = self.beta_
418
419        self.classes_ = np.unique(y)
420
421        return self

Fit Ridge model to training data (X, y).

for beta: regression coeffs (beta11, ..., beta1p, ..., betaK1, ..., betaKp) for K classes and p covariates.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
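
After `fit`, `beta_` holds the coefficients flattened in the class-major layout described above. A small sketch of how to recover the (p, K) coefficient matrix, mirroring the reshape used in `predict_proba` (assumes a fitted `fit_obj` as in the example, with the default `cluster_encode=True`):

```python
# total number of covariates seen by the model: original features + hidden nodes + cluster dummies
p = X_train.shape[1] + fit_obj.n_hidden_features + fit_obj.n_clusters
B = fit_obj.beta_.reshape(fit_obj.n_classes, p).T  # shape (p, K), one column per class
```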
def predict(self, X, **kwargs):
423    def predict(self, X, **kwargs):
424        """Predict test data X.
425
426        Args:
427
428            X: {array-like}, shape = [n_samples, n_features]
429                Training vectors, where n_samples is the number
430                of samples and n_features is the number of features.
431
432            **kwargs: additional parameters to be passed to
433                    self.cook_test_set
434
435        Returns:
436
437            model predictions: {array-like}
438        """
439
440        return np.argmax(self.predict_proba(X, **kwargs), axis=1)

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
def predict_proba(self, X, **kwargs):
442    def predict_proba(self, X, **kwargs):
443        """Predict probabilities for test data X.
444
445        Args:
446
447            X: {array-like}, shape = [n_samples, n_features]
448                Training vectors, where n_samples is the number
449                of samples and n_features is the number of features.
450
451            **kwargs: additional parameters to be passed to
452                    self.cook_test_set
453
454        Returns:
455
456            probability estimates for test data: {array-like}
457
458        """
459        if len(X.shape) == 1:
460            n_features = X.shape[0]
461            new_X = mo.rbind(
462                X.reshape(1, n_features),
463                np.ones(n_features).reshape(1, n_features),
464            )
465
466            Z = self.cook_test_set(new_X, **kwargs)
467
468        else:
469            Z = self.cook_test_set(X, **kwargs)
470
471        ZB = mo.safe_sparse_dot(
472            a=Z,
473            b=self.beta_.reshape(
474                self.n_classes,
475                X.shape[1] + self.n_hidden_features + self.n_clusters,
476            ).T,
477            backend=self.backend,
478        )
479
480        exp_ZB = np.exp(ZB)
481
482        return exp_ZB / exp_ZB.sum(axis=1)[:, None]

Predict probabilities for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

probability estimates for test data: {array-like}
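
For clarity, `predict` is simply the argmax of `predict_proba` over the class axis; a short sketch assuming a fitted `fit_obj` as in the example above:

```python
probs = fit_obj.predict_proba(X_test)           # shape (n_samples, n_classes); rows sum to 1
labels = fit_obj.predict(X_test)                # class indices
assert (labels == probs.argmax(axis=1)).all()   # predict() takes the argmax of predict_proba()
```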
class Ridge2MultitaskClassifier(nnetsauce.ridge2.ridge2.Ridge2, sklearn.base.ClassifierMixin):
 23class Ridge2MultitaskClassifier(Ridge2, ClassifierMixin):
 24    """Multitask Ridge classification with 2 regularization parameters
 25
 26    Parameters:
 27
 28        n_hidden_features: int
 29            number of nodes in the hidden layer
 30
 31        activation_name: str
 32            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 33
 34        a: float
 35            hyperparameter for 'prelu' or 'elu' activation function
 36
 37        nodes_sim: str
 38            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 39            'uniform'
 40
 41        bias: boolean
 42            indicates if the hidden layer contains a bias term (True) or not
 43            (False)
 44
 45        dropout: float
 46            regularization parameter; (random) percentage of nodes dropped out
 47            of the training
 48
 49        n_clusters: int
 50            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 51                no clustering)
 52
 53        cluster_encode: bool
 54            defines how the variable containing clusters is treated (default is one-hot)
 55            if `False`, then labels are used, without one-hot encoding
 56
 57        type_clust: str
 58            type of clustering method: currently k-means ('kmeans') or Gaussian
 59            Mixture Model ('gmm')
 60
 61        type_scaling: a tuple of 3 strings
 62            scaling methods for inputs, hidden layer, and clustering respectively
 63            (and when relevant).
 64            Currently available: standardization ('std') or MinMax scaling ('minmax')
 65
 66        lambda1: float
 67            regularization parameter on direct link
 68
 69        lambda2: float
 70            regularization parameter on hidden layer
 71
 72        seed: int
 73            reproducibility seed for nodes_sim=='uniform'
 74
 75        backend: str
 76            "cpu" or "gpu" or "tpu"
 77
 78    Attributes:
 79
 80        beta_: {array-like}
 81            regression coefficients
 82
 83        coef_: {array-like}
 84            alias for `beta_`, regression coefficients
 85
 86    Examples:
 87
 88    See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/ridgemtask_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/ridgemtask_classification.py)
 89
 90    ```python
 91    import nnetsauce as ns
 92    import numpy as np
 93    from sklearn.datasets import load_breast_cancer
 94    from sklearn.model_selection import train_test_split
 95    from sklearn import metrics
 96    from time import time
 97
 98    breast_cancer = load_breast_cancer()
 99    Z = breast_cancer.data
100    t = breast_cancer.target
101    np.random.seed(123)
102    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)
103
104    fit_obj = ns.Ridge2MultitaskClassifier(n_hidden_features=int(9.83730469e+01),
105                                    dropout=4.31054687e-01,
106                                    n_clusters=int(1.71484375e+00),
107                                    lambda1=1.24023438e+01, lambda2=7.30263672e+03)
108
109    start = time()
110    fit_obj.fit(X_train, y_train)
111    print(f"Elapsed {time() - start}")
112
113    print(fit_obj.score(X_test, y_test))
114    print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
115
116    start = time()
117    preds = fit_obj.predict(X_test)
118    print(f"Elapsed {time() - start}")
119    print(metrics.classification_report(preds, y_test))
120    ```
121
122    """
123
124    # construct the object -----
125    _estimator_type = "classifier"
126
127    def __init__(
128        self,
129        n_hidden_features=5,
130        activation_name="relu",
131        a=0.01,
132        nodes_sim="sobol",
133        bias=True,
134        dropout=0,
135        n_clusters=2,
136        cluster_encode=True,
137        type_clust="kmeans",
138        type_scaling=("std", "std", "std"),
139        lambda1=0.1,
140        lambda2=0.1,
141        seed=123,
142        backend="cpu",
143    ):
144        super().__init__(
145            n_hidden_features=n_hidden_features,
146            activation_name=activation_name,
147            a=a,
148            nodes_sim=nodes_sim,
149            bias=bias,
150            dropout=dropout,
151            n_clusters=n_clusters,
152            cluster_encode=cluster_encode,
153            type_clust=type_clust,
154            type_scaling=type_scaling,
155            lambda1=lambda1,
156            lambda2=lambda2,
157            seed=seed,
158            backend=backend,
159        )
160
161        self.type_fit = "classification"
162        self.coef_ = None
163
164    def fit(self, X, y, **kwargs):
165        """Fit Ridge model to training data (X, y).
166
167        Args:
168
169            X: {array-like}, shape = [n_samples, n_features]
170                Training vectors, where n_samples is the number
171                of samples and n_features is the number of features.
172
173            y: array-like, shape = [n_samples]
174                Target values.
175
176            **kwargs: additional parameters to be passed to
177                    self.cook_training_set or self.obj.fit
178
179        Returns:
180
181            self: object
182
183        """
184
185        sys_platform = platform.system()
186
187        assert mx.is_factor(y), "y must contain only integers"
188
189        self.classes_ = np.unique(y)  # for compatibility with sklearn
190        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
191
192        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
193
194        n_X, p_X = X.shape
195        n_Z, p_Z = scaled_Z.shape
196
197        self.n_classes = len(np.unique(y))
198
199        # multitask response
200        Y = mo.one_hot_encode2(output_y, self.n_classes)
201
202        if self.n_clusters > 0:
203            if self.encode_clusters == True:
204                n_features = p_X + self.n_clusters
205            else:
206                n_features = p_X + 1
207        else:
208            n_features = p_X
209
210        X_ = scaled_Z[:, 0:n_features]
211        Phi_X_ = scaled_Z[:, n_features:p_Z]
212
213        B = mo.crossprod(x=X_, backend=self.backend) + self.lambda1 * np.diag(
214            np.repeat(1, X_.shape[1])
215        )
216        C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend)
217        D = mo.crossprod(
218            x=Phi_X_, backend=self.backend
219        ) + self.lambda2 * np.diag(np.repeat(1, Phi_X_.shape[1]))
220
221        if sys_platform in ("Linux", "Darwin"):
222            B_inv = pinv(B) if self.backend == "cpu" else jpinv(B)
223        else:
224            B_inv = pinv(B)
225
226        W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend)
227        S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend)
228
229        if sys_platform in ("Linux", "Darwin"):
230            S_inv = pinv(S_mat) if self.backend == "cpu" else jpinv(S_mat)
231        else:
232            S_inv = pinv(S_mat)
233
234        Y2 = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend)
235        inv = mo.rbind(
236            mo.cbind(
237                x=B_inv + mo.crossprod(x=W, y=Y2, backend=self.backend),
238                y=-np.transpose(Y2),
239                backend=self.backend,
240            ),
241            mo.cbind(x=-Y2, y=S_inv, backend=self.backend),
242            backend=self.backend,
243        )
244
245        self.beta_ = mo.safe_sparse_dot(
246            a=inv,
247            b=mo.crossprod(x=scaled_Z, y=Y, backend=self.backend),
248            backend=self.backend,
249        )
250        self.coef_ = self.beta_  # sklearn compatibility
251        self.classes_ = np.unique(y)
252        self._is_fitted = True
253        return self
254
255    def predict(self, X, **kwargs):
256        """Predict test data X.
257
258        Args:
259
260            X: {array-like}, shape = [n_samples, n_features]
261                Training vectors, where n_samples is the number
262                of samples and n_features is the number of features.
263
264            **kwargs: additional parameters to be passed to
265                    self.cook_test_set
266
267        Returns:
268
269            model predictions: {array-like}
270
271        """
272
273        return np.argmax(self.predict_proba(X, **kwargs), axis=1)
274
275    def predict_proba(self, X, **kwargs):
276        """Predict probabilities for test data X.
277
278        Args:
279
280            X: {array-like}, shape = [n_samples, n_features]
281                Training vectors, where n_samples is the number
282                of samples and n_features is the number of features.
283
284            **kwargs: additional parameters to be passed to
285                    self.cook_test_set
286
287        Returns:
288
289            probability estimates for test data: {array-like}
290
291        """
292
293        if len(X.shape) == 1:
294            n_features = X.shape[0]
295            new_X = mo.rbind(
296                x=X.reshape(1, n_features),
297                y=np.ones(n_features).reshape(1, n_features),
298                backend=self.backend,
299            )
300
301            Z = self.cook_test_set(new_X, **kwargs)
302
303        else:
304            Z = self.cook_test_set(X, **kwargs)
305
306        ZB = mo.safe_sparse_dot(a=Z, b=self.beta_, backend=self.backend)
307
308        exp_ZB = np.exp(ZB)
309
310        return exp_ZB / exp_ZB.sum(axis=1)[:, None]
311
312    def score(self, X, y, scoring=None):
313        """Scoring function for classification.
314
315        Args:
316
317            X: {array-like}, shape = [n_samples, n_features]
318                Training vectors, where n_samples is the number
319                of samples and n_features is the number of features.
320
321            y: array-like, shape = [n_samples]
322                Target values.
323
324            scoring: str
325                scoring method (default is accuracy)
326
327        Returns:
328
329            score: float
330        """
331
332        if scoring is None:
333            scoring = "accuracy"
334
335        if scoring == "accuracy":
336            return skm2.accuracy_score(y, self.predict(X))
337
338        if scoring == "f1":
339            return skm2.f1_score(y, self.predict(X))
340
341        if scoring == "precision":
342            return skm2.precision_score(y, self.predict(X))
343
344        if scoring == "recall":
345            return skm2.recall_score(y, self.predict(X))
346
347        if scoring == "roc_auc":
348            return skm2.roc_auc_score(y, self.predict(X))
349
350        if scoring == "log_loss":
351            return skm2.log_loss(y, self.predict_proba(X))
352
353        if scoring == "balanced_accuracy":
354            return skm2.balanced_accuracy_score(y, self.predict(X))
355
356        if scoring == "average_precision":
357            return skm2.average_precision_score(y, self.predict(X))
358
359        if scoring == "neg_brier_score":
360            return -skm2.brier_score_loss(y, self.predict_proba(X))
361
362        if scoring == "neg_log_loss":
363            return -skm2.log_loss(y, self.predict_proba(X))
364
365    @property
366    def _estimator_type(self):
367        return "classifier"
368
369    def partial_fit(
370        self, X, y, classes=None, learning_rate=0.01, decay=0.001, **kwargs
371    ):
372        """Incrementally fit the Ridge model using SGD-style updates.
373
374        Uses the update rule: w_{n+1} = w_n + γ_n * x_n * [y_n - x_n^T * w_n] - γ_n * λ * w_n
375        for online learning with individual samples.
376
377        Args:
378            X: {array-like}, shape = [n_samples, n_features]
379                Training vectors for this batch
380
381            y: array-like, shape = [n_samples]
382                Target values for this batch
383
384            classes: array-like, shape = [n_classes], optional
385                List of all possible target classes. Must be provided on first call
386                to partial_fit if not already fitted.
387
388            learning_rate: float, default=0.01
389                Initial learning rate for SGD updates
390
391            decay: float, default=0.001
392                Learning rate decay parameter
393
394            **kwargs: additional parameters to be passed to self.cook_training_set
395
396        Returns:
397            self: object
398        """
399        # Input validation
400        X = np.asarray(X)
401        y = np.asarray(y)
402
403        if X.shape[0] != y.shape[0]:
404            raise ValueError("X and y must have the same number of samples")
405
406        assert mx.is_factor(y), "y must contain only integers"
407
408        # Handle classes on first call
409        if not self._is_fitted:
410            if classes is not None:
411                self.classes_ = np.array(classes)
412                self.n_classes_ = len(self.classes_)
413            else:
414                self.classes_ = np.unique(y)
415                self.n_classes_ = len(self.classes_)
416
417            self.n_classes = len(self.classes_)
418
419            # Initialize learning parameters
420            self.initial_learning_rate = learning_rate
421            self.decay = decay
422            self._step_count = 0
423            self._is_fitted = True
424
425        else:
426            # Check for new classes
427            new_classes = np.setdiff1d(y, self.classes_)
428            if len(new_classes) > 0:
429                raise ValueError(
430                    f"New classes {new_classes} encountered. "
431                    "partial_fit cannot handle new classes after first call."
432                )
433
434        # Process the batch
435        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
436
437        # Get dimensions
438        n_samples, n_features_total = scaled_Z.shape
439        n_original_features = X.shape[1]
440
441        # Create one-hot encoded targets
442        Y = mo.one_hot_encode2(output_y, self.n_classes)
443
444        # Determine feature dimensions for regularization
445        if self.n_clusters > 0:
446            if self.cluster_encode:
447                n_direct_features = n_original_features + self.n_clusters
448            else:
449                n_direct_features = n_original_features + 1
450        else:
451            n_direct_features = n_original_features
452
453        # Initialize beta_ if first time
454        if not hasattr(self, "beta_") or self.beta_ is None:
455            self.beta_ = np.zeros((n_features_total, self.n_classes))
456
457        # Precompute indices for regularization
458        direct_indices = slice(0, n_direct_features)
459        hidden_indices = slice(n_direct_features, n_features_total)
460
461        # Process each sample with SGD
462        for i in range(n_samples):
463            self._step_count += 1
464
465            # Current learning rate with decay
466            current_lr = self.initial_learning_rate / (
467                1 + self.decay * self._step_count
468            )
469
470            # Current sample and target
471            x_i = scaled_Z[i, :]  # Feature vector
472            y_i = Y[i, :]  # Target vector (one-hot)
473
474            # Prediction: x_i^T * beta
475            prediction = x_i @ self.beta_
476
477            # Error: y_i - prediction
478            error = y_i - prediction
479
480            # Gradient update: current_lr * x_i * error
481            gradient_update = current_lr * np.outer(x_i, error)
482
483            # Regularization terms (more efficient indexing)
484            reg_update = np.zeros_like(self.beta_)
485            reg_update[direct_indices, :] = (
486                current_lr * self.lambda1 * self.beta_[direct_indices, :]
487            )
488            reg_update[hidden_indices, :] = (
489                current_lr * self.lambda2 * self.beta_[hidden_indices, :]
490            )
491
492            # Combined update: beta = beta + gradient_update - reg_update
493            self.beta_ += gradient_update - reg_update
494
495        self.coef_ = self.beta_  # sklearn compatibility
496
497        return self

Multitask Ridge classification with 2 regularization parameters

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

lambda1: float
    regularization parameter on direct link

lambda2: float
    regularization parameter on hidden layer

seed: int
    reproducibility seed for nodes_sim=='uniform'

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

beta_: {array-like}
    regression coefficients

coef_: {array-like}
    alias for `beta_`, regression coefficients

Examples:

See also https://github.com/Techtonique/nnetsauce/blob/master/examples/ridgemtask_classification.py

import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time

breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)

fit_obj = ns.Ridge2MultitaskClassifier(n_hidden_features=int(9.83730469e+01),
                                dropout=4.31054687e-01,
                                n_clusters=int(1.71484375e+00),
                                lambda1=1.24023438e+01, lambda2=7.30263672e+03)

start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")

print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))

start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
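
The class also provides `partial_fit` (source above), which applies per-sample SGD updates with a decaying learning rate. A minimal streaming sketch, reusing the split from the example above; the batching is purely illustrative, and it assumes the base class initializes the internal `_is_fitted` flag:

```python
import numpy as np
import nnetsauce as ns

clf = ns.Ridge2MultitaskClassifier(n_hidden_features=20, n_clusters=2)

# feed the training set in 10 successive mini-batches
batches = np.array_split(np.arange(X_train.shape[0]), 10)
for i, idx in enumerate(batches):
    clf.partial_fit(
        X_train[idx],
        y_train[idx],
        classes=np.unique(y_train) if i == 0 else None,  # classes are required on the first call
        learning_rate=0.01,
        decay=0.001,
    )

print(clf.score(X_test, y_test))
```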
def fit(self, X, y, **kwargs):
164    def fit(self, X, y, **kwargs):
165        """Fit Ridge model to training data (X, y).
166
167        Args:
168
169            X: {array-like}, shape = [n_samples, n_features]
170                Training vectors, where n_samples is the number
171                of samples and n_features is the number of features.
172
173            y: array-like, shape = [n_samples]
174                Target values.
175
176            **kwargs: additional parameters to be passed to
177                    self.cook_training_set or self.obj.fit
178
179        Returns:
180
181            self: object
182
183        """
184
185        sys_platform = platform.system()
186
187        assert mx.is_factor(y), "y must contain only integers"
188
189        self.classes_ = np.unique(y)  # for compatibility with sklearn
190        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
191
192        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
193
194        n_X, p_X = X.shape
195        n_Z, p_Z = scaled_Z.shape
196
197        self.n_classes = len(np.unique(y))
198
199        # multitask response
200        Y = mo.one_hot_encode2(output_y, self.n_classes)
201
202        if self.n_clusters > 0:
203            if self.encode_clusters == True:
204                n_features = p_X + self.n_clusters
205            else:
206                n_features = p_X + 1
207        else:
208            n_features = p_X
209
210        X_ = scaled_Z[:, 0:n_features]
211        Phi_X_ = scaled_Z[:, n_features:p_Z]
212
213        B = mo.crossprod(x=X_, backend=self.backend) + self.lambda1 * np.diag(
214            np.repeat(1, X_.shape[1])
215        )
216        C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend)
217        D = mo.crossprod(
218            x=Phi_X_, backend=self.backend
219        ) + self.lambda2 * np.diag(np.repeat(1, Phi_X_.shape[1]))
220
221        if sys_platform in ("Linux", "Darwin"):
222            B_inv = pinv(B) if self.backend == "cpu" else jpinv(B)
223        else:
224            B_inv = pinv(B)
225
226        W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend)
227        S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend)
228
229        if sys_platform in ("Linux", "Darwin"):
230            S_inv = pinv(S_mat) if self.backend == "cpu" else jpinv(S_mat)
231        else:
232            S_inv = pinv(S_mat)
233
234        Y2 = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend)
235        inv = mo.rbind(
236            mo.cbind(
237                x=B_inv + mo.crossprod(x=W, y=Y2, backend=self.backend),
238                y=-np.transpose(Y2),
239                backend=self.backend,
240            ),
241            mo.cbind(x=-Y2, y=S_inv, backend=self.backend),
242            backend=self.backend,
243        )
244
245        self.beta_ = mo.safe_sparse_dot(
246            a=inv,
247            b=mo.crossprod(x=scaled_Z, y=Y, backend=self.backend),
248            backend=self.backend,
249        )
250        self.coef_ = self.beta_  # sklearn compatibility
251        self.classes_ = np.unique(y)
252        self._is_fitted = True
253        return self

Fit Ridge model to training data (X, y).

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
def predict(self, X, **kwargs):
255    def predict(self, X, **kwargs):
256        """Predict test data X.
257
258        Args:
259
260            X: {array-like}, shape = [n_samples, n_features]
261                Training vectors, where n_samples is the number
262                of samples and n_features is the number of features.
263
264            **kwargs: additional parameters to be passed to
265                    self.cook_test_set
266
267        Returns:
268
269            model predictions: {array-like}
270
271        """
272
273        return np.argmax(self.predict_proba(X, **kwargs), axis=1)

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
def predict_proba(self, X, **kwargs):
275    def predict_proba(self, X, **kwargs):
276        """Predict probabilities for test data X.
277
278        Args:
279
280            X: {array-like}, shape = [n_samples, n_features]
281                Training vectors, where n_samples is the number
282                of samples and n_features is the number of features.
283
284            **kwargs: additional parameters to be passed to
285                    self.cook_test_set
286
287        Returns:
288
289            probability estimates for test data: {array-like}
290
291        """
292
293        if len(X.shape) == 1:
294            n_features = X.shape[0]
295            new_X = mo.rbind(
296                x=X.reshape(1, n_features),
297                y=np.ones(n_features).reshape(1, n_features),
298                backend=self.backend,
299            )
300
301            Z = self.cook_test_set(new_X, **kwargs)
302
303        else:
304            Z = self.cook_test_set(X, **kwargs)
305
306        ZB = mo.safe_sparse_dot(a=Z, b=self.beta_, backend=self.backend)
307
308        exp_ZB = np.exp(ZB)
309
310        return exp_ZB / exp_ZB.sum(axis=1)[:, None]

Predict probabilities for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

probability estimates for test data: {array-like}
def score(self, X, y, scoring=None):
312    def score(self, X, y, scoring=None):
313        """Scoring function for classification.
314
315        Args:
316
317            X: {array-like}, shape = [n_samples, n_features]
318                Training vectors, where n_samples is the number
319                of samples and n_features is the number of features.
320
321            y: array-like, shape = [n_samples]
322                Target values.
323
324            scoring: str
325                scoring method (default is accuracy)
326
327        Returns:
328
329            score: float
330        """
331
332        if scoring is None:
333            scoring = "accuracy"
334
335        if scoring == "accuracy":
336            return skm2.accuracy_score(y, self.predict(X))
337
338        if scoring == "f1":
339            return skm2.f1_score(y, self.predict(X))
340
341        if scoring == "precision":
342            return skm2.precision_score(y, self.predict(X))
343
344        if scoring == "recall":
345            return skm2.recall_score(y, self.predict(X))
346
347        if scoring == "roc_auc":
348            return skm2.roc_auc_score(y, self.predict(X))
349
350        if scoring == "log_loss":
351            return skm2.log_loss(y, self.predict_proba(X))
352
353        if scoring == "balanced_accuracy":
354            return skm2.balanced_accuracy_score(y, self.predict(X))
355
356        if scoring == "average_precision":
357            return skm2.average_precision_score(y, self.predict(X))
358
359        if scoring == "neg_brier_score":
360            return -skm2.brier_score_loss(y, self.predict_proba(X))
361
362        if scoring == "neg_log_loss":
363            return -skm2.log_loss(y, self.predict_proba(X))

Scoring function for classification.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

scoring: str
    scoring method (default is accuracy)

Returns:

score: float
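
As a quick reference, the scoring strings accepted by `score` can be compared directly; a short sketch reusing `fit_obj`, `X_test` and `y_test` from the example above:

```python
for metric in ("accuracy", "f1", "roc_auc", "balanced_accuracy", "neg_log_loss"):
    print(metric, fit_obj.score(X_test, y_test, scoring=metric))
```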
class Ridge2Forecaster:
 15class Ridge2Forecaster:
 16    """Vectorized Ridge2 RVFL for multivariate time series forecasting.
 17
 18    Parameters
 19    ----------
 20    lags : int, optional
 21        Number of lags to use for feature engineering, by default 1
 22    nb_hidden : int, optional
 23        Number of hidden units, by default 5
 24    activ : str, optional
 25        Activation function, by default 'relu'
 26    lambda_1 : float, optional
 27        Ridge regularization parameter for input features, by default 0.1
 28    lambda_2 : float, optional
 29        Ridge regularization parameter for hidden units, by default 0.1
 30    nodes_sim : str, optional
 31        Type of quasi-random sequence for weight initialization, by default 'sobol'
 32    seed : int, optional
 33        Random seed for reproducibility, by default 42
 34    """
 35
 36    def __init__(
 37        self,
 38        lags=1,
 39        nb_hidden=5,
 40        activ="relu",
 41        lambda_1=0.1,
 42        lambda_2=0.1,
 43        nodes_sim="sobol",
 44        seed=42,
 45    ):
 46        self.lags = lags
 47        self.nb_hidden = nb_hidden
 48        self.lambda_1 = lambda_1
 49        self.lambda_2 = lambda_2
 50        self.nodes_sim = nodes_sim
 51        self.seed = seed
 52        self.coef_ = None
 53
 54        # Activation functions
 55        activations = {
 56            "relu": lambda x: jnp.maximum(0, x),
 57            "sigmoid": lambda x: 1 / (1 + jnp.exp(-x)),
 58            "tanh": jnp.tanh,
 59            "linear": lambda x: x,
 60        }
 61        self.activation = jax.jit(activations[activ])
 62
 63    def _create_lags(self, y):
 64        """Create lagged feature matrix (vectorized)."""
 65        n, p = y.shape
 66        X = jnp.concatenate(
 67            [y[self.lags - i - 1: n - i - 1] for i in range(self.lags)], axis=1
 68        )
 69        Y = y[self.lags:]
 70        return X, Y
 71
 72    def _init_weights(self, n_features):
 73        """Initialize hidden layer weights using quasi-random sequences."""
 74        total_dim = n_features * self.nb_hidden
 75
 76        if self.nodes_sim == "sobol":
 77            sampler = qmc.Sobol(d=total_dim, scramble=False, seed=self.seed)
 78            W = sampler.random(1).reshape(n_features, self.nb_hidden)
 79            W = 2 * W - 1
 80        else:
 81            key = jax.random.PRNGKey(self.seed)
 82            W = jax.random.uniform(
 83                key, (n_features, self.nb_hidden), minval=-1, maxval=1
 84            )
 85
 86        return jnp.array(W)
 87
 88    @partial(jax.jit, static_argnums=(0,))
 89    def _compute_hidden(self, X, W):
 90        """Compute hidden layer features (vectorized)."""
 91        return self.activation(X @ W)
 92
 93    @partial(jax.jit, static_argnums=(0,))
 94    def _solve_ridge2(self, X, H, Y):
 95        """Solve ridge regression with dual regularization."""
 96        n, p_x = X.shape
 97        _, p_h = H.shape
 98
 99        Y_mean = jnp.mean(Y, axis=0)
100        Y_c = Y - Y_mean
101
102        X_mean = jnp.mean(X, axis=0)
103        X_std = jnp.std(X, axis=0)
104        X_std = jnp.where(X_std == 0, 1.0, X_std)
105        X_s = (X - X_mean) / X_std
106
107        H_mean = jnp.mean(H, axis=0)
108        H_std = jnp.std(H, axis=0)
109        H_std = jnp.where(H_std == 0, 1.0, H_std)
110        H_s = (H - H_mean) / H_std
111
112        XX = X_s.T @ X_s + self.lambda_1 * jnp.eye(p_x)
113        XH = X_s.T @ H_s
114        HH = H_s.T @ H_s + self.lambda_2 * jnp.eye(p_h)
115
116        XX_inv = jnp.linalg.inv(XX)
117        S = HH - XH.T @ XX_inv @ XH
118        S_inv = jnp.linalg.inv(S)
119
120        XY = X_s.T @ Y_c
121        HY = H_s.T @ Y_c
122
123        beta = XX_inv @ (XY - XH @ S_inv @ (HY - XH.T @ XX_inv @ XY))
124        gamma = S_inv @ (HY - XH.T @ beta)
125        self.coef_ = jnp.concatenate([beta, gamma], axis=1)
126
127        return beta, gamma, Y_mean, X_mean, X_std, H_mean, H_std
128
129    def fit(self, y):
130        """Fit the Ridge2 model.
131
132        Parameters
133        ----------
134        y : array-like of shape (n_samples,) or (n_samples, n_series)
135            Target values; a 1-D array is treated as a single series.
136        """
137        y = jnp.array(y)
138        if y.ndim == 1:
139            y = y[:, None]
140
141        X, Y = self._create_lags(y)
142        self.n_series = Y.shape[1]
143
144        self.W = self._init_weights(X.shape[1])
145        H = self._compute_hidden(X, self.W)
146
147        (
148            self.beta,
149            self.gamma,
150            self.Y_mean,
151            self.X_mean,
152            self.X_std,
153            self.H_mean,
154            self.H_std,
155        ) = self._solve_ridge2(X, H, Y)
156
157        # Compute residuals for prediction intervals
158        X_s = (X - self.X_mean) / self.X_std
159        H_s = (H - self.H_mean) / self.H_std
160        fitted = X_s @ self.beta + H_s @ self.gamma + self.Y_mean
161        self.residuals = np.array(Y - fitted)
162
163        self.last_obs = y[-self.lags:]
164        return self
165
166    @partial(jax.jit, static_argnums=(0,))
167    def _predict_step(self, x_new):
168        """Single prediction step (JIT-compiled).
169
170        Parameters
171        ----------
172        x_new : array-like of shape (n_features,)
173            New input data.
174
175        Returns
176        -------
177        y_next : float
178            Next-step prediction.
179        """
180        x_s = (x_new - self.X_mean) / self.X_std
181        h = self.activation(x_s @ self.W)
182        h_s = (h - self.H_mean) / self.H_std
183        return x_s @ self.beta + h_s @ self.gamma + self.Y_mean
184
185    def _forecast(self, h=5):
186        """Generate h-step ahead recursive forecasts.
187
188        Parameters
189        ----------
190        h : int, optional
191            Number of steps to forecast, by default 5
192
193        Returns
194        -------
195        forecasts : array-like of shape (h,)
196            Forecasted values.
197        """
198        forecasts = []
199        current = self.last_obs.copy()
200
201        for _ in range(h):
202            x_new = current.flatten()[None, :]
203            y_next = self._predict_step(x_new)[0]
204            forecasts.append(y_next)
205            current = jnp.vstack([current[1:], y_next])
206
207        return jnp.array(forecasts)
208
209    def predict(self, h=5, level=None, method="gaussian", B=100):
210        """Generate prediction intervals with proper uncertainty propagation.
211
212        Parameters
213        ----------
214        h : int, optional
215            Number of steps to forecast, by default 5
216        level : float, optional
217            Confidence level for prediction intervals, by default None
218        method : str, optional
219            Method for prediction intervals ('gaussian' or 'bootstrap'), by default 'gaussian'
220        B : int, optional
221            Number of bootstrap samples, by default 100
222
223        Returns
224        -------
225        point_forecast : array-like of shape (h,)
226            Point forecasted values.
227        lower : array-like of shape (h,)
228            Lower bounds of prediction intervals.
229        upper : array-like of shape (h,)
230            Upper bounds of prediction intervals.
231        """
232
233        point_forecast = self._forecast(h)
234
235        if level is None:
236            return point_forecast
237
238        # probabilistic prediction intervals
239        if method == "gaussian":
240            # Use residual std with horizon-dependent scaling
241            residual_std = np.std(self.residuals, axis=0)
242            z = norm.ppf(1 - (1 - level / 100) / 2)
243
244            # Scale uncertainty by sqrt(h) for each horizon
245            horizon_scale = np.sqrt(np.arange(1, h + 1))[:, None]
246            std_expanded = residual_std * horizon_scale
247
248            lower = point_forecast - z * std_expanded
249            upper = point_forecast + z * std_expanded
250
251        elif method == "bootstrap":
252            # Proper residual bootstrap
253            key = jax.random.PRNGKey(self.seed)
254            n_residuals = len(self.residuals)
255            sims = []
256
257            for _ in range(B):
258                key, subkey = jax.random.split(key)
259                boot_indices = np.random.choice(
260                    n_residuals, size=h, replace=True
261                )
262                boot_resids = self.residuals[boot_indices]
263
264                current = self.last_obs.copy()
265                path = []
266
267                for t in range(h):
268                    x_new = current.flatten()[None, :]
269                    y_pred = self._predict_step(x_new)[0]
270                    y_sim = y_pred + boot_resids[t]
271                    path.append(y_sim)
272                    current = jnp.vstack([current[1:], y_sim])
273
274                sims.append(jnp.array(path))
275
276            sims = jnp.array(sims)
277            lower = jnp.percentile(sims, (100 - level) / 2, axis=0)
278            upper = jnp.percentile(sims, 100 - (100 - level) / 2, axis=0)
279
280        return {
281            "mean": np.array(point_forecast),
282            "lower": np.array(lower),
283            "upper": np.array(upper),
284        }

Vectorized Ridge2 RVFL for multivariate time series forecasting.

Parameters

lags : int, optional
    Number of lags to use for feature engineering, by default 1
nb_hidden : int, optional
    Number of hidden units, by default 5
activ : str, optional
    Activation function, by default 'relu'
lambda_1 : float, optional
    Ridge regularization parameter for input features, by default 0.1
lambda_2 : float, optional
    Ridge regularization parameter for hidden units, by default 0.1
nodes_sim : str, optional
    Type of quasi-random sequence for weight initialization, by default 'sobol'
seed : int, optional
    Random seed for reproducibility, by default 42
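
Since the class docstring above carries no usage example, here is a minimal sketch on illustrative random-walk data (the series and hyperparameter values are hypothetical; the return format follows the `predict` source shown above):

```python
import numpy as np
import nnetsauce as ns

# two illustrative random-walk series, shape (n_samples, n_series)
rng = np.random.default_rng(42)
y = np.cumsum(rng.normal(size=(100, 2)), axis=0)

fcast = ns.Ridge2Forecaster(lags=3, nb_hidden=10, lambda_1=0.1, lambda_2=0.1)
fcast.fit(y)

point = fcast.predict(h=5)           # point forecasts, shape (h, n_series)
res = fcast.predict(h=5, level=95)   # dict with "mean", "lower", "upper" arrays
print(res["mean"].shape)             # (5, 2)
```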

def fit(self, y):
129    def fit(self, y):
130        """Fit the Ridge2 model.
131
132        Parameters
133        ----------
134        y : array-like of shape (n_samples,) or (n_samples, n_series)
135            Target values; a 1-D array is treated as a single series.
136        """
137        y = jnp.array(y)
138        if y.ndim == 1:
139            y = y[:, None]
140
141        X, Y = self._create_lags(y)
142        self.n_series = Y.shape[1]
143
144        self.W = self._init_weights(X.shape[1])
145        H = self._compute_hidden(X, self.W)
146
147        (
148            self.beta,
149            self.gamma,
150            self.Y_mean,
151            self.X_mean,
152            self.X_std,
153            self.H_mean,
154            self.H_std,
155        ) = self._solve_ridge2(X, H, Y)
156
157        # Compute residuals for prediction intervals
158        X_s = (X - self.X_mean) / self.X_std
159        H_s = (H - self.H_mean) / self.H_std
160        fitted = X_s @ self.beta + H_s @ self.gamma + self.Y_mean
161        self.residuals = np.array(Y - fitted)
162
163        self.last_obs = y[-self.lags:]
164        return self

Fit the Ridge2 model.

Parameters

y : array-like of shape (n_samples,) or (n_samples, n_series)
    Target values.

def predict(self, h=5, level=None, method='gaussian', B=100):
    def predict(self, h=5, level=None, method="gaussian", B=100):
        """Generate prediction intervals with proper uncertainty propagation.

        Parameters
        ----------
        h : int, optional
            Number of steps to forecast, by default 5
        level : float, optional
            Confidence level for prediction intervals, by default None
        method : str, optional
            Method for prediction intervals ('gaussian' or 'bootstrap'), by default 'gaussian'
        B : int, optional
            Number of bootstrap samples, by default 100

        Returns
        -------
        point_forecast : array-like
            Point forecasts, returned alone when level is None.
        dict
            When level is given, a dict with keys 'mean', 'lower' and
            'upper' containing the point forecasts and the prediction
            interval bounds.
        """

        point_forecast = self._forecast(h)

        if level is None:
            return point_forecast

        # probabilistic prediction intervals
        if method == "gaussian":
            # Use residual std with horizon-dependent scaling
            residual_std = np.std(self.residuals, axis=0)
            z = norm.ppf(1 - (1 - level / 100) / 2)

            # Scale uncertainty by sqrt(h) for each horizon
            horizon_scale = np.sqrt(np.arange(1, h + 1))[:, None]
            std_expanded = residual_std * horizon_scale

            lower = point_forecast - z * std_expanded
            upper = point_forecast + z * std_expanded

        elif method == "bootstrap":
            # Proper residual bootstrap
            key = jax.random.PRNGKey(self.seed)
            n_residuals = len(self.residuals)
            sims = []

            for _ in range(B):
                key, subkey = jax.random.split(key)
                # indices are drawn with NumPy's global RNG (the JAX subkey is not used here)
                boot_indices = np.random.choice(
                    n_residuals, size=h, replace=True
                )
                boot_resids = self.residuals[boot_indices]

                current = self.last_obs.copy()
                path = []

                for t in range(h):
                    x_new = current.flatten()[None, :]
                    y_pred = self._predict_step(x_new)[0]
                    y_sim = y_pred + boot_resids[t]
                    path.append(y_sim)
                    current = jnp.vstack([current[1:], y_sim])

                sims.append(jnp.array(path))

            sims = jnp.array(sims)
            lower = jnp.percentile(sims, (100 - level) / 2, axis=0)
            upper = jnp.percentile(sims, 100 - (100 - level) / 2, axis=0)

        return {
            "mean": np.array(point_forecast),
            "lower": np.array(lower),
            "upper": np.array(upper),
        }

Generate prediction intervals with proper uncertainty propagation.

Parameters

h : int, optional
    Number of steps to forecast, by default 5

level : float, optional
    Confidence level for prediction intervals, by default None

method : str, optional
    Method for prediction intervals ('gaussian' or 'bootstrap'), by default 'gaussian'

B : int, optional
    Number of bootstrap samples, by default 100

Returns

point_forecast : array-like
    Point forecasts, returned alone when level is None.

dict
    When level is given, a dict with keys 'mean', 'lower' and 'upper' containing the point forecasts and the prediction interval bounds.
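
A rough sketch of how these outputs are read, continuing the hypothetical example above with a fitted model: with method='gaussian' the bounds are mean plus/minus z * residual_std * sqrt(t) at horizon t, while method='bootstrap' resamples the stored residuals B times and simulates h-step paths; either way the returned dict is accessed the same way.

# Sketch, assuming `model` was fitted as in the earlier example.
res_boot = model.predict(h=10, level=90, method="bootstrap", B=200)
print(res_boot["mean"][:3])   # first three point forecasts
print(res_boot["lower"][:3])  # matching lower interval bounds
print(res_boot["upper"][:3])  # matching upper interval bounds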

class SubSampler:
class SubSampler:
    """Subsampling class.

    Attributes:

       y: array-like, shape = [n_samples]
           Target values.

       row_sample: double
           subsampling fraction

       n_samples: int
            subsampling by using the number of rows (supersedes row_sample)

       seed: int
           reproducibility seed

       n_jobs: int
            number of jobs to run in parallel

       verbose: bool
            print progress messages and bars
    """

    def __init__(
        self,
        y,
        row_sample=0.8,
        n_samples=None,
        seed=123,
        n_jobs=None,
        verbose=False,
    ):
        self.y = y
        self.n_samples = n_samples
        if self.n_samples is None:
            assert (
                row_sample < 1 and row_sample >= 0
            ), "'row_sample' must be >= 0 and < 1"
            self.row_sample = row_sample
        else:
            assert self.n_samples < len(y), "'n_samples' must be < len(y)"
            self.row_sample = self.n_samples / len(y)
        self.seed = seed
        self.indices = None
        self.n_jobs = n_jobs
        self.verbose = verbose

    def subsample(self):
        """Returns indices of subsampled input data.

        Examples:

        <ul>
            <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240105_subsampling.ipynb">20240105_subsampling.ipynb</a> </li>
            <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240131_subsampling_nsamples.ipynb">20240131_subsampling_nsamples.ipynb</a> </li>
        </ul>

        """
        self.indices = dosubsample(
            y=self.y,
            row_sample=self.row_sample,
            seed=self.seed,
            n_jobs=self.n_jobs,
            verbose=self.verbose,
        )
        return self.indices

Subsampling class.

Attributes:

y: array-like, shape = [n_samples] Target values.

row_sample: double subsampling fraction

n_samples: int subsampling by using the number of rows (supersedes row_sample)

seed: int reproducibility seed

n_jobs: int number of jobs to run in parallel

verbose: bool print progress messages and bars
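
A short usage sketch, assuming SubSampler is importable from the nnetsauce package top level: draw a row subsample driven by the target y and reuse the returned indices to subset the feature matrix.

# Sketch: subsample rows with SubSampler and reuse the indices on X.
import numpy as np
import nnetsauce as ns

rng = np.random.default_rng(123)
X = rng.normal(size=(200, 5))
y = rng.integers(0, 2, size=200)  # toy binary target

sampler = ns.SubSampler(y=y, row_sample=0.5, seed=123)
# alternatively: ns.SubSampler(y=y, n_samples=50)  # n_samples supersedes row_sample
idx = sampler.subsample()         # indices of the retained rows
X_sub, y_sub = X[idx], y[idx]
print(X_sub.shape, y_sub.shape)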

def subsample(self):
    def subsample(self):
        """Returns indices of subsampled input data.

        Examples:

        <ul>
            <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240105_subsampling.ipynb">20240105_subsampling.ipynb</a> </li>
            <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240131_subsampling_nsamples.ipynb">20240131_subsampling_nsamples.ipynb</a> </li>
        </ul>

        """
        self.indices = dosubsample(
            y=self.y,
            row_sample=self.row_sample,
            seed=self.seed,
            n_jobs=self.n_jobs,
            verbose=self.verbose,
        )
        return self.indices

Returns indices of subsampled input data.

Examples: