nnetsauce

 1from .base.base import Base
 2from .base.baseRegressor import BaseRegressor
 3from .boosting.adaBoostClassifier import AdaBoostClassifier
 4from .custom.customClassifier import CustomClassifier
 5from .custom.customRegressor import CustomRegressor
 6from .custom.customBackpropRegressor import CustomBackPropRegressor
 7from .datasets import Downloader
 8from .deep.deepClassifier import DeepClassifier
 9from .deep.deepRegressor import DeepRegressor
10from .deep.deepMTS import DeepMTS
11from .glm.glmClassifier import GLMClassifier
12from .glm.glmRegressor import GLMRegressor
13from .kernel.kernel import KernelRidge
14from .lazypredict.lazydeepClassifier import LazyDeepClassifier, LazyClassifier
15from .lazypredict.lazydeepRegressor import LazyDeepRegressor, LazyRegressor
18from .lazypredict.lazydeepMTS import LazyDeepMTS, LazyMTS
19from .mts.mts import MTS
20from .mts.mlarch import MLARCH
21from .mts.classical import ClassicalMTS
22from .multitask.multitaskClassifier import MultitaskClassifier
23from .multitask.simplemultitaskClassifier import SimpleMultitaskClassifier
24from .neuralnet.neuralnetregression import NeuralNetRegressor
25from .neuralnet.neuralnetclassification import NeuralNetClassifier
26from .optimizers.optimizer import Optimizer
27from .predictioninterval import PredictionInterval
28from .quantile.quantileregression import QuantileRegressor
29from .quantile.quantileclassification import QuantileClassifier
30from .randombag.randomBagClassifier import RandomBagClassifier
31from .randombag.randomBagRegressor import RandomBagRegressor
32from .ridge.ridge import RidgeRegressor
33from .ridge2.ridge2Classifier import Ridge2Classifier
34from .ridge2.ridge2Regressor import Ridge2Regressor
35from .ridge2.ridge2MultitaskClassifier import Ridge2MultitaskClassifier
36from .rvfl.bayesianrvflRegressor import BayesianRVFLRegressor
37from .rvfl.bayesianrvfl2Regressor import BayesianRVFL2Regressor
38from .sampling import SubSampler
39from .updater import RegressorUpdater, ClassifierUpdater
40from .votingregressor import MedianVotingRegressor
41
42__all__ = [
43    "AdaBoostClassifier",
44    "Base",
45    "BaseRegressor",
46    "BayesianRVFLRegressor",
47    "BayesianRVFL2Regressor",
48    "ClassicalMTS",
49    "CustomClassifier",
50    "CustomRegressor",
51    "CustomBackPropRegressor",
52    "DeepClassifier",
53    "DeepRegressor",
54    "DeepMTS",
55    "Downloader",
56    "GLMClassifier",
57    "GLMRegressor",
58    "KernelRidge",
59    "LazyClassifier",
60    "LazyRegressor",
61    "LazyDeepClassifier",
62    "LazyDeepRegressor",
63    "LazyMTS",
64    "LazyDeepMTS",
65    "MLARCH",
66    "MedianVotingRegressor",
67    "MTS",
68    "MultitaskClassifier",
69    "NeuralNetRegressor",
70    "NeuralNetClassifier",
71    "PredictionInterval",
72    "SimpleMultitaskClassifier",
73    "Optimizer",
74    "QuantileRegressor",
75    "QuantileClassifier",
76    "RandomBagRegressor",
77    "RandomBagClassifier",
78    "RegressorUpdater",
79    "ClassifierUpdater",
80    "RidgeRegressor",
81    "Ridge2Regressor",
82    "Ridge2Classifier",
83    "Ridge2MultitaskClassifier",
84    "SubSampler",
85]
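
A minimal usage sketch of the package's general pattern (assuming scikit-learn is installed; the estimator and dataset are only illustrative): most classes exported above wrap an arbitrary scikit-learn-style learner `obj` and expose the usual `fit`/`predict` interface.

```python
import nnetsauce as ns
from sklearn import metrics
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123
)

# wrap a scikit-learn classifier in a quasi-randomized hidden-layer model
clf = ns.CustomClassifier(LogisticRegression(), n_hidden_features=5, direct_link=True)
clf.fit(X_train, y_train)
print(metrics.accuracy_score(y_test, clf.predict(X_test)))
```
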
class AdaBoostClassifier(nnetsauce.boosting.bst.Boosting, sklearn.base.ClassifierMixin):
 21class AdaBoostClassifier(Boosting, ClassifierMixin):
 22    """AdaBoost Classification (SAMME) model class derived from class Boosting
 23
 24    Parameters:
 25
 26        obj: object
 27            any object containing a method fit (obj.fit()) and a method predict
 28            (obj.predict())
 29
 30        n_estimators: int
 31            number of boosting iterations
 32
 33        learning_rate: float
 34            learning rate of the boosting procedure
 35
 36        n_hidden_features: int
 37            number of nodes in the hidden layer
 38
 39        reg_lambda: float
 40            regularization parameter for weights
 41
 42        reg_alpha: float
 43            controls the compromise between the l1 and l2 norms of the weights
 44
 45        activation_name: str
 46            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 47
 48        a: float
 49            hyperparameter for 'prelu' or 'elu' activation function
 50
 51        nodes_sim: str
 52            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 53            'uniform'
 54
 55        bias: boolean
 56            indicates if the hidden layer contains a bias term (True) or not
 57            (False)
 58
 59        dropout: float
 60            regularization parameter; (random) percentage of nodes dropped out
 61            of the training
 62
 63        direct_link: boolean
 64            indicates if the original predictors are included (True) in model's
 65            fitting or not (False)
 66
 67        n_clusters: int
 68            number of clusters for 'kmeans' or 'gmm' clustering
 69            (could be 0: no clustering)
 70
 71        cluster_encode: bool
 72            defines how the variable containing clusters is treated (default is one-hot)
 73            if `False`, then labels are used, without one-hot encoding
 74
 75        type_clust: str
 76            type of clustering method: currently k-means ('kmeans') or Gaussian
 77            Mixture Model ('gmm')
 78
 79        type_scaling: a tuple of 3 strings
 80            scaling methods for inputs, hidden layer, and clustering respectively
 81            (and when relevant).
 82            Currently available: standardization ('std') or MinMax scaling ('minmax')
 83
 84        col_sample: float
 85            percentage of covariates randomly chosen for training
 86
 87        row_sample: float
 88            percentage of rows chosen for training, by stratified bootstrapping
 89
 90        seed: int
 91            reproducibility seed for nodes_sim=='uniform'
 92
 93        verbose: int
 94            0 for no output, 1 for a progress bar (default is 1)
 95
 96        method: str
 97            type of AdaBoost method, 'SAMME' (discrete) or 'SAMME.R' (real)
 98
 99        backend: str
100            "cpu" or "gpu" or "tpu"
101
102    Attributes:
103
104        alpha_: list
105            AdaBoost coefficients alpha_m
106
107        base_learners_: dict
108            a dictionary containing the base learners
109
110    Examples:
111
112    See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/adaboost_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/adaboost_classification.py)
113
114    ```python
115    import nnetsauce as ns
116    import numpy as np
117    from sklearn.datasets import load_breast_cancer
118    from sklearn.linear_model import LogisticRegression
119    from sklearn.model_selection import train_test_split
120    from sklearn import metrics
121    from time import time
122
123    breast_cancer = load_breast_cancer()
124    Z = breast_cancer.data
125    t = breast_cancer.target
126    np.random.seed(123)
127    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)
128
129    # SAMME.R
130    clf = LogisticRegression(solver='liblinear', multi_class = 'ovr',
131                            random_state=123)
132    fit_obj = ns.AdaBoostClassifier(clf,
133                                    n_hidden_features=int(11.22338867),
134                                    direct_link=True,
135                                    n_estimators=250, learning_rate=0.01126343,
136                                    col_sample=0.72684326, row_sample=0.86429443,
137                                    dropout=0.63078613, n_clusters=2,
138                                    type_clust="gmm",
139                                    verbose=1, seed = 123,
140                                    method="SAMME.R")
141
142    start = time()
143    fit_obj.fit(X_train, y_train)
144    print(f"Elapsed {time() - start}")
145
146    start = time()
147    print(fit_obj.score(X_test, y_test))
148    print(f"Elapsed {time() - start}")
149
150    preds = fit_obj.predict(X_test)
151
152    print(metrics.classification_report(preds, y_test))
153
154    ```
155
156    """
157
158    # construct the object -----
159    _estimator_type = "classifier"
160
161    def __init__(
162        self,
163        obj,
164        n_estimators=10,
165        learning_rate=0.1,
166        n_hidden_features=1,
167        reg_lambda=0,
168        reg_alpha=0.5,
169        activation_name="relu",
170        a=0.01,
171        nodes_sim="sobol",
172        bias=True,
173        dropout=0,
174        direct_link=False,
175        n_clusters=2,
176        cluster_encode=True,
177        type_clust="kmeans",
178        type_scaling=("std", "std", "std"),
179        col_sample=1,
180        row_sample=1,
181        seed=123,
182        verbose=1,
183        method="SAMME",
184        backend="cpu",
185    ):
186        self.type_fit = "classification"
187        self.verbose = verbose
188        self.method = method
189        self.reg_lambda = reg_lambda
190        self.reg_alpha = reg_alpha
191
192        super().__init__(
193            obj=obj,
194            n_estimators=n_estimators,
195            learning_rate=learning_rate,
196            n_hidden_features=n_hidden_features,
197            activation_name=activation_name,
198            a=a,
199            nodes_sim=nodes_sim,
200            bias=bias,
201            dropout=dropout,
202            direct_link=direct_link,
203            n_clusters=n_clusters,
204            cluster_encode=cluster_encode,
205            type_clust=type_clust,
206            type_scaling=type_scaling,
207            col_sample=col_sample,
208            row_sample=row_sample,
209            seed=seed,
210            backend=backend,
211        )
212
213        self.alpha_ = []
214        self.base_learners_ = dict.fromkeys(range(n_estimators))
215
216    def fit(self, X, y, sample_weight=None, **kwargs):
217        """Fit Boosting model to training data (X, y).
218
219        Parameters:
220
221            X: {array-like}, shape = [n_samples, n_features]
222                Training vectors, where n_samples is the number
223                of samples and n_features is the number of features.
224
225            y: array-like, shape = [n_samples]
226                Target values.
227
228            **kwargs: additional parameters to be passed to
229                    self.cook_training_set or self.obj.fit
230
231        Returns:
232
233             self: object
234        """
235
236        assert mx.is_factor(y), "y must contain only integers"
237
238        assert self.method in (
239            "SAMME",
240            "SAMME.R",
241        ), "`method` must be either 'SAMME' or 'SAMME.R'"
242
243        assert (self.reg_lambda <= 1) & (
244            self.reg_lambda >= 0
245        ), "must have self.reg_lambda <= 1 &  self.reg_lambda >= 0"
246
247        assert (self.reg_alpha <= 1) & (
248            self.reg_alpha >= 0
249        ), "must have self.reg_alpha <= 1 &  self.reg_alpha >= 0"
250
251        # training
252        n, p = X.shape
253        self.n_classes = len(np.unique(y))
254        self.classes_ = np.unique(y)  # for compatibility with sklearn
255        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
256
257        if sample_weight is None:
258            w_m = np.repeat(1.0 / n, n)
259        else:
260            w_m = np.asarray(sample_weight)
261
262        base_learner = CustomClassifier(
263            self.obj,
264            n_hidden_features=self.n_hidden_features,
265            activation_name=self.activation_name,
266            a=self.a,
267            nodes_sim=self.nodes_sim,
268            bias=self.bias,
269            dropout=self.dropout,
270            direct_link=self.direct_link,
271            n_clusters=self.n_clusters,
272            type_clust=self.type_clust,
273            type_scaling=self.type_scaling,
274            col_sample=self.col_sample,
275            row_sample=self.row_sample,
276            seed=self.seed,
277        )
278
279        if self.verbose == 1:
280            pbar = Progbar(self.n_estimators)
281
282        if self.method == "SAMME":
283            err_m = 1e6
284            err_bound = 1 - 1 / self.n_classes
285            self.alpha_.append(1.0)
286            x_range_n = range(n)
287
288            for m in range(self.n_estimators):
289                preds = base_learner.fit(
290                    X, y, sample_weight=w_m.ravel(), **kwargs
291                ).predict(X)
292
293                self.base_learners_.update({m: deepcopy(base_learner)})
294
295                cond = [y[i] != preds[i] for i in x_range_n]
296
297                err_m = max(
298                    sum([elt[0] * elt[1] for elt in zip(cond, w_m)]),
299                    2.220446049250313e-16,
300                )  # sum(w_m) == 1
301
302                if self.reg_lambda > 0:
303                    err_m += self.reg_lambda * (
304                        (1 - self.reg_alpha) * 0.5 * sum([x**2 for x in w_m])
305                        + self.reg_alpha * sum([abs(x) for x in w_m])
306                    )
307
308                err_m = min(err_m, err_bound)
309
310                alpha_m = self.learning_rate * log(
311                    (self.n_classes - 1) * (1 - err_m) / err_m
312                )
313
314                self.alpha_.append(alpha_m)
315
316                w_m_temp = [exp(alpha_m * cond[i]) for i in x_range_n]
317
318                sum_w_m = sum(w_m_temp)
319
320                w_m = np.asarray([w_m_temp[i] / sum_w_m for i in x_range_n])
321
322                base_learner.set_params(seed=self.seed + (m + 1) * 1000)
323
324                if self.verbose == 1:
325                    pbar.update(m)
326
327            if self.verbose == 1:
328                pbar.update(self.n_estimators)
329
330            self.n_estimators = len(self.base_learners_)
331            self.classes_ = np.unique(y)
332
333            return self
334
335        if self.method == "SAMME.R":
336            Y = mo.one_hot_encode2(y, self.n_classes)
337
338            if sample_weight is None:
339                w_m = np.repeat(1.0 / n, n)  # (N, 1)
340
341            else:
342                w_m = np.asarray(sample_weight)
343
344            for m in range(self.n_estimators):
345                probs = base_learner.fit(
346                    X, y, sample_weight=w_m.ravel(), **kwargs
347                ).predict_proba(X)
348
349                np.clip(
350                    a=probs, a_min=2.220446049250313e-16, a_max=1.0, out=probs
351                )
352
353                self.base_learners_.update({m: deepcopy(base_learner)})
354
355                w_m *= np.exp(
356                    -1.0
357                    * self.learning_rate
358                    * (1.0 - 1.0 / self.n_classes)
359                    * xlogy(Y, probs).sum(axis=1)
360                )
361
362                w_m /= np.sum(w_m)
363
364                base_learner.set_params(seed=self.seed + (m + 1) * 1000)
365
366                if self.verbose == 1:
367                    pbar.update(m)
368
369            if self.verbose == 1:
370                pbar.update(self.n_estimators)
371
372            self.n_estimators = len(self.base_learners_)
373            self.classes_ = np.unique(y)
374
375            return self
376
377    def predict(self, X, **kwargs):
378        """Predict test data X.
379
380        Parameters:
381
382            X: {array-like}, shape = [n_samples, n_features]
383                Test vectors, where n_samples is the number
384                of samples and n_features is the number of features.
385
386            **kwargs: additional parameters to be passed to
387                  self.cook_test_set
388
389        Returns:
390
391            model predictions: {array-like}
392        """
393        return self.predict_proba(X, **kwargs).argmax(axis=1)
394
395    def predict_proba(self, X, **kwargs):
396        """Predict probabilities for test data X.
397
398        Parameters:
399
400            X: {array-like}, shape = [n_samples, n_features]
401                Test vectors, where n_samples is the number
402                of samples and n_features is the number of features.
403
404            **kwargs: additional parameters to be passed to
405                  self.cook_test_set
406
407        Returns:
408
409            probability estimates for test data: {array-like}
410
411        """
412
413        n_iter = len(self.base_learners_)
414
415        if self.method == "SAMME":
416            ensemble_learner = np.zeros((X.shape[0], self.n_classes))
417
418            # if self.verbose == 1:
419            #    pbar = Progbar(n_iter)
420
421            for idx, base_learner in self.base_learners_.items():
422                preds = base_learner.predict(X, **kwargs)
423
424                ensemble_learner += self.alpha_[idx] * mo.one_hot_encode2(
425                    preds, self.n_classes
426                )
427
428                # if self.verbose == 1:
429                #    pbar.update(idx)
430
431            # if self.verbose == 1:
432            #    pbar.update(n_iter)
433
434            expit_ensemble_learner = expit(ensemble_learner)
435
436            sum_ensemble = expit_ensemble_learner.sum(axis=1)
437
438            return expit_ensemble_learner / sum_ensemble[:, None]
439
440        # if self.method == "SAMME.R":
441        ensemble_learner = 0
442
443        # if self.verbose == 1:
444        #    pbar = Progbar(n_iter)
445
446        for idx, base_learner in self.base_learners_.items():
447            probs = base_learner.predict_proba(X, **kwargs)
448
449            np.clip(a=probs, a_min=2.220446049250313e-16, a_max=1.0, out=probs)
450
451            log_preds_proba = np.log(probs)
452
453            ensemble_learner += (
454                log_preds_proba - log_preds_proba.mean(axis=1)[:, None]
455            )
456
457            # if self.verbose == 1:
458            #    pbar.update(idx)
459
460        ensemble_learner *= self.n_classes - 1
461
462        # if self.verbose == 1:
463        #    pbar.update(n_iter)
464
465        expit_ensemble_learner = expit(ensemble_learner)
466
467        sum_ensemble = expit_ensemble_learner.sum(axis=1)
468
469        return expit_ensemble_learner / sum_ensemble[:, None]
470
471    @property
472    def _estimator_type(self):
473        return "classifier"
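
In summary, the quantities computed by `fit` and `predict_proba` above are (with $K$ the number of classes, $\eta$ the `learning_rate`, $\lambda$ = `reg_lambda`, $\alpha$ = `reg_alpha`, $w_i$ the observation weights, $\varepsilon$ machine epsilon and $\mathbf{1}[\cdot]$ the indicator function):

SAMME (discrete), at iteration $m$ with predictions $\hat{y}_i^{(m)}$:

$$
\mathrm{err}_m = \min\Big(\max\Big(\sum_i w_i\,\mathbf{1}[y_i \neq \hat{y}_i^{(m)}],\ \varepsilon\Big) + \lambda\Big(\tfrac{1-\alpha}{2}\sum_i w_i^2 + \alpha\sum_i |w_i|\Big),\ 1 - \tfrac{1}{K}\Big),
\qquad
\alpha_m = \eta\,\log\frac{(K-1)\,(1-\mathrm{err}_m)}{\mathrm{err}_m},
$$

after which the weights are reset to $w_i \propto \exp\big(\alpha_m\,\mathbf{1}[y_i \neq \hat{y}_i^{(m)}]\big)$ and normalized to sum to one.

SAMME.R (real), with class-membership probabilities $p_{ik}^{(m)}$ and one-hot encoded targets $Y_{ik}$:

$$
w_i \leftarrow w_i\,\exp\Big(-\eta\,\tfrac{K-1}{K}\sum_k Y_{ik}\,\log p_{ik}^{(m)}\Big),
\qquad
w_i \leftarrow w_i \Big/ \sum_j w_j .
$$

`predict_proba` then aggregates the base learners: for SAMME it applies the logistic (expit) function to $\sum_m \alpha_m\,\mathrm{onehot}(\hat{y}^{(m)})$ and renormalizes each row; for SAMME.R it applies it to $(K-1)\sum_m \big(\log p^{(m)}_{k} - \tfrac{1}{K}\sum_{k'} \log p^{(m)}_{k'}\big)$ and renormalizes each row.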

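
The estimators above derive their data preprocessing and feature engineering from the `Base` class below. In essence (a simplified sketch of what `cook_training_set` / `cook_test_set` compute, ignoring the bias column, clustering, scaling, row/column subsampling and dropout; the helper name is hypothetical), the design matrix handed to each learner is the original features optionally concatenated with a randomized hidden layer:

```python
import numpy as np

def simplified_design_matrix(X, W, activation=np.tanh, direct_link=True):
    """Toy illustration: hidden features g(XW), optionally next to X."""
    H = activation(X @ W)  # hidden layer; nnetsauce draws W from a quasi-random sequence
    return np.hstack([X, H]) if direct_link else H

rng = np.random.default_rng(123)
X = rng.normal(size=(5, 3))
W = rng.uniform(-1.0, 1.0, size=(3, 4))  # stand-in for Sobol/Halton/Hammersley nodes
print(simplified_design_matrix(X, W).shape)  # (5, 7)
```
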
class Base(sklearn.base.BaseEstimator):
  48class Base(BaseEstimator):
  49    """Base model from which all the other classes inherit.
  50
  51    This class contains the most important data preprocessing/feature engineering methods.
  52
  53    Parameters:
  54
  55        n_hidden_features: int
  56            number of nodes in the hidden layer
  57
  58        activation_name: str
  59            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
  60
  61        a: float
  62            hyperparameter for 'prelu' or 'elu' activation function
  63
  64        nodes_sim: str
  65            type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
  66            'uniform'
  67
  68        bias: boolean
  69            indicates if the hidden layer contains a bias term (True) or
  70            not (False)
  71
  72        dropout: float
  73            regularization parameter; (random) percentage of nodes dropped out
  74            of the training
  75
  76        direct_link: boolean
  77            indicates if the original features are included (True) in model's
  78            fitting or not (False)
  79
  80        n_clusters: int
  81            number of clusters for type_clust='kmeans' or type_clust='gmm'
  82            clustering (could be 0: no clustering)
  83
  84        cluster_encode: bool
  85            defines how the variable containing clusters is treated (default is one-hot);
  86            if `False`, then labels are used, without one-hot encoding
  87
  88        type_clust: str
  89            type of clustering method: currently k-means ('kmeans') or Gaussian
  90            Mixture Model ('gmm')
  91
  92        type_scaling: a tuple of 3 strings
  93            scaling methods for inputs, hidden layer, and clustering respectively
  94            (and when relevant).
  95            Currently available: standardization ('std'), MinMax scaling ('minmax'), robust scaling ('robust') or max absolute scaling ('maxabs')
  96
  97        col_sample: float
  98            percentage of features randomly chosen for training
  99
 100        row_sample: float
 101            percentage of rows chosen for training, by stratified bootstrapping
 102
 103        seed: int
 104            reproducibility seed for nodes_sim=='uniform', clustering and dropout
 105
 106        backend: str
 107            "cpu" or "gpu" or "tpu"
 108
 109    """
 110
 111    # construct the object -----
 112
 113    def __init__(
 114        self,
 115        n_hidden_features=5,
 116        activation_name="relu",
 117        a=0.01,
 118        nodes_sim="sobol",
 119        bias=True,
 120        dropout=0,
 121        direct_link=True,
 122        n_clusters=2,
 123        cluster_encode=True,
 124        type_clust="kmeans",
 125        type_scaling=("std", "std", "std"),
 126        col_sample=1,
 127        row_sample=1,
 128        seed=123,
 129        backend="cpu",
 130    ):
 131        # input checks -----
 132
 133        sys_platform = platform.system()
 134
 135        if (sys_platform == "Windows") and (backend in ("gpu", "tpu")):
 136            warnings.warn(
 137                "No GPU/TPU computing on Windows yet, backend set to 'cpu'"
 138            )
 139            backend = "cpu"
 140
 141        assert activation_name in (
 142            "relu",
 143            "tanh",
 144            "sigmoid",
 145            "prelu",
 146            "elu",
 147        ), "'activation_name' must be in ('relu', 'tanh', 'sigmoid','prelu', 'elu')"
 148
 149        assert nodes_sim in (
 150            "sobol",
 151            "hammersley",
 152            "uniform",
 153            "halton",
 154        ), "'nodes_sim' must be in ('sobol', 'hammersley', 'uniform', 'halton')"
 155
 156        assert type_clust in (
 157            "kmeans",
 158            "gmm",
 159        ), "'type_clust' must be in ('kmeans', 'gmm')"
 160
 161        assert (len(type_scaling) == 3) & all(
 162            type_scaling[i] in ("minmax", "std", "robust", "maxabs")
 163            for i in range(len(type_scaling))
 164        ), "'type_scaling' must have length 3, and available scaling methods are 'minmax' scaling, standardization ('std'), robust scaling ('robust') and max absolute ('maxabs')"
 165
 166        assert (col_sample >= 0) & (
 167            col_sample <= 1
 168        ), "'col_sample' must be comprised between 0 and 1 (both included)"
 169
 170        assert backend in (
 171            "cpu",
 172            "gpu",
 173            "tpu",
 174        ), "must have 'backend' in ('cpu', 'gpu', 'tpu')"
 175
 176        self.n_hidden_features = n_hidden_features
 177        self.activation_name = activation_name
 178        self.a = a
 179        self.nodes_sim = nodes_sim
 180        self.bias = bias
 181        self.seed = seed
 182        self.backend = backend
 183        self.dropout = dropout
 184        self.direct_link = direct_link
 185        self.cluster_encode = cluster_encode
 186        self.type_clust = type_clust
 187        self.type_scaling = type_scaling
 188        self.col_sample = col_sample
 189        self.row_sample = row_sample
 190        self.n_clusters = n_clusters
 191        if isinstance(self, RegressorMixin):
 192            self.type_fit = "regression"
 193        elif isinstance(self, ClassifierMixin):
 194            self.type_fit = "classification"
 195        self.subsampler_ = None
 196        self.index_col_ = None
 197        self.index_row_ = True
 198        self.clustering_obj_ = None
 199        self.clustering_scaler_ = None
 200        self.nn_scaler_ = None
 201        self.scaler_ = None
 202        self.encoder_ = None
 203        self.W_ = None
 204        self.X_ = None
 205        self.y_ = None
 206        self.y_mean_ = None
 207        self.beta_ = None
 208
 209        # activation function -----
 210        if sys_platform in ("Linux", "Darwin"):
 211            activation_options = {
 212                "relu": ac.relu if (self.backend == "cpu") else jnn.relu,
 213                "tanh": np.tanh if (self.backend == "cpu") else jnp.tanh,
 214                "sigmoid": (
 215                    ac.sigmoid if (self.backend == "cpu") else jnn.sigmoid
 216                ),
 217                "prelu": partial(ac.prelu, a=a),
 218                "elu": (
 219                    partial(ac.elu, a=a)
 220                    if (self.backend == "cpu")
 221                    else partial(jnn.elu, a=a)
 222                ),
 223            }
 224        else:  # on Windows currently, no JAX
 225            activation_options = {
 226                "relu": (
 227                    ac.relu if (self.backend == "cpu") else NotImplementedError
 228                ),
 229                "tanh": (
 230                    np.tanh if (self.backend == "cpu") else NotImplementedError
 231                ),
 232                "sigmoid": (
 233                    ac.sigmoid
 234                    if (self.backend == "cpu")
 235                    else NotImplementedError
 236                ),
 237                "prelu": partial(ac.prelu, a=a),
 238                "elu": (
 239                    partial(ac.elu, a=a)
 240                    if (self.backend == "cpu")
 241                    else NotImplementedError
 242                ),
 243            }
 244        self.activation_func = activation_options[activation_name]
 245
 246    # "preprocessing" methods to be inherited -----
 247
 248    def encode_clusters(self, X=None, predict=False, scaler=None, **kwargs):  #
 249        """Create new covariates with kmeans or GMM clustering
 250
 251        Parameters:
 252
 253            X: {array-like}, shape = [n_samples, n_features]
 254                Training vectors, where n_samples is the number
 255                of samples and n_features is the number of features.
 256
 257            predict: boolean
 258                is False on training set and True on test set
 259
 260            scaler: {object} of class StandardScaler, MinMaxScaler, RobustScaler or MaxAbsScaler
 261                if scaler has already been fitted on training data (online training), it can be passed here
 262
 263            **kwargs:
 264                additional parameters to be passed to the
 265                clustering method
 266
 267        Returns:
 268
 269            Clusters' matrix, one-hot encoded: {array-like}
 270
 271        """
 272
 273        np.random.seed(self.seed)
 274
 275        if X is None:
 276            X = self.X_
 277
 278        if isinstance(X, pd.DataFrame):
 279            X = copy.deepcopy(X.values.astype(float))
 280
 281        if len(X.shape) == 1:
 282            X = X.reshape(1, -1)
 283
 284        if predict is False:  # encode training set
 285
 286            # scale input data before clustering
 287            self.clustering_scaler_, scaled_X = mo.scale_covariates(
 288                X, choice=self.type_scaling[2], scaler=self.clustering_scaler_
 289            )
 290
 291            self.clustering_obj_, X_clustered = mo.cluster_covariates(
 292                scaled_X,
 293                self.n_clusters,
 294                self.seed,
 295                type_clust=self.type_clust,
 296                **kwargs
 297            )
 298
 299            if self.cluster_encode:
 300                return mo.one_hot_encode(X_clustered, self.n_clusters).astype(
 301                    np.float16
 302                )
 303
 304            return X_clustered.astype(np.float16)
 305
 306        # if predict == True, encode test set
 307        X_clustered = self.clustering_obj_.predict(
 308            self.clustering_scaler_.transform(X)
 309        )
 310
 311        if self.cluster_encode == True:
 312            return mo.one_hot_encode(X_clustered, self.n_clusters).astype(
 313                np.float16
 314            )
 315
 316        return X_clustered.astype(np.float16)
 317
 318    def create_layer(self, scaled_X, W=None):
 319        """Create hidden layer.
 320
 321        Parameters:
 322
 323            scaled_X: {array-like}, shape = [n_samples, n_features]
 324                Training vectors, where n_samples is the number
 325                of samples and n_features is the number of features
 326
 327            W: {array-like}, shape = [n_features, hidden_features]
 328                if provided, constructs the hidden layer with W; otherwise computed internally
 329
 330        Returns:
 331
 332            Hidden layer matrix: {array-like}
 333
 334        """
 335
 336        n_features = scaled_X.shape[1]
 337
 338        # hash_sim = {
 339        #         "sobol": generate_sobol,
 340        #         "hammersley": generate_hammersley,
 341        #         "uniform": generate_uniform,
 342        #         "halton": generate_halton
 343        #     }
 344
 345        if self.bias is False:  # no bias term in the hidden layer
 346            if W is None:
 347                if self.nodes_sim == "sobol":
 348                    self.W_ = generate_sobol(
 349                        n_dims=n_features,
 350                        n_points=self.n_hidden_features,
 351                        seed=self.seed,
 352                    )
 353                elif self.nodes_sim == "hammersley":
 354                    self.W_ = generate_hammersley(
 355                        n_dims=n_features,
 356                        n_points=self.n_hidden_features,
 357                        seed=self.seed,
 358                    )
 359                elif self.nodes_sim == "uniform":
 360                    self.W_ = generate_uniform(
 361                        n_dims=n_features,
 362                        n_points=self.n_hidden_features,
 363                        seed=self.seed,
 364                    )
 365                else:
 366                    self.W_ = generate_halton(
 367                        n_dims=n_features,
 368                        n_points=self.n_hidden_features,
 369                        seed=self.seed,
 370                    )
 371
 372                assert (
 373                    scaled_X.shape[1] == self.W_.shape[0]
 374                ), "check dimensions of covariates X and matrix W"
 375
 376                return mo.dropout(
 377                    x=self.activation_func(
 378                        mo.safe_sparse_dot(
 379                            a=scaled_X, b=self.W_, backend=self.backend
 380                        )
 381                    ),
 382                    drop_prob=self.dropout,
 383                    seed=self.seed,
 384                )
 385
 386            # W is not none
 387            assert (
 388                scaled_X.shape[1] == W.shape[0]
 389            ), "check dimensions of covariates X and matrix W"
 390
 391            # self.W_ = W
 392            return mo.dropout(
 393                x=self.activation_func(
 394                    mo.safe_sparse_dot(a=scaled_X, b=W, backend=self.backend)
 395                ),
 396                drop_prob=self.dropout,
 397                seed=self.seed,
 398            )
 399
 400        # with bias term in the hidden layer
 401        if W is None:
 402            n_features_1 = n_features + 1
 403
 404            if self.nodes_sim == "sobol":
 405                self.W_ = generate_sobol(
 406                    n_dims=n_features_1,
 407                    n_points=self.n_hidden_features,
 408                    seed=self.seed,
 409                )
 410            elif self.nodes_sim == "hammersley":
 411                self.W_ = generate_hammersley(
 412                    n_dims=n_features_1,
 413                    n_points=self.n_hidden_features,
 414                    seed=self.seed,
 415                )
 416            elif self.nodes_sim == "uniform":
 417                self.W_ = generate_uniform(
 418                    n_dims=n_features_1,
 419                    n_points=self.n_hidden_features,
 420                    seed=self.seed,
 421                )
 422            else:
 423                self.W_ = generate_halton(
 424                    n_dims=n_features_1,
 425                    n_points=self.n_hidden_features,
 426                    seed=self.seed,
 427                )
 428
 429            # self.W_ = hash_sim[self.nodes_sim](
 430            #         n_dims=n_features_1,
 431            #         n_points=self.n_hidden_features,
 432            #         seed=self.seed,
 433            #     )
 434
 435            return mo.dropout(
 436                x=self.activation_func(
 437                    mo.safe_sparse_dot(
 438                        a=mo.cbind(
 439                            np.ones(scaled_X.shape[0]),
 440                            scaled_X,
 441                            backend=self.backend,
 442                        ),
 443                        b=self.W_,
 444                        backend=self.backend,
 445                    )
 446                ),
 447                drop_prob=self.dropout,
 448                seed=self.seed,
 449            )
 450
 451        # W is not None
 452        # self.W_ = W
 453        return mo.dropout(
 454            x=self.activation_func(
 455                mo.safe_sparse_dot(
 456                    a=mo.cbind(
 457                        np.ones(scaled_X.shape[0]),
 458                        scaled_X,
 459                        backend=self.backend,
 460                    ),
 461                    b=W,
 462                    backend=self.backend,
 463                )
 464            ),
 465            drop_prob=self.dropout,
 466            seed=self.seed,
 467        )
 468
 469    def _jax_create_layer(
 470        self, scaled_X: jnp.ndarray, W: Optional[jnp.ndarray] = None
 471    ) -> jnp.ndarray:
 472        """JAX-compatible version of create_layer that exactly matches the original functionality."""
 473        key = jax.random.PRNGKey(self.seed)
 474        n_features = scaled_X.shape[1]
 475
 476        # Generate weights if not provided
 477        if W is None:
 478            if self.bias:
 479                n_features_1 = n_features + 1
 480                shape = (n_features_1, self.n_hidden_features)
 481            else:
 482                shape = (n_features, self.n_hidden_features)
 483
 484            # JAX-compatible weight generation matching original behavior
 485            if self.nodes_sim == "sobol":
 486                W_np = generate_sobol(
 487                    n_dims=shape[0],  # n_features (+ 1 if bias)
 488                    n_points=self.n_hidden_features,
 489                    seed=self.seed,
 490                )
 491                W = jnp.asarray(W_np)
 492            elif self.nodes_sim == "hammersley":
 493                W_np = generate_hammersley(
 494                    n_dims=shape[0],
 495                    n_points=self.n_hidden_features,
 496                    seed=self.seed,
 497                )
 498                W = jnp.asarray(W_np)
 499            elif self.nodes_sim == "uniform":
 500                key, subkey = jax.random.split(key)
 501                W = jax.random.uniform(
 502                    subkey, shape=shape, minval=-1.0, maxval=1.0
 503                )
 504            else:  # halton
 505                W_np = generate_halton(
 506                    n_dims=n_features_1,
 507                    n_points=self.n_hidden_features,
 508                    seed=self.seed,
 509                )
 510                W = jnp.asarray(W_np)
 511
 512            self.W_ = np.array(W)  # Store as numpy for original methods
 513
 514        # Prepare input with bias if needed
 515        if self.bias:
 516            X_with_bias = jnp.hstack(
 517                [jnp.ones((scaled_X.shape[0], 1)), scaled_X]
 518            )
 521            linear_output = jnp.dot(X_with_bias, W)
 522        else:
 523            linear_output = jnp.dot(scaled_X, W)
 524
 525        # Apply activation function
 526        if self.activation_name == "relu":
 527            activated = jax.nn.relu(linear_output)
 528        elif self.activation_name == "tanh":
 529            activated = jnp.tanh(linear_output)
 530        elif self.activation_name == "sigmoid":
 531            activated = jax.nn.sigmoid(linear_output)
 532        else:  # 'prelu' or 'elu': handled via leaky ReLU with slope a
 533            activated = jax.nn.leaky_relu(linear_output, negative_slope=self.a)
 534
 535        # Apply dropout
 536        if self.dropout > 0:
 537            key, subkey = jax.random.split(key)
 538            mask = jax.random.bernoulli(
 539                subkey, p=1 - self.dropout, shape=activated.shape
 540            )
 541            activated = jnp.where(mask, activated / (1 - self.dropout), 0)
 542
 543        return activated
 544
 545    def cook_training_set(self, y=None, X=None, W=None, **kwargs):
 546        """Create new hidden features for training set, with hidden layer, center the response.
 547
 548        Parameters:
 549
 550            y: array-like, shape = [n_samples]
 551                Target values
 552
 553            X: {array-like}, shape = [n_samples, n_features]
 554                Training vectors, where n_samples is the number
 555                of samples and n_features is the number of features
 556
 557            W: {array-like}, shape = [n_features, hidden_features]
 558                if provided, constructs the hidden layer via W
 559
 560        Returns:
 561
 562            (centered response, direct link + hidden layer matrix): {tuple}
 563
 564        """
 565
 566        # either X and y are stored or not
 567        # assert ((y is None) & (X is None)) | ((y is not None) & (X is not None))
 568        if self.n_hidden_features > 0:  # has a hidden layer
 569            assert (
 570                len(self.type_scaling) >= 2
 571            ), "must have len(self.type_scaling) >= 2 when self.n_hidden_features > 0"
 572
 573        if X is None:
 574
 575            if self.col_sample == 1:
 576                input_X = self.X_
 577            else:
 578                n_features = self.X_.shape[1]
 579                new_n_features = int(np.ceil(n_features * self.col_sample))
 580                assert (
 581                    new_n_features >= 1
 582                ), "check class attribute 'col_sample' and the number of covariates provided for X"
 583                np.random.seed(self.seed)
 584                index_col = np.random.choice(
 585                    range(n_features), size=new_n_features, replace=False
 586                )
 587                self.index_col_ = index_col
 588                input_X = self.X_[:, self.index_col_]
 589
 590        else:  # X is not None # keep X vs self.X_
 591
 592            if isinstance(X, pd.DataFrame):
 593                X = copy.deepcopy(X.values.astype(float))
 594
 595            if self.col_sample == 1:
 596                input_X = X
 597            else:
 598                n_features = X.shape[1]
 599                new_n_features = int(np.ceil(n_features * self.col_sample))
 600                assert (
 601                    new_n_features >= 1
 602                ), "check class attribute 'col_sample' and the number of covariates provided for X"
 603                np.random.seed(self.seed)
 604                index_col = np.random.choice(
 605                    range(n_features), size=new_n_features, replace=False
 606                )
 607                self.index_col_ = index_col
 608                input_X = X[:, self.index_col_]
 609
 610        if self.n_clusters <= 0:
 611            # data without any clustering: self.n_clusters <= 0 -----
 612
 613            if self.n_hidden_features > 0:  # with hidden layer
 614
 615                self.nn_scaler_, scaled_X = mo.scale_covariates(
 616                    input_X, choice=self.type_scaling[1], scaler=self.nn_scaler_
 617                )
 618                Phi_X = (
 619                    self.create_layer(scaled_X)
 620                    if W is None
 621                    else self.create_layer(scaled_X, W=W)
 622                )
 623                Z = (
 624                    mo.cbind(input_X, Phi_X, backend=self.backend)
 625                    if self.direct_link is True
 626                    else Phi_X
 627                )
 628                self.scaler_, scaled_Z = mo.scale_covariates(
 629                    Z, choice=self.type_scaling[0], scaler=self.scaler_
 630                )
 631            else:  # no hidden layer
 632                Z = input_X
 633                self.scaler_, scaled_Z = mo.scale_covariates(
 634                    Z, choice=self.type_scaling[0], scaler=self.scaler_
 635                )
 636
 637        else:
 638
 639            # data with clustering: self.n_clusters is not None ----- # keep
 640
 641            augmented_X = mo.cbind(
 642                input_X,
 643                self.encode_clusters(input_X, **kwargs),
 644                backend=self.backend,
 645            )
 646
 647            if self.n_hidden_features > 0:  # with hidden layer
 648
 649                self.nn_scaler_, scaled_X = mo.scale_covariates(
 650                    augmented_X,
 651                    choice=self.type_scaling[1],
 652                    scaler=self.nn_scaler_,
 653                )
 654                Phi_X = (
 655                    self.create_layer(scaled_X)
 656                    if W is None
 657                    else self.create_layer(scaled_X, W=W)
 658                )
 659                Z = (
 660                    mo.cbind(augmented_X, Phi_X, backend=self.backend)
 661                    if self.direct_link is True
 662                    else Phi_X
 663                )
 664                self.scaler_, scaled_Z = mo.scale_covariates(
 665                    Z, choice=self.type_scaling[0], scaler=self.scaler_
 666                )
 667            else:  # no hidden layer
 668                Z = augmented_X
 669                self.scaler_, scaled_Z = mo.scale_covariates(
 670                    Z, choice=self.type_scaling[0], scaler=self.scaler_
 671                )
 672
 673        # Returning model inputs -----
 674        if mx.is_factor(y) is False:  # regression
 675            # center y
 676            if y is None:
 677                self.y_mean_, centered_y = mo.center_response(self.y_)
 678            else:
 679                self.y_mean_, centered_y = mo.center_response(y)
 680
 681            # y is subsampled
 682            if self.row_sample < 1:
 683                n, p = Z.shape
 684
 685                self.subsampler_ = (
 686                    SubSampler(
 687                        y=self.y_, row_sample=self.row_sample, seed=self.seed
 688                    )
 689                    if y is None
 690                    else SubSampler(
 691                        y=y, row_sample=self.row_sample, seed=self.seed
 692                    )
 693                )
 694
 695                self.index_row_ = self.subsampler_.subsample()
 696
 697                n_row_sample = len(self.index_row_)
 698                # regression
 699                return (
 700                    centered_y[self.index_row_].reshape(n_row_sample),
 701                    self.scaler_.transform(
 702                        Z[self.index_row_, :].reshape(n_row_sample, p)
 703                    ),
 704                )
 705            # y is not subsampled
 706            # regression
 707            return (centered_y, self.scaler_.transform(Z))
 708
 709        # classification
 710        # y is subsampled
 711        if self.row_sample < 1:
 712            n, p = Z.shape
 713
 714            self.subsampler_ = (
 715                SubSampler(
 716                    y=self.y_, row_sample=self.row_sample, seed=self.seed
 717                )
 718                if y is None
 719                else SubSampler(y=y, row_sample=self.row_sample, seed=self.seed)
 720            )
 721
 722            self.index_row_ = self.subsampler_.subsample()
 723
 724            n_row_sample = len(self.index_row_)
 725            # classification
 726            return (
 727                y[self.index_row_].reshape(n_row_sample),
 728                self.scaler_.transform(
 729                    Z[self.index_row_, :].reshape(n_row_sample, p)
 730                ),
 731            )
 732        # y is not subsampled
 733        # classification
 734        return (y, self.scaler_.transform(Z))
 735
 736    def cook_test_set(self, X, **kwargs):
 737        """Transform data from test set, with hidden layer.
 738
 739        Parameters:
 740
 741            X: {array-like}, shape = [n_samples, n_features]
 742                Test vectors, where n_samples is the number
 743                of samples and n_features is the number of features
 744
 745            **kwargs: additional parameters to be passed to self.encode_clusters
 746
 747        Returns:
 748
 749            Transformed test set : {array-like}
 750        """
 751
 752        if isinstance(X, pd.DataFrame):
 753            X = copy.deepcopy(X.values.astype(float))
 754
 755        if len(X.shape) == 1:
 756            X = X.reshape(1, -1)
 757
 758        if (
 759            self.n_clusters == 0
 760        ):  # data without clustering: self.n_clusters == 0 -----
 761            if self.n_hidden_features > 0:
 762                # if hidden layer
 763                scaled_X = (
 764                    self.nn_scaler_.transform(X)
 765                    if (self.col_sample == 1)
 766                    else self.nn_scaler_.transform(X[:, self.index_col_])
 767                )
 768                Phi_X = self.create_layer(scaled_X, self.W_)
 769                if self.direct_link:
 770                    return self.scaler_.transform(
 771                        mo.cbind(scaled_X, Phi_X, backend=self.backend)
 772                    )
 773                # when self.direct_link == False
 774                return self.scaler_.transform(Phi_X)
 775            # if no hidden layer # self.n_hidden_features == 0
 776            return self.scaler_.transform(X)
 777
 778        # data with clustering: self.n_clusters > 0 -----
 779        if self.col_sample == 1:
 780            predicted_clusters = self.encode_clusters(
 781                X=X, predict=True, **kwargs
 782            )
 783            augmented_X = mo.cbind(X, predicted_clusters, backend=self.backend)
 784        else:
 785            predicted_clusters = self.encode_clusters(
 786                X=X[:, self.index_col_], predict=True, **kwargs
 787            )
 788            augmented_X = mo.cbind(
 789                X[:, self.index_col_], predicted_clusters, backend=self.backend
 790            )
 791
 792        if self.n_hidden_features > 0:  # if hidden layer
 793            scaled_X = self.nn_scaler_.transform(augmented_X)
 794            Phi_X = self.create_layer(scaled_X, self.W_)
 795            if self.direct_link:
 796                return self.scaler_.transform(
 797                    mo.cbind(augmented_X, Phi_X, backend=self.backend)
 798                )
 799            return self.scaler_.transform(Phi_X)
 800
 801        # if no hidden layer
 802        return self.scaler_.transform(augmented_X)
 803
 804    def cook_training_set_jax(self, y=None, X=None, W=None, **kwargs):
 805        """JAX-compatible version of cook_training_set that maintains side effects."""
 806        # Initialize random key
 807        key = jax.random.PRNGKey(self.seed)
 808
 809        # Convert inputs to JAX arrays
 810        X = jnp.asarray(X) if X is not None else jnp.asarray(self.X_)
 811        y = jnp.asarray(y) if y is not None else jnp.asarray(self.y_)
 812
 813        # Handle column sampling
 814        if self.col_sample < 1:
 815            n_features = X.shape[1]
 816            new_n_features = int(jnp.ceil(n_features * self.col_sample))
 817            assert new_n_features >= 1, "Invalid col_sample"
 818
 819            key, subkey = jax.random.split(key)
 820            index_col = jax.random.choice(
 821                subkey, n_features, shape=(new_n_features,), replace=False
 822            )
 823            self.index_col_ = np.array(
 824                index_col
 825            )  # Store as numpy for original methods
 826            input_X = X[:, index_col]
 827            n_features = (
 828                new_n_features  # Update n_features after column sampling
 829            )
 830        else:
 831            input_X = X
 832            n_features = X.shape[1]
 833
 834        augmented_X = input_X
 835
 836        # JAX-compatible scaling
 837        def jax_scale(data, mean=None, std=None):
 838            if mean is None:
 839                mean = jnp.mean(data, axis=0)
 840            if std is None:
 841                std = jnp.std(data, axis=0)
 842            return (data - mean) / (std + 1e-10), mean, std
 843
 844        # Hidden layer processing
 845        if self.n_hidden_features > 0:
 846            # Initialize weights if not provided
 847            if W is None:
 848                shape = (n_features, self.n_hidden_features)
 849
 850                # JAX-compatible weight generation
 851                if self.nodes_sim == "uniform":
 852                    key, subkey = jax.random.split(key)
 853                    W = jax.random.uniform(
 854                        subkey, shape=shape, minval=-1.0, maxval=1.0
 855                    ) * (1 / jnp.sqrt(n_features))
 856                else:
 857                    # For other sequences, use numpy generation then convert to JAX
 858                    if self.nodes_sim == "sobol":
 859                        W_np = generate_sobol(
 860                            n_dims=shape[0],
 861                            n_points=shape[1],
 862                            seed=self.seed,
 863                        )
 864                    elif self.nodes_sim == "hammersley":
 865                        W_np = generate_hammersley(
 866                            n_dims=shape[0],
 867                            n_points=shape[1],
 868                            seed=self.seed,
 869                        )
 870                    elif self.nodes_sim == "halton":
 871                        W_np = generate_halton(
 872                            n_dims=shape[0],
 873                            n_points=shape[1],
 874                            seed=self.seed,
 875                        )
 876                    else:  # default to uniform
 877                        key, subkey = jax.random.split(key)
 878                        W = jax.random.uniform(
 879                            subkey, shape=shape, minval=-1.0, maxval=1.0
 880                        ) * (1 / jnp.sqrt(n_features))
 881
 882                    if self.nodes_sim in ["sobol", "hammersley", "halton"]:
 883                        W = jnp.asarray(W_np) * (1 / jnp.sqrt(n_features))
 884
 885                self.W_ = np.array(W)  # Store as numpy for original methods
 886
 887            # Scale features
 888            scaled_X, self.nn_mean_, self.nn_std_ = jax_scale(
 889                augmented_X,
 890                getattr(self, "nn_mean_", None),
 891                getattr(self, "nn_std_", None),
 892            )
 893
 894            # Create hidden layer with proper bias handling
 895            linear_output = jnp.dot(scaled_X, W)
 896
 897            # Apply activation
 898            if self.activation_name == "relu":
 899                Phi_X = jax.nn.relu(linear_output)
 900            elif self.activation_name == "tanh":
 901                Phi_X = jnp.tanh(linear_output)
 902            elif self.activation_name == "sigmoid":
 903                Phi_X = jax.nn.sigmoid(linear_output)
 904            else:  # leaky relu
 905                Phi_X = jax.nn.leaky_relu(linear_output, negative_slope=self.a)
 906
 907            # Apply dropout
 908            if self.dropout > 0:
 909                key, subkey = jax.random.split(key)
 910                mask = jax.random.bernoulli(
 911                    subkey, p=1 - self.dropout, shape=Phi_X.shape
 912                )
 913                Phi_X = jnp.where(mask, Phi_X / (1 - self.dropout), 0)
 914
 915            Z = jnp.hstack([scaled_X, Phi_X]) if self.direct_link else Phi_X
 916        else:
 917            Z = augmented_X
 918
 919        # Final scaling
 920        scaled_Z, self.scale_mean_, self.scale_std_ = jax_scale(
 921            Z,
 922            getattr(self, "scale_mean_", None),
 923            getattr(self, "scale_std_", None),
 924        )
 925
 926        # Center response for regression
 927        if not hasattr(mx, "is_factor") or not mx.is_factor(
 928            y
 929        ):  # regression case
 930            self.y_mean_ = float(
 931                jnp.mean(y)
 932            )  # Convert to Python float for compatibility
 933            centered_y = y - self.y_mean_
 934        else:
 935            centered_y = y
 936
 937        # Handle row sampling
 938        if self.row_sample < 1:
 939            key, subkey = jax.random.split(key)
 940            n_samples = Z.shape[0]
 941            n_row_sample = int(jnp.ceil(n_samples * self.row_sample))
 942            index_row = jax.random.choice(
 943                subkey, n_samples, shape=(n_row_sample,), replace=False
 944            )
 945            self.index_row_ = np.array(
 946                index_row
 947            )  # Store as numpy for original methods
 948            return (centered_y[index_row], scaled_Z[index_row])
 949
 950        return (centered_y, scaled_Z)
 951
 952    def cook_test_set_jax(self, X, **kwargs):
 953        """JAX-compatible test set processing with matching dimension handling."""
 954        X = jnp.asarray(X)
 955
 956        if len(X.shape) == 1:
 957            X = X.reshape(1, -1)
 958
 959        # Handle column sampling
 960        input_X = (
 961            X if self.col_sample == 1 else X[:, jnp.asarray(self.index_col_)]
 962        )
 963
 964        augmented_X = input_X
 965
 966        # Process hidden layer if needed
 967        if self.n_hidden_features > 0:
 968            # JAX-compatible scaling; nn_mean_ / nn_std_ are only set when
 969            # a hidden layer was used in cook_training_set_jax
 970            scaled_X = (augmented_X - self.nn_mean_) / (self.nn_std_ + 1e-10)
 971            Phi_X = self._jax_create_layer(scaled_X, jnp.asarray(self.W_))
 972            Z = jnp.hstack([scaled_X, Phi_X]) if self.direct_link else Phi_X
 973        else:
 974            Z = augmented_X
 975
 976        # Final scaling
 977        scaled_Z = (Z - self.scale_mean_) / (self.scale_std_ + 1e-10)
 978
 979        return scaled_Z
 980
 981    def _jax_create_layer(self, X, W):
 982        """JAX-compatible hidden layer creation."""
 983        # print("X", X.shape)
 984        # print("W", W.shape)
 985        # print("self.W_", self.W_.shape)
 986        linear_output = jnp.dot(X, W)
 987
 988        if self.activation_name == "relu":
 989            return jax.nn.relu(linear_output)
 990        elif self.activation_name == "tanh":
 991            return jnp.tanh(linear_output)
 992        elif self.activation_name == "sigmoid":
 993            return jax.nn.sigmoid(linear_output)
 994        else:  # leaky relu
 995            return jax.nn.leaky_relu(linear_output, negative_slope=self.a)
 996
 997    def cross_val_score(
 998        self,
 999        X,
1000        y,
1001        cv=5,
1002        scoring="accuracy",
1003        random_state=42,
1004        n_jobs=-1,
1005        epsilon=0.5,
1006        penalized=True,
1007        objective="abs",
1008        **kwargs
1009    ):
1010        """
1011        Penalized Cross-validation score for a model.
1012
1013        Parameters:
1014
1015            X: {array-like}, shape = [n_samples, n_features]
1016                Training vectors, where n_samples is the number
1017                of samples and n_features is the number of features
1018
1019            y: array-like, shape = [n_samples]
1020                Target values
1021
1022            Note:
1023                there is no separate X_test / y_test argument; a validation
1024                fold is carved out internally from (X, y) via an 80/20
1025                train/validation split controlled by random_state, and the
1026                penalized score compares the cross-validation score on the
1027                training part with the score on that validation fold
1028
1029            cv: int
1030                Number of folds
1031
1032            scoring: str
1033                Scoring metric
1034
1035            random_state: int
1036                Random state
1037
1038            n_jobs: int
1039                Number of jobs to run in parallel
1040
1041            epsilon: float
1042                Penalty parameter
1043
1044            penalized: bool
1045                Whether to obtain penalized cross-validation score or not
1046
1047            objective: str
1048                'abs': Minimize the absolute difference between cross-validation score and validation score
1049                'relative': Minimize the relative difference between cross-validation score and validation score
1050        Returns:
1051
1052            A namedtuple with the following fields:
1053                - cv_score: float
1054                    cross-validation score
1055                - val_score: float
1056                    validation score
1057                - penalized_score: float
1058                    penalized cross-validation score: |cv_score - val_score| + epsilon*(1/val_score + 1/cv_score) for objective='abs',
1059                    or |cv_score/val_score - 1| + epsilon*(1/val_score + 1/cv_score) for objective='relative'.
1060                    Assuming a higher scoring metric is better, a lower penalized score is better.
1061        """
1062        if scoring == "accuracy":
1063            scoring_func = accuracy_score
1064        elif scoring == "balanced_accuracy":
1065            scoring_func = balanced_accuracy_score
1066        elif scoring == "f1":
1067            scoring_func = f1_score
1068        elif scoring == "roc_auc":
1069            scoring_func = roc_auc_score
1070        elif scoring == "r2":
1071            scoring_func = r2_score
1072        elif scoring == "mse":
1073            scoring_func = mean_squared_error
1074        elif scoring == "mae":
1075            scoring_func = mean_absolute_error
1076        elif scoring == "mape":
1077            scoring_func = mean_absolute_percentage_error
1078        elif scoring == "rmse":
1079
1080            def scoring_func(y_true, y_pred):
1081                return np.sqrt(mean_squared_error(y_true, y_pred))
1082
1083        X_train, X_val, y_train, y_val = train_test_split(
1084            X, y, test_size=0.2, random_state=random_state
1085        )
1086
1087        res = cross_val_score(
1088            self, X_train, y_train, cv=cv, scoring=scoring, n_jobs=n_jobs
1089        )  # cross-validation error
1090
1091        if penalized == False:
1092            return res
1093
1094        DescribeResult = namedtuple(
1095            "DescribeResult", ["cv_score", "val_score", "penalized_score"]
1096        )
1097
1098        numerator = res.mean()
1099
1100        # Evaluate on the held-out validation fold
1101        preds_val = self.fit(X_train, y_train).predict(X_val)
1102        try:  # `scoring` may be a callable metric f(y_true, y_pred)
1103            denominator = scoring(y_val, preds_val)  # validation error
1104        except TypeError:  # `scoring` is a string; use the resolved scoring_func
1105            denominator = scoring_func(y_val, preds_val)
1106
1107        # penalize the gap between cv and validation scores (assumes higher scores are better)
1108        if objective == "abs":
1109            penalized_score = np.abs(numerator - denominator) + epsilon * (
1110                1 / denominator + 1 / numerator
1111            )
1112        elif objective == "relative":
1113            ratio = numerator / denominator
1114            penalized_score = np.abs(ratio - 1) + epsilon * (
1115                1 / denominator + 1 / numerator
1116            )
1117
1118        return DescribeResult(
1119            cv_score=numerator,
1120            val_score=denominator,
1121            penalized_score=penalized_score,
1122        )
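For illustration, here is a hedged usage sketch of the penalized cross-validation score defined above; the dataset, the wrapped `Ridge` learner and the chosen hyperparameters are arbitrary assumptions for the example, not part of the library's documentation:

```python
# Hypothetical usage of Base.cross_val_score (inherited by the estimators)
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Ridge
import nnetsauce as ns

X, y = load_diabetes(return_X_y=True)

# CustomRegressor augments a scikit-learn learner with nnetsauce features
model = ns.CustomRegressor(obj=Ridge(), n_hidden_features=10, seed=42)

# penalized=False returns the raw scikit-learn cross_val_score array
raw_scores = model.cross_val_score(X, y, cv=5, scoring="r2", penalized=False)

# penalized=True returns a namedtuple (cv_score, val_score, penalized_score)
res = model.cross_val_score(X, y, cv=5, scoring="r2", epsilon=0.5, objective="abs")
print(raw_scores.mean(), res.cv_score, res.val_score, res.penalized_score)
```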

Base model from which all the other classes inherit.

This class contains the most important data preprocessing/feature engineering methods.

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or
    not (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original features are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for type_clust='kmeans' or type_clust='gmm'
    clustering (could be 0: no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot);
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std'), MinMax scaling ('minmax'), robust scaling ('robust') or max absolute scaling ('maxabs')

col_sample: float
    percentage of features randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

seed: int
    reproducibility seed for nodes_sim=='uniform', clustering and dropout

backend: str
    "cpu" or "gpu" or "tpu"
def encode_clusters(self, X=None, predict=False, scaler=None, **kwargs):
248    def encode_clusters(self, X=None, predict=False, scaler=None, **kwargs):  #
249        """Create new covariates with kmeans or GMM clustering
250
251        Parameters:
252
253            X: {array-like}, shape = [n_samples, n_features]
254                Training vectors, where n_samples is the number
255                of samples and n_features is the number of features.
256
257            predict: boolean
258                is False on training set and True on test set
259
260            scaler: {object} of class StandardScaler, MinMaxScaler, RobustScaler or MaxAbsScaler
261                if scaler has already been fitted on training data (online training), it can be passed here
262
263            **kwargs:
264                additional parameters to be passed to the
265                clustering method
266
267        Returns:
268
269            Clusters' matrix, one-hot encoded: {array-like}
270
271        """
272
273        np.random.seed(self.seed)
274
275        if X is None:
276            X = self.X_
277
278        if isinstance(X, pd.DataFrame):
279            X = copy.deepcopy(X.values.astype(float))
280
281        if len(X.shape) == 1:
282            X = X.reshape(1, -1)
283
284        if predict is False:  # encode training set
285
286            # scale input data before clustering
287            self.clustering_scaler_, scaled_X = mo.scale_covariates(
288                X, choice=self.type_scaling[2], scaler=self.clustering_scaler_
289            )
290
291            self.clustering_obj_, X_clustered = mo.cluster_covariates(
292                scaled_X,
293                self.n_clusters,
294                self.seed,
295                type_clust=self.type_clust,
296                **kwargs
297            )
298
299            if self.cluster_encode:
300                return mo.one_hot_encode(X_clustered, self.n_clusters).astype(
301                    np.float16
302                )
303
304            return X_clustered.astype(np.float16)
305
306        # if predict == True, encode test set
307        X_clustered = self.clustering_obj_.predict(
308            self.clustering_scaler_.transform(X)
309        )
310
311        if self.cluster_encode == True:
312            return mo.one_hot_encode(X_clustered, self.n_clusters).astype(
313                np.float16
314            )
315
316        return X_clustered.astype(np.float16)

Create new covariates with kmeans or GMM clustering

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

predict: boolean
    is False on training set and True on test set

scaler: {object} of class StandardScaler, MinMaxScaler, RobustScaler or MaxAbsScaler
    if scaler has already been fitted on training data (online training), it can be passed here

**kwargs:
    additional parameters to be passed to the
    clustering method

Returns:

Clusters' matrix, one-hot encoded: {array-like}
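A minimal sketch of what this method returns, assuming a freshly constructed BaseRegressor with k-means clustering and one-hot encoding (the shapes follow from the description above):

```python
import numpy as np
import nnetsauce as ns

rng = np.random.default_rng(0)
X_train = rng.normal(size=(50, 3))
X_new = rng.normal(size=(5, 3))

model = ns.BaseRegressor(n_clusters=3, cluster_encode=True, type_clust="kmeans")

# training set: fits the scaler and the clustering, returns one-hot encoded clusters
clusters_train = model.encode_clusters(X_train)             # expected shape (50, 3)

# test set: reuses the fitted scaler and clustering object
clusters_new = model.encode_clusters(X_new, predict=True)   # expected shape (5, 3)
print(clusters_train.shape, clusters_new.shape)
```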
def create_layer(self, scaled_X, W=None):
318    def create_layer(self, scaled_X, W=None):
319        """Create hidden layer.
320
321        Parameters:
322
323            scaled_X: {array-like}, shape = [n_samples, n_features]
324                Training vectors, where n_samples is the number
325                of samples and n_features is the number of features
326
327            W: {array-like}, shape = [n_features, hidden_features]
328                if provided, constructs the hidden layer with W; otherwise computed internally
329
330        Returns:
331
332            Hidden layer matrix: {array-like}
333
334        """
335
336        n_features = scaled_X.shape[1]
337
338        # hash_sim = {
339        #         "sobol": generate_sobol,
340        #         "hammersley": generate_hammersley,
341        #         "uniform": generate_uniform,
342        #         "halton": generate_halton
343        #     }
344
345        if self.bias is False:  # no bias term in the hidden layer
346            if W is None:
347                if self.nodes_sim == "sobol":
348                    self.W_ = generate_sobol(
349                        n_dims=n_features,
350                        n_points=self.n_hidden_features,
351                        seed=self.seed,
352                    )
353                elif self.nodes_sim == "hammersley":
354                    self.W_ = generate_hammersley(
355                        n_dims=n_features,
356                        n_points=self.n_hidden_features,
357                        seed=self.seed,
358                    )
359                elif self.nodes_sim == "uniform":
360                    self.W_ = generate_uniform(
361                        n_dims=n_features,
362                        n_points=self.n_hidden_features,
363                        seed=self.seed,
364                    )
365                else:
366                    self.W_ = generate_halton(
367                        n_dims=n_features,
368                        n_points=self.n_hidden_features,
369                        seed=self.seed,
370                    )
371
372                assert (
373                    scaled_X.shape[1] == self.W_.shape[0]
374                ), "check dimensions of covariates X and matrix W"
375
376                return mo.dropout(
377                    x=self.activation_func(
378                        mo.safe_sparse_dot(
379                            a=scaled_X, b=self.W_, backend=self.backend
380                        )
381                    ),
382                    drop_prob=self.dropout,
383                    seed=self.seed,
384                )
385
386            # W is not none
387            assert (
388                scaled_X.shape[1] == W.shape[0]
389            ), "check dimensions of covariates X and matrix W"
390
391            # self.W_ = W
392            return mo.dropout(
393                x=self.activation_func(
394                    mo.safe_sparse_dot(a=scaled_X, b=W, backend=self.backend)
395                ),
396                drop_prob=self.dropout,
397                seed=self.seed,
398            )
399
400        # with bias term in the hidden layer
401        if W is None:
402            n_features_1 = n_features + 1
403
404            if self.nodes_sim == "sobol":
405                self.W_ = generate_sobol(
406                    n_dims=n_features_1,
407                    n_points=self.n_hidden_features,
408                    seed=self.seed,
409                )
410            elif self.nodes_sim == "hammersley":
411                self.W_ = generate_hammersley(
412                    n_dims=n_features_1,
413                    n_points=self.n_hidden_features,
414                    seed=self.seed,
415                )
416            elif self.nodes_sim == "uniform":
417                self.W_ = generate_uniform(
418                    n_dims=n_features_1,
419                    n_points=self.n_hidden_features,
420                    seed=self.seed,
421                )
422            else:
423                self.W_ = generate_halton(
424                    n_dims=n_features_1,
425                    n_points=self.n_hidden_features,
426                    seed=self.seed,
427                )
428
429            # self.W_ = hash_sim[self.nodes_sim](
430            #         n_dims=n_features_1,
431            #         n_points=self.n_hidden_features,
432            #         seed=self.seed,
433            #     )
434
435            return mo.dropout(
436                x=self.activation_func(
437                    mo.safe_sparse_dot(
438                        a=mo.cbind(
439                            np.ones(scaled_X.shape[0]),
440                            scaled_X,
441                            backend=self.backend,
442                        ),
443                        b=self.W_,
444                        backend=self.backend,
445                    )
446                ),
447                drop_prob=self.dropout,
448                seed=self.seed,
449            )
450
451        # W is not None
452        # self.W_ = W
453        return mo.dropout(
454            x=self.activation_func(
455                mo.safe_sparse_dot(
456                    a=mo.cbind(
457                        np.ones(scaled_X.shape[0]),
458                        scaled_X,
459                        backend=self.backend,
460                    ),
461                    b=W,
462                    backend=self.backend,
463                )
464            ),
465            drop_prob=self.dropout,
466            seed=self.seed,
467        )

Create hidden layer.

Parameters:

scaled_X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features

W: {array-like}, shape = [n_features, hidden_features]
    if provided, constructs the hidden layer with W; otherwise computed internally

Returns:

Hidden layer matrix: {array-like}
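Conceptually, the hidden layer is an elementwise activation applied to a (quasi-)random projection of the scaled inputs, with an optional bias column and dropout. Below is a hedged NumPy sketch of the bias=True, 'relu' case (dropout omitted for brevity); it mirrors the description above, not the library's exact code path:

```python
import numpy as np

rng = np.random.default_rng(123)
scaled_X = rng.normal(size=(5, 4))            # n_samples x n_features

n_hidden_features = 10
# one extra row in W for the bias column (bias=True)
W = rng.uniform(-1.0, 1.0, size=(4 + 1, n_hidden_features))

X_bias = np.column_stack([np.ones(scaled_X.shape[0]), scaled_X])
Phi_X = np.maximum(X_bias @ W, 0.0)           # 'relu' activation
print(Phi_X.shape)                            # (5, 10)
```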
def cook_training_set(self, y=None, X=None, W=None, **kwargs):
545    def cook_training_set(self, y=None, X=None, W=None, **kwargs):
546        """Create new hidden features for training set, with hidden layer, center the response.
547
548        Parameters:
549
550            y: array-like, shape = [n_samples]
551                Target values
552
553            X: {array-like}, shape = [n_samples, n_features]
554                Training vectors, where n_samples is the number
555                of samples and n_features is the number of features
556
557            W: {array-like}, shape = [n_features, hidden_features]
558                if provided, constructs the hidden layer via W
559
560        Returns:
561
562            (centered response, direct link + hidden layer matrix): {tuple}
563
564        """
565
566        # either X and y are stored or not
567        # assert ((y is None) & (X is None)) | ((y is not None) & (X is not None))
568        if self.n_hidden_features > 0:  # has a hidden layer
569            assert (
570                len(self.type_scaling) >= 2
571            ), "must have len(self.type_scaling) >= 2 when self.n_hidden_features > 0"
572
573        if X is None:
574
575            if self.col_sample == 1:
576                input_X = self.X_
577            else:
578                n_features = self.X_.shape[1]
579                new_n_features = int(np.ceil(n_features * self.col_sample))
580                assert (
581                    new_n_features >= 1
582                ), "check class attribute 'col_sample' and the number of covariates provided for X"
583                np.random.seed(self.seed)
584                index_col = np.random.choice(
585                    range(n_features), size=new_n_features, replace=False
586                )
587                self.index_col_ = index_col
588                input_X = self.X_[:, self.index_col_]
589
590        else:  # X is not None # keep X vs self.X_
591
592            if isinstance(X, pd.DataFrame):
593                X = copy.deepcopy(X.values.astype(float))
594
595            if self.col_sample == 1:
596                input_X = X
597            else:
598                n_features = X.shape[1]
599                new_n_features = int(np.ceil(n_features * self.col_sample))
600                assert (
601                    new_n_features >= 1
602                ), "check class attribute 'col_sample' and the number of covariates provided for X"
603                np.random.seed(self.seed)
604                index_col = np.random.choice(
605                    range(n_features), size=new_n_features, replace=False
606                )
607                self.index_col_ = index_col
608                input_X = X[:, self.index_col_]
609
610        if self.n_clusters <= 0:
 611            # data without any clustering: self.n_clusters <= 0 -----
612
613            if self.n_hidden_features > 0:  # with hidden layer
614
615                self.nn_scaler_, scaled_X = mo.scale_covariates(
616                    input_X, choice=self.type_scaling[1], scaler=self.nn_scaler_
617                )
618                Phi_X = (
619                    self.create_layer(scaled_X)
620                    if W is None
621                    else self.create_layer(scaled_X, W=W)
622                )
623                Z = (
624                    mo.cbind(input_X, Phi_X, backend=self.backend)
625                    if self.direct_link is True
626                    else Phi_X
627                )
628                self.scaler_, scaled_Z = mo.scale_covariates(
629                    Z, choice=self.type_scaling[0], scaler=self.scaler_
630                )
631            else:  # no hidden layer
632                Z = input_X
633                self.scaler_, scaled_Z = mo.scale_covariates(
634                    Z, choice=self.type_scaling[0], scaler=self.scaler_
635                )
636
637        else:
638
 639            # data with clustering: self.n_clusters > 0 -----
640
641            augmented_X = mo.cbind(
642                input_X,
643                self.encode_clusters(input_X, **kwargs),
644                backend=self.backend,
645            )
646
647            if self.n_hidden_features > 0:  # with hidden layer
648
649                self.nn_scaler_, scaled_X = mo.scale_covariates(
650                    augmented_X,
651                    choice=self.type_scaling[1],
652                    scaler=self.nn_scaler_,
653                )
654                Phi_X = (
655                    self.create_layer(scaled_X)
656                    if W is None
657                    else self.create_layer(scaled_X, W=W)
658                )
659                Z = (
660                    mo.cbind(augmented_X, Phi_X, backend=self.backend)
661                    if self.direct_link is True
662                    else Phi_X
663                )
664                self.scaler_, scaled_Z = mo.scale_covariates(
665                    Z, choice=self.type_scaling[0], scaler=self.scaler_
666                )
667            else:  # no hidden layer
668                Z = augmented_X
669                self.scaler_, scaled_Z = mo.scale_covariates(
670                    Z, choice=self.type_scaling[0], scaler=self.scaler_
671                )
672
673        # Returning model inputs -----
674        if mx.is_factor(y) is False:  # regression
675            # center y
676            if y is None:
677                self.y_mean_, centered_y = mo.center_response(self.y_)
678            else:
679                self.y_mean_, centered_y = mo.center_response(y)
680
681            # y is subsampled
682            if self.row_sample < 1:
683                n, p = Z.shape
684
685                self.subsampler_ = (
686                    SubSampler(
687                        y=self.y_, row_sample=self.row_sample, seed=self.seed
688                    )
689                    if y is None
690                    else SubSampler(
691                        y=y, row_sample=self.row_sample, seed=self.seed
692                    )
693                )
694
695                self.index_row_ = self.subsampler_.subsample()
696
697                n_row_sample = len(self.index_row_)
698                # regression
699                return (
700                    centered_y[self.index_row_].reshape(n_row_sample),
701                    self.scaler_.transform(
702                        Z[self.index_row_, :].reshape(n_row_sample, p)
703                    ),
704                )
705            # y is not subsampled
706            # regression
707            return (centered_y, self.scaler_.transform(Z))
708
709        # classification
710        # y is subsampled
711        if self.row_sample < 1:
712            n, p = Z.shape
713
714            self.subsampler_ = (
715                SubSampler(
716                    y=self.y_, row_sample=self.row_sample, seed=self.seed
717                )
718                if y is None
719                else SubSampler(y=y, row_sample=self.row_sample, seed=self.seed)
720            )
721
722            self.index_row_ = self.subsampler_.subsample()
723
724            n_row_sample = len(self.index_row_)
725            # classification
726            return (
727                y[self.index_row_].reshape(n_row_sample),
728                self.scaler_.transform(
729                    Z[self.index_row_, :].reshape(n_row_sample, p)
730                ),
731            )
732        # y is not subsampled
733        # classification
734        return (y, self.scaler_.transform(Z))

Create new hidden features for training set, with hidden layer, center the response.

Parameters:

y: array-like, shape = [n_samples]
    Target values

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features

W: {array-like}, shape = [n_features, hidden_features]
    if provided, constructs the hidden layer via W

Returns:

(centered response, direct link + hidden layer matrix): {tuple}
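To make the returned tuple concrete, here is a hedged sketch; the expected column count follows from the description above (original features, plus one-hot encoded clusters when n_clusters > 0, plus the hidden features when direct_link=True):

```python
import numpy as np
import nnetsauce as ns

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 4))
y = rng.normal(size=100)

model = ns.BaseRegressor(n_hidden_features=10, n_clusters=2, direct_link=True)
centered_y, scaled_Z = model.cook_training_set(y=y, X=X)

# expected: 4 original features + 2 one-hot cluster columns + 10 hidden features
print(scaled_Z.shape)                       # (100, 16)
print(round(float(centered_y.mean()), 6))   # ~0.0, the response is centered
```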
def cook_test_set(self, X, **kwargs):
736    def cook_test_set(self, X, **kwargs):
737        """Transform data from test set, with hidden layer.
738
739        Parameters:
740
 741            X: {array-like}, shape = [n_samples, n_features]
 742                Test vectors, where n_samples is the number
 743                of samples and n_features is the number of features
 744
 745            **kwargs: additional parameters to be passed to self.encode_clusters
746
747        Returns:
748
749            Transformed test set : {array-like}
750        """
751
752        if isinstance(X, pd.DataFrame):
753            X = copy.deepcopy(X.values.astype(float))
754
755        if len(X.shape) == 1:
756            X = X.reshape(1, -1)
757
758        if (
759            self.n_clusters == 0
 760        ):  # data without clustering: self.n_clusters == 0 -----
761            if self.n_hidden_features > 0:
762                # if hidden layer
763                scaled_X = (
764                    self.nn_scaler_.transform(X)
765                    if (self.col_sample == 1)
766                    else self.nn_scaler_.transform(X[:, self.index_col_])
767                )
768                Phi_X = self.create_layer(scaled_X, self.W_)
769                if self.direct_link:
770                    return self.scaler_.transform(
771                        mo.cbind(scaled_X, Phi_X, backend=self.backend)
772                    )
773                # when self.direct_link == False
774                return self.scaler_.transform(Phi_X)
775            # if no hidden layer # self.n_hidden_features == 0
776            return self.scaler_.transform(X)
777
778        # data with clustering: self.n_clusters > 0 -----
779        if self.col_sample == 1:
780            predicted_clusters = self.encode_clusters(
781                X=X, predict=True, **kwargs
782            )
783            augmented_X = mo.cbind(X, predicted_clusters, backend=self.backend)
784        else:
785            predicted_clusters = self.encode_clusters(
786                X=X[:, self.index_col_], predict=True, **kwargs
787            )
788            augmented_X = mo.cbind(
789                X[:, self.index_col_], predicted_clusters, backend=self.backend
790            )
791
792        if self.n_hidden_features > 0:  # if hidden layer
793            scaled_X = self.nn_scaler_.transform(augmented_X)
794            Phi_X = self.create_layer(scaled_X, self.W_)
795            if self.direct_link:
796                return self.scaler_.transform(
797                    mo.cbind(augmented_X, Phi_X, backend=self.backend)
798                )
799            return self.scaler_.transform(Phi_X)
800
801        # if no hidden layer
802        return self.scaler_.transform(augmented_X)

Transform data from test set, with hidden layer.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features

**kwargs: additional parameters to be passed to self.encode_clusters

Returns:

Transformed test set : {array-like}
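A hedged sketch of the train/test symmetry: after fitting (which calls cook_training_set internally), cook_test_set reuses the stored scalers, clustering and hidden-layer weights, so new data is mapped to the same number of columns as the training design matrix:

```python
import numpy as np
import nnetsauce as ns

rng = np.random.default_rng(1)
X_train, y_train = rng.normal(size=(80, 4)), rng.normal(size=80)
X_new = rng.normal(size=(7, 4))

model = ns.BaseRegressor(n_hidden_features=10, n_clusters=2)
model.fit(X_train, y_train)         # fit calls cook_training_set internally
Z_new = model.cook_test_set(X_new)
print(Z_new.shape)                  # expected (7, 16): 4 features + 2 clusters + 10 hidden
```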
class BaseRegressor(nnetsauce.Base, sklearn.base.RegressorMixin):
 15class BaseRegressor(Base, RegressorMixin):
 16    """Random Vector Functional Link Network regression without shrinkage
 17
 18    Parameters:
 19
 20        n_hidden_features: int
 21            number of nodes in the hidden layer
 22
 23        activation_name: str
 24            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 25
 26        a: float
 27            hyperparameter for 'prelu' or 'elu' activation function
 28
 29        nodes_sim: str
 30            type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
 31            'uniform'
 32
 33        bias: boolean
 34            indicates if the hidden layer contains a bias term (True) or
 35            not (False)
 36
 37        dropout: float
 38            regularization parameter; (random) percentage of nodes dropped out
 39            of the training
 40
 41        direct_link: boolean
 42            indicates if the original features are included (True) in model's
 43            fitting or not (False)
 44
 45        n_clusters: int
 46            number of clusters for type_clust='kmeans' or type_clust='gmm'
 47            clustering (could be 0: no clustering)
 48
 49        cluster_encode: bool
 50            defines how the variable containing clusters is treated (default is one-hot);
 51            if `False`, then labels are used, without one-hot encoding
 52
 53        type_clust: str
 54            type of clustering method: currently k-means ('kmeans') or Gaussian
 55            Mixture Model ('gmm')
 56
 57        type_scaling: a tuple of 3 strings
 58            scaling methods for inputs, hidden layer, and clustering respectively
 59            (and when relevant).
 60            Currently available: standardization ('std') or MinMax scaling ('minmax')
 61
 62        col_sample: float
 63            percentage of features randomly chosen for training
 64
 65        row_sample: float
 66            percentage of rows chosen for training, by stratified bootstrapping
 67
 68        seed: int
 69            reproducibility seed for nodes_sim=='uniform', clustering and dropout
 70
 71        backend: str
 72            "cpu" or "gpu" or "tpu"
 73
 74    Attributes:
 75
 76        beta_: vector
 77            regression coefficients
 78
 79        GCV_: float
 80            Generalized Cross-Validation error
 81
 82    """
 83
 84    # construct the object -----
 85
 86    def __init__(
 87        self,
 88        n_hidden_features=5,
 89        activation_name="relu",
 90        a=0.01,
 91        nodes_sim="sobol",
 92        bias=True,
 93        dropout=0,
 94        direct_link=True,
 95        n_clusters=2,
 96        cluster_encode=True,
 97        type_clust="kmeans",
 98        type_scaling=("std", "std", "std"),
 99        col_sample=1,
100        row_sample=1,
101        seed=123,
102        backend="cpu",
103    ):
104        super().__init__(
105            n_hidden_features=n_hidden_features,
106            activation_name=activation_name,
107            a=a,
108            nodes_sim=nodes_sim,
109            bias=bias,
110            dropout=dropout,
111            direct_link=direct_link,
112            n_clusters=n_clusters,
113            cluster_encode=cluster_encode,
114            type_clust=type_clust,
115            type_scaling=type_scaling,
116            col_sample=col_sample,
117            row_sample=row_sample,
118            seed=seed,
119            backend=backend,
120        )
121
122    def fit(self, X, y, **kwargs):
123        """Fit BaseRegressor to training data (X, y)
124
125        Parameters:
126
127            X: {array-like}, shape = [n_samples, n_features]
128                Training vectors, where n_samples is the number
129                of samples and n_features is the number of features
130
131            y: array-like, shape = [n_samples]
132                Target values
133
134            **kwargs: additional parameters to be passed to self.cook_training_set
135
136        Returns:
137
138            self: object
139        """
140
141        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
142
143        fit_obj = lmf.beta_Sigma_hat(
144            X=scaled_Z, y=centered_y, backend=self.backend
145        )
146
147        self.beta_ = fit_obj["beta_hat"]
148
149        self.GCV_ = fit_obj["GCV"]
150
151        return self
152
153    def predict(self, X, **kwargs):
154        """Predict test data X.
155
156        Parameters:
157
158            X: {array-like}, shape = [n_samples, n_features]
 159                Test vectors, where n_samples is the number
160                of samples and n_features is the number of features
161
162            **kwargs: additional parameters to be passed to self.cook_test_set
163
164        Returns:
165
166            model predictions: {array-like}
167        """
168
169        if len(X.shape) == 1:
170            n_features = X.shape[0]
171            new_X = mo.rbind(
172                X.reshape(1, n_features),
173                np.ones(n_features).reshape(1, n_features),
174            )
175
176            return (
177                self.y_mean_
178                + mo.safe_sparse_dot(
179                    a=self.cook_test_set(new_X, **kwargs),
180                    b=self.beta_,
181                    backend=self.backend,
182                )
183            )[0]
184
185        return self.y_mean_ + mo.safe_sparse_dot(
186            a=self.cook_test_set(X, **kwargs),
187            b=self.beta_,
188            backend=self.backend,
189        )

Random Vector Functional Link Network regression without shrinkage

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or
    not (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original features are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for type_clust='kmeans' or type_clust='gmm'
    clustering (could be 0: no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot);
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

col_sample: float
    percentage of features randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

seed: int
    reproducibility seed for nodes_sim=='uniform', clustering and dropout

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

beta_: vector
    regression coefficients

GCV_: float
    Generalized Cross-Validation error
def fit(self, X, y, **kwargs):
122    def fit(self, X, y, **kwargs):
123        """Fit BaseRegressor to training data (X, y)
124
125        Parameters:
126
127            X: {array-like}, shape = [n_samples, n_features]
128                Training vectors, where n_samples is the number
129                of samples and n_features is the number of features
130
131            y: array-like, shape = [n_samples]
132                Target values
133
134            **kwargs: additional parameters to be passed to self.cook_training_set
135
136        Returns:
137
138            self: object
139        """
140
141        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
142
143        fit_obj = lmf.beta_Sigma_hat(
144            X=scaled_Z, y=centered_y, backend=self.backend
145        )
146
147        self.beta_ = fit_obj["beta_hat"]
148
149        self.GCV_ = fit_obj["GCV"]
150
151        return self

Fit BaseRegressor to training data (X, y)

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features

y: array-like, shape = [n_samples]
    Target values

**kwargs: additional parameters to be passed to self.cook_training_set

Returns:

self: object
def predict(self, X, **kwargs):
153    def predict(self, X, **kwargs):
154        """Predict test data X.
155
156        Parameters:
157
158            X: {array-like}, shape = [n_samples, n_features]
 159                Test vectors, where n_samples is the number
160                of samples and n_features is the number of features
161
162            **kwargs: additional parameters to be passed to self.cook_test_set
163
164        Returns:
165
166            model predictions: {array-like}
167        """
168
169        if len(X.shape) == 1:
170            n_features = X.shape[0]
171            new_X = mo.rbind(
172                X.reshape(1, n_features),
173                np.ones(n_features).reshape(1, n_features),
174            )
175
176            return (
177                self.y_mean_
178                + mo.safe_sparse_dot(
179                    a=self.cook_test_set(new_X, **kwargs),
180                    b=self.beta_,
181                    backend=self.backend,
182                )
183            )[0]
184
185        return self.y_mean_ + mo.safe_sparse_dot(
186            a=self.cook_test_set(X, **kwargs),
187            b=self.beta_,
188            backend=self.backend,
189        )

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features

**kwargs: additional parameters to be passed to self.cook_test_set

Returns:

model predictions: {array-like}
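A hedged end-to-end example for BaseRegressor on a scikit-learn toy dataset (the dataset and hyperparameter values are arbitrary choices for illustration):

```python
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
import nnetsauce as ns

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123)

reg = ns.BaseRegressor(n_hidden_features=10, n_clusters=2, seed=123)
reg.fit(X_train, y_train)

print(reg.GCV_)                 # Generalized Cross-Validation error
print(reg.predict(X_test)[:5])  # predictions on new data
```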
class BayesianRVFLRegressor(nnetsauce.Base, sklearn.base.RegressorMixin):
 15class BayesianRVFLRegressor(Base, RegressorMixin):
 16    """Bayesian Random Vector Functional Link Network regression with one prior
 17
 18    Parameters:
 19
 20        n_hidden_features: int
 21            number of nodes in the hidden layer
 22
 23        activation_name: str
 24            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 25
 26        a: float
 27            hyperparameter for 'prelu' or 'elu' activation function
 28
 29        nodes_sim: str
 30            type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'
 31
 32        bias: boolean
 33            indicates if the hidden layer contains a bias term (True) or not (False)
 34
 35        dropout: float
 36            regularization parameter; (random) percentage of nodes dropped out
 37            of the training
 38
 39        direct_link: boolean
 40            indicates if the original features are included (True) in model's fitting or not (False)
 41
 42        n_clusters: int
 43            number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)
 44
 45        cluster_encode: bool
 46            defines how the variable containing clusters is treated (default is one-hot)
 47            if `False`, then labels are used, without one-hot encoding
 48
 49        type_clust: str
 50            type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')
 51
 52        type_scaling: a tuple of 3 strings
 53            scaling methods for inputs, hidden layer, and clustering respectively
 54            (and when relevant).
 55            Currently available: standardization ('std') or MinMax scaling ('minmax')
 56
 57        seed: int
 58            reproducibility seed for nodes_sim=='uniform'
 59
 60        s: float
 61            std. dev. of regression parameters in Bayesian Ridge Regression
 62
 63        sigma: float
 64            std. dev. of residuals in Bayesian Ridge Regression
 65
 66        return_std: boolean
 67            if True, uncertainty around predictions is evaluated
 68
 69        backend: str
 70            "cpu" or "gpu" or "tpu"
 71
 72    Attributes:
 73
 74        beta_: array-like
 75            regression's coefficients
 76
 77        Sigma_: array-like
 78            covariance of the distribution of fitted parameters
 79
 80        GCV_: float
 81            Generalized cross-validation error
 82
 83        y_mean_: float
 84            average response
 85
 86    Examples:
 87
 88    ```python
 89    TBD
 90    ```
 91
 92    """
 93
 94    # construct the object -----
 95
 96    def __init__(
 97        self,
 98        n_hidden_features=5,
 99        activation_name="relu",
100        a=0.01,
101        nodes_sim="sobol",
102        bias=True,
103        dropout=0,
104        direct_link=True,
105        n_clusters=2,
106        cluster_encode=True,
107        type_clust="kmeans",
108        type_scaling=("std", "std", "std"),
109        seed=123,
110        s=0.1,
111        sigma=0.05,
112        return_std=True,
113        backend="cpu",
114    ):
115        super().__init__(
116            n_hidden_features=n_hidden_features,
117            activation_name=activation_name,
118            a=a,
119            nodes_sim=nodes_sim,
120            bias=bias,
121            dropout=dropout,
122            direct_link=direct_link,
123            n_clusters=n_clusters,
124            cluster_encode=cluster_encode,
125            type_clust=type_clust,
126            type_scaling=type_scaling,
127            seed=seed,
128            backend=backend,
129        )
130        self.s = s
131        self.sigma = sigma
132        self.beta_ = None
133        self.Sigma_ = None
134        self.GCV_ = None
135        self.return_std = return_std
136
137    def fit(self, X, y, **kwargs):
138        """Fit BayesianRVFLRegressor to training data (X, y).
139
140        Parameters:
141
142            X: {array-like}, shape = [n_samples, n_features]
143                Training vectors, where n_samples is the number
144                of samples and n_features is the number of features.
145
146            y: array-like, shape = [n_samples]
147                Target values.
148
149            **kwargs: additional parameters to be passed to
150                    self.cook_training_set
151
152        Returns:
153
154            self: object
155
156        """
157
158        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
159
160        fit_obj = lmf.beta_Sigma_hat_rvfl(
161            X=scaled_Z,
162            y=centered_y,
163            s=self.s,
164            sigma=self.sigma,
165            fit_intercept=False,
166            return_cov=self.return_std,
167            backend=self.backend,
168        )
169
170        self.beta_ = fit_obj["beta_hat"]
171
172        if self.return_std == True:
173            self.Sigma_ = fit_obj["Sigma_hat"]
174
175        self.GCV_ = fit_obj["GCV"]
176
177        return self
178
179    def predict(self, X, return_std=False, **kwargs):
180        """Predict test data X.
181
182        Parameters:
183
184            X: {array-like}, shape = [n_samples, n_features]
 185                Test vectors, where n_samples is the number
186                of samples and n_features is the number of features.
187
188            return_std: {boolean}, standard dev. is returned or not
189
190            **kwargs: additional parameters to be passed to
191                    self.cook_test_set
192
193        Returns:
194
195            model predictions: {array-like}
196
197        """
198
199        if len(X.shape) == 1:  # one observation in the test set only
200            n_features = X.shape[0]
201            new_X = mo.rbind(
202                x=X.reshape(1, n_features),
203                y=np.ones(n_features).reshape(1, n_features),
204                backend=self.backend,
205            )
206
207        self.return_std = return_std
208
209        if self.return_std == False:
210            if len(X.shape) == 1:
211                return (
212                    self.y_mean_
213                    + mo.safe_sparse_dot(
214                        a=self.cook_test_set(new_X, **kwargs),
215                        b=self.beta_,
216                        backend=self.backend,
217                    )
218                )[0]
219
220            return self.y_mean_ + mo.safe_sparse_dot(
221                a=self.cook_test_set(X, **kwargs),
222                b=self.beta_,
223                backend=self.backend,
224            )
225
226        else:  # confidence interval required for preds?
227            if len(X.shape) == 1:
228                Z = self.cook_test_set(new_X, **kwargs)
229
230                pred_obj = lmf.beta_Sigma_hat_rvfl(
231                    s=self.s,
232                    sigma=self.sigma,
233                    X_star=Z,
234                    return_cov=True,
235                    beta_hat_=self.beta_,
236                    Sigma_hat_=self.Sigma_,
237                    backend=self.backend,
238                )
239
240                return (
241                    self.y_mean_ + pred_obj["preds"][0],
242                    pred_obj["preds_std"][0],
243                )
244
245            Z = self.cook_test_set(X, **kwargs)
246
247            pred_obj = lmf.beta_Sigma_hat_rvfl(
248                s=self.s,
249                sigma=self.sigma,
250                X_star=Z,
251                return_cov=True,
252                beta_hat_=self.beta_,
253                Sigma_hat_=self.Sigma_,
254                backend=self.backend,
255            )
256
257            return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])

Bayesian Random Vector Functional Link Network regression with one prior

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original features are included (True) in model's fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

seed: int
    reproducibility seed for nodes_sim=='uniform'

s: float
    std. dev. of regression parameters in Bayesian Ridge Regression

sigma: float
    std. dev. of residuals in Bayesian Ridge Regression

return_std: boolean
    if True, uncertainty around predictions is evaluated

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

beta_: array-like
    regression's coefficients

Sigma_: array-like
    covariance of the distribution of fitted parameters

GCV_: float
    Generalized cross-validation error

y_mean_: float
    average response

Examples:

TBD
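
Since no example is provided yet, here is a minimal usage sketch (not taken from the library's examples folder; the dataset and hyperparameter values are illustrative assumptions):

```python
# Hedged sketch: illustrative usage of BayesianRVFLRegressor
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123
)

regr = ns.BayesianRVFLRegressor(n_hidden_features=10, s=0.1, sigma=0.05)
regr.fit(X_train, y_train)

# point forecasts
preds = regr.predict(X_test)

# mean and standard deviation of the predictive distribution
mean, std = regr.predict(X_test, return_std=True)
```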
def fit(self, X, y, **kwargs):
137    def fit(self, X, y, **kwargs):
138        """Fit BayesianRVFLRegressor to training data (X, y).
139
140        Parameters:
141
142            X: {array-like}, shape = [n_samples, n_features]
143                Training vectors, where n_samples is the number
144                of samples and n_features is the number of features.
145
146            y: array-like, shape = [n_samples]
147                Target values.
148
149            **kwargs: additional parameters to be passed to
150                    self.cook_training_set
151
152        Returns:
153
154            self: object
155
156        """
157
158        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
159
160        fit_obj = lmf.beta_Sigma_hat_rvfl(
161            X=scaled_Z,
162            y=centered_y,
163            s=self.s,
164            sigma=self.sigma,
165            fit_intercept=False,
166            return_cov=self.return_std,
167            backend=self.backend,
168        )
169
170        self.beta_ = fit_obj["beta_hat"]
171
172        if self.return_std == True:
173            self.Sigma_ = fit_obj["Sigma_hat"]
174
175        self.GCV_ = fit_obj["GCV"]
176
177        return self

Fit BayesianRVFLRegressor to training data (X, y).

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set

Returns:

self: object
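
A short sketch of `fit` on synthetic data (shapes, seed and hyperparameters are illustrative assumptions); after fitting, `beta_` and `GCV_` are populated, and `Sigma_` is available when `return_std=True` (the default):

```python
import numpy as np
import nnetsauce as ns

rng = np.random.default_rng(123)
X = rng.normal(size=(100, 4))
y = X @ np.array([1.0, -2.0, 0.5, 0.0]) + rng.normal(scale=0.1, size=100)

regr = ns.BayesianRVFLRegressor(n_hidden_features=5)
regr.fit(X, y)

print(regr.beta_.shape)  # fitted coefficients
print(regr.GCV_)         # generalized cross-validation error
```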
def predict(self, X, return_std=False, **kwargs):
179    def predict(self, X, return_std=False, **kwargs):
180        """Predict test data X.
181
182        Parameters:
183
184            X: {array-like}, shape = [n_samples, n_features]
185                Training vectors, where n_samples is the number
186                of samples and n_features is the number of features.
187
188            return_std: {boolean}, standard dev. is returned or not
189
190            **kwargs: additional parameters to be passed to
191                    self.cook_test_set
192
193        Returns:
194
195            model predictions: {array-like}
196
197        """
198
199        if len(X.shape) == 1:  # one observation in the test set only
200            n_features = X.shape[0]
201            new_X = mo.rbind(
202                x=X.reshape(1, n_features),
203                y=np.ones(n_features).reshape(1, n_features),
204                backend=self.backend,
205            )
206
207        self.return_std = return_std
208
209        if self.return_std == False:
210            if len(X.shape) == 1:
211                return (
212                    self.y_mean_
213                    + mo.safe_sparse_dot(
214                        a=self.cook_test_set(new_X, **kwargs),
215                        b=self.beta_,
216                        backend=self.backend,
217                    )
218                )[0]
219
220            return self.y_mean_ + mo.safe_sparse_dot(
221                a=self.cook_test_set(X, **kwargs),
222                b=self.beta_,
223                backend=self.backend,
224            )
225
226        else:  # confidence interval required for preds?
227            if len(X.shape) == 1:
228                Z = self.cook_test_set(new_X, **kwargs)
229
230                pred_obj = lmf.beta_Sigma_hat_rvfl(
231                    s=self.s,
232                    sigma=self.sigma,
233                    X_star=Z,
234                    return_cov=True,
235                    beta_hat_=self.beta_,
236                    Sigma_hat_=self.Sigma_,
237                    backend=self.backend,
238                )
239
240                return (
241                    self.y_mean_ + pred_obj["preds"][0],
242                    pred_obj["preds_std"][0],
243                )
244
245            Z = self.cook_test_set(X, **kwargs)
246
247            pred_obj = lmf.beta_Sigma_hat_rvfl(
248                s=self.s,
249                sigma=self.sigma,
250                X_star=Z,
251                return_cov=True,
252                beta_hat_=self.beta_,
253                Sigma_hat_=self.Sigma_,
254                backend=self.backend,
255            )
256
257            return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

return_std: {boolean}, standard dev. is returned or not

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
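
A hedged sketch of `predict`, assuming `regr` is a fitted BayesianRVFLRegressor as in the `fit` sketch above; it accepts either a 2-d test matrix or a single 1-d observation, and returns a (mean, standard deviation) tuple when `return_std=True`:

```python
# point predictions on a test matrix
preds = regr.predict(X[:10, :])

# a single observation can be passed as a 1-d array
single_pred = regr.predict(X[0, :])

# mean and standard deviation of the predictive distribution
mean, std = regr.predict(X[:10, :], return_std=True)
```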
class BayesianRVFL2Regressor(nnetsauce.Base, sklearn.base.RegressorMixin):
 15class BayesianRVFL2Regressor(Base, RegressorMixin):
 16    """Bayesian Random Vector Functional Link Network regression with two priors
 17
 18    Parameters:
 19
 20        n_hidden_features: int
 21            number of nodes in the hidden layer
 22
 23        activation_name: str
 24            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 25
 26        a: float
 27            hyperparameter for 'prelu' or 'elu' activation function
 28
 29        nodes_sim: str
 30            type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'
 31
 32        bias: boolean
 33            indicates if the hidden layer contains a bias term (True) or not (False)
 34
 35        dropout: float
 36            regularization parameter; (random) percentage of nodes dropped out
 37            of the training
 38
 39        direct_link: boolean
 40            indicates if the original features are included (True) in model's fitting or not (False)
 41
 42        n_clusters: int
 43            number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)
 44
 45        cluster_encode: bool
 46            defines how the variable containing clusters is treated (default is one-hot)
 47            if `False`, then labels are used, without one-hot encoding
 48
 49        type_clust: str
 50            type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')
 51
 52        type_scaling: a tuple of 3 strings
 53            scaling methods for inputs, hidden layer, and clustering respectively
 54            (and when relevant).
 55            Currently available: standardization ('std') or MinMax scaling ('minmax')
 56
 57        seed: int
 58            reproducibility seed for nodes_sim=='uniform'
 59
 60        s1: float
 61            std. dev. of init. regression parameters in Bayesian Ridge Regression
 62
 63        s2: float
 64            std. dev. of augmented regression parameters in Bayesian Ridge Regression
 65
 66        sigma: float
 67            std. dev. of residuals in Bayesian Ridge Regression
 68
 69        return_std: boolean
 70            if True, uncertainty around predictions is evaluated
 71
 72        backend: str
 73            "cpu" or "gpu" or "tpu"
 74
 75    Attributes:
 76
 77        beta_: array-like
 78            regression's coefficients
 79
 80        Sigma_: array-like
 81            covariance of the distribution of fitted parameters
 82
 83        GCV_: float
 84            Generalized cross-validation error
 85
 86        y_mean_: float
 87            average response
 88
 89    Examples:
 90
 91    ```python
 92    TBD
 93    ```
 94
 95    """
 96
 97    # construct the object -----
 98
 99    def __init__(
100        self,
101        n_hidden_features=5,
102        activation_name="relu",
103        a=0.01,
104        nodes_sim="sobol",
105        bias=True,
106        dropout=0,
107        direct_link=True,
108        n_clusters=0,
109        cluster_encode=True,
110        type_clust="kmeans",
111        type_scaling=("std", "std", "std"),
112        seed=123,
113        s1=0.1,
114        s2=0.1,
115        sigma=0.05,
116        return_std=True,
117        backend="cpu",
118    ):
119        super().__init__(
120            n_hidden_features=n_hidden_features,
121            activation_name=activation_name,
122            a=a,
123            nodes_sim=nodes_sim,
124            bias=bias,
125            dropout=dropout,
126            direct_link=direct_link,
127            n_clusters=n_clusters,
128            cluster_encode=cluster_encode,
129            type_clust=type_clust,
130            type_scaling=type_scaling,
131            seed=seed,
132            backend=backend,
133        )
134
135        self.s1 = s1
136        self.s2 = s2
137        self.sigma = sigma
138        self.beta_ = None
139        self.Sigma_ = None
140        self.GCV_ = None
141        self.return_std = return_std
142
143    def fit(self, X, y, **kwargs):
144        """Fit BayesianRVFL2Regressor to training data (X, y)
145
146        Parameters:
147
148            X: {array-like}, shape = [n_samples, n_features]
149                Training vectors, where n_samples is the number
150                of samples and n_features is the number of features
151
152            y: array-like, shape = [n_samples]
153                Target values
154
155            **kwargs: additional parameters to be passed to
156                    self.cook_training_set
157
158        Returns:
159
160            self: object
161
162        """
163
164        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
165
166        n, p = X.shape
167        q = self.n_hidden_features
168
169        if self.direct_link == True:
170            r = p + self.n_clusters
171
172            block11 = (self.s1**2) * np.eye(r)
173            block12 = np.zeros((r, q))
174            block21 = np.zeros((q, r))
175            block22 = (self.s2**2) * np.eye(q)
176
177            Sigma_prior = mo.rbind(
178                x=mo.cbind(x=block11, y=block12, backend=self.backend),
179                y=mo.cbind(x=block21, y=block22, backend=self.backend),
180                backend=self.backend,
181            )
182
183        else:
184            Sigma_prior = (self.s2**2) * np.eye(q)
185
186        fit_obj = lmf.beta_Sigma_hat_rvfl2(
187            X=scaled_Z,
188            y=centered_y,
189            Sigma=Sigma_prior,
190            sigma=self.sigma,
191            fit_intercept=False,
192            return_cov=self.return_std,
193            backend=self.backend,
194        )
195
196        self.beta_ = fit_obj["beta_hat"]
197
198        if self.return_std == True:
199            self.Sigma_ = fit_obj["Sigma_hat"]
200
201        self.GCV_ = fit_obj["GCV"]
202
203        return self
204
205    def predict(self, X, return_std=False, **kwargs):
206        """Predict test data X.
207
208        Parameters:
209
210            X: {array-like}, shape = [n_samples, n_features]
211                Training vectors, where n_samples is the number
212                of samples and n_features is the number of features.
213
214            return_std: {boolean}, standard dev. is returned or not
215
216            **kwargs: additional parameters to be passed to
217                    self.cook_test_set
218
219        Returns:
220
221            model predictions: {array-like}
222
223        """
224
225        if len(X.shape) == 1:  # one observation in the test set only
226            n_features = X.shape[0]
227            new_X = mo.rbind(
228                x=X.reshape(1, n_features),
229                y=np.ones(n_features).reshape(1, n_features),
230                backend=self.backend,
231            )
232
233        self.return_std = return_std
234
235        if self.return_std == False:
236            if len(X.shape) == 1:
237                return (
238                    self.y_mean_
239                    + mo.safe_sparse_dot(
240                        self.cook_test_set(new_X, **kwargs),
241                        self.beta_,
242                        backend=self.backend,
243                    )
244                )[0]
245
246            return self.y_mean_ + mo.safe_sparse_dot(
247                self.cook_test_set(X, **kwargs),
248                self.beta_,
249                backend=self.backend,
250            )
251
252        else:  # confidence interval required for preds?
253            if len(X.shape) == 1:
254                Z = self.cook_test_set(new_X, **kwargs)
255
256                pred_obj = lmf.beta_Sigma_hat_rvfl2(
257                    X_star=Z,
258                    return_cov=self.return_std,
259                    beta_hat_=self.beta_,
260                    Sigma_hat_=self.Sigma_,
261                    backend=self.backend,
262                )
263
264                return (
265                    self.y_mean_ + pred_obj["preds"][0],
266                    pred_obj["preds_std"][0],
267                )
268
269            Z = self.cook_test_set(X, **kwargs)
270
271            pred_obj = lmf.beta_Sigma_hat_rvfl2(
272                X_star=Z,
273                return_cov=self.return_std,
274                beta_hat_=self.beta_,
275                Sigma_hat_=self.Sigma_,
276                backend=self.backend,
277            )
278
279            return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])

Bayesian Random Vector Functional Link Network regression with two priors

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original features are included (True) in model's fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

seed: int
    reproducibility seed for nodes_sim=='uniform'

s1: float
    std. dev. of init. regression parameters in Bayesian Ridge Regression

s2: float
    std. dev. of augmented regression parameters in Bayesian Ridge Regression

sigma: float
    std. dev. of residuals in Bayesian Ridge Regression

return_std: boolean
    if True, uncertainty around predictions is evaluated

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

beta_: array-like
    regression's coefficients

Sigma_: array-like
    covariance of the distribution of fitted parameters

GCV_: float
    Generalized cross-validation error

y_mean_: float
    average response

Examples:

TBD
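
Since no example is provided yet, here is a minimal usage sketch for the two-prior variant (the dataset and hyperparameter values are illustrative assumptions):

```python
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123
)

# s1: prior std. dev. on the original features,
# s2: prior std. dev. on the hidden-layer features
regr2 = ns.BayesianRVFL2Regressor(
    n_hidden_features=10, s1=0.1, s2=0.1, sigma=0.05
)
regr2.fit(X_train, y_train)
mean, std = regr2.predict(X_test, return_std=True)
```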
def fit(self, X, y, **kwargs):
143    def fit(self, X, y, **kwargs):
144        """Fit BayesianRVFL2Regressor to training data (X, y)
145
146        Parameters:
147
148            X: {array-like}, shape = [n_samples, n_features]
149                Training vectors, where n_samples is the number
150                of samples and n_features is the number of features
151
152            y: array-like, shape = [n_samples]
153                Target values
154
155            **kwargs: additional parameters to be passed to
156                    self.cook_training_set
157
158        Returns:
159
160            self: object
161
162        """
163
164        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
165
166        n, p = X.shape
167        q = self.n_hidden_features
168
169        if self.direct_link == True:
170            r = p + self.n_clusters
171
172            block11 = (self.s1**2) * np.eye(r)
173            block12 = np.zeros((r, q))
174            block21 = np.zeros((q, r))
175            block22 = (self.s2**2) * np.eye(q)
176
177            Sigma_prior = mo.rbind(
178                x=mo.cbind(x=block11, y=block12, backend=self.backend),
179                y=mo.cbind(x=block21, y=block22, backend=self.backend),
180                backend=self.backend,
181            )
182
183        else:
184            Sigma_prior = (self.s2**2) * np.eye(q)
185
186        fit_obj = lmf.beta_Sigma_hat_rvfl2(
187            X=scaled_Z,
188            y=centered_y,
189            Sigma=Sigma_prior,
190            sigma=self.sigma,
191            fit_intercept=False,
192            return_cov=self.return_std,
193            backend=self.backend,
194        )
195
196        self.beta_ = fit_obj["beta_hat"]
197
198        if self.return_std == True:
199            self.Sigma_ = fit_obj["Sigma_hat"]
200
201        self.GCV_ = fit_obj["GCV"]
202
203        return self

Fit BayesianRVFL2Regressor to training data (X, y)

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features

y: array-like, shape = [n_samples]
    Target values

**kwargs: additional parameters to be passed to
        self.cook_training_set

Returns:

self: object
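
A short sketch of `fit`, illustrating how the two priors enter the model (synthetic data and values are illustrative assumptions): with `direct_link=True`, the prior covariance is block-diagonal, with `s1**2` on the original (and cluster) features and `s2**2` on the hidden nodes:

```python
import numpy as np
import nnetsauce as ns

rng = np.random.default_rng(42)
X = rng.normal(size=(80, 3))
y = X.sum(axis=1) + rng.normal(scale=0.1, size=80)

regr2 = ns.BayesianRVFL2Regressor(
    n_hidden_features=5, s1=0.5, s2=0.1, direct_link=True
)
regr2.fit(X, y)
print(regr2.GCV_)  # generalized cross-validation error
```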
def predict(self, X, return_std=False, **kwargs):
205    def predict(self, X, return_std=False, **kwargs):
206        """Predict test data X.
207
208        Parameters:
209
210            X: {array-like}, shape = [n_samples, n_features]
211                Training vectors, where n_samples is the number
212                of samples and n_features is the number of features.
213
214            return_std: {boolean}, standard dev. is returned or not
215
216            **kwargs: additional parameters to be passed to
217                    self.cook_test_set
218
219        Returns:
220
221            model predictions: {array-like}
222
223        """
224
225        if len(X.shape) == 1:  # one observation in the test set only
226            n_features = X.shape[0]
227            new_X = mo.rbind(
228                x=X.reshape(1, n_features),
229                y=np.ones(n_features).reshape(1, n_features),
230                backend=self.backend,
231            )
232
233        self.return_std = return_std
234
235        if self.return_std == False:
236            if len(X.shape) == 1:
237                return (
238                    self.y_mean_
239                    + mo.safe_sparse_dot(
240                        self.cook_test_set(new_X, **kwargs),
241                        self.beta_,
242                        backend=self.backend,
243                    )
244                )[0]
245
246            return self.y_mean_ + mo.safe_sparse_dot(
247                self.cook_test_set(X, **kwargs),
248                self.beta_,
249                backend=self.backend,
250            )
251
252        else:  # confidence interval required for preds?
253            if len(X.shape) == 1:
254                Z = self.cook_test_set(new_X, **kwargs)
255
256                pred_obj = lmf.beta_Sigma_hat_rvfl2(
257                    X_star=Z,
258                    return_cov=self.return_std,
259                    beta_hat_=self.beta_,
260                    Sigma_hat_=self.Sigma_,
261                    backend=self.backend,
262                )
263
264                return (
265                    self.y_mean_ + pred_obj["preds"][0],
266                    pred_obj["preds_std"][0],
267                )
268
269            Z = self.cook_test_set(X, **kwargs)
270
271            pred_obj = lmf.beta_Sigma_hat_rvfl2(
272                X_star=Z,
273                return_cov=self.return_std,
274                beta_hat_=self.beta_,
275                Sigma_hat_=self.Sigma_,
276                backend=self.backend,
277            )
278
279            return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

return_std: {boolean}, standard dev. is returned or not

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
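
A hedged sketch of `predict`, assuming `regr2` is a fitted BayesianRVFL2Regressor as in the `fit` sketch above:

```python
# point forecasts
preds = regr2.predict(X[:5, :])

# mean and standard deviation of the predictive distribution
mean, std = regr2.predict(X[:5, :], return_std=True)
```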
class ClassicalMTS(nnetsauce.Base):
 42class ClassicalMTS(Base):
 43    """Multivariate time series (FactorMTS) forecasting with Factor models
 44
 45    Parameters:
 46
 47        model: type of model: str.
 48            currently, 'VAR', 'VECM', 'ARIMA', 'ETS', 'Theta'
 49
 50    Attributes:
 51
 52        df_: data frame
 53            the input data frame, in case a data.frame is provided to `fit`
 54
 55        level_: int
 56            level of confidence for prediction intervals (default is 95)
 57
 58    Examples:
 59    See examples/classical_mts_timeseries.py
 60    """
 61
 62    # construct the object -----
 63
 64    def __init__(self, model="VAR"):
 65
 66        self.model = model
 67        if self.model == "VAR":
 68            self.obj = VAR
 69        elif self.model == "VECM":
 70            self.obj = VECM
 71        elif self.model == "ARIMA":
 72            self.obj = ARIMA
 73        elif self.model == "ETS":
 74            self.obj = ExponentialSmoothing
 75        elif self.model == "Theta":
 76            self.obj = ThetaModel
 77        else:
 78            raise ValueError("model not recognized")
 79        self.n_series = None
 80        self.replications = None
 81        self.mean_ = None
 82        self.upper_ = None
 83        self.lower_ = None
 84        self.output_dates_ = None
 85        self.alpha_ = None
 86        self.df_ = None
 87        self.residuals_ = []
 88        self.sims_ = None
 89        self.level_ = None
 90
 91    def fit(self, X, **kwargs):
 92        """Fit FactorMTS model to training data X, with optional regressors xreg
 93
 94        Parameters:
 95
 96        X: {array-like}, shape = [n_samples, n_features]
 97            Training time series, where n_samples is the number
 98            of samples and n_features is the number of features;
 99            X must be in increasing order (most recent observations last)
100
101        **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)
102
103        Returns:
104
105        self: object
106        """
107
108        try:
109            self.n_series = X.shape[1]
110        except Exception:
111            self.n_series = 1
112
113        if (isinstance(X, pd.DataFrame) is False) and isinstance(
114            X, pd.Series
115        ) is False:  # input data set is a numpy array
116
117            X = pd.DataFrame(X)
118            if self.n_series > 1:
119                self.series_names = [
120                    "series" + str(i) for i in range(X.shape[1])
121                ]
122            else:
123                self.series_names = "series0"
124
125        else:  # input data set is a DataFrame or Series with column names
126
127            X_index = None
128            if X.index is not None and len(X.shape) > 1:
129                X_index = X.index
130                X = copy.deepcopy(mo.convert_df_to_numeric(X))
131            if X_index is not None:
132                try:
133                    X.index = X_index
134                except Exception:
135                    pass
136            if isinstance(X, pd.DataFrame):
137                self.series_names = X.columns.tolist()
138            else:
139                self.series_names = X.name
140
141        if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series):
142            self.df_ = X
143            X = X.values
144            self.df_.columns = self.series_names
145            self.input_dates = ts.compute_input_dates(self.df_)
146        else:
147            self.df_ = pd.DataFrame(X, columns=self.series_names)
148
149        if self.model == "Theta":
150            self.obj = self.obj(self.df_, **kwargs).fit()
151        else:
152            self.obj = self.obj(X, **kwargs).fit(**kwargs)
153
154        return self
155
156    def predict(self, h=5, level=95, **kwargs):
157        """Forecast all the time series, h steps ahead
158
159        Parameters:
160
161        h: {integer}
162            Forecasting horizon
163
164        **kwargs: additional parameters to be passed to
165                self.cook_test_set
166
167        Returns:
168
169        model predictions for horizon = h: {array-like}
170
171        """
172
173        self.output_dates_, frequency = ts.compute_output_dates(self.df_, h)
174
175        self.level_ = level
176
177        self.lower_ = None  # do not remove (/!\)
178
179        self.upper_ = None  # do not remove (/!\)
180
181        self.sims_ = None  # do not remove (/!\)
182
183        self.level_ = level
184
185        self.alpha_ = 100 - level
186
187        pi_multiplier = norm.ppf(1 - self.alpha_ / 200)
188
189        # Named tuple for forecast results
190        DescribeResult = namedtuple(
191            "DescribeResult", ("mean", "lower", "upper")
192        )
193
194        if self.model == "VAR":
195            mean_forecast, lower_bound, upper_bound = (
196                self.obj.forecast_interval(
197                    self.obj.endog, steps=h, alpha=self.alpha_ / 100, **kwargs
198                )
199            )
200
201        elif self.model == "VECM":
202            forecast_result = self.obj.predict(steps=h)
203            mean_forecast = forecast_result
204            lower_bound, upper_bound = self._compute_confidence_intervals(
205                forecast_result, alpha=self.alpha_ / 100, **kwargs
206            )
207
208        elif self.model == "ARIMA":
209            forecast_result = self.obj.get_forecast(steps=h)
210            mean_forecast = forecast_result.predicted_mean
211            lower_bound = forecast_result.conf_int()[:, 0]
212            upper_bound = forecast_result.conf_int()[:, 1]
213
214        elif self.model == "ETS":
215            forecast_result = self.obj.forecast(steps=h)
216            residuals = self.obj.resid
217            std_errors = np.std(residuals)
218            mean_forecast = forecast_result
219            lower_bound = forecast_result - pi_multiplier * std_errors
220            upper_bound = forecast_result + pi_multiplier * std_errors
221
222        elif self.model == "Theta":
223            try:
224                mean_forecast = self.obj.forecast(steps=h).values
225                forecast_result = self.obj.prediction_intervals(
226                    steps=h, alpha=self.alpha_ / 100, **kwargs
227                )
228                lower_bound = forecast_result["lower"].values
229                upper_bound = forecast_result["upper"].values
230            except Exception:
231                mean_forecast = self.obj.forecast(steps=h)
232                forecast_result = self.obj.prediction_intervals(
233                    steps=h, alpha=self.alpha_ / 100, **kwargs
234                )
235                lower_bound = forecast_result["lower"]
236                upper_bound = forecast_result["upper"]
237
238        else:
239
240            raise ValueError("model not recognized")
241
242        try:
243            self.mean_ = pd.DataFrame(
244                mean_forecast,
245                columns=self.series_names,
246                index=self.output_dates_,
247            )
248            self.lower_ = pd.DataFrame(
249                lower_bound, columns=self.series_names, index=self.output_dates_
250            )
251            self.upper_ = pd.DataFrame(
252                upper_bound, columns=self.series_names, index=self.output_dates_
253            )
254        except Exception:
255            self.mean_ = pd.Series(
256                mean_forecast, name=self.series_names, index=self.output_dates_
257            )
258            self.lower_ = pd.Series(
259                lower_bound, name=self.series_names, index=self.output_dates_
260            )
261            self.upper_ = pd.Series(
262                upper_bound, name=self.series_names, index=self.output_dates_
263            )
264
265        return DescribeResult(
266            mean=self.mean_, lower=self.lower_, upper=self.upper_
267        )
268
269    def _compute_confidence_intervals(self, forecast_result, alpha):
270        """
271        Compute confidence intervals for VECM forecasts.
272        Uses the covariance of residuals to approximate the confidence intervals.
273        """
274        residuals = self.obj.resid
275        cov_matrix = np.cov(residuals.T)  # Covariance matrix of residuals
276        std_errors = np.sqrt(np.diag(cov_matrix))  # Standard errors
277
278        z_value = norm.ppf(1 - alpha / 2)  # Z-score for the given alpha level
279        lower_bound = forecast_result - z_value * std_errors
280        upper_bound = forecast_result + z_value * std_errors
281
282        return lower_bound, upper_bound
283
284    def score(self, X, training_index, testing_index, scoring=None, **kwargs):
285        """Train on training_index, score on testing_index."""
286
287        assert (
288            bool(set(training_index).intersection(set(testing_index))) == False
289        ), "Non-overlapping 'training_index' and 'testing_index' required"
290
291        # Dimensions
292        try:
293            # multivariate time series
294            n, p = X.shape
295        except:
296            # univariate time series
297            n = X.shape[0]
298            p = 1
299
300        # Training and testing sets
301        if p > 1:
302            X_train = X[training_index, :]
303            X_test = X[testing_index, :]
304        else:
305            X_train = X[training_index]
306            X_test = X[testing_index]
307
308        # Horizon
309        h = len(testing_index)
310        assert (
311            len(training_index) + h
312        ) <= n, "Please check lengths of training and testing windows"
313
314        # Fit and predict
315        self.fit(X_train, **kwargs)
316        preds = self.predict(h=h, **kwargs)
317
318        if scoring is None:
319            scoring = "neg_root_mean_squared_error"
320
321        # check inputs
322        assert scoring in (
323            "explained_variance",
324            "neg_mean_absolute_error",
325            "neg_mean_squared_error",
326            "neg_root_mean_squared_error",
327            "neg_mean_squared_log_error",
328            "neg_median_absolute_error",
329            "r2",
330        ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \
331                               'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \
332                               'neg_median_absolute_error', 'r2')"
333
334        scoring_options = {
335            "explained_variance": skm2.explained_variance_score,
336            "neg_mean_absolute_error": skm2.mean_absolute_error,
337            "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2),
338            "neg_root_mean_squared_error": lambda x, y: np.sqrt(
339                np.mean((x - y) ** 2)
340            ),
341            "neg_mean_squared_log_error": skm2.mean_squared_log_error,
342            "neg_median_absolute_error": skm2.median_absolute_error,
343            "r2": skm2.r2_score,
344        }
345
346        # if p > 1:
347        #     return tuple(
348        #         [
349        #             scoring_options[scoring](
350        #                 X_test[:, i], preds[:, i]#, **kwargs
351        #             )
352        #             for i in range(p)
353        #         ]
354        #     )
355        # else:
356        return scoring_options[scoring](X_test, preds)
357
358    def plot(self, series=None, type_axis="dates", type_plot="pi"):
359        """Plot time series forecast
360
361        Parameters:
362
363        series: {integer} or {string}
364            series index or name
365
366        """
367
368        assert all(
369            [
370                self.mean_ is not None,
371                self.lower_ is not None,
372                self.upper_ is not None,
373                self.output_dates_ is not None,
374            ]
375        ), "model forecasting must be obtained first (with predict)"
376
377        if series is None:
378            assert (
379                self.n_series == 1
380            ), "please specify series index or name (n_series > 1)"
381            series = 0
382
383        if isinstance(series, str):
384            assert (
385                series in self.series_names
386            ), f"series {series} doesn't exist in the input dataset"
387            series_idx = self.df_.columns.get_loc(series)
388        else:
389            assert isinstance(series, int) and (
390                0 <= series < self.n_series
391            ), f"check series index (< {self.n_series})"
392            series_idx = series
393
394        if isinstance(self.df_, pd.DataFrame):
395            y_all = list(self.df_.iloc[:, series_idx]) + list(
396                self.mean_.iloc[:, series_idx]
397            )
398            y_test = list(self.mean_.iloc[:, series_idx])
399        else:
400            y_all = list(self.df_.values) + list(self.mean_.values)
401            y_test = list(self.mean_.values)
402        n_points_all = len(y_all)
403        n_points_train = self.df_.shape[0]
404
405        if type_axis == "numeric":
406            x_all = [i for i in range(n_points_all)]
407            x_test = [i for i in range(n_points_train, n_points_all)]
408
409        if type_axis == "dates":  # use dates
410            x_all = np.concatenate(
411                (self.input_dates.values, self.output_dates_.values), axis=None
412            )
413            x_test = self.output_dates_.values
414
415        if type_plot == "pi":
416            fig, ax = plt.subplots()
417            ax.plot(x_all, y_all, "-")
418            ax.plot(x_test, y_test, "-", color="orange")
419            try:
420                ax.fill_between(
421                    x_test,
422                    self.lower_.iloc[:, series_idx],
423                    self.upper_.iloc[:, series_idx],
424                    alpha=0.2,
425                    color="orange",
426                )
427            except Exception:
428                ax.fill_between(
429                    x_test,
430                    self.lower_.values,
431                    self.upper_.values,
432                    alpha=0.2,
433                    color="orange",
434                )
435            if self.replications is None:
436                if self.n_series > 1:
437                    plt.title(
438                        f"prediction intervals for {series}",
439                        loc="left",
440                        fontsize=12,
441                        fontweight=0,
442                        color="black",
443                    )
444                else:
445                    plt.title(
446                        f"prediction intervals for input time series",
447                        loc="left",
448                        fontsize=12,
449                        fontweight=0,
450                        color="black",
451                    )
452                plt.show()
453            else:  # self.replications is not None
454                if self.n_series > 1:
455                    plt.title(
456                        f"prediction intervals for {self.replications} simulations of {series}",
457                        loc="left",
458                        fontsize=12,
459                        fontweight=0,
460                        color="black",
461                    )
462                else:
463                    plt.title(
464                        f"prediction intervals for {self.replications} simulations of input time series",
465                        loc="left",
466                        fontsize=12,
467                        fontweight=0,
468                        color="black",
469                    )
470                plt.show()
471
472        if type_plot == "spaghetti":
473            palette = plt.get_cmap("Set1")
474            sims_ix = getsims(self.sims_, series_idx)
475            plt.plot(x_all, y_all, "-")
476            for col_ix in range(
477                sims_ix.shape[1]
478            ):  # avoid this when there are thousands of simulations
479                plt.plot(
480                    x_test,
481                    sims_ix[:, col_ix],
482                    "-",
483                    color=palette(col_ix),
484                    linewidth=1,
485                    alpha=0.9,
486                )
487            plt.plot(x_all, y_all, "-", color="black")
488            plt.plot(x_test, y_test, "-", color="blue")
489            # Add titles
490            if self.n_series > 1:
491                plt.title(
492                    f"{self.replications} simulations of {series}",
493                    loc="left",
494                    fontsize=12,
495                    fontweight=0,
496                    color="black",
497                )
498            else:
499                plt.title(
500                    f"{self.replications} simulations of input time series",
501                    loc="left",
502                    fontsize=12,
503                    fontweight=0,
504                    color="black",
505                )
506            plt.xlabel("Time")
507            plt.ylabel("Values")
508            # Show the graph
509            plt.show()
510
511    def cross_val_score(
512        self,
513        X,
514        scoring="root_mean_squared_error",
515        n_jobs=None,
516        verbose=0,
517        xreg=None,
518        initial_window=5,
519        horizon=3,
520        fixed_window=False,
521        show_progress=True,
522        level=95,
523        **kwargs,
524    ):
525        """Evaluate a score by time series cross-validation.
526
527        Parameters:
528
529            X: {array-like, sparse matrix} of shape (n_samples, n_features)
530                The data to fit.
531
532            scoring: str or a function
533                A str in ('root_mean_squared_error', 'mean_squared_error', 'mean_error',
534                'mean_absolute_error', 'mean_error', 'mean_percentage_error',
535                'mean_absolute_percentage_error',  'winkler_score', 'coverage')
536                Or a function defined as 'coverage' and 'winkler_score' in `utils.timeseries`
537
538            n_jobs: int, default=None
539                Number of jobs to run in parallel.
540
541            verbose: int, default=0
542                The verbosity level.
543
544            xreg: array-like, optional (default=None)
545                Additional (external) regressors to be passed to `fit`
546                xreg must be in 'increasing' order (most recent observations last)
547
548            initial_window: int
549                initial number of consecutive values in each training set sample
550
551            horizon: int
552                number of consecutive values in test set sample
553
554            fixed_window: boolean
555                if False, all training samples start at index 0, and the training
556                window's size is increasing.
557                if True, the training window's size is fixed, and the window is
558                rolling forward
559
560            show_progress: boolean
561                if True, a progress bar is printed
562
563            **kwargs: dict
564                additional parameters to be passed to `fit` and `predict`
565
566        Returns:
567
568            A tuple: descriptive statistics or errors and raw errors
569
570        """
571        tscv = TimeSeriesSplit()
572
573        tscv_obj = tscv.split(
574            X,
575            initial_window=initial_window,
576            horizon=horizon,
577            fixed_window=fixed_window,
578        )
579
580        if isinstance(scoring, str):
581
582            assert scoring in (
583                "root_mean_squared_error",
584                "mean_squared_error",
585                "mean_error",
586                "mean_absolute_error",
587                "mean_percentage_error",
588                "mean_absolute_percentage_error",
589                "winkler_score",
590                "coverage",
591            ), "must have scoring in ('root_mean_squared_error', 'mean_squared_error', 'mean_error', 'mean_absolute_error', 'mean_error', 'mean_percentage_error', 'mean_absolute_percentage_error',  'winkler_score', 'coverage')"
592
593            def err_func(X_test, X_pred, scoring):
594                if (self.replications is not None) or (
595                    self.type_pi == "gaussian"
596                ):  # probabilistic
597                    if scoring == "winkler_score":
598                        return winkler_score(X_pred, X_test, level=level)
599                    elif scoring == "coverage":
600                        return coverage(X_pred, X_test, level=level)
601                    else:
602                        return mean_errors(
603                            pred=X_pred.mean, actual=X_test, scoring=scoring
604                        )
605                else:  # not probabilistic
606                    return mean_errors(
607                        pred=X_pred, actual=X_test, scoring=scoring
608                    )
609
610        else:  # isinstance(scoring, str) = False
611
612            err_func = scoring
613
614        errors = []
615
616        train_indices = []
617
618        test_indices = []
619
620        for train_index, test_index in tscv_obj:
621            train_indices.append(train_index)
622            test_indices.append(test_index)
623
624        if show_progress is True:
625            iterator = tqdm(
626                zip(train_indices, test_indices), total=len(train_indices)
627            )
628        else:
629            iterator = zip(train_indices, test_indices)
630
631        for train_index, test_index in iterator:
632
633            if verbose == 1:
634                print(f"TRAIN: {train_index}")
635                print(f"TEST: {test_index}")
636
637            if isinstance(X, pd.DataFrame):
638                self.fit(X.iloc[train_index, :], xreg=xreg, **kwargs)
639                X_test = X.iloc[test_index, :]
640            else:
641                self.fit(X[train_index, :], xreg=xreg, **kwargs)
642                X_test = X[test_index, :]
643            X_pred = self.predict(h=int(len(test_index)), level=level, **kwargs)
644
645            errors.append(err_func(X_test, X_pred, scoring))
646
647        res = np.asarray(errors)
648
649        return res, describe(res)

Multivariate time series forecasting with classical statistical models (VAR, VECM, ARIMA, ETS, Theta)

Parameters:

model: type of model: str.
    currently, 'VAR', 'VECM', 'ARIMA', 'ETS', 'Theta'

Attributes:

df_: data frame
    the input data frame, in case a data.frame is provided to `fit`

level_: int
    level of confidence for prediction intervals (default is 95)

Examples: See examples/classical_mts_timeseries.py
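
In addition to the example file above, here is a minimal sketch (the synthetic data, frequency and horizon are illustrative assumptions):

```python
import numpy as np
import pandas as pd
import nnetsauce as ns

# two synthetic random-walk series on a daily index
rng = np.random.default_rng(123)
dates = pd.date_range("2023-01-01", periods=100, freq="D")
df = pd.DataFrame(
    np.cumsum(rng.normal(size=(100, 2)), axis=0),
    columns=["series1", "series2"],
    index=dates,
)

obj = ns.ClassicalMTS(model="VAR")
obj.fit(df)
res = obj.predict(h=5, level=95)  # named tuple: (mean, lower, upper)
print(res.mean)
```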

def fit(self, X, **kwargs):
 91    def fit(self, X, **kwargs):
 92        """Fit FactorMTS model to training data X, with optional regressors xreg
 93
 94        Parameters:
 95
 96        X: {array-like}, shape = [n_samples, n_features]
 97            Training time series, where n_samples is the number
 98            of samples and n_features is the number of features;
 99            X must be in increasing order (most recent observations last)
100
101        **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)
102
103        Returns:
104
105        self: object
106        """
107
108        try:
109            self.n_series = X.shape[1]
110        except Exception:
111            self.n_series = 1
112
113        if (isinstance(X, pd.DataFrame) is False) and isinstance(
114            X, pd.Series
115        ) is False:  # input data set is a numpy array
116
117            X = pd.DataFrame(X)
118            if self.n_series > 1:
119                self.series_names = [
120                    "series" + str(i) for i in range(X.shape[1])
121                ]
122            else:
123                self.series_names = "series0"
124
125        else:  # input data set is a DataFrame or Series with column names
126
127            X_index = None
128            if X.index is not None and len(X.shape) > 1:
129                X_index = X.index
130                X = copy.deepcopy(mo.convert_df_to_numeric(X))
131            if X_index is not None:
132                try:
133                    X.index = X_index
134                except Exception:
135                    pass
136            if isinstance(X, pd.DataFrame):
137                self.series_names = X.columns.tolist()
138            else:
139                self.series_names = X.name
140
141        if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series):
142            self.df_ = X
143            X = X.values
144            self.df_.columns = self.series_names
145            self.input_dates = ts.compute_input_dates(self.df_)
146        else:
147            self.df_ = pd.DataFrame(X, columns=self.series_names)
148
149        if self.model == "Theta":
150            self.obj = self.obj(self.df_, **kwargs).fit()
151        else:
152            self.obj = self.obj(X, **kwargs).fit(**kwargs)
153
154        return self

Fit the ClassicalMTS model to training data X, with optional regressors xreg

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training time series, where n_samples is the number
    of samples and n_features is the number of features;
    X must be in increasing order (most recent observations last)

**kwargs: additional parameters, for now passed to kernel density
    estimation when needed (see sklearn.neighbors.KernelDensity)

Returns:

self: object
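
A short sketch of `fit` with a plain numpy array (a sketch under illustrative assumptions); in that case the series are named automatically:

```python
import numpy as np
import nnetsauce as ns

# synthetic multivariate series, most recent observations last
X = np.cumsum(np.random.default_rng(0).normal(size=(100, 3)), axis=0)

obj = ns.ClassicalMTS(model="VAR")
obj.fit(X)              # columns auto-named "series0", "series1", "series2"
print(obj.df_.columns)
```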

def predict(self, h=5, level=95, **kwargs):
156    def predict(self, h=5, level=95, **kwargs):
157        """Forecast all the time series, h steps ahead
158
159        Parameters:
160
161        h: {integer}
162            Forecasting horizon
163
164        **kwargs: additional parameters to be passed to
165                self.cook_test_set
166
167        Returns:
168
169        model predictions for horizon = h: {array-like}
170
171        """
172
173        self.output_dates_, frequency = ts.compute_output_dates(self.df_, h)
174
175        self.level_ = level
176
177        self.lower_ = None  # do not remove (/!\)
178
179        self.upper_ = None  # do not remove (/!\)
180
181        self.sims_ = None  # do not remove (/!\)
182
183        self.level_ = level
184
185        self.alpha_ = 100 - level
186
187        pi_multiplier = norm.ppf(1 - self.alpha_ / 200)
188
189        # Named tuple for forecast results
190        DescribeResult = namedtuple(
191            "DescribeResult", ("mean", "lower", "upper")
192        )
193
194        if self.model == "VAR":
195            mean_forecast, lower_bound, upper_bound = (
196                self.obj.forecast_interval(
197                    self.obj.endog, steps=h, alpha=self.alpha_ / 100, **kwargs
198                )
199            )
200
201        elif self.model == "VECM":
202            forecast_result = self.obj.predict(steps=h)
203            mean_forecast = forecast_result
204            lower_bound, upper_bound = self._compute_confidence_intervals(
205                forecast_result, alpha=self.alpha_ / 100, **kwargs
206            )
207
208        elif self.model == "ARIMA":
209            forecast_result = self.obj.get_forecast(steps=h)
210            mean_forecast = forecast_result.predicted_mean
211            lower_bound = forecast_result.conf_int()[:, 0]
212            upper_bound = forecast_result.conf_int()[:, 1]
213
214        elif self.model == "ETS":
215            forecast_result = self.obj.forecast(steps=h)
216            residuals = self.obj.resid
217            std_errors = np.std(residuals)
218            mean_forecast = forecast_result
219            lower_bound = forecast_result - pi_multiplier * std_errors
220            upper_bound = forecast_result + pi_multiplier * std_errors
221
222        elif self.model == "Theta":
223            try:
224                mean_forecast = self.obj.forecast(steps=h).values
225                forecast_result = self.obj.prediction_intervals(
226                    steps=h, alpha=self.alpha_ / 100, **kwargs
227                )
228                lower_bound = forecast_result["lower"].values
229                upper_bound = forecast_result["upper"].values
230            except Exception:
231                mean_forecast = self.obj.forecast(steps=h)
232                forecast_result = self.obj.prediction_intervals(
233                    steps=h, alpha=self.alpha_ / 100, **kwargs
234                )
235                lower_bound = forecast_result["lower"]
236                upper_bound = forecast_result["upper"]
237
238        else:
239
240            raise ValueError("model not recognized")
241
242        try:
243            self.mean_ = pd.DataFrame(
244                mean_forecast,
245                columns=self.series_names,
246                index=self.output_dates_,
247            )
248            self.lower_ = pd.DataFrame(
249                lower_bound, columns=self.series_names, index=self.output_dates_
250            )
251            self.upper_ = pd.DataFrame(
252                upper_bound, columns=self.series_names, index=self.output_dates_
253            )
254        except Exception:
255            self.mean_ = pd.Series(
256                mean_forecast, name=self.series_names, index=self.output_dates_
257            )
258            self.lower_ = pd.Series(
259                lower_bound, name=self.series_names, index=self.output_dates_
260            )
261            self.upper_ = pd.Series(
262                upper_bound, name=self.series_names, index=self.output_dates_
263            )
264
265        return DescribeResult(
266            mean=self.mean_, lower=self.lower_, upper=self.upper_
267        )

Forecast all the time series, h steps ahead

Parameters:

h: {integer}
    Forecasting horizon

**kwargs: additional parameters passed to the underlying statsmodels
    forecasting method

Returns:

model predictions for horizon = h: {array-like}
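
A hedged sketch of `predict`, assuming `obj` is a fitted ClassicalMTS as in the sketches above; the result is a named tuple whose fields are also stored on the object:

```python
res = obj.predict(h=10, level=90)

print(res.mean.head())   # point forecasts
print(res.lower.head())  # lower prediction bound
print(res.upper.head())  # upper prediction bound

# the same forecasts are kept as attributes
assert res.mean.equals(obj.mean_)
```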

def score(self, X, training_index, testing_index, scoring=None, **kwargs):
284    def score(self, X, training_index, testing_index, scoring=None, **kwargs):
285        """Train on training_index, score on testing_index."""
286
287        assert (
288            bool(set(training_index).intersection(set(testing_index))) == False
289        ), "Non-overlapping 'training_index' and 'testing_index' required"
290
291        # Dimensions
292        try:
293            # multivariate time series
294            n, p = X.shape
295        except:
296            # univariate time series
297            n = X.shape[0]
298            p = 1
299
300        # Training and testing sets
301        if p > 1:
302            X_train = X[training_index, :]
303            X_test = X[testing_index, :]
304        else:
305            X_train = X[training_index]
306            X_test = X[testing_index]
307
308        # Horizon
309        h = len(testing_index)
310        assert (
311            len(training_index) + h
312        ) <= n, "Please check lengths of training and testing windows"
313
314        # Fit and predict
315        self.fit(X_train, **kwargs)
316        preds = self.predict(h=h, **kwargs)
317
318        if scoring is None:
319            scoring = "neg_root_mean_squared_error"
320
321        # check inputs
322        assert scoring in (
323            "explained_variance",
324            "neg_mean_absolute_error",
325            "neg_mean_squared_error",
326            "neg_root_mean_squared_error",
327            "neg_mean_squared_log_error",
328            "neg_median_absolute_error",
329            "r2",
330        ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \
331                               'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \
332                               'neg_median_absolute_error', 'r2')"
333
334        scoring_options = {
335            "explained_variance": skm2.explained_variance_score,
336            "neg_mean_absolute_error": skm2.mean_absolute_error,
337            "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2),
338            "neg_root_mean_squared_error": lambda x, y: np.sqrt(
339                np.mean((x - y) ** 2)
340            ),
341            "neg_mean_squared_log_error": skm2.mean_squared_log_error,
342            "neg_median_absolute_error": skm2.median_absolute_error,
343            "r2": skm2.r2_score,
344        }
345
346        # if p > 1:
347        #     return tuple(
348        #         [
349        #             scoring_options[scoring](
350        #                 X_test[:, i], preds[:, i]#, **kwargs
351        #             )
352        #             for i in range(p)
353        #         ]
354        #     )
355        # else:
356        return scoring_options[scoring](X_test, preds)

Train on training_index, score on testing_index.

class CustomClassifier(nnetsauce.custom.custom.Custom, sklearn.base.ClassifierMixin):
 16class CustomClassifier(Custom, ClassifierMixin):
 17    """Custom Classification model
 18
 19    Attributes:
 20
 21        obj: object
 22            any object containing a method fit (obj.fit()) and a method predict
 23            (obj.predict())
 24
 25        n_hidden_features: int
 26            number of nodes in the hidden layer
 27
 28        activation_name: str
 29            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 30
 31        a: float
 32            hyperparameter for 'prelu' or 'elu' activation function
 33
 34        nodes_sim: str
 35            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 36            'uniform'
 37
 38        bias: boolean
 39            indicates if the hidden layer contains a bias term (True) or not
 40            (False)
 41
 42        dropout: float
 43            regularization parameter; (random) percentage of nodes dropped out
 44            of the training
 45
 46        direct_link: boolean
 47            indicates if the original predictors are included (True) in model's
 48            fitting or not (False)
 49
 50        n_clusters: int
 51            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 52                no clustering)
 53
 54        cluster_encode: bool
 55            defines how the variable containing clusters is treated (default is one-hot)
 56            if `False`, then labels are used, without one-hot encoding
 57
 58        type_clust: str
 59            type of clustering method: currently k-means ('kmeans') or Gaussian
 60            Mixture Model ('gmm')
 61
 62        type_scaling: a tuple of 3 strings
 63            scaling methods for inputs, hidden layer, and clustering respectively
 64            (and when relevant).
 65            Currently available: standardization ('std') or MinMax scaling ('minmax')
 66
 67        col_sample: float
 68            percentage of covariates randomly chosen for training
 69
 70        row_sample: float
 71            percentage of rows chosen for training, by stratified bootstrapping
 72
 73        cv_calibration: int, cross-validation generator, or iterable, default=2
 74            Determines the cross-validation splitting strategy. Same as
 75            `sklearn.calibration.CalibratedClassifierCV`
 76
 77        calibration_method: str
 78            {'sigmoid', 'isotonic'}, default='sigmoid'
 79            The method to use for calibration. Same as
 80            `sklearn.calibration.CalibratedClassifierCV`
 81
 82        seed: int
 83            reproducibility seed for nodes_sim=='uniform'
 84
 85        backend: str
 86            "cpu" or "gpu" or "tpu"
 87
 88    Examples:
 89
 90    Note: it's better to use the `DeepClassifier` or `LazyDeepClassifier` classes directly
 91
 92    ```python
 93    import nnetsauce as ns
 94    from sklearn.ensemble import RandomForestClassifier
 95    from sklearn.model_selection import train_test_split
 96    from sklearn.datasets import load_digits
 97    from time import time
 98
 99    digits = load_digits()
100    X = digits.data
101    y = digits.target
102    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
103                                                        random_state=123)
104
105    # layer 1 (base layer) ----
106    layer1_regr = RandomForestClassifier(n_estimators=10, random_state=123)
107
108    start = time()
109
110    layer1_regr.fit(X_train, y_train)
111
112    # Accuracy in layer 1
113    print(layer1_regr.score(X_test, y_test))
114
115    # layer 2 using layer 1 ----
116    layer2_regr = ns.CustomClassifier(obj = layer1_regr, n_hidden_features=5,
117                            direct_link=True, bias=True,
118                            nodes_sim='uniform', activation_name='relu',
119                            n_clusters=2, seed=123)
120    layer2_regr.fit(X_train, y_train)
121
122    # Accuracy in layer 2
123    print(layer2_regr.score(X_test, y_test))
124
125    # layer 3 using layer 2 ----
126    layer3_regr = ns.CustomClassifier(obj = layer2_regr, n_hidden_features=10,
127                            direct_link=True, bias=True, dropout=0.7,
128                            nodes_sim='uniform', activation_name='relu',
129                            n_clusters=2, seed=123)
130    layer3_regr.fit(X_train, y_train)
131
132    # Accuracy in layer 3
133    print(layer3_regr.score(X_test, y_test))
134
135    print(f"Elapsed {time() - start}")
136    ```
137
138    """
139
140    # construct the object -----
141    _estimator_type = "classifier"
142
143    def __init__(
144        self,
145        obj,
146        n_hidden_features=5,
147        activation_name="relu",
148        a=0.01,
149        nodes_sim="sobol",
150        bias=True,
151        dropout=0,
152        direct_link=True,
153        n_clusters=2,
154        cluster_encode=True,
155        type_clust="kmeans",
156        type_scaling=("std", "std", "std"),
157        col_sample=1,
158        row_sample=1,
159        cv_calibration=2,
160        calibration_method="sigmoid",
161        seed=123,
162        backend="cpu",
163    ):
164        super().__init__(
165            obj=obj,
166            n_hidden_features=n_hidden_features,
167            activation_name=activation_name,
168            a=a,
169            nodes_sim=nodes_sim,
170            bias=bias,
171            dropout=dropout,
172            direct_link=direct_link,
173            n_clusters=n_clusters,
174            cluster_encode=cluster_encode,
175            type_clust=type_clust,
176            type_scaling=type_scaling,
177            col_sample=col_sample,
178            row_sample=row_sample,
179            seed=seed,
180            backend=backend,
181        )
182        self.coef_ = None
183        self.intercept_ = None
184        self.type_fit = "classification"
185        self.cv_calibration = cv_calibration
186        self.calibration_method = calibration_method
187
188    def __sklearn_clone__(self):
189        """Create a clone of the estimator.
190
191        This is required for scikit-learn's calibration system to work properly.
192        """
193        # Create a new instance with the same parameters
194        clone = CustomClassifier(
195            obj=self.obj,
196            n_hidden_features=self.n_hidden_features,
197            activation_name=self.activation_name,
198            a=self.a,
199            nodes_sim=self.nodes_sim,
200            bias=self.bias,
201            dropout=self.dropout,
202            direct_link=self.direct_link,
203            n_clusters=self.n_clusters,
204            cluster_encode=self.cluster_encode,
205            type_clust=self.type_clust,
206            type_scaling=self.type_scaling,
207            col_sample=self.col_sample,
208            row_sample=self.row_sample,
209            cv_calibration=self.cv_calibration,
210            calibration_method=self.calibration_method,
211            seed=self.seed,
212            backend=self.backend,
213        )
214        return clone
215
216    def fit(self, X, y, sample_weight=None, **kwargs):
217        """Fit custom model to training data (X, y).
218
219        Parameters:
220
221            X: {array-like}, shape = [n_samples, n_features]
222                Training vectors, where n_samples is the number
223                of samples and n_features is the number of features.
224
225            y: array-like, shape = [n_samples]
226                Target values.
227
228            sample_weight: array-like, shape = [n_samples]
229                Sample weights.
230
231            **kwargs: additional parameters to be passed to
232                        self.cook_training_set or self.obj.fit
233
234        Returns:
235
236            self: object
237        """
238
239        if len(X.shape) == 1:
240            if isinstance(X, pd.DataFrame):
241                X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns)
242            else:
243                X = X.reshape(1, -1)
244
245        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
246        self.classes_ = np.unique(y)
247        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
248
249        # Wrap in CalibratedClassifierCV if needed
250        if self.cv_calibration is not None:
251            self.obj = CalibratedClassifierCV(
252                self.obj, cv=self.cv_calibration, method=self.calibration_method
253            )
254
255        # if sample_weights, else: (must use self.row_index)
256        if sample_weight is not None:
257            self.obj.fit(
258                scaled_Z,
259                output_y,
260                sample_weight=sample_weight[self.index_row_].ravel(),
261                **kwargs
262            )
263            return self
264
265        # if sample_weight is None:
266        self.obj.fit(scaled_Z, output_y, **kwargs)
267        self.classes_ = np.unique(y)  # for compatibility with sklearn
268        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
269
270        if hasattr(self.obj, "coef_"):
271            self.coef_ = self.obj.coef_
272
273        if hasattr(self.obj, "intercept_"):
274            self.intercept_ = self.obj.intercept_
275
276        return self
277
278    def partial_fit(self, X, y, sample_weight=None, **kwargs):
279        """Partial fit custom model to training data (X, y).
280
281        Parameters:
282
283            X: {array-like}, shape = [n_samples, n_features]
284                Subset of training vectors, where n_samples is the number
285                of samples and n_features is the number of features.
286
287            y: array-like, shape = [n_samples]
288                Subset of target values.
289
290            sample_weight: array-like, shape = [n_samples]
291                Sample weights.
292
293            **kwargs: additional parameters to be passed to
294                        self.cook_training_set or self.obj.fit
295
296        Returns:
297
298            self: object
299        """
300
301        if len(X.shape) == 1:
302            if isinstance(X, pd.DataFrame):
303                X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns)
304            else:
305                X = X.reshape(1, -1)
306            y = np.array([y], dtype=np.integer)
307
308        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
309        self.n_classes_ = len(np.unique(y))  # for compatibility with sklearn
310
311        # if sample_weights, else: (must use self.row_index)
312        if sample_weight is not None:
313            try:
314                self.obj.partial_fit(
315                    scaled_Z,
316                    output_y,
317                    sample_weight=sample_weight[self.index_row_].ravel(),
318                    # **kwargs
319                )
320            except:
321                raise NotImplementedError
322
323            return self
324
325        # if sample_weight is None:
326        # try:
327        self.obj.partial_fit(scaled_Z, output_y)
328        # except:
329        #    raise NotImplementedError
330
331        self.classes_ = np.unique(y)  # for compatibility with sklearn
332        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
333
334        return self
335
336    def predict(self, X, **kwargs):
337        """Predict test data X.
338
339        Parameters:
340
341            X: {array-like}, shape = [n_samples, n_features]
342                Training vectors, where n_samples is the number
343                of samples and n_features is the number of features.
344
345            **kwargs: additional parameters to be passed to
346                    self.cook_test_set
347
348        Returns:
349
350            model predictions: {array-like}
351        """
352
353        if len(X.shape) == 1:
354            n_features = X.shape[0]
355            new_X = mo.rbind(
356                X.reshape(1, n_features),
357                np.ones(n_features).reshape(1, n_features),
358            )
359
360            return (
361                self.obj.predict(self.cook_test_set(new_X, **kwargs), **kwargs)
362            )[0]
363
364        return self.obj.predict(self.cook_test_set(X, **kwargs), **kwargs)
365
366    def predict_proba(self, X, **kwargs):
367        """Predict probabilities for test data X.
368
369        Args:
370
371            X: {array-like}, shape = [n_samples, n_features]
372                Training vectors, where n_samples is the number
373                of samples and n_features is the number of features.
374
375            **kwargs: additional parameters to be passed to
376                    self.cook_test_set
377
378        Returns:
379
380            probability estimates for test data: {array-like}
381        """
382
383        if len(X.shape) == 1:
384            n_features = X.shape[0]
385            new_X = mo.rbind(
386                X.reshape(1, n_features),
387                np.ones(n_features).reshape(1, n_features),
388            )
389            return (
390                self.obj.predict_proba(
391                    self.cook_test_set(new_X, **kwargs), **kwargs
392                )
393            )[0]
394        return self.obj.predict_proba(self.cook_test_set(X, **kwargs), **kwargs)
395
396    def decision_function(self, X, **kwargs):
397        """Compute the decision function of X.
398
399        Parameters:
400            X: {array-like}, shape = [n_samples, n_features]
401                Samples to compute decision function for.
402
403            **kwargs: additional parameters to be passed to
404                    self.cook_test_set
405
406        Returns:
407            array-like of shape (n_samples,) or (n_samples, n_classes)
408            Decision function of the input samples. The order of outputs is the same
409            as that of the classes passed to fit.
410        """
411        if not hasattr(self.obj, "decision_function"):
412            # If base classifier doesn't have decision_function, use predict_proba
413            proba = self.predict_proba(X, **kwargs)
414            if proba.shape[1] == 2:
415                return proba[:, 1]  # For binary classification
416            return proba  # For multiclass
417
418        if len(X.shape) == 1:
419            n_features = X.shape[0]
420            new_X = mo.rbind(
421                X.reshape(1, n_features),
422                np.ones(n_features).reshape(1, n_features),
423            )
424
425            return (
426                self.obj.decision_function(
427                    self.cook_test_set(new_X, **kwargs), **kwargs
428                )
429            )[0]
430
431        return self.obj.decision_function(
432            self.cook_test_set(X, **kwargs), **kwargs
433        )
434
435    def score(self, X, y, scoring=None):
436        """Scoring function for classification.
437
438        Args:
439
440            X: {array-like}, shape = [n_samples, n_features]
441                Training vectors, where n_samples is the number
442                of samples and n_features is the number of features.
443
444            y: array-like, shape = [n_samples]
445                Target values.
446
447            scoring: str
448                scoring method (default is accuracy)
449
450        Returns:
451
452            score: float
453        """
454
455        if scoring is None:
456            scoring = "accuracy"
457
458        if scoring == "accuracy":
459            return skm2.accuracy_score(y, self.predict(X))
460
461        if scoring == "f1":
462            return skm2.f1_score(y, self.predict(X))
463
464        if scoring == "precision":
465            return skm2.precision_score(y, self.predict(X))
466
467        if scoring == "recall":
468            return skm2.recall_score(y, self.predict(X))
469
470        if scoring == "roc_auc":
471            return skm2.roc_auc_score(y, self.predict(X))
472
473        if scoring == "log_loss":
474            return skm2.log_loss(y, self.predict_proba(X))
475
476        if scoring == "balanced_accuracy":
477            return skm2.balanced_accuracy_score(y, self.predict(X))
478
479        if scoring == "average_precision":
480            return skm2.average_precision_score(y, self.predict(X))
481
482        if scoring == "neg_brier_score":
483            return -skm2.brier_score_loss(y, self.predict_proba(X))
484
485        if scoring == "neg_log_loss":
486            return -skm2.log_loss(y, self.predict_proba(X))
487
488    @property
489    def _estimator_type(self):
490        return "classifier"

Custom Classification model

Attributes:

obj: object
    any object containing a method fit (obj.fit()) and a method predict
    (obj.predict())

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

col_sample: float
    percentage of covariates randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

cv_calibration: int, cross-validation generator, or iterable, default=2
    Determines the cross-validation splitting strategy. Same as
    `sklearn.calibration.CalibratedClassifierCV`

calibration_method: str
    {'sigmoid', 'isotonic'}, default='sigmoid'
    The method to use for calibration. Same as
    `sklearn.calibration.CalibratedClassifierCV`

seed: int
    reproducibility seed for nodes_sim=='uniform'

backend: str
    "cpu" or "gpu" or "tpu"

Examples:

Note: it's better to use the DeepClassifier or LazyDeepClassifier classes directly

import nnetsauce as ns
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_digits
from time import time

digits = load_digits()
X = digits.data
y = digits.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=123)

# layer 1 (base layer) ----
layer1_regr = RandomForestClassifier(n_estimators=10, random_state=123)

start = time()

layer1_regr.fit(X_train, y_train)

# Accuracy in layer 1
print(layer1_regr.score(X_test, y_test))

# layer 2 using layer 1 ----
layer2_regr = ns.CustomClassifier(obj = layer1_regr, n_hidden_features=5,
                        direct_link=True, bias=True,
                        nodes_sim='uniform', activation_name='relu',
                        n_clusters=2, seed=123)
layer2_regr.fit(X_train, y_train)

# Accuracy in layer 2
print(layer2_regr.score(X_test, y_test))

# layer 3 using layer 2 ----
layer3_regr = ns.CustomClassifier(obj = layer2_regr, n_hidden_features=10,
                        direct_link=True, bias=True, dropout=0.7,
                        nodes_sim='uniform', activation_name='relu',
                        n_clusters=2, seed=123)
layer3_regr.fit(X_train, y_train)

# Accuracy in layer 3
print(layer3_regr.score(X_test, y_test))

print(f"Elapsed {time() - start}")
def fit(self, X, y, sample_weight=None, **kwargs):
216    def fit(self, X, y, sample_weight=None, **kwargs):
217        """Fit custom model to training data (X, y).
218
219        Parameters:
220
221            X: {array-like}, shape = [n_samples, n_features]
222                Training vectors, where n_samples is the number
223                of samples and n_features is the number of features.
224
225            y: array-like, shape = [n_samples]
226                Target values.
227
228            sample_weight: array-like, shape = [n_samples]
229                Sample weights.
230
231            **kwargs: additional parameters to be passed to
232                        self.cook_training_set or self.obj.fit
233
234        Returns:
235
236            self: object
237        """
238
239        if len(X.shape) == 1:
240            if isinstance(X, pd.DataFrame):
241                X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns)
242            else:
243                X = X.reshape(1, -1)
244
245        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
246        self.classes_ = np.unique(y)
247        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
248
249        # Wrap in CalibratedClassifierCV if needed
250        if self.cv_calibration is not None:
251            self.obj = CalibratedClassifierCV(
252                self.obj, cv=self.cv_calibration, method=self.calibration_method
253            )
254
255        # if sample_weights, else: (must use self.row_index)
256        if sample_weight is not None:
257            self.obj.fit(
258                scaled_Z,
259                output_y,
260                sample_weight=sample_weight[self.index_row_].ravel(),
261                **kwargs
262            )
263            return self
264
265        # if sample_weight is None:
266        self.obj.fit(scaled_Z, output_y, **kwargs)
267        self.classes_ = np.unique(y)  # for compatibility with sklearn
268        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
269
270        if hasattr(self.obj, "coef_"):
271            self.coef_ = self.obj.coef_
272
273        if hasattr(self.obj, "intercept_"):
274            self.intercept_ = self.obj.intercept_
275
276        return self

Fit custom model to training data (X, y).

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

sample_weight: array-like, shape = [n_samples]
    Sample weights.

**kwargs: additional parameters to be passed to
            self.cook_training_set or self.obj.fit

Returns:

self: object
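A minimal, hedged sketch of `fit` (the dataset and hyperparameters below are arbitrary choices for illustration):

```python
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# with the default cv_calibration=2, fit wraps the base learner in
# sklearn's CalibratedClassifierCV before training
clf = ns.CustomClassifier(obj=LogisticRegression(max_iter=1000),
                          n_hidden_features=10, seed=123)
clf.fit(X_train, y_train)

print(clf.classes_)    # unique class labels, stored for sklearn compatibility
print(clf.n_classes_)  # number of classes
```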
def predict(self, X, **kwargs):
336    def predict(self, X, **kwargs):
337        """Predict test data X.
338
339        Parameters:
340
341            X: {array-like}, shape = [n_samples, n_features]
342                Training vectors, where n_samples is the number
343                of samples and n_features is the number of features.
344
345            **kwargs: additional parameters to be passed to
346                    self.cook_test_set
347
348        Returns:
349
350            model predictions: {array-like}
351        """
352
353        if len(X.shape) == 1:
354            n_features = X.shape[0]
355            new_X = mo.rbind(
356                X.reshape(1, n_features),
357                np.ones(n_features).reshape(1, n_features),
358            )
359
360            return (
361                self.obj.predict(self.cook_test_set(new_X, **kwargs), **kwargs)
362            )[0]
363
364        return self.obj.predict(self.cook_test_set(X, **kwargs), **kwargs)

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
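The 1-D branch above pads a single observation with a row of ones (via `mo.rbind`) before calling the base learner, so a lone sample can be passed directly. Continuing the fitted `clf` from the `fit` sketch above (a continuation, not part of the original docs):

```python
# batch prediction
labels = clf.predict(X_test)

# single observation (1-D array); reshaped internally before prediction
single_label = clf.predict(X_test[0])

print(labels[:5], single_label)
```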
def predict_proba(self, X, **kwargs):
366    def predict_proba(self, X, **kwargs):
367        """Predict probabilities for test data X.
368
369        Args:
370
371            X: {array-like}, shape = [n_samples, n_features]
372                Training vectors, where n_samples is the number
373                of samples and n_features is the number of features.
374
375            **kwargs: additional parameters to be passed to
376                    self.cook_test_set
377
378        Returns:
379
380            probability estimates for test data: {array-like}
381        """
382
383        if len(X.shape) == 1:
384            n_features = X.shape[0]
385            new_X = mo.rbind(
386                X.reshape(1, n_features),
387                np.ones(n_features).reshape(1, n_features),
388            )
389            return (
390                self.obj.predict_proba(
391                    self.cook_test_set(new_X, **kwargs), **kwargs
392                )
393            )[0]
394        return self.obj.predict_proba(self.cook_test_set(X, **kwargs), **kwargs)

Predict probabilities for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

probability estimates for test data: {array-like}
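Continuing the same fitted `clf`, a short hedged illustration of `predict_proba`:

```python
proba = clf.predict_proba(X_test)  # shape (n_samples, n_classes)
print(proba.shape)
print(proba.sum(axis=1)[:5])       # each row sums to 1
```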
def score(self, X, y, scoring=None):
435    def score(self, X, y, scoring=None):
436        """Scoring function for classification.
437
438        Args:
439
440            X: {array-like}, shape = [n_samples, n_features]
441                Training vectors, where n_samples is the number
442                of samples and n_features is the number of features.
443
444            y: array-like, shape = [n_samples]
445                Target values.
446
447            scoring: str
448                scoring method (default is accuracy)
449
450        Returns:
451
452            score: float
453        """
454
455        if scoring is None:
456            scoring = "accuracy"
457
458        if scoring == "accuracy":
459            return skm2.accuracy_score(y, self.predict(X))
460
461        if scoring == "f1":
462            return skm2.f1_score(y, self.predict(X))
463
464        if scoring == "precision":
465            return skm2.precision_score(y, self.predict(X))
466
467        if scoring == "recall":
468            return skm2.recall_score(y, self.predict(X))
469
470        if scoring == "roc_auc":
471            return skm2.roc_auc_score(y, self.predict(X))
472
473        if scoring == "log_loss":
474            return skm2.log_loss(y, self.predict_proba(X))
475
476        if scoring == "balanced_accuracy":
477            return skm2.balanced_accuracy_score(y, self.predict(X))
478
479        if scoring == "average_precision":
480            return skm2.average_precision_score(y, self.predict(X))
481
482        if scoring == "neg_brier_score":
483            return -skm2.brier_score_loss(y, self.predict_proba(X))
484
485        if scoring == "neg_log_loss":
486            return -skm2.log_loss(y, self.predict_proba(X))

Scoring function for classification.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

scoring: str
    scoring method (default is accuracy)

Returns:

score: float
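A hedged illustration of the alternative `scoring` values, again reusing the fitted `clf` from the `fit` sketch; note that 'f1', 'precision' and 'recall' call the corresponding sklearn metrics with their defaults, so as written they apply to binary targets:

```python
print(clf.score(X_test, y_test))                          # accuracy (default)
print(clf.score(X_test, y_test, scoring="f1"))            # binary F1 score
print(clf.score(X_test, y_test, scoring="neg_log_loss"))  # uses predict_proba
```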
class CustomRegressor(nnetsauce.custom.custom.Custom, sklearn.base.RegressorMixin):
 18class CustomRegressor(Custom, RegressorMixin):
 19    """Custom Regression model
 20
 21    This class is used to 'augment' any regression model with transformed features.
 22
 23    Parameters:
 24
 25        obj: object
 26            any object containing a method fit (obj.fit()) and a method predict
 27            (obj.predict())
 28
 29        n_hidden_features: int
 30            number of nodes in the hidden layer
 31
 32        activation_name: str
 33            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 34
 35        a: float
 36            hyperparameter for 'prelu' or 'elu' activation function
 37
 38        nodes_sim: str
 39            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 40            'uniform'
 41
 42        bias: boolean
 43            indicates if the hidden layer contains a bias term (True) or not
 44            (False)
 45
 46        dropout: float
 47            regularization parameter; (random) percentage of nodes dropped out
 48            of the training
 49
 50        direct_link: boolean
 51            indicates if the original predictors are included (True) in model's
 52            fitting or not (False)
 53
 54        n_clusters: int
 55            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 56                no clustering)
 57
 58        cluster_encode: bool
 59            defines how the variable containing clusters is treated (default is one-hot)
 60            if `False`, then labels are used, without one-hot encoding
 61
 62        type_clust: str
 63            type of clustering method: currently k-means ('kmeans') or Gaussian
 64            Mixture Model ('gmm')
 65
 66        type_scaling: a tuple of 3 strings
 67            scaling methods for inputs, hidden layer, and clustering respectively
 68            (and when relevant).
 69            Currently available: standardization ('std') or MinMax scaling ('minmax')
 70
 71        type_pi: str.
 72            type of prediction interval; currently `None` (split or local
 73            conformal without simulation), "kde" or "bootstrap" (simulated split
 74            conformal).
 75
 76        replications: int.
 77            number of replications (if needed) for predictive simulation.
 78            Used only in `self.predict`, for `self.kernel` in ('gaussian',
 79            'tophat') and `self.type_pi = 'kde'`. Default is `None`.
 80
 81        kernel: str.
 82            the kernel to use for kernel density estimation (used for predictive
 83            simulation in `self.predict`, with `method='splitconformal'` and
 84            `type_pi = 'kde'`). Currently, either 'gaussian' or 'tophat'.
 85
 86        type_split: str.
 87            Type of splitting for conformal prediction. None (default), or
 88            "random" (random split of data) or "sequential" (sequential split of data)
 89
 90        col_sample: float
 91            percentage of covariates randomly chosen for training
 92
 93        row_sample: float
 94            percentage of rows chosen for training, by stratified bootstrapping
 95
 96        level: float
 97            confidence level for prediction intervals
 98
 99        pi_method: str
100            method for prediction intervals: 'splitconformal' or 'localconformal'
101
102        seed: int
103            reproducibility seed for nodes_sim=='uniform'
104
105        type_fit: str
106            'regression'
107
108        backend: str
109            "cpu" or "gpu" or "tpu"
110
111    Examples:
112
113    See [https://thierrymoudiki.github.io/blog/2024/03/18/python/conformal-and-bayesian-regression](https://thierrymoudiki.github.io/blog/2024/03/18/python/conformal-and-bayesian-regression)
114
115    """
116
117    # construct the object -----
118
119    def __init__(
120        self,
121        obj,
122        n_hidden_features=5,
123        activation_name="relu",
124        a=0.01,
125        nodes_sim="sobol",
126        bias=True,
127        dropout=0,
128        direct_link=True,
129        n_clusters=2,
130        cluster_encode=True,
131        type_clust="kmeans",
132        type_scaling=("std", "std", "std"),
133        type_pi=None,
134        replications=None,
135        kernel=None,
136        type_split=None,
137        col_sample=1,
138        row_sample=1,
139        level=None,
140        pi_method=None,
141        seed=123,
142        backend="cpu",
143    ):
144        super().__init__(
145            obj=obj,
146            n_hidden_features=n_hidden_features,
147            activation_name=activation_name,
148            a=a,
149            nodes_sim=nodes_sim,
150            bias=bias,
151            dropout=dropout,
152            direct_link=direct_link,
153            n_clusters=n_clusters,
154            cluster_encode=cluster_encode,
155            type_clust=type_clust,
156            type_scaling=type_scaling,
157            col_sample=col_sample,
158            row_sample=row_sample,
159            seed=seed,
160            backend=backend,
161        )
162
163        self.type_fit = "regression"
164        self.type_pi = type_pi
165        self.replications = replications
166        self.kernel = kernel
167        self.type_split = type_split
168        self.level = level
169        self.pi_method = pi_method
170        self.coef_ = None
171        self.intercept_ = None
172        self.X_ = None
173        self.y_ = None
174        self.aic_ = None
175        self.aicc_ = None
176        self.bic_ = None
177
178    def fit(self, X, y, sample_weight=None, **kwargs):
179        """Fit custom model to training data (X, y).
180
181        Parameters:
182
183            X: {array-like}, shape = [n_samples, n_features]
184                Training vectors, where n_samples is the number
185                of samples and n_features is the number of features.
186
187            y: array-like, shape = [n_samples]
188                Target values.
189
190            sample_weight: array-like, shape = [n_samples]
191                Sample weights.
192
193            **kwargs: additional parameters to be passed to
194                self.cook_training_set or self.obj.fit
195
196        Returns:
197
198            self: object
199
200        """
201
202        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
203
204        if self.level is not None:
205            self.obj = PredictionInterval(
206                obj=self.obj, method=self.pi_method, level=self.level
207            )
208
209        # if sample_weights, else: (must use self.row_index)
210        if sample_weight is not None:
211            self.obj.fit(
212                scaled_Z,
213                centered_y,
214                sample_weight=sample_weight[self.index_row_].ravel(),
215                **kwargs
216            )
217
218            return self
219
220        self.obj.fit(scaled_Z, centered_y, **kwargs)
221
222        self.X_ = X
223
224        self.y_ = y
225
226        # Compute SSE
227        centered_y_pred = self.obj.predict(scaled_Z)
228        self.sse_ = np.sum((centered_y - centered_y_pred) ** 2)
229
230        # Get number of parameters
231        n_params = (
232            self.n_hidden_features + X.shape[1]
233        )  # hidden features + original features
234        if self.n_clusters > 0:
235            n_params += self.n_clusters  # add clusters if used
236
237        # Compute information criteria
238        n_samples = X.shape[0]
239        temp = n_samples * np.log(self.sse_ / n_samples)
240        self.aic_ = temp + 2 * n_params
241        self.bic_ = temp + np.log(n_samples) * n_params
242
243        if hasattr(self.obj, "coef_"):
244            self.coef_ = self.obj.coef_
245
246        if hasattr(self.obj, "intercept_"):
247            self.intercept_ = self.obj.intercept_
248
249        return self
250
251    def partial_fit(self, X, y, **kwargs):
252        """Partial fit custom model to training data (X, y).
253
254        Parameters:
255
256            X: {array-like}, shape = [n_samples, n_features]
257                Subset of training vectors, where n_samples is the number
258                of samples and n_features is the number of features.
259
260            y: array-like, shape = [n_samples]
261                Subset of target values.
262
263            **kwargs: additional parameters to be passed to
264                self.cook_training_set or self.obj.fit
265
266        Returns:
267
268            self: object
269
270        """
271
272        if len(X.shape) == 1:
273            if isinstance(X, pd.DataFrame):
274                X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns)
275            else:
276                X = X.reshape(1, -1)
277            y = np.array([y])
278
279        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
280
281        self.obj.partial_fit(scaled_Z, centered_y, **kwargs)
282
283        self.X_ = X
284
285        self.y_ = y
286
287        return self
288
289    def predict(self, X, level=95, method="splitconformal", **kwargs):
290        """Predict test data X.
291
292        Parameters:
293
294            X: {array-like}, shape = [n_samples, n_features]
295                Training vectors, where n_samples is the number
296                of samples and n_features is the number of features.
297
298            level: int
299                Level of confidence (default = 95)
300
301            method: str
302                'splitconformal', 'localconformal'
303                prediction (if you specify `return_pi = True`)
304
305            **kwargs: additional parameters
306                    `return_pi = True` for conformal prediction,
307                    with `method` in ('splitconformal', 'localconformal')
308                    or `return_std = True` for `self.obj` in
309                    (`sklearn.linear_model.BayesianRidge`,
310                    `sklearn.linear_model.ARDRegression`,
311                    `sklearn.gaussian_process.GaussianProcessRegressor`)
312
313        Returns:
314
315            model predictions:
316                an array if uncertainty quantification is not requested,
317                  or a tuple with prediction intervals and simulations:
318                  if `return_std = True`, (mean, standard deviation,
319                  lower and upper prediction interval); if `return_pi = True`,
320                  (predictions and conformal prediction intervals)
321
322        """
323
324        if "return_std" in kwargs:
325
326            alpha = 100 - level
327            pi_multiplier = norm.ppf(1 - alpha / 200)
328
329            if len(X.shape) == 1:
330
331                n_features = X.shape[0]
332                new_X = mo.rbind(
333                    X.reshape(1, n_features),
334                    np.ones(n_features).reshape(1, n_features),
335                )
336
337                mean_, std_ = self.obj.predict(
338                    self.cook_test_set(new_X, **kwargs), return_std=True
339                )[0]
340
341                preds = self.y_mean_ + mean_
342                lower = self.y_mean_ + (mean_ - pi_multiplier * std_)
343                upper = self.y_mean_ + (mean_ + pi_multiplier * std_)
344
345                DescribeResults = namedtuple(
346                    "DescribeResults", ["mean", "std", "lower", "upper"]
347                )
348
349                return DescribeResults(preds, std_, lower, upper)
350
351            # len(X.shape) > 1
352            mean_, std_ = self.obj.predict(
353                self.cook_test_set(X, **kwargs), return_std=True
354            )
355
356            preds = self.y_mean_ + mean_
357            lower = self.y_mean_ + (mean_ - pi_multiplier * std_)
358            upper = self.y_mean_ + (mean_ + pi_multiplier * std_)
359
360            DescribeResults = namedtuple(
361                "DescribeResults", ["mean", "std", "lower", "upper"]
362            )
363
364            return DescribeResults(preds, std_, lower, upper)
365
366        if "return_pi" in kwargs:
367            assert method in (
368                "splitconformal",
369                "localconformal",
370            ), "method must be in ('splitconformal', 'localconformal')"
371            self.pi = PredictionInterval(
372                obj=self,
373                method=method,
374                level=level,
375                type_pi=self.type_pi,
376                replications=self.replications,
377                kernel=self.kernel,
378            )
379
380            if len(self.X_.shape) == 1:
381                if isinstance(X, pd.DataFrame):
382                    self.X_ = pd.DataFrame(
383                        self.X_.values.reshape(1, -1), columns=self.X_.columns
384                    )
385                else:
386                    self.X_ = self.X_.reshape(1, -1)
387                self.y_ = np.array([self.y_])
388
389            self.pi.fit(self.X_, self.y_)
390            # self.X_ = None # consumes memory to keep, dangerous to delete (side effect)
391            # self.y_ = None # consumes memory to keep, dangerous to delete (side effect)
392            preds = self.pi.predict(X, return_pi=True)
393            return preds
394
395        # "return_std" not in kwargs
396        if len(X.shape) == 1:
397
398            n_features = X.shape[0]
399            new_X = mo.rbind(
400                X.reshape(1, n_features),
401                np.ones(n_features).reshape(1, n_features),
402            )
403
404            return (
405                self.y_mean_
406                + self.obj.predict(
407                    self.cook_test_set(new_X, **kwargs), **kwargs
408                )
409            )[0]
410
411        # len(X.shape) > 1
412        return self.y_mean_ + self.obj.predict(
413            self.cook_test_set(X, **kwargs), **kwargs
414        )
415
416    def score(self, X, y, scoring=None):
417        """Compute the score of the model.
418
419        Parameters:
420
421            X: {array-like}, shape = [n_samples, n_features]
422                Training vectors, where n_samples is the number
423                of samples and n_features is the number of features.
424
425            y: array-like, shape = [n_samples]
426                Target values.
427
428            scoring: str
429                scoring method
430
431        Returns:
432
433            score: float
434
435        """
436
437        if scoring is None:
438            return np.sqrt(np.mean((self.predict(X) - y) ** 2))
439
440        return skm2.get_scorer(scoring)(self, X, y)

Custom Regression model

This class is used to 'augment' any regression model with transformed features.

Parameters:

obj: object
    any object containing a method fit (obj.fit()) and a method predict
    (obj.predict())

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

type_pi: str.
    type of prediction interval; currently `None` (split or local
    conformal without simulation), "kde" or "bootstrap" (simulated split
    conformal).

replications: int.
    number of replications (if needed) for predictive simulation.
    Used only in `self.predict`, for `self.kernel` in ('gaussian',
    'tophat') and `self.type_pi = 'kde'`. Default is `None`.

kernel: str.
    the kernel to use for kernel density estimation (used for predictive
    simulation in `self.predict`, with `method='splitconformal'` and
    `type_pi = 'kde'`). Currently, either 'gaussian' or 'tophat'.

type_split: str.
    Type of splitting for conformal prediction. None (default), or
    "random" (random split of data) or "sequential" (sequential split of data)

col_sample: float
    percentage of covariates randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

level: float
    confidence level for prediction intervals

pi_method: str
    method for prediction intervals: 'splitconformal' or 'localconformal'

seed: int
    reproducibility seed for nodes_sim=='uniform'

type_fit: str
    'regression'

backend: str
    "cpu" or "gpu" or "tpu"

Examples:

See https://thierrymoudiki.github.io/blog/2024/03/18/python/conformal-and-bayesian-regression
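Beyond the linked post, here is a minimal, hedged sketch (synthetic data, arbitrary hyperparameters) of wrapping a scikit-learn regressor and requesting split-conformal prediction intervals at construction time:

```python
import nnetsauce as ns
from sklearn.datasets import make_regression
from sklearn.linear_model import RidgeCV
from sklearn.model_selection import train_test_split

X, y = make_regression(n_samples=300, n_features=5, noise=10.0, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# passing `level` and `pi_method` makes fit wrap the base learner
# in a PredictionInterval object
regr = ns.CustomRegressor(obj=RidgeCV(), n_hidden_features=10,
                          level=95, pi_method="splitconformal", seed=123)
regr.fit(X_train, y_train)
print(regr.score(X_test, y_test))  # root mean squared error by default
```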

def fit(self, X, y, sample_weight=None, **kwargs):
178    def fit(self, X, y, sample_weight=None, **kwargs):
179        """Fit custom model to training data (X, y).
180
181        Parameters:
182
183            X: {array-like}, shape = [n_samples, n_features]
184                Training vectors, where n_samples is the number
185                of samples and n_features is the number of features.
186
187            y: array-like, shape = [n_samples]
188                Target values.
189
190            sample_weight: array-like, shape = [n_samples]
191                Sample weights.
192
193            **kwargs: additional parameters to be passed to
194                self.cook_training_set or self.obj.fit
195
196        Returns:
197
198            self: object
199
200        """
201
202        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
203
204        if self.level is not None:
205            self.obj = PredictionInterval(
206                obj=self.obj, method=self.pi_method, level=self.level
207            )
208
209        # if sample_weights, else: (must use self.row_index)
210        if sample_weight is not None:
211            self.obj.fit(
212                scaled_Z,
213                centered_y,
214                sample_weight=sample_weight[self.index_row_].ravel(),
215                **kwargs
216            )
217
218            return self
219
220        self.obj.fit(scaled_Z, centered_y, **kwargs)
221
222        self.X_ = X
223
224        self.y_ = y
225
226        # Compute SSE
227        centered_y_pred = self.obj.predict(scaled_Z)
228        self.sse_ = np.sum((centered_y - centered_y_pred) ** 2)
229
230        # Get number of parameters
231        n_params = (
232            self.n_hidden_features + X.shape[1]
233        )  # hidden features + original features
234        if self.n_clusters > 0:
235            n_params += self.n_clusters  # add clusters if used
236
237        # Compute information criteria
238        n_samples = X.shape[0]
239        temp = n_samples * np.log(self.sse_ / n_samples)
240        self.aic_ = temp + 2 * n_params
241        self.bic_ = temp + np.log(n_samples) * n_params
242
243        if hasattr(self.obj, "coef_"):
244            self.coef_ = self.obj.coef_
245
246        if hasattr(self.obj, "intercept_"):
247            self.intercept_ = self.obj.intercept_
248
249        return self

Fit custom model to training data (X, y).

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

sample_weight: array-like, shape = [n_samples]
    Sample weights.

**kwargs: additional parameters to be passed to
    self.cook_training_set or self.obj.fit

Returns:

self: object
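After `fit`, a few diagnostic attributes are available; a short hedged illustration, reusing the `regr` fitted in the previous sketch:

```python
# residual sum of squares on the training set
print(regr.sse_)

# information criteria computed in fit:
#   aic_ = n * log(SSE / n) + 2 * k
#   bic_ = n * log(SSE / n) + log(n) * k
# where k = n_hidden_features + n_features (+ n_clusters when clustering is used)
print(regr.aic_, regr.bic_)
```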
def predict(self, X, level=95, method='splitconformal', **kwargs):
289    def predict(self, X, level=95, method="splitconformal", **kwargs):
290        """Predict test data X.
291
292        Parameters:
293
294            X: {array-like}, shape = [n_samples, n_features]
295                Training vectors, where n_samples is the number
296                of samples and n_features is the number of features.
297
298            level: int
299                Level of confidence (default = 95)
300
301            method: str
302                'splitconformal', 'localconformal'
303                prediction (if you specify `return_pi = True`)
304
305            **kwargs: additional parameters
306                    `return_pi = True` for conformal prediction,
307                    with `method` in ('splitconformal', 'localconformal')
308                    or `return_std = True` for `self.obj` in
309                    (`sklearn.linear_model.BayesianRidge`,
310                    `sklearn.linear_model.ARDRegression`,
311                    `sklearn.gaussian_process.GaussianProcessRegressor`)
312
313        Returns:
314
315            model predictions:
316                an array if uncertainty quantification is not requested,
317                  or a tuple with prediction intervals and simulations:
318                  if `return_std = True`, (mean, standard deviation,
319                  lower and upper prediction interval); if `return_pi = True`,
320                  (predictions and conformal prediction intervals)
321
322        """
323
324        if "return_std" in kwargs:
325
326            alpha = 100 - level
327            pi_multiplier = norm.ppf(1 - alpha / 200)
328
329            if len(X.shape) == 1:
330
331                n_features = X.shape[0]
332                new_X = mo.rbind(
333                    X.reshape(1, n_features),
334                    np.ones(n_features).reshape(1, n_features),
335                )
336
337                mean_, std_ = self.obj.predict(
338                    self.cook_test_set(new_X, **kwargs), return_std=True
339                )[0]
340
341                preds = self.y_mean_ + mean_
342                lower = self.y_mean_ + (mean_ - pi_multiplier * std_)
343                upper = self.y_mean_ + (mean_ + pi_multiplier * std_)
344
345                DescribeResults = namedtuple(
346                    "DescribeResults", ["mean", "std", "lower", "upper"]
347                )
348
349                return DescribeResults(preds, std_, lower, upper)
350
351            # len(X.shape) > 1
352            mean_, std_ = self.obj.predict(
353                self.cook_test_set(X, **kwargs), return_std=True
354            )
355
356            preds = self.y_mean_ + mean_
357            lower = self.y_mean_ + (mean_ - pi_multiplier * std_)
358            upper = self.y_mean_ + (mean_ + pi_multiplier * std_)
359
360            DescribeResults = namedtuple(
361                "DescribeResults", ["mean", "std", "lower", "upper"]
362            )
363
364            return DescribeResults(preds, std_, lower, upper)
365
366        if "return_pi" in kwargs:
367            assert method in (
368                "splitconformal",
369                "localconformal",
370            ), "method must be in ('splitconformal', 'localconformal')"
371            self.pi = PredictionInterval(
372                obj=self,
373                method=method,
374                level=level,
375                type_pi=self.type_pi,
376                replications=self.replications,
377                kernel=self.kernel,
378            )
379
380            if len(self.X_.shape) == 1:
381                if isinstance(X, pd.DataFrame):
382                    self.X_ = pd.DataFrame(
383                        self.X_.values.reshape(1, -1), columns=self.X_.columns
384                    )
385                else:
386                    self.X_ = self.X_.reshape(1, -1)
387                self.y_ = np.array([self.y_])
388
389            self.pi.fit(self.X_, self.y_)
390            # self.X_ = None # consumes memory to keep, dangerous to delete (side effect)
391            # self.y_ = None # consumes memory to keep, dangerous to delete (side effect)
392            preds = self.pi.predict(X, return_pi=True)
393            return preds
394
395        # "return_std" not in kwargs
396        if len(X.shape) == 1:
397
398            n_features = X.shape[0]
399            new_X = mo.rbind(
400                X.reshape(1, n_features),
401                np.ones(n_features).reshape(1, n_features),
402            )
403
404            return (
405                self.y_mean_
406                + self.obj.predict(
407                    self.cook_test_set(new_X, **kwargs), **kwargs
408                )
409            )[0]
410
411        # len(X.shape) > 1
412        return self.y_mean_ + self.obj.predict(
413            self.cook_test_set(X, **kwargs), **kwargs
414        )

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

level: int
    Level of confidence (default = 95)

method: str
    'splitconformal', 'localconformal'
    prediction (if you specify `return_pi = True`)

**kwargs: additional parameters
        `return_pi = True` for conformal prediction,
        with `method` in ('splitconformal', 'localconformal')
        or `return_std = True` for `self.obj` in
        (`sklearn.linear_model.BayesianRidge`,
        `sklearn.linear_model.ARDRegression`,
        `sklearn.gaussian_process.GaussianProcessRegressor`)

Returns:

model predictions:
    an array if uncertainty quantification is not requested,
      or a tuple with prediction intervals and simulations:
      if `return_std = True`, (mean, standard deviation,
      lower and upper prediction interval); if `return_pi = True`,
      (predictions and conformal prediction intervals)
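A hedged sketch of the two uncertainty-quantification paths described above (synthetic data; `BayesianRidge` is chosen because it supports `return_std`):

```python
import nnetsauce as ns
from sklearn.datasets import make_regression
from sklearn.linear_model import BayesianRidge
from sklearn.model_selection import train_test_split

X, y = make_regression(n_samples=300, n_features=5, noise=10.0, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

regr = ns.CustomRegressor(obj=BayesianRidge(), n_hidden_features=10, seed=123)
regr.fit(X_train, y_train)

# Gaussian intervals from the base learner's return_std
res = regr.predict(X_test, return_std=True, level=95)
print(res.mean[:3], res.lower[:3], res.upper[:3])

# split-conformal intervals; refits a PredictionInterval on the stored training data
res_pi = regr.predict(X_test, return_pi=True, method="splitconformal", level=95)
```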
def score(self, X, y, scoring=None):
416    def score(self, X, y, scoring=None):
417        """Compute the score of the model.
418
419        Parameters:
420
421            X: {array-like}, shape = [n_samples, n_features]
422                Training vectors, where n_samples is the number
423                of samples and n_features is the number of features.
424
425            y: array-like, shape = [n_samples]
426                Target values.
427
428            scoring: str
429                scoring method
430
431        Returns:
432
433            score: float
434
435        """
436
437        if scoring is None:
438            return np.sqrt(np.mean((self.predict(X) - y) ** 2))
439
440        return skm2.get_scorer(scoring)(self, X, y)

Compute the score of the model.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

scoring: str
    scoring method

Returns:

score: float
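Continuing the previous sketch, `score` defaults to the root mean squared error of `self.predict`, while any scikit-learn scorer name can be passed instead (hedged illustration):

```python
print(regr.score(X_test, y_test))                 # RMSE (default)
print(regr.score(X_test, y_test, scoring="r2"))   # delegates to sklearn's get_scorer
```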
class CustomBackPropRegressor(nnetsauce.custom.custom.Custom, sklearn.base.RegressorMixin):
 20class CustomBackPropRegressor(Custom, RegressorMixin):
 21    """
 22    Finite difference trainer for nnetsauce models.
 23
 24    Parameters
 25    ----------
 26
 27    base_model : str
 28        The name of the base model (e.g., 'RidgeCV').
 29
 30    type_grad : {'finitediff', 'autodiff'}, optional
 31        Type of gradient computation to use (default='finitediff').
 32
 33    lr : float, optional
 34        Learning rate for optimization (default=1e-4).
 35
 36    optimizer : {'gd', 'sgd', 'adam', 'cd'}, optional
 37        Optimization algorithm: gradient descent ('gd'), stochastic gradient descent ('sgd'),
 38        Adam ('adam'), or coordinate descent ('cd'). Default is 'gd'.
 39
 40    eps : float, optional
 41        Scaling factor for adaptive finite difference step size (default=1e-3).
 42
 43    batch_size : int, optional
 44        Batch size for 'sgd' optimizer (default=32).
 45
 46    alpha : float, optional
 47        Elastic net penalty strength (default=0.0).
 48
 49    l1_ratio : float, optional
 50        Elastic net mixing parameter (0 = Ridge, 1 = Lasso, default=0.0).
 51
 52    type_loss : {'mse', 'quantile'}, optional
 53        Type of loss function to use (default='mse').
 54
 55    q : float, optional
 56        Quantile for quantile loss (default=0.5).
 57
 58    **kwargs
 59        Additional parameters to pass to the scikit-learn model.
 60
 61    """
 62
 63    def __init__(
 64        self,
 65        base_model,
 66        type_grad="finitediff",
 67        lr=1e-4,
 68        optimizer="gd",
 69        eps=1e-3,
 70        batch_size=32,
 71        alpha=0.0,
 72        l1_ratio=0.0,
 73        type_loss="mse",
 74        q=0.5,
 75        backend="cpu",
 76        **kwargs,
 77    ):
 78        super().__init__(base_model, True, **kwargs)
 79        self.base_model = base_model
 80        self.custom_kwargs = kwargs
 81        self.backend = backend
 82        self.model = ns.CustomRegressor(
 83            self.base_model, backend=self.backend, **self.custom_kwargs
 84        )
 85        assert isinstance(
 86            self.model, ns.CustomRegressor
 87        ), "'model' must be of class ns.CustomRegressor"
 88        self.type_grad = type_grad
 89        self.lr = lr
 90        self.optimizer = optimizer
 91        self.eps = eps
 92        self.loss_history_ = []
 93        self.opt_state = None
 94        self.batch_size = batch_size  # for SGD
 95        self.loss_history_ = []
 96        self._cd_index = 0  # For coordinate descent
 97        self.alpha = alpha
 98        self.l1_ratio = l1_ratio
 99        self.type_loss = type_loss
100        self.q = q
101
102    def _loss(self, X, y, **kwargs):
103        """
104        Compute the loss (with elastic net penalty) for the current model.
105
106        Parameters
107        ----------
108
109        X : array-like of shape (n_samples, n_features)
110            Input data.
111
112        y : array-like of shape (n_samples,)
113            Target values.
114
115        **kwargs
116            Additional keyword arguments for loss calculation.
117
118        Returns
119        -------
120        float
121            The computed loss value.
122        """
123        y_pred = self.model.predict(X)
124        if self.type_loss == "mse":
125            loss = np.mean((y - y_pred) ** 2)
126        elif self.type_loss == "quantile":
127            loss = mean_pinball_loss(y, y_pred, alpha=self.q, **kwargs)
128        W = self.model.W_
129        l1 = np.sum(np.abs(W))
130        l2 = np.sum(W**2)
131        return loss + self.alpha * (
132            self.l1_ratio * l1 + 0.5 * (1 - self.l1_ratio) * l2
133        )
134
135    def _compute_grad(self, X, y):
136        """
137        Compute the gradient of the loss with respect to W_ using finite differences.
138
139        Parameters
140        ----------
141
142        X : array-like of shape (n_samples, n_features)
143            Input data.
144
145        y : array-like of shape (n_samples,)
146            Target values.
147
148        Returns
149        -------
150
151        ndarray
152            Gradient array with the same shape as W_.
153        """
154        if self.type_grad == "autodiff":
155            raise NotImplementedError(
156                "Automatic differentiation is not implemented yet."
157            )
158            # Use JAX for automatic differentiation
159            W = deepcopy(self.model.W_)
160            W_flat = W.flatten()
161            n_params = W_flat.size
162
163            def loss_fn(W_flat):
164                W_reshaped = W_flat.reshape(W.shape)
165                self.model.W_ = W_reshaped
166                return self._loss(X, y)
167
168            grad_fn = jax.grad(loss_fn)
169            grad_flat = grad_fn(W_flat)
170            grad = grad_flat.reshape(W.shape)
171
172            # Add elastic net gradient
173            l1_grad = self.alpha * self.l1_ratio * np.sign(W)
174            l2_grad = self.alpha * (1 - self.l1_ratio) * W
175            grad += l1_grad + l2_grad
176
177            self.model.W_ = W
178            return grad
179
180        # Finite difference gradient computation
181        W = deepcopy(self.model.W_)
182        shape = W.shape
183        W_flat = W.flatten()
184        n_params = W_flat.size
185
186        # Adaptive finite difference step
187        h_vec = self.eps * np.maximum(1.0, np.abs(W_flat))
188        eye = np.eye(n_params)
189
190        loss_plus = np.zeros(n_params)
191        loss_minus = np.zeros(n_params)
192
193        for i in range(n_params):
194            h_i = h_vec[i]
195            Wp = W_flat.copy()
196            Wp[i] += h_i
197            Wm = W_flat.copy()
198            Wm[i] -= h_i
199
200            self.model.W_ = Wp.reshape(shape)
201            loss_plus[i] = self._loss(X, y)
202
203            self.model.W_ = Wm.reshape(shape)
204            loss_minus[i] = self._loss(X, y)
205
206        grad = ((loss_plus - loss_minus) / (2 * h_vec)).reshape(shape)
207
208        # Add elastic net gradient
209        l1_grad = self.alpha * self.l1_ratio * np.sign(W)
210        l2_grad = self.alpha * (1 - self.l1_ratio) * W
211        grad += l1_grad + l2_grad
212
213        self.model.W_ = W  # restore original
214        return grad
215
216    def fit(
217        self,
218        X,
219        y,
220        epochs=10,
221        verbose=True,
222        show_progress=True,
223        sample_weight=None,
224        **kwargs,
225    ):
226        """
227        Fit the model using finite difference optimization.
228
229        Parameters
230        ----------
231
232        X : array-like of shape (n_samples, n_features)
233            Training data.
234
235        y : array-like of shape (n_samples,)
236            Target values.
237
238        epochs : int, optional
239            Number of optimization steps (default=10).
240
241        verbose : bool, optional
242            Whether to print progress messages (default=True).
243
244        show_progress : bool, optional
245            Whether to show tqdm progress bar (default=True).
246
247        sample_weight : array-like, optional
248            Sample weights.
249
250        **kwargs
251            Additional keyword arguments.
252
253        Returns
254        -------
255
256        self : object
257            Returns self.
258        """
259
260        self.model.fit(X, y)
261
262        iterator = tqdm(range(epochs)) if show_progress else range(epochs)
263
264        for epoch in iterator:
265            grad = self._compute_grad(X, y)
266
267            if self.optimizer == "gd":
268                self.model.W_ -= self.lr * grad
269                self.model.W_ = np.clip(self.model.W_, 0, 1)
270                # print("self.model.W_", self.model.W_)
271
272            elif self.optimizer == "sgd":
273                # Sample a mini-batch for stochastic gradient
274                n_samples = X.shape[0]
275                idxs = np.random.choice(
276                    n_samples, self.batch_size, replace=False
277                )
278                if isinstance(X, pd.DataFrame):
279                    X_batch = X.iloc[idxs, :]
280                else:
281                    X_batch = X[idxs, :]
282                y_batch = y[idxs]
283                grad = self._compute_grad(X_batch, y_batch)
284
285                self.model.W_ -= self.lr * grad
286                self.model.W_ = np.clip(self.model.W_, 0, 1)
287
288            elif self.optimizer == "adam":
289                if self.opt_state is None:
290                    self.opt_state = {
291                        "m": np.zeros_like(grad),
292                        "v": np.zeros_like(grad),
293                        "t": 0,
294                    }
295                beta1, beta2, eps = 0.9, 0.999, 1e-8
296                self.opt_state["t"] += 1
297                self.opt_state["m"] = (
298                    beta1 * self.opt_state["m"] + (1 - beta1) * grad
299                )
300                self.opt_state["v"] = beta2 * self.opt_state["v"] + (
301                    1 - beta2
302                ) * (grad**2)
303                m_hat = self.opt_state["m"] / (1 - beta1 ** self.opt_state["t"])
304                v_hat = self.opt_state["v"] / (1 - beta2 ** self.opt_state["t"])
305
306                self.model.W_ -= self.lr * m_hat / (np.sqrt(v_hat) + eps)
307                self.model.W_ = np.clip(self.model.W_, 0, 1)
308                # print("self.model.W_", self.model.W_)
309
310            elif self.optimizer == "cd":  # coordinate descent
311
312                W_shape = self.model.W_.shape
313                W_flat_size = self.model.W_.size
314                W_flat = self.model.W_.flatten()
315                grad_flat = grad.flatten()
316
317                # Update only one coordinate per epoch (cyclic)
318                idx = self._cd_index % W_flat_size
319                W_flat[idx] -= self.lr * grad_flat[idx]
320                # Clip the updated value
321                W_flat[idx] = np.clip(W_flat[idx], 0, 1)
322
323                # Restore W_
324                self.model.W_ = W_flat.reshape(W_shape)
325
326                self._cd_index += 1
327
328            else:
329                raise ValueError(f"Unsupported optimizer: {self.optimizer}")
330
331            loss = self._loss(X, y)
332            self.loss_history_.append(loss)
333
334            if verbose:
335                print(f"Epoch {epoch+1}: Loss = {loss:.6f}")
336
337        # if sample_weights, else: (must use self.row_index)
338        if sample_weight is not None:
339            self.model.fit(
340                X,
341                y,
342                sample_weight=sample_weight[self.index_row_].ravel(),
343                **kwargs,
344            )
345
346            return self
347
348        return self
349
350    def predict(self, X, level=95, method="splitconformal", **kwargs):
351        """
352        Predict using the trained model.
353
354        Parameters
355        ----------
356
357        X : array-like of shape (n_samples, n_features)
358            Input data.
359
360        level : int, optional
361            Level of confidence for prediction intervals (default=95).
362
363        method : {'splitconformal', 'localconformal'}, optional
364            Method for conformal prediction (default='splitconformal').
365
366        **kwargs
367            Additional keyword arguments. Use `return_pi=True` for prediction intervals,
368            or `return_std=True` for standard deviation estimates.
369
370        Returns
371        -------
372
373        array or tuple
374            Model predictions, or a tuple with prediction intervals or standard deviations if requested.
375        """
376        if "return_std" in kwargs:
377
378            alpha = 100 - level
379            pi_multiplier = norm.ppf(1 - alpha / 200)
380
381            if len(X.shape) == 1:
382
383                n_features = X.shape[0]
384                new_X = mo.rbind(
385                    X.reshape(1, n_features),
386                    np.ones(n_features).reshape(1, n_features),
387                )
388
389                mean_, std_ = [u[0] for u in self.model.predict(new_X, return_std=True)]
390
391                preds = mean_
392                lower = mean_ - pi_multiplier * std_
393                upper = mean_ + pi_multiplier * std_
394
395                DescribeResults = namedtuple(
396                    "DescribeResults", ["mean", "std", "lower", "upper"]
397                )
398
399                return DescribeResults(preds, std_, lower, upper)
400
401            # len(X.shape) > 1
402            mean_, std_ = self.model.predict(X, return_std=True)
403
404            preds = mean_
405            lower = mean_ - pi_multiplier * std_
406            upper = mean_ + pi_multiplier * std_
407
408            DescribeResults = namedtuple(
409                "DescribeResults", ["mean", "std", "lower", "upper"]
410            )
411
412            return DescribeResults(preds, std_, lower, upper)
413
414        if "return_pi" in kwargs:
415            assert method in (
416                "splitconformal",
417                "localconformal",
418            ), "method must be in ('splitconformal', 'localconformal')"
419            self.pi = ns.PredictionInterval(
420                obj=self,
421                method=method,
422                level=level,
423                type_pi=self.type_pi,
424                replications=self.replications,
425                kernel=self.kernel,
426            )
427
428            if len(self.X_.shape) == 1:
429                if isinstance(X, pd.DataFrame):
430                    self.X_ = pd.DataFrame(
431                        self.X_.values.reshape(1, -1), columns=self.X_.columns
432                    )
433                else:
434                    self.X_ = self.X_.reshape(1, -1)
435                self.y_ = np.array([self.y_])
436
437            self.pi.fit(self.X_, self.y_)
438            # self.X_ = None # consumes memory to keep, dangerous to delete (side effect)
439            # self.y_ = None # consumes memory to keep, dangerous to delete (side effect)
440            preds = self.pi.predict(X, return_pi=True)
441            return preds
442
443        # "return_std" not in kwargs
444        if len(X.shape) == 1:
445
446            n_features = X.shape[0]
447            new_X = mo.rbind(
448                X.reshape(1, n_features),
449                np.ones(n_features).reshape(1, n_features),
450            )
451
452            return (0 + self.model.predict(new_X, **kwargs))[0]
453
454        # len(X.shape) > 1
455        return self.model.predict(X, **kwargs)

Finite difference trainer for nnetsauce models.

Parameters

base_model : str
    The name of the base model (e.g., 'RidgeCV').

type_grad : {'finitediff', 'autodiff'}, optional
    Type of gradient computation to use (default='finitediff').

lr : float, optional
    Learning rate for optimization (default=1e-4).

optimizer : {'gd', 'sgd', 'adam', 'cd'}, optional
    Optimization algorithm: gradient descent ('gd'), stochastic gradient descent ('sgd'),
    Adam ('adam'), or coordinate descent ('cd'). Default is 'gd'.

eps : float, optional
    Scaling factor for adaptive finite difference step size (default=1e-3).

batch_size : int, optional
    Batch size for 'sgd' optimizer (default=32).

alpha : float, optional
    Elastic net penalty strength (default=0.0).

l1_ratio : float, optional
    Elastic net mixing parameter (0 = Ridge, 1 = Lasso, default=0.0).

type_loss : {'mse', 'quantile'}, optional
    Type of loss function to use (default='mse').

q : float, optional
    Quantile for quantile loss (default=0.5).

**kwargs
    Additional parameters to pass to the scikit-learn model.

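To make these options concrete, here is a hedged instantiation sketch; it follows the string-based `base_model` convention shown above ('RidgeCV'), and the hyperparameter values are purely illustrative.

```python
import nnetsauce as ns

# Illustrative configuration: Adam steps on an elastic-net-penalized quantile loss.
reg = ns.CustomBackPropRegressor(
    "RidgeCV",             # base model name, as in the docstring example
    optimizer="adam",      # one of 'gd', 'sgd', 'adam', 'cd'
    lr=1e-3,               # learning rate
    alpha=0.01,            # elastic net penalty strength
    l1_ratio=0.5,          # mix between L1 (lasso) and L2 (ridge)
    type_loss="quantile",  # or 'mse'
    q=0.9,                 # target quantile when type_loss='quantile'
)
```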
def fit( self, X, y, epochs=10, verbose=True, show_progress=True, sample_weight=None, **kwargs):
216    def fit(
217        self,
218        X,
219        y,
220        epochs=10,
221        verbose=True,
222        show_progress=True,
223        sample_weight=None,
224        **kwargs,
225    ):
226        """
227        Fit the model using finite difference optimization.
228
229        Parameters
230        ----------
231
232        X : array-like of shape (n_samples, n_features)
233            Training data.
234
235        y : array-like of shape (n_samples,)
236            Target values.
237
238        epochs : int, optional
239            Number of optimization steps (default=10).
240
241        verbose : bool, optional
242            Whether to print progress messages (default=True).
243
244        show_progress : bool, optional
245            Whether to show tqdm progress bar (default=True).
246
247        sample_weight : array-like, optional
248            Sample weights.
249
250        **kwargs
251            Additional keyword arguments.
252
253        Returns
254        -------
255
256        self : object
257            Returns self.
258        """
259
260        self.model.fit(X, y)
261
262        iterator = tqdm(range(epochs)) if show_progress else range(epochs)
263
264        for epoch in iterator:
265            grad = self._compute_grad(X, y)
266
267            if self.optimizer == "gd":
268                self.model.W_ -= self.lr * grad
269                self.model.W_ = np.clip(self.model.W_, 0, 1)
270                # print("self.model.W_", self.model.W_)
271
272            elif self.optimizer == "sgd":
273                # Sample a mini-batch for stochastic gradient
274                n_samples = X.shape[0]
275                idxs = np.random.choice(
276                    n_samples, self.batch_size, replace=False
277                )
278                if isinstance(X, pd.DataFrame):
279                    X_batch = X.iloc[idxs, :]
280                else:
281                    X_batch = X[idxs, :]
282                y_batch = y[idxs]
283                grad = self._compute_grad(X_batch, y_batch)
284
285                self.model.W_ -= self.lr * grad
286                self.model.W_ = np.clip(self.model.W_, 0, 1)
287
288            elif self.optimizer == "adam":
289                if self.opt_state is None:
290                    self.opt_state = {
291                        "m": np.zeros_like(grad),
292                        "v": np.zeros_like(grad),
293                        "t": 0,
294                    }
295                beta1, beta2, eps = 0.9, 0.999, 1e-8
296                self.opt_state["t"] += 1
297                self.opt_state["m"] = (
298                    beta1 * self.opt_state["m"] + (1 - beta1) * grad
299                )
300                self.opt_state["v"] = beta2 * self.opt_state["v"] + (
301                    1 - beta2
302                ) * (grad**2)
303                m_hat = self.opt_state["m"] / (1 - beta1 ** self.opt_state["t"])
304                v_hat = self.opt_state["v"] / (1 - beta2 ** self.opt_state["t"])
305
306                self.model.W_ -= self.lr * m_hat / (np.sqrt(v_hat) + eps)
307                self.model.W_ = np.clip(self.model.W_, 0, 1)
308                # print("self.model.W_", self.model.W_)
309
310            elif self.optimizer == "cd":  # coordinate descent
311
312                W_shape = self.model.W_.shape
313                W_flat_size = self.model.W_.size
314                W_flat = self.model.W_.flatten()
315                grad_flat = grad.flatten()
316
317                # Update only one coordinate per epoch (cyclic)
318                idx = self._cd_index % W_flat_size
319                W_flat[idx] -= self.lr * grad_flat[idx]
320                # Clip the updated value
321                W_flat[idx] = np.clip(W_flat[idx], 0, 1)
322
323                # Restore W_
324                self.model.W_ = W_flat.reshape(W_shape)
325
326                self._cd_index += 1
327
328            else:
329                raise ValueError(f"Unsupported optimizer: {self.optimizer}")
330
331            loss = self._loss(X, y)
332            self.loss_history_.append(loss)
333
334            if verbose:
335                print(f"Epoch {epoch+1}: Loss = {loss:.6f}")
336
337        # if sample_weights, else: (must use self.row_index)
338        if sample_weight is not None:
339            self.model.fit(
340                X,
341                y,
342                sample_weight=sample_weight[self.index_row_].ravel(),
343                **kwargs,
344            )
345
346            return self
347
348        return self

Fit the model using finite difference optimization.

Parameters

X : array-like of shape (n_samples, n_features)
    Training data.

y : array-like of shape (n_samples,)
    Target values.

epochs : int, optional
    Number of optimization steps (default=10).

verbose : bool, optional
    Whether to print progress messages (default=True).

show_progress : bool, optional
    Whether to show tqdm progress bar (default=True).

sample_weight : array-like, optional
    Sample weights.

**kwargs
    Additional keyword arguments.

Returns

self : object
    Returns self.

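A short training sketch, assuming the `reg` instance from the previous example and in-memory arrays `X_train`, `y_train` (illustrative names): each epoch takes one optimizer step and appends the current penalized loss to `loss_history_`.

```python
# Illustrative sketch: a handful of finite-difference optimization steps.
reg.fit(X_train, y_train, epochs=5, verbose=False, show_progress=False)
print(reg.loss_history_)  # one penalized loss value per epoch
```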
def predict(self, X, level=95, method='splitconformal', **kwargs):
350    def predict(self, X, level=95, method="splitconformal", **kwargs):
351        """
352        Predict using the trained model.
353
354        Parameters
355        ----------
356
357        X : array-like of shape (n_samples, n_features)
358            Input data.
359
360        level : int, optional
361            Level of confidence for prediction intervals (default=95).
362
363        method : {'splitconformal', 'localconformal'}, optional
364            Method for conformal prediction (default='splitconformal').
365
366        **kwargs
367            Additional keyword arguments. Use `return_pi=True` for prediction intervals,
368            or `return_std=True` for standard deviation estimates.
369
370        Returns
371        -------
372
373        array or tuple
374            Model predictions, or a tuple with prediction intervals or standard deviations if requested.
375        """
376        if "return_std" in kwargs:
377
378            alpha = 100 - level
379            pi_multiplier = norm.ppf(1 - alpha / 200)
380
381            if len(X.shape) == 1:
382
383                n_features = X.shape[0]
384                new_X = mo.rbind(
385                    X.reshape(1, n_features),
386                    np.ones(n_features).reshape(1, n_features),
387                )
388
389                mean_, std_ = [u[0] for u in self.model.predict(new_X, return_std=True)]
390
391                preds = mean_
392                lower = mean_ - pi_multiplier * std_
393                upper = mean_ + pi_multiplier * std_
394
395                DescribeResults = namedtuple(
396                    "DescribeResults", ["mean", "std", "lower", "upper"]
397                )
398
399                return DescribeResults(preds, std_, lower, upper)
400
401            # len(X.shape) > 1
402            mean_, std_ = self.model.predict(X, return_std=True)
403
404            preds = mean_
405            lower = mean_ - pi_multiplier * std_
406            upper = mean_ + pi_multiplier * std_
407
408            DescribeResults = namedtuple(
409                "DescribeResults", ["mean", "std", "lower", "upper"]
410            )
411
412            return DescribeResults(preds, std_, lower, upper)
413
414        if "return_pi" in kwargs:
415            assert method in (
416                "splitconformal",
417                "localconformal",
418            ), "method must be in ('splitconformal', 'localconformal')"
419            self.pi = ns.PredictionInterval(
420                obj=self,
421                method=method,
422                level=level,
423                type_pi=self.type_pi,
424                replications=self.replications,
425                kernel=self.kernel,
426            )
427
428            if len(self.X_.shape) == 1:
429                if isinstance(X, pd.DataFrame):
430                    self.X_ = pd.DataFrame(
431                        self.X_.values.reshape(1, -1), columns=self.X_.columns
432                    )
433                else:
434                    self.X_ = self.X_.reshape(1, -1)
435                self.y_ = np.array([self.y_])
436
437            self.pi.fit(self.X_, self.y_)
438            # self.X_ = None # consumes memory to keep, dangerous to delete (side effect)
439            # self.y_ = None # consumes memory to keep, dangerous to delete (side effect)
440            preds = self.pi.predict(X, return_pi=True)
441            return preds
442
443        # "return_std" not in kwargs
444        if len(X.shape) == 1:
445
446            n_features = X.shape[0]
447            new_X = mo.rbind(
448                X.reshape(1, n_features),
449                np.ones(n_features).reshape(1, n_features),
450            )
451
452            return (0 + self.model.predict(new_X, **kwargs))[0]
453
454        # len(X.shape) > 1
455        return self.model.predict(X, **kwargs)

Predict using the trained model.

Parameters

X : array-like of shape (n_samples, n_features)
    Input data.

level : int, optional
    Level of confidence for prediction intervals (default=95).

method : {'splitconformal', 'localconformal'}, optional
    Method for conformal prediction (default='splitconformal').

**kwargs
    Additional keyword arguments. Use return_pi=True for prediction intervals,
    or return_std=True for standard deviation estimates.

Returns

array or tuple
    Model predictions, or a tuple with prediction intervals or standard deviations if requested.

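A hedged prediction sketch, assuming the fitted `reg` from the examples above and a test matrix `X_test` (illustrative names):

```python
# Illustrative sketch: point predictions, plus the documented uncertainty options.
preds = reg.predict(X_test)  # point predictions
# If the base model exposes predictive standard deviations (per the docstring),
# Gaussian bounds at the requested level can be obtained with:
# mean, std, lower, upper = reg.predict(X_test, level=95, return_std=True)
# and conformal intervals with:
# preds_pi = reg.predict(X_test, level=95, method="splitconformal", return_pi=True)
```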
class DeepClassifier(nnetsauce.CustomClassifier, sklearn.base.ClassifierMixin):
 36class DeepClassifier(CustomClassifier, ClassifierMixin):
 37    """
 38    Deep Classifier
 39
 40    Parameters:
 41
 42        obj: an object
 43            A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification
 44
 45        n_layers: int (default=3)
 46            Number of layers. `n_layers = 1` is a simple `CustomClassifier`
 47
 48        verbose : int, optional (default=0)
 49            Monitor progress when fitting.
 50
 51        All the other parameters are nnetsauce `CustomClassifier`'s
 52
 53    Examples:
 54
 55        ```python
 56        import nnetsauce as ns
 57        from sklearn.datasets import load_breast_cancer
 58        from sklearn.model_selection import train_test_split
 59        from sklearn.linear_model import LogisticRegressionCV
 60        data = load_breast_cancer()
 61        X = data.data
 62        y= data.target
 63        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
 64        obj = LogisticRegressionCV()
 65        clf = ns.DeepClassifier(obj)
 66        clf.fit(X_train, y_train)
 67        print(clf.score(X_test, y_test))
 68        ```
 69    """
 70
 71    _estimator_type = "classifier"
 72
 73    def __init__(
 74        self,
 75        obj,
 76        # Defining depth
 77        n_layers=3,
 78        verbose=0,
 79        # CustomClassifier attributes
 80        n_hidden_features=5,
 81        activation_name="relu",
 82        a=0.01,
 83        nodes_sim="sobol",
 84        bias=True,
 85        dropout=0,
 86        direct_link=True,
 87        n_clusters=2,
 88        cluster_encode=True,
 89        type_clust="kmeans",
 90        type_scaling=("std", "std", "std"),
 91        col_sample=1,
 92        row_sample=1,
 93        cv_calibration=2,
 94        calibration_method="sigmoid",
 95        seed=123,
 96        backend="cpu",
 97    ):
 98        super().__init__(
 99            obj=obj,
100            n_hidden_features=n_hidden_features,
101            activation_name=activation_name,
102            a=a,
103            nodes_sim=nodes_sim,
104            bias=bias,
105            dropout=dropout,
106            direct_link=direct_link,
107            n_clusters=n_clusters,
108            cluster_encode=cluster_encode,
109            type_clust=type_clust,
110            type_scaling=type_scaling,
111            col_sample=col_sample,
112            row_sample=row_sample,
113            seed=seed,
114            backend=backend,
115        )
116        self.coef_ = None
117        self.intercept_ = None
118        self.type_fit = "classification"
119        self.cv_calibration = cv_calibration
120        self.calibration_method = calibration_method
121
122        # Only wrap in CalibratedClassifierCV if not already wrapped
123        # if not isinstance(obj, CalibratedClassifierCV):
124        #     self.obj = CalibratedClassifierCV(
125        #         self.obj,
126        #         cv=self.cv_calibration,
127        #         method=self.calibration_method
128        #     )
129        # else:
130        self.coef_ = None
131        self.intercept_ = None
132        self.type_fit = "classification"
133        self.cv_calibration = cv_calibration
134        self.calibration_method = calibration_method
135        self.obj = obj
136
137        assert n_layers >= 1, "must have n_layers >= 1"
138        self.stacked_obj = obj
139        self.verbose = verbose
140        self.n_layers = n_layers
141        self.classes_ = None
142        self.n_classes_ = None
143
144    def fit(self, X, y, **kwargs):
145        """Fit Classification algorithms to X and y.
146        Parameters
147        ----------
148        X : array-like,
149            Training vectors, where rows correspond to samples
150            and columns to features.
151        y : array-like,
152            Target values, with one entry per row
153            (sample) of X.
154        **kwargs: dict
155            Additional parameters to be passed to the fit method
156            of the base learner. For example, `sample_weight`.
157
158        Returns
159        -------
160        A fitted object
161        """
162
163        self.classes_ = np.unique(y)
164        self.n_classes_ = len(
165            self.classes_
166        )  # for compatibility with scikit-learn
167
168        if isinstance(X, np.ndarray):
169            X = pd.DataFrame(X)
170
171        # init layer
172        self.stacked_obj = CustomClassifier(
173            obj=self.stacked_obj,
174            n_hidden_features=self.n_hidden_features,
175            activation_name=self.activation_name,
176            a=self.a,
177            nodes_sim=self.nodes_sim,
178            bias=self.bias,
179            dropout=self.dropout,
180            direct_link=self.direct_link,
181            n_clusters=self.n_clusters,
182            cluster_encode=self.cluster_encode,
183            type_clust=self.type_clust,
184            type_scaling=self.type_scaling,
185            col_sample=self.col_sample,
186            row_sample=self.row_sample,
187            cv_calibration=None,
188            calibration_method=None,
189            seed=self.seed,
190            backend=self.backend,
191        )
192
193        if self.verbose > 0:
194            iterator = tqdm(range(self.n_layers - 1))
195        else:
196            iterator = range(self.n_layers - 1)
197
198        for _ in iterator:
199            self.stacked_obj = deepcopy(
200                CustomClassifier(
201                    obj=self.stacked_obj,
202                    n_hidden_features=self.n_hidden_features,
203                    activation_name=self.activation_name,
204                    a=self.a,
205                    nodes_sim=self.nodes_sim,
206                    bias=self.bias,
207                    dropout=self.dropout,
208                    direct_link=self.direct_link,
209                    n_clusters=self.n_clusters,
210                    cluster_encode=self.cluster_encode,
211                    type_clust=self.type_clust,
212                    type_scaling=self.type_scaling,
213                    col_sample=self.col_sample,
214                    row_sample=self.row_sample,
215                    cv_calibration=None,
216                    calibration_method=None,
217                    seed=self.seed,
218                    backend=self.backend,
219                )
220            )
221            self.stacked_obj.fit(X, y, **kwargs)
222
223        return self
224
225    def partial_fit(self, X, y, **kwargs):
226        """Partially fit Classification algorithms to X and y.
227        Parameters
228        ----------
229        X : array-like,
230            Training vectors, where rows correspond to samples
231            and columns to features.
232        y : array-like,
233            Target values, with one entry per row
234            (sample) of X.
235        **kwargs: dict
236            Additional parameters to be passed to the fit method
237            of the base learner. For example, `sample_weight`.
238        Returns
239        -------
240        A fitted object
241        """
242        assert hasattr(self, "stacked_obj"), "model must be fitted first"
243        current_obj = self.stacked_obj
244        for _ in range(self.n_layers):
245            try:
246                input_X = current_obj.obj.cook_test_set(X)
247                current_obj.obj.partial_fit(input_X, y, **kwargs)
248                try:
249                    current_obj = current_obj.obj
250                except AttributeError:
251                    pass
252            except ValueError:
253                pass
254        return self
255
256    def predict(self, X):
257        return self.stacked_obj.predict(X)
258
259    def predict_proba(self, X):
260        return self.stacked_obj.predict_proba(X)
261
262    def score(self, X, y, scoring=None):
263        return self.stacked_obj.score(X, y, scoring)
264
265    def cross_val_optim(
266        self,
267        X_train,
268        y_train,
269        X_test=None,
270        y_test=None,
271        scoring="accuracy",
272        surrogate_obj=None,
273        cv=5,
274        n_jobs=None,
275        n_init=10,
276        n_iter=190,
277        abs_tol=1e-3,
278        verbose=2,
279        seed=123,
280        **kwargs,
281    ):
282        """Cross-validation function and hyperparameters' search
283
284        Parameters:
285
286            X_train: array-like,
287                Training vectors, where rows is the number of samples
288                and columns is the number of features.
289
290            y_train: array-like,
291                Training vectors, where rows is the number of samples
292                and columns is the number of features.
293
294            X_test: array-like,
295                Testing vectors, where rows is the number of samples
296                and columns is the number of features.
297
298            y_test: array-like,
299                Testing vectors, where rows is the number of samples
300                and columns is the number of features.
301
302            scoring: str
303                scoring metric; see https://scikit-learn.org/stable/modules/model_evaluation.html#the-scoring-parameter-defining-model-evaluation-rules
304
305            surrogate_obj: an object;
306                An ML model for estimating the uncertainty around the objective function
307
308            cv: int;
309                number of cross-validation folds
310
311            n_jobs: int;
312                number of jobs for parallel execution
313
314            n_init: an integer;
315                number of points in the initial setting, when `x_init` and `y_init` are not provided
316
317            n_iter: an integer;
318                number of iterations of the minimization algorithm
319
320            abs_tol: a float;
321                tolerance for convergence of the optimizer (early stopping based on acquisition function)
322
323            verbose: int
324                controls verbosity
325
326            seed: int
327                reproducibility seed
328
329            **kwargs: dict
330                additional parameters to be passed to the estimator
331
332        Examples:
333
334            ```python
335            ```
336        """
337
338        num_to_activation_name = {1: "relu", 2: "sigmoid", 3: "tanh"}
339        num_to_nodes_sim = {1: "sobol", 2: "uniform", 3: "hammersley"}
340        num_to_type_clust = {1: "kmeans", 2: "gmm"}
341
342        def deepclassifier_cv(
343            X_train,
344            y_train,
345            # Defining depth
346            n_layers=3,
347            # CustomClassifier attributes
348            n_hidden_features=5,
349            activation_name="relu",
350            nodes_sim="sobol",
351            dropout=0,
352            n_clusters=2,
353            type_clust="kmeans",
354            cv=5,
355            n_jobs=None,
356            scoring="accuracy",
357            seed=123,
358        ):
359            self.set_params(
360                **{
361                    "n_layers": n_layers,
362                    # CustomClassifier attributes
363                    "n_hidden_features": n_hidden_features,
364                    "activation_name": activation_name,
365                    "nodes_sim": nodes_sim,
366                    "dropout": dropout,
367                    "n_clusters": n_clusters,
368                    "type_clust": type_clust,
369                    **kwargs,
370                }
371            )
372            return -cross_val_score(
373                estimator=self,
374                X=X_train,
375                y=y_train,
376                scoring=scoring,
377                cv=cv,
378                n_jobs=n_jobs,
379                verbose=0,
380            ).mean()
381
382        # objective function for hyperparams tuning
383        def crossval_objective(xx):
384            return deepclassifier_cv(
385                X_train=X_train,
386                y_train=y_train,
387                # Defining depth
388                n_layers=int(np.ceil(xx[0])),
389                # CustomClassifier attributes
390                n_hidden_features=int(np.ceil(xx[1])),
391                activation_name=num_to_activation_name[int(np.ceil(xx[2]))],
392                nodes_sim=num_to_nodes_sim[int(np.ceil(xx[3]))],
393                dropout=xx[4],
394                n_clusters=int(np.ceil(xx[5])),
395                type_clust=num_to_type_clust[int(np.ceil(xx[6]))],
396                cv=cv,
397                n_jobs=n_jobs,
398                scoring=scoring,
399                seed=seed,
400            )
401
402        if surrogate_obj is None:
403            gp_opt = gp.GPOpt(
404                objective_func=crossval_objective,
405                lower_bound=np.array([0, 3, 0, 0, 0.0, 0, 0]),
406                upper_bound=np.array([5, 100, 3, 3, 0.4, 5, 2]),
407                params_names=[
408                    "n_layers",
409                    # CustomClassifier attributes
410                    "n_hidden_features",
411                    "activation_name",
412                    "nodes_sim",
413                    "dropout",
414                    "n_clusters",
415                    "type_clust",
416                ],
417                method="bayesian",
418                n_init=n_init,
419                n_iter=n_iter,
420                seed=seed,
421            )
422        else:
423            gp_opt = gp.GPOpt(
424                objective_func=crossval_objective,
425                lower_bound=np.array([0, 3, 0, 0, 0.0, 0, 0]),
426                upper_bound=np.array([5, 100, 3, 3, 0.4, 5, 2]),
427                params_names=[
428                    "n_layers",
429                    # CustomClassifier attributes
430                    "n_hidden_features",
431                    "activation_name",
432                    "nodes_sim",
433                    "dropout",
434                    "n_clusters",
435                    "type_clust",
436                ],
437                acquisition="ucb",
438                method="splitconformal",
439                surrogate_obj=ns.PredictionInterval(
440                    obj=surrogate_obj, method="splitconformal"
441                ),
442                n_init=n_init,
443                n_iter=n_iter,
444                seed=seed,
445            )
446
447        res = gp_opt.optimize(verbose=verbose, abs_tol=abs_tol)
448        res.best_params["n_layers"] = int(np.ceil(res.best_params["n_layers"]))
449        res.best_params["n_hidden_features"] = int(
450            np.ceil(res.best_params["n_hidden_features"])
451        )
452        res.best_params["activation_name"] = num_to_activation_name[
453            int(np.ceil(res.best_params["activation_name"]))
454        ]
455        res.best_params["nodes_sim"] = num_to_nodes_sim[
456            int(np.ceil(res.best_params["nodes_sim"]))
457        ]
458        res.best_params["dropout"] = res.best_params["dropout"]
459        res.best_params["n_clusters"] = int(
460            np.ceil(res.best_params["n_clusters"])
461        )
462        res.best_params["type_clust"] = num_to_type_clust[
463            int(np.ceil(res.best_params["type_clust"]))
464        ]
465
466        # out-of-sample error
467        if X_test is not None and y_test is not None:
468            self.set_params(**res.best_params, verbose=0, seed=seed)
469            preds = self.fit(X_train, y_train).predict(X_test)
470            # check error on y_test
471            oos_err = getattr(metrics, scoring + "_score")(
472                y_true=y_test, y_pred=preds
473            )
474            result = namedtuple("result", res._fields + ("test_" + scoring,))
475            return result(*res, oos_err)
476        else:
477            return res
478
479    def lazy_cross_val_optim(
480        self,
481        X_train,
482        y_train,
483        X_test=None,
484        y_test=None,
485        scoring="accuracy",
486        surrogate_objs=None,
487        customize=False,
488        cv=5,
489        n_jobs=None,
490        n_init=10,
491        n_iter=190,
492        abs_tol=1e-3,
493        verbose=1,
494        seed=123,
495    ):
496        """Automated Cross-validation function and hyperparameters' search using multiple surrogates
497
498        Parameters:
499
500            X_train: array-like,
501                Training vectors, where rows is the number of samples
502                and columns is the number of features.
503
504            y_train: array-like,
505                Training vectors, where rows is the number of samples
506                and columns is the number of features.
507
508            X_test: array-like,
509                Testing vectors, where rows is the number of samples
510                and columns is the number of features.
511
512            y_test: array-like,
513                Testing vectors, where rows is the number of samples
514                and columns is the number of features.
515
516            scoring: str
517                scoring metric; see https://scikit-learn.org/stable/modules/model_evaluation.html#the-scoring-parameter-defining-model-evaluation-rules
518
519            surrogate_objs: object names as a list of strings;
520                ML models for estimating the uncertainty around the objective function
521
522            customize: boolean
523                if True, the surrogate is transformed into a quasi-randomized network (default is False)
524
525            cv: int;
526                number of cross-validation folds
527
528            n_jobs: int;
529                number of jobs for parallel execution
530
531            n_init: an integer;
532                number of points in the initial setting, when `x_init` and `y_init` are not provided
533
534            n_iter: an integer;
535                number of iterations of the minimization algorithm
536
537            abs_tol: a float;
538                tolerance for convergence of the optimizer (early stopping based on acquisition function)
539
540            verbose: int
541                controls verbosity
542
543            seed: int
544                reproducibility seed
545
546        Examples:
547
548            ```python
549            ```
550        """
551
552        removed_regressors = [
553            "TheilSenRegressor",
554            "ARDRegression",
555            "CCA",
556            "GaussianProcessRegressor",
557            "GradientBoostingRegressor",
558            "HistGradientBoostingRegressor",
559            "IsotonicRegression",
560            "MultiOutputRegressor",
561            "MultiTaskElasticNet",
562            "MultiTaskElasticNetCV",
563            "MultiTaskLasso",
564            "MultiTaskLassoCV",
565            "OrthogonalMatchingPursuit",
566            "OrthogonalMatchingPursuitCV",
567            "PLSCanonical",
568            "PLSRegression",
569            "RadiusNeighborsRegressor",
570            "RegressorChain",
571            "StackingRegressor",
572            "VotingRegressor",
573        ]
574
575        results = []
576
577        for est in all_estimators():
578
579            if surrogate_objs is None:
580
581                if issubclass(est[1], RegressorMixin) and (
582                    est[0] not in removed_regressors
583                ):
584                    try:
585                        if customize == True:
586                            surr_obj = ns.CustomClassifier(obj=est[1]())
587                        else:
588                            surr_obj = est[1]()
589                        res = self.cross_val_optim(
590                            X_train=X_train,
591                            y_train=y_train,
592                            X_test=X_test,
593                            y_test=y_test,
594                            surrogate_obj=surr_obj,
595                            cv=cv,
596                            n_jobs=n_jobs,
597                            scoring=scoring,
598                            n_init=n_init,
599                            n_iter=n_iter,
600                            abs_tol=abs_tol,
601                            verbose=verbose,
602                            seed=seed,
603                        )
604                        if customize == True:
605                            results.append((f"CustomClassifier({est[0]})", res))
606                        else:
607                            results.append((est[0], res))
608                    except:
609                        pass
610
611            else:
612
613                if (
614                    issubclass(est[1], RegressorMixin)
615                    and (est[0] not in removed_regressors)
616                    and est[0] in surrogate_objs
617                ):
618                    try:
619                        if customize == True:
620                            surr_obj = ns.CustomClassifier(obj=est[1]())
621                        else:
622                            surr_obj = est[1]()
623                        res = self.cross_val_optim(
624                            X_train=X_train,
625                            y_train=y_train,
626                            X_test=X_test,
627                            y_test=y_test,
628                            surrogate_obj=surr_obj,
629                            cv=cv,
630                            n_jobs=n_jobs,
631                            scoring=scoring,
632                            n_init=n_init,
633                            n_iter=n_iter,
634                            abs_tol=abs_tol,
635                            verbose=verbose,
636                            seed=seed,
637                        )
638                        if customize == True:
639                            results.append((f"CustomClassifier({est[0]})", res))
640                        else:
641                            results.append((est[0], res))
642                    except:
643                        pass
644
645        return results
646
647    @property
648    def _estimator_type(self):
649        return "classifier"

Deep Classifier

Parameters:

obj: an object
    A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification

n_layers: int (default=3)
    Number of layers. `n_layers = 1` is a simple `CustomClassifier`

verbose : int, optional (default=0)
    Monitor progress when fitting.

All the other parameters are nnetsauce `CustomClassifier`'s

Examples:

import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegressionCV
data = load_breast_cancer()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
obj = LogisticRegressionCV()
clf = ns.DeepClassifier(obj)
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))

def fit(self, X, y, **kwargs):
144    def fit(self, X, y, **kwargs):
145        """Fit Classification algorithms to X and y.
146        Parameters
147        ----------
148        X : array-like,
149            Training vectors, where rows correspond to samples
150            and columns to features.
151        y : array-like,
152            Target values, with one entry per row
153            (sample) of X.
154        **kwargs: dict
155            Additional parameters to be passed to the fit method
156            of the base learner. For example, `sample_weight`.
157
158        Returns
159        -------
160        A fitted object
161        """
162
163        self.classes_ = np.unique(y)
164        self.n_classes_ = len(
165            self.classes_
166        )  # for compatibility with scikit-learn
167
168        if isinstance(X, np.ndarray):
169            X = pd.DataFrame(X)
170
171        # init layer
172        self.stacked_obj = CustomClassifier(
173            obj=self.stacked_obj,
174            n_hidden_features=self.n_hidden_features,
175            activation_name=self.activation_name,
176            a=self.a,
177            nodes_sim=self.nodes_sim,
178            bias=self.bias,
179            dropout=self.dropout,
180            direct_link=self.direct_link,
181            n_clusters=self.n_clusters,
182            cluster_encode=self.cluster_encode,
183            type_clust=self.type_clust,
184            type_scaling=self.type_scaling,
185            col_sample=self.col_sample,
186            row_sample=self.row_sample,
187            cv_calibration=None,
188            calibration_method=None,
189            seed=self.seed,
190            backend=self.backend,
191        )
192
193        if self.verbose > 0:
194            iterator = tqdm(range(self.n_layers - 1))
195        else:
196            iterator = range(self.n_layers - 1)
197
198        for _ in iterator:
199            self.stacked_obj = deepcopy(
200                CustomClassifier(
201                    obj=self.stacked_obj,
202                    n_hidden_features=self.n_hidden_features,
203                    activation_name=self.activation_name,
204                    a=self.a,
205                    nodes_sim=self.nodes_sim,
206                    bias=self.bias,
207                    dropout=self.dropout,
208                    direct_link=self.direct_link,
209                    n_clusters=self.n_clusters,
210                    cluster_encode=self.cluster_encode,
211                    type_clust=self.type_clust,
212                    type_scaling=self.type_scaling,
213                    col_sample=self.col_sample,
214                    row_sample=self.row_sample,
215                    cv_calibration=None,
216                    calibration_method=None,
217                    seed=self.seed,
218                    backend=self.backend,
219                )
220            )
221            self.stacked_obj.fit(X, y, **kwargs)
222
223        return self

Fit Classification algorithms to X and y.

Parameters

X : array-like,
    Training vectors, where rows correspond to samples and columns to features.

y : array-like,
    Target values, with one entry per row (sample) of X.

**kwargs: dict
    Additional parameters to be passed to the fit method of the base learner.
    For example, `sample_weight`.

Returns

A fitted object

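Because `**kwargs` are forwarded to the base learner's `fit`, per-sample weights can be passed through; a hedged sketch reusing `clf`, `X_train`, and `y_train` from the class example above, and assuming the base learner accepts `sample_weight`:

```python
import numpy as np

# Illustrative sketch: up-weight the positive class when fitting.
weights = np.where(y_train == 1, 2.0, 1.0)
clf.fit(X_train, y_train, sample_weight=weights)
```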
def predict(self, X):
256    def predict(self, X):
257        return self.stacked_obj.predict(X)

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
def predict_proba(self, X):
259    def predict_proba(self, X):
260        return self.stacked_obj.predict_proba(X)

Predict probabilities for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

probability estimates for test data: {array-like}
def score(self, X, y, scoring=None):
262    def score(self, X, y, scoring=None):
263        return self.stacked_obj.score(X, y, scoring)

Scoring function for classification.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

scoring: str
    scoring method (default is accuracy)

Returns:

score: float
class DeepRegressor(nnetsauce.CustomRegressor, sklearn.base.RegressorMixin):
 13class DeepRegressor(CustomRegressor, RegressorMixin):
 14    """
 15    Deep Regressor
 16
 17    Parameters:
 18
 19        obj: an object
 20            A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification
 21
 22        verbose : int, optional (default=0)
 23            Monitor progress when fitting.
 24
 25        n_layers: int (default=2)
 26            Number of layers. `n_layers = 1` is a simple `CustomRegressor`
 27
 28        All the other parameters are nnetsauce `CustomRegressor`'s
 29
 30    Examples:
 31
 32        ```python
 33        import nnetsauce as ns
 34        from sklearn.datasets import load_diabetes
 35        from sklearn.model_selection import train_test_split
 36        from sklearn.linear_model import RidgeCV
 37        data = load_diabetes()
 38        X = data.data
 39        y= data.target
 40        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
 41        obj = RidgeCV()
 42        clf = ns.DeepRegressor(obj)
 43        clf.fit(X_train, y_train)
 44        print(clf.score(X_test, y_test))
 45        ```
 46
 47    """
 48
 49    def __init__(
 50        self,
 51        obj,
 52        # Defining depth
 53        n_layers=2,
 54        verbose=0,
 55        # CustomRegressor attributes
 56        n_hidden_features=5,
 57        activation_name="relu",
 58        a=0.01,
 59        nodes_sim="sobol",
 60        bias=True,
 61        dropout=0,
 62        direct_link=True,
 63        n_clusters=2,
 64        cluster_encode=True,
 65        type_clust="kmeans",
 66        type_scaling=("std", "std", "std"),
 67        col_sample=1,
 68        row_sample=1,
 69        level=None,
 70        pi_method="splitconformal",
 71        seed=123,
 72        backend="cpu",
 73    ):
 74        super().__init__(
 75            obj=obj,
 76            n_hidden_features=n_hidden_features,
 77            activation_name=activation_name,
 78            a=a,
 79            nodes_sim=nodes_sim,
 80            bias=bias,
 81            dropout=dropout,
 82            direct_link=direct_link,
 83            n_clusters=n_clusters,
 84            cluster_encode=cluster_encode,
 85            type_clust=type_clust,
 86            type_scaling=type_scaling,
 87            col_sample=col_sample,
 88            row_sample=row_sample,
 89            level=level,
 90            pi_method=pi_method,
 91            seed=seed,
 92            backend=backend,
 93        )
 94
 95        assert n_layers >= 1, "must have n_layers >= 1"
 96
 97        self.stacked_obj = deepcopy(obj)
 98        self.verbose = verbose
 99        self.n_layers = n_layers
100        self.level = level
101        self.pi_method = pi_method
102        self.coef_ = None
103
104    def fit(self, X, y, **kwargs):
105        """Fit Regression algorithms to X and y.
106        Parameters
107        ----------
108        X : array-like,
109            Training vectors, where rows correspond to samples
110            and columns to features.
111        y : array-like,
112            Target values, with one entry per row
113            (sample) of X.
114        **kwargs: dict
115            Additional parameters to be passed to the fit method
116            of the base learner. For example, `sample_weight`.
117        Returns
118        -------
119        A fitted object
120        """
121
122        if isinstance(X, np.ndarray):
123            X = pd.DataFrame(X)
124
125        # init layer
126        self.stacked_obj = CustomRegressor(
127            obj=self.stacked_obj,
128            n_hidden_features=self.n_hidden_features,
129            activation_name=self.activation_name,
130            a=self.a,
131            nodes_sim=self.nodes_sim,
132            bias=self.bias,
133            dropout=self.dropout,
134            direct_link=self.direct_link,
135            n_clusters=self.n_clusters,
136            cluster_encode=self.cluster_encode,
137            type_clust=self.type_clust,
138            type_scaling=self.type_scaling,
139            col_sample=self.col_sample,
140            row_sample=self.row_sample,
141            seed=self.seed,
142            backend=self.backend,
143        )
144
145        if self.verbose > 0:
146            iterator = tqdm(range(self.n_layers - 1))
147        else:
148            iterator = range(self.n_layers - 1)
149
150        for _ in iterator:
151            self.stacked_obj = deepcopy(
152                CustomRegressor(
153                    obj=self.stacked_obj,
154                    n_hidden_features=self.n_hidden_features,
155                    activation_name=self.activation_name,
156                    a=self.a,
157                    nodes_sim=self.nodes_sim,
158                    bias=self.bias,
159                    dropout=self.dropout,
160                    direct_link=self.direct_link,
161                    n_clusters=self.n_clusters,
162                    cluster_encode=self.cluster_encode,
163                    type_clust=self.type_clust,
164                    type_scaling=self.type_scaling,
165                    col_sample=self.col_sample,
166                    row_sample=self.row_sample,
167                    seed=self.seed,
168                    backend=self.backend,
169                )
170            )
171
172        self.stacked_obj.fit(X, y, **kwargs)
173
174        if self.level is not None:
175            self.stacked_obj = PredictionInterval(
176                obj=self.stacked_obj, method=self.pi_method, level=self.level
177            )
178
179        if hasattr(self.stacked_obj, "clustering_obj_"):
180            self.clustering_obj_ = self.stacked_obj.clustering_obj_
181
182        if hasattr(self.stacked_obj, "coef_"):
183            self.coef_ = self.stacked_obj.coef_
184
185        if hasattr(self.stacked_obj, "scaler_"):
186            self.scaler_ = self.stacked_obj.scaler_
187
188        if hasattr(self.stacked_obj, "nn_scaler_"):
189            self.nn_scaler_ = self.stacked_obj.nn_scaler_
190
191        if hasattr(self.stacked_obj, "clustering_scaler_"):
192            self.clustering_scaler_ = self.stacked_obj.clustering_scaler_
193
194        return self
195
196    def partial_fit(self, X, y, **kwargs):
197        """Fit Regression algorithms to X and y.
198        Parameters
199        ----------
200        X : array-like,
201            Training vectors, where the rows are the samples
202            and the columns are the features.
203        y : array-like,
204            Target values corresponding to the rows (samples)
205            of X.
206        **kwargs: dict
207            Additional parameters to be passed to the fit method
208            of the base learner. For example, `sample_weight`.
209        Returns
210        -------
211        A fitted object
212        """
213        assert hasattr(self, "stacked_obj"), "model must be fitted first"
214        current_obj = self.stacked_obj
215        for _ in range(self.n_layers):
216            try:
217                input_X = current_obj.obj.cook_test_set(X)
218                current_obj.obj.partial_fit(input_X, y, **kwargs)
219                try:
220                    current_obj = current_obj.obj
221                except AttributeError:
222                    pass
223            except ValueError as e:
224                print(e)
225                pass
226        return self
227
228    def predict(self, X, **kwargs):
229        if self.level is not None:
230            return self.stacked_obj.predict(X, return_pi=True)
231        return self.stacked_obj.predict(X, **kwargs)
232
233    def score(self, X, y, scoring=None):
234        return self.stacked_obj.score(X, y, scoring)

Deep Regressor

Parameters:

obj: an object
    A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification

verbose : int, optional (default=0)
    Monitor progress when fitting.

n_layers: int (default=2)
    Number of layers. `n_layers = 1` is a simple `CustomRegressor`

All other parameters are those of nnetsauce's `CustomRegressor`

Examples:

    import nnetsauce as ns
    from sklearn.datasets import load_diabetes
    from sklearn.model_selection import train_test_split
    from sklearn.linear_model import RidgeCV

    data = load_diabetes()
    X = data.data
    y = data.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
    obj = RidgeCV()
    clf = ns.DeepRegressor(obj)
    clf.fit(X_train, y_train)
    print(clf.score(X_test, y_test))


def fit(self, X, y, **kwargs):
104    def fit(self, X, y, **kwargs):
105        """Fit Regression algorithms to X and y.
106        Parameters
107        ----------
108        X : array-like,
109            Training vectors, where the rows are the samples
110            and the columns are the features.
111        y : array-like,
112            Target values corresponding to the rows (samples)
113            of X.
114        **kwargs: dict
115            Additional parameters to be passed to the fit method
116            of the base learner. For example, `sample_weight`.
117        Returns
118        -------
119        A fitted object
120        """
121
122        if isinstance(X, np.ndarray):
123            X = pd.DataFrame(X)
124
125        # init layer
126        self.stacked_obj = CustomRegressor(
127            obj=self.stacked_obj,
128            n_hidden_features=self.n_hidden_features,
129            activation_name=self.activation_name,
130            a=self.a,
131            nodes_sim=self.nodes_sim,
132            bias=self.bias,
133            dropout=self.dropout,
134            direct_link=self.direct_link,
135            n_clusters=self.n_clusters,
136            cluster_encode=self.cluster_encode,
137            type_clust=self.type_clust,
138            type_scaling=self.type_scaling,
139            col_sample=self.col_sample,
140            row_sample=self.row_sample,
141            seed=self.seed,
142            backend=self.backend,
143        )
144
145        if self.verbose > 0:
146            iterator = tqdm(range(self.n_layers - 1))
147        else:
148            iterator = range(self.n_layers - 1)
149
150        for _ in iterator:
151            self.stacked_obj = deepcopy(
152                CustomRegressor(
153                    obj=self.stacked_obj,
154                    n_hidden_features=self.n_hidden_features,
155                    activation_name=self.activation_name,
156                    a=self.a,
157                    nodes_sim=self.nodes_sim,
158                    bias=self.bias,
159                    dropout=self.dropout,
160                    direct_link=self.direct_link,
161                    n_clusters=self.n_clusters,
162                    cluster_encode=self.cluster_encode,
163                    type_clust=self.type_clust,
164                    type_scaling=self.type_scaling,
165                    col_sample=self.col_sample,
166                    row_sample=self.row_sample,
167                    seed=self.seed,
168                    backend=self.backend,
169                )
170            )
171
172        self.stacked_obj.fit(X, y, **kwargs)
173
174        if self.level is not None:
175            self.stacked_obj = PredictionInterval(
176                obj=self.stacked_obj, method=self.pi_method, level=self.level
177            )
178
179        if hasattr(self.stacked_obj, "clustering_obj_"):
180            self.clustering_obj_ = self.stacked_obj.clustering_obj_
181
182        if hasattr(self.stacked_obj, "coef_"):
183            self.coef_ = self.stacked_obj.coef_
184
185        if hasattr(self.stacked_obj, "scaler_"):
186            self.scaler_ = self.stacked_obj.scaler_
187
188        if hasattr(self.stacked_obj, "nn_scaler_"):
189            self.nn_scaler_ = self.stacked_obj.nn_scaler_
190
191        if hasattr(self.stacked_obj, "clustering_scaler_"):
192            self.clustering_scaler_ = self.stacked_obj.clustering_scaler_
193
194        return self

Fit Regression algorithms to X and y.

Parameters

X : array-like
    Training vectors, where the rows are the samples and the columns are the features.

y : array-like
    Target values corresponding to the rows (samples) of X.

**kwargs: dict
    Additional parameters to be passed to the fit method of the base learner, for example `sample_weight`.

Returns

A fitted object
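
For illustration, a minimal sketch of forwarding keyword arguments to the innermost base learner (the weights below are arbitrary and only assume that the base learner's `fit` accepts `sample_weight`):

    import numpy as np
    import nnetsauce as ns
    from sklearn.datasets import load_diabetes
    from sklearn.linear_model import RidgeCV

    X, y = load_diabetes(return_X_y=True)
    # illustrative weights; any positive array of length n_samples would do
    weights = np.ones(X.shape[0])

    reg = ns.DeepRegressor(RidgeCV(), n_layers=2)
    # extra keyword arguments such as sample_weight are forwarded to the
    # base learner's fit method
    reg.fit(X, y, sample_weight=weights)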

def predict(self, X, **kwargs):
228    def predict(self, X, **kwargs):
229        if self.level is not None:
230            return self.stacked_obj.predict(X, return_pi=True)
231        return self.stacked_obj.predict(X, **kwargs)

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

level: int
    Level of confidence (default = 95)

method: str
    prediction interval method, either 'splitconformal' or
    'localconformal' (used when `return_pi = True`)

**kwargs: additional parameters
        `return_pi = True` for conformal prediction,
        with `method` in ('splitconformal', 'localconformal'),
        or `return_std = True` when `self.obj` is one of
        `sklearn.linear_model.BayesianRidge`,
        `sklearn.linear_model.ARDRegression`,
        `sklearn.gaussian_process.GaussianProcessRegressor`

Returns:

model predictions:
    an array when no uncertainty quantification is requested;
    a tuple (mean, standard deviation, lower and upper bounds of the
    prediction interval) when `return_std = True`; or a tuple containing
    prediction intervals (and simulations, when available) when
    `return_pi = True`
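
A sketch of the two usage modes described above (hyperparameter values are illustrative, not prescriptive): point predictions by default, and conformal prediction intervals when a confidence `level` is set at construction time:

    import nnetsauce as ns
    from sklearn.datasets import load_diabetes
    from sklearn.linear_model import RidgeCV
    from sklearn.model_selection import train_test_split

    X, y = load_diabetes(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)

    # point predictions
    reg = ns.DeepRegressor(RidgeCV(), n_layers=2)
    reg.fit(X_train, y_train)
    preds = reg.predict(X_test)

    # prediction intervals: when `level` is set, predict returns intervals
    # (internally, return_pi=True on a PredictionInterval wrapper)
    reg_pi = ns.DeepRegressor(RidgeCV(), n_layers=2, level=95, pi_method="splitconformal")
    reg_pi.fit(X_train, y_train)
    pi_preds = reg_pi.predict(X_test)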
def score(self, X, y, scoring=None):
233    def score(self, X, y, scoring=None):
234        return self.stacked_obj.score(X, y, scoring)

Compute the score of the model.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

scoring: str
    scoring method

Returns:

score: float
class DeepMTS(nnetsauce.MTS):
 11class DeepMTS(MTS):
 12    """Univariate and multivariate time series (DeepMTS) forecasting with Quasi-Randomized networks (Work in progress)
 13
 14    Parameters:
 15
 16        obj: object.
 17            any object containing a method fit (obj.fit()) and a method predict
 18            (obj.predict()).
 19
 20        n_layers: int.
 21            number of layers in the neural network.
 22
 23        n_hidden_features: int.
 24            number of nodes in the hidden layer.
 25
 26        activation_name: str.
 27            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'.
 28
 29        a: float.
 30            hyperparameter for 'prelu' or 'elu' activation function.
 31
 32        nodes_sim: str.
 33            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 34            'uniform'.
 35
 36        bias: boolean.
 37            indicates if the hidden layer contains a bias term (True) or not
 38            (False).
 39
 40        dropout: float.
 41            regularization parameter; (random) percentage of nodes dropped out
 42            of the training.
 43
 44        direct_link: boolean.
 45            indicates if the original predictors are included (True) in model's fitting or not (False).
 46
 47        n_clusters: int.
 48            number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering).
 49
 50        cluster_encode: bool.
 51            defines how the variable containing clusters is treated (default is one-hot)
 52            if `False`, then labels are used, without one-hot encoding.
 53
 54        type_clust: str.
 55            type of clustering method: currently k-means ('kmeans') or Gaussian
 56            Mixture Model ('gmm').
 57
 58        type_scaling: a tuple of 3 strings.
 59            scaling methods for inputs, hidden layer, and clustering respectively
 60            (and when relevant).
 61            Currently available: standardization ('std') or MinMax scaling ('minmax').
 62
 63        lags: int.
 64            number of lags used for each time series.
 65
 66        type_pi: str.
 67            type of prediction interval; currently:
 68            - "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case
 69            - "kde": based on Kernel Density Estimation of in-sample residuals
 70            - "bootstrap": based on independent bootstrap of in-sample residuals
 71            - "block-bootstrap": based on basic block bootstrap of in-sample residuals
 72            - "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals
 73            - "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals
 74            - "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals
 75            - "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals
 76            - "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals
 77            - "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals
 78
 79        block_size: int.
 80            size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap").
 81            Default is round(3.15 * n_residuals^(1/3))
 82
 83        replications: int.
 84            number of replications (if needed, for predictive simulation). Default is 'None'.
 85
 86        kernel: str.
 87            the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'.
 88
 89        agg: str.
 90            either "mean" or "median" for simulation of bootstrap aggregating
 91
 92        seed: int.
 93            reproducibility seed for nodes_sim=='uniform' or predictive simulation.
 94
 95        backend: str.
 96            "cpu" or "gpu" or "tpu".
 97
 98        verbose: int.
 99            0: not printing; 1: printing
100
101        show_progress: bool.
102            True: progress bar when fitting each series; False: no progress bar when fitting each series
103
104    Attributes:
105
106        fit_objs_: dict
107            objects adjusted to each individual time series
108
109        y_: {array-like}
110            DeepMTS responses (most recent observations first)
111
112        X_: {array-like}
113            DeepMTS lags
114
115        xreg_: {array-like}
116            external regressors
117
118        y_means_: dict
119            a dictionary of each series mean values
120
121        preds_: {array-like}
122            successive model predictions
123
124        preds_std_: {array-like}
125            standard deviation around the predictions
126
127        return_std_: boolean
128            return uncertainty or not (set in predict)
129
130        df_: data frame
131            the input data frame, in case a data.frame is provided to `fit`
132
133    Examples:
134
135    Example 1:
136
137        ```python
138        import nnetsauce as ns
139        import numpy as np
140        from sklearn import linear_model
141        np.random.seed(123)
142
143        M = np.random.rand(10, 3)
144        M[:,0] = 10*M[:,0]
145        M[:,2] = 25*M[:,2]
146        print(M)
147
148        # Adjust Bayesian Ridge
149        regr4 = linear_model.BayesianRidge()
150        obj_DeepMTS = ns.DeepMTS(regr4, lags = 1, n_hidden_features=5)
151        obj_DeepMTS.fit(M)
152        print(obj_DeepMTS.predict())
153
154        # with credible intervals
155        print(obj_DeepMTS.predict(return_std=True, level=80))
156
157        print(obj_DeepMTS.predict(return_std=True, level=95))
158        ```
159
160    Example 2:
161
162        ```python
163        import nnetsauce as ns
164        import numpy as np
165        from sklearn import linear_model
166
167        dataset = {
168        'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
169        'series1' : [34, 30, 35.6, 33.3, 38.1],
170        'series2' : [4, 5.5, 5.6, 6.3, 5.1],
171        'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
172        df = pd.DataFrame(dataset).set_index('date')
173        print(df)
174
175        # Adjust Bayesian Ridge
176        regr5 = linear_model.BayesianRidge()
177        obj_DeepMTS = ns.DeepMTS(regr5, lags = 1, n_hidden_features=5)
178        obj_DeepMTS.fit(df)
179        print(obj_DeepMTS.predict())
180
181        # with credible intervals
182        print(obj_DeepMTS.predict(return_std=True, level=80))
183
184        print(obj_DeepMTS.predict(return_std=True, level=95))
185        ```
186
187    """
188
189    # construct the object -----
190
191    def __init__(
192        self,
193        obj,
194        n_layers=3,
195        n_hidden_features=5,
196        activation_name="relu",
197        a=0.01,
198        nodes_sim="sobol",
199        bias=True,
200        dropout=0,
201        direct_link=True,
202        n_clusters=2,
203        cluster_encode=True,
204        type_clust="kmeans",
205        type_scaling=("std", "std", "std"),
206        lags=1,
207        type_pi="kde",
208        block_size=None,
209        replications=None,
210        kernel=None,
211        agg="mean",
212        seed=123,
213        backend="cpu",
214        verbose=0,
215        show_progress=True,
216    ):
217        assert int(lags) == lags, "parameter 'lags' should be an integer"
218        assert n_layers >= 1, "must have n_layers >= 1"
219        self.n_layers = int(n_layers)
220
221        if self.n_layers > 1:
222
223            for _ in range(self.n_layers - 1):
224                obj = CustomRegressor(
225                    obj=deepcopy(obj),
226                    n_hidden_features=n_hidden_features,
227                    activation_name=activation_name,
228                    a=a,
229                    nodes_sim=nodes_sim,
230                    bias=bias,
231                    dropout=dropout,
232                    direct_link=direct_link,
233                    n_clusters=n_clusters,
234                    cluster_encode=cluster_encode,
235                    type_clust=type_clust,
236                    type_scaling=type_scaling,
237                    seed=seed,
238                    backend=backend,
239                )
240
241        self.obj = deepcopy(obj)
242        super().__init__(
243            obj=self.obj,
244            n_hidden_features=n_hidden_features,
245            activation_name=activation_name,
246            a=a,
247            nodes_sim=nodes_sim,
248            bias=bias,
249            dropout=dropout,
250            direct_link=direct_link,
251            n_clusters=n_clusters,
252            cluster_encode=cluster_encode,
253            type_clust=type_clust,
254            type_scaling=type_scaling,
255            lags=lags,
256            type_pi=type_pi,
257            block_size=block_size,
258            replications=replications,
259            kernel=kernel,
260            agg=agg,
261            seed=seed,
262            backend=backend,
263            verbose=verbose,
264            show_progress=show_progress,
265        )

Univariate and multivariate time series (DeepMTS) forecasting with Quasi-Randomized networks (Work in progress)

Parameters:

obj: object.
    any object containing a method fit (obj.fit()) and a method predict
    (obj.predict()).

n_layers: int.
    number of layers in the neural network.

n_hidden_features: int.
    number of nodes in the hidden layer.

activation_name: str.
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'.

a: float.
    hyperparameter for 'prelu' or 'elu' activation function.

nodes_sim: str.
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'.

bias: boolean.
    indicates if the hidden layer contains a bias term (True) or not
    (False).

dropout: float.
    regularization parameter; (random) percentage of nodes dropped out
    of the training.

direct_link: boolean.
    indicates if the original predictors are included (True) in model's fitting or not (False).

n_clusters: int.
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering).

cluster_encode: bool.
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding.

type_clust: str.
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm').

type_scaling: a tuple of 3 strings.
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax').

lags: int.
    number of lags used for each time series.

type_pi: str.
    type of prediction interval; currently:
    - "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case
    - "kde": based on Kernel Density Estimation of in-sample residuals
    - "bootstrap": based on independent bootstrap of in-sample residuals
    - "block-bootstrap": based on basic block bootstrap of in-sample residuals
    - "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals
    - "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals
    - "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals
    - "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals
    - "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals
    - "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals

block_size: int.
    size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap").
    Default is round(3.15 * n_residuals^(1/3))

replications: int.
    number of replications (if needed, for predictive simulation). Default is 'None'.

kernel: str.
    the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'.

agg: str.
    either "mean" or "median" for simulation of bootstrap aggregating

seed: int.
    reproducibility seed for nodes_sim=='uniform' or predictive simulation.

backend: str.
    "cpu" or "gpu" or "tpu".

verbose: int.
    0: not printing; 1: printing

show_progress: bool.
    True: progress bar when fitting each series; False: no progress bar when fitting each series

Attributes:

fit_objs_: dict
    objects adjusted to each individual time series

y_: {array-like}
    DeepMTS responses (most recent observations first)

X_: {array-like}
    DeepMTS lags

xreg_: {array-like}
    external regressors

y_means_: dict
    a dictionary of each series mean values

preds_: {array-like}
    successive model predictions

preds_std_: {array-like}
    standard deviation around the predictions

return_std_: boolean
    return uncertainty or not (set in predict)

df_: data frame
    the input data frame, in case a data.frame is provided to `fit`

Examples:

Example 1:

    import nnetsauce as ns
    import numpy as np
    from sklearn import linear_model

    np.random.seed(123)

    M = np.random.rand(10, 3)
    M[:,0] = 10*M[:,0]
    M[:,2] = 25*M[:,2]
    print(M)

    # Adjust Bayesian Ridge
    regr4 = linear_model.BayesianRidge()
    obj_DeepMTS = ns.DeepMTS(regr4, lags = 1, n_hidden_features=5)
    obj_DeepMTS.fit(M)
    print(obj_DeepMTS.predict())

    # with credible intervals
    print(obj_DeepMTS.predict(return_std=True, level=80))

    print(obj_DeepMTS.predict(return_std=True, level=95))

Example 2:

    import nnetsauce as ns
    import numpy as np
    import pandas as pd
    from sklearn import linear_model

    dataset = {
        'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
        'series1' : [34, 30, 35.6, 33.3, 38.1],
        'series2' : [4, 5.5, 5.6, 6.3, 5.1],
        'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
    df = pd.DataFrame(dataset).set_index('date')
    print(df)

    # Adjust Bayesian Ridge
    regr5 = linear_model.BayesianRidge()
    obj_DeepMTS = ns.DeepMTS(regr5, lags = 1, n_hidden_features=5)
    obj_DeepMTS.fit(df)
    print(obj_DeepMTS.predict())

    # with credible intervals
    print(obj_DeepMTS.predict(return_std=True, level=80))

    print(obj_DeepMTS.predict(return_std=True, level=95))
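
Beyond the two examples above, a further sketch (hyperparameter values are illustrative; `h` is assumed here to be the forecasting horizon accepted by the parent MTS class's `predict`) combining KDE-based predictive simulation of the in-sample residuals with a prediction level:

    import nnetsauce as ns
    import numpy as np
    from sklearn import linear_model

    np.random.seed(123)
    M = np.random.rand(25, 3)

    # KDE of in-sample residuals, with 100 predictive simulations
    regr = linear_model.BayesianRidge()
    obj_DeepMTS = ns.DeepMTS(regr, lags=2, n_layers=2, n_hidden_features=5,
                             type_pi="kde", kernel="gaussian", replications=100)
    obj_DeepMTS.fit(M)
    print(obj_DeepMTS.predict(h=5, level=95))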

class Downloader:
 6class Downloader:
 7    """Download datasets from data sources (R-universe for now)"""
 8
 9    def __init__(self):
10        self.pkgname = None
11        self.dataset = None
12        self.source = None
13        self.url = None
14        self.request = None
15
16    def download(
17        self,
18        pkgname="MASS",
19        dataset="Boston",
20        source="https://cran.r-universe.dev/",
21        **kwargs
22    ):
23        """Download datasets from data sources (R-universe for now)
24
25        Examples:
26
27        ```python
28        import nnetsauce as ns
29
30        downloader = ns.Downloader()
31        df = downloader.download(pkgname="MASS", dataset="Boston")
32        ```
33
34        """
35        self.pkgname = pkgname
36        self.dataset = dataset
37        self.source = source
38        self.url = source + pkgname + "/data/" + dataset + "/json"
39        self.request = requests.get(self.url)
40        return pd.DataFrame(self.request.json(), **kwargs)

Download datasets from data sources (R-universe for now)

def download( self, pkgname='MASS', dataset='Boston', source='https://cran.r-universe.dev/', **kwargs):
16    def download(
17        self,
18        pkgname="MASS",
19        dataset="Boston",
20        source="https://cran.r-universe.dev/",
21        **kwargs
22    ):
23        """Download datasets from data sources (R-universe for now)
24
25        Examples:
26
27        ```python
28        import nnetsauce as ns
29
30        downloader = ns.Downloader()
31        df = downloader.download(pkgname="MASS", dataset="Boston")
32        ```
33
34        """
35        self.pkgname = pkgname
36        self.dataset = dataset
37        self.source = source
38        self.url = source + pkgname + "/data/" + dataset + "/json"
39        self.request = requests.get(self.url)
40        return pd.DataFrame(self.request.json(), **kwargs)

Download datasets from data sources (R-universe for now)

Examples:

import nnetsauce as ns

downloader = ns.Downloader()
df = downloader.download(pkgname="MASS", dataset="Boston")
class GLMClassifier(nnetsauce.glm.glm.GLM, sklearn.base.ClassifierMixin):
 21class GLMClassifier(GLM, ClassifierMixin):
 22    """Generalized 'linear' models using quasi-randomized networks (classification)
 23
 24    Parameters:
 25
 26        n_hidden_features: int
 27            number of nodes in the hidden layer
 28
 29        lambda1: float
 30            regularization parameter for GLM coefficients on original features
 31
 32        alpha1: float
 33            controls the compromise between the l1 and l2 norms of GLM coefficients on original features
 34
 35        lambda2: float
 36            regularization parameter for GLM coefficients on nonlinear features
 37
 38        alpha2: float
 39            controls the compromise between the l1 and l2 norms of GLM coefficients on nonlinear features
 40
 41        activation_name: str
 42            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 43
 44        a: float
 45            hyperparameter for 'prelu' or 'elu' activation function
 46
 47        nodes_sim: str
 48            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 49            'uniform'
 50
 51        bias: boolean
 52            indicates if the hidden layer contains a bias term (True) or not
 53            (False)
 54
 55        dropout: float
 56            regularization parameter; (random) percentage of nodes dropped out
 57            of the training
 58
 59        direct_link: boolean
 60            indicates if the original predictors are included (True) in model's
 61            fitting or not (False)
 62
 63        n_clusters: int
 64            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 65                no clustering)
 66
 67        cluster_encode: bool
 68            defines how the variable containing clusters is treated (default is one-hot)
 69            if `False`, then labels are used, without one-hot encoding
 70
 71        type_clust: str
 72            type of clustering method: currently k-means ('kmeans') or Gaussian
 73            Mixture Model ('gmm')
 74
 75        type_scaling: a tuple of 3 strings
 76            scaling methods for inputs, hidden layer, and clustering respectively
 77            (and when relevant).
 78            Currently available: standardization ('std') or MinMax scaling ('minmax')
 79
 80        optimizer: object
 81            optimizer, from class nnetsauce.Optimizer
 82
 83        backend: str.
 84            "cpu" or "gpu" or "tpu".
 85
 86        seed: int
 87            reproducibility seed for nodes_sim=='uniform'
 88
 89    Attributes:
 90
 91        beta_: vector
 92            regression coefficients
 93
 94    Examples:
 95
 96    See [https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_classification.py)
 97
 98    """
 99
100    # construct the object -----
101    _estimator_type = "classifier"
102
103    def __init__(
104        self,
105        n_hidden_features=5,
106        lambda1=0.01,
107        alpha1=0.5,
108        lambda2=0.01,
109        alpha2=0.5,
110        family="expit",
111        activation_name="relu",
112        a=0.01,
113        nodes_sim="sobol",
114        bias=True,
115        dropout=0,
116        direct_link=True,
117        n_clusters=2,
118        cluster_encode=True,
119        type_clust="kmeans",
120        type_scaling=("std", "std", "std"),
121        optimizer=Optimizer(),
122        backend="cpu",
123        seed=123,
124    ):
125        super().__init__(
126            n_hidden_features=n_hidden_features,
127            lambda1=lambda1,
128            alpha1=alpha1,
129            lambda2=lambda2,
130            alpha2=alpha2,
131            activation_name=activation_name,
132            a=a,
133            nodes_sim=nodes_sim,
134            bias=bias,
135            dropout=dropout,
136            direct_link=direct_link,
137            n_clusters=n_clusters,
138            cluster_encode=cluster_encode,
139            type_clust=type_clust,
140            type_scaling=type_scaling,
141            optimizer=optimizer,
142            backend=backend,
143            seed=seed,
144        )
145
146        self.family = family
147
148    def logit_loss(self, Y, row_index, XB):
149        self.n_classes = Y.shape[1]  # len(np.unique(y))
150        # Y = mo.one_hot_encode2(y, self.n_classes)
151        # Y = self.optimizer.one_hot_encode(y, self.n_classes)
152
153        # max_double = 709.0 # only if softmax
154        # XB[XB > max_double] = max_double
155        XB[XB > 709.0] = 709.0
156
157        if row_index is None:
158            return -np.mean(np.sum(Y * XB, axis=1) - logsumexp(XB))
159
160        return -np.mean(np.sum(Y[row_index, :] * XB, axis=1) - logsumexp(XB))
161
162    def expit_erf_loss(self, Y, row_index, XB):
163        # self.n_classes = len(np.unique(y))
164        # Y = mo.one_hot_encode2(y, self.n_classes)
165        # Y = self.optimizer.one_hot_encode(y, self.n_classes)
166        self.n_classes = Y.shape[1]
167
168        if row_index is None:
169            return -np.mean(np.sum(Y * XB, axis=1) - logsumexp(XB))
170
171        return -np.mean(np.sum(Y[row_index, :] * XB, axis=1) - logsumexp(XB))
172
173    def loss_func(
174        self,
175        beta,
176        group_index,
177        X,
178        Y,
179        y,
180        row_index=None,
181        type_loss="logit",
182        **kwargs
183    ):
184        res = {
185            "logit": self.logit_loss,
186            "expit": self.expit_erf_loss,
187            "erf": self.expit_erf_loss,
188        }
189
190        if row_index is None:
191            row_index = range(len(y))
192            XB = self.compute_XB(
193                X,
194                beta=np.reshape(beta, (X.shape[1], self.n_classes), order="F"),
195            )
196
197            return res[type_loss](Y, row_index, XB) + self.compute_penalty(
198                group_index=group_index, beta=beta
199            )
200
201        XB = self.compute_XB(
202            X,
203            beta=np.reshape(beta, (X.shape[1], self.n_classes), order="F"),
204            row_index=row_index,
205        )
206
207        return res[type_loss](Y, row_index, XB) + self.compute_penalty(
208            group_index=group_index, beta=beta
209        )
210
211    def fit(self, X, y, **kwargs):
212        """Fit GLM model to training data (X, y).
213
214        Args:
215
216            X: {array-like}, shape = [n_samples, n_features]
217                Training vectors, where n_samples is the number
218                of samples and n_features is the number of features.
219
220            y: array-like, shape = [n_samples]
221                Target values.
222
223            **kwargs: additional parameters to be passed to
224                    self.cook_training_set or self.obj.fit
225
226        Returns:
227
228            self: object
229
230        """
231
232        assert mx.is_factor(
233            y
234        ), "y must contain only integers"  # change is_factor and subsampling everywhere
235
236        self.classes_ = np.unique(y)  # for compatibility with sklearn
237        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
238
239        self.beta_ = None
240
241        n, p = X.shape
242
243        self.group_index = n * X.shape[1]
244
245        self.n_classes = len(np.unique(y))
246
247        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
248
249        # Y = mo.one_hot_encode2(output_y, self.n_classes)
250        Y = self.optimizer.one_hot_encode(output_y, self.n_classes)
251
252        # initialization
253        if self.backend == "cpu":
254            beta_ = np.linalg.lstsq(scaled_Z, Y, rcond=None)[0]
255        else:
256            beta_ = jnp.linalg.lstsq(scaled_Z, Y, rcond=None)[0]
257
258        # optimization
259        # fit(self, loss_func, response, x0, **kwargs):
260        # loss_func(self, beta, group_index, X, y,
261        #          row_index=None, type_loss="gaussian",
262        #          **kwargs)
263        self.optimizer.fit(
264            self.loss_func,
265            response=y,
266            x0=beta_.flatten(order="F"),
267            group_index=self.group_index,
268            X=scaled_Z,
269            Y=Y,
270            y=y,
271            type_loss=self.family,
272        )
273
274        self.beta_ = self.optimizer.results[0]
275        self.classes_ = np.unique(y)
276
277        return self
278
279    def predict(self, X, **kwargs):
280        """Predict test data X.
281
282        Args:
283
284            X: {array-like}, shape = [n_samples, n_features]
285                Training vectors, where n_samples is the number
286                of samples and n_features is the number of features.
287
288            **kwargs: additional parameters to be passed to
289                    self.cook_test_set
290
291        Returns:
292
293            model predictions: {array-like}
294
295        """
296
297        return np.argmax(self.predict_proba(X, **kwargs), axis=1)
298
299    def predict_proba(self, X, **kwargs):
300        """Predict probabilities for test data X.
301
302        Args:
303
304            X: {array-like}, shape = [n_samples, n_features]
305                Training vectors, where n_samples is the number
306                of samples and n_features is the number of features.
307
308            **kwargs: additional parameters to be passed to
309                    self.cook_test_set
310
311        Returns:
312
313            probability estimates for test data: {array-like}
314
315        """
316        if len(X.shape) == 1:
317            n_features = X.shape[0]
318            new_X = mo.rbind(
319                X.reshape(1, n_features),
320                np.ones(n_features).reshape(1, n_features),
321            )
322
323            Z = self.cook_test_set(new_X, **kwargs)
324
325        else:
326            Z = self.cook_test_set(X, **kwargs)
327
328        ZB = mo.safe_sparse_dot(
329            Z,
330            self.beta_.reshape(
331                self.n_classes,
332                X.shape[1] + self.n_hidden_features + self.n_clusters,
333            ).T,
334        )
335
336        if self.family == "logit":
337            exp_ZB = np.exp(ZB)
338
339            return exp_ZB / exp_ZB.sum(axis=1)[:, None]
340
341        if self.family == "expit":
342            exp_ZB = expit(ZB)
343
344            return exp_ZB / exp_ZB.sum(axis=1)[:, None]
345
346        if self.family == "erf":
347            exp_ZB = 0.5 * (1 + erf(ZB))
348
349            return exp_ZB / exp_ZB.sum(axis=1)[:, None]
350
351    def score(self, X, y, scoring=None):
352        """Scoring function for classification.
353
354        Args:
355
356            X: {array-like}, shape = [n_samples, n_features]
357                Training vectors, where n_samples is the number
358                of samples and n_features is the number of features.
359
360            y: array-like, shape = [n_samples]
361                Target values.
362
363            scoring: str
364                scoring method (default is accuracy)
365
366        Returns:
367
368            score: float
369        """
370
371        if scoring is None:
372            scoring = "accuracy"
373
374        if scoring == "accuracy":
375            return skm2.accuracy_score(y, self.predict(X))
376
377        if scoring == "f1":
378            return skm2.f1_score(y, self.predict(X))
379
380        if scoring == "precision":
381            return skm2.precision_score(y, self.predict(X))
382
383        if scoring == "recall":
384            return skm2.recall_score(y, self.predict(X))
385
386        if scoring == "roc_auc":
387            return skm2.roc_auc_score(y, self.predict(X))
388
389        if scoring == "log_loss":
390            return skm2.log_loss(y, self.predict_proba(X))
391
392        if scoring == "balanced_accuracy":
393            return skm2.balanced_accuracy_score(y, self.predict(X))
394
395        if scoring == "average_precision":
396            return skm2.average_precision_score(y, self.predict(X))
397
398        if scoring == "neg_brier_score":
399            return -skm2.brier_score_loss(y, self.predict_proba(X))
400
401        if scoring == "neg_log_loss":
402            return -skm2.log_loss(y, self.predict_proba(X))
403
404    @property
405    def _estimator_type(self):
406        return "classifier"

Generalized 'linear' models using quasi-randomized networks (classification)

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

lambda1: float
    regularization parameter for GLM coefficients on original features

alpha1: float
    controls the compromise between the l1 and l2 norms of GLM coefficients on original features

lambda2: float
    regularization parameter for GLM coefficients on nonlinear features

alpha2: float
    controls the compromise between the l1 and l2 norms of GLM coefficients on nonlinear features

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

optimizer: object
    optimizer, from class nnetsauce.Optimizer

backend: str.
    "cpu" or "gpu" or "tpu".

seed: int
    reproducibility seed for nodes_sim=='uniform'

Attributes:

beta_: vector
    regression coefficients

Examples:

See https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_classification.py
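
A minimal, illustrative sketch (hyperparameter values are assumptions, not taken from the linked script; the default `Optimizer` settings may need tuning on other datasets):

    import nnetsauce as ns
    from sklearn.datasets import load_iris
    from sklearn.model_selection import train_test_split

    X, y = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)

    # family='expit' is the constructor default; y must contain integer labels
    clf = ns.GLMClassifier(n_hidden_features=5, n_clusters=2)
    clf.fit(X_train, y_train)
    print(clf.score(X_test, y_test))        # accuracy by default
    print(clf.predict_proba(X_test)[:5])    # normalized class probabilities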

def fit(self, X, y, **kwargs):
211    def fit(self, X, y, **kwargs):
212        """Fit GLM model to training data (X, y).
213
214        Args:
215
216            X: {array-like}, shape = [n_samples, n_features]
217                Training vectors, where n_samples is the number
218                of samples and n_features is the number of features.
219
220            y: array-like, shape = [n_samples]
221                Target values.
222
223            **kwargs: additional parameters to be passed to
224                    self.cook_training_set or self.obj.fit
225
226        Returns:
227
228            self: object
229
230        """
231
232        assert mx.is_factor(
233            y
234        ), "y must contain only integers"  # change is_factor and subsampling everywhere
235
236        self.classes_ = np.unique(y)  # for compatibility with sklearn
237        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
238
239        self.beta_ = None
240
241        n, p = X.shape
242
243        self.group_index = n * X.shape[1]
244
245        self.n_classes = len(np.unique(y))
246
247        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
248
249        # Y = mo.one_hot_encode2(output_y, self.n_classes)
250        Y = self.optimizer.one_hot_encode(output_y, self.n_classes)
251
252        # initialization
253        if self.backend == "cpu":
254            beta_ = np.linalg.lstsq(scaled_Z, Y, rcond=None)[0]
255        else:
256            beta_ = jnp.linalg.lstsq(scaled_Z, Y, rcond=None)[0]
257
258        # optimization
259        # fit(self, loss_func, response, x0, **kwargs):
260        # loss_func(self, beta, group_index, X, y,
261        #          row_index=None, type_loss="gaussian",
262        #          **kwargs)
263        self.optimizer.fit(
264            self.loss_func,
265            response=y,
266            x0=beta_.flatten(order="F"),
267            group_index=self.group_index,
268            X=scaled_Z,
269            Y=Y,
270            y=y,
271            type_loss=self.family,
272        )
273
274        self.beta_ = self.optimizer.results[0]
275        self.classes_ = np.unique(y)
276
277        return self

Fit GLM model to training data (X, y).

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
def predict(self, X, **kwargs):
279    def predict(self, X, **kwargs):
280        """Predict test data X.
281
282        Args:
283
284            X: {array-like}, shape = [n_samples, n_features]
285                Training vectors, where n_samples is the number
286                of samples and n_features is the number of features.
287
288            **kwargs: additional parameters to be passed to
289                    self.cook_test_set
290
291        Returns:
292
293            model predictions: {array-like}
294
295        """
296
297        return np.argmax(self.predict_proba(X, **kwargs), axis=1)

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
def predict_proba(self, X, **kwargs):
299    def predict_proba(self, X, **kwargs):
300        """Predict probabilities for test data X.
301
302        Args:
303
304            X: {array-like}, shape = [n_samples, n_features]
305                Training vectors, where n_samples is the number
306                of samples and n_features is the number of features.
307
308            **kwargs: additional parameters to be passed to
309                    self.cook_test_set
310
311        Returns:
312
313            probability estimates for test data: {array-like}
314
315        """
316        if len(X.shape) == 1:
317            n_features = X.shape[0]
318            new_X = mo.rbind(
319                X.reshape(1, n_features),
320                np.ones(n_features).reshape(1, n_features),
321            )
322
323            Z = self.cook_test_set(new_X, **kwargs)
324
325        else:
326            Z = self.cook_test_set(X, **kwargs)
327
328        ZB = mo.safe_sparse_dot(
329            Z,
330            self.beta_.reshape(
331                self.n_classes,
332                X.shape[1] + self.n_hidden_features + self.n_clusters,
333            ).T,
334        )
335
336        if self.family == "logit":
337            exp_ZB = np.exp(ZB)
338
339            return exp_ZB / exp_ZB.sum(axis=1)[:, None]
340
341        if self.family == "expit":
342            exp_ZB = expit(ZB)
343
344            return exp_ZB / exp_ZB.sum(axis=1)[:, None]
345
346        if self.family == "erf":
347            exp_ZB = 0.5 * (1 + erf(ZB))
348
349            return exp_ZB / exp_ZB.sum(axis=1)[:, None]

Predict probabilities for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

probability estimates for test data: {array-like}
def score(self, X, y, scoring=None):
351    def score(self, X, y, scoring=None):
352        """Scoring function for classification.
353
354        Args:
355
356            X: {array-like}, shape = [n_samples, n_features]
357                Training vectors, where n_samples is the number
358                of samples and n_features is the number of features.
359
360            y: array-like, shape = [n_samples]
361                Target values.
362
363            scoring: str
364                scoring method (default is accuracy)
365
366        Returns:
367
368            score: float
369        """
370
371        if scoring is None:
372            scoring = "accuracy"
373
374        if scoring == "accuracy":
375            return skm2.accuracy_score(y, self.predict(X))
376
377        if scoring == "f1":
378            return skm2.f1_score(y, self.predict(X))
379
380        if scoring == "precision":
381            return skm2.precision_score(y, self.predict(X))
382
383        if scoring == "recall":
384            return skm2.recall_score(y, self.predict(X))
385
386        if scoring == "roc_auc":
387            return skm2.roc_auc_score(y, self.predict(X))
388
389        if scoring == "log_loss":
390            return skm2.log_loss(y, self.predict_proba(X))
391
392        if scoring == "balanced_accuracy":
393            return skm2.balanced_accuracy_score(y, self.predict(X))
394
395        if scoring == "average_precision":
396            return skm2.average_precision_score(y, self.predict(X))
397
398        if scoring == "neg_brier_score":
399            return -skm2.brier_score_loss(y, self.predict_proba(X))
400
401        if scoring == "neg_log_loss":
402            return -skm2.log_loss(y, self.predict_proba(X))

Scoring function for classification.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

scoring: str
    scoring method (default is accuracy)

Returns:

score: float
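
For instance, a short, illustrative sketch of selecting a metric (a binary target is assumed so that 'f1' applies):

    import nnetsauce as ns
    from sklearn.datasets import load_breast_cancer
    from sklearn.model_selection import train_test_split

    X, y = load_breast_cancer(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)

    clf = ns.GLMClassifier()
    clf.fit(X_train, y_train)
    print(clf.score(X_test, y_test))                          # accuracy (default)
    print(clf.score(X_test, y_test, scoring="f1"))            # binary F1 score
    print(clf.score(X_test, y_test, scoring="neg_log_loss"))  # negative log-loss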
class GLMRegressor(nnetsauce.glm.glm.GLM, sklearn.base.RegressorMixin):
 20class GLMRegressor(GLM, RegressorMixin):
 21    """Generalized 'linear' models using quasi-randomized networks (regression)
 22
 23    Parameters:
 24
 25        n_hidden_features: int
 26            number of nodes in the hidden layer
 27
 28        lambda1: float
 29            regularization parameter for GLM coefficients on original features
 30
 31        alpha1: float
 32            controls the compromise between the l1 and l2 norms of GLM coefficients on original features
 33
 34        lambda2: float
 35            regularization parameter for GLM coefficients on nonlinear features
 36
 37        alpha2: float
 38            controls the compromise between the l1 and l2 norms of GLM coefficients on nonlinear features
 39
 40        family: str
 41            "gaussian", "laplace", "poisson", or "quantile" (for now)
 42
 43        level: int, default=50
 44            The level of the quantiles to compute for family = "quantile".
 45            Default is the median.
 46
 47        activation_name: str
 48            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 49
 50        a: float
 51            hyperparameter for 'prelu' or 'elu' activation function
 52
 53        nodes_sim: str
 54            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 55            'uniform'
 56
 57        bias: boolean
 58            indicates if the hidden layer contains a bias term (True) or not
 59            (False)
 60
 61        dropout: float
 62            regularization parameter; (random) percentage of nodes dropped out
 63            of the training
 64
 65        direct_link: boolean
 66            indicates if the original predictors are included (True) in model's
 67            fitting or not (False)
 68
 69        n_clusters: int
 70            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 71                no clustering)
 72
 73        cluster_encode: bool
 74            defines how the variable containing clusters is treated (default is one-hot)
 75            if `False`, then labels are used, without one-hot encoding
 76
 77        type_clust: str
 78            type of clustering method: currently k-means ('kmeans') or Gaussian
 79            Mixture Model ('gmm')
 80
 81        type_scaling: a tuple of 3 strings
 82            scaling methods for inputs, hidden layer, and clustering respectively
 83            (and when relevant).
 84            Currently available: standardization ('std') or MinMax scaling ('minmax')
 85
 86        optimizer: object
 87            optimizer, from class nnetsauce.utils.Optimizer
 88
 89        backend: str.
 90            "cpu" or "gpu" or "tpu".
 91
 92        seed: int
 93            reproducibility seed for nodes_sim=='uniform'
 94
 95        backend: str
 96            "cpu", "gpu", "tpu"
 97
 98    Attributes:
 99
100        beta_: vector
101            regression coefficients
102
103    Examples:
104
105        See [https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py)
106
107    """
108
109    # construct the object -----
110
111    def __init__(
112        self,
113        n_hidden_features=5,
114        lambda1=0.01,
115        alpha1=0.5,
116        lambda2=0.01,
117        alpha2=0.5,
118        family="gaussian",
119        level=50,
120        activation_name="relu",
121        a=0.01,
122        nodes_sim="sobol",
123        bias=True,
124        dropout=0,
125        direct_link=True,
126        n_clusters=2,
127        cluster_encode=True,
128        type_clust="kmeans",
129        type_scaling=("std", "std", "std"),
130        optimizer=Optimizer(),
131        backend="cpu",
132        seed=123,
133    ):
134        super().__init__(
135            n_hidden_features=n_hidden_features,
136            lambda1=lambda1,
137            alpha1=alpha1,
138            lambda2=lambda2,
139            alpha2=alpha2,
140            activation_name=activation_name,
141            a=a,
142            nodes_sim=nodes_sim,
143            bias=bias,
144            dropout=dropout,
145            direct_link=direct_link,
146            n_clusters=n_clusters,
147            cluster_encode=cluster_encode,
148            type_clust=type_clust,
149            type_scaling=type_scaling,
150            optimizer=optimizer,
151            backend=backend,
152            seed=seed,
153        )
154
155        self.family = family
156        self.level = level
157        self.q = self.level / 100
158
159    def gaussian_loss(self, y, row_index, XB):
160        return 0.5 * np.mean(np.square(y[row_index] - XB))
161
162    def laplace_loss(self, y, row_index, XB):
163        return 0.5 * np.mean(np.abs(y[row_index] - XB))
164
165    def poisson_loss(self, y, row_index, XB):
166        return -np.mean(y[row_index] * XB - np.exp(XB))
167
168    def pinball_loss(self, y, row_index, XB, tau=0.5):
169        y = np.array(y[row_index])
170        y_pred = np.array(XB)
171        return mean_pinball_loss(y, y_pred, alpha=tau)
172        # return np.mean(np.maximum(tau * residuals, (tau - 1) * residuals))
173
174    def loss_func(
175        self,
176        beta,
177        group_index,
178        X,
179        y,
180        row_index=None,
181        type_loss="gaussian",
182        **kwargs
183    ):
184        res = {
185            "gaussian": self.gaussian_loss,
186            "laplace": self.laplace_loss,
187            "poisson": self.poisson_loss,
188            "quantile": self.pinball_loss,
189        }
190
191        if type_loss != "quantile":
192
193            if row_index is None:
194                row_index = range(len(y))
195                XB = self.compute_XB(X, beta=beta)
196
197                return res[type_loss](y, row_index, XB) + self.compute_penalty(
198                    group_index=group_index, beta=beta
199                )
200
201            XB = self.compute_XB(X, beta=beta, row_index=row_index)
202
203            return res[type_loss](y, row_index, XB) + self.compute_penalty(
204                group_index=group_index, beta=beta
205            )
206
207        else:  # quantile
208
209            assert (
210                self.q > 0 and self.q < 1
211            ), "'tau' must be comprised 0 < tau < 1"
212
213            if row_index is None:
214                row_index = range(len(y))
215                XB = self.compute_XB(X, beta=beta)
216                return res[type_loss](y, row_index, XB, self.q)
217
218            XB = self.compute_XB(X, beta=beta, row_index=row_index)
219            return res[type_loss](y, row_index, XB, self.q)
220
221    def fit(self, X, y, **kwargs):
222        """Fit GLM model to training data (X, y).
223
224        Args:
225
226            X: {array-like}, shape = [n_samples, n_features]
227                Training vectors, where n_samples is the number
228                of samples and n_features is the number of features.
229
230            y: array-like, shape = [n_samples]
231                Target values.
232
233            **kwargs: additional parameters to be passed to
234                    self.cook_training_set or self.obj.fit
235
236        Returns:
237
238            self: object
239
240        """
241        self.beta_ = None
242        self.n_iter = 0
243
244        _, self.group_index = X.shape
245
246        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
247        # initialization
248        if self.backend == "cpu":
249            beta_ = np.linalg.lstsq(scaled_Z, centered_y, rcond=None)[0]
250        else:
251            beta_ = jnp.linalg.lstsq(scaled_Z, centered_y, rcond=None)[0]
252        # optimization
253        # fit(self, loss_func, response, x0, **kwargs):
254        # loss_func(self, beta, group_index, X, y,
255        #          row_index=None, type_loss="gaussian",
256        #          **kwargs)
257        self.optimizer.fit(
258            self.loss_func,
259            response=centered_y,
260            x0=beta_,
261            group_index=self.group_index,
262            X=scaled_Z,
263            y=centered_y,
264            type_loss=self.family,
265            **kwargs
266        )
267
268        self.beta_ = self.optimizer.results[0]
269
270        return self
271
272    def predict(self, X, **kwargs):
273        """Predict test data X.
274
275        Args:
276
277            X: {array-like}, shape = [n_samples, n_features]
278                Training vectors, where n_samples is the number
279                of samples and n_features is the number of features.
280
281            **kwargs: additional parameters to be passed to
282                    self.cook_test_set
283
284        Returns:
285
286            model predictions: {array-like}
287
288        """
289
290        if len(X.shape) == 1:
291            n_features = X.shape[0]
292            new_X = mo.rbind(
293                X.reshape(1, n_features),
294                np.ones(n_features).reshape(1, n_features),
295            )
296
297            return (
298                self.y_mean_
299                + np.dot(self.cook_test_set(new_X, **kwargs), self.beta_)
300            )[0]
301
302        return self.y_mean_ + np.dot(
303            self.cook_test_set(X, **kwargs), self.beta_
304        )
305
306    def score(self, X, y, scoring=None):
307        """Compute the score of the model.
308
309        Parameters:
310
311            X: {array-like}, shape = [n_samples, n_features]
312                Training vectors, where n_samples is the number
313                of samples and n_features is the number of features.
314
315            y: array-like, shape = [n_samples]
316                Target values.
317
318            scoring: str
319                scoring method
320
321        Returns:
322
323            score: float
324
325        """
326
327        if scoring is None:
328            return np.sqrt(np.mean((self.predict(X) - y) ** 2))
329
330        return skm2.get_scorer(scoring)(self, X, y)

Generalized 'linear' models using quasi-randomized networks (regression)

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

lambda1: float
    regularization parameter for GLM coefficients on original features

alpha1: float
    controls the compromise between the l1 and l2 norms of GLM coefficients on original features

lambda2: float
    regularization parameter for GLM coefficients on nonlinear features

alpha2: float
    controls the compromise between the l1 and l2 norms of GLM coefficients on nonlinear features

family: str
    "gaussian", "laplace", "poisson", or "quantile" (for now)

level: int, default=50
    The level of the quantiles to compute for family = "quantile".
    Default is the median.

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

optimizer: object
    optimizer, from class nnetsauce.utils.Optimizer

backend: str
    "cpu", "gpu", or "tpu"

seed: int
    reproducibility seed for nodes_sim=='uniform'

Attributes:

beta_: vector
    regression coefficients

Examples:

See [https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py)
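Below is a minimal usage sketch (not taken from the linked example file; it assumes scikit-learn is installed and that the default constructor, including its default optimizer, is sufficient):

    import nnetsauce as ns
    from sklearn.datasets import load_diabetes
    from sklearn.model_selection import train_test_split

    X, y = load_diabetes(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=123
    )

    # illustrative settings only; see the linked glm_regression.py for maintained examples
    reg = ns.GLMRegressor(n_hidden_features=5, family="gaussian")
    reg.fit(X_train, y_train)
    preds = reg.predict(X_test)
    print(reg.score(X_test, y_test))  # RMSE when no scoring string is given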
def fit(self, X, y, **kwargs):
221    def fit(self, X, y, **kwargs):
222        """Fit GLM model to training data (X, y).
223
224        Args:
225
226            X: {array-like}, shape = [n_samples, n_features]
227                Training vectors, where n_samples is the number
228                of samples and n_features is the number of features.
229
230            y: array-like, shape = [n_samples]
231                Target values.
232
233            **kwargs: additional parameters to be passed to
234                    self.cook_training_set or self.obj.fit
235
236        Returns:
237
238            self: object
239
240        """
241        self.beta_ = None
242        self.n_iter = 0
243
244        _, self.group_index = X.shape
245
246        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
247        # initialization
248        if self.backend == "cpu":
249            beta_ = np.linalg.lstsq(scaled_Z, centered_y, rcond=None)[0]
250        else:
251            beta_ = jnp.linalg.lstsq(scaled_Z, centered_y, rcond=None)[0]
252        # optimization
253        # fit(self, loss_func, response, x0, **kwargs):
254        # loss_func(self, beta, group_index, X, y,
255        #          row_index=None, type_loss="gaussian",
256        #          **kwargs)
257        self.optimizer.fit(
258            self.loss_func,
259            response=centered_y,
260            x0=beta_,
261            group_index=self.group_index,
262            X=scaled_Z,
263            y=centered_y,
264            type_loss=self.family,
265            **kwargs
266        )
267
268        self.beta_ = self.optimizer.results[0]
269
270        return self

Fit GLM model to training data (X, y).

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
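Note that fit warm-starts the coefficients with an ordinary least-squares solution on the scaled hidden features before handing the penalized loss to the optimizer. A rough standalone illustration of that initialization step (plain NumPy, not the library's internals):

    import numpy as np

    rng = np.random.default_rng(0)
    Z = rng.normal(size=(100, 10))        # stands in for the scaled hidden-layer features
    y = Z @ rng.normal(size=10) + 0.1 * rng.normal(size=100)

    # least-squares warm start, as in fit(); the optimizer then refines it
    beta0 = np.linalg.lstsq(Z, y - y.mean(), rcond=None)[0]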
def predict(self, X, **kwargs):
272    def predict(self, X, **kwargs):
273        """Predict test data X.
274
275        Args:
276
277            X: {array-like}, shape = [n_samples, n_features]
278                Training vectors, where n_samples is the number
279                of samples and n_features is the number of features.
280
281            **kwargs: additional parameters to be passed to
282                    self.cook_test_set
283
284        Returns:
285
286            model predictions: {array-like}
287
288        """
289
290        if len(X.shape) == 1:
291            n_features = X.shape[0]
292            new_X = mo.rbind(
293                X.reshape(1, n_features),
294                np.ones(n_features).reshape(1, n_features),
295            )
296
297            return (
298                self.y_mean_
299                + np.dot(self.cook_test_set(new_X, **kwargs), self.beta_)
300            )[0]
301
302        return self.y_mean_ + np.dot(
303            self.cook_test_set(X, **kwargs), self.beta_
304        )

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
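When X is a single observation (a 1D array), predict reshapes it into a one-row matrix, stacks a dummy row of ones so the internal preprocessing receives a 2D input, and returns only the first prediction. Continuing the earlier sketch (illustrative, not from the linked example file):

    single_pred = reg.predict(X_test[0])   # 1D input: a single predicted value
    batch_preds = reg.predict(X_test)      # 2D input: one prediction per row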
def score(self, X, y, scoring=None):
306    def score(self, X, y, scoring=None):
307        """Compute the score of the model.
308
309        Parameters:
310
311            X: {array-like}, shape = [n_samples, n_features]
312                Training vectors, where n_samples is the number
313                of samples and n_features is the number of features.
314
315            y: array-like, shape = [n_samples]
316                Target values.
317
318            scoring: str
319                scoring method
320
321        Returns:
322
323            score: float
324
325        """
326
327        if scoring is None:
328            return np.sqrt(np.mean((self.predict(X) - y) ** 2))
329
330        return skm2.get_scorer(scoring)(self, X, y)

Compute the score of the model.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

scoring: str
    scoring method

Returns:

score: float
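With scoring=None the score is the root mean squared error of the predictions; any other value is passed to a scikit-learn-style get_scorer (skm2.get_scorer in the source), so standard scorer names apply. Continuing the earlier sketch:

    rmse = reg.score(X_test, y_test)                              # default: RMSE
    r2 = reg.score(X_test, y_test, scoring="r2")                  # scikit-learn scorer name
    neg_mae = reg.score(X_test, y_test, scoring="neg_mean_absolute_error")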
class KernelRidge(sklearn.base.BaseEstimator, sklearn.base.RegressorMixin):
 18class KernelRidge(BaseEstimator, RegressorMixin):
 19    """
 20    Kernel Ridge Regression with optional GPU support, Matérn kernels, and automatic input standardization.
 21
 22    Parameters:
 23    - alpha: float
 24        Regularization parameter.
 25    - kernel: str
 26        Kernel type ("linear", "rbf", or "matern").
 27    - gamma: float
 28        Kernel coefficient for "rbf". Ignored for other kernels.
 29    - nu: float
 30        Smoothness parameter for the Matérn kernel. Default is 1.5.
 31    - length_scale: float
 32        Length scale parameter for the Matérn kernel. Default is 1.0.
 33    - backend: str
 34        "cpu" or "gpu" (uses JAX if "gpu").
 35    """
 36
 37    def __init__(
 38        self,
 39        alpha=1.0,
 40        kernel="rbf",
 41        gamma=None,
 42        nu=1.5,
 43        length_scale=1.0,
 44        backend="cpu",
 45    ):
 46        self.alpha = alpha
 47        self.alpha_ = alpha
 48        self.kernel = kernel
 49        self.gamma = gamma
 50        self.nu = nu
 51        self.length_scale = length_scale
 52        self.backend = backend
 53        self.scaler = StandardScaler()
 54
 55        if backend == "gpu" and not JAX_AVAILABLE:
 56            raise ImportError(
 57                "JAX is not installed. Please install JAX to use the GPU backend."
 58            )
 59
 60    def _linear_kernel(self, X, Y):
 61        return jnp.dot(X, Y.T) if self.backend == "gpu" else np.dot(X, Y.T)
 62
 63    def _rbf_kernel(self, X, Y):
 64        if self.gamma is None:
 65            self.gamma = 1.0 / X.shape[1]
 66        if self.backend == "gpu":
 67            sq_dists = (
 68                jnp.sum(X**2, axis=1)[:, None]
 69                + jnp.sum(Y**2, axis=1)
 70                - 2 * jnp.dot(X, Y.T)
 71            )
 72            return jnp.exp(-self.gamma * sq_dists)
 73        else:
 74            sq_dists = (
 75                np.sum(X**2, axis=1)[:, None]
 76                + np.sum(Y**2, axis=1)
 77                - 2 * np.dot(X, Y.T)
 78            )
 79            return np.exp(-self.gamma * sq_dists)
 80
 81    def _matern_kernel(self, X, Y):
 82        """
 83        Compute the Matérn kernel using JAX for GPU or NumPy for CPU.
 84
 85        Parameters:
 86        - X: array-like, shape (n_samples_X, n_features)
 87        - Y: array-like, shape (n_samples_Y, n_features)
 88
 89        Returns:
 90        - Kernel matrix, shape (n_samples_X, n_samples_Y)
 91        """
 92        if self.backend == "gpu":
 93            # Compute pairwise distances
 94            dists = jnp.sqrt(
 95                jnp.sum((X[:, None, :] - Y[None, :, :]) ** 2, axis=2)
 96            )
 97            scaled_dists = jnp.sqrt(2 * self.nu) * dists / self.length_scale
 98
 99            # Matérn kernel formula
100            coeff = (2 ** (1 - self.nu)) / jnp.exp(gammaln(self.nu))
101            matern_kernel = (
102                coeff * (scaled_dists**self.nu) * kv(self.nu, scaled_dists)
103            )
104            matern_kernel = jnp.where(
105                dists == 0, 1.0, matern_kernel
106            )  # Handle the case where distance is 0
107            return matern_kernel
108        else:
109            # Use NumPy for CPU
110            from scipy.special import (
111                gammaln,
112                kv,
113            )  # Ensure scipy.special is used for CPU
114
115            dists = np.sqrt(
116                np.sum((X[:, None, :] - Y[None, :, :]) ** 2, axis=2)
117            )
118            scaled_dists = np.sqrt(2 * self.nu) * dists / self.length_scale
119
120            # Matérn kernel formula
121            coeff = (2 ** (1 - self.nu)) / np.exp(gammaln(self.nu))
122            matern_kernel = (
123                coeff * (scaled_dists**self.nu) * kv(self.nu, scaled_dists)
124            )
125            matern_kernel = np.where(
126                dists == 0, 1.0, matern_kernel
127            )  # Handle the case where distance is 0
128            return matern_kernel
129
130    def _get_kernel(self, X, Y):
131        if self.kernel == "linear":
132            return self._linear_kernel(X, Y)
133        elif self.kernel == "rbf":
134            return self._rbf_kernel(X, Y)
135        elif self.kernel == "matern":
136            return self._matern_kernel(X, Y)
137        else:
138            raise ValueError(f"Unsupported kernel: {self.kernel}")
139
140    def fit(self, X, y):
141        """
142        Fit the Kernel Ridge Regression model.
143
144        Parameters:
145        - X: array-like, shape (n_samples, n_features)
146            Training data.
147        - y: array-like, shape (n_samples,)
148            Target values.
149        """
150        # Standardize the inputs
151        X = self.scaler.fit_transform(X)
152        self.X_fit_ = X
153
154        # Center the response
155        self.y_mean_ = np.mean(y)
156        y_centered = y - self.y_mean_
157
158        n_samples = X.shape[0]
159
160        # Compute the kernel matrix
161        K = self._get_kernel(X, X)
162        self.K_ = K
163        self.y_fit_ = y_centered
164
165        if isinstance(self.alpha, (list, np.ndarray)):
166            # If alpha is a list or array, compute LOOE for each alpha
167            self.alphas_ = self.alpha  # Store the list of alphas
168            self.dual_coefs_ = []  # Store dual coefficients for each alpha
169            self.looe_ = []  # Store LOOE for each alpha
170
171            for alpha in self.alpha:
172                G = K + alpha * np.eye(n_samples)
173                G_inv = np.linalg.inv(G)
174                diag_G_inv = np.diag(G_inv)
175                dual_coef = np.linalg.solve(G, y_centered)
176                looe = np.sum((dual_coef / diag_G_inv) ** 2)  # Compute LOOE
177                self.dual_coefs_.append(dual_coef)
178                self.looe_.append(looe)
179
180            # Select the best alpha based on the smallest LOOE
181            best_index = np.argmin(self.looe_)
182            self.alpha_ = self.alpha[best_index]
183            self.dual_coef_ = self.dual_coefs_[best_index]
184        else:
185            # If alpha is a single value, proceed as usual
186            if self.backend == "gpu":
187                self.dual_coef_ = jnp.linalg.solve(
188                    K + self.alpha * jnp.eye(n_samples), y_centered
189                )
190            else:
191                self.dual_coef_ = np.linalg.solve(
192                    K + self.alpha * np.eye(n_samples), y_centered
193                )
194
195        return self
196
197    def predict(self, X, probs=False):
198        """
199        Predict using the Kernel Ridge Regression model.
200
201        Parameters:
202        - X: array-like, shape (n_samples, n_features)
203            Test data.
204
205        Returns:
206        - Predicted values, shape (n_samples,).
207        """
208        # Standardize the inputs
209        X = self.scaler.transform(X)
210        K = self._get_kernel(X, self.X_fit_)
211        if self.backend == "gpu":
212            preds = jnp.dot(K, self.dual_coef_) + self.y_mean_
213            if probs:
214                # Compute similarity to self.X_fit_
215                similarities = jnp.dot(
216                    preds, self.X_fit_.T
217                )  # Shape: (n_samples, n_fit_)
218                # Apply softmax to get probabilities
219                return jaxsoftmax(similarities, axis=1)
220            return preds
221        else:
222            preds = np.dot(K, self.dual_coef_) + self.y_mean_
223            if probs:
224                # Compute similarity to self.X_fit_
225                similarities = np.dot(
226                    preds, self.X_fit_.T
227                )  # Shape: (n_samples, n_fit_)
228                # Apply softmax to get probabilities
229                return softmax(similarities, axis=1)
230            return preds
231
232    def partial_fit(self, X, y):
233        """
234        Incrementally fit the Kernel Ridge Regression model with new data using a recursive approach.
235
236        Parameters:
237        - X: array-like, shape (n_samples, n_features)
238            New training data.
239        - y: array-like, shape (n_samples,)
240            New target values.
241
242        Returns:
243        - self: object
244            The updated model.
245        """
246        # Standardize the inputs
247        X = (
248            self.scaler.fit_transform(X)
249            if not hasattr(self, "X_fit_")
250            else self.scaler.transform(X)
251        )
252
253        if not hasattr(self, "X_fit_"):
254            # Initialize with the first batch of data
255            self.X_fit_ = X
256
257            # Center the response
258            self.y_mean_ = np.mean(y)
259            y_centered = y - self.y_mean_
260            self.y_fit_ = y_centered
261
262            n_samples = X.shape[0]
263
264            # Compute the kernel matrix for the initial data
265            self.K_ = self._get_kernel(X, X)
266
267            # Initialize dual coefficients for each alpha
268            if isinstance(self.alpha, (list, np.ndarray)):
269                self.dual_coefs_ = [np.zeros(n_samples) for _ in self.alpha]
270            else:
271                self.dual_coef_ = np.zeros(n_samples)
272        else:
273            # Incrementally update with new data
274            y_centered = y - self.y_mean_  # Center the new batch of responses
275            for x_new, y_new in zip(X, y_centered):
276                x_new = x_new.reshape(1, -1)  # Ensure x_new is 2D
277                k_new = self._get_kernel(self.X_fit_, x_new).flatten()
278
279                # Compute the kernel value for the new data point
280                k_self = self._get_kernel(x_new, x_new).item()
281
282                if isinstance(self.alpha, (list, np.ndarray)):
283                    # Update dual coefficients for each alpha
284                    for idx, alpha in enumerate(self.alpha):
285                        gamma_new = 1 / (k_self + alpha)
286                        residual = y_new - np.dot(self.dual_coefs_[idx], k_new)
287                        self.dual_coefs_[idx] = np.append(
288                            self.dual_coefs_[idx], gamma_new * residual
289                        )
290                else:
291                    # Update dual coefficients for a single alpha
292                    gamma_new = 1 / (k_self + self.alpha)
293                    residual = y_new - np.dot(self.dual_coef_, k_new)
294                    self.dual_coef_ = np.append(
295                        self.dual_coef_, gamma_new * residual
296                    )
297
298                # Update the kernel matrix
299                self.K_ = np.block(
300                    [
301                        [self.K_, k_new[:, None]],
302                        [k_new[None, :], np.array([[k_self]])],
303                    ]
304                )
305
306                # Update the stored data
307                self.X_fit_ = np.vstack([self.X_fit_, x_new])
308                self.y_fit_ = np.append(self.y_fit_, y_new)
309
310        # Select the best alpha based on LOOE after the batch
311        if isinstance(self.alpha, (list, np.ndarray)):
312            self.looe_ = []
313            for idx, alpha in enumerate(self.alpha):
314                G = self.K_ + alpha * np.eye(self.K_.shape[0])
315                G_inv = np.linalg.inv(G)
316                diag_G_inv = np.diag(G_inv)
317                looe = np.sum((self.dual_coefs_[idx] / diag_G_inv) ** 2)
318                self.looe_.append(looe)
319
320            # Select the best alpha
321            best_index = np.argmin(self.looe_)
322            self.alpha_ = self.alpha[best_index]
323            self.dual_coef_ = self.dual_coefs_[best_index]
324
325        return self

Kernel Ridge Regression with optional GPU support, Matérn kernels, and automatic input standardization.

Parameters:

  • alpha: float Regularization parameter.
  • kernel: str Kernel type ("linear", "rbf", or "matern").
  • gamma: float Kernel coefficient for "rbf". Ignored for other kernels.
  • nu: float Smoothness parameter for the Matérn kernel. Default is 1.5.
  • length_scale: float Length scale parameter for the Matérn kernel. Default is 1.0.
  • backend: str "cpu" or "gpu" (uses JAX if "gpu").
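A minimal usage sketch on synthetic data (assumed, not from the package's examples). Passing a list of alphas triggers the leave-one-out-error-based selection implemented in fit below:

    import numpy as np
    import nnetsauce as ns

    rng = np.random.default_rng(42)
    X = rng.normal(size=(80, 5))
    y = X @ rng.normal(size=5) + 0.1 * rng.normal(size=80)

    # a single alpha fits directly; a list/array of alphas is compared by LOO error
    krr = ns.KernelRidge(alpha=[0.01, 0.1, 1.0], kernel="rbf")
    krr.fit(X, y)
    print(krr.alpha_)          # alpha retained after the leave-one-out comparison
    preds = krr.predict(X)

For reference, the Matérn option implements the standard form 2**(1 - nu) / gamma(nu) * (sqrt(2 * nu) * d / length_scale)**nu * K_nu(sqrt(2 * nu) * d / length_scale), with the kernel value set to 1 when the distance d is zero.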
def fit(self, X, y):
140    def fit(self, X, y):
141        """
142        Fit the Kernel Ridge Regression model.
143
144        Parameters:
145        - X: array-like, shape (n_samples, n_features)
146            Training data.
147        - y: array-like, shape (n_samples,)
148            Target values.
149        """
150        # Standardize the inputs
151        X = self.scaler.fit_transform(X)
152        self.X_fit_ = X
153
154        # Center the response
155        self.y_mean_ = np.mean(y)
156        y_centered = y - self.y_mean_
157
158        n_samples = X.shape[0]
159
160        # Compute the kernel matrix
161        K = self._get_kernel(X, X)
162        self.K_ = K
163        self.y_fit_ = y_centered
164
165        if isinstance(self.alpha, (list, np.ndarray)):
166            # If alpha is a list or array, compute LOOE for each alpha
167            self.alphas_ = self.alpha  # Store the list of alphas
168            self.dual_coefs_ = []  # Store dual coefficients for each alpha
169            self.looe_ = []  # Store LOOE for each alpha
170
171            for alpha in self.alpha:
172                G = K + alpha * np.eye(n_samples)
173                G_inv = np.linalg.inv(G)
174                diag_G_inv = np.diag(G_inv)
175                dual_coef = np.linalg.solve(G, y_centered)
176                looe = np.sum((dual_coef / diag_G_inv) ** 2)  # Compute LOOE
177                self.dual_coefs_.append(dual_coef)
178                self.looe_.append(looe)
179
180            # Select the best alpha based on the smallest LOOE
181            best_index = np.argmin(self.looe_)
182            self.alpha_ = self.alpha[best_index]
183            self.dual_coef_ = self.dual_coefs_[best_index]
184        else:
185            # If alpha is a single value, proceed as usual
186            if self.backend == "gpu":
187                self.dual_coef_ = jnp.linalg.solve(
188                    K + self.alpha * jnp.eye(n_samples), y_centered
189                )
190            else:
191                self.dual_coef_ = np.linalg.solve(
192                    K + self.alpha * np.eye(n_samples), y_centered
193                )
194
195        return self

Fit the Kernel Ridge Regression model.

Parameters:

  • X: array-like, shape (n_samples, n_features) Training data.
  • y: array-like, shape (n_samples,) Target values.
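The alpha selection in fit relies on the closed-form leave-one-out residuals of (kernel) ridge regression: with G = K + alpha * I and dual coefficients c = G^{-1} y, the residual for sample i is c_i / (G^{-1})_{ii}, and fit sums their squares as the LOOE criterion. A small NumPy check of that identity (illustrative only, not part of the library):

    import numpy as np

    rng = np.random.default_rng(0)
    X = rng.normal(size=(20, 3))
    y = X @ np.array([1.0, -2.0, 0.5]) + 0.05 * rng.normal(size=20)
    K = X @ X.T                                   # linear kernel, for simplicity
    alpha = 0.1
    G = K + alpha * np.eye(20)
    c = np.linalg.solve(G, y)
    loo_shortcut = c / np.diag(np.linalg.inv(G))  # closed-form LOO residuals

    # brute force: refit without sample i, then predict it
    loo_brute = np.empty(20)
    for i in range(20):
        idx = np.delete(np.arange(20), i)
        ci = np.linalg.solve(K[np.ix_(idx, idx)] + alpha * np.eye(19), y[idx])
        loo_brute[i] = y[i] - K[i, idx] @ ci

    print(np.allclose(loo_shortcut, loo_brute))   # expected: True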
def predict(self, X, probs=False):
197    def predict(self, X, probs=False):
198        """
199        Predict using the Kernel Ridge Regression model.
200
201        Parameters:
202        - X: array-like, shape (n_samples, n_features)
203            Test data.
204
205        Returns:
206        - Predicted values, shape (n_samples,).
207        """
208        # Standardize the inputs
209        X = self.scaler.transform(X)
210        K = self._get_kernel(X, self.X_fit_)
211        if self.backend == "gpu":
212            preds = jnp.dot(K, self.dual_coef_) + self.y_mean_
213            if probs:
214                # Compute similarity to self.X_fit_
215                similarities = jnp.dot(
216                    preds, self.X_fit_.T
217                )  # Shape: (n_samples, n_fit_)
218                # Apply softmax to get probabilities
219                return jaxsoftmax(similarities, axis=1)
220            return preds
221        else:
222            preds = np.dot(K, self.dual_coef_) + self.y_mean_
223            if probs:
224                # Compute similarity to self.X_fit_
225                similarities = np.dot(
226                    preds, self.X_fit_.T
227                )  # Shape: (n_samples, n_fit_)
228                # Apply softmax to get probabilities
229                return softmax(similarities, axis=1)
230            return preds

Predict using the Kernel Ridge Regression model.

Parameters:

  • X: array-like, shape (n_samples, n_features) Test data.

Returns:

  • Predicted values, shape (n_samples,).
class LazyClassifier(nnetsauce.LazyDeepClassifier):
761class LazyClassifier(LazyDeepClassifier):
762    """
763        Fitting -- almost -- all the classification algorithms with
764        nnetsauce's CustomClassifier and returning their scores (no layers).
765
766    Parameters:
767
768        verbose: int, optional (default=0)
769            Any positive number for verbosity.
770
771        ignore_warnings: bool, optional (default=True)
772            When set to True, warnings related to algorithms that are not able to run are ignored.
773
774        custom_metric: function, optional (default=None)
775            When a function is provided, models are evaluated based on the custom evaluation metric provided.
776
777        predictions: bool, optional (default=False)
778            When set to True, the predictions of all the models are returned as a data frame.
779
780        sort_by: string, optional (default='Accuracy')
781            Sort models by a metric. Available options are 'Accuracy', 'Balanced Accuracy', 'ROC AUC', 'F1 Score',
782            or a custom metric identified by its name and provided by custom_metric.
783
784        random_state: int, optional (default=42)
785            Reproducibility seed.
786
787        estimators: list, optional (default='all')
788            list of Estimators names or just 'all' (default='all')
789
790        preprocess: bool
791            preprocessing is done when set to True
792
793        n_jobs : int, when possible, run in parallel
794            For now, only used by individual models that support it.
795
796        All the other parameters are the same as CustomClassifier's.
797
798    Attributes:
799
800        models_: dict-object
801            Returns a dictionary with each model pipeline as value
802            with key as name of models.
803
804        best_model_: object
805            Returns the best model pipeline based on the sort_by metric.
806
807    Examples:
808
809        import nnetsauce as ns
810        import numpy as np
811        from sklearn import datasets
812        from sklearn.utils import shuffle
813
814        dataset = datasets.load_iris()
815        X = dataset.data
816        y = dataset.target
817        X, y = shuffle(X, y, random_state=123)
818        X = X.astype(np.float32)
819        y = y.astype(np.float32)
820        X_train, X_test = X[:100], X[100:]
821        y_train, y_test = y[:100], y[100:]
822
823        clf = ns.LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
824        models, predictions = clf.fit(X_train, X_test, y_train, y_test)
825        model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test)
826        print(models)
827
828    """
829
830    def __init__(
831        self,
832        verbose=0,
833        ignore_warnings=True,
834        custom_metric=None,
835        predictions=False,
836        sort_by="Accuracy",
837        random_state=42,
838        estimators="all",
839        preprocess=False,
840        n_jobs=None,
841        # CustomClassifier attributes
842        obj=None,
843        n_hidden_features=5,
844        activation_name="relu",
845        a=0.01,
846        nodes_sim="sobol",
847        bias=True,
848        dropout=0,
849        direct_link=True,
850        n_clusters=2,
851        cluster_encode=True,
852        type_clust="kmeans",
853        type_scaling=("std", "std", "std"),
854        col_sample=1,
855        row_sample=1,
856        seed=123,
857        backend="cpu",
858    ):
859        super().__init__(
860            verbose=verbose,
861            ignore_warnings=ignore_warnings,
862            custom_metric=custom_metric,
863            predictions=predictions,
864            sort_by=sort_by,
865            random_state=random_state,
866            estimators=estimators,
867            preprocess=preprocess,
868            n_jobs=n_jobs,
869            n_layers=1,
870            obj=obj,
871            n_hidden_features=n_hidden_features,
872            activation_name=activation_name,
873            a=a,
874            nodes_sim=nodes_sim,
875            bias=bias,
876            dropout=dropout,
877            direct_link=direct_link,
878            n_clusters=n_clusters,
879            cluster_encode=cluster_encode,
880            type_clust=type_clust,
881            type_scaling=type_scaling,
882            col_sample=col_sample,
883            row_sample=row_sample,
884            seed=seed,
885            backend=backend,
886        )

Fitting -- almost -- all the classification algorithms with nnetsauce's CustomClassifier and returning their scores (no layers).

Parameters:

verbose: int, optional (default=0)
    Any positive number for verbosity.

ignore_warnings: bool, optional (default=True)
    When set to True, warnings related to algorithms that are not able to run are ignored.

custom_metric: function, optional (default=None)
    When a function is provided, models are evaluated based on the custom evaluation metric provided.

predictions: bool, optional (default=False)
    When set to True, the predictions of all the models are returned as a data frame.

sort_by: string, optional (default='Accuracy')
    Sort models by a metric. Available options are 'Accuracy', 'Balanced Accuracy', 'ROC AUC', 'F1 Score',
    or a custom metric identified by its name and provided by custom_metric.

random_state: int, optional (default=42)
    Reproducibility seed.

estimators: list, optional (default='all')
    list of Estimators names or just 'all' (default='all')

preprocess: bool
    preprocessing is done when set to True

n_jobs : int, when possible, run in parallel
    For now, only used by individual models that support it.

All the other parameters are the same as CustomClassifier's.

Attributes:

models_: dict-object
    Returns a dictionary with each model pipeline as value
    with key as name of models.

best_model_: object
    Returns the best model pipeline based on the sort_by metric.

Examples:

import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle

dataset = datasets.load_iris()
X = dataset.data
y = dataset.target
X, y = shuffle(X, y, random_state=123)
X = X.astype(np.float32)
y = y.astype(np.float32)
X_train, X_test = X[:100], X[100:]
y_train, y_test = y[:100], y[100:]

clf = ns.LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)
model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test)
print(models)
class LazyRegressor(nnetsauce.LazyDeepRegressor):
657class LazyRegressor(LazyDeepRegressor):
658    """
659        Fitting -- almost -- all the regression algorithms with
660        nnetsauce's CustomRegressor and returning their scores.
661
662    Parameters:
663
664        verbose: int, optional (default=0)
665            Any positive number for verbosity.
666
667        ignore_warnings: bool, optional (default=True)
668            When set to True, warnings related to algorithms that are not able to run are ignored.
669
670        custom_metric: function, optional (default=None)
671            When a function is provided, models are evaluated based on the custom evaluation metric provided.
672
673        predictions: bool, optional (default=False)
674            When set to True, the predictions of all the models are returned as a data frame.
675
676        sort_by: string, optional (default='RMSE')
677            Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken',
678            or a custom metric identified by its name and provided by custom_metric.
679
680        random_state: int, optional (default=42)
681            Reproducibility seed.
682
683        estimators: list, optional (default='all')
684            list of Estimators names or just 'all' (default='all')
685
686        preprocess: bool
687            preprocessing is done when set to True
688
689        n_jobs : int, when possible, run in parallel
690            For now, only used by individual models that support it.
691
692        All the other parameters are the same as CustomRegressor's.
693
694    Attributes:
695
696        models_: dict-object
697            Returns a dictionary with each model pipeline as value
698            with key as name of models.
699
700        best_model_: object
701            Returns the best model pipeline based on the sort_by metric.
702
703    Examples:
704
705        import nnetsauce as ns
706        import numpy as np
707        from sklearn import datasets
708        from sklearn.utils import shuffle
709
710        diabetes = datasets.load_diabetes()
711        X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
712        X = X.astype(np.float32)
713
714        offset = int(X.shape[0] * 0.9)
715        X_train, y_train = X[:offset], y[:offset]
716        X_test, y_test = X[offset:], y[offset:]
717
718        reg = ns.LazyRegressor(verbose=0, ignore_warnings=False,
719                            custom_metric=None)
720        models, predictions = reg.fit(X_train, X_test, y_train, y_test)
721        print(models)
722
723    """
724
725    def __init__(
726        self,
727        verbose=0,
728        ignore_warnings=True,
729        custom_metric=None,
730        predictions=False,
731        sort_by="RMSE",
732        random_state=42,
733        estimators="all",
734        preprocess=False,
735        n_jobs=None,
736        # CustomRegressor attributes
737        obj=None,
738        n_hidden_features=5,
739        activation_name="relu",
740        a=0.01,
741        nodes_sim="sobol",
742        bias=True,
743        dropout=0,
744        direct_link=True,
745        n_clusters=2,
746        cluster_encode=True,
747        type_clust="kmeans",
748        type_scaling=("std", "std", "std"),
749        col_sample=1,
750        row_sample=1,
751        seed=123,
752        backend="cpu",
753    ):
754        super().__init__(
755            verbose=verbose,
756            ignore_warnings=ignore_warnings,
757            custom_metric=custom_metric,
758            predictions=predictions,
759            sort_by=sort_by,
760            random_state=random_state,
761            estimators=estimators,
762            preprocess=preprocess,
763            n_jobs=n_jobs,
764            n_layers=1,
765            obj=obj,
766            n_hidden_features=n_hidden_features,
767            activation_name=activation_name,
768            a=a,
769            nodes_sim=nodes_sim,
770            bias=bias,
771            dropout=dropout,
772            direct_link=direct_link,
773            n_clusters=n_clusters,
774            cluster_encode=cluster_encode,
775            type_clust=type_clust,
776            type_scaling=type_scaling,
777            col_sample=col_sample,
778            row_sample=row_sample,
779            seed=seed,
780            backend=backend,
781        )

Fitting -- almost -- all the regression algorithms with nnetsauce's CustomRegressor and returning their scores.

Parameters:

verbose: int, optional (default=0)
    Any positive number for verbosity.

ignore_warnings: bool, optional (default=True)
    When set to True, warnings related to algorithms that are not able to run are ignored.

custom_metric: function, optional (default=None)
    When a function is provided, models are evaluated based on the custom evaluation metric provided.

predictions: bool, optional (default=False)
    When set to True, the predictions of all the models are returned as a data frame.

sort_by: string, optional (default='RMSE')
    Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken',
    or a custom metric identified by its name and provided by custom_metric.

random_state: int, optional (default=42)
    Reproducibility seed.

estimators: list, optional (default='all')
    list of Estimators names or just 'all' (default='all')

preprocess: bool
    preprocessing is done when set to True

n_jobs : int, when possible, run in parallel
    For now, only used by individual models that support it.

All the other parameters are the same as CustomRegressor's.

Attributes:

models_: dict-object
    Returns a dictionary with each model pipeline as value
    with key as name of models.

best_model_: object
    Returns the best model pipeline based on the sort_by metric.

Examples:

import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle

diabetes = datasets.load_diabetes()
X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
X = X.astype(np.float32)

offset = int(X.shape[0] * 0.9)
X_train, y_train = X[:offset], y[:offset]
X_test, y_test = X[offset:], y[offset:]

reg = ns.LazyRegressor(verbose=0, ignore_warnings=False,
                    custom_metric=None)
models, predictions = reg.fit(X_train, X_test, y_train, y_test)
print(models)
class LazyDeepClassifier(nnetsauce.custom.custom.Custom, sklearn.base.ClassifierMixin):
 94class LazyDeepClassifier(Custom, ClassifierMixin):
 95    """
 96
 97    Fitting -- almost -- all the classification algorithms with layers of
 98    nnetsauce's CustomClassifier and returning their scores.
 99
100    Parameters:
101
102        verbose: int, optional (default=0)
103            Any positive number for verbosity.
104
105        ignore_warnings: bool, optional (default=True)
106            When set to True, warnings related to algorithms that are not
107            able to run are ignored.
108
109        custom_metric: function, optional (default=None)
110            When a function is provided, models are evaluated based on the custom
111            evaluation metric provided.
112
113        predictions: bool, optional (default=False)
114            When set to True, the predictions of all the models are
115            returned as a data frame.
116
117        sort_by: string, optional (default='Accuracy')
118            Sort models by a metric. Available options are 'Accuracy',
119            'Balanced Accuracy', 'ROC AUC', 'F1 Score', or a custom metric
120            identified by its name and provided by custom_metric.
121
122        random_state: int, optional (default=42)
123            Reproducibility seed.
124
125        estimators: list, optional (default='all')
126            list of Estimators names or just 'all' for > 90 classifiers
127            (default='all')
128
129        preprocess: bool, preprocessing is done when set to True
130
131        n_jobs: int, when possible, run in parallel
132            For now, only used by individual models that support it.
133
134        n_layers: int, optional (default=3)
135            Number of layers of CustomClassifiers to be used.
136
137        All the other parameters are the same as CustomClassifier's.
138
139    Attributes:
140
141        models_: dict-object
142            Returns a dictionary with each model pipeline as value
143            with key as name of models.
144
145        best_model_: object
146            Returns the best model pipeline.
147
148    Examples:
149
150        ```python
151        import nnetsauce as ns
152        from sklearn.datasets import load_breast_cancer
153        from sklearn.model_selection import train_test_split
154        data = load_breast_cancer()
155        X = data.data
156        y= data.target
157        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2,
158            random_state=123)
159        clf = ns.LazyDeepClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
160        models, predictions = clf.fit(X_train, X_test, y_train, y_test)
161        model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test)
162        print(models)
163        ```
164
165    """
166
167    def __init__(
168        self,
169        verbose=0,
170        ignore_warnings=True,
171        custom_metric=None,
172        predictions=False,
173        sort_by="Accuracy",
174        random_state=42,
175        estimators="all",
176        preprocess=False,
177        n_jobs=None,
178        # Defining depth
179        n_layers=3,
180        # CustomClassifier attributes
181        obj=None,
182        n_hidden_features=5,
183        activation_name="relu",
184        a=0.01,
185        nodes_sim="sobol",
186        bias=True,
187        dropout=0,
188        direct_link=True,
189        n_clusters=2,
190        cluster_encode=True,
191        type_clust="kmeans",
192        type_scaling=("std", "std", "std"),
193        col_sample=1,
194        row_sample=1,
195        seed=123,
196        backend="cpu",
197    ):
198        self.verbose = verbose
199        self.ignore_warnings = ignore_warnings
200        self.custom_metric = custom_metric
201        self.predictions = predictions
202        self.sort_by = sort_by
203        self.models_ = {}
204        self.best_model_ = None
205        self.random_state = random_state
206        self.estimators = estimators
207        self.preprocess = preprocess
208        self.n_layers = n_layers - 1
209        self.n_jobs = n_jobs
210        super().__init__(
211            obj=obj,
212            n_hidden_features=n_hidden_features,
213            activation_name=activation_name,
214            a=a,
215            nodes_sim=nodes_sim,
216            bias=bias,
217            dropout=dropout,
218            direct_link=direct_link,
219            n_clusters=n_clusters,
220            cluster_encode=cluster_encode,
221            type_clust=type_clust,
222            type_scaling=type_scaling,
223            col_sample=col_sample,
224            row_sample=row_sample,
225            seed=seed,
226            backend=backend,
227        )
228
229    def fit(self, X_train, X_test, y_train, y_test):
230        """Fit classifiers to X_train and y_train, predict and score on X_test,
231        y_test.
232
233        Parameters:
234
235            X_train: array-like,
236                Training vectors, where rows is the number of samples
237                and columns is the number of features.
238
239            X_test: array-like,
240                Testing vectors, where rows is the number of samples
241                and columns is the number of features.
242
243            y_train: array-like,
244                Training target values, with one entry per
245                training sample.
246
247            y_test: array-like,
248                Testing target values, with one entry per
249                testing sample.
250
251        Returns:
252
253            scores: Pandas DataFrame
254                Returns metrics of all the models in a Pandas DataFrame.
255
256            predictions: Pandas DataFrame
257                Returns predictions of all the models in a Pandas DataFrame.
258        """
259        Accuracy = []
260        B_Accuracy = []
261        ROC_AUC = []
262        F1 = []
263        names = []
264        TIME = []
265        predictions = {}
266
267        if self.custom_metric is not None:
268            CUSTOM_METRIC = []
269
270        if isinstance(X_train, np.ndarray):
271            X_train = pd.DataFrame(X_train)
272            X_test = pd.DataFrame(X_test)
273
274        numeric_features = X_train.select_dtypes(include=[np.number]).columns
275        categorical_features = X_train.select_dtypes(include=["object"]).columns
276
277        categorical_low, categorical_high = get_card_split(
278            X_train, categorical_features
279        )
280
281        if self.preprocess is True:
282            preprocessor = ColumnTransformer(
283                transformers=[
284                    ("numeric", numeric_transformer, numeric_features),
285                    (
286                        "categorical_low",
287                        categorical_transformer_low,
288                        categorical_low,
289                    ),
290                    (
291                        "categorical_high",
292                        categorical_transformer_high,
293                        categorical_high,
294                    ),
295                ]
296            )
297
298        # baseline models
299        try:
300            baseline_names = ["RandomForestClassifier", "XGBClassifier"]
301            baseline_models = [RandomForestClassifier(), xgb.XGBClassifier()]
302        except Exception as exception:
303            baseline_names = ["RandomForestClassifier"]
304            baseline_models = [RandomForestClassifier()]
305
306        for name, model in zip(baseline_names, baseline_models):
307            start = time.time()
308            try:
309                model.fit(X_train, y_train)
310                self.models_[name] = model
311                y_pred = model.predict(X_test)
312                accuracy = accuracy_score(y_test, y_pred, normalize=True)
313                b_accuracy = balanced_accuracy_score(y_test, y_pred)
314                f1 = f1_score(y_test, y_pred, average="weighted")
315                try:
316                    roc_auc = roc_auc_score(y_test, y_pred)
317                except Exception as exception:
318                    roc_auc = None
319                    if self.ignore_warnings is False:
320                        print("ROC AUC couldn't be calculated for " + name)
321                        print(exception)
322                names.append(name)
323                Accuracy.append(accuracy)
324                B_Accuracy.append(b_accuracy)
325                ROC_AUC.append(roc_auc)
326                F1.append(f1)
327                TIME.append(time.time() - start)
328                if self.custom_metric is not None:
329                    custom_metric = self.custom_metric(y_test, y_pred)
330                    CUSTOM_METRIC.append(custom_metric)
331                if self.verbose > 0:
332                    if self.custom_metric is not None:
333                        print(
334                            {
335                                "Model": name,
336                                "Accuracy": accuracy,
337                                "Balanced Accuracy": b_accuracy,
338                                "ROC AUC": roc_auc,
339                                "F1 Score": f1,
340                                self.custom_metric.__name__: custom_metric,
341                                "Time taken": time.time() - start,
342                            }
343                        )
344                    else:
345                        print(
346                            {
347                                "Model": name,
348                                "Accuracy": accuracy,
349                                "Balanced Accuracy": b_accuracy,
350                                "ROC AUC": roc_auc,
351                                "F1 Score": f1,
352                                "Time taken": time.time() - start,
353                            }
354                        )
355                if self.predictions:
356                    predictions[name] = y_pred
357            except Exception as exception:
358                if self.ignore_warnings is False:
359                    print(name + " model failed to execute")
360                    print(exception)
361
362        if self.estimators == "all":
363            self.classifiers = [
364                item
365                for sublist in [
366                    DEEPCLASSIFIERS,
367                    DEEPMULTITASKCLASSIFIERS,
368                    DEEPSIMPLEMULTITASKCLASSIFIERS,
369                ]
370                for item in sublist
371            ]
372        else:
373            self.classifiers = (
374                [
375                    ("DeepCustomClassifier(" + est[0] + ")", est[1])
376                    for est in all_estimators()
377                    if (
378                        issubclass(est[1], ClassifierMixin)
379                        and (est[0] in self.estimators)
380                    )
381                ]
382                + [
383                    (
384                        "DeepMultitaskClassifier(" + est[0] + ")",
385                        partial(MultitaskClassifier, obj=est[1]()),
386                    )
387                    for est in all_estimators()
388                    if (
389                        issubclass(est[1], RegressorMixin)
390                        and (est[0] in self.estimators)
391                    )
392                ]
393                + [
394                    (
395                        "DeepSimpleMultitaskClassifier(" + est[0] + ")",
396                        partial(SimpleMultitaskClassifier, obj=est[1]()),
397                    )
398                    for est in all_estimators()
399                    if (
400                        issubclass(est[1], RegressorMixin)
401                        and (est[0] in self.estimators)
402                    )
403                ]
404            )
405
406        if self.preprocess is True:
407
408            for name, model in tqdm(self.classifiers):  # do parallel exec
409
410                other_args = (
411                    {}
412                )  # use this trick for `random_state` too --> refactor
413                try:
414                    if (
415                        "n_jobs" in model().get_params().keys()
416                        and name.find("LogisticRegression") == -1
417                    ):
418                        other_args["n_jobs"] = self.n_jobs
419                except Exception:
420                    pass
421
422                start = time.time()
423
424                try:
425                    if "random_state" in model().get_params().keys():
426                        layer_clf = CustomClassifier(
427                            obj=model(random_state=self.random_state),
428                            n_hidden_features=self.n_hidden_features,
429                            activation_name=self.activation_name,
430                            a=self.a,
431                            nodes_sim=self.nodes_sim,
432                            bias=self.bias,
433                            dropout=self.dropout,
434                            direct_link=self.direct_link,
435                            n_clusters=self.n_clusters,
436                            cluster_encode=self.cluster_encode,
437                            type_clust=self.type_clust,
438                            type_scaling=self.type_scaling,
439                            col_sample=self.col_sample,
440                            row_sample=self.row_sample,
441                            seed=self.seed,
442                            backend=self.backend,
443                            cv_calibration=None,
444                        )
445
446                    else:
447                        layer_clf = CustomClassifier(
448                            obj=model(),
449                            n_hidden_features=self.n_hidden_features,
450                            activation_name=self.activation_name,
451                            a=self.a,
452                            nodes_sim=self.nodes_sim,
453                            bias=self.bias,
454                            dropout=self.dropout,
455                            direct_link=self.direct_link,
456                            n_clusters=self.n_clusters,
457                            cluster_encode=self.cluster_encode,
458                            type_clust=self.type_clust,
459                            type_scaling=self.type_scaling,
460                            col_sample=self.col_sample,
461                            row_sample=self.row_sample,
462                            seed=self.seed,
463                            backend=self.backend,
464                            cv_calibration=None,
465                        )
466
467                    layer_clf.fit(X_train, y_train)
468
469                    for _ in range(self.n_layers):
470                        layer_clf = deepcopy(
471                            CustomClassifier(
472                                obj=layer_clf,
473                                n_hidden_features=self.n_hidden_features,
474                                activation_name=self.activation_name,
475                                a=self.a,
476                                nodes_sim=self.nodes_sim,
477                                bias=self.bias,
478                                dropout=self.dropout,
479                                direct_link=self.direct_link,
480                                n_clusters=self.n_clusters,
481                                cluster_encode=self.cluster_encode,
482                                type_clust=self.type_clust,
483                                type_scaling=self.type_scaling,
484                                col_sample=self.col_sample,
485                                row_sample=self.row_sample,
486                                seed=self.seed,
487                                backend=self.backend,
488                                cv_calibration=None,
489                            )
490                        )
491
492                    pipe = Pipeline(
493                        [
494                            ("preprocessor", preprocessor),
495                            ("classifier", layer_clf),
496                        ]
497                    )
498
499                    pipe.fit(X_train, y_train)
500                    self.models_[name] = pipe
501                    y_pred = pipe.predict(X_test)
502                    accuracy = accuracy_score(y_test, y_pred, normalize=True)
503                    b_accuracy = balanced_accuracy_score(y_test, y_pred)
504                    f1 = f1_score(y_test, y_pred, average="weighted")
505                    try:
506                        roc_auc = roc_auc_score(y_test, y_pred)
507                    except Exception as exception:
508                        roc_auc = None
509                        if self.ignore_warnings is False:
510                            print("ROC AUC couldn't be calculated for " + name)
511                            print(exception)
512                    names.append(name)
513                    Accuracy.append(accuracy)
514                    B_Accuracy.append(b_accuracy)
515                    ROC_AUC.append(roc_auc)
516                    F1.append(f1)
517                    TIME.append(time.time() - start)
518                    if self.custom_metric is not None:
519                        custom_metric = self.custom_metric(y_test, y_pred)
520                        CUSTOM_METRIC.append(custom_metric)
521                    if self.verbose > 0:
522                        if self.custom_metric is not None:
523                            print(
524                                {
525                                    "Model": name,
526                                    "Accuracy": accuracy,
527                                    "Balanced Accuracy": b_accuracy,
528                                    "ROC AUC": roc_auc,
529                                    "F1 Score": f1,
530                                    self.custom_metric.__name__: custom_metric,
531                                    "Time taken": time.time() - start,
532                                }
533                            )
534                        else:
535                            print(
536                                {
537                                    "Model": name,
538                                    "Accuracy": accuracy,
539                                    "Balanced Accuracy": b_accuracy,
540                                    "ROC AUC": roc_auc,
541                                    "F1 Score": f1,
542                                    "Time taken": time.time() - start,
543                                }
544                            )
545                    if self.predictions:
546                        predictions[name] = y_pred
547                except Exception as exception:
548                    if self.ignore_warnings is False:
549                        print(name + " model failed to execute")
550                        print(exception)
551
552        else:  # no preprocessing
553
554            for name, model in tqdm(self.classifiers):  # do parallel exec
555                start = time.time()
556                try:
557                    if "random_state" in model().get_params().keys():
558                        layer_clf = CustomClassifier(
559                            obj=model(random_state=self.random_state),
560                            n_hidden_features=self.n_hidden_features,
561                            activation_name=self.activation_name,
562                            a=self.a,
563                            nodes_sim=self.nodes_sim,
564                            bias=self.bias,
565                            dropout=self.dropout,
566                            direct_link=self.direct_link,
567                            n_clusters=self.n_clusters,
568                            cluster_encode=self.cluster_encode,
569                            type_clust=self.type_clust,
570                            type_scaling=self.type_scaling,
571                            col_sample=self.col_sample,
572                            row_sample=self.row_sample,
573                            seed=self.seed,
574                            backend=self.backend,
575                            cv_calibration=None,
576                        )
577
578                    else:
579                        layer_clf = CustomClassifier(
580                            obj=model(),
581                            n_hidden_features=self.n_hidden_features,
582                            activation_name=self.activation_name,
583                            a=self.a,
584                            nodes_sim=self.nodes_sim,
585                            bias=self.bias,
586                            dropout=self.dropout,
587                            direct_link=self.direct_link,
588                            n_clusters=self.n_clusters,
589                            cluster_encode=self.cluster_encode,
590                            type_clust=self.type_clust,
591                            type_scaling=self.type_scaling,
592                            col_sample=self.col_sample,
593                            row_sample=self.row_sample,
594                            seed=self.seed,
595                            backend=self.backend,
596                            cv_calibration=None,
597                        )
598
599                    layer_clf.fit(X_train, y_train)
600
601                    for _ in range(self.n_layers):
602                        layer_clf = deepcopy(
603                            CustomClassifier(
604                                obj=layer_clf,
605                                n_hidden_features=self.n_hidden_features,
606                                activation_name=self.activation_name,
607                                a=self.a,
608                                nodes_sim=self.nodes_sim,
609                                bias=self.bias,
610                                dropout=self.dropout,
611                                direct_link=self.direct_link,
612                                n_clusters=self.n_clusters,
613                                cluster_encode=self.cluster_encode,
614                                type_clust=self.type_clust,
615                                type_scaling=self.type_scaling,
616                                col_sample=self.col_sample,
617                                row_sample=self.row_sample,
618                                seed=self.seed,
619                                backend=self.backend,
620                                cv_calibration=None,
621                            )
622                        )
623
624                        # layer_clf.fit(X_train, y_train)
625
626                    layer_clf.fit(X_train, y_train)
627
628                    self.models_[name] = layer_clf
629                    y_pred = layer_clf.predict(X_test)
630                    accuracy = accuracy_score(y_test, y_pred, normalize=True)
631                    b_accuracy = balanced_accuracy_score(y_test, y_pred)
632                    f1 = f1_score(y_test, y_pred, average="weighted")
633                    try:
634                        roc_auc = roc_auc_score(y_test, y_pred)
635                    except Exception as exception:
636                        roc_auc = None
637                        if self.ignore_warnings is False:
638                            print("ROC AUC couldn't be calculated for " + name)
639                            print(exception)
640                    names.append(name)
641                    Accuracy.append(accuracy)
642                    B_Accuracy.append(b_accuracy)
643                    ROC_AUC.append(roc_auc)
644                    F1.append(f1)
645                    TIME.append(time.time() - start)
646                    if self.custom_metric is not None:
647                        custom_metric = self.custom_metric(y_test, y_pred)
648                        CUSTOM_METRIC.append(custom_metric)
649                    if self.verbose > 0:
650                        if self.custom_metric is not None:
651                            print(
652                                {
653                                    "Model": name,
654                                    "Accuracy": accuracy,
655                                    "Balanced Accuracy": b_accuracy,
656                                    "ROC AUC": roc_auc,
657                                    "F1 Score": f1,
658                                    self.custom_metric.__name__: custom_metric,
659                                    "Time taken": time.time() - start,
660                                }
661                            )
662                        else:
663                            print(
664                                {
665                                    "Model": name,
666                                    "Accuracy": accuracy,
667                                    "Balanced Accuracy": b_accuracy,
668                                    "ROC AUC": roc_auc,
669                                    "F1 Score": f1,
670                                    "Time taken": time.time() - start,
671                                }
672                            )
673                    if self.predictions:
674                        predictions[name] = y_pred
675                except Exception as exception:
676                    if self.ignore_warnings is False:
677                        print(name + " model failed to execute")
678                        print(exception)
679
680        if self.custom_metric is None:
681            scores = pd.DataFrame(
682                {
683                    "Model": names,
684                    "Accuracy": Accuracy,
685                    "Balanced Accuracy": B_Accuracy,
686                    "ROC AUC": ROC_AUC,
687                    "F1 Score": F1,
688                    "Time Taken": TIME,
689                }
690            )
691        else:
692            scores = pd.DataFrame(
693                {
694                    "Model": names,
695                    "Accuracy": Accuracy,
696                    "Balanced Accuracy": B_Accuracy,
697                    "ROC AUC": ROC_AUC,
698                    "F1 Score": F1,
699                    "Custom metric": CUSTOM_METRIC,
700                    "Time Taken": TIME,
701                }
702            )
703        scores = scores.sort_values(by=self.sort_by, ascending=False).set_index(
704            "Model"
705        )
706
707        self.best_model_ = self.models_[scores.index[0]]
708
709        if self.predictions is True:
710
711            return scores, predictions
712
713        return scores
714
715    def get_best_model(self):
716        """
717        This function returns the best model pipeline based on the sort_by metric.
718
719        Returns:
720
721            best_model: object,
722                Returns the best model pipeline based on the sort_by metric.
723
724        """
725        return self.best_model_
726
727    def provide_models(self, X_train, X_test, y_train, y_test):
728        """Returns all the model objects trained. If fit hasn't been called yet,
729        then it's called to return the models.
730
731        Parameters:
732
733        X_train: array-like,
734            Training vectors, where rows is the number of samples
735            and columns is the number of features.
736
737        X_test: array-like,
738            Testing vectors, where rows is the number of samples
739            and columns is the number of features.
740
741        y_train: array-like,
742            Training vectors, where rows is the number of samples
743            and columns is the number of features.
744
745        y_test: array-like,
746            Testing vectors, where rows is the number of samples
747            and columns is the number of features.
748
749        Returns:
750
751            models: dict-object,
752                Returns a dictionary with each model's pipeline as value
753                and key = name of the model.
754        """
755        if len(self.models_.keys()) == 0:
756            self.fit(X_train, X_test, y_train, y_test)
757
758        return self.models_

Fitting -- almost -- all the classification algorithms with layers of nnetsauce's CustomClassifier and returning their scores.

Parameters:

verbose: int, optional (default=0)
    Any positive number for verbosity.

ignore_warnings: bool, optional (default=True)
    When set to True, warnings related to algorithms that are not
    able to run are ignored.

custom_metric: function, optional (default=None)
    When a function is provided, models are also evaluated with this custom
    metric (see the sketch right after the Examples section below).

predictions: bool, optional (default=False)
    When set to True, the predictions of all the models are also
    returned, keyed by model name.

sort_by: string, optional (default='Accuracy')
    Sort models by a metric. Available options are 'Accuracy',
    'Balanced Accuracy', 'ROC AUC', 'F1 Score' or a custom metric
    identified by its name and provided by custom_metric.

random_state: int, optional (default=42)
    Reproducibility seed.

estimators: list, optional (default='all')
    List of estimator names, or 'all' to use every available
    classifier (more than 90).

preprocess: bool
    Preprocessing is done when set to True.

n_jobs: int, optional
    When possible, run in parallel. For now, only used by individual
    models that support it.

n_layers: int, optional (default=3)
    Number of layers of CustomClassifiers to be used.

All the other parameters are the same as CustomClassifier's.

Attributes:

models_: dict-object
    Dictionary of fitted model pipelines, keyed by model name.

best_model_: object
    Best model pipeline according to the sort_by metric.

Examples:

    import nnetsauce as ns
    from sklearn.datasets import load_breast_cancer
    from sklearn.model_selection import train_test_split

    data = load_breast_cancer()
    X = data.data
    y = data.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
        random_state=123)
    clf = ns.LazyDeepClassifier(verbose=0, ignore_warnings=True,
        custom_metric=None, predictions=True)
    models, predictions = clf.fit(X_train, X_test, y_train, y_test)
    model_dictionary = clf.provide_models(X_train, X_test, y_train, y_test)
    print(models)
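
A quick, hedged sketch of the custom_metric hook described in the Parameters above: any callable with the signature (y_true, y_pred) can be passed, and a "Custom metric" column is then added to the scores table. The use of scikit-learn's matthews_corrcoef below is purely illustrative, not something required by nnetsauce.

    # Illustrative sketch only: plugging a custom metric into LazyDeepClassifier
    import nnetsauce as ns
    from sklearn.datasets import load_breast_cancer
    from sklearn.metrics import matthews_corrcoef  # any (y_true, y_pred) callable works
    from sklearn.model_selection import train_test_split

    X, y = load_breast_cancer(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
        random_state=123)

    clf = ns.LazyDeepClassifier(verbose=0, ignore_warnings=True,
        custom_metric=matthews_corrcoef)
    scores = clf.fit(X_train, X_test, y_train, y_test)  # predictions=False: scores only
    print(scores.columns)  # includes a "Custom metric" column when custom_metric is set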
    

def fit(self, X_train, X_test, y_train, y_test):
229    def fit(self, X_train, X_test, y_train, y_test):
230        """Fit classifiers to X_train and y_train, predict and score on X_test,
231        y_test.
232
233        Parameters:
234
235            X_train: array-like,
236                Training vectors, where rows is the number of samples
237                and columns is the number of features.
238
239            X_test: array-like,
240                Testing vectors, where rows is the number of samples
241                and columns is the number of features.
242
243            y_train: array-like,
244                Training vectors, where rows is the number of samples
245                and columns is the number of features.
246
247            y_test: array-like,
248                Testing vectors, where rows is the number of samples
249                and columns is the number of features.
250
251        Returns:
252
253            scores: Pandas DataFrame
254                Returns metrics of all the models in a Pandas DataFrame.
255
256            predictions: Pandas DataFrame
257                Returns predictions of all the models in a Pandas DataFrame.
258        """
259        Accuracy = []
260        B_Accuracy = []
261        ROC_AUC = []
262        F1 = []
263        names = []
264        TIME = []
265        predictions = {}
266
267        if self.custom_metric is not None:
268            CUSTOM_METRIC = []
269
270        if isinstance(X_train, np.ndarray):
271            X_train = pd.DataFrame(X_train)
272            X_test = pd.DataFrame(X_test)
273
274        numeric_features = X_train.select_dtypes(include=[np.number]).columns
275        categorical_features = X_train.select_dtypes(include=["object"]).columns
276
277        categorical_low, categorical_high = get_card_split(
278            X_train, categorical_features
279        )
280
281        if self.preprocess is True:
282            preprocessor = ColumnTransformer(
283                transformers=[
284                    ("numeric", numeric_transformer, numeric_features),
285                    (
286                        "categorical_low",
287                        categorical_transformer_low,
288                        categorical_low,
289                    ),
290                    (
291                        "categorical_high",
292                        categorical_transformer_high,
293                        categorical_high,
294                    ),
295                ]
296            )
297
298        # baseline models
299        try:
300            baseline_names = ["RandomForestClassifier", "XGBClassifier"]
301            baseline_models = [RandomForestClassifier(), xgb.XGBClassifier()]
302        except Exception as exception:
303            baseline_names = ["RandomForestClassifier"]
304            baseline_models = [RandomForestClassifier()]
305
306        for name, model in zip(baseline_names, baseline_models):
307            start = time.time()
308            try:
309                model.fit(X_train, y_train)
310                self.models_[name] = model
311                y_pred = model.predict(X_test)
312                accuracy = accuracy_score(y_test, y_pred, normalize=True)
313                b_accuracy = balanced_accuracy_score(y_test, y_pred)
314                f1 = f1_score(y_test, y_pred, average="weighted")
315                try:
316                    roc_auc = roc_auc_score(y_test, y_pred)
317                except Exception as exception:
318                    roc_auc = None
319                    if self.ignore_warnings is False:
320                        print("ROC AUC couldn't be calculated for " + name)
321                        print(exception)
322                names.append(name)
323                Accuracy.append(accuracy)
324                B_Accuracy.append(b_accuracy)
325                ROC_AUC.append(roc_auc)
326                F1.append(f1)
327                TIME.append(time.time() - start)
328                if self.custom_metric is not None:
329                    custom_metric = self.custom_metric(y_test, y_pred)
330                    CUSTOM_METRIC.append(custom_metric)
331                if self.verbose > 0:
332                    if self.custom_metric is not None:
333                        print(
334                            {
335                                "Model": name,
336                                "Accuracy": accuracy,
337                                "Balanced Accuracy": b_accuracy,
338                                "ROC AUC": roc_auc,
339                                "F1 Score": f1,
340                                self.custom_metric.__name__: custom_metric,
341                                "Time taken": time.time() - start,
342                            }
343                        )
344                    else:
345                        print(
346                            {
347                                "Model": name,
348                                "Accuracy": accuracy,
349                                "Balanced Accuracy": b_accuracy,
350                                "ROC AUC": roc_auc,
351                                "F1 Score": f1,
352                                "Time taken": time.time() - start,
353                            }
354                        )
355                if self.predictions:
356                    predictions[name] = y_pred
357            except Exception as exception:
358                if self.ignore_warnings is False:
359                    print(name + " model failed to execute")
360                    print(exception)
361
362        if self.estimators == "all":
363            self.classifiers = [
364                item
365                for sublist in [
366                    DEEPCLASSIFIERS,
367                    DEEPMULTITASKCLASSIFIERS,
368                    DEEPSIMPLEMULTITASKCLASSIFIERS,
369                ]
370                for item in sublist
371            ]
372        else:
373            self.classifiers = (
374                [
375                    ("DeepCustomClassifier(" + est[0] + ")", est[1])
376                    for est in all_estimators()
377                    if (
378                        issubclass(est[1], ClassifierMixin)
379                        and (est[0] in self.estimators)
380                    )
381                ]
382                + [
383                    (
384                        "DeepMultitaskClassifier(" + est[0] + ")",
385                        partial(MultitaskClassifier, obj=est[1]()),
386                    )
387                    for est in all_estimators()
388                    if (
389                        issubclass(est[1], RegressorMixin)
390                        and (est[0] in self.estimators)
391                    )
392                ]
393                + [
394                    (
395                        "DeepSimpleMultitaskClassifier(" + est[0] + ")",
396                        partial(SimpleMultitaskClassifier, obj=est[1]()),
397                    )
398                    for est in all_estimators()
399                    if (
400                        issubclass(est[1], RegressorMixin)
401                        and (est[0] in self.estimators)
402                    )
403                ]
404            )
405
406        if self.preprocess is True:
407
408            for name, model in tqdm(self.classifiers):  # do parallel exec
409
410                other_args = (
411                    {}
412                )  # use this trick for `random_state` too --> refactor
413                try:
414                    if (
415                        "n_jobs" in model().get_params().keys()
416                        and name.find("LogisticRegression") == -1
417                    ):
418                        other_args["n_jobs"] = self.n_jobs
419                except Exception:
420                    pass
421
422                start = time.time()
423
424                try:
425                    if "random_state" in model().get_params().keys():
426                        layer_clf = CustomClassifier(
427                            obj=model(random_state=self.random_state),
428                            n_hidden_features=self.n_hidden_features,
429                            activation_name=self.activation_name,
430                            a=self.a,
431                            nodes_sim=self.nodes_sim,
432                            bias=self.bias,
433                            dropout=self.dropout,
434                            direct_link=self.direct_link,
435                            n_clusters=self.n_clusters,
436                            cluster_encode=self.cluster_encode,
437                            type_clust=self.type_clust,
438                            type_scaling=self.type_scaling,
439                            col_sample=self.col_sample,
440                            row_sample=self.row_sample,
441                            seed=self.seed,
442                            backend=self.backend,
443                            cv_calibration=None,
444                        )
445
446                    else:
447                        layer_clf = CustomClassifier(
448                            obj=model(),
449                            n_hidden_features=self.n_hidden_features,
450                            activation_name=self.activation_name,
451                            a=self.a,
452                            nodes_sim=self.nodes_sim,
453                            bias=self.bias,
454                            dropout=self.dropout,
455                            direct_link=self.direct_link,
456                            n_clusters=self.n_clusters,
457                            cluster_encode=self.cluster_encode,
458                            type_clust=self.type_clust,
459                            type_scaling=self.type_scaling,
460                            col_sample=self.col_sample,
461                            row_sample=self.row_sample,
462                            seed=self.seed,
463                            backend=self.backend,
464                            cv_calibration=None,
465                        )
466
467                    layer_clf.fit(X_train, y_train)
468
469                    for _ in range(self.n_layers):
470                        layer_clf = deepcopy(
471                            CustomClassifier(
472                                obj=layer_clf,
473                                n_hidden_features=self.n_hidden_features,
474                                activation_name=self.activation_name,
475                                a=self.a,
476                                nodes_sim=self.nodes_sim,
477                                bias=self.bias,
478                                dropout=self.dropout,
479                                direct_link=self.direct_link,
480                                n_clusters=self.n_clusters,
481                                cluster_encode=self.cluster_encode,
482                                type_clust=self.type_clust,
483                                type_scaling=self.type_scaling,
484                                col_sample=self.col_sample,
485                                row_sample=self.row_sample,
486                                seed=self.seed,
487                                backend=self.backend,
488                                cv_calibration=None,
489                            )
490                        )
491
492                    pipe = Pipeline(
493                        [
494                            ("preprocessor", preprocessor),
495                            ("classifier", layer_clf),
496                        ]
497                    )
498
499                    pipe.fit(X_train, y_train)
500                    self.models_[name] = pipe
501                    y_pred = pipe.predict(X_test)
502                    accuracy = accuracy_score(y_test, y_pred, normalize=True)
503                    b_accuracy = balanced_accuracy_score(y_test, y_pred)
504                    f1 = f1_score(y_test, y_pred, average="weighted")
505                    try:
506                        roc_auc = roc_auc_score(y_test, y_pred)
507                    except Exception as exception:
508                        roc_auc = None
509                        if self.ignore_warnings is False:
510                            print("ROC AUC couldn't be calculated for " + name)
511                            print(exception)
512                    names.append(name)
513                    Accuracy.append(accuracy)
514                    B_Accuracy.append(b_accuracy)
515                    ROC_AUC.append(roc_auc)
516                    F1.append(f1)
517                    TIME.append(time.time() - start)
518                    if self.custom_metric is not None:
519                        custom_metric = self.custom_metric(y_test, y_pred)
520                        CUSTOM_METRIC.append(custom_metric)
521                    if self.verbose > 0:
522                        if self.custom_metric is not None:
523                            print(
524                                {
525                                    "Model": name,
526                                    "Accuracy": accuracy,
527                                    "Balanced Accuracy": b_accuracy,
528                                    "ROC AUC": roc_auc,
529                                    "F1 Score": f1,
530                                    self.custom_metric.__name__: custom_metric,
531                                    "Time taken": time.time() - start,
532                                }
533                            )
534                        else:
535                            print(
536                                {
537                                    "Model": name,
538                                    "Accuracy": accuracy,
539                                    "Balanced Accuracy": b_accuracy,
540                                    "ROC AUC": roc_auc,
541                                    "F1 Score": f1,
542                                    "Time taken": time.time() - start,
543                                }
544                            )
545                    if self.predictions:
546                        predictions[name] = y_pred
547                except Exception as exception:
548                    if self.ignore_warnings is False:
549                        print(name + " model failed to execute")
550                        print(exception)
551
552        else:  # no preprocessing
553
554            for name, model in tqdm(self.classifiers):  # do parallel exec
555                start = time.time()
556                try:
557                    if "random_state" in model().get_params().keys():
558                        layer_clf = CustomClassifier(
559                            obj=model(random_state=self.random_state),
560                            n_hidden_features=self.n_hidden_features,
561                            activation_name=self.activation_name,
562                            a=self.a,
563                            nodes_sim=self.nodes_sim,
564                            bias=self.bias,
565                            dropout=self.dropout,
566                            direct_link=self.direct_link,
567                            n_clusters=self.n_clusters,
568                            cluster_encode=self.cluster_encode,
569                            type_clust=self.type_clust,
570                            type_scaling=self.type_scaling,
571                            col_sample=self.col_sample,
572                            row_sample=self.row_sample,
573                            seed=self.seed,
574                            backend=self.backend,
575                            cv_calibration=None,
576                        )
577
578                    else:
579                        layer_clf = CustomClassifier(
580                            obj=model(),
581                            n_hidden_features=self.n_hidden_features,
582                            activation_name=self.activation_name,
583                            a=self.a,
584                            nodes_sim=self.nodes_sim,
585                            bias=self.bias,
586                            dropout=self.dropout,
587                            direct_link=self.direct_link,
588                            n_clusters=self.n_clusters,
589                            cluster_encode=self.cluster_encode,
590                            type_clust=self.type_clust,
591                            type_scaling=self.type_scaling,
592                            col_sample=self.col_sample,
593                            row_sample=self.row_sample,
594                            seed=self.seed,
595                            backend=self.backend,
596                            cv_calibration=None,
597                        )
598
599                    layer_clf.fit(X_train, y_train)
600
601                    for _ in range(self.n_layers):
602                        layer_clf = deepcopy(
603                            CustomClassifier(
604                                obj=layer_clf,
605                                n_hidden_features=self.n_hidden_features,
606                                activation_name=self.activation_name,
607                                a=self.a,
608                                nodes_sim=self.nodes_sim,
609                                bias=self.bias,
610                                dropout=self.dropout,
611                                direct_link=self.direct_link,
612                                n_clusters=self.n_clusters,
613                                cluster_encode=self.cluster_encode,
614                                type_clust=self.type_clust,
615                                type_scaling=self.type_scaling,
616                                col_sample=self.col_sample,
617                                row_sample=self.row_sample,
618                                seed=self.seed,
619                                backend=self.backend,
620                                cv_calibration=None,
621                            )
622                        )
623
624                        # layer_clf.fit(X_train, y_train)
625
626                    layer_clf.fit(X_train, y_train)
627
628                    self.models_[name] = layer_clf
629                    y_pred = layer_clf.predict(X_test)
630                    accuracy = accuracy_score(y_test, y_pred, normalize=True)
631                    b_accuracy = balanced_accuracy_score(y_test, y_pred)
632                    f1 = f1_score(y_test, y_pred, average="weighted")
633                    try:
634                        roc_auc = roc_auc_score(y_test, y_pred)
635                    except Exception as exception:
636                        roc_auc = None
637                        if self.ignore_warnings is False:
638                            print("ROC AUC couldn't be calculated for " + name)
639                            print(exception)
640                    names.append(name)
641                    Accuracy.append(accuracy)
642                    B_Accuracy.append(b_accuracy)
643                    ROC_AUC.append(roc_auc)
644                    F1.append(f1)
645                    TIME.append(time.time() - start)
646                    if self.custom_metric is not None:
647                        custom_metric = self.custom_metric(y_test, y_pred)
648                        CUSTOM_METRIC.append(custom_metric)
649                    if self.verbose > 0:
650                        if self.custom_metric is not None:
651                            print(
652                                {
653                                    "Model": name,
654                                    "Accuracy": accuracy,
655                                    "Balanced Accuracy": b_accuracy,
656                                    "ROC AUC": roc_auc,
657                                    "F1 Score": f1,
658                                    self.custom_metric.__name__: custom_metric,
659                                    "Time taken": time.time() - start,
660                                }
661                            )
662                        else:
663                            print(
664                                {
665                                    "Model": name,
666                                    "Accuracy": accuracy,
667                                    "Balanced Accuracy": b_accuracy,
668                                    "ROC AUC": roc_auc,
669                                    "F1 Score": f1,
670                                    "Time taken": time.time() - start,
671                                }
672                            )
673                    if self.predictions:
674                        predictions[name] = y_pred
675                except Exception as exception:
676                    if self.ignore_warnings is False:
677                        print(name + " model failed to execute")
678                        print(exception)
679
680        if self.custom_metric is None:
681            scores = pd.DataFrame(
682                {
683                    "Model": names,
684                    "Accuracy": Accuracy,
685                    "Balanced Accuracy": B_Accuracy,
686                    "ROC AUC": ROC_AUC,
687                    "F1 Score": F1,
688                    "Time Taken": TIME,
689                }
690            )
691        else:
692            scores = pd.DataFrame(
693                {
694                    "Model": names,
695                    "Accuracy": Accuracy,
696                    "Balanced Accuracy": B_Accuracy,
697                    "ROC AUC": ROC_AUC,
698                    "F1 Score": F1,
699                    "Custom metric": CUSTOM_METRIC,
700                    "Time Taken": TIME,
701                }
702            )
703        scores = scores.sort_values(by=self.sort_by, ascending=False).set_index(
704            "Model"
705        )
706
707        self.best_model_ = self.models_[scores.index[0]]
708
709        if self.predictions is True:
710
711            return scores, predictions
712
713        return scores

Fit classifiers to X_train and y_train, predict and score on X_test, y_test.

Parameters:

X_train: array-like,
    Training vectors, where rows is the number of samples
    and columns is the number of features.

X_test: array-like,
    Testing vectors, where rows is the number of samples
    and columns is the number of features.

y_train: array-like,
    Training target values (one value per training sample).

y_test: array-like,
    Testing target values (one value per test sample).

Returns:

scores: Pandas DataFrame
    Returns metrics of all the models in a Pandas DataFrame.

predictions: Pandas DataFrame
    Returns predictions of all the models in a Pandas DataFrame.
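
A brief, hedged illustration of the two return modes described above; the breast-cancer split is only an example, and predictions=True is set explicitly so that the second return value is produced (per the source listing, it maps each model name to its test-set predictions).

    import nnetsauce as ns
    from sklearn.datasets import load_breast_cancer
    from sklearn.model_selection import train_test_split

    X, y = load_breast_cancer(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
        random_state=123)

    clf = ns.LazyDeepClassifier(verbose=0, ignore_warnings=True, predictions=True)
    scores, predictions = clf.fit(X_train, X_test, y_train, y_test)
    print(scores.head())          # ranked by the sort_by metric ('Accuracy' by default)
    print(list(predictions)[:3])  # model names, each mapped to its test-set predictions
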
def provide_models(self, X_train, X_test, y_train, y_test):
727    def provide_models(self, X_train, X_test, y_train, y_test):
728        """Returns all the model objects trained. If fit hasn't been called yet,
729        then it's called to return the models.
730
731        Parameters:
732
733        X_train: array-like,
734            Training vectors, where rows is the number of samples
735            and columns is the number of features.
736
737        X_test: array-like,
738            Testing vectors, where rows is the number of samples
739            and columns is the number of features.
740
741        y_train: array-like,
742            Training vectors, where rows is the number of samples
743            and columns is the number of features.
744
745        y_test: array-like,
746            Testing vectors, where rows is the number of samples
747            and columns is the number of features.
748
749        Returns:
750
751            models: dict-object,
752                Returns a dictionary with each model's pipeline as value
753                and key = name of the model.
754        """
755        if len(self.models_.keys()) == 0:
756            self.fit(X_train, X_test, y_train, y_test)
757
758        return self.models_

Returns all the model objects trained. If fit hasn't been called yet, then it's called to return the models.

Parameters:

X_train: array-like, Training vectors, where rows is the number of samples and columns is the number of features.

X_test: array-like, Testing vectors, where rows is the number of samples and columns is the number of features.

y_train: array-like, Training target values (one value per training sample).

y_test: array-like, Testing target values (one value per test sample).

Returns:

models: dict-object,
    Returns a dictionary with each model's pipeline as value
    and key = name of the model.
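
A short, hedged usage sketch combining provide_models with get_best_model; the dataset and split mirror the class example above and are not specific to either method.

    import nnetsauce as ns
    from sklearn.datasets import load_breast_cancer
    from sklearn.model_selection import train_test_split

    X, y = load_breast_cancer(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
        random_state=123)

    clf = ns.LazyDeepClassifier(verbose=0, ignore_warnings=True)
    clf.fit(X_train, X_test, y_train, y_test)

    # Dictionary keyed by model name; fit() is called internally if it has not run yet
    model_dictionary = clf.provide_models(X_train, X_test, y_train, y_test)
    best = clf.get_best_model()       # best fitted pipeline according to sort_by
    print(best.predict(X_test)[:5])   # reuse it like any fitted scikit-learn estimator
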
class LazyDeepRegressor(nnetsauce.custom.custom.Custom, sklearn.base.RegressorMixin):
 90class LazyDeepRegressor(Custom, RegressorMixin):
 91    """
 92        Fitting -- almost -- all the regression algorithms with layers of
 93        nnetsauce's CustomRegressor and returning their scores.
 94
 95    Parameters:
 96
 97        verbose: int, optional (default=0)
 98            Any positive number for verbosity.
 99
100        ignore_warnings: bool, optional (default=True)
101            When set to True, warnings related to algorithms that are not able to run are ignored.
102
103        custom_metric: function, optional (default=None)
104            When a function is provided, models are also evaluated with this custom evaluation metric.
105
106        predictions: bool, optional (default=False)
107            When set to True, the predictions of all the models are also returned.
108
109        sort_by: string, optional (default='RMSE')
110            Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken',
111            or a custom metric identified by its name and provided by custom_metric.
112
113        random_state: int, optional (default=42)
114            Reproducibility seed.
115
116        estimators: list, optional (default='all')
117            list of Estimators names or just 'all' (default='all')
118
119        preprocess: bool
120            preprocessing is done when set to True
121
122        n_jobs : int, when possible, run in parallel
123            For now, only used by individual models that support it.
124
125        n_layers: int, optional (default=3)
126            Number of layers of CustomRegressors to be used.
127
128        All the other parameters are the same as CustomRegressor's.
129
130    Attributes:
131
132        models_: dict-object
133            Returns a dictionary with each model pipeline as value
134            with key as name of models.
135
136        best_model_: object
137            Returns the best model pipeline based on the sort_by metric.
138
139    Examples:
140
141        import nnetsauce as ns
142        import numpy as np
143        from sklearn import datasets
144        from sklearn.utils import shuffle
145
146        diabetes = datasets.load_diabetes()
147        X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
148        X = X.astype(np.float32)
149
150        offset = int(X.shape[0] * 0.9)
151        X_train, y_train = X[:offset], y[:offset]
152        X_test, y_test = X[offset:], y[offset:]
153
154        reg = ns.LazyDeepRegressor(verbose=0, ignore_warnings=False, custom_metric=None)
155        models, predictions = reg.fit(X_train, X_test, y_train, y_test)
156        print(models)
157
158    """
159
160    def __init__(
161        self,
162        verbose=0,
163        ignore_warnings=True,
164        custom_metric=None,
165        predictions=False,
166        sort_by="RMSE",
167        random_state=42,
168        estimators="all",
169        preprocess=False,
170        n_jobs=None,
171        # Defining depth
172        n_layers=3,
173        # CustomRegressor attributes
174        obj=None,
175        n_hidden_features=5,
176        activation_name="relu",
177        a=0.01,
178        nodes_sim="sobol",
179        bias=True,
180        dropout=0,
181        direct_link=True,
182        n_clusters=2,
183        cluster_encode=True,
184        type_clust="kmeans",
185        type_scaling=("std", "std", "std"),
186        col_sample=1,
187        row_sample=1,
188        seed=123,
189        backend="cpu",
190    ):
191        self.verbose = verbose
192        self.ignore_warnings = ignore_warnings
193        self.custom_metric = custom_metric
194        self.predictions = predictions
195        self.sort_by = sort_by
196        self.models_ = {}
197        self.best_model_ = None
198        self.random_state = random_state
199        self.estimators = estimators
200        self.preprocess = preprocess
201        self.n_layers = n_layers - 1
202        self.n_jobs = n_jobs
203        super().__init__(
204            obj=obj,
205            n_hidden_features=n_hidden_features,
206            activation_name=activation_name,
207            a=a,
208            nodes_sim=nodes_sim,
209            bias=bias,
210            dropout=dropout,
211            direct_link=direct_link,
212            n_clusters=n_clusters,
213            cluster_encode=cluster_encode,
214            type_clust=type_clust,
215            type_scaling=type_scaling,
216            col_sample=col_sample,
217            row_sample=row_sample,
218            seed=seed,
219            backend=backend,
220        )
221
222    def fit(self, X_train, X_test, y_train, y_test):
223        """Fit Regression algorithms to X_train and y_train, predict and score on X_test, y_test.
224
225        Parameters:
226
227            X_train : array-like,
228                Training vectors, where rows is the number of samples
229                and columns is the number of features.
230
231            X_test : array-like,
232                Testing vectors, where rows is the number of samples
233                and columns is the number of features.
234
235            y_train : array-like,
236                Training vectors, where rows is the number of samples
237                and columns is the number of features.
238
239            y_test : array-like,
240                Testing vectors, where rows is the number of samples
241                and columns is the number of features.
242
243        Returns:
244        -------
245        scores:  Pandas DataFrame
246            Returns metrics of all the models in a Pandas DataFrame.
247
248        predictions : Pandas DataFrame
249            Returns predictions of all the models in a Pandas DataFrame.
250
251        """
252        R2 = []
253        ADJR2 = []
254        RMSE = []
255        # WIN = []
256        names = []
257        TIME = []
258        predictions = {}
259
260        if self.custom_metric:
261            CUSTOM_METRIC = []
262
263        if isinstance(X_train, np.ndarray):
264            X_train = pd.DataFrame(X_train)
265            X_test = pd.DataFrame(X_test)
266
267        numeric_features = X_train.select_dtypes(include=[np.number]).columns
268        categorical_features = X_train.select_dtypes(include=["object"]).columns
269
270        categorical_low, categorical_high = get_card_split(
271            X_train, categorical_features
272        )
273
274        if self.preprocess is True:
275            preprocessor = ColumnTransformer(
276                transformers=[
277                    ("numeric", numeric_transformer, numeric_features),
278                    (
279                        "categorical_low",
280                        categorical_transformer_low,
281                        categorical_low,
282                    ),
283                    (
284                        "categorical_high",
285                        categorical_transformer_high,
286                        categorical_high,
287                    ),
288                ]
289            )
290
291        # base models
292        try:
293            baseline_names = ["RandomForestRegressor", "XGBRegressor"]
294            baseline_models = [RandomForestRegressor(), xgb.XGBRegressor()]
295        except Exception as exception:
296            baseline_names = ["RandomForestRegressor"]
297            baseline_models = [RandomForestRegressor()]
298
299        for name, model in zip(baseline_names, baseline_models):
300            start = time.time()
301            try:
302                model.fit(X_train, y_train)
303                self.models_[name] = model
304                y_pred = model.predict(X_test)
305                r_squared = r2_score(y_test, y_pred)
306                adj_rsquared = adjusted_rsquared(
307                    r_squared, X_test.shape[0], X_test.shape[1]
308                )
309                rmse = np.sqrt(np.mean((y_test - y_pred) ** 2))
310
311                names.append(name)
312                R2.append(r_squared)
313                ADJR2.append(adj_rsquared)
314                RMSE.append(rmse)
315                TIME.append(time.time() - start)
316
317                if self.custom_metric:
318                    custom_metric = self.custom_metric(y_test, y_pred)
319                    CUSTOM_METRIC.append(custom_metric)
320
321                if self.verbose > 0:
322                    scores_verbose = {
323                        "Model": name,
324                        "R-Squared": r_squared,
325                        "Adjusted R-Squared": adj_rsquared,
326                        "RMSE": rmse,
327                        "Time taken": time.time() - start,
328                    }
329
330                    if self.custom_metric:
331                        scores_verbose[self.custom_metric.__name__] = (
332                            custom_metric
333                        )
334
335                    print(scores_verbose)
336                if self.predictions:
337                    predictions[name] = y_pred
338            except Exception as exception:
339                if self.ignore_warnings is False:
340                    print(name + " model failed to execute")
341                    print(exception)
342
343        if self.estimators == "all":
344            self.regressors = DEEPREGRESSORS
345        else:
346            self.regressors = [
347                ("DeepCustomRegressor(" + est[0] + ")", est[1])
348                for est in all_estimators()
349                if (
350                    issubclass(est[1], RegressorMixin)
351                    and (est[0] in self.estimators)
352                )
353            ]
354
355        if self.preprocess is True:
356
357            for name, model in tqdm(self.regressors):  # do parallel exec
358                start = time.time()
359                try:
360                    if "random_state" in model().get_params().keys():
361                        layer_regr = CustomRegressor(
362                            obj=model(random_state=self.random_state),
363                            n_hidden_features=self.n_hidden_features,
364                            activation_name=self.activation_name,
365                            a=self.a,
366                            nodes_sim=self.nodes_sim,
367                            bias=self.bias,
368                            dropout=self.dropout,
369                            direct_link=self.direct_link,
370                            n_clusters=self.n_clusters,
371                            cluster_encode=self.cluster_encode,
372                            type_clust=self.type_clust,
373                            type_scaling=self.type_scaling,
374                            col_sample=self.col_sample,
375                            row_sample=self.row_sample,
376                            seed=self.seed,
377                            backend=self.backend,
378                        )
379                    else:
380                        layer_regr = CustomRegressor(
381                            obj=model(),
382                            n_hidden_features=self.n_hidden_features,
383                            activation_name=self.activation_name,
384                            a=self.a,
385                            nodes_sim=self.nodes_sim,
386                            bias=self.bias,
387                            dropout=self.dropout,
388                            direct_link=self.direct_link,
389                            n_clusters=self.n_clusters,
390                            cluster_encode=self.cluster_encode,
391                            type_clust=self.type_clust,
392                            type_scaling=self.type_scaling,
393                            col_sample=self.col_sample,
394                            row_sample=self.row_sample,
395                            seed=self.seed,
396                            backend=self.backend,
397                        )
398
399                    for _ in range(self.n_layers):
400                        layer_regr = deepcopy(
401                            CustomRegressor(
402                                obj=layer_regr,
403                                n_hidden_features=self.n_hidden_features,
404                                activation_name=self.activation_name,
405                                a=self.a,
406                                nodes_sim=self.nodes_sim,
407                                bias=self.bias,
408                                dropout=self.dropout,
409                                direct_link=self.direct_link,
410                                n_clusters=self.n_clusters,
411                                cluster_encode=self.cluster_encode,
412                                type_clust=self.type_clust,
413                                type_scaling=self.type_scaling,
414                                col_sample=self.col_sample,
415                                row_sample=self.row_sample,
416                                seed=self.seed,
417                                backend=self.backend,
418                            )
419                        )
420
421                    layer_regr.fit(X_train, y_train)
422
423                    pipe = Pipeline(
424                        steps=[
425                            ("preprocessor", preprocessor),
426                            ("regressor", layer_regr),
427                        ]
428                    )
429
430                    pipe.fit(X_train, y_train)
431
432                    self.models_[name] = pipe
433                    y_pred = pipe.predict(X_test)
434                    r_squared = r2_score(y_test, y_pred)
435                    adj_rsquared = adjusted_rsquared(
436                        r_squared, X_test.shape[0], X_test.shape[1]
437                    )
438                    rmse = np.sqrt(np.mean((y_test - y_pred) ** 2))
439
440                    names.append(name)
441                    R2.append(r_squared)
442                    ADJR2.append(adj_rsquared)
443                    RMSE.append(rmse)
444                    TIME.append(time.time() - start)
445
446                    if self.custom_metric:
447                        custom_metric = self.custom_metric(y_test, y_pred)
448                        CUSTOM_METRIC.append(custom_metric)
449
450                    if self.verbose > 0:
451                        scores_verbose = {
452                            "Model": name,
453                            "R-Squared": r_squared,
454                            "Adjusted R-Squared": adj_rsquared,
455                            "RMSE": rmse,
456                            "Time taken": time.time() - start,
457                        }
458
459                        if self.custom_metric:
460                            scores_verbose[self.custom_metric.__name__] = (
461                                custom_metric
462                            )
463
464                        print(scores_verbose)
465                    if self.predictions:
466                        predictions[name] = y_pred
467                except Exception as exception:
468                    if self.ignore_warnings is False:
469                        print(name + " model failed to execute")
470                        print(exception)
471
472        else:  # no preprocessing
473
474            for name, model in tqdm(self.regressors):  # do parallel exec
475                start = time.time()
476                try:
477                    if "random_state" in model().get_params().keys():
478                        layer_regr = CustomRegressor(
479                            obj=model(random_state=self.random_state),
480                            n_hidden_features=self.n_hidden_features,
481                            activation_name=self.activation_name,
482                            a=self.a,
483                            nodes_sim=self.nodes_sim,
484                            bias=self.bias,
485                            dropout=self.dropout,
486                            direct_link=self.direct_link,
487                            n_clusters=self.n_clusters,
488                            cluster_encode=self.cluster_encode,
489                            type_clust=self.type_clust,
490                            type_scaling=self.type_scaling,
491                            col_sample=self.col_sample,
492                            row_sample=self.row_sample,
493                            seed=self.seed,
494                            backend=self.backend,
495                        )
496                    else:
497                        layer_regr = CustomRegressor(
498                            obj=model(),
499                            n_hidden_features=self.n_hidden_features,
500                            activation_name=self.activation_name,
501                            a=self.a,
502                            nodes_sim=self.nodes_sim,
503                            bias=self.bias,
504                            dropout=self.dropout,
505                            direct_link=self.direct_link,
506                            n_clusters=self.n_clusters,
507                            cluster_encode=self.cluster_encode,
508                            type_clust=self.type_clust,
509                            type_scaling=self.type_scaling,
510                            col_sample=self.col_sample,
511                            row_sample=self.row_sample,
512                            seed=self.seed,
513                            backend=self.backend,
514                        )
515
516                    layer_regr.fit(X_train, y_train)
517
518                    for _ in range(self.n_layers):
519                        layer_regr = deepcopy(
520                            CustomRegressor(
521                                obj=layer_regr,
522                                n_hidden_features=self.n_hidden_features,
523                                activation_name=self.activation_name,
524                                a=self.a,
525                                nodes_sim=self.nodes_sim,
526                                bias=self.bias,
527                                dropout=self.dropout,
528                                direct_link=self.direct_link,
529                                n_clusters=self.n_clusters,
530                                cluster_encode=self.cluster_encode,
531                                type_clust=self.type_clust,
532                                type_scaling=self.type_scaling,
533                                col_sample=self.col_sample,
534                                row_sample=self.row_sample,
535                                seed=self.seed,
536                                backend=self.backend,
537                            )
538                        )
539
540                        # layer_regr.fit(X_train, y_train)
541
542                    layer_regr.fit(X_train, y_train)
543
544                    self.models_[name] = layer_regr
545                    y_pred = layer_regr.predict(X_test)
546
547                    r_squared = r2_score(y_test, y_pred)
548                    adj_rsquared = adjusted_rsquared(
549                        r_squared, X_test.shape[0], X_test.shape[1]
550                    )
551                    rmse = np.sqrt(np.mean((y_test - y_pred) ** 2))
552
553                    names.append(name)
554                    R2.append(r_squared)
555                    ADJR2.append(adj_rsquared)
556                    RMSE.append(rmse)
557                    TIME.append(time.time() - start)
558
559                    if self.custom_metric:
560                        custom_metric = self.custom_metric(y_test, y_pred)
561                        CUSTOM_METRIC.append(custom_metric)
562
563                    if self.verbose > 0:
564                        scores_verbose = {
565                            "Model": name,
566                            "R-Squared": r_squared,
567                            "Adjusted R-Squared": adj_rsquared,
568                            "RMSE": rmse,
569                            "Time taken": time.time() - start,
570                        }
571
572                        if self.custom_metric:
573                            scores_verbose[self.custom_metric.__name__] = (
574                                custom_metric
575                            )
576
577                        print(scores_verbose)
578                    if self.predictions:
579                        predictions[name] = y_pred
580                except Exception as exception:
581                    if self.ignore_warnings is False:
582                        print(name + " model failed to execute")
583                        print(exception)
584
585        scores = {
586            "Model": names,
587            "Adjusted R-Squared": ADJR2,
588            "R-Squared": R2,
589            "RMSE": RMSE,
590            "Time Taken": TIME,
591        }
592
593        if self.custom_metric:
594            scores["Custom metric"] = CUSTOM_METRIC
595
596        scores = pd.DataFrame(scores)
597        scores = scores.sort_values(by=self.sort_by, ascending=True).set_index(
598            "Model"
599        )
600
601        self.best_model_ = self.models_[scores.index[0]]
602
603        if self.predictions is True:
604
605            return scores, predictions
606
607        return scores
608
609    def get_best_model(self):
610        """
611        This function returns the best model pipeline based on the sort_by metric.
612
613        Returns:
614
615            best_model: object,
616                Returns the best model pipeline based on the sort_by metric.
617
618        """
619        return self.best_model_
620
621    def provide_models(self, X_train, X_test, y_train, y_test):
622        """
623        This function returns all the model objects trained in fit function.
624        If fit is not called already, then we call fit and then return the models.
625
626        Parameters:
627
628            X_train : array-like,
629                Training vectors, where rows is the number of samples
630                and columns is the number of features.
631
632            X_test : array-like,
633                Testing vectors, where rows is the number of samples
634                and columns is the number of features.
635
636            y_train : array-like,
637                Training vectors, where rows is the number of samples
638                and columns is the number of features.
639
640            y_test : array-like,
641                Testing vectors, where rows is the number of samples
642                and columns is the number of features.
643
644        Returns:
645
646            models: dict-object,
647                Returns a dictionary with each model pipeline as value
648                with key as name of models.
649
650        """
651        if len(self.models_.keys()) == 0:
652            self.fit(X_train, X_test, y_train, y_test)
653
654        return self.models_

Fitting -- almost -- all the regression algorithms with layers of nnetsauce's CustomRegressor and returning their scores.

Parameters:

verbose: int, optional (default=0)
    Any positive number for verbosity.

ignore_warnings: bool, optional (default=True)
    When set to True, warnings about algorithms that fail to run are ignored.

custom_metric: function, optional (default=None)
    When a function is provided, models are also evaluated with this custom metric.

predictions: bool, optional (default=False)
    When set to True, the predictions of all the models are returned as a dataframe.

sort_by: string, optional (default='RMSE')
    Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken',
    or a custom metric identified by its name and provided by custom_metric.

random_state: int, optional (default=42)
    Reproducibility seed.

estimators: list, optional (default='all')
    List of estimator names, or just 'all' (default='all').

preprocess: bool
    When set to True, preprocessing is applied to the features before fitting.

n_jobs : int, optional
    Number of parallel jobs, used to run models in parallel when possible.
    For now, only used by individual models that support it.

n_layers: int, optional (default=3)
    Number of layers of CustomRegressors to be used; each layer wraps the previous one
    (see the sketch after this parameter list).

All the other parameters are the same as CustomRegressor's.
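
To make the layering concrete, here is a minimal sketch -- not the package's internal code -- of what n_layers nested CustomRegressors amount to; Ridge is an arbitrary base learner chosen for illustration.

import nnetsauce as ns
from sklearn.linear_model import Ridge

# conceptual sketch: n_layers=3 wraps a base regressor in three CustomRegressor layers
layer = ns.CustomRegressor(obj=Ridge())   # layer 1: base model plus hidden features
for _ in range(2):                        # layers 2 and 3 each wrap the previous layer
    layer = ns.CustomRegressor(obj=layer)
# the result is used like any regressor: layer.fit(X_train, y_train); layer.predict(X_test)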

Attributes:

models_: dict-object
    Returns a dictionary mapping each model name to its fitted pipeline.

best_model_: object
    Returns the best model pipeline based on the sort_by metric.

Examples:

import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle

diabetes = datasets.load_diabetes()
X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
X = X.astype(np.float32)

offset = int(X.shape[0] * 0.9)
X_train, y_train = X[:offset], y[:offset]
X_test, y_test = X[offset:], y[offset:]

reg = ns.LazyDeepRegressor(verbose=0, ignore_warnings=False, custom_metric=None, predictions=True)
models, predictions = reg.fit(X_train, X_test, y_train, y_test)
print(models)
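
The following is a hedged sketch, not part of the original docstring, showing how a custom metric and a restricted estimator list can be combined; median_absolute_error and the two estimator names are illustrative choices.

import nnetsauce as ns
from sklearn import datasets
from sklearn.metrics import median_absolute_error
from sklearn.model_selection import train_test_split

X, y = datasets.load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# custom_metric is any callable (y_true, y_pred) -> float; its values are appended to the scores table
reg = ns.LazyDeepRegressor(
    verbose=0,
    ignore_warnings=True,
    custom_metric=median_absolute_error,
    estimators=["Ridge", "ElasticNet"],  # wrap only these scikit-learn regressors
    n_layers=2,
)
models = reg.fit(X_train, X_test, y_train, y_test)
print(models)                # one row per model, sorted by the sort_by metric (RMSE by default)
print(reg.get_best_model())  # best pipeline according to sort_by
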
def fit(self, X_train, X_test, y_train, y_test):
222    def fit(self, X_train, X_test, y_train, y_test):
223        """Fit Regression algorithms to X_train and y_train, predict and score on X_test, y_test.
224
225        Parameters:
226
227            X_train : array-like,
228                Training vectors, where rows is the number of samples
229                and columns is the number of features.
230
231            X_test : array-like,
232                Testing vectors, where rows is the number of samples
233                and columns is the number of features.
234
235            y_train : array-like,
236                Training vectors, where rows is the number of samples
237                and columns is the number of features.
238
239            y_test : array-like,
240                Testing vectors, where rows is the number of samples
241                and columns is the number of features.
242
243        Returns:
244        -------
245        scores:  Pandas DataFrame
246            Returns metrics of all the models in a Pandas DataFrame.
247
248        predictions : Pandas DataFrame
249            Returns predictions of all the models in a Pandas DataFrame.
250
251        """
252        R2 = []
253        ADJR2 = []
254        RMSE = []
255        # WIN = []
256        names = []
257        TIME = []
258        predictions = {}
259
260        if self.custom_metric:
261            CUSTOM_METRIC = []
262
263        if isinstance(X_train, np.ndarray):
264            X_train = pd.DataFrame(X_train)
265            X_test = pd.DataFrame(X_test)
266
267        numeric_features = X_train.select_dtypes(include=[np.number]).columns
268        categorical_features = X_train.select_dtypes(include=["object"]).columns
269
270        categorical_low, categorical_high = get_card_split(
271            X_train, categorical_features
272        )
273
274        if self.preprocess is True:
275            preprocessor = ColumnTransformer(
276                transformers=[
277                    ("numeric", numeric_transformer, numeric_features),
278                    (
279                        "categorical_low",
280                        categorical_transformer_low,
281                        categorical_low,
282                    ),
283                    (
284                        "categorical_high",
285                        categorical_transformer_high,
286                        categorical_high,
287                    ),
288                ]
289            )
290
291        # base models
292        try:
293            baseline_names = ["RandomForestRegressor", "XGBRegressor"]
294            baseline_models = [RandomForestRegressor(), xgb.XGBRegressor()]
295        except Exception as exception:
296            baseline_names = ["RandomForestRegressor"]
297            baseline_models = [RandomForestRegressor()]
298
299        for name, model in zip(baseline_names, baseline_models):
300            start = time.time()
301            try:
302                model.fit(X_train, y_train)
303                self.models_[name] = model
304                y_pred = model.predict(X_test)
305                r_squared = r2_score(y_test, y_pred)
306                adj_rsquared = adjusted_rsquared(
307                    r_squared, X_test.shape[0], X_test.shape[1]
308                )
309                rmse = np.sqrt(np.mean((y_test - y_pred) ** 2))
310
311                names.append(name)
312                R2.append(r_squared)
313                ADJR2.append(adj_rsquared)
314                RMSE.append(rmse)
315                TIME.append(time.time() - start)
316
317                if self.custom_metric:
318                    custom_metric = self.custom_metric(y_test, y_pred)
319                    CUSTOM_METRIC.append(custom_metric)
320
321                if self.verbose > 0:
322                    scores_verbose = {
323                        "Model": name,
324                        "R-Squared": r_squared,
325                        "Adjusted R-Squared": adj_rsquared,
326                        "RMSE": rmse,
327                        "Time taken": time.time() - start,
328                    }
329
330                    if self.custom_metric:
331                        scores_verbose[self.custom_metric.__name__] = (
332                            custom_metric
333                        )
334
335                    print(scores_verbose)
336                if self.predictions:
337                    predictions[name] = y_pred
338            except Exception as exception:
339                if self.ignore_warnings is False:
340                    print(name + " model failed to execute")
341                    print(exception)
342
343        if self.estimators == "all":
344            self.regressors = DEEPREGRESSORS
345        else:
346            self.regressors = [
347                ("DeepCustomRegressor(" + est[0] + ")", est[1])
348                for est in all_estimators()
349                if (
350                    issubclass(est[1], RegressorMixin)
351                    and (est[0] in self.estimators)
352                )
353            ]
354
355        if self.preprocess is True:
356
357            for name, model in tqdm(self.regressors):  # do parallel exec
358                start = time.time()
359                try:
360                    if "random_state" in model().get_params().keys():
361                        layer_regr = CustomRegressor(
362                            obj=model(random_state=self.random_state),
363                            n_hidden_features=self.n_hidden_features,
364                            activation_name=self.activation_name,
365                            a=self.a,
366                            nodes_sim=self.nodes_sim,
367                            bias=self.bias,
368                            dropout=self.dropout,
369                            direct_link=self.direct_link,
370                            n_clusters=self.n_clusters,
371                            cluster_encode=self.cluster_encode,
372                            type_clust=self.type_clust,
373                            type_scaling=self.type_scaling,
374                            col_sample=self.col_sample,
375                            row_sample=self.row_sample,
376                            seed=self.seed,
377                            backend=self.backend,
378                        )
379                    else:
380                        layer_regr = CustomRegressor(
381                            obj=model(),
382                            n_hidden_features=self.n_hidden_features,
383                            activation_name=self.activation_name,
384                            a=self.a,
385                            nodes_sim=self.nodes_sim,
386                            bias=self.bias,
387                            dropout=self.dropout,
388                            direct_link=self.direct_link,
389                            n_clusters=self.n_clusters,
390                            cluster_encode=self.cluster_encode,
391                            type_clust=self.type_clust,
392                            type_scaling=self.type_scaling,
393                            col_sample=self.col_sample,
394                            row_sample=self.row_sample,
395                            seed=self.seed,
396                            backend=self.backend,
397                        )
398
399                    for _ in range(self.n_layers):
400                        layer_regr = deepcopy(
401                            CustomRegressor(
402                                obj=layer_regr,
403                                n_hidden_features=self.n_hidden_features,
404                                activation_name=self.activation_name,
405                                a=self.a,
406                                nodes_sim=self.nodes_sim,
407                                bias=self.bias,
408                                dropout=self.dropout,
409                                direct_link=self.direct_link,
410                                n_clusters=self.n_clusters,
411                                cluster_encode=self.cluster_encode,
412                                type_clust=self.type_clust,
413                                type_scaling=self.type_scaling,
414                                col_sample=self.col_sample,
415                                row_sample=self.row_sample,
416                                seed=self.seed,
417                                backend=self.backend,
418                            )
419                        )
420
421                    layer_regr.fit(X_train, y_train)
422
423                    pipe = Pipeline(
424                        steps=[
425                            ("preprocessor", preprocessor),
426                            ("regressor", layer_regr),
427                        ]
428                    )
429
430                    pipe.fit(X_train, y_train)
431
432                    self.models_[name] = pipe
433                    y_pred = pipe.predict(X_test)
434                    r_squared = r2_score(y_test, y_pred)
435                    adj_rsquared = adjusted_rsquared(
436                        r_squared, X_test.shape[0], X_test.shape[1]
437                    )
438                    rmse = np.sqrt(np.mean((y_test - y_pred) ** 2))
439
440                    names.append(name)
441                    R2.append(r_squared)
442                    ADJR2.append(adj_rsquared)
443                    RMSE.append(rmse)
444                    TIME.append(time.time() - start)
445
446                    if self.custom_metric:
447                        custom_metric = self.custom_metric(y_test, y_pred)
448                        CUSTOM_METRIC.append(custom_metric)
449
450                    if self.verbose > 0:
451                        scores_verbose = {
452                            "Model": name,
453                            "R-Squared": r_squared,
454                            "Adjusted R-Squared": adj_rsquared,
455                            "RMSE": rmse,
456                            "Time taken": time.time() - start,
457                        }
458
459                        if self.custom_metric:
460                            scores_verbose[self.custom_metric.__name__] = (
461                                custom_metric
462                            )
463
464                        print(scores_verbose)
465                    if self.predictions:
466                        predictions[name] = y_pred
467                except Exception as exception:
468                    if self.ignore_warnings is False:
469                        print(name + " model failed to execute")
470                        print(exception)
471
472        else:  # no preprocessing
473
474            for name, model in tqdm(self.regressors):  # do parallel exec
475                start = time.time()
476                try:
477                    if "random_state" in model().get_params().keys():
478                        layer_regr = CustomRegressor(
479                            obj=model(random_state=self.random_state),
480                            n_hidden_features=self.n_hidden_features,
481                            activation_name=self.activation_name,
482                            a=self.a,
483                            nodes_sim=self.nodes_sim,
484                            bias=self.bias,
485                            dropout=self.dropout,
486                            direct_link=self.direct_link,
487                            n_clusters=self.n_clusters,
488                            cluster_encode=self.cluster_encode,
489                            type_clust=self.type_clust,
490                            type_scaling=self.type_scaling,
491                            col_sample=self.col_sample,
492                            row_sample=self.row_sample,
493                            seed=self.seed,
494                            backend=self.backend,
495                        )
496                    else:
497                        layer_regr = CustomRegressor(
498                            obj=model(),
499                            n_hidden_features=self.n_hidden_features,
500                            activation_name=self.activation_name,
501                            a=self.a,
502                            nodes_sim=self.nodes_sim,
503                            bias=self.bias,
504                            dropout=self.dropout,
505                            direct_link=self.direct_link,
506                            n_clusters=self.n_clusters,
507                            cluster_encode=self.cluster_encode,
508                            type_clust=self.type_clust,
509                            type_scaling=self.type_scaling,
510                            col_sample=self.col_sample,
511                            row_sample=self.row_sample,
512                            seed=self.seed,
513                            backend=self.backend,
514                        )
515
516                    layer_regr.fit(X_train, y_train)
517
518                    for _ in range(self.n_layers):
519                        layer_regr = deepcopy(
520                            CustomRegressor(
521                                obj=layer_regr,
522                                n_hidden_features=self.n_hidden_features,
523                                activation_name=self.activation_name,
524                                a=self.a,
525                                nodes_sim=self.nodes_sim,
526                                bias=self.bias,
527                                dropout=self.dropout,
528                                direct_link=self.direct_link,
529                                n_clusters=self.n_clusters,
530                                cluster_encode=self.cluster_encode,
531                                type_clust=self.type_clust,
532                                type_scaling=self.type_scaling,
533                                col_sample=self.col_sample,
534                                row_sample=self.row_sample,
535                                seed=self.seed,
536                                backend=self.backend,
537                            )
538                        )
539
540                        # layer_regr.fit(X_train, y_train)
541
542                    layer_regr.fit(X_train, y_train)
543
544                    self.models_[name] = layer_regr
545                    y_pred = layer_regr.predict(X_test)
546
547                    r_squared = r2_score(y_test, y_pred)
548                    adj_rsquared = adjusted_rsquared(
549                        r_squared, X_test.shape[0], X_test.shape[1]
550                    )
551                    rmse = np.sqrt(np.mean((y_test - y_pred) ** 2))
552
553                    names.append(name)
554                    R2.append(r_squared)
555                    ADJR2.append(adj_rsquared)
556                    RMSE.append(rmse)
557                    TIME.append(time.time() - start)
558
559                    if self.custom_metric:
560                        custom_metric = self.custom_metric(y_test, y_pred)
561                        CUSTOM_METRIC.append(custom_metric)
562
563                    if self.verbose > 0:
564                        scores_verbose = {
565                            "Model": name,
566                            "R-Squared": r_squared,
567                            "Adjusted R-Squared": adj_rsquared,
568                            "RMSE": rmse,
569                            "Time taken": time.time() - start,
570                        }
571
572                        if self.custom_metric:
573                            scores_verbose[self.custom_metric.__name__] = (
574                                custom_metric
575                            )
576
577                        print(scores_verbose)
578                    if self.predictions:
579                        predictions[name] = y_pred
580                except Exception as exception:
581                    if self.ignore_warnings is False:
582                        print(name + " model failed to execute")
583                        print(exception)
584
585        scores = {
586            "Model": names,
587            "Adjusted R-Squared": ADJR2,
588            "R-Squared": R2,
589            "RMSE": RMSE,
590            "Time Taken": TIME,
591        }
592
593        if self.custom_metric:
594            scores["Custom metric"] = CUSTOM_METRIC
595
596        scores = pd.DataFrame(scores)
597        scores = scores.sort_values(by=self.sort_by, ascending=True).set_index(
598            "Model"
599        )
600
601        self.best_model_ = self.models_[scores.index[0]]
602
603        if self.predictions is True:
604
605            return scores, predictions
606
607        return scores

Fit Regression algorithms to X_train and y_train, predict and score on X_test, y_test.

Parameters:

X_train : array-like,
    Training vectors, where rows are samples and columns are features.

X_test : array-like,
    Testing vectors, where rows are samples and columns are features.

y_train : array-like,
    Training target values.

y_test : array-like,
    Testing target values.

Returns:

scores: Pandas DataFrame
    Returns metrics of all the models in a Pandas DataFrame.

predictions : Pandas DataFrame
    Returns predictions of all the models in a Pandas DataFrame.
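
As a hedged illustration of the two return modes (a sketch, not taken verbatim from the library): with predictions=True the call returns the scores table together with the held-out predictions keyed by model name; with the default predictions=False only the scores table is returned.

import nnetsauce as ns
from sklearn import datasets
from sklearn.model_selection import train_test_split

X, y = datasets.load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

reg = ns.LazyDeepRegressor(verbose=0, ignore_warnings=True, predictions=True, n_layers=2)
scores, preds = reg.fit(X_train, X_test, y_train, y_test)
print(scores.head())    # metrics for each model, sorted by sort_by (RMSE by default)
print(list(preds)[:3])  # names of the models whose test-set predictions were kept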

def provide_models(self, X_train, X_test, y_train, y_test):
621    def provide_models(self, X_train, X_test, y_train, y_test):
622        """
623        This function returns all the model objects trained in fit function.
624        If fit is not called already, then we call fit and then return the models.
625
626        Parameters:
627
628            X_train : array-like,
629                Training vectors, where rows is the number of samples
630                and columns is the number of features.
631
632            X_test : array-like,
633                Testing vectors, where rows is the number of samples
634                and columns is the number of features.
635
636            y_train : array-like,
637                Training vectors, where rows is the number of samples
638                and columns is the number of features.
639
640            y_test : array-like,
641                Testing vectors, where rows is the number of samples
642                and columns is the number of features.
643
644        Returns:
645
646            models: dict-object,
647                Returns a dictionary with each model pipeline as value
648                with key as name of models.
649
650        """
651        if len(self.models_.keys()) == 0:
652            self.fit(X_train, X_test, y_train, y_test)
653
654        return self.models_

This function returns all the model objects trained by the fit function. If fit has not been called yet, it is called first and the models are then returned.

Parameters:

X_train : array-like,
    Training vectors, where rows are samples and columns are features.

X_test : array-like,
    Testing vectors, where rows are samples and columns are features.

y_train : array-like,
    Training target values.

y_test : array-like,
    Testing target values.

Returns:

models: dict-object,
    Returns a dictionary mapping each model name to its fitted pipeline.
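
A short, hedged sketch of using provide_models to retrieve the fitted pipelines and reuse one of them directly; which names end up in the dictionary depends on the scikit-learn estimators available at run time.

import nnetsauce as ns
from sklearn import datasets
from sklearn.model_selection import train_test_split

X, y = datasets.load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

reg = ns.LazyDeepRegressor(verbose=0, ignore_warnings=True)
models = reg.provide_models(X_train, X_test, y_train, y_test)  # calls fit first if needed
for name, fitted in list(models.items())[:3]:
    print(name, fitted.predict(X_test)[:2])  # every stored value exposes predict
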
class LazyMTS(nnetsauce.LazyDeepMTS):
1004class LazyMTS(LazyDeepMTS):
1005    """
1006    Fitting -- almost -- all the regression algorithms to multivariate time series
1007    and returning their scores (no layers).
1008
1009    Parameters:
1010
1011        verbose: int, optional (default=0)
1012            Any positive number for verbosity.
1013
1014        ignore_warnings: bool, optional (default=True)
1015            When set to True, the warning related to algorigms that are not
1016            able to run are ignored.
1017
1018        custom_metric: function, optional (default=None)
1019            When function is provided, models are evaluated based on the custom
1020              evaluation metric provided.
1021
1022        predictions: bool, optional (default=False)
1023            When set to True, the predictions of all the models models are returned as dataframe.
1024
1025        sort_by: string, optional (default='RMSE')
1026            Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE',
1027            'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and
1028            provided by custom_metric.
1029
1030        random_state: int, optional (default=42)
1031            Reproducibiility seed.
1032
1033        estimators: list, optional (default='all')
1034            list of Estimators (regression algorithms) names or just 'all' (default='all')
1035
1036        preprocess: bool, preprocessing is done when set to True
1037
1038        h: int, optional (default=None)
1039            Number of steps ahead to predict (when used, must be > 0 and < X_test.shape[0]).
1040
1041        All the other parameters are the same as MTS's.
1042
1043    Attributes:
1044
1045        models_: dict-object
1046            Returns a dictionary with each model pipeline as value
1047            with key as name of models.
1048
1049        best_model_: object
1050            Returns the best model pipeline based on the sort_by metric.
1051
1052    Examples:
1053
1054        See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict
1055
1056    """
1057
1058    def __init__(
1059        self,
1060        verbose=0,
1061        ignore_warnings=True,
1062        custom_metric=None,
1063        predictions=False,
1064        sort_by=None,  # leave it as is
1065        random_state=42,
1066        estimators="all",
1067        preprocess=False,
1068        h=None,
1069        # MTS attributes
1070        obj=None,
1071        n_hidden_features=5,
1072        activation_name="relu",
1073        a=0.01,
1074        nodes_sim="sobol",
1075        bias=True,
1076        dropout=0,
1077        direct_link=True,
1078        n_clusters=2,
1079        cluster_encode=True,
1080        type_clust="kmeans",
1081        type_scaling=("std", "std", "std"),
1082        lags=15,
1083        type_pi="scp2-kde",
1084        block_size=None,
1085        replications=None,
1086        kernel=None,
1087        agg="mean",
1088        seed=123,
1089        backend="cpu",
1090        show_progress=False,
1091    ):
1092        super().__init__(
1093            verbose=verbose,
1094            ignore_warnings=ignore_warnings,
1095            custom_metric=custom_metric,
1096            predictions=predictions,
1097            sort_by=sort_by,
1098            random_state=random_state,
1099            estimators=estimators,
1100            preprocess=preprocess,
1101            n_layers=1,
1102            h=h,
1103            obj=obj,
1104            n_hidden_features=n_hidden_features,
1105            activation_name=activation_name,
1106            a=a,
1107            nodes_sim=nodes_sim,
1108            bias=bias,
1109            dropout=dropout,
1110            direct_link=direct_link,
1111            n_clusters=n_clusters,
1112            cluster_encode=cluster_encode,
1113            type_clust=type_clust,
1114            type_scaling=type_scaling,
1115            lags=lags,
1116            type_pi=type_pi,
1117            block_size=block_size,
1118            replications=replications,
1119            kernel=kernel,
1120            agg=agg,
1121            seed=seed,
1122            backend=backend,
1123            show_progress=show_progress,
1124        )

Fitting -- almost -- all the regression algorithms to multivariate time series and returning their scores (no layers).

Parameters:

verbose: int, optional (default=0)
    Any positive number for verbosity.

ignore_warnings: bool, optional (default=True)
    When set to True, warnings about algorithms that fail to run are ignored.

custom_metric: function, optional (default=None)
    When a function is provided, models are also evaluated with this custom metric.

predictions: bool, optional (default=False)
    When set to True, the predictions of all the models are returned as a dataframe.

sort_by: string, optional (default='RMSE')
    Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE',
    'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and
    provided by custom_metric.

random_state: int, optional (default=42)
    Reproducibility seed.

estimators: list, optional (default='all')
    List of estimator (regression algorithm) names, or just 'all' (default='all').

preprocess: bool
    When set to True, preprocessing is applied to the features before fitting.

h: int, optional (default=None)
    Number of steps ahead to predict (when used, must be > 0 and < X_test.shape[0]).

All the other parameters are the same as MTS's.

Attributes:

models_: dict-object
    Returns a dictionary mapping each model name to its fitted pipeline.

best_model_: object
    Returns the best model pipeline based on the sort_by metric.

Examples:

See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict
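
Below is a hedged sketch (adapted from, but not copied from, the blog post referenced above) of LazyMTS on a small synthetic multivariate series; the column names, the split point, and the restricted estimator list are illustrative assumptions, and depending on the installed version, type_pi and replications may need adjusting for interval-based metrics.

import nnetsauce as ns
import numpy as np
import pandas as pd

# toy multivariate series: two random-walk columns over 120 weekly time steps
rng = np.random.default_rng(42)
idx = pd.date_range("2020-01-01", periods=120, freq="W")
df = pd.DataFrame(
    {"series1": np.cumsum(rng.normal(size=120)),
     "series2": np.cumsum(rng.normal(size=120))},
    index=idx,
)
df_train, df_test = df.iloc[:100], df.iloc[100:]

mts = ns.LazyMTS(verbose=0, ignore_warnings=True, lags=10,
                 estimators=["Ridge", "ElasticNet"])
scores = mts.fit(df_train, df_test)  # scores only, since predictions=False by default
print(scores)
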
class LazyDeepMTS(nnetsauce.MTS):
 104class LazyDeepMTS(MTS):
 105    """
 106
 107    Fitting -- almost -- all the regression algorithms with layers of
 108    nnetsauce's CustomRegressor to multivariate time series
 109    and returning their scores.
 110
 111    Parameters:
 112
 113        verbose: int, optional (default=0)
 114            Any positive number for verbosity.
 115
 116        ignore_warnings: bool, optional (default=True)
 117            When set to True, the warning related to algorigms that are not
 118            able to run are ignored.
 119
 120        custom_metric: function, optional (default=None)
 121            When function is provided, models are evaluated based on the custom
 122              evaluation metric provided.
 123
 124        predictions: bool, optional (default=False)
 125            When set to True, the predictions of all the models models are returned as dataframe.
 126
 127        sort_by: string, optional (default='RMSE')
 128            Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE',
 129            'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and
 130            provided by custom_metric.
 131
 132        random_state: int, optional (default=42)
 133            Reproducibiility seed.
 134
 135        estimators: list, optional (default='all')
 136            list of Estimators (regression algorithms) names or just 'all' (default='all')
 137
 138        preprocess: bool, preprocessing is done when set to True
 139
 140        n_layers: int, optional (default=1)
 141            Number of layers in the network. When set to 1, the model is equivalent to a MTS.
 142
 143        h: int, optional (default=None)
 144            Number of steps ahead to predict (when used, must be > 0 and < X_test.shape[0]).
 145
 146        All the other parameters are the same as MTS's.
 147
 148    Attributes:
 149
 150        models_: dict-object
 151            Returns a dictionary with each model pipeline as value
 152            with key as name of models.
 153
 154        best_model_: object
 155            Returns the best model pipeline based on the sort_by metric.
 156
 157    Examples:
 158
 159        See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict
 160
 161    """
 162
 163    def __init__(
 164        self,
 165        verbose=0,
 166        ignore_warnings=True,
 167        custom_metric=None,
 168        predictions=False,
 169        sort_by=None,  # leave it as is
 170        random_state=42,
 171        estimators="all",
 172        preprocess=False,
 173        n_layers=1,
 174        h=None,
 175        # MTS attributes
 176        obj=None,
 177        n_hidden_features=5,
 178        activation_name="relu",
 179        a=0.01,
 180        nodes_sim="sobol",
 181        bias=True,
 182        dropout=0,
 183        direct_link=True,
 184        n_clusters=2,
 185        cluster_encode=True,
 186        type_clust="kmeans",
 187        type_scaling=("std", "std", "std"),
 188        lags=15,
 189        type_pi="scp2-kde",
 190        block_size=None,
 191        replications=None,
 192        kernel=None,
 193        agg="mean",
 194        seed=123,
 195        backend="cpu",
 196        show_progress=False,
 197    ):
 198        self.verbose = verbose
 199        self.ignore_warnings = ignore_warnings
 200        self.custom_metric = custom_metric
 201        self.predictions = predictions
 202        self.sort_by = sort_by
 203        self.models_ = {}
 204        self.best_model_ = None
 205        self.random_state = random_state
 206        self.estimators = estimators
 207        self.preprocess = preprocess
 208        self.n_layers = n_layers
 209        self.h = h
 210        super().__init__(
 211            obj=obj,
 212            n_hidden_features=n_hidden_features,
 213            activation_name=activation_name,
 214            a=a,
 215            nodes_sim=nodes_sim,
 216            bias=bias,
 217            dropout=dropout,
 218            direct_link=direct_link,
 219            n_clusters=n_clusters,
 220            cluster_encode=cluster_encode,
 221            type_clust=type_clust,
 222            type_scaling=type_scaling,
 223            seed=seed,
 224            backend=backend,
 225            lags=lags,
 226            type_pi=type_pi,
 227            block_size=block_size,
 228            replications=replications,
 229            kernel=kernel,
 230            agg=agg,
 231            verbose=verbose,
 232            show_progress=show_progress,
 233        )
 234        if self.replications is not None or self.type_pi == "gaussian":
 235            if self.sort_by is None:
 236                self.sort_by = "WINKLERSCORE"
 237        else:
 238            if self.sort_by is None:
 239                self.sort_by = "RMSE"
 240
 241    def fit(self, X_train, X_test, xreg=None, per_series=False, **kwargs):
 242        """Fit Regression algorithms to X_train, predict and score on X_test.
 243
 244        Parameters:
 245
 246            X_train: array-like or data frame,
 247                Training vectors, where rows is the number of samples
 248                and columns is the number of features.
 249
 250            X_test: array-like or data frame,
 251                Testing vectors, where rows is the number of samples
 252                and columns is the number of features.
 253
 254            xreg: array-like, optional (default=None)
 255                Additional (external) regressors to be passed to self.obj
 256                xreg must be in 'increasing' order (most recent observations last)
 257
 258            per_series: bool, optional (default=False)
 259                When set to True, the metrics are computed series by series.
 260
 261            **kwargs: dict, optional (default=None)
 262                Additional parameters to be passed to `fit` method of `obj`.
 263
 264        Returns:
 265
 266            scores: Pandas DataFrame
 267                Returns metrics of all the models in a Pandas DataFrame.
 268
 269            predictions: Pandas DataFrame
 270                Returns predictions of all the models in a Pandas DataFrame.
 271
 272        """
 273        R2 = []
 274        ADJR2 = []
 275        ME = []
 276        MPL = []
 277        RMSE = []
 278        MAE = []
 279        MPE = []
 280        MAPE = []
 281        WINKLERSCORE = []
 282        COVERAGE = []
 283
 284        # WIN = []
 285        names = []
 286        TIME = []
 287        predictions = {}
 288
 289        if self.custom_metric is not None:
 290            CUSTOM_METRIC = []
 291
 292        if self.h is None:
 293            assert X_test is not None, "If h is None, X_test must be provided."
 294
 295        if isinstance(X_train, np.ndarray):
 296            X_train = pd.DataFrame(X_train)
 297            X_test = pd.DataFrame(X_test)
 298
 299        self.series_names = X_train.columns.tolist()
 300
 301        X_train = convert_df_to_numeric(X_train)
 302        X_test = convert_df_to_numeric(X_test)
 303
 304        numeric_features = X_train.select_dtypes(include=[np.number]).columns
 305        categorical_features = X_train.select_dtypes(include=["object"]).columns
 306
 307        categorical_low, categorical_high = get_card_split(
 308            X_train, categorical_features
 309        )
 310
 311        if self.preprocess:
 312            preprocessor = ColumnTransformer(
 313                transformers=[
 314                    ("numeric", numeric_transformer, numeric_features),
 315                    (
 316                        "categorical_low",
 317                        categorical_transformer_low,
 318                        categorical_low,
 319                    ),
 320                    (
 321                        "categorical_high",
 322                        categorical_transformer_high,
 323                        categorical_high,
 324                    ),
 325                ]
 326            )
 327
 328        # baselines (Classical MTS) ----
 329        for i, name in enumerate(["ARIMA", "ETS", "Theta", "VAR", "VECM"]):
 330            try:
 331                start = time.time()
 332                regr = ClassicalMTS(model=name)
 333                regr.fit(X_train, **kwargs)
 334                self.models_[name] = regr
 335                if self.h is None:
 336                    X_pred = regr.predict(h=X_test.shape[0], **kwargs)
 337                else:
 338                    assert self.h > 0, "h must be > 0"
 339                    X_pred = regr.predict(h=self.h, **kwargs)
 340                    try:
 341                        X_test = X_test[0: self.h, :]
 342                    except Exception as e:
 343                        X_test = X_test.iloc[0: self.h, :]
 344
 345                rmse = mean_errors(
 346                    actual=X_test,
 347                    pred=X_pred,
 348                    scoring="root_mean_squared_error",
 349                    per_series=per_series,
 350                )
 351                mae = mean_errors(
 352                    actual=X_test,
 353                    pred=X_pred,
 354                    scoring="mean_absolute_error",
 355                    per_series=per_series,
 356                )
 357                mpl = mean_errors(
 358                    actual=X_test,
 359                    pred=X_pred,
 360                    scoring="mean_pinball_loss",
 361                    per_series=per_series,
 362                )
 363            except Exception:
 364
 365                continue
 366
 367            names.append(name)
 368            RMSE.append(rmse)
 369            MAE.append(mae)
 370            MPL.append(mpl)
 371
 372            if self.custom_metric is not None:
 373                try:
 374                    if self.h is None:
 375                        custom_metric = self.custom_metric(X_test, X_pred)
 376                    else:
 377                        custom_metric = self.custom_metric(X_test_h, X_pred)
 378                    CUSTOM_METRIC.append(custom_metric)
 379                except Exception as e:
 380                    custom_metric = np.iinfo(np.float32).max
 381                    CUSTOM_METRIC.append(np.iinfo(np.float32).max)
 382
 383            if (self.replications is not None) or (self.type_pi == "gaussian"):
 384                if per_series == False:
 385                    winklerscore = winkler_score(
 386                        obj=X_pred, actual=X_test, level=95
 387                    )
 388                    coveragecalc = coverage(X_pred, X_test, level=95)
 389                else:
 390                    winklerscore = winkler_score(
 391                        obj=X_pred, actual=X_test, level=95, per_series=True
 392                    )
 393                    coveragecalc = coverage(
 394                        X_pred, X_test, level=95, per_series=True
 395                    )
 396                WINKLERSCORE.append(winklerscore)
 397                COVERAGE.append(coveragecalc)
 398            TIME.append(time.time() - start)
 399
 400        if self.estimators == "all":
 401            if self.n_layers <= 1:
 402                self.regressors = REGRESSORSMTS
 403            else:
 404                self.regressors = DEEPREGRESSORSMTS
 405        else:
 406            if self.n_layers <= 1:
 407                self.regressors = [
 408                    ("MTS(" + est[0] + ")", est[1])
 409                    for est in all_estimators()
 410                    if (
 411                        issubclass(est[1], RegressorMixin)
 412                        and (est[0] in self.estimators)
 413                    )
 414                ]
 415            else:  # self.n_layers > 1
 416                self.regressors = [
 417                    ("DeepMTS(" + est[0] + ")", est[1])
 418                    for est in all_estimators()
 419                    if (
 420                        issubclass(est[1], RegressorMixin)
 421                        and (est[0] in self.estimators)
 422                    )
 423                ]
 424
 425        if self.preprocess is True:
 426            for name, model in tqdm(self.regressors):  # do parallel exec
 427                start = time.time()
 428                try:
 429                    if "random_state" in model().get_params().keys():
 430                        pipe = Pipeline(
 431                            steps=[
 432                                ("preprocessor", preprocessor),
 433                                (
 434                                    "regressor",
 435                                    DeepMTS(
 436                                        obj=model(
 437                                            random_state=self.random_state,
 438                                            **kwargs,
 439                                        ),
 440                                        n_layers=self.n_layers,
 441                                        n_hidden_features=self.n_hidden_features,
 442                                        activation_name=self.activation_name,
 443                                        a=self.a,
 444                                        nodes_sim=self.nodes_sim,
 445                                        bias=self.bias,
 446                                        dropout=self.dropout,
 447                                        direct_link=self.direct_link,
 448                                        n_clusters=self.n_clusters,
 449                                        cluster_encode=self.cluster_encode,
 450                                        type_clust=self.type_clust,
 451                                        type_scaling=self.type_scaling,
 452                                        lags=self.lags,
 453                                        type_pi=self.type_pi,
 454                                        block_size=self.block_size,
 455                                        replications=self.replications,
 456                                        kernel=self.kernel,
 457                                        agg=self.agg,
 458                                        seed=self.seed,
 459                                        backend=self.backend,
 460                                        show_progress=self.show_progress,
 461                                    ),
 462                                ),
 463                            ]
 464                        )
 465                    else:  # "random_state" in model().get_params().keys()
 466                        pipe = Pipeline(
 467                            steps=[
 468                                ("preprocessor", preprocessor),
 469                                (
 470                                    "regressor",
 471                                    DeepMTS(
 472                                        obj=model(**kwargs),
 473                                        n_layers=self.n_layers,
 474                                        n_hidden_features=self.n_hidden_features,
 475                                        activation_name=self.activation_name,
 476                                        a=self.a,
 477                                        nodes_sim=self.nodes_sim,
 478                                        bias=self.bias,
 479                                        dropout=self.dropout,
 480                                        direct_link=self.direct_link,
 481                                        n_clusters=self.n_clusters,
 482                                        cluster_encode=self.cluster_encode,
 483                                        type_clust=self.type_clust,
 484                                        type_scaling=self.type_scaling,
 485                                        lags=self.lags,
 486                                        type_pi=self.type_pi,
 487                                        block_size=self.block_size,
 488                                        replications=self.replications,
 489                                        kernel=self.kernel,
 490                                        agg=self.agg,
 491                                        seed=self.seed,
 492                                        backend=self.backend,
 493                                        show_progress=self.show_progress,
 494                                    ),
 495                                ),
 496                            ]
 497                        )
 498
 499                    pipe.fit(X_train, **kwargs)
 500                    # pipe.fit(X_train, xreg=xreg)
 501
 502                    self.models_[name] = pipe
 503
 504                    if self.h is None:
 505                        X_pred = pipe["regressor"].predict(h=self.h, **kwargs)
 506                    else:
 507                        assert self.h > 0, "h must be > 0"
 508                        X_pred = pipe["regressor"].predict(h=self.h, **kwargs)
 509
 510                    if (self.replications is not None) or (
 511                        self.type_pi == "gaussian"
 512                    ):
 513                        rmse = mean_errors(
 514                            actual=X_test,
 515                            pred=X_pred,
 516                            scoring="root_mean_squared_error",
 517                            per_series=per_series,
 518                        )
 519                        mae = mean_errors(
 520                            actual=X_test,
 521                            pred=X_pred,
 522                            scoring="mean_absolute_error",
 523                            per_series=per_series,
 524                        )
 525                        mpl = mean_errors(
 526                            actual=X_test,
 527                            pred=X_pred,
 528                            scoring="mean_pinball_loss",
 529                            per_series=per_series,
 530                        )
 531                        winklerscore = winkler_score(
 532                            obj=X_pred,
 533                            actual=X_test,
 534                            level=95,
 535                            per_series=per_series,
 536                        )
 537                        coveragecalc = coverage(
 538                            X_pred, X_test, level=95, per_series=per_series
 539                        )
 540                    else:
 541                        rmse = mean_errors(
 542                            actual=X_test,
 543                            pred=X_pred,
 544                            scoring="root_mean_squared_error",
 545                            per_series=per_series,
 546                        )
 547                        mae = mean_errors(
 548                            actual=X_test,
 549                            pred=X_pred,
 550                            scoring="mean_absolute_error",
 551                            per_series=per_series,
 552                        )
 553                        mpl = mean_errors(
 554                            actual=X_test,
 555                            pred=X_pred,
 556                            scoring="mean_pinball_loss",
 557                            per_series=per_series,
 558                        )
 559
 560                    names.append(name)
 561                    RMSE.append(rmse)
 562                    MAE.append(mae)
 563                    MPL.append(mpl)
 564
 565                    if (self.replications is not None) or (
 566                        self.type_pi == "gaussian"
 567                    ):
 568                        WINKLERSCORE.append(winklerscore)
 569                        COVERAGE.append(coveragecalc)
 570                    TIME.append(time.time() - start)
 571
 572                    if self.custom_metric is not None:
 573                        try:
 574                            custom_metric = self.custom_metric(X_test, X_pred)
 575                            CUSTOM_METRIC.append(custom_metric)
 576                        except Exception as e:
 577                            custom_metric = np.iinfo(np.float32).max
 578                            CUSTOM_METRIC.append(custom_metric)
 579
 580                    if self.verbose > 0:
 581                        if (self.replications is not None) or (
 582                            self.type_pi == "gaussian"
 583                        ):
 584                            scores_verbose = {
 585                                "Model": name,
 586                                "RMSE": rmse,
 587                                "MAE": mae,
 588                                "MPL": mpl,
 589                                "WINKLERSCORE": winklerscore,
 590                                "COVERAGE": coveragecalc,
 591                                "Time taken": time.time() - start,
 592                            }
 593                        else:
 594                            scores_verbose = {
 595                                "Model": name,
 596                                "RMSE": rmse,
 597                                "MAE": mae,
 598                                "MPL": mpl,
 599                                "Time taken": time.time() - start,
 600                            }
 601
 602                        if self.custom_metric is not None:
 603                            scores_verbose["Custom metric"] = custom_metric
 604
 605                    if self.predictions:
 606                        predictions[name] = X_pred
 607                except Exception as exception:
 608                    if self.ignore_warnings is False:
 609                        print(name + " model failed to execute")
 610                        print(exception)
 611
 612        else:  # no preprocessing
 613
 614            for name, model in tqdm(self.regressors):  # do parallel exec
 615                start = time.time()
 616                try:
 617                    if "random_state" in model().get_params().keys():
 618                        pipe = DeepMTS(
 619                            obj=model(random_state=self.random_state, **kwargs),
 620                            n_layers=self.n_layers,
 621                            n_hidden_features=self.n_hidden_features,
 622                            activation_name=self.activation_name,
 623                            a=self.a,
 624                            nodes_sim=self.nodes_sim,
 625                            bias=self.bias,
 626                            dropout=self.dropout,
 627                            direct_link=self.direct_link,
 628                            n_clusters=self.n_clusters,
 629                            cluster_encode=self.cluster_encode,
 630                            type_clust=self.type_clust,
 631                            type_scaling=self.type_scaling,
 632                            lags=self.lags,
 633                            type_pi=self.type_pi,
 634                            block_size=self.block_size,
 635                            replications=self.replications,
 636                            kernel=self.kernel,
 637                            agg=self.agg,
 638                            seed=self.seed,
 639                            backend=self.backend,
 640                            show_progress=self.show_progress,
 641                        )
 642                    else:
 643                        pipe = DeepMTS(
 644                            obj=model(**kwargs),
 645                            n_layers=self.n_layers,
 646                            n_hidden_features=self.n_hidden_features,
 647                            activation_name=self.activation_name,
 648                            a=self.a,
 649                            nodes_sim=self.nodes_sim,
 650                            bias=self.bias,
 651                            dropout=self.dropout,
 652                            direct_link=self.direct_link,
 653                            n_clusters=self.n_clusters,
 654                            cluster_encode=self.cluster_encode,
 655                            type_clust=self.type_clust,
 656                            type_scaling=self.type_scaling,
 657                            lags=self.lags,
 658                            type_pi=self.type_pi,
 659                            block_size=self.block_size,
 660                            replications=self.replications,
 661                            kernel=self.kernel,
 662                            agg=self.agg,
 663                            seed=self.seed,
 664                            backend=self.backend,
 665                            show_progress=self.show_progress,
 666                        )
 667
 668                    pipe.fit(X_train, xreg, **kwargs)
 669                    # pipe.fit(X_train, xreg=xreg) # DO xreg like in `ahead`
 670
 671                    self.models_[name] = pipe
 672
 673                    if self.preprocess is True:
 674                        if self.h is None:
 675                            X_pred = pipe["regressor"].predict(
 676                                h=X_test.shape[0], **kwargs
 677                            )
 678                        else:
 679                            assert (
 680                                self.h > 0 and self.h <= X_test.shape[0]
 681                            ), "h must be > 0 and < X_test.shape[0]"
 682                            X_pred = pipe["regressor"].predict(
 683                                h=self.h, **kwargs
 684                            )
 685
 686                    else:
 687
 688                        if self.h is None:
 689                            X_pred = pipe.predict(
 690                                h=X_test.shape[0],
 691                                **kwargs,
 692                                # X_pred = pipe.predict(h=X_test.shape[0], new_xreg=new_xreg) ## DO xreg like in `ahead`
 693                            )
 694                        else:
 695                            assert (
 696                                self.h > 0 and self.h <= X_test.shape[0]
 697                            ), "h must be > 0 and <= X_test.shape[0]"
 698                            X_pred = pipe.predict(h=self.h, **kwargs)
 699
 700                    if self.h is None:
 701                        if (self.replications is not None) or (
 702                            self.type_pi == "gaussian"
 703                        ):
 704                            rmse = mean_errors(
 705                                actual=X_test,
 706                                pred=X_pred.mean,
 707                                scoring="root_mean_squared_error",
 708                                per_series=per_series,
 709                            )
 710                            mae = mean_errors(
 711                                actual=X_test,
 712                                pred=X_pred.mean,
 713                                scoring="mean_absolute_error",
 714                                per_series=per_series,
 715                            )
 716                            mpl = mean_errors(
 717                                actual=X_test,
 718                                pred=X_pred.mean,
 719                                scoring="mean_pinball_loss",
 720                                per_series=per_series,
 721                            )
 722                            winklerscore = winkler_score(
 723                                obj=X_pred,
 724                                actual=X_test,
 725                                level=95,
 726                                per_series=per_series,
 727                            )
 728                            coveragecalc = coverage(
 729                                X_pred, X_test, level=95, per_series=per_series
 730                            )
 731                        else:  # no prediction interval
 732                            rmse = mean_errors(
 733                                actual=X_test,
 734                                pred=X_pred,
 735                                scoring="root_mean_squared_error",
 736                                per_series=per_series,
 737                            )
 738                            mae = mean_errors(
 739                                actual=X_test,
 740                                pred=X_pred,
 741                                scoring="mean_absolute_error",
 742                                per_series=per_series,
 743                            )
 744                            mpl = mean_errors(
 745                                actual=X_test,
 746                                pred=X_pred,
 747                                scoring="mean_pinball_loss",
 748                                per_series=per_series,
 749                            )
 750                    else:  # self.h is not None
 751                        if (self.replications is not None) or (
 752                            self.type_pi == "gaussian"
 753                        ):
 754
 755                            if isinstance(X_test, pd.DataFrame):
 756                                X_test_h = X_test.iloc[0: self.h, :]
 757                                rmse = mean_errors(
 758                                    actual=X_test_h,
 759                                    pred=X_pred,
 760                                    scoring="root_mean_squared_error",
 761                                    per_series=per_series,
 762                                )
 763                                mae = mean_errors(
 764                                    actual=X_test_h,
 765                                    pred=X_pred,
 766                                    scoring="mean_absolute_error",
 767                                    per_series=per_series,
 768                                )
 769                                mpl = mean_errors(
 770                                    actual=X_test_h,
 771                                    pred=X_pred,
 772                                    scoring="mean_pinball_loss",
 773                                    per_series=per_series,
 774                                )
 775                                winklerscore = winkler_score(
 776                                    obj=X_pred,
 777                                    actual=X_test_h,
 778                                    level=95,
 779                                    per_series=per_series,
 780                                )
 781                                coveragecalc = coverage(
 782                                    X_pred,
 783                                    X_test_h,
 784                                    level=95,
 785                                    per_series=per_series,
 786                                )
 787                            else:
 788                                X_test_h = X_test[0: self.h, :]
 789                                rmse = mean_errors(
 790                                    actual=X_test_h,
 791                                    pred=X_pred,
 792                                    scoring="root_mean_squared_error",
 793                                    per_series=per_series,
 794                                )
 795                                mae = mean_errors(
 796                                    actual=X_test_h,
 797                                    pred=X_pred,
 798                                    scoring="mean_absolute_error",
 799                                    per_series=per_series,
 800                                )
 801                                mpl = mean_errors(
 802                                    actual=X_test_h,
 803                                    pred=X_pred,
 804                                    scoring="mean_pinball_loss",
 805                                    per_series=per_series,
 806                                )
 807                                winklerscore = winkler_score(
 808                                    obj=X_pred,
 809                                    actual=X_test_h,
 810                                    level=95,
 811                                    per_series=per_series,
 812                                )
 813                                coveragecalc = coverage(
 814                                    X_pred,
 815                                    X_test_h,
 816                                    level=95,
 817                                    per_series=per_series,
 818                                )
 819                        else:  # no prediction interval
 820
 821                            if isinstance(X_test, pd.DataFrame):
 822                                X_test_h = X_test.iloc[0: self.h, :]
 823                                rmse = mean_errors(
 824                                    actual=X_test_h,
 825                                    pred=X_pred,
 826                                    scoring="root_mean_squared_error",
 827                                    per_series=per_series,
 828                                )
 829                                mae = mean_errors(
 830                                    actual=X_test_h,
 831                                    pred=X_pred,
 832                                    scoring="mean_absolute_error",
 833                                    per_series=per_series,
 834                                )
 835                                mpl = mean_errors(
 836                                    actual=X_test_h,
 837                                    pred=X_pred,
 838                                    scoring="mean_pinball_loss",
 839                                    per_series=per_series,
 840                                )
 841                            else:
 842                                X_test_h = X_test[0: self.h, :]
 843                                rmse = mean_errors(
 844                                    actual=X_test_h,
 845                                    pred=X_pred,
 846                                    scoring="root_mean_squared_error",
 847                                    per_series=per_series,
 848                                )
 849                                mae = mean_errors(
 850                                    actual=X_test_h,
 851                                    pred=X_pred,
 852                                    scoring="mean_absolute_error",
 853                                    per_series=per_series,
 854                                )
 855
 856                    names.append(name)
 857                    RMSE.append(rmse)
 858                    MAE.append(mae)
 859                    MPL.append(mpl)
 860                    if (self.replications is not None) or (
 861                        self.type_pi == "gaussian"
 862                    ):
 863                        WINKLERSCORE.append(winklerscore)
 864                        COVERAGE.append(coveragecalc)
 865                    TIME.append(time.time() - start)
 866
 867                    if self.custom_metric is not None:
 868                        try:
 869                            if self.h is None:
 870                                custom_metric = self.custom_metric(
 871                                    X_test, X_pred
 872                                )
 873                            else:
 874                                custom_metric = self.custom_metric(
 875                                    X_test_h, X_pred
 876                                )
 877                            CUSTOM_METRIC.append(custom_metric)
 878                        except Exception as e:
 879                            custom_metric = np.finfo(np.float32).max
 880                            CUSTOM_METRIC.append(np.finfo(np.float32).max)
 881
 882                    if self.verbose > 0:
 883                        if (self.replications is not None) or (
 884                            self.type_pi == "gaussian"
 885                        ):
 886                            scores_verbose = {
 887                                "Model": name,
 888                                "RMSE": rmse,
 889                                "MAE": mae,
 890                                "MPL": mpl,
 891                                "WINKLERSCORE": winklerscore,
 892                                "COVERAGE": coveragecalc,
 893                                "Time taken": time.time() - start,
 894                            }
 895                        else:
 896                            scores_verbose = {
 897                                "Model": name,
 898                                "RMSE": rmse,
 899                                "MAE": mae,
 900                                "MPL": mpl,
 901                                "Time taken": time.time() - start,
 902                            }
 903
 904                        if self.custom_metric is not None:
 905                            scores_verbose["Custom metric"] = custom_metric
 906
 907                    if self.predictions:
 908                        predictions[name] = X_pred
 909
 910                except Exception as exception:
 911                    if self.ignore_warnings is False:
 912                        print(name + " model failed to execute")
 913                        print(exception)
 914
 915        if (self.replications is not None) or (self.type_pi == "gaussian"):
 916            scores = {
 917                "Model": names,
 918                "RMSE": RMSE,
 919                "MAE": MAE,
 920                "MPL": MPL,
 921                "WINKLERSCORE": WINKLERSCORE,
 922                "COVERAGE": COVERAGE,
 923                "Time Taken": TIME,
 924            }
 925        else:
 926            scores = {
 927                "Model": names,
 928                "RMSE": RMSE,
 929                "MAE": MAE,
 930                "MPL": MPL,
 931                "Time Taken": TIME,
 932            }
 933
 934        if self.custom_metric is not None:
 935            scores["Custom metric"] = CUSTOM_METRIC
 936
 937        if per_series:
 938            scores = dict_to_dataframe_series(scores, self.series_names)
 939        else:
 940            scores = pd.DataFrame(scores)
 941
 942        try:  # case per_series, can't be sorted
 943            scores = scores.sort_values(
 944                by=self.sort_by, ascending=True
 945            ).set_index("Model")
 946
 947            self.best_model_ = self.models_[scores.index[0]]
 948        except Exception as e:
 949            pass
 950
 951        if self.predictions is True:
 952
 953            return scores, predictions
 954
 955        return scores
 956
 957    def get_best_model(self):
 958        """
 959        This function returns the best model pipeline based on the sort_by metric.
 960
 961        Returns:
 962
 963            best_model: object,
 964                Returns the best model pipeline based on the sort_by metric.
 965
 966        """
 967        return self.best_model_
 968
 969    def provide_models(self, X_train, X_test):
 970        """
 971        This function returns all the model objects trained in fit function.
 972        If fit is not called already, then we call fit and then return the models.
 973
 974        Parameters:
 975
 976            X_train : array-like,
 977                Training vectors, where rows is the number of samples
 978                and columns is the number of features.
 979
 980            X_test : array-like,
 981                Testing vectors, where rows is the number of samples
 982                and columns is the number of features.
 983
 984        Returns:
 985
 986            models: dict-object,
 987                Returns a dictionary with each model pipeline as value
 988                with key as name of models.
 989
 990        """
 991        if self.h is None:
 992            if len(self.models_.keys()) == 0:
 993                self.fit(X_train, X_test)
 994        else:
 995            if len(self.models_.keys()) == 0:
 996                if isinstance(X_test, pd.DataFrame):
 997                    self.fit(X_train, X_test.iloc[0: self.h, :])
 998                else:
 999                    self.fit(X_train, X_test[0: self.h, :])
1000
1001        return self.models_

Fitting -- almost -- all the regression algorithms with layers of nnetsauce's CustomRegressor to multivariate time series and returning their scores.

Parameters:

verbose: int, optional (default=0)
    Any positive number for verbosity.

ignore_warnings: bool, optional (default=True)
    When set to True, warnings related to algorithms that are not
    able to run are ignored.

custom_metric: function, optional (default=None)
    When a function is provided, models are also evaluated with this custom
    evaluation metric.

predictions: bool, optional (default=False)
    When set to True, the predictions of all the models are returned as a data frame.

sort_by: string, optional (default='RMSE')
    Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE',
    'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and
    provided by custom_metric.

random_state: int, optional (default=42)
    Reproducibility seed.

estimators: list, optional (default='all')
    List of estimator (regression algorithm) names, or just 'all'.

preprocess: bool
    When set to True, the data are preprocessed (scaling of numeric features,
    encoding of categorical features) before fitting.

n_layers: int, optional (default=1)
    Number of layers in the network. When set to 1, the model is equivalent to an MTS.

h: int, optional (default=None)
    Number of steps ahead to predict (when used, must be > 0 and <= X_test.shape[0]).

All the other parameters are the same as MTS's.

Attributes:

models_: dict-object
    Returns a dictionary with each model pipeline as value
    with key as name of models.

best_model_: object
    Returns the best model pipeline based on the sort_by metric.

Examples:

See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict
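
A minimal usage sketch is given below. It assumes that the class documented here is exposed as nnetsauce.LazyDeepMTS, that the lags argument is forwarded to the underlying MTS (per "All the other parameters are the same as MTS's"), and it uses purely synthetic data; it is an illustration, not the canonical example from the blog post above.

    import numpy as np
    import pandas as pd
    import nnetsauce as ns

    rng = np.random.default_rng(42)
    # two artificial, loosely trending series (120 observations)
    data = pd.DataFrame(
        rng.normal(size=(120, 2)).cumsum(axis=0),
        columns=["series1", "series2"],
    )
    X_train, X_test = data.iloc[:100, :], data.iloc[100:, :]

    regr_mts = ns.LazyDeepMTS(
        verbose=0,
        ignore_warnings=True,
        n_layers=1,          # 1 layer: equivalent to an MTS
        lags=4,              # assumed to be passed through to MTS
        sort_by="RMSE",
        random_state=42,
    )
    scores = regr_mts.fit(X_train, X_test)  # one row of metrics per model
    print(scores)
    best = regr_mts.get_best_model()         # pipeline with the lowest RMSE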
def fit(self, X_train, X_test, xreg=None, per_series=False, **kwargs):
241    def fit(self, X_train, X_test, xreg=None, per_series=False, **kwargs):
242        """Fit Regression algorithms to X_train, predict and score on X_test.
243
244        Parameters:
245
246            X_train: array-like or data frame,
247                Training vectors, where rows is the number of samples
248                and columns is the number of features.
249
250            X_test: array-like or data frame,
251                Testing vectors, where rows is the number of samples
252                and columns is the number of features.
253
254            xreg: array-like, optional (default=None)
255                Additional (external) regressors to be passed to self.obj
256                xreg must be in 'increasing' order (most recent observations last)
257
258            per_series: bool, optional (default=False)
259                When set to True, the metrics are computed series by series.
260
261            **kwargs: dict, optional (default=None)
262                Additional parameters to be passed to `fit` method of `obj`.
263
264        Returns:
265
266            scores: Pandas DataFrame
267                Returns metrics of all the models in a Pandas DataFrame.
268
269            predictions: Pandas DataFrame
270                Returns predictions of all the models in a Pandas DataFrame.
271
272        """
273        R2 = []
274        ADJR2 = []
275        ME = []
276        MPL = []
277        RMSE = []
278        MAE = []
279        MPE = []
280        MAPE = []
281        WINKLERSCORE = []
282        COVERAGE = []
283
284        # WIN = []
285        names = []
286        TIME = []
287        predictions = {}
288
289        if self.custom_metric is not None:
290            CUSTOM_METRIC = []
291
292        if self.h is None:
293            assert X_test is not None, "If h is None, X_test must be provided."
294
295        if isinstance(X_train, np.ndarray):
296            X_train = pd.DataFrame(X_train)
297            X_test = pd.DataFrame(X_test)
298
299        self.series_names = X_train.columns.tolist()
300
301        X_train = convert_df_to_numeric(X_train)
302        X_test = convert_df_to_numeric(X_test)
303
304        numeric_features = X_train.select_dtypes(include=[np.number]).columns
305        categorical_features = X_train.select_dtypes(include=["object"]).columns
306
307        categorical_low, categorical_high = get_card_split(
308            X_train, categorical_features
309        )
310
311        if self.preprocess:
312            preprocessor = ColumnTransformer(
313                transformers=[
314                    ("numeric", numeric_transformer, numeric_features),
315                    (
316                        "categorical_low",
317                        categorical_transformer_low,
318                        categorical_low,
319                    ),
320                    (
321                        "categorical_high",
322                        categorical_transformer_high,
323                        categorical_high,
324                    ),
325                ]
326            )
327
328        # baselines (Classical MTS) ----
329        for i, name in enumerate(["ARIMA", "ETS", "Theta", "VAR", "VECM"]):
330            try:
331                start = time.time()
332                regr = ClassicalMTS(model=name)
333                regr.fit(X_train, **kwargs)
334                self.models_[name] = regr
335                if self.h is None:
336                    X_pred = regr.predict(h=X_test.shape[0], **kwargs)
337                else:
338                    assert self.h > 0, "h must be > 0"
339                    X_pred = regr.predict(h=self.h, **kwargs)
340                    try:
341                        X_test = X_test[0: self.h, :]
342                    except Exception as e:
343                        X_test = X_test.iloc[0: self.h, :]
344
345                rmse = mean_errors(
346                    actual=X_test,
347                    pred=X_pred,
348                    scoring="root_mean_squared_error",
349                    per_series=per_series,
350                )
351                mae = mean_errors(
352                    actual=X_test,
353                    pred=X_pred,
354                    scoring="mean_absolute_error",
355                    per_series=per_series,
356                )
357                mpl = mean_errors(
358                    actual=X_test,
359                    pred=X_pred,
360                    scoring="mean_pinball_loss",
361                    per_series=per_series,
362                )
363            except Exception:
364
365                continue
366
367            names.append(name)
368            RMSE.append(rmse)
369            MAE.append(mae)
370            MPL.append(mpl)
371
372            if self.custom_metric is not None:
373                try:
374                    if self.h is None:
375                        custom_metric = self.custom_metric(X_test, X_pred)
376                    else:
 377                        custom_metric = self.custom_metric(X_test, X_pred)  # X_test already truncated to h rows above
378                    CUSTOM_METRIC.append(custom_metric)
379                except Exception as e:
 380                    custom_metric = np.finfo(np.float32).max
 381                    CUSTOM_METRIC.append(np.finfo(np.float32).max)
382
383            if (self.replications is not None) or (self.type_pi == "gaussian"):
384                if per_series == False:
385                    winklerscore = winkler_score(
386                        obj=X_pred, actual=X_test, level=95
387                    )
388                    coveragecalc = coverage(X_pred, X_test, level=95)
389                else:
390                    winklerscore = winkler_score(
391                        obj=X_pred, actual=X_test, level=95, per_series=True
392                    )
393                    coveragecalc = coverage(
394                        X_pred, X_test, level=95, per_series=True
395                    )
396                WINKLERSCORE.append(winklerscore)
397                COVERAGE.append(coveragecalc)
398            TIME.append(time.time() - start)
399
400        if self.estimators == "all":
401            if self.n_layers <= 1:
402                self.regressors = REGRESSORSMTS
403            else:
404                self.regressors = DEEPREGRESSORSMTS
405        else:
406            if self.n_layers <= 1:
407                self.regressors = [
408                    ("MTS(" + est[0] + ")", est[1])
409                    for est in all_estimators()
410                    if (
411                        issubclass(est[1], RegressorMixin)
412                        and (est[0] in self.estimators)
413                    )
414                ]
415            else:  # self.n_layers > 1
416                self.regressors = [
417                    ("DeepMTS(" + est[0] + ")", est[1])
418                    for est in all_estimators()
419                    if (
420                        issubclass(est[1], RegressorMixin)
421                        and (est[0] in self.estimators)
422                    )
423                ]
424
425        if self.preprocess is True:
426            for name, model in tqdm(self.regressors):  # do parallel exec
427                start = time.time()
428                try:
429                    if "random_state" in model().get_params().keys():
430                        pipe = Pipeline(
431                            steps=[
432                                ("preprocessor", preprocessor),
433                                (
434                                    "regressor",
435                                    DeepMTS(
436                                        obj=model(
437                                            random_state=self.random_state,
438                                            **kwargs,
439                                        ),
440                                        n_layers=self.n_layers,
441                                        n_hidden_features=self.n_hidden_features,
442                                        activation_name=self.activation_name,
443                                        a=self.a,
444                                        nodes_sim=self.nodes_sim,
445                                        bias=self.bias,
446                                        dropout=self.dropout,
447                                        direct_link=self.direct_link,
448                                        n_clusters=self.n_clusters,
449                                        cluster_encode=self.cluster_encode,
450                                        type_clust=self.type_clust,
451                                        type_scaling=self.type_scaling,
452                                        lags=self.lags,
453                                        type_pi=self.type_pi,
454                                        block_size=self.block_size,
455                                        replications=self.replications,
456                                        kernel=self.kernel,
457                                        agg=self.agg,
458                                        seed=self.seed,
459                                        backend=self.backend,
460                                        show_progress=self.show_progress,
461                                    ),
462                                ),
463                            ]
464                        )
465                    else:  # "random_state" in model().get_params().keys()
466                        pipe = Pipeline(
467                            steps=[
468                                ("preprocessor", preprocessor),
469                                (
470                                    "regressor",
471                                    DeepMTS(
472                                        obj=model(**kwargs),
473                                        n_layers=self.n_layers,
474                                        n_hidden_features=self.n_hidden_features,
475                                        activation_name=self.activation_name,
476                                        a=self.a,
477                                        nodes_sim=self.nodes_sim,
478                                        bias=self.bias,
479                                        dropout=self.dropout,
480                                        direct_link=self.direct_link,
481                                        n_clusters=self.n_clusters,
482                                        cluster_encode=self.cluster_encode,
483                                        type_clust=self.type_clust,
484                                        type_scaling=self.type_scaling,
485                                        lags=self.lags,
486                                        type_pi=self.type_pi,
487                                        block_size=self.block_size,
488                                        replications=self.replications,
489                                        kernel=self.kernel,
490                                        agg=self.agg,
491                                        seed=self.seed,
492                                        backend=self.backend,
493                                        show_progress=self.show_progress,
494                                    ),
495                                ),
496                            ]
497                        )
498
499                    pipe.fit(X_train, **kwargs)
500                    # pipe.fit(X_train, xreg=xreg)
501
502                    self.models_[name] = pipe
503
504                    if self.h is None:
 505                        X_pred = pipe["regressor"].predict(h=X_test.shape[0], **kwargs)
506                    else:
507                        assert self.h > 0, "h must be > 0"
508                        X_pred = pipe["regressor"].predict(h=self.h, **kwargs)
509
510                    if (self.replications is not None) or (
511                        self.type_pi == "gaussian"
512                    ):
513                        rmse = mean_errors(
514                            actual=X_test,
515                            pred=X_pred,
516                            scoring="root_mean_squared_error",
517                            per_series=per_series,
518                        )
519                        mae = mean_errors(
520                            actual=X_test,
521                            pred=X_pred,
522                            scoring="mean_absolute_error",
523                            per_series=per_series,
524                        )
525                        mpl = mean_errors(
526                            actual=X_test,
527                            pred=X_pred,
528                            scoring="mean_pinball_loss",
529                            per_series=per_series,
530                        )
531                        winklerscore = winkler_score(
532                            obj=X_pred,
533                            actual=X_test,
534                            level=95,
535                            per_series=per_series,
536                        )
537                        coveragecalc = coverage(
538                            X_pred, X_test, level=95, per_series=per_series
539                        )
540                    else:
541                        rmse = mean_errors(
542                            actual=X_test,
543                            pred=X_pred,
544                            scoring="root_mean_squared_error",
545                            per_series=per_series,
546                        )
547                        mae = mean_errors(
548                            actual=X_test,
549                            pred=X_pred,
550                            scoring="mean_absolute_error",
551                            per_series=per_series,
552                        )
553                        mpl = mean_errors(
554                            actual=X_test,
555                            pred=X_pred,
556                            scoring="mean_pinball_loss",
557                            per_series=per_series,
558                        )
559
560                    names.append(name)
561                    RMSE.append(rmse)
562                    MAE.append(mae)
563                    MPL.append(mpl)
564
565                    if (self.replications is not None) or (
566                        self.type_pi == "gaussian"
567                    ):
568                        WINKLERSCORE.append(winklerscore)
569                        COVERAGE.append(coveragecalc)
570                    TIME.append(time.time() - start)
571
572                    if self.custom_metric is not None:
573                        try:
574                            custom_metric = self.custom_metric(X_test, X_pred)
575                            CUSTOM_METRIC.append(custom_metric)
576                        except Exception as e:
 577                            custom_metric = np.finfo(np.float32).max
578                            CUSTOM_METRIC.append(custom_metric)
579
580                    if self.verbose > 0:
581                        if (self.replications is not None) or (
582                            self.type_pi == "gaussian"
583                        ):
584                            scores_verbose = {
585                                "Model": name,
586                                "RMSE": rmse,
587                                "MAE": mae,
588                                "MPL": mpl,
589                                "WINKLERSCORE": winklerscore,
590                                "COVERAGE": coveragecalc,
591                                "Time taken": time.time() - start,
592                            }
593                        else:
594                            scores_verbose = {
595                                "Model": name,
596                                "RMSE": rmse,
597                                "MAE": mae,
598                                "MPL": mpl,
599                                "Time taken": time.time() - start,
600                            }
601
602                        if self.custom_metric is not None:
603                            scores_verbose["Custom metric"] = custom_metric
604
605                    if self.predictions:
606                        predictions[name] = X_pred
607                except Exception as exception:
608                    if self.ignore_warnings is False:
609                        print(name + " model failed to execute")
610                        print(exception)
611
612        else:  # no preprocessing
613
614            for name, model in tqdm(self.regressors):  # do parallel exec
615                start = time.time()
616                try:
617                    if "random_state" in model().get_params().keys():
618                        pipe = DeepMTS(
619                            obj=model(random_state=self.random_state, **kwargs),
620                            n_layers=self.n_layers,
621                            n_hidden_features=self.n_hidden_features,
622                            activation_name=self.activation_name,
623                            a=self.a,
624                            nodes_sim=self.nodes_sim,
625                            bias=self.bias,
626                            dropout=self.dropout,
627                            direct_link=self.direct_link,
628                            n_clusters=self.n_clusters,
629                            cluster_encode=self.cluster_encode,
630                            type_clust=self.type_clust,
631                            type_scaling=self.type_scaling,
632                            lags=self.lags,
633                            type_pi=self.type_pi,
634                            block_size=self.block_size,
635                            replications=self.replications,
636                            kernel=self.kernel,
637                            agg=self.agg,
638                            seed=self.seed,
639                            backend=self.backend,
640                            show_progress=self.show_progress,
641                        )
642                    else:
643                        pipe = DeepMTS(
644                            obj=model(**kwargs),
645                            n_layers=self.n_layers,
646                            n_hidden_features=self.n_hidden_features,
647                            activation_name=self.activation_name,
648                            a=self.a,
649                            nodes_sim=self.nodes_sim,
650                            bias=self.bias,
651                            dropout=self.dropout,
652                            direct_link=self.direct_link,
653                            n_clusters=self.n_clusters,
654                            cluster_encode=self.cluster_encode,
655                            type_clust=self.type_clust,
656                            type_scaling=self.type_scaling,
657                            lags=self.lags,
658                            type_pi=self.type_pi,
659                            block_size=self.block_size,
660                            replications=self.replications,
661                            kernel=self.kernel,
662                            agg=self.agg,
663                            seed=self.seed,
664                            backend=self.backend,
665                            show_progress=self.show_progress,
666                        )
667
668                    pipe.fit(X_train, xreg, **kwargs)
669                    # pipe.fit(X_train, xreg=xreg) # DO xreg like in `ahead`
670
671                    self.models_[name] = pipe
672
673                    if self.preprocess is True:
674                        if self.h is None:
675                            X_pred = pipe["regressor"].predict(
676                                h=X_test.shape[0], **kwargs
677                            )
678                        else:
679                            assert (
680                                self.h > 0 and self.h <= X_test.shape[0]
 681                            ), "h must be > 0 and <= X_test.shape[0]"
682                            X_pred = pipe["regressor"].predict(
683                                h=self.h, **kwargs
684                            )
685
686                    else:
687
688                        if self.h is None:
689                            X_pred = pipe.predict(
690                                h=X_test.shape[0],
691                                **kwargs,
692                                # X_pred = pipe.predict(h=X_test.shape[0], new_xreg=new_xreg) ## DO xreg like in `ahead`
693                            )
694                        else:
695                            assert (
696                                self.h > 0 and self.h <= X_test.shape[0]
 697                            ), "h must be > 0 and <= X_test.shape[0]"
698                            X_pred = pipe.predict(h=self.h, **kwargs)
699
700                    if self.h is None:
701                        if (self.replications is not None) or (
702                            self.type_pi == "gaussian"
703                        ):
704                            rmse = mean_errors(
705                                actual=X_test,
706                                pred=X_pred.mean,
707                                scoring="root_mean_squared_error",
708                                per_series=per_series,
709                            )
710                            mae = mean_errors(
711                                actual=X_test,
712                                pred=X_pred.mean,
713                                scoring="mean_absolute_error",
714                                per_series=per_series,
715                            )
716                            mpl = mean_errors(
717                                actual=X_test,
718                                pred=X_pred.mean,
719                                scoring="mean_pinball_loss",
720                                per_series=per_series,
721                            )
722                            winklerscore = winkler_score(
723                                obj=X_pred,
724                                actual=X_test,
725                                level=95,
726                                per_series=per_series,
727                            )
728                            coveragecalc = coverage(
729                                X_pred, X_test, level=95, per_series=per_series
730                            )
731                        else:  # no prediction interval
732                            rmse = mean_errors(
733                                actual=X_test,
734                                pred=X_pred,
735                                scoring="root_mean_squared_error",
736                                per_series=per_series,
737                            )
738                            mae = mean_errors(
739                                actual=X_test,
740                                pred=X_pred,
741                                scoring="mean_absolute_error",
742                                per_series=per_series,
743                            )
744                            mpl = mean_errors(
745                                actual=X_test,
746                                pred=X_pred,
747                                scoring="mean_pinball_loss",
748                                per_series=per_series,
749                            )
750                    else:  # self.h is not None
751                        if (self.replications is not None) or (
752                            self.type_pi == "gaussian"
753                        ):
754
755                            if isinstance(X_test, pd.DataFrame):
756                                X_test_h = X_test.iloc[0: self.h, :]
757                                rmse = mean_errors(
758                                    actual=X_test_h,
759                                    pred=X_pred,
760                                    scoring="root_mean_squared_error",
761                                    per_series=per_series,
762                                )
763                                mae = mean_errors(
764                                    actual=X_test_h,
765                                    pred=X_pred,
766                                    scoring="mean_absolute_error",
767                                    per_series=per_series,
768                                )
769                                mpl = mean_errors(
770                                    actual=X_test_h,
771                                    pred=X_pred,
772                                    scoring="mean_pinball_loss",
773                                    per_series=per_series,
774                                )
775                                winklerscore = winkler_score(
776                                    obj=X_pred,
777                                    actual=X_test_h,
778                                    level=95,
779                                    per_series=per_series,
780                                )
781                                coveragecalc = coverage(
782                                    X_pred,
783                                    X_test_h,
784                                    level=95,
785                                    per_series=per_series,
786                                )
787                            else:
788                                X_test_h = X_test[0: self.h, :]
789                                rmse = mean_errors(
790                                    actual=X_test_h,
791                                    pred=X_pred,
792                                    scoring="root_mean_squared_error",
793                                    per_series=per_series,
794                                )
795                                mae = mean_errors(
796                                    actual=X_test_h,
797                                    pred=X_pred,
798                                    scoring="mean_absolute_error",
799                                    per_series=per_series,
800                                )
801                                mpl = mean_errors(
802                                    actual=X_test_h,
803                                    pred=X_pred,
804                                    scoring="mean_pinball_loss",
805                                    per_series=per_series,
806                                )
807                                winklerscore = winkler_score(
808                                    obj=X_pred,
809                                    actual=X_test_h,
810                                    level=95,
811                                    per_series=per_series,
812                                )
813                                coveragecalc = coverage(
814                                    X_pred,
815                                    X_test_h,
816                                    level=95,
817                                    per_series=per_series,
818                                )
819                        else:  # no prediction interval
820
821                            if isinstance(X_test, pd.DataFrame):
822                                X_test_h = X_test.iloc[0: self.h, :]
823                                rmse = mean_errors(
824                                    actual=X_test_h,
825                                    pred=X_pred,
826                                    scoring="root_mean_squared_error",
827                                    per_series=per_series,
828                                )
829                                mae = mean_errors(
830                                    actual=X_test_h,
831                                    pred=X_pred,
832                                    scoring="mean_absolute_error",
833                                    per_series=per_series,
834                                )
835                                mpl = mean_errors(
836                                    actual=X_test_h,
837                                    pred=X_pred,
838                                    scoring="mean_pinball_loss",
839                                    per_series=per_series,
840                                )
841                            else:
842                                X_test_h = X_test[0: self.h, :]
843                                rmse = mean_errors(
844                                    actual=X_test_h,
845                                    pred=X_pred,
846                                    scoring="root_mean_squared_error",
847                                    per_series=per_series,
848                                )
849                                mae = mean_errors(
850                                    actual=X_test_h,
851                                    pred=X_pred,
852                                    scoring="mean_absolute_error",
853                                    per_series=per_series,
854                                )
855
856                    names.append(name)
857                    RMSE.append(rmse)
858                    MAE.append(mae)
859                    MPL.append(mpl)
860                    if (self.replications is not None) or (
861                        self.type_pi == "gaussian"
862                    ):
863                        WINKLERSCORE.append(winklerscore)
864                        COVERAGE.append(coveragecalc)
865                    TIME.append(time.time() - start)
866
867                    if self.custom_metric is not None:
868                        try:
869                            if self.h is None:
870                                custom_metric = self.custom_metric(
871                                    X_test, X_pred
872                                )
873                            else:
874                                custom_metric = self.custom_metric(
875                                    X_test_h, X_pred
876                                )
877                            CUSTOM_METRIC.append(custom_metric)
878                        except Exception as e:
 879                            custom_metric = np.finfo(np.float32).max
 880                            CUSTOM_METRIC.append(np.finfo(np.float32).max)
881
882                    if self.verbose > 0:
883                        if (self.replications is not None) or (
884                            self.type_pi == "gaussian"
885                        ):
886                            scores_verbose = {
887                                "Model": name,
888                                "RMSE": rmse,
889                                "MAE": mae,
890                                "MPL": mpl,
891                                "WINKLERSCORE": winklerscore,
892                                "COVERAGE": coveragecalc,
893                                "Time taken": time.time() - start,
894                            }
895                        else:
896                            scores_verbose = {
897                                "Model": name,
898                                "RMSE": rmse,
899                                "MAE": mae,
900                                "MPL": mpl,
901                                "Time taken": time.time() - start,
902                            }
903
904                        if self.custom_metric is not None:
905                            scores_verbose["Custom metric"] = custom_metric
906
907                    if self.predictions:
908                        predictions[name] = X_pred
909
910                except Exception as exception:
911                    if self.ignore_warnings is False:
912                        print(name + " model failed to execute")
913                        print(exception)
914
915        if (self.replications is not None) or (self.type_pi == "gaussian"):
916            scores = {
917                "Model": names,
918                "RMSE": RMSE,
919                "MAE": MAE,
920                "MPL": MPL,
921                "WINKLERSCORE": WINKLERSCORE,
922                "COVERAGE": COVERAGE,
923                "Time Taken": TIME,
924            }
925        else:
926            scores = {
927                "Model": names,
928                "RMSE": RMSE,
929                "MAE": MAE,
930                "MPL": MPL,
931                "Time Taken": TIME,
932            }
933
934        if self.custom_metric is not None:
935            scores["Custom metric"] = CUSTOM_METRIC
936
937        if per_series:
938            scores = dict_to_dataframe_series(scores, self.series_names)
939        else:
940            scores = pd.DataFrame(scores)
941
942        try:  # case per_series, can't be sorted
943            scores = scores.sort_values(
944                by=self.sort_by, ascending=True
945            ).set_index("Model")
946
947            self.best_model_ = self.models_[scores.index[0]]
948        except Exception as e:
949            pass
950
951        if self.predictions is True:
952
953            return scores, predictions
954
955        return scores

Fit Regression algorithms to X_train, predict and score on X_test.

Parameters:

X_train: array-like or data frame,
    Training vectors, where rows is the number of samples
    and columns is the number of features.

X_test: array-like or data frame,
    Testing vectors, where rows is the number of samples
    and columns is the number of features.

xreg: array-like, optional (default=None)
    Additional (external) regressors to be passed to self.obj
    xreg must be in 'increasing' order (most recent observations last)

per_series: bool, optional (default=False)
    When set to True, the metrics are computed series by series.

**kwargs: dict, optional (default=None)
    Additional parameters to be passed to `fit` method of `obj`.

Returns:

scores: Pandas DataFrame
    Returns metrics of all the models in a Pandas DataFrame.

predictions: Pandas DataFrame
    Returns predictions of all the models in a Pandas DataFrame.
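
A hedged sketch of the per_series, custom_metric and predictions options follows, reusing X_train and X_test from the earlier sketch; the smape helper is hypothetical (not part of nnetsauce) and assumes point forecasts (no replications or gaussian type_pi).

    import numpy as np
    import nnetsauce as ns

    def smape(y_true, y_pred):
        # hypothetical custom metric: symmetric MAPE, assumes point forecasts
        y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
        return 100.0 * np.mean(
            2.0 * np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred))
        )

    regr_mts = ns.LazyDeepMTS(
        predictions=True,      # fit() then returns (scores, predictions)
        custom_metric=smape,   # reported next to RMSE/MAE/MPL
        sort_by="RMSE",
    )
    # per_series=True: metrics are reported separately for each column
    scores, predictions = regr_mts.fit(X_train, X_test, per_series=True)

As noted in the source above, the per-series score table cannot be sorted by sort_by, so best_model_ may not be set in that case.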
def provide_models(self, X_train, X_test):
 969    def provide_models(self, X_train, X_test):
 970        """
 971        This function returns all the model objects trained in fit function.
 972        If fit is not called already, then we call fit and then return the models.
 973
 974        Parameters:
 975
 976            X_train : array-like,
 977                Training vectors, where rows is the number of samples
 978                and columns is the number of features.
 979
 980            X_test : array-like,
 981                Testing vectors, where rows is the number of samples
 982                and columns is the number of features.
 983
 984        Returns:
 985
 986            models: dict-object,
 987                Returns a dictionary with each model pipeline as value
 988                with key as name of models.
 989
 990        """
 991        if self.h is None:
 992            if len(self.models_.keys()) == 0:
 993                self.fit(X_train, X_test)
 994        else:
 995            if len(self.models_.keys()) == 0:
 996                if isinstance(X_test, pd.DataFrame):
 997                    self.fit(X_train, X_test.iloc[0: self.h, :])
 998                else:
 999                    self.fit(X_train, X_test[0: self.h, :])
1000
1001        return self.models_

This function returns all the model objects trained by the fit function. If fit has not been called yet, it is called first and the models are then returned.

Parameters:

X_train : array-like,
    Training vectors, where rows is the number of samples
    and columns is the number of features.

X_test : array-like,
    Testing vectors, where rows is the number of samples
    and columns is the number of features.

Returns:

models: dict-object,
    Returns a dictionary with each model pipeline as value
    with key as name of models.
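
A short illustration, assuming regr_mts, X_train and X_test from the previous sketches; the model names in the comment are indicative only.

    # returns the dictionary of fitted pipelines; calls fit() first if needed
    models = regr_mts.provide_models(X_train, X_test)
    print(list(models.keys()))   # e.g. ['ARIMA', 'VAR', 'MTS(BayesianRidge)', ...]
    first_name = list(models.keys())[0]
    pipe = models[first_name]    # each value is a fitted model or pipeline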
class MLARCH(nnetsauce.MTS):
 18class MLARCH(MTS):
 19    """Machine Learning with ARCH effects for time series forecasting
 20
 21    Parameters:
 22
 23        model_mean: object of class nnetsauce.MTS
 24            Model for mean prediction
 25
 26        model_sigma: object of class nnetsauce.MTS
 27            Model for residuals volatility prediction
 28
 29        model_residuals: object of class nnetsauce.MTS
 30            Model for residuals prediction
 31
 32    Examples:
 33
 34        See examples/mlarch.py
 35
 36    """
 37
 38    def __init__(self, model_mean, model_sigma, model_residuals):
 39        assert isinstance(
 40            model_mean, MTS
 41        ), "model_mean must be an object of class nnetsauce.MTS"
 42        assert isinstance(
 43            model_sigma, MTS
 44        ), "model_sigma must be an object of class nnetsauce.MTS"
 45        assert isinstance(
 46            model_residuals, MTS
 47        ), "model_residuals must be an object of class nnetsauce.MTS"
 48        assert (
 49            model_sigma.type_pi.startswith("scp")
 50            and model_sigma.replications is not None
 51        ), "for now, the models must be conformalized, i.e. type_pi must start with 'scp' and replications must be an integer"
 52        assert (
 53            model_residuals.type_pi.startswith("scp")
 54            and model_residuals.replications is not None
 55        ), "for now, the models must be conformalized, i.e. type_pi must start with 'scp' and replications must be an integer"
 56
 57        self.model_mean = model_mean
 58        self.model_sigma = model_sigma
 59        self.model_residuals = model_residuals
 60
 61        self.mean_residuals_ = None
 62        self.mean_residuals_wilcoxon_test_ = None
 63        self.mean_residuals_kpss_test_ = None
 64        self.standardized_residuals_ = None
 65
 66    def fit(self, y):
 67        """Fit the MLARCH model to the time series data.
 68
 69        Parameters
 70        ----------
 71        y : array-like of shape (n_samples,)
 72            The target time series to be fitted.
 73
 74        Returns
 75        -------
 76        self : object
 77            Returns self.
 78
 79        Notes
 80        -----
 81        This method:
 82
 83        1. Fits the mean model to the time series
 84        2. Performs statistical tests on the residuals (Wilcoxon and KPSS)
 85        3. Fits the volatility model to the squared residuals
 86        4. Computes standardized residuals
 87        5. Fits the residuals model to the standardized residuals
 88        """
 89        n = len(y)
 90        self.model_mean.fit(y.reshape(-1, 1))
 91        # Wilcoxon signed-rank test on residuals (mean = 0)
 92        self.mean_residuals_wilcoxon_test_ = stats.wilcoxon(
 93            self.model_mean.residuals_
 94        )
 95        # KPSS test for stationarity on residuals
 96        self.mean_residuals_kpss_test_ = kpss(
 97            self.model_mean.residuals_, regression="c"
 98        )
 99        self.model_sigma.fit(
100            np.log(self.model_mean.residuals_.reshape(-1, 1) ** 2)
101        )
102        # n//2 here because the model is conformalized
103        fitted_sigma = (
104            self.model_sigma.residuals_
105            + np.log(self.model_mean.residuals_**2)[(n // 2):, :]
106        )
107        # standardized residuals
108        self.standardized_residuals_ = self.model_mean.residuals_[
109            (n // 2):, :
110        ] / np.sqrt(np.exp(fitted_sigma))
111        self.model_residuals.fit(self.standardized_residuals_.reshape(-1, 1))
112
113        # Calculate AIC
114        # Get predictions from all models
115        mean_pred = self.model_mean.predict(h=0).values.ravel()
116        sigma_pred = self.model_sigma.predict(h=0).values.ravel()
117        z_pred = self.model_residuals.predict(h=0).values.ravel()
118
119        # Calculate combined predictions
120        combined_pred = mean_pred + z_pred * np.sqrt(np.exp(sigma_pred))
121
122        # Calculate SSE using the last half of the data (matching standardized_residuals_)
123        y_actual = y[(n // 2):].ravel()
124        self.sse_ = np.sum((y_actual - combined_pred) ** 2)
125
126        # Calculate number of parameters (sum of parameters from all three models)
127        n_params = (
128            self.model_mean.n_hidden_features
129            + 1  # mean model
130            + self.model_sigma.n_hidden_features
131            + 1  # sigma model
132            + self.model_residuals.n_hidden_features
133            + 1
134        )  # residuals model
135
136        # Calculate AIC
137        n_samples = len(y_actual)
138        self.aic_ = n_samples * np.log(self.sse_ / n_samples) + 2 * n_params
139
140        return self
141
142    def predict(self, h=5, level=95):
143        """Predict (probabilistic) future values of the time series.
144
145        Parameters
146        ----------
147        h : int, default=5
148            The forecast horizon.
149        level : int, default=95
150            The confidence level for prediction intervals.
151
152        Returns
153        -------
154        DescribeResult : namedtuple
155            A named tuple containing:
156
157            - mean : array-like of shape (h,)
158                The mean forecast.
159            - sims : array-like of shape (h, n_replications)
160                The simulated forecasts.
161            - lower : array-like of shape (h,)
162                The lower bound of the prediction interval.
163            - upper : array-like of shape (h,)
164                The upper bound of the prediction interval.
165
166        Notes
167        -----
168        This method:
169        1. Generates mean forecasts using the mean model
170        2. Generates standardized residual forecasts using the residuals model
171        3. Generates volatility forecasts using the sigma model
172        4. Combines these forecasts to generate the final predictions
173        5. Computes prediction intervals at the specified confidence level
174        """
175        DescribeResult = namedtuple(
176            "DescribeResult", ("mean", "sims", "lower", "upper")
177        )
178        mean_forecast = self.model_mean.predict(h=h).values.ravel()
179        preds_z = self.model_residuals.predict(h=h)
180        preds_sigma = self.model_sigma.predict(h=h)
181        sims_z = preds_z.sims
182        sims_sigma = preds_sigma.sims
183
184        f = []
185        for i in range(len(sims_z)):
186            f.append(
187                mean_forecast
188                + sims_z[i].values.ravel()
189                * np.sqrt(np.exp(sims_sigma[i].values.ravel()))
190            )
191
192        f = np.asarray(f).T
193        mean_f = np.mean(f, axis=1)
194        alpha = 1 - level / 100
195        lower_bound = np.quantile(f, alpha / 2, axis=1)
196        upper_bound = np.quantile(f, 1 - alpha / 2, axis=1)
197
198        return DescribeResult(mean_f, f, lower_bound, upper_bound)

Machine Learning with ARCH effects for time series forecasting

Parameters:

model_mean: object of class nnetsauce.MTS
    Model for mean prediction (default: None, uses obj)

model_sigma: object of class nnetsauce.MTS
    Model for residuals volatility prediction (default: None, uses obj)

model_residuals: object of class nnetsauce.MTS
    Model for residuals prediction (default: None, uses obj)

Examples:

See examples/mlarch.py
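
As a complement to examples/mlarch.py, here is a minimal sketch of how the three components could be assembled. It assumes the constructor accepts the model_mean, model_sigma and model_residuals keyword arguments listed above; the Ridge base learner, lags and replications values are illustrative choices, not the canonical setup.

```python
import nnetsauce as ns
import numpy as np
from sklearn.linear_model import Ridge

np.random.seed(123)
y = np.cumsum(np.random.randn(200))  # synthetic univariate series

# model_sigma and model_residuals must be conformalized:
# type_pi starting with 'scp' and an integer number of replications
# (see the assertions in __init__ above)
model_mean = ns.MTS(Ridge(), lags=2, type_pi="scp-kde", replications=250)
model_sigma = ns.MTS(Ridge(), lags=2, type_pi="scp-kde", replications=250)
model_residuals = ns.MTS(Ridge(), lags=2, type_pi="scp-kde", replications=250)

obj_MLARCH = ns.MLARCH(
    model_mean=model_mean,
    model_sigma=model_sigma,
    model_residuals=model_residuals,
)
```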
def fit(self, y):
 66    def fit(self, y):
 67        """Fit the MLARCH model to the time series data.
 68
 69        Parameters
 70        ----------
 71        y : array-like of shape (n_samples,)
 72            The target time series to be fitted.
 73
 74        Returns
 75        -------
 76        self : object
 77            Returns self.
 78
 79        Notes
 80        -----
 81        This method:
 82
 83        1. Fits the mean model to the time series
 84        2. Performs statistical tests on the residuals (Wilcoxon and KPSS)
 85        3. Fits the volatility model to the squared residuals
 86        4. Computes standardized residuals
 87        5. Fits the residuals model to the standardized residuals
 88        """
 89        n = len(y)
 90        self.model_mean.fit(y.reshape(-1, 1))
 91        # Wilcoxon signed-rank test on residuals (mean = 0)
 92        self.mean_residuals_wilcoxon_test_ = stats.wilcoxon(
 93            self.model_mean.residuals_
 94        )
 95        # KPSS test for stationarity on residuals
 96        self.mean_residuals_kpss_test_ = kpss(
 97            self.model_mean.residuals_, regression="c"
 98        )
 99        self.model_sigma.fit(
100            np.log(self.model_mean.residuals_.reshape(-1, 1) ** 2)
101        )
102        # n//2 here because the model is conformalized
103        fitted_sigma = (
104            self.model_sigma.residuals_
105            + np.log(self.model_mean.residuals_**2)[(n // 2):, :]
106        )
107        # standardized residuals
108        self.standardized_residuals_ = self.model_mean.residuals_[
109            (n // 2):, :
110        ] / np.sqrt(np.exp(fitted_sigma))
111        self.model_residuals.fit(self.standardized_residuals_.reshape(-1, 1))
112
113        # Calculate AIC
114        # Get predictions from all models
115        mean_pred = self.model_mean.predict(h=0).values.ravel()
116        sigma_pred = self.model_sigma.predict(h=0).values.ravel()
117        z_pred = self.model_residuals.predict(h=0).values.ravel()
118
119        # Calculate combined predictions
120        combined_pred = mean_pred + z_pred * np.sqrt(np.exp(sigma_pred))
121
122        # Calculate SSE using the last half of the data (matching standardized_residuals_)
123        y_actual = y[(n // 2):].ravel()
124        self.sse_ = np.sum((y_actual - combined_pred) ** 2)
125
126        # Calculate number of parameters (sum of parameters from all three models)
127        n_params = (
128            self.model_mean.n_hidden_features
129            + 1  # mean model
130            + self.model_sigma.n_hidden_features
131            + 1  # sigma model
132            + self.model_residuals.n_hidden_features
133            + 1
134        )  # residuals model
135
136        # Calculate AIC
137        n_samples = len(y_actual)
138        self.aic_ = n_samples * np.log(self.sse_ / n_samples) + 2 * n_params
139
140        return self

Fit the MLARCH model to the time series data.

Parameters

y : array-like of shape (n_samples,)
    The target time series to be fitted.

Returns

self : object
    Returns self.

Notes

This method:

  1. Fits the mean model to the time series
  2. Performs statistical tests on the residuals (Wilcoxon and KPSS)
  3. Fits the volatility model to the squared residuals
  4. Computes standardized residuals
  5. Fits the residuals model to the standardized residuals
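
Continuing the sketch above (the obj_MLARCH object is hypothetical), fitting and inspecting the diagnostics computed by this method could look as follows; the attribute names are the ones set in the source listing above.

```python
obj_MLARCH.fit(y)  # y is a 1D numpy array, reshaped internally

# residual diagnostics computed during fit
print(obj_MLARCH.mean_residuals_wilcoxon_test_)  # Wilcoxon signed-rank test on mean-model residuals
print(obj_MLARCH.mean_residuals_kpss_test_)      # KPSS stationarity test on mean-model residuals
print(obj_MLARCH.standardized_residuals_[:5])    # standardized residuals (second half of the sample)
print(obj_MLARCH.aic_)                           # AIC computed from the in-sample SSE
```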
def predict(self, h=5, level=95):
142    def predict(self, h=5, level=95):
143        """Predict (probabilistic) future values of the time series.
144
145        Parameters
146        ----------
147        h : int, default=5
148            The forecast horizon.
149        level : int, default=95
150            The confidence level for prediction intervals.
151
152        Returns
153        -------
154        DescribeResult : namedtuple
155            A named tuple containing:
156
157            - mean : array-like of shape (h,)
158                The mean forecast.
159            - sims : array-like of shape (h, n_replications)
160                The simulated forecasts.
161            - lower : array-like of shape (h,)
162                The lower bound of the prediction interval.
163            - upper : array-like of shape (h,)
164                The upper bound of the prediction interval.
165
166        Notes
167        -----
168        This method:
169        1. Generates mean forecasts using the mean model
170        2. Generates standardized residual forecasts using the residuals model
171        3. Generates volatility forecasts using the sigma model
172        4. Combines these forecasts to generate the final predictions
173        5. Computes prediction intervals at the specified confidence level
174        """
175        DescribeResult = namedtuple(
176            "DescribeResult", ("mean", "sims", "lower", "upper")
177        )
178        mean_forecast = self.model_mean.predict(h=h).values.ravel()
179        preds_z = self.model_residuals.predict(h=h)
180        preds_sigma = self.model_sigma.predict(h=h)
181        sims_z = preds_z.sims
182        sims_sigma = preds_sigma.sims
183
184        f = []
185        for i in range(len(sims_z)):
186            f.append(
187                mean_forecast
188                + sims_z[i].values.ravel()
189                * np.sqrt(np.exp(sims_sigma[i].values.ravel()))
190            )
191
192        f = np.asarray(f).T
193        mean_f = np.mean(f, axis=1)
194        alpha = 1 - level / 100
195        lower_bound = np.quantile(f, alpha / 2, axis=1)
196        upper_bound = np.quantile(f, 1 - alpha / 2, axis=1)
197
198        return DescribeResult(mean_f, f, lower_bound, upper_bound)

Predict (probabilistic) future values of the time series.

Parameters

h : int, default=5
    The forecast horizon.

level : int, default=95
    The confidence level for prediction intervals.

Returns

DescribeResult : namedtuple
    A named tuple containing:

- mean : array-like of shape (h,)
    The mean forecast.
- sims : array-like of shape (h, n_replications)
    The simulated forecasts.
- lower : array-like of shape (h,)
    The lower bound of the prediction interval.
- upper : array-like of shape (h,)
    The upper bound of the prediction interval.

Notes

This method:

  1. Generates mean forecasts using the mean model
  2. Generates standardized residual forecasts using the residuals model
  3. Generates volatility forecasts using the sigma model
  4. Combines these forecasts to generate the final predictions
  5. Computes prediction intervals at the specified confidence level
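
A short usage sketch, continuing the hypothetical obj_MLARCH object from the previous examples; the returned fields follow the DescribeResult namedtuple defined in the source above.

```python
# probabilistic forecast, 10 steps ahead, 95% prediction interval
res = obj_MLARCH.predict(h=10, level=95)

print(res.mean)        # point forecast, shape (h,)
print(res.lower)       # lower bound of the prediction interval
print(res.upper)       # upper bound of the prediction interval
print(res.sims.shape)  # simulated forecasts, shape (h, n_replications)
```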
class MedianVotingRegressor(sklearn.ensemble._voting.VotingRegressor):
 6class MedianVotingRegressor(VotingRegressor):
 7    def predict(self, X):
 8        """
 9        Predict using the median of the base regressors' predictions.
10
11        Parameters:
12        X (array-like): Feature matrix for predictions.
13
14        Returns:
15        y_pred (array): Median of predictions from the base regressors.
16        """
17        predictions = np.asarray(
18            [regressor.predict(X) for regressor in self.estimators_]
19        )
20        return np.median(predictions, axis=0)

Prediction voting regressor for unfitted estimators.

A voting regressor is an ensemble meta-estimator that fits several base regressors, each on the whole dataset. Then it averages the individual predictions to form a final prediction.

Read more in the scikit-learn User Guide (voting_regressor).

New in version 0.21.

Parameters

estimators : list of (str, estimator) tuples
    Invoking the fit method on the VotingRegressor will fit clones of those original estimators that will be stored in the class attribute self.estimators_. An estimator can be set to 'drop' using set_params().

*Changed in version 0.21:*
'drop' is accepted. Using None was deprecated in 0.22 and
support was removed in 0.24.

weights : array-like of shape (n_regressors,), default=None
    Sequence of weights (float or int) to weight the occurrences of predicted values before averaging. Uses uniform weights if None.

n_jobs : int, default=None
    The number of jobs to run in parallel for fit. None means 1 unless in a joblib.parallel_backend context. -1 means using all processors. See the scikit-learn Glossary entry for n_jobs for more details.

verbose : bool, default=False
    If True, the time elapsed while fitting will be printed as it is completed.

*New in version 0.23.*

Attributes

estimators_ : list of regressors
    The collection of fitted sub-estimators as defined in estimators that are not 'drop'.

named_estimators_ : sklearn.utils.Bunch
    Attribute to access any fitted sub-estimators by name.

*New in version 0.20.*

n_features_in_ : int
    Number of features seen during fit. Only defined if the underlying regressor exposes such an attribute when fit.

*New in version 0.24.*

feature_names_in_ : ndarray of shape (n_features_in_,)
    Names of features seen during fit. Only defined if the underlying estimators expose such an attribute when fit.

*New in version 1.0.*

See Also

VotingClassifier : Soft Voting/Majority Rule classifier.

Examples

>>> import numpy as np
>>> from sklearn.linear_model import LinearRegression
>>> from sklearn.ensemble import RandomForestRegressor
>>> from sklearn.ensemble import VotingRegressor
>>> from sklearn.neighbors import KNeighborsRegressor
>>> r1 = LinearRegression()
>>> r2 = RandomForestRegressor(n_estimators=10, random_state=1)
>>> r3 = KNeighborsRegressor()
>>> X = np.array([[1, 1], [2, 4], [3, 9], [4, 16], [5, 25], [6, 36]])
>>> y = np.array([2, 6, 12, 20, 30, 42])
>>> er = VotingRegressor([('lr', r1), ('rf', r2), ('r3', r3)])
>>> print(er.fit(X, y).predict(X))
[ 6.8...  8.4... 12.5... 17.8... 26...  34...]

In the following example, we drop the 'lr' estimator with VotingRegressor.set_params() and fit the remaining two estimators:

>>> er = er.set_params(lr='drop')
>>> er = er.fit(X, y)
>>> len(er.estimators_)
2
def predict(self, X):
 7    def predict(self, X):
 8        """
 9        Predict using the median of the base regressors' predictions.
10
11        Parameters:
12        X (array-like): Feature matrix for predictions.
13
14        Returns:
15        y_pred (array): Median of predictions from the base regressors.
16        """
17        predictions = np.asarray(
18            [regressor.predict(X) for regressor in self.estimators_]
19        )
20        return np.median(predictions, axis=0)

Predict using the median of the base regressors' predictions.

Parameters:
    X (array-like): Feature matrix for predictions.

Returns:
    y_pred (array): Median of predictions from the base regressors.
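
Since MedianVotingRegressor only overrides predict, fitting works exactly as for scikit-learn's VotingRegressor. A minimal sketch mirroring the VotingRegressor example above (the estimator choices are illustrative):

```python
import numpy as np
import nnetsauce as ns
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor

X = np.array([[1, 1], [2, 4], [3, 9], [4, 16], [5, 25], [6, 36]])
y = np.array([2, 6, 12, 20, 30, 42])

er = ns.MedianVotingRegressor([
    ("lr", LinearRegression()),
    ("rf", RandomForestRegressor(n_estimators=10, random_state=1)),
    ("knn", KNeighborsRegressor()),
])

# predict() returns the element-wise median of the base regressors' predictions
print(er.fit(X, y).predict(X))
```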

class MTS(nnetsauce.Base):
  30class MTS(Base):
  31    """Univariate and multivariate time series (MTS) forecasting with Quasi-Randomized networks
  32
  33    Parameters:
  34
  35        obj: object.
  36            any object containing a method fit (obj.fit()) and a method predict
  37            (obj.predict()).
  38
  39        n_hidden_features: int.
  40            number of nodes in the hidden layer.
  41
  42        activation_name: str.
  43            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'.
  44
  45        a: float.
  46            hyperparameter for 'prelu' or 'elu' activation function.
  47
  48        nodes_sim: str.
  49            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
  50            'uniform'.
  51
  52        bias: boolean.
  53            indicates if the hidden layer contains a bias term (True) or not
  54            (False).
  55
  56        dropout: float.
  57            regularization parameter; (random) percentage of nodes dropped out
  58            of the training.
  59
  60        direct_link: boolean.
  61            indicates if the original predictors are included (True) in model's fitting or not (False).
  62
  63        n_clusters: int.
  64            number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering).
  65
  66        cluster_encode: bool.
  67            defines how the variable containing clusters is treated (default is one-hot)
  68            if `False`, then labels are used, without one-hot encoding.
  69
  70        type_clust: str.
  71            type of clustering method: currently k-means ('kmeans') or Gaussian
  72            Mixture Model ('gmm').
  73
  74        type_scaling: a tuple of 3 strings.
  75            scaling methods for inputs, hidden layer, and clustering respectively
  76            (and when relevant).
  77            Currently available: standardization ('std') or MinMax scaling ('minmax').
  78
  79        lags: int.
  80            number of lags used for each time series.
  81            If string, lags must be one of 'AIC', 'AICc', or 'BIC'.
  82
  83        type_pi: str.
  84            type of prediction interval; currently:
  85            - "gaussian": simple, fast, but assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case
  86            - "quantile": use model-agnostic quantile regression under the hood
  87            - "kde": based on Kernel Density Estimation of in-sample residuals
  88            - "bootstrap": based on independent bootstrap of in-sample residuals
  89            - "block-bootstrap": based on basic block bootstrap of in-sample residuals
  90            - "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals
  91            - "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals
  92            - "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals
  93            - "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals
  94            - "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals
  95            - "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals
  96            - based on copulas of in-sample residuals: 'vine-tll', 'vine-bb1', 'vine-bb6', 'vine-bb7', 'vine-bb8', 'vine-clayton',
  97            'vine-frank', 'vine-gaussian', 'vine-gumbel', 'vine-indep', 'vine-joe', 'vine-student'
  98            - 'scp-vine-tll', 'scp-vine-bb1', 'scp-vine-bb6', 'scp-vine-bb7', 'scp-vine-bb8', 'scp-vine-clayton',
  99            'scp-vine-frank', 'scp-vine-gaussian', 'scp-vine-gumbel', 'scp-vine-indep', 'scp-vine-joe', 'scp-vine-student'
 100            - 'scp2-vine-tll', 'scp2-vine-bb1', 'scp2-vine-bb6', 'scp2-vine-bb7', 'scp2-vine-bb8', 'scp2-vine-clayton',
 101            'scp2-vine-frank', 'scp2-vine-gaussian', 'scp2-vine-gumbel', 'scp2-vine-indep', 'scp2-vine-joe', 'scp2-vine-student'
 102
 103        level: int.
 104            level of confidence for `type_pi == 'quantile'` (default is `95`)
 105
 106        block_size: int.
 107            size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap").
 108            Default is ceil(3.15*(n_residuals^(1/3)))
 109
 110        replications: int.
 111            number of replications (if needed, for predictive simulation). Default is 'None'.
 112
 113        kernel: str.
 114            the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'.
 115
 116        agg: str.
 117            either "mean" or "median" for simulation of bootstrap aggregating
 118
 119        seed: int.
 120            reproducibility seed for nodes_sim=='uniform' or predictive simulation.
 121
 122        backend: str.
 123            "cpu" or "gpu" or "tpu".
 124
 125        verbose: int.
 126            0: not printing; 1: printing
 127
 128        show_progress: bool.
 129            True: progress bar when fitting each series; False: no progress bar when fitting each series
 130
 131    Attributes:
 132
 133        fit_objs_: dict
 134            objects adjusted to each individual time series
 135
 136        y_: {array-like}
 137            MTS responses (most recent observations first)
 138
 139        X_: {array-like}
 140            MTS lags
 141
 142        xreg_: {array-like}
 143            external regressors
 144
 145        y_means_: dict
 146            a dictionary of each series mean values
 147
 148        preds_: {array-like}
 149            successive model predictions
 150
 151        preds_std_: {array-like}
 152            standard deviation around the predictions for Bayesian base learners (`obj`)
 153
 154        gaussian_preds_std_: {array-like}
 155            standard deviation around the predictions for `type_pi='gaussian'`
 156
 157        return_std_: boolean
 158            return uncertainty or not (set in predict)
 159
 160        df_: data frame
 161            the input data frame, in case a data.frame is provided to `fit`
 162
 163        n_obs_: int
 164            number of time series observations (number of rows for multivariate)
 165
 166        level_: int
 167            level of confidence for prediction intervals (default is 95)
 168
 169        residuals_: {array-like}
 170            in-sample residuals (for `type_pi` not conformal prediction) or calibrated residuals
 171            (for `type_pi` in conformal prediction)
 172
 173        residuals_sims_: tuple of {array-like}
 174            simulations of in-sample residuals (for `type_pi` not conformal prediction) or
 175            calibrated residuals (for `type_pi` in conformal prediction)
 176
 177        kde_: A scikit-learn object, see https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KernelDensity.html
 178
 179        residuals_std_dev_: residuals standard deviation
 180
 181    Examples:
 182
 183    Example 1:
 184
 185    ```python
 186    import nnetsauce as ns
 187    import numpy as np
 188    from sklearn import linear_model
 189    np.random.seed(123)
 190
 191    M = np.random.rand(10, 3)
 192    M[:,0] = 10*M[:,0]
 193    M[:,2] = 25*M[:,2]
 194    print(M)
 195
 196    # Adjust Bayesian Ridge
 197    regr4 = linear_model.BayesianRidge()
 198    obj_MTS = ns.MTS(regr4, lags = 1, n_hidden_features=5)
 199    obj_MTS.fit(M)
 200    print(obj_MTS.predict())
 201
 202    # with credible intervals
 203    print(obj_MTS.predict(return_std=True, level=80))
 204
 205    print(obj_MTS.predict(return_std=True, level=95))
 206    ```
 207
 208    Example 2:
 209
 210    ```python
 211    import nnetsauce as ns
 212    import numpy as np
 213    from sklearn import linear_model
 214    import pandas as pd
 215    dataset = {
 216    'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
 217    'series1' : [34, 30, 35.6, 33.3, 38.1],
 218    'series2' : [4, 5.5, 5.6, 6.3, 5.1],
 219    'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
 220    df = pd.DataFrame(dataset).set_index('date')
 221    print(df)
 222
 223    # Adjust Bayesian Ridge
 224    regr5 = linear_model.BayesianRidge()
 225    obj_MTS = ns.MTS(regr5, lags = 1, n_hidden_features=5)
 226    obj_MTS.fit(df)
 227    print(obj_MTS.predict())
 228
 229    # with credible intervals
 230    print(obj_MTS.predict(return_std=True, level=80))
 231
 232    print(obj_MTS.predict(return_std=True, level=95))
 233    ```
 234    """
 235
 236    # construct the object -----
 237
 238    def __init__(
 239        self,
 240        obj,
 241        n_hidden_features=5,
 242        activation_name="relu",
 243        a=0.01,
 244        nodes_sim="sobol",
 245        bias=True,
 246        dropout=0,
 247        direct_link=True,
 248        n_clusters=2,
 249        cluster_encode=True,
 250        type_clust="kmeans",
 251        type_scaling=("std", "std", "std"),
 252        lags=1,
 253        type_pi="kde",
 254        level=95,
 255        block_size=None,
 256        replications=None,
 257        kernel="gaussian",
 258        agg="mean",
 259        seed=123,
 260        backend="cpu",
 261        verbose=0,
 262        show_progress=True,
 263    ):
 264
 265        super().__init__(
 266            n_hidden_features=n_hidden_features,
 267            activation_name=activation_name,
 268            a=a,
 269            nodes_sim=nodes_sim,
 270            bias=bias,
 271            dropout=dropout,
 272            direct_link=direct_link,
 273            n_clusters=n_clusters,
 274            cluster_encode=cluster_encode,
 275            type_clust=type_clust,
 276            type_scaling=type_scaling,
 277            seed=seed,
 278            backend=backend,
 279        )
 280
 281        # Add validation for lags parameter
 282        if isinstance(lags, str):
 283            assert lags in (
 284                "AIC",
 285                "AICc",
 286                "BIC",
 287            ), "if string, lags must be one of 'AIC', 'AICc', or 'BIC'"
 288        else:
 289            assert (
 290                int(lags) == lags
 291            ), "if numeric, lags parameter should be an integer"
 292
 293        self.obj = obj
 294        self.n_series = None
 295        self.lags = lags
 296        self.type_pi = type_pi
 297        self.level = level
 298        if self.type_pi == "quantile":
 299            self.obj = QuantileRegressor(
 300                self.obj, level=self.level, scoring="conformal"
 301            )
 302        self.block_size = block_size
 303        self.replications = replications
 304        self.kernel = kernel
 305        self.agg = agg
 306        self.verbose = verbose
 307        self.show_progress = show_progress
 308        self.series_names = None
 309        self.input_dates = None
 310        self.quantiles = None
 311        self.fit_objs_ = {}
 312        self.y_ = None  # MTS responses (most recent observations first)
 313        self.X_ = None  # MTS lags
 314        self.xreg_ = None
 315        self.y_means_ = {}
 316        self.mean_ = None
 317        self.median_ = None
 318        self.upper_ = None
 319        self.lower_ = None
 320        self.output_dates_ = None
 321        self.preds_std_ = []
 322        self.gaussian_preds_std_ = None
 323        self.alpha_ = None
 324        self.return_std_ = None
 325        self.df_ = None
 326        self.residuals_ = []
 327        self.abs_calib_residuals_ = None
 328        self.calib_residuals_quantile_ = None
 329        self.residuals_sims_ = None
 330        self.kde_ = None
 331        self.sims_ = None
 332        self.residuals_std_dev_ = None
 333        self.n_obs_ = None
 334        self.level_ = None
 335        self.init_n_series_ = None
 336
 337    def fit(self, X, xreg=None, **kwargs):
 338        """Fit MTS model to training data X, with optional regressors xreg
 339
 340        Parameters:
 341
 342        X: {array-like}, shape = [n_samples, n_features]
 343            Training time series, where n_samples is the number
 344            of samples and n_features is the number of features;
 345            X must be in increasing order (most recent observations last)
 346
 347        xreg: {array-like}, shape = [n_samples, n_features_xreg]
 348            Additional (external) regressors to be passed to self.obj
 349            xreg must be in 'increasing' order (most recent observations last)
 350
 351        **kwargs: for now, additional parameters to be passed for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)
 352
 353        Returns:
 354
 355        self: object
 356        """
 357        try:
 358            self.init_n_series_ = X.shape[1]
 359        except IndexError as e:
 360            self.init_n_series_ = 1
 361
 362        # Automatic lag selection if requested
 363        if isinstance(self.lags, str):
 364            max_lags = min(25, X.shape[0] // 4)
 365            best_ic = float("inf")
 366            best_lags = 1
 367
 368            if self.verbose:
 369                print(
 370                    f"\nSelecting optimal number of lags using {self.lags}..."
 371                )
 372                iterator = tqdm(range(1, max_lags + 1))
 373            else:
 374                iterator = range(1, max_lags + 1)
 375
 376            for lag in iterator:
 377                # Convert DataFrame to numpy array before reversing
 378                if isinstance(X, pd.DataFrame):
 379                    X_values = X.values[::-1]
 380                else:
 381                    X_values = X[::-1]
 382
 383                # Try current lag value
 384                if self.init_n_series_ > 1:
 385                    mts_input = ts.create_train_inputs(X_values, lag)
 386                else:
 387                    mts_input = ts.create_train_inputs(
 388                        X_values.reshape(-1, 1), lag
 389                    )
 390
 391                # Cook training set and fit model
 392                dummy_y, scaled_Z = self.cook_training_set(
 393                    y=np.ones(mts_input[0].shape[0]), X=mts_input[1]
 394                )
 395                residuals_ = []
 396
 397                for i in range(self.init_n_series_):
 398                    y_mean = np.mean(mts_input[0][:, i])
 399                    centered_y_i = mts_input[0][:, i] - y_mean
 400                    self.obj.fit(X=scaled_Z, y=centered_y_i)
 401                    residuals_.append(
 402                        (centered_y_i - self.obj.predict(scaled_Z)).tolist()
 403                    )
 404
 405                self.residuals_ = np.asarray(residuals_).T
 406                ic = self._compute_information_criterion(
 407                    curr_lags=lag, criterion=self.lags
 408                )
 409
 410                if self.verbose:
 411                    print(f"Trying lags={lag}, {self.lags}={ic:.2f}")
 412
 413                if ic < best_ic:
 414                    best_ic = ic
 415                    best_lags = lag
 416
 417            if self.verbose:
 418                print(
 419                    f"\nSelected {best_lags} lags with {self.lags}={best_ic:.2f}"
 420                )
 421
 422            self.lags = best_lags
 423
 424        self.input_dates = None
 425        self.df_ = None
 426
 427        if isinstance(X, pd.DataFrame) is False:
 428            # input data set is a numpy array
 429            if xreg is None:
 430                X = pd.DataFrame(X)
 431                self.series_names = [
 432                    "series" + str(i) for i in range(X.shape[1])
 433                ]
 434            else:
 435                # xreg is not None
 436                X = mo.cbind(X, xreg)
 437                self.xreg_ = xreg
 438
 439        else:  # input data set is a DataFrame with column names
 440
 441            X_index = None
 442            if X.index is not None:
 443                X_index = X.index
 444            if xreg is None:
 445                X = copy.deepcopy(mo.convert_df_to_numeric(X))
 446            else:
 447                X = copy.deepcopy(mo.cbind(mo.convert_df_to_numeric(X), xreg))
 448                self.xreg_ = xreg
 449            if X_index is not None:
 450                X.index = X_index
 451            self.series_names = X.columns.tolist()
 452
 453        if isinstance(X, pd.DataFrame):
 454            if self.df_ is None:
 455                self.df_ = X
 456                X = X.values
 457            else:
 458                input_dates_prev = pd.DatetimeIndex(self.df_.index.values)
 459                frequency = pd.infer_freq(input_dates_prev)
 460                self.df_ = pd.concat([self.df_, X], axis=0)
 461                self.input_dates = pd.date_range(
 462                    start=input_dates_prev[0],
 463                    periods=len(input_dates_prev) + X.shape[0],
 464                    freq=frequency,
 465                ).values.tolist()
 466                self.df_.index = self.input_dates
 467                X = self.df_.values
 468            self.df_.columns = self.series_names
 469        else:
 470            if self.df_ is None:
 471                self.df_ = pd.DataFrame(X, columns=self.series_names)
 472            else:
 473                self.df_ = pd.concat(
 474                    [self.df_, pd.DataFrame(X, columns=self.series_names)],
 475                    axis=0,
 476                )
 477
 478        self.input_dates = ts.compute_input_dates(self.df_)
 479
 480        try:
 481            # multivariate time series
 482            n, p = X.shape
 483        except:
 484            # univariate time series
 485            n = X.shape[0]
 486            p = 1
 487        self.n_obs_ = n
 488
 489        rep_1_n = np.repeat(1, n)
 490
 491        self.y_ = None
 492        self.X_ = None
 493        self.n_series = p
 494        self.fit_objs_.clear()
 495        self.y_means_.clear()
 496        residuals_ = []
 497        self.residuals_ = None
 498        self.residuals_sims_ = None
 499        self.kde_ = None
 500        self.sims_ = None
 501        self.scaled_Z_ = None
 502        self.centered_y_is_ = []
 503
 504        if self.init_n_series_ > 1:
 505            # multivariate time series
 506            mts_input = ts.create_train_inputs(X[::-1], self.lags)
 507        else:
 508            # univariate time series
 509            mts_input = ts.create_train_inputs(
 510                X.reshape(-1, 1)[::-1], self.lags
 511            )
 512
 513        self.y_ = mts_input[0]
 514
 515        self.X_ = mts_input[1]
 516
 517        dummy_y, scaled_Z = self.cook_training_set(y=rep_1_n, X=self.X_)
 518
 519        self.scaled_Z_ = scaled_Z
 520
 521        # loop on all the time series and adjust self.obj.fit
 522        if self.verbose > 0:
 523            print(
 524                f"\n Adjusting {type(self.obj).__name__} to multivariate time series... \n"
 525            )
 526
 527        if self.show_progress is True:
 528            iterator = tqdm(range(self.init_n_series_))
 529        else:
 530            iterator = range(self.init_n_series_)
 531
 532        if self.type_pi in (
 533            "gaussian",
 534            "kde",
 535            "bootstrap",
 536            "block-bootstrap",
 537        ) or self.type_pi.startswith("vine"):
 538            for i in iterator:
 539                y_mean = np.mean(self.y_[:, i])
 540                self.y_means_[i] = y_mean
 541                centered_y_i = self.y_[:, i] - y_mean
 542                self.centered_y_is_.append(centered_y_i)
 543                self.obj.fit(X=scaled_Z, y=centered_y_i)
 544                self.fit_objs_[i] = deepcopy(self.obj)
 545                residuals_.append(
 546                    (
 547                        centered_y_i - self.fit_objs_[i].predict(scaled_Z)
 548                    ).tolist()
 549                )
 550
 551        if self.type_pi == "quantile":
 552            for i in iterator:
 553                y_mean = np.mean(self.y_[:, i])
 554                self.y_means_[i] = y_mean
 555                centered_y_i = self.y_[:, i] - y_mean
 556                self.centered_y_is_.append(centered_y_i)
 557                self.obj.fit(X=scaled_Z, y=centered_y_i)
 558                self.fit_objs_[i] = deepcopy(self.obj)
 559
 560        if self.type_pi.startswith("scp"):
 561            # split conformal prediction
 562            for i in iterator:
 563                n_y = self.y_.shape[0]
 564                n_y_half = n_y // 2
 565                first_half_idx = range(0, n_y_half)
 566                second_half_idx = range(n_y_half, n_y)
 567                y_mean_temp = np.mean(self.y_[first_half_idx, i])
 568                centered_y_i_temp = self.y_[first_half_idx, i] - y_mean_temp
 569                self.obj.fit(X=scaled_Z[first_half_idx, :], y=centered_y_i_temp)
 570                # calibrated residuals actually
 571                residuals_.append(
 572                    (
 573                        self.y_[second_half_idx, i]
 574                        - (
 575                            y_mean_temp
 576                            + self.obj.predict(scaled_Z[second_half_idx, :])
 577                        )
 578                    ).tolist()
 579                )
 580                # fit on the second half
 581                y_mean = np.mean(self.y_[second_half_idx, i])
 582                self.y_means_[i] = y_mean
 583                centered_y_i = self.y_[second_half_idx, i] - y_mean
 584                self.obj.fit(X=scaled_Z[second_half_idx, :], y=centered_y_i)
 585                self.fit_objs_[i] = deepcopy(self.obj)
 586
 587        self.residuals_ = np.asarray(residuals_).T
 588
 589        if self.type_pi == "gaussian":
 590            self.gaussian_preds_std_ = np.std(self.residuals_, axis=0)
 591
 592        if self.type_pi.startswith("scp2"):
 593            # Calculate mean and standard deviation for each column
 594            data_mean = np.mean(self.residuals_, axis=0)
 595            self.residuals_std_dev_ = np.std(self.residuals_, axis=0)
 596            # Center and scale the array using broadcasting
 597            self.residuals_ = (
 598                self.residuals_ - data_mean[np.newaxis, :]
 599            ) / self.residuals_std_dev_[np.newaxis, :]
 600
 601        if self.replications != None and "kde" in self.type_pi:
 602            if self.verbose > 0:
 603                print(f"\n Simulate residuals using {self.kernel} kernel... \n")
 604            assert self.kernel in (
 605                "gaussian",
 606                "tophat",
 607            ), "currently, 'kernel' must be either 'gaussian' or 'tophat'"
 608            kernel_bandwidths = {"bandwidth": np.logspace(-6, 6, 150)}
 609            grid = GridSearchCV(
 610                KernelDensity(kernel=self.kernel, **kwargs),
 611                param_grid=kernel_bandwidths,
 612            )
 613            grid.fit(self.residuals_)
 614
 615            if self.verbose > 0:
 616                print(
 617                    f"\n Best parameters for {self.kernel} kernel: {grid.best_params_} \n"
 618                )
 619
 620            self.kde_ = grid.best_estimator_
 621
 622        return self
 623
 624    def partial_fit(self, X, xreg=None, **kwargs):
 625        """Update the model with new observations X, with optional regressors xreg
 626
 627        Parameters:
 628
 629        X: {array-like}, shape = [n_samples, n_features]
 630            Training time series, where n_samples is the number
 631            of samples and n_features is the number of features;
 632            X must be in increasing order (most recent observations last)
 633
 634        xreg: {array-like}, shape = [n_samples, n_features_xreg]
 635            Additional (external) regressors to be passed to self.obj
 636            xreg must be in 'increasing' order (most recent observations last)
 637
 638        **kwargs: for now, additional parameters to be passed for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)
 639
 640        Returns:
 641
 642        self: object
 643        """
 644
 645        assert self.df_ is not None, "fit() must be called before partial_fit()"
 646
 647        if (isinstance(X, pd.DataFrame) is False) and isinstance(
 648            X, pd.Series
 649        ) is False:
 650            if len(X.shape) == 1:
 651                X = X.reshape(1, -1)
 652
 653            return self.fit(X, xreg, **kwargs)
 654
 655        else:
 656            if len(X.shape) == 1:
 657                X = pd.DataFrame(
 658                    X.values.reshape(1, -1), columns=self.df_.columns
 659                )
 660
 661            return self.fit(X, xreg, **kwargs)
 662
 663    def _predict_quantiles(self, h, quantiles, **kwargs):
 664        """Predict arbitrary quantiles from simulated paths."""
 665        # Ensure output dates are set
 666        self.output_dates_, _ = ts.compute_output_dates(self.df_, h)
 667
 668        # Trigger full prediction to generate self.sims_
 669        if not hasattr(self, "sims_") or self.sims_ is None:
 670            _ = self.predict(h=h, level=95, **kwargs)  # Any level triggers sim
 671
 672        result_dict = {}
 673
 674        # Stack simulations: (R, h, n_series)
 675        sims_array = np.stack([sim.values for sim in self.sims_], axis=0)
 676
 677        # Compute quantiles over replication axis
 678        q_values = np.quantile(
 679            sims_array, quantiles, axis=0
 680        )  # (n_q, h, n_series)
 681
 682        for i, q in enumerate(quantiles):
 683            # Clean label: 0.05 → "05", 0.1 → "10", 0.95 → "95"
 684            q_label = (
 685                f"{int(q * 100):02d}"
 686                if (q * 100).is_integer()
 687                else f"{q:.3f}".replace(".", "_")
 688            )
 689            for series_id in range(self.init_n_series_):
 690                series_name = self.series_names[series_id]
 691                col_name = f"quantile_{q_label}_{series_name}"
 692                result_dict[col_name] = q_values[i, :, series_id]
 693
 694        df_return_quantiles = pd.DataFrame(
 695            result_dict, index=self.output_dates_
 696        )
 697
 698        return df_return_quantiles
 699
 700    def predict(self, h=5, level=95, quantiles=None, **kwargs):
 701        """Forecast all the time series, h steps ahead"""
 702
 703        if quantiles is not None:
 704            # Validate
 705            quantiles = np.asarray(quantiles)
 706            if not ((quantiles > 0) & (quantiles < 1)).all():
 707                raise ValueError("quantiles must be between 0 and 1.")
 708            # Delegate to dedicated method
 709            return self._predict_quantiles(h=h, quantiles=quantiles, **kwargs)
 710
 711        if isinstance(level, list) or isinstance(level, np.ndarray):
 712            # Store results
 713            result_dict = {}
 714            # Loop through alphas and calculate lower/upper for each alpha level
 715            # E.g [0.5, 2.5, 5, 16.5, 25, 50]
 716            for lev in level:
 717                # Get the forecast for this alpha
 718                res = self.predict(h=h, level=lev, **kwargs)
 719                # Adjust index and collect lower/upper bounds
 720                res.lower.index = pd.to_datetime(res.lower.index)
 721                res.upper.index = pd.to_datetime(res.upper.index)
 722                # Loop over each time series (multivariate) and flatten results
 723                if isinstance(res.lower, pd.DataFrame):
 724                    for (
 725                        series
 726                    ) in (
 727                        res.lower.columns
 728                    ):  # Assumes 'lower' and 'upper' have multiple series
 729                        result_dict[f"lower_{lev}_{series}"] = (
 730                            res.lower[series].to_numpy().flatten()
 731                        )
 732                        result_dict[f"upper_{lev}_{series}"] = (
 733                            res.upper[series].to_numpy().flatten()
 734                        )
 735                else:
 736                    for series_id in range(
 737                        self.n_series
 738                    ):  # Assumes 'lower' and 'upper' have multiple series
 739                        result_dict[f"lower_{lev}_{series_id}"] = (
 740                            res.lower[series_id, :].to_numpy().flatten()
 741                        )
 742                        result_dict[f"upper_{lev}_{series_id}"] = (
 743                            res.upper[series_id, :].to_numpy().flatten()
 744                        )
 745            return pd.DataFrame(result_dict, index=self.output_dates_)
 746
 747        # only one prediction interval
 748        self.output_dates_, frequency = ts.compute_output_dates(self.df_, h)
 749
 750        self.level_ = level
 751
 752        self.return_std_ = False  # do not remove (/!\)
 753
 754        self.mean_ = None  # do not remove (/!\)
 755
 756        self.mean_ = deepcopy(self.y_)  # do not remove (/!\)
 757
 758        self.lower_ = None  # do not remove (/!\)
 759
 760        self.upper_ = None  # do not remove (/!\)
 761
 762        self.sims_ = None  # do not remove (/!\)
 763
 764        y_means_ = np.asarray(
 765            [self.y_means_[i] for i in range(self.init_n_series_)]
 766        )
 767
 768        n_features = self.init_n_series_ * self.lags
 769
 770        self.alpha_ = 100 - level
 771
 772        pi_multiplier = norm.ppf(1 - self.alpha_ / 200)
 773
 774        if "return_std" in kwargs:  # bayesian forecasting
 775            self.return_std_ = True
 776            self.preds_std_ = []
 777            DescribeResult = namedtuple(
 778                "DescribeResult", ("mean", "lower", "upper")
 779            )  # to be updated
 780
 781        if "return_pi" in kwargs:  # split conformal, without simulation
 782            mean_pi_ = []
 783            lower_pi_ = []
 784            upper_pi_ = []
 785            median_pi_ = []
 786            DescribeResult = namedtuple(
 787                "DescribeResult", ("mean", "lower", "upper")
 788            )  # to be updated
 789
 790        if self.kde_ != None and "kde" in self.type_pi:  # kde
 791            target_cols = self.df_.columns[
 792                : self.init_n_series_
 793            ]  # Get target column names
 794            if self.verbose == 1:
 795                self.residuals_sims_ = tuple(
 796                    self.kde_.sample(
 797                        n_samples=h, random_state=self.seed + 100 * i
 798                    )  # Keep full sample
 799                    for i in tqdm(range(self.replications))
 800                )
 801            elif self.verbose == 0:
 802                self.residuals_sims_ = tuple(
 803                    self.kde_.sample(
 804                        n_samples=h, random_state=self.seed + 100 * i
 805                    )  # Keep full sample
 806                    for i in range(self.replications)
 807                )
 808
 809            # Convert to DataFrames after sampling
 810            self.residuals_sims_ = tuple(
 811                pd.DataFrame(
 812                    sim,  # Keep all columns
 813                    columns=target_cols,  # Use original target column names
 814                    index=self.output_dates_,
 815                )
 816                for sim in self.residuals_sims_
 817            )
 818
 819        if self.type_pi in ("bootstrap", "scp-bootstrap", "scp2-bootstrap"):
 820            assert self.replications is not None and isinstance(
 821                self.replications, int
 822            ), "'replications' must be provided and be an integer"
 823            if self.verbose == 1:
 824                self.residuals_sims_ = tuple(
 825                    ts.bootstrap(
 826                        self.residuals_,
 827                        h=h,
 828                        block_size=None,
 829                        seed=self.seed + 100 * i,
 830                    )
 831                    for i in tqdm(range(self.replications))
 832                )
 833            elif self.verbose == 0:
 834                self.residuals_sims_ = tuple(
 835                    ts.bootstrap(
 836                        self.residuals_,
 837                        h=h,
 838                        block_size=None,
 839                        seed=self.seed + 100 * i,
 840                    )
 841                    for i in range(self.replications)
 842                )
 843
 844        if self.type_pi in (
 845            "block-bootstrap",
 846            "scp-block-bootstrap",
 847            "scp2-block-bootstrap",
 848        ):
 849            if self.block_size is None:
 850                self.block_size = int(
 851                    np.ceil(3.15 * (self.residuals_.shape[0] ** (1 / 3)))
 852                )
 853
 854            assert self.replications is not None and isinstance(
 855                self.replications, int
 856            ), "'replications' must be provided and be an integer"
 857            if self.verbose == 1:
 858                self.residuals_sims_ = tuple(
 859                    ts.bootstrap(
 860                        self.residuals_,
 861                        h=h,
 862                        block_size=self.block_size,
 863                        seed=self.seed + 100 * i,
 864                    )
 865                    for i in tqdm(range(self.replications))
 866                )
 867            elif self.verbose == 0:
 868                self.residuals_sims_ = tuple(
 869                    ts.bootstrap(
 870                        self.residuals_,
 871                        h=h,
 872                        block_size=self.block_size,
 873                        seed=self.seed + 100 * i,
 874                    )
 875                    for i in range(self.replications)
 876                )
 877
 878        if "vine" in self.type_pi:
 879            if self.verbose == 1:
 880                self.residuals_sims_ = tuple(
 881                    vinecopula_sample(
 882                        x=self.residuals_,
 883                        n_samples=h,
 884                        method=self.type_pi,
 885                        random_state=self.seed + 100 * i,
 886                    )
 887                    for i in tqdm(range(self.replications))
 888                )
 889            elif self.verbose == 0:
 890                self.residuals_sims_ = tuple(
 891                    vinecopula_sample(
 892                        x=self.residuals_,
 893                        n_samples=h,
 894                        method=self.type_pi,
 895                        random_state=self.seed + 100 * i,
 896                    )
 897                    for i in range(self.replications)
 898                )
 899
 900        mean_ = deepcopy(self.mean_)
 901
 902        for i in range(h):
 903
 904            new_obs = ts.reformat_response(mean_, self.lags)
 905            new_X = new_obs.reshape(1, -1)
 906            cooked_new_X = self.cook_test_set(new_X, **kwargs)
 907
 908            if "return_std" in kwargs:
 909                self.preds_std_.append(
 910                    [
 911                        np.asarray(
 912                            self.fit_objs_[i].predict(
 913                                cooked_new_X, return_std=True
 914                            )[1]
 915                        ).item()
 916                        for i in range(self.n_series)
 917                    ]
 918                )
 919
 920            if "return_pi" in kwargs:
 921                for i in range(self.n_series):
 922                    preds_pi = self.fit_objs_[i].predict(cooked_new_X, **kwargs)
 923                    mean_pi_.append(preds_pi.mean[0])
 924                    lower_pi_.append(preds_pi.lower[0])
 925                    upper_pi_.append(preds_pi.upper[0])
 926
 927            if self.type_pi != "quantile":
 928                predicted_cooked_new_X = np.asarray(
 929                    [
 930                        np.asarray(
 931                            self.fit_objs_[i].predict(cooked_new_X)
 932                        ).item()
 933                        for i in range(self.init_n_series_)
 934                    ]
 935                )
 936            else:
 937                predicted_cooked_new_X = np.asarray(
 938                    [
 939                        np.asarray(
 940                            self.fit_objs_[i]
 941                            .predict(cooked_new_X, return_pi=True)
 942                            .upper
 943                        ).item()
 944                        for i in range(self.init_n_series_)
 945                    ]
 946                )
 947
 948            preds = np.asarray(y_means_ + predicted_cooked_new_X)
 949
 950            # Create full row with both predictions and external regressors
 951            if self.xreg_ is not None and "xreg" in kwargs:
 952                next_xreg = kwargs["xreg"].iloc[i: i + 1].values.flatten()
 953                full_row = np.concatenate([preds, next_xreg])
 954            else:
 955                full_row = preds
 956
 957            # Create a new row with same number of columns as mean_
 958            new_row = np.zeros((1, mean_.shape[1]))
 959            new_row[0, : full_row.shape[0]] = full_row
 960
 961            # Maintain the full dimensionality by using vstack instead of rbind
 962            mean_ = np.vstack([new_row, mean_[:-1]])
 963
 964        # Final output should only include the target columns
 965        self.mean_ = pd.DataFrame(
 966            mean_[0:h, : self.init_n_series_][::-1],
 967            columns=self.df_.columns[: self.init_n_series_],
 968            index=self.output_dates_,
 969        )
 970
 971        # function's return ----------------------------------------------------------------------
 972        if (
 973            (("return_std" not in kwargs) and ("return_pi" not in kwargs))
 974            and (self.type_pi not in ("gaussian", "scp"))
 975        ) or ("vine" in self.type_pi):
 976
 977            if self.replications is None:
 978                return self.mean_.iloc[:, : self.init_n_series_]
 979
 980            # if "return_std" not in kwargs and self.replications is not None
 981            meanf = []
 982            medianf = []
 983            lower = []
 984            upper = []
 985
 986            if "scp2" in self.type_pi:
 987
 988                if self.verbose == 1:
 989                    self.sims_ = tuple(
 990                        (
 991                            self.mean_
 992                            + self.residuals_sims_[i]
 993                            * self.residuals_std_dev_[np.newaxis, :]
 994                            for i in tqdm(range(self.replications))
 995                        )
 996                    )
 997                elif self.verbose == 0:
 998                    self.sims_ = tuple(
 999                        (
1000                            self.mean_
1001                            + self.residuals_sims_[i]
1002                            * self.residuals_std_dev_[np.newaxis, :]
1003                            for i in range(self.replications)
1004                        )
1005                    )
1006            else:
1007
1008                if self.verbose == 1:
1009                    self.sims_ = tuple(
1010                        (
1011                            self.mean_ + self.residuals_sims_[i]
1012                            for i in tqdm(range(self.replications))
1013                        )
1014                    )
1015                elif self.verbose == 0:
1016                    self.sims_ = tuple(
1017                        (
1018                            self.mean_ + self.residuals_sims_[i]
1019                            for i in range(self.replications)
1020                        )
1021                    )
1022
1023            DescribeResult = namedtuple(
1024                "DescribeResult", ("mean", "sims", "lower", "upper")
1025            )
1026            for ix in range(self.init_n_series_):
1027                sims_ix = getsims(self.sims_, ix)
1028                if self.agg == "mean":
1029                    meanf.append(np.mean(sims_ix, axis=1))
1030                else:
1031                    medianf.append(np.median(sims_ix, axis=1))
1032                lower.append(np.quantile(sims_ix, q=self.alpha_ / 200, axis=1))
1033                upper.append(
1034                    np.quantile(sims_ix, q=1 - self.alpha_ / 200, axis=1)
1035                )
1036            self.mean_ = pd.DataFrame(
1037                np.asarray(meanf).T,
1038                columns=self.series_names[
1039                    : self.init_n_series_
1040                ],  # self.df_.columns,
1041                index=self.output_dates_,
1042            )
1043
1044            self.lower_ = pd.DataFrame(
1045                np.asarray(lower).T,
1046                columns=self.series_names[
1047                    : self.init_n_series_
1048                ],  # self.df_.columns,
1049                index=self.output_dates_,
1050            )
1051
1052            self.upper_ = pd.DataFrame(
1053                np.asarray(upper).T,
1054                columns=self.series_names[
1055                    : self.init_n_series_
1056                ],  # self.df_.columns,
1057                index=self.output_dates_,
1058            )
1059
1060            try:
1061                self.median_ = pd.DataFrame(
1062                    np.asarray(medianf).T,
1063                    columns=self.series_names[
1064                        : self.init_n_series_
1065                    ],  # self.df_.columns,
1066                    index=self.output_dates_,
1067                )
1068            except Exception as e:
1069                pass
1070
1071            return DescribeResult(
1072                self.mean_, self.sims_, self.lower_, self.upper_
1073            )
1074
1075        if (
1076            (("return_std" in kwargs) or ("return_pi" in kwargs))
1077            and (self.type_pi not in ("gaussian", "scp"))
1078        ) or "vine" in self.type_pi:
1079            DescribeResult = namedtuple(
1080                "DescribeResult", ("mean", "lower", "upper")
1081            )
1082
1083            self.mean_ = pd.DataFrame(
1084                np.asarray(self.mean_),
1085                columns=self.series_names,  # self.df_.columns,
1086                index=self.output_dates_,
1087            )
1088
1089            if "return_std" in kwargs:
1090
1091                self.preds_std_ = np.asarray(self.preds_std_)
1092
1093                self.lower_ = pd.DataFrame(
1094                    self.mean_.values - pi_multiplier * self.preds_std_,
1095                    columns=self.series_names,  # self.df_.columns,
1096                    index=self.output_dates_,
1097                )
1098
1099                self.upper_ = pd.DataFrame(
1100                    self.mean_.values + pi_multiplier * self.preds_std_,
1101                    columns=self.series_names,  # self.df_.columns,
1102                    index=self.output_dates_,
1103                )
1104
1105            if "return_pi" in kwargs:
1106
1107                self.lower_ = pd.DataFrame(
1108                    np.asarray(lower_pi_).reshape(h, self.n_series)
1109                    + y_means_[np.newaxis, :],
1110                    columns=self.series_names,  # self.df_.columns,
1111                    index=self.output_dates_,
1112                )
1113
1114                self.upper_ = pd.DataFrame(
1115                    np.asarray(upper_pi_).reshape(h, self.n_series)
1116                    + y_means_[np.newaxis, :],
1117                    columns=self.series_names,  # self.df_.columns,
1118                    index=self.output_dates_,
1119                )
1120
1121            res = DescribeResult(self.mean_, self.lower_, self.upper_)
1122
1123            if self.xreg_ is not None:
1124                if len(self.xreg_.shape) > 1:
1125                    res2 = mx.tuple_map(
1126                        res,
1127                        lambda x: mo.delete_last_columns(
1128                            x, num_columns=self.xreg_.shape[1]
1129                        ),
1130                    )
1131                else:
1132                    res2 = mx.tuple_map(
1133                        res, lambda x: mo.delete_last_columns(x, num_columns=1)
1134                    )
1135                return DescribeResult(res2[0], res2[1], res2[2])
1136
1137            return res
1138
1139        if self.type_pi == "gaussian":
1140
1141            DescribeResult = namedtuple(
1142                "DescribeResult", ("mean", "lower", "upper")
1143            )
1144
1145            self.mean_ = pd.DataFrame(
1146                np.asarray(self.mean_),
1147                columns=self.series_names,  # self.df_.columns,
1148                index=self.output_dates_,
1149            )
1150
1151            self.lower_ = pd.DataFrame(
1152                self.mean_.values - pi_multiplier * self.gaussian_preds_std_,
1153                columns=self.series_names,  # self.df_.columns,
1154                index=self.output_dates_,
1155            )
1156
1157            self.upper_ = pd.DataFrame(
1158                self.mean_.values + pi_multiplier * self.gaussian_preds_std_,
1159                columns=self.series_names,  # self.df_.columns,
1160                index=self.output_dates_,
1161            )
1162
1163            res = DescribeResult(self.mean_, self.lower_, self.upper_)
1164
1165            if self.xreg_ is not None:
1166                if len(self.xreg_.shape) > 1:
1167                    res2 = mx.tuple_map(
1168                        res,
1169                        lambda x: mo.delete_last_columns(
1170                            x, num_columns=self.xreg_.shape[1]
1171                        ),
1172                    )
1173                else:
1174                    res2 = mx.tuple_map(
1175                        res, lambda x: mo.delete_last_columns(x, num_columns=1)
1176                    )
1177                return DescribeResult(res2[0], res2[1], res2[2])
1178
1179            return res
1180
1181        if self.type_pi == "quantile":
1182
1183            DescribeResult = namedtuple("DescribeResult", ("mean"))
1184
1185            self.mean_ = pd.DataFrame(
1186                np.asarray(self.mean_),
1187                columns=self.series_names,  # self.df_.columns,
1188                index=self.output_dates_,
1189            )
1190
1191            res = DescribeResult(self.mean_)
1192
1193            if self.xreg_ is not None:
1194                if len(self.xreg_.shape) > 1:
1195                    res2 = mx.tuple_map(
1196                        res,
1197                        lambda x: mo.delete_last_columns(
1198                            x, num_columns=self.xreg_.shape[1]
1199                        ),
1200                    )
1201                else:
1202                    res2 = mx.tuple_map(
1203                        res, lambda x: mo.delete_last_columns(x, num_columns=1)
1204                    )
1205                return DescribeResult(res2[0])
1206
1207            return res
1208
1209        # After prediction loop, ensure sims only contain target columns
1210        if self.sims_ is not None:
1211            if self.verbose == 1:
1212                self.sims_ = tuple(
 1213                    sim[:h,]  # Only keep the first h rows
1214                    for sim in tqdm(self.sims_)
1215                )
1216            elif self.verbose == 0:
1217                self.sims_ = tuple(
 1218                    sim[:h,]  # Only keep the first h rows
1219                    for sim in self.sims_
1220                )
1221
1222            # Convert numpy arrays to DataFrames with proper columns
1223            self.sims_ = tuple(
1224                pd.DataFrame(
1225                    sim,
1226                    columns=self.df_.columns[: self.init_n_series_],
1227                    index=self.output_dates_,
1228                )
1229                for sim in self.sims_
1230            )
1231
1232        if self.type_pi in (
1233            "kde",
1234            "bootstrap",
1235            "block-bootstrap",
1236            "vine-copula",
1237        ):
1238            if self.xreg_ is not None:
1239                # Use getsimsxreg when external regressors are present
1240                target_cols = self.df_.columns[: self.init_n_series_]
1241                self.sims_ = getsimsxreg(
1242                    self.sims_, self.output_dates_, target_cols
1243                )
1244            else:
1245                # Use original getsims for backward compatibility
1246                self.sims_ = getsims(self.sims_)
1247
1248    def _crps_ensemble(self, y_true, simulations, axis=0):
1249        """
1250        Compute the Continuous Ranked Probability Score (CRPS) for an ensemble of simulations.
1251
1252        The CRPS is a measure of the distance between the cumulative distribution
1253        function (CDF) of a forecast and the CDF of the observed value. This method
1254        computes the CRPS in a vectorized form for an ensemble of simulations, efficiently
1255        handling the case where there is only one simulation.
1256
1257        Parameters
1258        ----------
1259        y_true : array_like, shape (n,)
1260            A 1D array of true values (observations).
1261            Each element represents the true value for a given sample.
1262
 1263        simulations : array_like, shape (R, n) when axis=0 (default), or (n, R) when axis=1
 1264            A 2D array of simulated values. After the optional transpose, each row
 1265            corresponds to a sample and each column to one simulation of that sample.
 1266
 1267        axis : int, optional, default=0
 1268            Orientation of `simulations`. If axis=0, the array is transposed
 1269            internally so that it has shape (n, R) before the score is computed.
1270
1271        Returns
1272        -------
1273        crps : ndarray, shape (n,)
1274            A 1D array of CRPS scores, one for each sample.
1275
1276        Notes
1277        -----
1278        The CRPS score is computed as:
1279
1280        CRPS(y_true, simulations) = E[|X - y|] - 0.5 * E[|X - X'|]
1281
1282        Where:
1283        - `X` is the ensemble of simulations.
1284        - `y` is the true value.
1285        - `X'` is a second independent sample from the ensemble.
1286
1287        The calculation is vectorized to optimize performance for large datasets.
1288
1289        The edge case where `R=1` (only one simulation) is handled by returning
1290        only `term1` (i.e., no ensemble spread).
1291        """
1292        sims = np.asarray(simulations)  # Convert simulations to numpy array
1293        if axis == 0:
1294            sims = sims.T  # Transpose if the axis is 0
1295        n, R = sims.shape  # n = number of samples, R = number of simulations
1296        # Term 1: E|X - y|, average absolute difference between simulations and true value
1297        term1 = np.mean(np.abs(sims - y_true[:, np.newaxis]), axis=1)
1298        # Handle edge case: if R == 1, return term1 (no spread in ensemble)
1299        if R == 1:
1300            return term1
1301        # Term 2: 0.5 * E|X - X'|, using efficient sorted formula
1302        sims_sorted = np.sort(sims, axis=1)  # Sort simulations along each row
1303        # Correct coefficients for efficient calculation
1304        j = np.arange(R)  # 0-indexed positions in the sorted simulations
1305        coefficients = (2 * (j + 1) - R - 1) / (
1306            R * (R - 1)
1307        )  # Efficient coefficient calculation
1308        # Dot product along the second axis (over the simulations)
1309        term2 = np.dot(sims_sorted, coefficients)
1310        # Return CRPS score: term1 - 0.5 * term2
1311        return term1 - 0.5 * term2
1312
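To make the Notes above concrete, here is a minimal, self-contained sketch of the same computation on hypothetical toy values (the array contents are invented for illustration and are not part of the library):

import numpy as np

# toy ensemble: n = 4 observations, R = 3 simulations each (hypothetical values)
y_true = np.array([1.0, 2.0, 0.5, 3.0])
sims = np.array([[0.8, 1.1, 1.3],
                 [1.9, 2.2, 2.4],
                 [0.4, 0.6, 0.9],
                 [2.7, 3.1, 3.3]])   # shape (n, R)

n, R = sims.shape
term1 = np.mean(np.abs(sims - y_true[:, np.newaxis]), axis=1)   # E|X - y|
sims_sorted = np.sort(sims, axis=1)
j = np.arange(R)
coefficients = (2 * (j + 1) - R - 1) / (R * (R - 1))
term2 = np.dot(sims_sorted, coefficients)                       # ensemble-spread term
print(term1 - 0.5 * term2)                                      # one CRPS value per observation

The same values are returned by `_crps_ensemble(y_true, sims, axis=1)`, or by passing `sims.T` with the default `axis=0`.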
1313    def score(
1314        self,
1315        X,
1316        training_index,
1317        testing_index,
1318        scoring=None,
1319        alpha=0.5,
1320        **kwargs,
1321    ):
1322        """Train on training_index, score on testing_index."""
1323
1324        assert (
1325            bool(set(training_index).intersection(set(testing_index))) == False
1326        ), "Non-overlapping 'training_index' and 'testing_index' required"
1327
1328        # Dimensions
1329        try:
1330            # multivariate time series
1331            n, p = X.shape
 1332        except Exception:
1333            # univariate time series
1334            n = X.shape[0]
1335            p = 1
1336
1337        # Training and testing sets
1338        if p > 1:
1339            X_train = X[training_index, :]
1340            X_test = X[testing_index, :]
1341        else:
1342            X_train = X[training_index]
1343            X_test = X[testing_index]
1344
1345        # Horizon
1346        h = len(testing_index)
1347        assert (
1348            len(training_index) + h
1349        ) <= n, "Please check lengths of training and testing windows"
1350
1351        # Fit and predict
1352        self.fit(X_train, **kwargs)
1353        preds = self.predict(h=h, **kwargs)
1354
1355        if scoring is None:
1356            scoring = "neg_root_mean_squared_error"
1357
1358        if scoring == "pinball":
1359            # Predict requested quantile
1360            q_pred = self.predict(h=h, quantiles=[alpha], **kwargs)
1361            # Handle multivariate
1362            scores = []
1363            for j in range(p):
1364                series_name = getattr(self, "series_names", [f"Series_{j}"])[j]
1365                q_label = (
1366                    f"{int(alpha * 100):02d}"
1367                    if (alpha * 100).is_integer()
1368                    else f"{alpha:.3f}".replace(".", "_")
1369                )
1370                col = f"quantile_{q_label}_{series_name}"
1371                if col not in q_pred.columns:
1372                    raise ValueError(
1373                        f"Column '{col}' not found in quantile forecast output."
1374                    )
1375                y_true_j = X_test[:, j]
1376                y_pred_j = q_pred[col].values
1377                # Compute pinball loss for this series
1378                loss = mean_pinball_loss(y_true_j, y_pred_j, alpha=alpha)
1379                scores.append(loss)
1380            # Return average over series
1381            return np.mean(scores)
1382
1383        if scoring == "crps":
1384            # Ensure simulations exist
1385            preds = self.predict(h=h, **kwargs)  # triggers self.sims_
1386            # Extract simulations: list of DataFrames → (R, h, p)
1387            sims_vals = np.stack(
1388                [sim.values for sim in self.sims_], axis=0
1389            )  # (R, h, p)
1390            crps_scores = []
1391            for j in range(p):
1392                y_true_j = X_test[:, j]
1393                sims_j = sims_vals[:, :, j]  # (R, h)
1394                crps_j = self._crps_ensemble(np.asarray(y_true_j), sims_j)
1395                crps_scores.append(np.mean(crps_j))  # average over horizon
1396            return np.mean(crps_scores)  # average over series
1397
1398        # check inputs
1399        assert scoring in (
1400            "explained_variance",
1401            "neg_mean_absolute_error",
1402            "neg_mean_squared_error",
1403            "neg_root_mean_squared_error",
1404            "neg_mean_squared_log_error",
1405            "neg_median_absolute_error",
1406            "r2",
1407        ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \
1408                               'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \
1409                               'neg_median_absolute_error', 'r2')"
1410
1411        scoring_options = {
1412            "explained_variance": skm2.explained_variance_score,
1413            "neg_mean_absolute_error": skm2.mean_absolute_error,
1414            "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2),
1415            "neg_root_mean_squared_error": lambda x, y: np.sqrt(
1416                np.mean((x - y) ** 2)
1417            ),
1418            "neg_mean_squared_log_error": skm2.mean_squared_log_error,
1419            "neg_median_absolute_error": skm2.median_absolute_error,
1420            "r2": skm2.r2_score,
1421        }
1422
1423        return scoring_options[scoring](X_test, preds)
1424
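A usage sketch for `score` (the data, base learner, and split sizes below are hypothetical, chosen only for illustration):

import numpy as np
import nnetsauce as ns
from sklearn import linear_model

np.random.seed(123)
X = np.random.rand(50, 3)   # hypothetical multivariate series, most recent rows last

obj_MTS = ns.MTS(linear_model.BayesianRidge(), lags=2, n_hidden_features=5)

# fit on the first 40 observations, score the 10 held-out steps
training_index = np.arange(0, 40)
testing_index = np.arange(40, 50)
print(obj_MTS.score(X, training_index, testing_index,
                    scoring="neg_root_mean_squared_error"))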
1425    def plot(self, series=None, type_axis="dates", type_plot="pi"):
1426        """Plot time series forecast
1427
1428        Parameters:
1429
1430        series: {integer} or {string}
1431            series index or name
1432
1433        """
1434
1435        assert all(
1436            [
1437                self.mean_ is not None,
1438                self.lower_ is not None,
1439                self.upper_ is not None,
1440                self.output_dates_ is not None,
1441            ]
1442        ), "model forecasting must be obtained first (with predict)"
1443
1444        if series is None:
1445            # assert (
1446            #    self.init_n_series_ == 1
1447            # ), "please specify series index or name (n_series > 1)"
1448            series = 0
1449
1450        if isinstance(series, str):
1451            assert (
1452                series in self.series_names
1453            ), f"series {series} doesn't exist in the input dataset"
1454            series_idx = self.df_.columns.get_loc(series)
1455        else:
1456            assert isinstance(series, int) and (
1457                0 <= series < self.n_series
1458            ), f"check series index (< {self.n_series})"
1459            series_idx = series
1460
1461        y_all = list(self.df_.iloc[:, series_idx]) + list(
1462            self.mean_.iloc[:, series_idx]
1463        )
1464        y_test = list(self.mean_.iloc[:, series_idx])
1465        n_points_all = len(y_all)
1466        n_points_train = self.df_.shape[0]
1467
1468        if type_axis == "numeric":
1469            x_all = [i for i in range(n_points_all)]
1470            x_test = [i for i in range(n_points_train, n_points_all)]
1471
1472        if type_axis == "dates":  # use dates
1473            x_all = np.concatenate(
1474                (self.input_dates.values, self.output_dates_.values), axis=None
1475            )
1476            x_test = self.output_dates_.values
1477
1478        if type_plot == "pi":
1479            fig, ax = plt.subplots()
1480            ax.plot(x_all, y_all, "-")
1481            ax.plot(x_test, y_test, "-", color="orange")
1482            ax.fill_between(
1483                x_test,
1484                self.lower_.iloc[:, series_idx],
1485                self.upper_.iloc[:, series_idx],
1486                alpha=0.2,
1487                color="orange",
1488            )
1489            if self.replications is None:
1490                if self.n_series > 1:
1491                    plt.title(
1492                        f"prediction intervals for {series}",
1493                        loc="left",
1494                        fontsize=12,
1495                        fontweight=0,
1496                        color="black",
1497                    )
1498                else:
1499                    plt.title(
1500                        f"prediction intervals for input time series",
1501                        loc="left",
1502                        fontsize=12,
1503                        fontweight=0,
1504                        color="black",
1505                    )
1506                plt.show()
1507            else:  # self.replications is not None
1508                if self.n_series > 1:
1509                    plt.title(
1510                        f"prediction intervals for {self.replications} simulations of {series}",
1511                        loc="left",
1512                        fontsize=12,
1513                        fontweight=0,
1514                        color="black",
1515                    )
1516                else:
1517                    plt.title(
1518                        f"prediction intervals for {self.replications} simulations of input time series",
1519                        loc="left",
1520                        fontsize=12,
1521                        fontweight=0,
1522                        color="black",
1523                    )
1524                plt.show()
1525
1526        if type_plot == "spaghetti":
1527            palette = plt.get_cmap("Set1")
1528            sims_ix = getsims(self.sims_, series_idx)
1529            plt.plot(x_all, y_all, "-")
1530            for col_ix in range(
1531                sims_ix.shape[1]
1532            ):  # avoid this when there are thousands of simulations
1533                plt.plot(
1534                    x_test,
1535                    sims_ix[:, col_ix],
1536                    "-",
1537                    color=palette(col_ix),
1538                    linewidth=1,
1539                    alpha=0.9,
1540                )
1541            plt.plot(x_all, y_all, "-", color="black")
1542            plt.plot(x_test, y_test, "-", color="blue")
1543            # Add titles
1544            if self.n_series > 1:
1545                plt.title(
1546                    f"{self.replications} simulations of {series}",
1547                    loc="left",
1548                    fontsize=12,
1549                    fontweight=0,
1550                    color="black",
1551                )
1552            else:
1553                plt.title(
1554                    f"{self.replications} simulations of input time series",
1555                    loc="left",
1556                    fontsize=12,
1557                    fontweight=0,
1558                    color="black",
1559                )
1560            plt.xlabel("Time")
1561            plt.ylabel("Values")
1562            # Show the graph
1563            plt.show()
1564
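A usage sketch for `plot`, assuming a model `obj_MTS` has already been fitted and `predict` has been called (with `replications` set if a spaghetti plot of simulated paths is wanted); the series name used below is hypothetical:

# prediction-interval plot for the first series, with dates on the x-axis
obj_MTS.plot(series=0, type_axis="dates", type_plot="pi")

# spaghetti plot of simulated paths for a series referenced by name
obj_MTS.plot(series="series1", type_plot="spaghetti")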
1565    def cross_val_score(
1566        self,
1567        X,
1568        scoring="root_mean_squared_error",
1569        n_jobs=None,
1570        verbose=0,
1571        xreg=None,
1572        initial_window=5,
1573        horizon=3,
1574        fixed_window=False,
1575        show_progress=True,
1576        level=95,
1577        alpha=0.5,
1578        **kwargs,
1579    ):
1580        """Evaluate a score by time series cross-validation.
1581
1582        Parameters:
1583
1584            X: {array-like, sparse matrix} of shape (n_samples, n_features)
1585                The data to fit.
1586
 1587            scoring: str or a function
 1588                A str in ('pinball', 'crps', 'root_mean_squared_error', 'mean_squared_error',
 1589                'mean_error', 'mean_absolute_error', 'mean_percentage_error',
 1590                'mean_absolute_percentage_error', 'winkler_score', 'coverage')
 1591                Or a function defined like 'coverage' and 'winkler_score' in `utils.timeseries`
1592
1593            n_jobs: int, default=None
1594                Number of jobs to run in parallel.
1595
1596            verbose: int, default=0
1597                The verbosity level.
1598
1599            xreg: array-like, optional (default=None)
1600                Additional (external) regressors to be passed to `fit`
1601                xreg must be in 'increasing' order (most recent observations last)
1602
1603            initial_window: int
1604                initial number of consecutive values in each training set sample
1605
1606            horizon: int
1607                number of consecutive values in test set sample
1608
1609            fixed_window: boolean
1610                if False, all training samples start at index 0, and the training
1611                window's size is increasing.
1612                if True, the training window's size is fixed, and the window is
1613                rolling forward
1614
1615            show_progress: boolean
1616                if True, a progress bar is printed
1617
1618            level: int
1619                confidence level for prediction intervals
1620
1621            alpha: float
1622                quantile level for pinball loss if scoring='pinball'
1623                0 < alpha < 1
1624
1625            **kwargs: dict
1626                additional parameters to be passed to `fit` and `predict`
1627
1628        Returns:
1629
 1630            A tuple: the raw errors for each split, and descriptive statistics of those errors
1631
1632        """
1633        tscv = TimeSeriesSplit()
1634
1635        tscv_obj = tscv.split(
1636            X,
1637            initial_window=initial_window,
1638            horizon=horizon,
1639            fixed_window=fixed_window,
1640        )
1641
1642        if isinstance(scoring, str):
1643
1644            assert scoring in (
1645                "pinball",
1646                "crps",
1647                "root_mean_squared_error",
1648                "mean_squared_error",
1649                "mean_error",
1650                "mean_absolute_error",
1651                "mean_percentage_error",
1652                "mean_absolute_percentage_error",
1653                "winkler_score",
1654                "coverage",
 1655            ), "must have scoring in ('pinball', 'crps', 'root_mean_squared_error', 'mean_squared_error', 'mean_error', 'mean_absolute_error', 'mean_percentage_error', 'mean_absolute_percentage_error', 'winkler_score', 'coverage')"
1656
1657            def err_func(X_test, X_pred, scoring, alpha=0.5):
1658                if (self.replications is not None) or (
1659                    self.type_pi == "gaussian"
1660                ):  # probabilistic
1661                    if scoring == "pinball":
1662                        # Predict requested quantile
1663                        q_pred = self.predict(
1664                            h=len(X_test), quantiles=[alpha], **kwargs
1665                        )
1666                        # Handle multivariate
1667                        scores = []
1668                        p = X_test.shape[1] if len(X_test.shape) > 1 else 1
1669                        for j in range(p):
1670                            series_name = getattr(
1671                                self, "series_names", [f"Series_{j}"]
1672                            )[j]
1673                            q_label = (
1674                                f"{int(alpha * 100):02d}"
1675                                if (alpha * 100).is_integer()
1676                                else f"{alpha:.3f}".replace(".", "_")
1677                            )
1678                            col = f"quantile_{q_label}_{series_name}"
1679                            if col not in q_pred.columns:
1680                                raise ValueError(
1681                                    f"Column '{col}' not found in quantile forecast output."
1682                                )
1683                            try:
1684                                y_true_j = X_test[:, j] if p > 1 else X_test
 1685                            except Exception:
1686                                y_true_j = (
1687                                    X_test.iloc[:, j]
1688                                    if p > 1
1689                                    else X_test.values
1690                                )
1691                            y_pred_j = q_pred[col].values
1692                            # Compute pinball loss for this series
1693                            loss = mean_pinball_loss(
1694                                y_true_j, y_pred_j, alpha=alpha
1695                            )
1696                            scores.append(loss)
1697                        # Return average over series
1698                        return np.mean(scores)
1699                    elif scoring == "crps":
1700                        # Ensure simulations exist
1701                        _ = self.predict(
1702                            h=len(X_test), **kwargs
1703                        )  # triggers self.sims_
1704                        # Extract simulations: list of DataFrames → (R, h, p)
1705                        sims_vals = np.stack(
1706                            [sim.values for sim in self.sims_], axis=0
1707                        )  # (R, h, p)
1708                        crps_scores = []
1709                        p = X_test.shape[1] if len(X_test.shape) > 1 else 1
1710                        for j in range(p):
1711                            try:
1712                                y_true_j = X_test[:, j] if p > 1 else X_test
1713                            except Exception as e:
1714                                y_true_j = (
1715                                    X_test.iloc[:, j]
1716                                    if p > 1
1717                                    else X_test.values
1718                                )
1719                            sims_j = sims_vals[:, :, j]  # (R, h)
1720                            crps_j = self._crps_ensemble(
1721                                np.asarray(y_true_j), sims_j
1722                            )
1723                            crps_scores.append(
1724                                np.mean(crps_j)
1725                            )  # average over horizon
1726                        return np.mean(crps_scores)  # average over series
1727                    if scoring == "winkler_score":
1728                        return winkler_score(X_pred, X_test, level=level)
1729                    elif scoring == "coverage":
1730                        return coverage(X_pred, X_test, level=level)
1731                    else:
1732                        return mean_errors(
1733                            pred=X_pred.mean, actual=X_test, scoring=scoring
1734                        )
1735                else:  # not probabilistic
1736                    return mean_errors(
1737                        pred=X_pred, actual=X_test, scoring=scoring
1738                    )
1739
1740        else:  # isinstance(scoring, str) = False
1741
1742            err_func = scoring
1743
1744        errors = []
1745
1746        train_indices = []
1747
1748        test_indices = []
1749
1750        for train_index, test_index in tscv_obj:
1751            train_indices.append(train_index)
1752            test_indices.append(test_index)
1753
1754        if show_progress is True:
1755            iterator = tqdm(
1756                zip(train_indices, test_indices), total=len(train_indices)
1757            )
1758        else:
1759            iterator = zip(train_indices, test_indices)
1760
1761        for train_index, test_index in iterator:
1762
1763            if verbose == 1:
1764                print(f"TRAIN: {train_index}")
1765                print(f"TEST: {test_index}")
1766
1767            if isinstance(X, pd.DataFrame):
1768                self.fit(X.iloc[train_index, :], xreg=xreg, **kwargs)
1769                X_test = X.iloc[test_index, :]
1770            else:
1771                self.fit(X[train_index, :], xreg=xreg, **kwargs)
1772                X_test = X[test_index, :]
1773            X_pred = self.predict(h=int(len(test_index)), level=level, **kwargs)
1774
1775            errors.append(err_func(X_test, X_pred, scoring, alpha=alpha))
1776
1777        res = np.asarray(errors)
1778
1779        return res, describe(res)
1780
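A sketch of rolling-origin evaluation with `cross_val_score` (data, base learner, and window sizes below are hypothetical):

import numpy as np
import nnetsauce as ns
from sklearn import linear_model

np.random.seed(123)
X = np.random.rand(60, 3)   # hypothetical multivariate series

obj_MTS = ns.MTS(linear_model.Ridge(), lags=2, n_hidden_features=5,
                 type_pi="kde", replications=100)

# expanding training window, starting at 20 observations, forecasting 5 steps per split
errors, summary = obj_MTS.cross_val_score(
    X,
    scoring="root_mean_squared_error",
    initial_window=20,
    horizon=5,
    fixed_window=False,
    show_progress=True,
)
print(summary)

The first element of the returned tuple holds the raw error for each split; the second holds descriptive statistics over those errors.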
1781    def _compute_information_criterion(self, curr_lags, criterion="AIC"):
1782        """Compute information criterion using existing residuals
1783
1784        Parameters
1785        ----------
1786        curr_lags : int
1787            Current number of lags being evaluated
1788        criterion : str
1789            One of 'AIC', 'AICc', or 'BIC'
1790
1791        Returns
1792        -------
1793        float
1794            Information criterion value or inf if parameters exceed observations
1795        """
1796        # Get dimensions
1797        n_obs = self.residuals_.shape[0]
1798        n_features = int(self.init_n_series_ * curr_lags)
1799        n_hidden = int(self.n_hidden_features)
1800        # Calculate number of parameters
1801        term1 = int(n_features * n_hidden)
1802        term2 = int(n_hidden * self.init_n_series_)
1803        n_params = term1 + term2
1804        # Check if we have enough observations for the number of parameters
1805        if n_obs <= n_params + 1:
1806            return float("inf")  # Return infinity if too many parameters
1807        # Compute RSS using existing residuals
1808        rss = np.sum(self.residuals_**2)
1809        # Compute criterion
1810        if criterion == "AIC":
1811            ic = n_obs * np.log(rss / n_obs) + 2 * n_params
1812        elif criterion == "AICc":
1813            ic = n_obs * np.log(rss / n_obs) + 2 * n_params * (
1814                n_obs / (n_obs - n_params - 1)
1815            )
1816        else:  # BIC
1817            ic = n_obs * np.log(rss / n_obs) + n_params * np.log(n_obs)
1818
1819        return ic
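As a toy numerical sketch of the criterion above (all values hypothetical): with 200 residual rows, 3 series, 4 candidate lags and 10 hidden nodes, the parameter count and criteria would be computed as

import numpy as np

n_obs = 200                                   # rows of residuals (hypothetical)
init_n_series, curr_lags, n_hidden = 3, 4, 10

n_features = init_n_series * curr_lags        # 12 lagged inputs
n_params = n_features * n_hidden + n_hidden * init_n_series   # 120 + 30 = 150

rss = 42.0                                    # hypothetical residual sum of squares
aic = n_obs * np.log(rss / n_obs) + 2 * n_params
aicc = n_obs * np.log(rss / n_obs) + 2 * n_params * (n_obs / (n_obs - n_params - 1))
bic = n_obs * np.log(rss / n_obs) + n_params * np.log(n_obs)
print(aic, aicc, bic)   # inf would be returned instead if n_obs <= n_params + 1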

Univariate and multivariate time series (MTS) forecasting with Quasi-Randomized networks

Parameters:

obj: object.
    any object containing a method fit (obj.fit()) and a method predict
    (obj.predict()).

n_hidden_features: int.
    number of nodes in the hidden layer.

activation_name: str.
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'.

a: float.
    hyperparameter for 'prelu' or 'elu' activation function.

nodes_sim: str.
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'.

bias: boolean.
    indicates if the hidden layer contains a bias term (True) or not
    (False).

dropout: float.
    regularization parameter; (random) percentage of nodes dropped out
    of the training.

direct_link: boolean.
    indicates if the original predictors are included (True) in model's fitting or not (False).

n_clusters: int.
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering).

cluster_encode: bool.
    defines how the variable containing clusters is treated (default is one-hot);
    if `False`, then labels are used, without one-hot encoding.

type_clust: str.
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm').

type_scaling: a tuple of 3 strings.
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax').

lags: int or str.
    number of lags used for each time series.
    If a string, it must be one of 'AIC', 'AICc', or 'BIC', and the number of lags is selected automatically using that criterion.

type_pi: str.
    type of prediction interval; currently:
    - "gaussian": simple and fast, but assumes stationary, Gaussian in-sample residuals and independence across series in the multivariate case
    - "quantile": use model-agnostic quantile regression under the hood
    - "kde": based on Kernel Density Estimation of in-sample residuals
    - "bootstrap": based on independent bootstrap of in-sample residuals
    - "block-bootstrap": based on basic block bootstrap of in-sample residuals
    - "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals
    - "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals
    - "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals
    - "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals
    - "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals
    - "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals
    - based on copulas of in-sample residuals: 'vine-tll', 'vine-bb1', 'vine-bb6', 'vine-bb7', 'vine-bb8', 'vine-clayton',
    'vine-frank', 'vine-gaussian', 'vine-gumbel', 'vine-indep', 'vine-joe', 'vine-student'
    - 'scp-vine-tll', 'scp-vine-bb1', 'scp-vine-bb6', 'scp-vine-bb7', 'scp-vine-bb8', 'scp-vine-clayton',
    'scp-vine-frank', 'scp-vine-gaussian', 'scp-vine-gumbel', 'scp-vine-indep', 'scp-vine-joe', 'scp-vine-student'
    - 'scp2-vine-tll', 'scp2-vine-bb1', 'scp2-vine-bb6', 'scp2-vine-bb7', 'scp2-vine-bb8', 'scp2-vine-clayton',
    'scp2-vine-frank', 'scp2-vine-gaussian', 'scp2-vine-gumbel', 'scp2-vine-indep', 'scp2-vine-joe', 'scp2-vine-student'

level: int.
    level of confidence for `type_pi == 'quantile'` (default is `95`)

block_size: int.
    size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap").
    Default is ceil(3.15 * n_residuals^(1/3))

replications: int.
    number of replications (if needed, for predictive simulation). Default is None.

kernel: str.
    the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'.

agg: str.
    either "mean" or "median" for simulation of bootstrap aggregating

seed: int.
    reproducibility seed for nodes_sim=='uniform' or predictive simulation.

backend: str.
    "cpu" or "gpu" or "tpu".

verbose: int.
    0: not printing; 1: printing

show_progress: bool.
    True: progress bar when fitting each series; False: no progress bar when fitting each series

Attributes:

fit_objs_: dict
    objects adjusted to each individual time series

y_: {array-like}
    MTS responses (most recent observations first)

X_: {array-like}
    MTS lags

xreg_: {array-like}
    external regressors

y_means_: dict
    a dictionary of each series mean values

preds_: {array-like}
    successive model predictions

preds_std_: {array-like}
    standard deviation around the predictions for Bayesian base learners (`obj`)

gaussian_preds_std_: {array-like}
    standard deviation around the predictions for `type_pi='gaussian'`

return_std_: boolean
    return uncertainty or not (set in predict)

df_: data frame
    the input data frame, in case a DataFrame is provided to `fit`

n_obs_: int
    number of time series observations (number of rows for multivariate)

level_: int
    level of confidence for prediction intervals (default is 95)

residuals_: {array-like}
    in-sample residuals (for `type_pi` not conformal prediction) or calibrated residuals
    (for `type_pi` in conformal prediction)

residuals_sims_: tuple of {array-like}
    simulations of in-sample residuals (for `type_pi` not conformal prediction) or
    calibrated residuals (for `type_pi` in conformal prediction)

kde_: A scikit-learn object, see https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KernelDensity.html

residuals_std_dev_: residuals standard deviation

Examples:

Example 1:

import nnetsauce as ns
import numpy as np
from sklearn import linear_model
np.random.seed(123)

M = np.random.rand(10, 3)
M[:,0] = 10*M[:,0]
M[:,2] = 25*M[:,2]
print(M)

# Adjust Bayesian Ridge
regr4 = linear_model.BayesianRidge()
obj_MTS = ns.MTS(regr4, lags = 1, n_hidden_features=5)
obj_MTS.fit(M)
print(obj_MTS.predict())

# with credible intervals
print(obj_MTS.predict(return_std=True, level=80))

print(obj_MTS.predict(return_std=True, level=95))

Example 2:

import nnetsauce as ns
import numpy as np
import pandas as pd
from sklearn import linear_model

dataset = {
'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
'series1' : [34, 30, 35.6, 33.3, 38.1],
'series2' : [4, 5.5, 5.6, 6.3, 5.1],
'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
df = pd.DataFrame(dataset).set_index('date')
print(df)

# Adjust Bayesian Ridge
regr5 = linear_model.BayesianRidge()
obj_MTS = ns.MTS(regr5, lags = 1, n_hidden_features=5)
obj_MTS.fit(df)
print(obj_MTS.predict())

# with credible intervals
print(obj_MTS.predict(return_std=True, level=80))

print(obj_MTS.predict(return_std=True, level=95))
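Example 3 (a sketch of the simulation-based prediction intervals described under `type_pi`; data and settings are purely illustrative):

import nnetsauce as ns
import numpy as np
from sklearn import linear_model
np.random.seed(123)

M = np.random.rand(50, 3)

# kernel-density simulation of in-sample residuals, 100 replications
regr6 = linear_model.Ridge()
obj_MTS = ns.MTS(regr6, lags = 2, n_hidden_features=5,
                 type_pi="kde", replications=100, kernel="gaussian")
obj_MTS.fit(M)
res = obj_MTS.predict(h=5, level=95)
print(res.mean)   # point forecasts
print(res.lower)  # lower bound of the 95% prediction interval
print(res.upper)  # upper bound of the 95% prediction interval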
def fit(self, X, xreg=None, **kwargs):
337    def fit(self, X, xreg=None, **kwargs):
338        """Fit MTS model to training data X, with optional regressors xreg
339
340        Parameters:
341
342        X: {array-like}, shape = [n_samples, n_features]
343            Training time series, where n_samples is the number
344            of samples and n_features is the number of features;
345            X must be in increasing order (most recent observations last)
346
347        xreg: {array-like}, shape = [n_samples, n_features_xreg]
348            Additional (external) regressors to be passed to self.obj
349            xreg must be in 'increasing' order (most recent observations last)
350
351        **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)
352
353        Returns:
354
355        self: object
356        """
357        try:
358            self.init_n_series_ = X.shape[1]
359        except IndexError as e:
360            self.init_n_series_ = 1
361
362        # Automatic lag selection if requested
363        if isinstance(self.lags, str):
364            max_lags = min(25, X.shape[0] // 4)
365            best_ic = float("inf")
366            best_lags = 1
367
368            if self.verbose:
369                print(
370                    f"\nSelecting optimal number of lags using {self.lags}..."
371                )
372                iterator = tqdm(range(1, max_lags + 1))
373            else:
374                iterator = range(1, max_lags + 1)
375
376            for lag in iterator:
377                # Convert DataFrame to numpy array before reversing
378                if isinstance(X, pd.DataFrame):
379                    X_values = X.values[::-1]
380                else:
381                    X_values = X[::-1]
382
383                # Try current lag value
384                if self.init_n_series_ > 1:
385                    mts_input = ts.create_train_inputs(X_values, lag)
386                else:
387                    mts_input = ts.create_train_inputs(
388                        X_values.reshape(-1, 1), lag
389                    )
390
391                # Cook training set and fit model
392                dummy_y, scaled_Z = self.cook_training_set(
393                    y=np.ones(mts_input[0].shape[0]), X=mts_input[1]
394                )
395                residuals_ = []
396
397                for i in range(self.init_n_series_):
398                    y_mean = np.mean(mts_input[0][:, i])
399                    centered_y_i = mts_input[0][:, i] - y_mean
400                    self.obj.fit(X=scaled_Z, y=centered_y_i)
401                    residuals_.append(
402                        (centered_y_i - self.obj.predict(scaled_Z)).tolist()
403                    )
404
405                self.residuals_ = np.asarray(residuals_).T
406                ic = self._compute_information_criterion(
407                    curr_lags=lag, criterion=self.lags
408                )
409
410                if self.verbose:
411                    print(f"Trying lags={lag}, {self.lags}={ic:.2f}")
412
413                if ic < best_ic:
414                    best_ic = ic
415                    best_lags = lag
416
417            if self.verbose:
418                print(
419                    f"\nSelected {best_lags} lags with {self.lags}={best_ic:.2f}"
420                )
421
422            self.lags = best_lags
423
424        self.input_dates = None
425        self.df_ = None
426
427        if isinstance(X, pd.DataFrame) is False:
428            # input data set is a numpy array
429            if xreg is None:
430                X = pd.DataFrame(X)
431                self.series_names = [
432                    "series" + str(i) for i in range(X.shape[1])
433                ]
434            else:
435                # xreg is not None
436                X = mo.cbind(X, xreg)
437                self.xreg_ = xreg
438
439        else:  # input data set is a DataFrame with column names
440
441            X_index = None
442            if X.index is not None:
443                X_index = X.index
444            if xreg is None:
445                X = copy.deepcopy(mo.convert_df_to_numeric(X))
446            else:
447                X = copy.deepcopy(mo.cbind(mo.convert_df_to_numeric(X), xreg))
448                self.xreg_ = xreg
449            if X_index is not None:
450                X.index = X_index
451            self.series_names = X.columns.tolist()
452
453        if isinstance(X, pd.DataFrame):
454            if self.df_ is None:
455                self.df_ = X
456                X = X.values
457            else:
458                input_dates_prev = pd.DatetimeIndex(self.df_.index.values)
459                frequency = pd.infer_freq(input_dates_prev)
460                self.df_ = pd.concat([self.df_, X], axis=0)
461                self.input_dates = pd.date_range(
462                    start=input_dates_prev[0],
463                    periods=len(input_dates_prev) + X.shape[0],
464                    freq=frequency,
465                ).values.tolist()
466                self.df_.index = self.input_dates
467                X = self.df_.values
468            self.df_.columns = self.series_names
469        else:
470            if self.df_ is None:
471                self.df_ = pd.DataFrame(X, columns=self.series_names)
472            else:
473                self.df_ = pd.concat(
474                    [self.df_, pd.DataFrame(X, columns=self.series_names)],
475                    axis=0,
476                )
477
478        self.input_dates = ts.compute_input_dates(self.df_)
479
480        try:
481            # multivariate time series
482            n, p = X.shape
 483        except Exception:
484            # univariate time series
485            n = X.shape[0]
486            p = 1
487        self.n_obs_ = n
488
489        rep_1_n = np.repeat(1, n)
490
491        self.y_ = None
492        self.X_ = None
493        self.n_series = p
494        self.fit_objs_.clear()
495        self.y_means_.clear()
496        residuals_ = []
497        self.residuals_ = None
498        self.residuals_sims_ = None
499        self.kde_ = None
500        self.sims_ = None
501        self.scaled_Z_ = None
502        self.centered_y_is_ = []
503
504        if self.init_n_series_ > 1:
505            # multivariate time series
506            mts_input = ts.create_train_inputs(X[::-1], self.lags)
507        else:
508            # univariate time series
509            mts_input = ts.create_train_inputs(
510                X.reshape(-1, 1)[::-1], self.lags
511            )
512
513        self.y_ = mts_input[0]
514
515        self.X_ = mts_input[1]
516
517        dummy_y, scaled_Z = self.cook_training_set(y=rep_1_n, X=self.X_)
518
519        self.scaled_Z_ = scaled_Z
520
521        # loop on all the time series and adjust self.obj.fit
522        if self.verbose > 0:
523            print(
524                f"\n Adjusting {type(self.obj).__name__} to multivariate time series... \n"
525            )
526
527        if self.show_progress is True:
528            iterator = tqdm(range(self.init_n_series_))
529        else:
530            iterator = range(self.init_n_series_)
531
532        if self.type_pi in (
533            "gaussian",
534            "kde",
535            "bootstrap",
536            "block-bootstrap",
537        ) or self.type_pi.startswith("vine"):
538            for i in iterator:
539                y_mean = np.mean(self.y_[:, i])
540                self.y_means_[i] = y_mean
541                centered_y_i = self.y_[:, i] - y_mean
542                self.centered_y_is_.append(centered_y_i)
543                self.obj.fit(X=scaled_Z, y=centered_y_i)
544                self.fit_objs_[i] = deepcopy(self.obj)
545                residuals_.append(
546                    (
547                        centered_y_i - self.fit_objs_[i].predict(scaled_Z)
548                    ).tolist()
549                )
550
551        if self.type_pi == "quantile":
552            for i in iterator:
553                y_mean = np.mean(self.y_[:, i])
554                self.y_means_[i] = y_mean
555                centered_y_i = self.y_[:, i] - y_mean
556                self.centered_y_is_.append(centered_y_i)
557                self.obj.fit(X=scaled_Z, y=centered_y_i)
558                self.fit_objs_[i] = deepcopy(self.obj)
559
560        if self.type_pi.startswith("scp"):
561            # split conformal prediction
562            for i in iterator:
563                n_y = self.y_.shape[0]
564                n_y_half = n_y // 2
565                first_half_idx = range(0, n_y_half)
566                second_half_idx = range(n_y_half, n_y)
567                y_mean_temp = np.mean(self.y_[first_half_idx, i])
568                centered_y_i_temp = self.y_[first_half_idx, i] - y_mean_temp
569                self.obj.fit(X=scaled_Z[first_half_idx, :], y=centered_y_i_temp)
570                # calibrated residuals actually
571                residuals_.append(
572                    (
573                        self.y_[second_half_idx, i]
574                        - (
575                            y_mean_temp
576                            + self.obj.predict(scaled_Z[second_half_idx, :])
577                        )
578                    ).tolist()
579                )
580                # fit on the second half
581                y_mean = np.mean(self.y_[second_half_idx, i])
582                self.y_means_[i] = y_mean
583                centered_y_i = self.y_[second_half_idx, i] - y_mean
584                self.obj.fit(X=scaled_Z[second_half_idx, :], y=centered_y_i)
585                self.fit_objs_[i] = deepcopy(self.obj)
586
587        self.residuals_ = np.asarray(residuals_).T
588
589        if self.type_pi == "gaussian":
590            self.gaussian_preds_std_ = np.std(self.residuals_, axis=0)
591
592        if self.type_pi.startswith("scp2"):
593            # Calculate mean and standard deviation for each column
594            data_mean = np.mean(self.residuals_, axis=0)
595            self.residuals_std_dev_ = np.std(self.residuals_, axis=0)
596            # Center and scale the array using broadcasting
597            self.residuals_ = (
598                self.residuals_ - data_mean[np.newaxis, :]
599            ) / self.residuals_std_dev_[np.newaxis, :]
600
601        if self.replications != None and "kde" in self.type_pi:
602            if self.verbose > 0:
603                print(f"\n Simulate residuals using {self.kernel} kernel... \n")
604            assert self.kernel in (
605                "gaussian",
606                "tophat",
607            ), "currently, 'kernel' must be either 'gaussian' or 'tophat'"
608            kernel_bandwidths = {"bandwidth": np.logspace(-6, 6, 150)}
609            grid = GridSearchCV(
610                KernelDensity(kernel=self.kernel, **kwargs),
611                param_grid=kernel_bandwidths,
612            )
613            grid.fit(self.residuals_)
614
615            if self.verbose > 0:
616                print(
617                    f"\n Best parameters for {self.kernel} kernel: {grid.best_params_} \n"
618                )
619
620            self.kde_ = grid.best_estimator_
621
622        return self

Fit MTS model to training data X, with optional regressors xreg

Parameters:

X: {array-like}, shape = [n_samples, n_features] Training time series, where n_samples is the number of samples and n_features is the number of features; X must be in increasing order (most recent observations last)

xreg: {array-like}, shape = [n_samples, n_features_xreg] Additional (external) regressors to be passed to self.obj xreg must be in 'increasing' order (most recent observations last)

**kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)

Returns:

self: object
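As an illustration of the `xreg` argument (a sketch with hypothetical data; per the `predict` source below, future values of the external regressor can be passed to `predict` through its `xreg` keyword argument):

import nnetsauce as ns
import numpy as np
import pandas as pd
from sklearn import linear_model

np.random.seed(123)
idx = pd.date_range('2020-01-01', periods=30, freq='D')
X = pd.DataFrame(np.random.rand(30, 2), columns=['series1', 'series2'], index=idx)
xreg = pd.DataFrame({'exo': np.random.rand(30)}, index=idx)   # hypothetical external regressor

obj_MTS = ns.MTS(linear_model.BayesianRidge(), lags = 1, n_hidden_features=5)
obj_MTS.fit(X, xreg=xreg)

# hypothetical future values of the regressor, one row per forecast step
future_xreg = pd.DataFrame({'exo': np.random.rand(5)})
print(obj_MTS.predict(h=5, xreg=future_xreg))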

def predict(self, h=5, level=95, quantiles=None, **kwargs):
 700    def predict(self, h=5, level=95, quantiles=None, **kwargs):
 701        """Forecast all the time series, h steps ahead"""
 702
 703        if quantiles is not None:
 704            # Validate
 705            quantiles = np.asarray(quantiles)
 706            if not ((quantiles > 0) & (quantiles < 1)).all():
 707                raise ValueError("quantiles must be between 0 and 1.")
 708            # Delegate to dedicated method
 709            return self._predict_quantiles(h=h, quantiles=quantiles, **kwargs)
 710
 711        if isinstance(level, list) or isinstance(level, np.ndarray):
 712            # Store results
 713            result_dict = {}
 714            # Loop through the requested confidence levels and compute lower/upper bounds for each
 715            # e.g. level=[80, 90, 95]
 716            for lev in level:
 717                # Get the forecast for this alpha
 718                res = self.predict(h=h, level=lev, **kwargs)
 719                # Adjust index and collect lower/upper bounds
 720                res.lower.index = pd.to_datetime(res.lower.index)
 721                res.upper.index = pd.to_datetime(res.upper.index)
 722                # Loop over each time series (multivariate) and flatten results
 723                if isinstance(res.lower, pd.DataFrame):
 724                    for (
 725                        series
 726                    ) in (
 727                        res.lower.columns
 728                    ):  # Assumes 'lower' and 'upper' have multiple series
 729                        result_dict[f"lower_{lev}_{series}"] = (
 730                            res.lower[series].to_numpy().flatten()
 731                        )
 732                        result_dict[f"upper_{lev}_{series}"] = (
 733                            res.upper[series].to_numpy().flatten()
 734                        )
 735                else:
 736                    for series_id in range(
 737                        self.n_series
 738                    ):  # Assumes 'lower' and 'upper' have multiple series
 739                        result_dict[f"lower_{lev}_{series_id}"] = (
 740                            res.lower[series_id, :].to_numpy().flatten()
 741                        )
 742                        result_dict[f"upper_{lev}_{series_id}"] = (
 743                            res.upper[series_id, :].to_numpy().flatten()
 744                        )
 745            return pd.DataFrame(result_dict, index=self.output_dates_)
 746
 747        # only one prediction interval
 748        self.output_dates_, frequency = ts.compute_output_dates(self.df_, h)
 749
 750        self.level_ = level
 751
 752        self.return_std_ = False  # do not remove (/!\)
 753
 754        self.mean_ = None  # do not remove (/!\)
 755
 756        self.mean_ = deepcopy(self.y_)  # do not remove (/!\)
 757
 758        self.lower_ = None  # do not remove (/!\)
 759
 760        self.upper_ = None  # do not remove (/!\)
 761
 762        self.sims_ = None  # do not remove (/!\)
 763
 764        y_means_ = np.asarray(
 765            [self.y_means_[i] for i in range(self.init_n_series_)]
 766        )
 767
 768        n_features = self.init_n_series_ * self.lags
 769
 770        self.alpha_ = 100 - level
 771
 772        pi_multiplier = norm.ppf(1 - self.alpha_ / 200)
 773
 774        if "return_std" in kwargs:  # bayesian forecasting
 775            self.return_std_ = True
 776            self.preds_std_ = []
 777            DescribeResult = namedtuple(
 778                "DescribeResult", ("mean", "lower", "upper")
 779            )  # to be updated
 780
 781        if "return_pi" in kwargs:  # split conformal, without simulation
 782            mean_pi_ = []
 783            lower_pi_ = []
 784            upper_pi_ = []
 785            median_pi_ = []
 786            DescribeResult = namedtuple(
 787                "DescribeResult", ("mean", "lower", "upper")
 788            )  # to be updated
 789
 790        if self.kde_ != None and "kde" in self.type_pi:  # kde
 791            target_cols = self.df_.columns[
 792                : self.init_n_series_
 793            ]  # Get target column names
 794            if self.verbose == 1:
 795                self.residuals_sims_ = tuple(
 796                    self.kde_.sample(
 797                        n_samples=h, random_state=self.seed + 100 * i
 798                    )  # Keep full sample
 799                    for i in tqdm(range(self.replications))
 800                )
 801            elif self.verbose == 0:
 802                self.residuals_sims_ = tuple(
 803                    self.kde_.sample(
 804                        n_samples=h, random_state=self.seed + 100 * i
 805                    )  # Keep full sample
 806                    for i in range(self.replications)
 807                )
 808
 809            # Convert to DataFrames after sampling
 810            self.residuals_sims_ = tuple(
 811                pd.DataFrame(
 812                    sim,  # Keep all columns
 813                    columns=target_cols,  # Use original target column names
 814                    index=self.output_dates_,
 815                )
 816                for sim in self.residuals_sims_
 817            )
 818
 819        if self.type_pi in ("bootstrap", "scp-bootstrap", "scp2-bootstrap"):
 820            assert self.replications is not None and isinstance(
 821                self.replications, int
 822            ), "'replications' must be provided and be an integer"
 823            if self.verbose == 1:
 824                self.residuals_sims_ = tuple(
 825                    ts.bootstrap(
 826                        self.residuals_,
 827                        h=h,
 828                        block_size=None,
 829                        seed=self.seed + 100 * i,
 830                    )
 831                    for i in tqdm(range(self.replications))
 832                )
 833            elif self.verbose == 0:
 834                self.residuals_sims_ = tuple(
 835                    ts.bootstrap(
 836                        self.residuals_,
 837                        h=h,
 838                        block_size=None,
 839                        seed=self.seed + 100 * i,
 840                    )
 841                    for i in range(self.replications)
 842                )
 843
 844        if self.type_pi in (
 845            "block-bootstrap",
 846            "scp-block-bootstrap",
 847            "scp2-block-bootstrap",
 848        ):
 849            if self.block_size is None:
 850                self.block_size = int(
 851                    np.ceil(3.15 * (self.residuals_.shape[0] ** (1 / 3)))
 852                )
 853
 854            assert self.replications is not None and isinstance(
 855                self.replications, int
 856            ), "'replications' must be provided and be an integer"
 857            if self.verbose == 1:
 858                self.residuals_sims_ = tuple(
 859                    ts.bootstrap(
 860                        self.residuals_,
 861                        h=h,
 862                        block_size=self.block_size,
 863                        seed=self.seed + 100 * i,
 864                    )
 865                    for i in tqdm(range(self.replications))
 866                )
 867            elif self.verbose == 0:
 868                self.residuals_sims_ = tuple(
 869                    ts.bootstrap(
 870                        self.residuals_,
 871                        h=h,
 872                        block_size=self.block_size,
 873                        seed=self.seed + 100 * i,
 874                    )
 875                    for i in range(self.replications)
 876                )
 877
 878        if "vine" in self.type_pi:
 879            if self.verbose == 1:
 880                self.residuals_sims_ = tuple(
 881                    vinecopula_sample(
 882                        x=self.residuals_,
 883                        n_samples=h,
 884                        method=self.type_pi,
 885                        random_state=self.seed + 100 * i,
 886                    )
 887                    for i in tqdm(range(self.replications))
 888                )
 889            elif self.verbose == 0:
 890                self.residuals_sims_ = tuple(
 891                    vinecopula_sample(
 892                        x=self.residuals_,
 893                        n_samples=h,
 894                        method=self.type_pi,
 895                        random_state=self.seed + 100 * i,
 896                    )
 897                    for i in range(self.replications)
 898                )
 899
 900        mean_ = deepcopy(self.mean_)
 901
 902        for i in range(h):
 903
 904            new_obs = ts.reformat_response(mean_, self.lags)
 905            new_X = new_obs.reshape(1, -1)
 906            cooked_new_X = self.cook_test_set(new_X, **kwargs)
 907
 908            if "return_std" in kwargs:
 909                self.preds_std_.append(
 910                    [
 911                        np.asarray(
 912                            self.fit_objs_[i].predict(
 913                                cooked_new_X, return_std=True
 914                            )[1]
 915                        ).item()
 916                        for i in range(self.n_series)
 917                    ]
 918                )
 919
 920            if "return_pi" in kwargs:
 921                for j in range(self.n_series):
 922                    preds_pi = self.fit_objs_[j].predict(cooked_new_X, **kwargs)
 923                    mean_pi_.append(preds_pi.mean[0])
 924                    lower_pi_.append(preds_pi.lower[0])
 925                    upper_pi_.append(preds_pi.upper[0])
 926
 927            if self.type_pi != "quantile":
 928                predicted_cooked_new_X = np.asarray(
 929                    [
 930                        np.asarray(
 931                            self.fit_objs_[i].predict(cooked_new_X)
 932                        ).item()
 933                        for i in range(self.init_n_series_)
 934                    ]
 935                )
 936            else:
 937                predicted_cooked_new_X = np.asarray(
 938                    [
 939                        np.asarray(
 940                            self.fit_objs_[i]
 941                            .predict(cooked_new_X, return_pi=True)
 942                            .upper
 943                        ).item()
 944                        for i in range(self.init_n_series_)
 945                    ]
 946                )
 947
 948            preds = np.asarray(y_means_ + predicted_cooked_new_X)
 949
 950            # Create full row with both predictions and external regressors
 951            if self.xreg_ is not None and "xreg" in kwargs:
 952                next_xreg = kwargs["xreg"].iloc[i: i + 1].values.flatten()
 953                full_row = np.concatenate([preds, next_xreg])
 954            else:
 955                full_row = preds
 956
 957            # Create a new row with same number of columns as mean_
 958            new_row = np.zeros((1, mean_.shape[1]))
 959            new_row[0, : full_row.shape[0]] = full_row
 960
 961            # Maintain the full dimensionality by using vstack instead of rbind
 962            mean_ = np.vstack([new_row, mean_[:-1]])
 963
 964        # Final output should only include the target columns
 965        self.mean_ = pd.DataFrame(
 966            mean_[0:h, : self.init_n_series_][::-1],
 967            columns=self.df_.columns[: self.init_n_series_],
 968            index=self.output_dates_,
 969        )
 970
 971        # function's return ----------------------------------------------------------------------
 972        if (
 973            (("return_std" not in kwargs) and ("return_pi" not in kwargs))
 974            and (self.type_pi not in ("gaussian", "scp"))
 975        ) or ("vine" in self.type_pi):
 976
 977            if self.replications is None:
 978                return self.mean_.iloc[:, : self.init_n_series_]
 979
 980            # if "return_std" not in kwargs and self.replications is not None
 981            meanf = []
 982            medianf = []
 983            lower = []
 984            upper = []
 985
 986            if "scp2" in self.type_pi:
 987
 988                if self.verbose == 1:
 989                    self.sims_ = tuple(
 990                        (
 991                            self.mean_
 992                            + self.residuals_sims_[i]
 993                            * self.residuals_std_dev_[np.newaxis, :]
 994                            for i in tqdm(range(self.replications))
 995                        )
 996                    )
 997                elif self.verbose == 0:
 998                    self.sims_ = tuple(
 999                        (
1000                            self.mean_
1001                            + self.residuals_sims_[i]
1002                            * self.residuals_std_dev_[np.newaxis, :]
1003                            for i in range(self.replications)
1004                        )
1005                    )
1006            else:
1007
1008                if self.verbose == 1:
1009                    self.sims_ = tuple(
1010                        (
1011                            self.mean_ + self.residuals_sims_[i]
1012                            for i in tqdm(range(self.replications))
1013                        )
1014                    )
1015                elif self.verbose == 0:
1016                    self.sims_ = tuple(
1017                        (
1018                            self.mean_ + self.residuals_sims_[i]
1019                            for i in range(self.replications)
1020                        )
1021                    )
1022
1023            DescribeResult = namedtuple(
1024                "DescribeResult", ("mean", "sims", "lower", "upper")
1025            )
1026            for ix in range(self.init_n_series_):
1027                sims_ix = getsims(self.sims_, ix)
1028                if self.agg == "mean":
1029                    meanf.append(np.mean(sims_ix, axis=1))
1030                else:
1031                    medianf.append(np.median(sims_ix, axis=1))
1032                lower.append(np.quantile(sims_ix, q=self.alpha_ / 200, axis=1))
1033                upper.append(
1034                    np.quantile(sims_ix, q=1 - self.alpha_ / 200, axis=1)
1035                )
1036            self.mean_ = pd.DataFrame(
1037                np.asarray(meanf).T,
1038                columns=self.series_names[
1039                    : self.init_n_series_
1040                ],  # self.df_.columns,
1041                index=self.output_dates_,
1042            )
1043
1044            self.lower_ = pd.DataFrame(
1045                np.asarray(lower).T,
1046                columns=self.series_names[
1047                    : self.init_n_series_
1048                ],  # self.df_.columns,
1049                index=self.output_dates_,
1050            )
1051
1052            self.upper_ = pd.DataFrame(
1053                np.asarray(upper).T,
1054                columns=self.series_names[
1055                    : self.init_n_series_
1056                ],  # self.df_.columns,
1057                index=self.output_dates_,
1058            )
1059
1060            try:
1061                self.median_ = pd.DataFrame(
1062                    np.asarray(medianf).T,
1063                    columns=self.series_names[
1064                        : self.init_n_series_
1065                    ],  # self.df_.columns,
1066                    index=self.output_dates_,
1067                )
1068            except Exception:
1069                pass
1070
1071            return DescribeResult(
1072                self.mean_, self.sims_, self.lower_, self.upper_
1073            )
1074
1075        if (
1076            (("return_std" in kwargs) or ("return_pi" in kwargs))
1077            and (self.type_pi not in ("gaussian", "scp"))
1078        ) or "vine" in self.type_pi:
1079            DescribeResult = namedtuple(
1080                "DescribeResult", ("mean", "lower", "upper")
1081            )
1082
1083            self.mean_ = pd.DataFrame(
1084                np.asarray(self.mean_),
1085                columns=self.series_names,  # self.df_.columns,
1086                index=self.output_dates_,
1087            )
1088
1089            if "return_std" in kwargs:
1090
1091                self.preds_std_ = np.asarray(self.preds_std_)
1092
1093                self.lower_ = pd.DataFrame(
1094                    self.mean_.values - pi_multiplier * self.preds_std_,
1095                    columns=self.series_names,  # self.df_.columns,
1096                    index=self.output_dates_,
1097                )
1098
1099                self.upper_ = pd.DataFrame(
1100                    self.mean_.values + pi_multiplier * self.preds_std_,
1101                    columns=self.series_names,  # self.df_.columns,
1102                    index=self.output_dates_,
1103                )
1104
1105            if "return_pi" in kwargs:
1106
1107                self.lower_ = pd.DataFrame(
1108                    np.asarray(lower_pi_).reshape(h, self.n_series)
1109                    + y_means_[np.newaxis, :],
1110                    columns=self.series_names,  # self.df_.columns,
1111                    index=self.output_dates_,
1112                )
1113
1114                self.upper_ = pd.DataFrame(
1115                    np.asarray(upper_pi_).reshape(h, self.n_series)
1116                    + y_means_[np.newaxis, :],
1117                    columns=self.series_names,  # self.df_.columns,
1118                    index=self.output_dates_,
1119                )
1120
1121            res = DescribeResult(self.mean_, self.lower_, self.upper_)
1122
1123            if self.xreg_ is not None:
1124                if len(self.xreg_.shape) > 1:
1125                    res2 = mx.tuple_map(
1126                        res,
1127                        lambda x: mo.delete_last_columns(
1128                            x, num_columns=self.xreg_.shape[1]
1129                        ),
1130                    )
1131                else:
1132                    res2 = mx.tuple_map(
1133                        res, lambda x: mo.delete_last_columns(x, num_columns=1)
1134                    )
1135                return DescribeResult(res2[0], res2[1], res2[2])
1136
1137            return res
1138
1139        if self.type_pi == "gaussian":
1140
1141            DescribeResult = namedtuple(
1142                "DescribeResult", ("mean", "lower", "upper")
1143            )
1144
1145            self.mean_ = pd.DataFrame(
1146                np.asarray(self.mean_),
1147                columns=self.series_names,  # self.df_.columns,
1148                index=self.output_dates_,
1149            )
1150
1151            self.lower_ = pd.DataFrame(
1152                self.mean_.values - pi_multiplier * self.gaussian_preds_std_,
1153                columns=self.series_names,  # self.df_.columns,
1154                index=self.output_dates_,
1155            )
1156
1157            self.upper_ = pd.DataFrame(
1158                self.mean_.values + pi_multiplier * self.gaussian_preds_std_,
1159                columns=self.series_names,  # self.df_.columns,
1160                index=self.output_dates_,
1161            )
1162
1163            res = DescribeResult(self.mean_, self.lower_, self.upper_)
1164
1165            if self.xreg_ is not None:
1166                if len(self.xreg_.shape) > 1:
1167                    res2 = mx.tuple_map(
1168                        res,
1169                        lambda x: mo.delete_last_columns(
1170                            x, num_columns=self.xreg_.shape[1]
1171                        ),
1172                    )
1173                else:
1174                    res2 = mx.tuple_map(
1175                        res, lambda x: mo.delete_last_columns(x, num_columns=1)
1176                    )
1177                return DescribeResult(res2[0], res2[1], res2[2])
1178
1179            return res
1180
1181        if self.type_pi == "quantile":
1182
1183            DescribeResult = namedtuple("DescribeResult", ("mean",))
1184
1185            self.mean_ = pd.DataFrame(
1186                np.asarray(self.mean_),
1187                columns=self.series_names,  # self.df_.columns,
1188                index=self.output_dates_,
1189            )
1190
1191            res = DescribeResult(self.mean_)
1192
1193            if self.xreg_ is not None:
1194                if len(self.xreg_.shape) > 1:
1195                    res2 = mx.tuple_map(
1196                        res,
1197                        lambda x: mo.delete_last_columns(
1198                            x, num_columns=self.xreg_.shape[1]
1199                        ),
1200                    )
1201                else:
1202                    res2 = mx.tuple_map(
1203                        res, lambda x: mo.delete_last_columns(x, num_columns=1)
1204                    )
1205                return DescribeResult(res2[0])
1206
1207            return res
1208
1209        # After prediction loop, ensure sims only contain target columns
1210        if self.sims_ is not None:
1211            if self.verbose == 1:
1212                self.sims_ = tuple(
1213                    sim[:h,]  # keep only the first h rows
1214                    for sim in tqdm(self.sims_)
1215                )
1216            elif self.verbose == 0:
1217                self.sims_ = tuple(
1218                    sim[:h,]  # keep only the first h rows
1219                    for sim in self.sims_
1220                )
1221
1222            # Convert numpy arrays to DataFrames with proper columns
1223            self.sims_ = tuple(
1224                pd.DataFrame(
1225                    sim,
1226                    columns=self.df_.columns[: self.init_n_series_],
1227                    index=self.output_dates_,
1228                )
1229                for sim in self.sims_
1230            )
1231
1232        if self.type_pi in (
1233            "kde",
1234            "bootstrap",
1235            "block-bootstrap",
1236            "vine-copula",
1237        ):
1238            if self.xreg_ is not None:
1239                # Use getsimsxreg when external regressors are present
1240                target_cols = self.df_.columns[: self.init_n_series_]
1241                self.sims_ = getsimsxreg(
1242                    self.sims_, self.output_dates_, target_cols
1243                )
1244            else:
1245                # Use original getsims for backward compatibility
1246                self.sims_ = getsims(self.sims_)

Forecast all the time series, h steps ahead
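
A minimal usage sketch for this forecasting method is shown below; the data, base regressor, and hyperparameters are illustrative assumptions, and only the `MTS` fit/predict API and the `mean_`/`lower_`/`upper_`/`sims_` attributes come from the code above.

```python
# Hedged sketch: simulation-based forecasting with MTS (type_pi="kde").
# The toy DataFrame and Ridge() base learner are made up for illustration.
import numpy as np
import pandas as pd
import nnetsauce as ns
from sklearn.linear_model import Ridge

rng = np.random.default_rng(0)
dates = pd.date_range("2020-01-31", periods=60, freq="M")
df = pd.DataFrame(
    {"series1": rng.normal(size=60).cumsum(),
     "series2": rng.normal(size=60).cumsum()},
    index=dates,
)

regr = ns.MTS(
    Ridge(),
    lags=3,
    type_pi="kde",      # sample residuals from a kernel density estimate
    replications=100,   # number of simulated residual paths
)
regr.fit(df)

res = regr.predict(h=5)      # DescribeResult(mean, sims, lower, upper)
print(res.mean)              # point forecasts, one column per series
print(res.lower)             # lower prediction bound
print(res.upper)             # upper prediction bound
print(len(res.sims))         # 100 simulated forecast paths
```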

def score( self, X, training_index, testing_index, scoring=None, alpha=0.5, **kwargs):
1313    def score(
1314        self,
1315        X,
1316        training_index,
1317        testing_index,
1318        scoring=None,
1319        alpha=0.5,
1320        **kwargs,
1321    ):
1322        """Train on training_index, score on testing_index."""
1323
1324        assert (
1325            bool(set(training_index).intersection(set(testing_index))) == False
1326        ), "Non-overlapping 'training_index' and 'testing_index' required"
1327
1328        # Dimensions
1329        try:
1330            # multivariate time series
1331            n, p = X.shape
1332        except ValueError:
1333            # univariate time series
1334            n = X.shape[0]
1335            p = 1
1336
1337        # Training and testing sets
1338        if p > 1:
1339            X_train = X[training_index, :]
1340            X_test = X[testing_index, :]
1341        else:
1342            X_train = X[training_index]
1343            X_test = X[testing_index]
1344
1345        # Horizon
1346        h = len(testing_index)
1347        assert (
1348            len(training_index) + h
1349        ) <= n, "Please check lengths of training and testing windows"
1350
1351        # Fit and predict
1352        self.fit(X_train, **kwargs)
1353        preds = self.predict(h=h, **kwargs)
1354
1355        if scoring is None:
1356            scoring = "neg_root_mean_squared_error"
1357
1358        if scoring == "pinball":
1359            # Predict requested quantile
1360            q_pred = self.predict(h=h, quantiles=[alpha], **kwargs)
1361            # Handle multivariate
1362            scores = []
1363            for j in range(p):
1364                series_name = getattr(self, "series_names", [f"Series_{j}"])[j]
1365                q_label = (
1366                    f"{int(alpha * 100):02d}"
1367                    if (alpha * 100).is_integer()
1368                    else f"{alpha:.3f}".replace(".", "_")
1369                )
1370                col = f"quantile_{q_label}_{series_name}"
1371                if col not in q_pred.columns:
1372                    raise ValueError(
1373                        f"Column '{col}' not found in quantile forecast output."
1374                    )
1375                y_true_j = X_test[:, j]
1376                y_pred_j = q_pred[col].values
1377                # Compute pinball loss for this series
1378                loss = mean_pinball_loss(y_true_j, y_pred_j, alpha=alpha)
1379                scores.append(loss)
1380            # Return average over series
1381            return np.mean(scores)
1382
1383        if scoring == "crps":
1384            # Ensure simulations exist
1385            preds = self.predict(h=h, **kwargs)  # triggers self.sims_
1386            # Extract simulations: list of DataFrames → (R, h, p)
1387            sims_vals = np.stack(
1388                [sim.values for sim in self.sims_], axis=0
1389            )  # (R, h, p)
1390            crps_scores = []
1391            for j in range(p):
1392                y_true_j = X_test[:, j]
1393                sims_j = sims_vals[:, :, j]  # (R, h)
1394                crps_j = self._crps_ensemble(np.asarray(y_true_j), sims_j)
1395                crps_scores.append(np.mean(crps_j))  # average over horizon
1396            return np.mean(crps_scores)  # average over series
1397
1398        # check inputs
1399        assert scoring in (
1400            "explained_variance",
1401            "neg_mean_absolute_error",
1402            "neg_mean_squared_error",
1403            "neg_root_mean_squared_error",
1404            "neg_mean_squared_log_error",
1405            "neg_median_absolute_error",
1406            "r2",
1407        ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \
1408                               'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \
1409                               'neg_median_absolute_error', 'r2')"
1410
1411        scoring_options = {
1412            "explained_variance": skm2.explained_variance_score,
1413            "neg_mean_absolute_error": skm2.mean_absolute_error,
1414            "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2),
1415            "neg_root_mean_squared_error": lambda x, y: np.sqrt(
1416                np.mean((x - y) ** 2)
1417            ),
1418            "neg_mean_squared_log_error": skm2.mean_squared_log_error,
1419            "neg_median_absolute_error": skm2.median_absolute_error,
1420            "r2": skm2.r2_score,
1421        }
1422
1423        return scoring_options[scoring](X_test, preds)

Train on training_index, score on testing_index.
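
A sketch of using this method for out-of-sample evaluation on made-up data is given below; the indices, data, and settings are illustrative assumptions, and the `"crps"` call assumes a probabilistic configuration (here `type_pi="kde"` with `replications`) so that simulated paths are available.

```python
# Hedged sketch of MTS.score() on synthetic data; values are invented.
import numpy as np
import nnetsauce as ns
from sklearn.linear_model import Ridge

rng = np.random.default_rng(1)
X = rng.normal(size=(100, 2)).cumsum(axis=0)   # two toy series

training_index = np.arange(0, 80)    # fit on the first 80 observations
testing_index = np.arange(80, 100)   # score on the last 20 (h = 20)

regr = ns.MTS(Ridge(), lags=3, type_pi="kde", replications=50)

# point-forecast scoring
rmse = regr.score(X, training_index, testing_index,
                  scoring="neg_root_mean_squared_error")

# probabilistic scoring, based on the simulated paths in self.sims_
crps = regr.score(X, training_index, testing_index, scoring="crps")

# scoring="pinball" with a quantile level alpha is also accepted (see code above)
print(rmse, crps)
```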

class MultitaskClassifier(nnetsauce.Base, sklearn.base.ClassifierMixin):
 16class MultitaskClassifier(Base, ClassifierMixin):
 17    """Multitask Classification model based on regression models, with shared covariates
 18
 19    Parameters:
 20
 21        obj: object
 22            any object (must be a regression model) containing a method fit (obj.fit())
 23            and a method predict (obj.predict())
 24
 25        n_hidden_features: int
 26            number of nodes in the hidden layer
 27
 28        activation_name: str
 29            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 30
 31        a: float
 32            hyperparameter for 'prelu' or 'elu' activation function
 33
 34        nodes_sim: str
 35            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 36            'uniform'
 37
 38        bias: boolean
 39            indicates if the hidden layer contains a bias term (True) or not
 40            (False)
 41
 42        dropout: float
 43            regularization parameter; (random) percentage of nodes dropped out
 44            of the training
 45
 46        direct_link: boolean
 47            indicates if the original predictors are included (True) in model's
 48            fitting or not (False)
 49
 50        n_clusters: int
 51            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 52                no clustering)
 53
 54        cluster_encode: bool
 55            defines how the variable containing clusters is treated (default is one-hot)
 56            if `False`, then labels are used, without one-hot encoding
 57
 58        type_clust: str
 59            type of clustering method: currently k-means ('kmeans') or Gaussian
 60            Mixture Model ('gmm')
 61
 62        type_scaling: a tuple of 3 strings
 63            scaling methods for inputs, hidden layer, and clustering respectively
 64            (and when relevant).
 65            Currently available: standardization ('std') or MinMax scaling ('minmax')
 66
 67        col_sample: float
 68            percentage of covariates randomly chosen for training
 69
 70        row_sample: float
 71            percentage of rows chosen for training, by stratified bootstrapping
 72
 73        seed: int
 74            reproducibility seed for nodes_sim=='uniform'
 75
 76        backend: str
 77            "cpu" or "gpu" or "tpu"
 78
 79    Attributes:
 80
 81        fit_objs_: dict
 82            objects adjusted to each individual class (one regression model per class)
 83
 84        n_classes_: int
 85            number of classes for the classifier
 86
 87    Examples:
 88
 89    See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/mtask_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/mtask_classification.py)
 90
 91    ```python
 92    import nnetsauce as ns
 93    import numpy as np
 94    from sklearn.datasets import load_breast_cancer
 95    from sklearn.linear_model import LinearRegression
 96    from sklearn.model_selection import train_test_split
 97    from sklearn import metrics
 98    from time import time
 99
100    breast_cancer = load_breast_cancer()
101    Z = breast_cancer.data
102    t = breast_cancer.target
103
104    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
105                                                        random_state=123+2*10)
106
107    # Linear Regression is used
108    regr = LinearRegression()
109    fit_obj = ns.MultitaskClassifier(regr, n_hidden_features=5,
110                                n_clusters=2, type_clust="gmm")
111
112    start = time()
113    fit_obj.fit(X_train, y_train)
114    print(f"Elapsed {time() - start}")
115
116    print(fit_obj.score(X_test, y_test))
117    print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
118
119    start = time()
120    preds = fit_obj.predict(X_test)
121    print(f"Elapsed {time() - start}")
122    print(metrics.classification_report(preds, y_test))
123    ```
124
125    """
126
127    # construct the object -----
128    _estimator_type = "classifier"
129
130    def __init__(
131        self,
132        obj,
133        n_hidden_features=5,
134        activation_name="relu",
135        a=0.01,
136        nodes_sim="sobol",
137        bias=True,
138        dropout=0,
139        direct_link=True,
140        n_clusters=2,
141        cluster_encode=True,
142        type_clust="kmeans",
143        type_scaling=("std", "std", "std"),
144        col_sample=1,
145        row_sample=1,
146        seed=123,
147        backend="cpu",
148    ):
149        super().__init__(
150            n_hidden_features=n_hidden_features,
151            activation_name=activation_name,
152            a=a,
153            nodes_sim=nodes_sim,
154            bias=bias,
155            dropout=dropout,
156            direct_link=direct_link,
157            n_clusters=n_clusters,
158            cluster_encode=cluster_encode,
159            type_clust=type_clust,
160            type_scaling=type_scaling,
161            col_sample=col_sample,
162            row_sample=row_sample,
163            seed=seed,
164            backend=backend,
165        )
166
167        self.type_fit = "classification"
168        self.obj = obj
169        self.fit_objs_ = {}
170
171    def fit(self, X, y, sample_weight=None, **kwargs):
172        """Fit MultitaskClassifier to training data (X, y).
173
174        Args:
175
176            X: {array-like}, shape = [n_samples, n_features]
177                Training vectors, where n_samples is the number
178                of samples and n_features is the number of features.
179
180            y: array-like, shape = [n_samples]
181                Target values.
182
183            **kwargs: additional parameters to be passed to
184                    self.cook_training_set or self.obj.fit
185
186        Returns:
187
188            self: object
189
190        """
191
192        assert mx.is_factor(y), "y must contain only integers"
193
194        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
195
196        self.classes_ = np.unique(y)  # for compatibility with sklearn
197        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
198
199        # multitask response
200        Y = mo.one_hot_encode2(output_y, self.n_classes_)
201
202        # if sample_weight is None:
203        for i in range(self.n_classes_):
204            self.fit_objs_[i] = deepcopy(
205                self.obj.fit(scaled_Z, Y[:, i], **kwargs)
206            )
207
208        self.classes_ = np.unique(y)
209        return self
210
211    def predict(self, X, **kwargs):
212        """Predict test data X.
213
214        Args:
215
216            X: {array-like}, shape = [n_samples, n_features]
217                Training vectors, where n_samples is the number
218                of samples and n_features is the number of features.
219
220            **kwargs: additional parameters to be passed to
221                    self.cook_test_set
222
223        Returns:
224
225            model predictions: {array-like}
226
227        """
228        return np.argmax(self.predict_proba(X, **kwargs), axis=1)
229
230    def predict_proba(self, X, **kwargs):
231        """Predict probabilities for test data X.
232
233        Args:
234
235            X: {array-like}, shape = [n_samples, n_features]
236                Training vectors, where n_samples is the number
237                of samples and n_features is the number of features.
238
239            **kwargs: additional parameters to be passed to
240                    self.cook_test_set
241
242        Returns:
243
244            probability estimates for test data: {array-like}
245
246        """
247
248        shape_X = X.shape
249
250        probs = np.zeros((shape_X[0], self.n_classes_))
251
252        if len(shape_X) == 1:
253            n_features = shape_X[0]
254
255            new_X = mo.rbind(
256                X.reshape(1, n_features),
257                np.ones(n_features).reshape(1, n_features),
258            )
259
260            Z = self.cook_test_set(new_X, **kwargs)
261
262            # loop on all the classes
263            for i in range(self.n_classes_):
264                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0]
265
266        else:
267            Z = self.cook_test_set(X, **kwargs)
268
269            # loop on all the classes
270            for i in range(self.n_classes_):
271                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)
272
273        expit_raw_probs = expit(probs)
274
275        return expit_raw_probs / expit_raw_probs.sum(axis=1)[:, None]
276
277    def decision_function(self, X, **kwargs):
278        """Compute the decision function of X.
279
280        Parameters:
281            X: {array-like}, shape = [n_samples, n_features]
282                Samples to compute decision function for.
283
284            **kwargs: additional parameters to be passed to
285                    self.cook_test_set
286
287        Returns:
288            array-like of shape (n_samples,) or (n_samples, n_classes)
289            Decision function of the input samples. The order of outputs is the same
290            as that of the classes passed to fit.
291        """
292        if not hasattr(self.obj, "decision_function"):
293            # If base classifier doesn't have decision_function, use predict_proba
294            proba = self.predict_proba(X, **kwargs)
295            if proba.shape[1] == 2:
296                return proba[:, 1]  # For binary classification
297            return proba  # For multiclass
298
299        if len(X.shape) == 1:
300            n_features = X.shape[0]
301            new_X = mo.rbind(
302                X.reshape(1, n_features),
303                np.ones(n_features).reshape(1, n_features),
304            )
305
306            return (
307                self.obj.decision_function(
308                    self.cook_test_set(new_X, **kwargs), **kwargs
309                )
310            )[0]
311
312        return self.obj.decision_function(
313            self.cook_test_set(X, **kwargs), **kwargs
314        )
315
316    @property
317    def _estimator_type(self):
318        return "classifier"

Multitask Classification model based on regression models, with shared covariates

Parameters:

obj: object
    any object (must be a regression model) containing a method fit (obj.fit())
    and a method predict (obj.predict())

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

col_sample: float
    percentage of covariates randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

seed: int
    reproducibility seed for nodes_sim=='uniform'

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

fit_objs_: dict
    objects adjusted to each individual class (one regression model per class)

n_classes_: int
    number of classes for the classifier

Examples:

See also https://github.com/Techtonique/nnetsauce/blob/master/examples/mtask_classification.py

import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time

breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target

X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
                                                    random_state=123+2*10)

# Linear Regression is used
regr = LinearRegression()
fit_obj = ns.MultitaskClassifier(regr, n_hidden_features=5,
                            n_clusters=2, type_clust="gmm")

start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")

print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))

start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
def fit(self, X, y, sample_weight=None, **kwargs):
171    def fit(self, X, y, sample_weight=None, **kwargs):
172        """Fit MultitaskClassifier to training data (X, y).
173
174        Args:
175
176            X: {array-like}, shape = [n_samples, n_features]
177                Training vectors, where n_samples is the number
178                of samples and n_features is the number of features.
179
180            y: array-like, shape = [n_samples]
181                Target values.
182
183            **kwargs: additional parameters to be passed to
184                    self.cook_training_set or self.obj.fit
185
186        Returns:
187
188            self: object
189
190        """
191
192        assert mx.is_factor(y), "y must contain only integers"
193
194        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
195
196        self.classes_ = np.unique(y)  # for compatibility with sklearn
197        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
198
199        # multitask response
200        Y = mo.one_hot_encode2(output_y, self.n_classes_)
201
202        # if sample_weight is None:
203        for i in range(self.n_classes_):
204            self.fit_objs_[i] = deepcopy(
205                self.obj.fit(scaled_Z, Y[:, i], **kwargs)
206            )
207
208        self.classes_ = np.unique(y)
209        return self

Fit MultitaskClassifier to training data (X, y).

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
def predict(self, X, **kwargs):
211    def predict(self, X, **kwargs):
212        """Predict test data X.
213
214        Args:
215
216            X: {array-like}, shape = [n_samples, n_features]
217                Training vectors, where n_samples is the number
218                of samples and n_features is the number of features.
219
220            **kwargs: additional parameters to be passed to
221                    self.cook_test_set
222
223        Returns:
224
225            model predictions: {array-like}
226
227        """
228        return np.argmax(self.predict_proba(X, **kwargs), axis=1)

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
def predict_proba(self, X, **kwargs):
230    def predict_proba(self, X, **kwargs):
231        """Predict probabilities for test data X.
232
233        Args:
234
235            X: {array-like}, shape = [n_samples, n_features]
236                Training vectors, where n_samples is the number
237                of samples and n_features is the number of features.
238
239            **kwargs: additional parameters to be passed to
240                    self.cook_test_set
241
242        Returns:
243
244            probability estimates for test data: {array-like}
245
246        """
247
248        shape_X = X.shape
249
250        probs = np.zeros((shape_X[0], self.n_classes_))
251
252        if len(shape_X) == 1:
253            n_features = shape_X[0]
254
255            new_X = mo.rbind(
256                X.reshape(1, n_features),
257                np.ones(n_features).reshape(1, n_features),
258            )
259
260            Z = self.cook_test_set(new_X, **kwargs)
261
262            # loop on all the classes
263            for i in range(self.n_classes_):
264                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0]
265
266        else:
267            Z = self.cook_test_set(X, **kwargs)
268
269            # loop on all the classes
270            for i in range(self.n_classes_):
271                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)
272
273        expit_raw_probs = expit(probs)
274
275        return expit_raw_probs / expit_raw_probs.sum(axis=1)[:, None]

Predict probabilities for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

probability estimates for test data: {array-like}
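
Concretely, each class gets its own copy of the base regressor fitted on one column of the one-hot response, and the raw per-class scores are turned into probabilities with the logistic function followed by row normalization. A standalone sketch of that last step (NumPy/SciPy only; the raw scores are invented):

```python
# Hedged sketch of the expit-then-normalize step used in predict_proba above.
# `raw` stands for the stacked outputs of the per-class regressors; values are made up.
import numpy as np
from scipy.special import expit

raw = np.array([[ 0.9, -0.2, 0.1],    # one row per sample,
                [-0.5,  1.2, 0.3]])   # one column per class

squashed = expit(raw)                              # map scores into (0, 1)
probs = squashed / squashed.sum(axis=1)[:, None]   # make each row sum to 1

print(probs.sum(axis=1))   # [1. 1.]
```
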
class NeuralNetRegressor(sklearn.base.BaseEstimator, sklearn.base.RegressorMixin):
110class NeuralNetRegressor(BaseEstimator, RegressorMixin):
111    """
112    (Pretrained) Neural Network Regressor.
113
114    Parameters:
115
116        hidden_layer_sizes : tuple, default=(100,)
117            The number of neurons in each hidden layer.
118        max_iter : int, default=100
119            The maximum number of iterations to train the model.
120        learning_rate : float, default=0.01
121            The learning rate for the optimizer.
122        l1_ratio : float, default=0.5
123            The ratio of L1 regularization.
124        alpha : float, default=1e-6
125            The regularization parameter.
126        activation_name : str, default="relu"
127            The activation function to use.
128        dropout : float, default=0.0
129            The dropout rate.
130        random_state : int, default=None
131            The random state for the random number generator.
132        weights : list, default=None
133            The weights to initialize the model with.
134
135    Attributes:
136
137        weights : list
138            The weights of the model.
139        params : list
140            The parameters of the model.
141        scaler_ : sklearn.preprocessing.StandardScaler
142            The scaler used to standardize the input features.
143        y_mean_ : float
144            The mean of the target variable.
145
146    Methods:
147
148        fit(X, y)
149            Fit the model to the data.
150        predict(X)
151            Predict the target variable.
152        get_weights()
153            Get the weights of the model.
154        set_weights(weights)
155            Set the weights of the model.
156    """
157
158    def __init__(
159        self,
160        hidden_layer_sizes=None,
161        max_iter=100,
162        learning_rate=0.01,
163        l1_ratio=0.5,
164        alpha=1e-6,
165        activation_name="relu",
166        dropout=0,
167        weights=None,
168        random_state=None,
169    ):
170        if weights is None and hidden_layer_sizes is None:
171            hidden_layer_sizes = (100,)  # default value if neither is provided
172        self.hidden_layer_sizes = hidden_layer_sizes
173        self.max_iter = max_iter
174        self.learning_rate = learning_rate
175        self.l1_ratio = l1_ratio
176        self.alpha = alpha
177        self.activation_name = activation_name
178        self.dropout = dropout
179        self.weights = weights
180        self.random_state = random_state
181        self.params = None
182        self.scaler_ = StandardScaler()
183        self.y_mean_ = None
184
185    def _validate_weights(self, input_dim):
186        """Validate that weights dimensions are coherent."""
187        if not self.weights:
188            return False
189
190        try:
191            # Check each layer's weights and biases
192            prev_dim = input_dim
193            for W, b in self.weights:
194                # Check weight matrix dimensions
195                if W.shape[0] != prev_dim:
196                    raise ValueError(
197                        f"Weight matrix input dimension {W.shape[0]} does not match previous layer output dimension {prev_dim}"
198                    )
199                # Check bias dimension matches weight matrix output
200                if W.shape[1] != b.shape[0]:
201                    raise ValueError(
202                        f"Bias dimension {b.shape[0]} does not match weight matrix output dimension {W.shape[1]}"
203                    )
204                prev_dim = W.shape[1]
205
206            # Check final output dimension is 1 for regression
207            if prev_dim != 1:
208                raise ValueError(
209                    f"Final layer output dimension {prev_dim} must be 1 for regression"
210                )
211
212            return True
213        except (AttributeError, IndexError):
214            raise ValueError(
215                "Weights format is invalid. Expected list of (weight, bias) tuples"
216            )
217
218    def fit(self, X, y):
219        # Standardize the input features
220        X = self.scaler_.fit_transform(X)
221        # Ensure y is 2D for consistency
222        y = y.reshape(-1, 1)
223        self.y_mean_ = jnp.mean(y)
224        y = y - self.y_mean_
225        # Validate or initialize weights
226        if self.weights is not None:
227            if self._validate_weights(X.shape[1]):
228                self.params = self.weights
229        else:
230            if self.hidden_layer_sizes is None:
231                raise ValueError(
232                    "Either weights or hidden_layer_sizes must be provided"
233                )
234            self.params = initialize_params(
235                X.shape[1], self.hidden_layer_sizes, self.random_state
236            )
237        loss_fn = partial(loss, l1_ratio=self.l1_ratio, alpha=self.alpha)
238        grad_loss = jit(grad(loss_fn))  # compiled gradient evaluation function
239        perex_grads = jit(
240            vmap(grad_loss, in_axes=(None, 0, 0))
241        )  # fast per-example grads
242        # Training loop
243        for _ in range(self.max_iter):
244            grads = perex_grads(self.params, X, y)
245            # Average gradients across examples
246            grads = jax.tree_map(lambda g: jnp.mean(g, axis=0), grads)
247            # Update parameters
248            self.params = [
249                (W - self.learning_rate * dW, b - self.learning_rate * db)
250                for (W, b), (dW, db) in zip(self.params, grads)
251            ]
252        # Store final weights
253        self.weights = self.params
254        return self
255
256    def get_weights(self):
257        """Return the current weights of the model."""
258        if self.weights is None:
259            raise ValueError(
260                "No weights available. Model has not been fitted yet."
261            )
262        return self.weights
263
264    def set_weights(self, weights):
265        """Set the weights of the model manually."""
266        self.weights = weights
267        self.params = weights
268
269    def predict(self, X):
270        X = self.scaler_.transform(X)
271        if self.params is None:
272            raise ValueError("Model has not been fitted yet.")
273        predictions = predict_internal(
274            self.params,
275            X,
276            activation_func=self.activation_name,
277            dropout=self.dropout,
278            seed=self.random_state,
279        )
280        return predictions.reshape(-1) + self.y_mean_

(Pretrained) Neural Network Regressor.

Parameters:

hidden_layer_sizes : tuple, default=(100,)
    The number of neurons in each hidden layer.
max_iter : int, default=100
    The maximum number of iterations to train the model.
learning_rate : float, default=0.01
    The learning rate for the optimizer.
l1_ratio : float, default=0.5
    The ratio of L1 regularization.
alpha : float, default=1e-6
    The regularization parameter.
activation_name : str, default="relu"
    The activation function to use.
dropout : float, default=0.0
    The dropout rate.
random_state : int, default=None
    The random state for the random number generator.
weights : list, default=None
    The weights to initialize the model with.

Attributes:

weights : list
    The weights of the model.
params : list
    The parameters of the model.
scaler_ : sklearn.preprocessing.StandardScaler
    The scaler used to standardize the input features.
y_mean_ : float
    The mean of the target variable.

Methods:

fit(X, y)
    Fit the model to the data.
predict(X)
    Predict the target variable.
get_weights()
    Get the weights of the model.
set_weights(weights)
    Set the weights of the model.
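
A short usage sketch on synthetic data follows; the dataset and hyperparameters are assumptions, and only the constructor arguments and methods listed above are used. Extracted weights can be passed back through the `weights` argument to warm-start another instance.

```python
# Hedged sketch: fitting NeuralNetRegressor on made-up data, then reusing its weights.
import numpy as np
import nnetsauce as ns

rng = np.random.default_rng(42)
X = rng.normal(size=(200, 5))
y = X @ rng.normal(size=5) + 0.1 * rng.normal(size=200)

regr = ns.NeuralNetRegressor(
    hidden_layer_sizes=(20, 10),
    max_iter=50,
    learning_rate=0.01,
    random_state=42,
)
regr.fit(X, y)
preds = regr.predict(X)

# Warm start: pass the learned (weight, bias) pairs to a new instance and keep training.
weights = regr.get_weights()
warm = ns.NeuralNetRegressor(weights=weights, max_iter=10, random_state=42)
warm.fit(X, y)
```
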
def fit(self, X, y):
218    def fit(self, X, y):
219        # Standardize the input features
220        X = self.scaler_.fit_transform(X)
221        # Ensure y is 2D for consistency
222        y = y.reshape(-1, 1)
223        self.y_mean_ = jnp.mean(y)
224        y = y - self.y_mean_
225        # Validate or initialize weights
226        if self.weights is not None:
227            if self._validate_weights(X.shape[1]):
228                self.params = self.weights
229        else:
230            if self.hidden_layer_sizes is None:
231                raise ValueError(
232                    "Either weights or hidden_layer_sizes must be provided"
233                )
234            self.params = initialize_params(
235                X.shape[1], self.hidden_layer_sizes, self.random_state
236            )
237        loss_fn = partial(loss, l1_ratio=self.l1_ratio, alpha=self.alpha)
238        grad_loss = jit(grad(loss_fn))  # compiled gradient evaluation function
239        perex_grads = jit(
240            vmap(grad_loss, in_axes=(None, 0, 0))
241        )  # fast per-example grads
242        # Training loop
243        for _ in range(self.max_iter):
244            grads = perex_grads(self.params, X, y)
245            # Average gradients across examples
246            grads = jax.tree_map(lambda g: jnp.mean(g, axis=0), grads)
247            # Update parameters
248            self.params = [
249                (W - self.learning_rate * dW, b - self.learning_rate * db)
250                for (W, b), (dW, db) in zip(self.params, grads)
251            ]
252        # Store final weights
253        self.weights = self.params
254        return self
def predict(self, X):
269    def predict(self, X):
270        X = self.scaler_.transform(X)
271        if self.params is None:
272            raise ValueError("Model has not been fitted yet.")
273        predictions = predict_internal(
274            self.params,
275            X,
276            activation_func=self.activation_name,
277            dropout=self.dropout,
278            seed=self.random_state,
279        )
280        return predictions.reshape(-1) + self.y_mean_
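
The training loop in `fit` averages per-example gradients computed with `vmap`. The sketch below isolates that pattern on a toy linear model; `loss_fn` and `params` here are illustrative stand-ins, not nnetsauce's internal `loss` and `initialize_params`.

```python
# Hedged sketch of the per-example gradient pattern (vmap + tree averaging).
import jax
import jax.numpy as jnp

def loss_fn(params, x, y):
    # toy linear model: params = (W, b), x is a single example
    W, b = params
    pred = x @ W + b
    return jnp.mean((pred - y) ** 2)

grad_loss = jax.jit(jax.grad(loss_fn))
# batch over examples: params shared (None), x and y mapped over axis 0
perex_grads = jax.jit(jax.vmap(grad_loss, in_axes=(None, 0, 0)))

kx, ky = jax.random.split(jax.random.PRNGKey(0))
X = jax.random.normal(kx, (8, 3))
y = jax.random.normal(ky, (8, 1))
params = (jnp.zeros((3, 1)), jnp.zeros((1,)))

grads = perex_grads(params, X, y)                    # one gradient per example
grads = jax.tree_util.tree_map(lambda g: jnp.mean(g, axis=0), grads)  # average them

learning_rate = 0.01
W, b = params
dW, db = grads
params = (W - learning_rate * dW, b - learning_rate * db)  # one update step
```
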
class NeuralNetClassifier(sklearn.base.BaseEstimator, sklearn.base.ClassifierMixin):
 10class NeuralNetClassifier(BaseEstimator, ClassifierMixin):
 11    """
 12    (Pretrained) Neural Network Classifier.
 13
 14    Parameters:
 15
 16        hidden_layer_sizes : tuple, default=(100,)
 17            The number of neurons in each hidden layer.
 18        max_iter : int, default=100
 19            The maximum number of iterations to train the model.
 20        learning_rate : float, default=0.01
 21            The learning rate for the optimizer.
 22        l1_ratio : float, default=0.5
 23            The ratio of L1 regularization.
 24        alpha : float, default=1e-6
 25            The regularization parameter.
 26        activation_name : str, default="relu"
 27            The activation function to use.
 28        dropout : float, default=0.0
 29            The dropout rate.
 30        random_state : int, default=None
 31            The random state for the random number generator.
 32        weights : list, default=None
 33            The weights to initialize the model with.
 34
 35    Attributes:
 36
 37        weights : list
 38            The weights of the model.
 39        params : list
 40            The parameters of the model.
 41        scaler_ : sklearn.preprocessing.StandardScaler
 42            The scaler used to standardize the input features.
 43        y_mean_ : float
 44            The mean of the target variable.
 45
 46    Methods:
 47
 48        fit(X, y)
 49            Fit the model to the data.
 50        predict(X)
 51            Predict the target variable.
 52        predict_proba(X)
 53            Predict the probability of the target variable.
 54        get_weights()
 55            Get the weights of the model.
 56        set_weights(weights)
 57            Set the weights of the model.
 58    """
 59
 60    _estimator_type = "classifier"
 61
 62    def __init__(
 63        self,
 64        hidden_layer_sizes=(100,),
 65        max_iter=100,
 66        learning_rate=0.01,
 67        weights=None,
 68        l1_ratio=0.5,
 69        alpha=1e-6,
 70        activation_name="relu",
 71        dropout=0.0,
 72        random_state=None,
 73    ):
 74        self.hidden_layer_sizes = hidden_layer_sizes
 75        self.max_iter = max_iter
 76        self.learning_rate = learning_rate
 77        self.weights = weights
 78        self.l1_ratio = l1_ratio
 79        self.alpha = alpha
 80        self.activation_name = activation_name
 81        self.dropout = dropout
 82        self.random_state = random_state
 83        self.regr = None
 84
 85    def fit(self, X, y):
 86        """Fit the model to the data.
 87
 88        Parameters:
 89
 90            X: {array-like}, shape = [n_samples, n_features]
 91                Training vectors, where n_samples is the number of samples and
 92                n_features is the number of features.
 93            y: array-like, shape = [n_samples]
 94                Target values.
 95        """
 96        regressor = NeuralNetRegressor(
 97            hidden_layer_sizes=self.hidden_layer_sizes,
 98            max_iter=self.max_iter,
 99            learning_rate=self.learning_rate,
100            weights=self.weights,
101            l1_ratio=self.l1_ratio,
102            alpha=self.alpha,
103            activation_name=self.activation_name,
104            dropout=self.dropout,
105            random_state=self.random_state,
106        )
107        self.regr = SimpleMultitaskClassifier(regressor)
108        self.regr.fit(X, y)
109        self.classes_ = np.unique(y)
110        self.n_classes_ = len(self.classes_)
111        self.n_tasks_ = 1
112        self.n_features_in_ = X.shape[1]
113        self.n_outputs_ = 1
114        self.n_samples_fit_ = X.shape[0]
115        self.n_samples_test_ = X.shape[0]
116        self.n_features_out_ = 1
117        self.n_outputs_ = 1
118        self.n_features_in_ = X.shape[1]
119        self.n_features_out_ = 1
120        self.n_outputs_ = 1
121        return self
122
123    def predict_proba(self, X):
124        """Predict the probability of the target variable.
125
126        Parameters:
127
128            X: {array-like}, shape = [n_samples, n_features]
129                Training vectors, where n_samples is the number of samples and
130                n_features is the number of features.
131        """
132        return self.regr.predict_proba(X)
133
134    def predict(self, X):
135        """Predict the target variable.
136
137        Parameters:
138
139            X: {array-like}, shape = [n_samples, n_features]
140                Training vectors, where n_samples is the number of samples and
141                n_features is the number of features.
142        """
143        return self.regr.predict(X)
144
145    @property
146    def _estimator_type(self):
147        return "classifier"

(Pretrained) Neural Network Classifier.

Parameters:

hidden_layer_sizes : tuple, default=(100,)
    The number of neurons in each hidden layer.
max_iter : int, default=100
    The maximum number of iterations to train the model.
learning_rate : float, default=0.01
    The learning rate for the optimizer.
l1_ratio : float, default=0.5
    The ratio of L1 regularization.
alpha : float, default=1e-6
    The regularization parameter.
activation_name : str, default="relu"
    The activation function to use.
dropout : float, default=0.0
    The dropout rate.
random_state : int, default=None
    The random state for the random number generator.
weights : list, default=None
    The weights to initialize the model with.

Attributes:

weights : list
    The weights of the model.
params : list
    The parameters of the model.
scaler_ : sklearn.preprocessing.StandardScaler
    The scaler used to standardize the input features.
y_mean_ : float
    The mean of the target variable.

Methods:

fit(X, y)
    Fit the model to the data.
predict(X)
    Predict the target variable.
predict_proba(X)
    Predict the probability of the target variable.
get_weights()
    Get the weights of the model.
set_weights(weights)
    Set the weights of the model.
def fit(self, X, y):
 85    def fit(self, X, y):
 86        """Fit the model to the data.
 87
 88        Parameters:
 89
 90            X: {array-like}, shape = [n_samples, n_features]
 91                Training vectors, where n_samples is the number of samples and
 92                n_features is the number of features.
 93            y: array-like, shape = [n_samples]
 94                Target values.
 95        """
 96        regressor = NeuralNetRegressor(
 97            hidden_layer_sizes=self.hidden_layer_sizes,
 98            max_iter=self.max_iter,
 99            learning_rate=self.learning_rate,
100            weights=self.weights,
101            l1_ratio=self.l1_ratio,
102            alpha=self.alpha,
103            activation_name=self.activation_name,
104            dropout=self.dropout,
105            random_state=self.random_state,
106        )
107        self.regr = SimpleMultitaskClassifier(regressor)
108        self.regr.fit(X, y)
109        self.classes_ = np.unique(y)
110        self.n_classes_ = len(self.classes_)
111        self.n_tasks_ = 1
112        self.n_features_in_ = X.shape[1]
113        self.n_outputs_ = 1
114        self.n_samples_fit_ = X.shape[0]
115        self.n_samples_test_ = X.shape[0]
116        self.n_features_out_ = 1
117        self.n_outputs_ = 1
118        self.n_features_in_ = X.shape[1]
119        self.n_features_out_ = 1
120        self.n_outputs_ = 1
121        return self

Fit the model to the data.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number of samples and
    n_features is the number of features.
y: array-like, shape = [n_samples]
    Target values.
def predict_proba(self, X):
123    def predict_proba(self, X):
124        """Predict the probability of the target variable.
125
126        Parameters:
127
128            X: {array-like}, shape = [n_samples, n_features]
129                Training vectors, where n_samples is the number of samples and
130                n_features is the number of features.
131        """
132        return self.regr.predict_proba(X)

Predict the probability of the target variable.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number of samples and
    n_features is the number of features.
def predict(self, X):
134    def predict(self, X):
135        """Predict the target variable.
136
137        Parameters:
138
139            X: {array-like}, shape = [n_samples, n_features]
140                Training vectors, where n_samples is the number of samples and
141                n_features is the number of features.
142        """
143        return self.regr.predict(X)

Predict the target variable.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number of samples and
    n_features is the number of features.
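
A short usage sketch follows; the dataset (scikit-learn's iris) and the hyperparameters are illustrative choices, not defaults prescribed by the class.

```python
# Hedged sketch: NeuralNetClassifier on the iris data.
import nnetsauce as ns
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn import metrics

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123
)

clf = ns.NeuralNetClassifier(
    hidden_layer_sizes=(50,),
    max_iter=100,
    learning_rate=0.01,
    random_state=123,
)
clf.fit(X_train, y_train)

probs = clf.predict_proba(X_test)   # class probability estimates
preds = clf.predict(X_test)
print(metrics.accuracy_score(y_test, preds))
```
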
class PredictionInterval(sklearn.base.BaseEstimator, sklearn.base.RegressorMixin):
 19class PredictionInterval(BaseEstimator, RegressorMixin):
 20    """Class PredictionInterval: Obtain prediction intervals.
 21
 22    Attributes:
 23
 24        obj: an object;
 25            fitted object containing methods `fit` and `predict`
 26
 27        method: a string;
 28            method for constructing the prediction intervals.
 29            Currently "splitconformal" (default) and "localconformal"
 30
 31        level: a float;
 32            Confidence level for prediction intervals. Default is 95,
 33            equivalent to a miscoverage error of 5 (%)
 34
 35        replications: an integer;
 36            Number of replications for simulated conformal (default is `None`),
 37            for type_pi = "bootstrap" or "kde"
 38
 39        type_pi: a string;
 40            type of prediction interval: currently `None`
 41            (split conformal without simulation), "kde" or "bootstrap"
 42
 43        type_split: a string;
 44            "random" (random split of data) or "sequential" (sequential split of data)
 45
 46        seed: an integer;
 47            Reproducibility of fit (there's a random split between fitting and calibration data)
 48    """
 49
 50    def __init__(
 51        self,
 52        obj,
 53        method="splitconformal",
 54        level=95,
 55        type_pi=None,
 56        type_split="random",
 57        replications=None,
 58        kernel=None,
 59        agg="mean",
 60        seed=123,
 61    ):
 62
 63        self.obj = obj
 64        self.method = method
 65        self.level = level
 66        self.type_pi = type_pi
 67        self.type_split = type_split
 68        self.replications = replications
 69        self.kernel = kernel
 70        self.agg = agg
 71        self.seed = seed
 72        self.alpha_ = 1 - self.level / 100
 73        self.quantile_ = None
 74        self.icp_ = None
 75        self.calibrated_residuals_ = None
 76        self.scaled_calibrated_residuals_ = None
 77        self.calibrated_residuals_scaler_ = None
 78        self.kde_ = None
 79        self.aic_ = None
 80        self.aicc_ = None
 81        self.bic_ = None
 82        self.sse_ = None
 83
 84    def fit(self, X, y, sample_weight=None, **kwargs):
 85        """Fit the `method` to training data (X, y).
 86
 87        Args:
 88
 89            X: array-like, shape = [n_samples, n_features];
 90                Training set vectors, where n_samples is the number
 91                of samples and n_features is the number of features.
 92
 93            y: array-like, shape = [n_samples, ]; Target values.
 94
 95            sample_weight: array-like, shape = [n_samples]
 96                Sample weights.
 97
 98        """
 99
100        if self.type_split == "random":
101
102            X_train, X_calibration, y_train, y_calibration = train_test_split(
103                X, y, test_size=0.5, random_state=self.seed
104            )
105
106        elif self.type_split == "sequential":
107
108            n_x = X.shape[0]
109            n_x_half = n_x // 2
110            first_half_idx = range(0, n_x_half)
111            second_half_idx = range(n_x_half, n_x)
112            X_train = X[first_half_idx, :]
113            X_calibration = X[second_half_idx, :]
114            y_train = y[first_half_idx]
115            y_calibration = y[second_half_idx]
116
117        if self.method == "splitconformal":
118
119            self.obj.fit(X_train, y_train)
120            preds_calibration = self.obj.predict(X_calibration)
121            self.calibrated_residuals_ = y_calibration - preds_calibration
122            absolute_residuals = np.abs(self.calibrated_residuals_)
123            self.calibrated_residuals_scaler_ = StandardScaler(
124                with_mean=True, with_std=True
125            )
126            self.scaled_calibrated_residuals_ = (
127                self.calibrated_residuals_scaler_.fit_transform(
128                    self.calibrated_residuals_.reshape(-1, 1)
129                ).ravel()
130            )
131            try:
132                # numpy version >= 1.22
133                self.quantile_ = np.quantile(
134                    a=absolute_residuals, q=self.level / 100, method="higher"
135                )
136            except Exception:
137                # numpy version < 1.22
138                self.quantile_ = np.quantile(
139                    a=absolute_residuals,
140                    q=self.level / 100,
141                    interpolation="higher",
142                )
143
144        if self.method == "localconformal":
145
146            mad_estimator = ExtraTreesRegressor()
147            normalizer = RegressorNormalizer(
148                self.obj, mad_estimator, AbsErrorErrFunc()
149            )
150            nc = RegressorNc(self.obj, AbsErrorErrFunc(), normalizer)
151            self.icp_ = IcpRegressor(nc)
152            self.icp_.fit(X_train, y_train)
153            self.icp_.calibrate(X_calibration, y_calibration)
154
155        # Calculate AIC
156        # Get predictions
157        preds = self.obj.predict(X_calibration)
158
159        # Calculate SSE
160        self.sse_ = np.sum((y_calibration - preds) ** 2)
161
162        # Get number of parameters from the base model
163        n_params = (
164            getattr(self.obj, "n_hidden_features", 0) + X_calibration.shape[1]
165        )
166
167        # Calculate AIC
168        n_samples = len(y_calibration)
169        temp = n_samples * np.log(self.sse_ / n_samples)
170        self.aic_ = temp + 2 * n_params
171        self.bic_ = temp + np.log(n_samples) * n_params
172
173        return self
174
175    def predict(self, X, return_pi=False):
176        """Obtain predictions and prediction intervals
177
178        Args:
179
180            X: array-like, shape = [n_samples, n_features];
181                Testing set vectors, where n_samples is the number
182                of samples and n_features is the number of features.
183
184            return_pi: boolean
185                Whether the prediction interval is returned or not.
186                Default is False, for compatibility with other _estimators_.
187                If True, a tuple containing the predictions + lower and upper
188                bounds is returned.
189
190        """
191
192        if self.method == "splitconformal":
193            pred = self.obj.predict(X)
194
195        if self.method == "localconformal":
196            pred = self.icp_.predict(X)
197
198        if self.method == "splitconformal":
199
200            if (
201                self.replications is None and self.type_pi is None
202            ):  # type_pi is not used here, no bootstrap or kde
203
204                if return_pi:
205
206                    DescribeResult = namedtuple(
207                        "DescribeResult", ("mean", "lower", "upper")
208                    )
209                    return DescribeResult(
210                        pred, pred - self.quantile_, pred + self.quantile_
211                    )
212
213                else:
214
215                    return pred
216
217            else:  # self.method == "splitconformal" and if self.replications is not None, type_pi must be used
218
219                if self.type_pi is None:
220                    self.type_pi = "kde"
221                    raise Warning("type_pi must be set, setting to 'kde'")
222
223                if self.replications is None:
224                    self.replications = 100
225                    raise Warning("replications must be set, setting to 100")
226
227                assert self.type_pi in (
228                    "bootstrap",
229                    "kde",
230                ), "`self.type_pi` must be in ('bootstrap', 'kde')"
231
232                if self.type_pi == "bootstrap":
233                    np.random.seed(self.seed)
234                    self.residuals_sims_ = np.asarray(
235                        [
236                            np.random.choice(
237                                a=self.scaled_calibrated_residuals_,
238                                size=X.shape[0],
239                            )
240                            for _ in range(self.replications)
241                        ]
242                    ).T
243                    self.sims_ = np.asarray(
244                        [
245                            pred
246                            + self.calibrated_residuals_scaler_.scale_[0]
247                            * self.residuals_sims_[:, i].ravel()
248                            for i in range(self.replications)
249                        ]
250                    ).T
251                elif self.type_pi == "kde":
252                    self.kde_ = gaussian_kde(
253                        dataset=self.scaled_calibrated_residuals_
254                    )
255                    self.sims_ = np.asarray(
256                        [
257                            pred
258                            + self.calibrated_residuals_scaler_.scale_[0]
259                            * self.kde_.resample(
260                                size=X.shape[0], seed=self.seed + i
261                            ).ravel()
262                            for i in range(self.replications)
263                        ]
264                    ).T
265
266                self.mean_ = np.mean(self.sims_, axis=1)
267                self.lower_ = np.quantile(
268                    self.sims_, q=self.alpha_ / 200, axis=1
269                )
270                self.upper_ = np.quantile(
271                    self.sims_, q=1 - self.alpha_ / 200, axis=1
272                )
273
274                DescribeResult = namedtuple(
275                    "DescribeResult", ("mean", "sims", "lower", "upper")
276                )
277
278                return DescribeResult(
279                    self.mean_, self.sims_, self.lower_, self.upper_
280                )
281
282        if self.method == "localconformal":
283
284            if self.replications is None:
285
286                if return_pi:
287
288                    predictions_bounds = self.icp_.predict(
289                        X, significance=1 - self.level
290                    )
291                    DescribeResult = namedtuple(
292                        "DescribeResult", ("mean", "lower", "upper")
293                    )
294                    return DescribeResult(
295                        pred, predictions_bounds[:, 0], predictions_bounds[:, 1]
296                    )
297
298                else:
299
300                    return pred
301
302            else:  # (self.method == "localconformal") and if self.replications is not None
303
304                raise NotImplementedError(
305                    "When self.method == 'localconformal', there are no simulations"
306                )

Class PredictionInterval: Obtain prediction intervals.

Attributes:

obj: an object;
    fitted object containing methods `fit` and `predict`

method: a string;
    method for constructing the prediction intervals.
    Currently "splitconformal" (default) and "localconformal"

level: a float;
    Confidence level for prediction intervals. Default is 95,
    equivalent to a miscoverage error of 5 (%)

replications: an integer;
    Number of replications for simulated conformal (default is `None`),
    for type_pi = "bootstrap" or "kde"

type_pi: a string;
    type of prediction interval: currently `None`
    (split conformal without simulation), "kde" or "bootstrap"

type_split: a string;
    "random" (random split of data) or "sequential" (sequential split of data)

seed: an integer;
    Reproducibility of fit (there's a random split between fitting and calibration data)
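A minimal sketch of the default split-conformal workflow (the base learner and dataset below are illustrative choices, not taken from the library's documentation):

```python
import numpy as np
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123)

pi = ns.PredictionInterval(obj=Ridge(), method="splitconformal", level=95, seed=123)
pi.fit(X_train, y_train)

# with return_pi=True, a namedtuple (mean, lower, upper) is returned
mean, lower, upper = pi.predict(X_test, return_pi=True)
print(np.mean((y_test >= lower) & (y_test <= upper)))  # empirical coverage, roughly 0.95
```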
def fit(self, X, y, sample_weight=None, **kwargs):
 84    def fit(self, X, y, sample_weight=None, **kwargs):
 85        """Fit the `method` to training data (X, y).
 86
 87        Args:
 88
 89            X: array-like, shape = [n_samples, n_features];
 90                Training set vectors, where n_samples is the number
 91                of samples and n_features is the number of features.
 92
 93            y: array-like, shape = [n_samples, ]; Target values.
 94
 95            sample_weight: array-like, shape = [n_samples]
 96                Sample weights.
 97
 98        """
 99
100        if self.type_split == "random":
101
102            X_train, X_calibration, y_train, y_calibration = train_test_split(
103                X, y, test_size=0.5, random_state=self.seed
104            )
105
106        elif self.type_split == "sequential":
107
108            n_x = X.shape[0]
109            n_x_half = n_x // 2
110            first_half_idx = range(0, n_x_half)
111            second_half_idx = range(n_x_half, n_x)
112            X_train = X[first_half_idx, :]
113            X_calibration = X[second_half_idx, :]
114            y_train = y[first_half_idx]
115            y_calibration = y[second_half_idx]
116
117        if self.method == "splitconformal":
118
119            self.obj.fit(X_train, y_train)
120            preds_calibration = self.obj.predict(X_calibration)
121            self.calibrated_residuals_ = y_calibration - preds_calibration
122            absolute_residuals = np.abs(self.calibrated_residuals_)
123            self.calibrated_residuals_scaler_ = StandardScaler(
124                with_mean=True, with_std=True
125            )
126            self.scaled_calibrated_residuals_ = (
127                self.calibrated_residuals_scaler_.fit_transform(
128                    self.calibrated_residuals_.reshape(-1, 1)
129                ).ravel()
130            )
131            try:
132                # numpy version >= 1.22
133                self.quantile_ = np.quantile(
134                    a=absolute_residuals, q=self.level / 100, method="higher"
135                )
136            except Exception:
137                # numpy version < 1.22
138                self.quantile_ = np.quantile(
139                    a=absolute_residuals,
140                    q=self.level / 100,
141                    interpolation="higher",
142                )
143
144        if self.method == "localconformal":
145
146            mad_estimator = ExtraTreesRegressor()
147            normalizer = RegressorNormalizer(
148                self.obj, mad_estimator, AbsErrorErrFunc()
149            )
150            nc = RegressorNc(self.obj, AbsErrorErrFunc(), normalizer)
151            self.icp_ = IcpRegressor(nc)
152            self.icp_.fit(X_train, y_train)
153            self.icp_.calibrate(X_calibration, y_calibration)
154
155        # Calculate AIC
156        # Get predictions
157        preds = self.obj.predict(X_calibration)
158
159        # Calculate SSE
160        self.sse_ = np.sum((y_calibration - preds) ** 2)
161
162        # Get number of parameters from the base model
163        n_params = (
164            getattr(self.obj, "n_hidden_features", 0) + X_calibration.shape[1]
165        )
166
167        # Calculate AIC
168        n_samples = len(y_calibration)
169        temp = n_samples * np.log(self.sse_ / n_samples)
170        self.aic_ = temp + 2 * n_params
171        self.bic_ = temp + np.log(n_samples) * n_params
172
173        return self

Fit the method to training data (X, y).

Args:

X: array-like, shape = [n_samples, n_features];
    Training set vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples, ]; Target values.

sample_weight: array-like, shape = [n_samples]
    Sample weights.
def predict(self, X, return_pi=False):
175    def predict(self, X, return_pi=False):
176        """Obtain predictions and prediction intervals
177
178        Args:
179
180            X: array-like, shape = [n_samples, n_features];
181                Testing set vectors, where n_samples is the number
182                of samples and n_features is the number of features.
183
184            return_pi: boolean
185                Whether the prediction interval is returned or not.
186                Default is False, for compatibility with other _estimators_.
187                If True, a tuple containing the predictions + lower and upper
188                bounds is returned.
189
190        """
191
192        if self.method == "splitconformal":
193            pred = self.obj.predict(X)
194
195        if self.method == "localconformal":
196            pred = self.icp_.predict(X)
197
198        if self.method == "splitconformal":
199
200            if (
201                self.replications is None and self.type_pi is None
202            ):  # type_pi is not used here, no bootstrap or kde
203
204                if return_pi:
205
206                    DescribeResult = namedtuple(
207                        "DescribeResult", ("mean", "lower", "upper")
208                    )
209                    return DescribeResult(
210                        pred, pred - self.quantile_, pred + self.quantile_
211                    )
212
213                else:
214
215                    return pred
216
217            else:  # self.method == "splitconformal" and if self.replications is not None, type_pi must be used
218
219                if self.type_pi is None:
220                    self.type_pi = "kde"
221                    raise Warning("type_pi must be set, setting to 'kde'")
222
223                if self.replications is None:
224                    self.replications = 100
225                    raise Warning("replications must be set, setting to 100")
226
227                assert self.type_pi in (
228                    "bootstrap",
229                    "kde",
230                ), "`self.type_pi` must be in ('bootstrap', 'kde')"
231
232                if self.type_pi == "bootstrap":
233                    np.random.seed(self.seed)
234                    self.residuals_sims_ = np.asarray(
235                        [
236                            np.random.choice(
237                                a=self.scaled_calibrated_residuals_,
238                                size=X.shape[0],
239                            )
240                            for _ in range(self.replications)
241                        ]
242                    ).T
243                    self.sims_ = np.asarray(
244                        [
245                            pred
246                            + self.calibrated_residuals_scaler_.scale_[0]
247                            * self.residuals_sims_[:, i].ravel()
248                            for i in range(self.replications)
249                        ]
250                    ).T
251                elif self.type_pi == "kde":
252                    self.kde_ = gaussian_kde(
253                        dataset=self.scaled_calibrated_residuals_
254                    )
255                    self.sims_ = np.asarray(
256                        [
257                            pred
258                            + self.calibrated_residuals_scaler_.scale_[0]
259                            * self.kde_.resample(
260                                size=X.shape[0], seed=self.seed + i
261                            ).ravel()
262                            for i in range(self.replications)
263                        ]
264                    ).T
265
266                self.mean_ = np.mean(self.sims_, axis=1)
267                self.lower_ = np.quantile(
268                    self.sims_, q=self.alpha_ / 200, axis=1
269                )
270                self.upper_ = np.quantile(
271                    self.sims_, q=1 - self.alpha_ / 200, axis=1
272                )
273
274                DescribeResult = namedtuple(
275                    "DescribeResult", ("mean", "sims", "lower", "upper")
276                )
277
278                return DescribeResult(
279                    self.mean_, self.sims_, self.lower_, self.upper_
280                )
281
282        if self.method == "localconformal":
283
284            if self.replications is None:
285
286                if return_pi:
287
288                    predictions_bounds = self.icp_.predict(
289                        X, significance=1 - self.level
290                    )
291                    DescribeResult = namedtuple(
292                        "DescribeResult", ("mean", "lower", "upper")
293                    )
294                    return DescribeResult(
295                        pred, predictions_bounds[:, 0], predictions_bounds[:, 1]
296                    )
297
298                else:
299
300                    return pred
301
302            else:  # (self.method == "localconformal") and if self.replications is not None
303
304                raise NotImplementedError(
305                    "When self.method == 'localconformal', there are no simulations"
306                )

Obtain predictions and prediction intervals

Args:

X: array-like, shape = [n_samples, n_features];
    Testing set vectors, where n_samples is the number
    of samples and n_features is the number of features.

return_pi: boolean
    Whether the prediction interval is returned or not.
    Default is False, for compatibility with other estimators.
    If True, a tuple containing the predictions + lower and upper
    bounds is returned.
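When `replications` and `type_pi` are both set, `predict` returns the simulations alongside the point forecast and the bounds; a small sketch (illustrative base learner and dataset):

```python
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123)

pi = ns.PredictionInterval(
    obj=Ridge(), method="splitconformal", type_pi="kde", replications=250, seed=123
)
pi.fit(X_train, y_train)

res = pi.predict(X_test)   # namedtuple with fields mean, sims, lower, upper
print(res.sims.shape)      # (n_test_samples, 250)
print(res.lower[:3], res.upper[:3])
```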
class SimpleMultitaskClassifier(nnetsauce.Base, sklearn.base.ClassifierMixin):
 18class SimpleMultitaskClassifier(Base, ClassifierMixin):
 19    """Multitask Classification model based on regression models, with shared covariates
 20
 21    Parameters:
 22
 23        obj: object
 24            any object (must be a regression model) containing a method fit (obj.fit())
 25            and a method predict (obj.predict())
 26
 27        seed: int
 28            reproducibility seed
 29
 30    Attributes:
 31
 32        fit_objs_: dict
 33            objects adjusted to each individual time series
 34
 35        n_classes_: int
 36            number of classes for the classifier
 37
 38    Examples:
 39
 40    ```python
 41    import nnetsauce as ns
 42    import numpy as np
 43    from sklearn.datasets import load_breast_cancer
 44    from sklearn.linear_model import LinearRegression
 45    from sklearn.model_selection import train_test_split
 46    from sklearn import metrics
 47    from time import time
 48
 49    breast_cancer = load_breast_cancer()
 50    Z = breast_cancer.data
 51    t = breast_cancer.target
 52
 53    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
 54                                                        random_state=123+2*10)
 55
 56    # Linear Regression is used
 57    regr = LinearRegression()
 58    fit_obj = ns.SimpleMultitaskClassifier(regr)
 59
 60    start = time()
 61    fit_obj.fit(X_train, y_train)
 62    print(f"Elapsed {time() - start}")
 63
 64    print(fit_obj.score(X_test, y_test))
 65    print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
 66
 67    start = time()
 68    preds = fit_obj.predict(X_test)
 69    print(f"Elapsed {time() - start}")
 70    print(metrics.classification_report(preds, y_test))
 71    ```
 72
 73    """
 74
 75    # construct the object -----
 76    _estimator_type = "classifier"
 77
 78    def __init__(
 79        self,
 80        obj,
 81    ):
 82        self.type_fit = "classification"
 83        self.obj = obj
 84        self.fit_objs_ = {}
 85        self.X_scaler_ = StandardScaler()
 86        self.scaled_X_ = None
 87
 88    def fit(self, X, y, sample_weight=None, **kwargs):
 89        """Fit SimpleMultitaskClassifier to training data (X, y).
 90
 91        Args:
 92
 93            X: {array-like}, shape = [n_samples, n_features]
 94                Training vectors, where n_samples is the number
 95                of samples and n_features is the number of features.
 96
 97            y: array-like, shape = [n_samples]
 98                Target values.
 99
100            **kwargs: additional parameters to be passed to
101                    self.cook_training_set or self.obj.fit
102
103        Returns:
104
105            self: object
106
107        """
108
109        assert mx.is_factor(y), "y must contain only integers"
110
111        self.classes_ = np.unique(y)  # for compatibility with sklearn
112        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
113
114        self.scaled_X_ = self.X_scaler_.fit_transform(X)
115
116        # multitask response
117        Y = mo.one_hot_encode2(y, self.n_classes_)
118
119        try:
120            for i in range(self.n_classes_):
121                self.fit_objs_[i] = deepcopy(
122                    self.obj.fit(
123                        self.scaled_X_,
124                        Y[:, i],
125                        sample_weight=sample_weight,
126                        **kwargs
127                    )
128                )
129        except Exception as e:
130            for i in range(self.n_classes_):
131                self.fit_objs_[i] = deepcopy(
132                    self.obj.fit(self.scaled_X_, Y[:, i], **kwargs)
133                )
134        return self
135
136    def predict(self, X, **kwargs):
137        """Predict test data X.
138
139        Args:
140
141            X: {array-like}, shape = [n_samples, n_features]
142                Training vectors, where n_samples is the number
143                of samples and n_features is the number of features.
144
145            **kwargs: additional parameters
146
147        Returns:
148
149            model predictions: {array-like}
150
151        """
152        return np.argmax(self.predict_proba(X, **kwargs), axis=1)
153
154    def predict_proba(self, X, **kwargs):
155        """Predict probabilities for test data X.
156
157        Args:
158
159            X: {array-like}, shape = [n_samples, n_features]
160                Training vectors, where n_samples is the number
161                of samples and n_features is the number of features.
162
163            **kwargs: additional parameters
164
165        Returns:
166
167            probability estimates for test data: {array-like}
168
169        """
170
171        shape_X = X.shape
172
173        probs = np.zeros((shape_X[0], self.n_classes_))
174
175        if len(shape_X) == 1:  # one example
176
177            n_features = shape_X[0]
178
179            new_X = mo.rbind(
180                X.reshape(1, n_features),
181                np.ones(n_features).reshape(1, n_features),
182            )
183
184            Z = self.X_scaler_.transform(new_X, **kwargs)
185
186            # Fallback to standard model
187            for i in range(self.n_classes_):
188                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0]
189
190        else:  # multiple rows
191
192            Z = self.X_scaler_.transform(X, **kwargs)
193
194            # Fallback to standard model
195            for i in range(self.n_classes_):
196                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)
197
198        expit_raw_probs = expit(probs)
199
200        # Add small epsilon to avoid division by zero
201        row_sums = expit_raw_probs.sum(axis=1)[:, None]
202        row_sums[row_sums < 1e-10] = 1e-10
203
204        return expit_raw_probs / row_sums
205
206    def decision_function(self, X, **kwargs):
207        """Compute the decision function of X.
208
209        Parameters:
210            X: {array-like}, shape = [n_samples, n_features]
211                Samples to compute decision function for.
212
213            **kwargs: additional parameters to be passed to
214                    self.cook_test_set
215
216        Returns:
217            array-like of shape (n_samples,) or (n_samples, n_classes)
218            Decision function of the input samples. The order of outputs is the same
219            as that of the classes passed to fit.
220        """
221        if not hasattr(self.obj, "decision_function"):
222            # If base classifier doesn't have decision_function, use predict_proba
223            proba = self.predict_proba(X, **kwargs)
224            if proba.shape[1] == 2:
225                return proba[:, 1]  # For binary classification
226            return proba  # For multiclass
227
228        if len(X.shape) == 1:
229            n_features = X.shape[0]
230            new_X = mo.rbind(
231                X.reshape(1, n_features),
232                np.ones(n_features).reshape(1, n_features),
233            )
234
235            return (
236                self.obj.decision_function(
237                    self.cook_test_set(new_X, **kwargs), **kwargs
238                )
239            )[0]
240
241        return self.obj.decision_function(
242            self.cook_test_set(X, **kwargs), **kwargs
243        )
244
245    @property
246    def _estimator_type(self):
247        return "classifier"

Multitask Classification model based on regression models, with shared covariates

Parameters:

obj: object
    any object (must be a regression model) containing a method fit (obj.fit())
    and a method predict (obj.predict())

seed: int
    reproducibility seed

Attributes:

fit_objs_: dict
    objects fitted to each individual class (one regression task per class)

n_classes_: int
    number of classes for the classifier

Examples:

import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time

breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target

X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
                                                    random_state=123+2*10)

# Linear Regression is used
regr = LinearRegression()
fit_obj = ns.SimpleMultitaskClassifier(regr)

start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")

print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))

start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
def fit(self, X, y, sample_weight=None, **kwargs):
 88    def fit(self, X, y, sample_weight=None, **kwargs):
 89        """Fit SimpleMultitaskClassifier to training data (X, y).
 90
 91        Args:
 92
 93            X: {array-like}, shape = [n_samples, n_features]
 94                Training vectors, where n_samples is the number
 95                of samples and n_features is the number of features.
 96
 97            y: array-like, shape = [n_samples]
 98                Target values.
 99
100            **kwargs: additional parameters to be passed to
101                    self.cook_training_set or self.obj.fit
102
103        Returns:
104
105            self: object
106
107        """
108
109        assert mx.is_factor(y), "y must contain only integers"
110
111        self.classes_ = np.unique(y)  # for compatibility with sklearn
112        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
113
114        self.scaled_X_ = self.X_scaler_.fit_transform(X)
115
116        # multitask response
117        Y = mo.one_hot_encode2(y, self.n_classes_)
118
119        try:
120            for i in range(self.n_classes_):
121                self.fit_objs_[i] = deepcopy(
122                    self.obj.fit(
123                        self.scaled_X_,
124                        Y[:, i],
125                        sample_weight=sample_weight,
126                        **kwargs
127                    )
128                )
129        except Exception as e:
130            for i in range(self.n_classes_):
131                self.fit_objs_[i] = deepcopy(
132                    self.obj.fit(self.scaled_X_, Y[:, i], **kwargs)
133                )
134        return self

Fit SimpleMultitaskClassifier to training data (X, y).

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
def predict(self, X, **kwargs):
136    def predict(self, X, **kwargs):
137        """Predict test data X.
138
139        Args:
140
141            X: {array-like}, shape = [n_samples, n_features]
142                Training vectors, where n_samples is the number
143                of samples and n_features is the number of features.
144
145            **kwargs: additional parameters
146
147        Returns:
148
149            model predictions: {array-like}
150
151        """
152        return np.argmax(self.predict_proba(X, **kwargs), axis=1)

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters

Returns:

model predictions: {array-like}
def predict_proba(self, X, **kwargs):
154    def predict_proba(self, X, **kwargs):
155        """Predict probabilities for test data X.
156
157        Args:
158
159            X: {array-like}, shape = [n_samples, n_features]
160                Training vectors, where n_samples is the number
161                of samples and n_features is the number of features.
162
163            **kwargs: additional parameters
164
165        Returns:
166
167            probability estimates for test data: {array-like}
168
169        """
170
171        shape_X = X.shape
172
173        probs = np.zeros((shape_X[0], self.n_classes_))
174
175        if len(shape_X) == 1:  # one example
176
177            n_features = shape_X[0]
178
179            new_X = mo.rbind(
180                X.reshape(1, n_features),
181                np.ones(n_features).reshape(1, n_features),
182            )
183
184            Z = self.X_scaler_.transform(new_X, **kwargs)
185
186            # Fallback to standard model
187            for i in range(self.n_classes_):
188                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0]
189
190        else:  # multiple rows
191
192            Z = self.X_scaler_.transform(X, **kwargs)
193
194            # Fallback to standard model
195            for i in range(self.n_classes_):
196                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)
197
198        expit_raw_probs = expit(probs)
199
200        # Add small epsilon to avoid division by zero
201        row_sums = expit_raw_probs.sum(axis=1)[:, None]
202        row_sums[row_sums < 1e-10] = 1e-10
203
204        return expit_raw_probs / row_sums

Predict probabilities for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters

Returns:

probability estimates for test data: {array-like}
class Optimizer:
  9class Optimizer:
 10    """Optimizer class
 11
 12    Attributes:
 13
 14        type_optim: str
 15            type of optimizer, (currently) either 'sgd' (stochastic minibatch gradient descent)
 16            or 'scd' (stochastic minibatch coordinate descent)
 17
 18        num_iters: int
 19            number of iterations of the optimizer
 20
 21        learning_rate: float
 22            step size
 23
 24        batch_prop: float
 25            proportion of the initial data used at each optimization step
 26
 27        learning_method: str
 28            "poly" - learning rate decreasing as a polynomial function
 29            of # of iterations (default)
 30            "exp" - learning rate decreasing as an exponential function
 31            of # of iterations
 32            "momentum" - gradient descent using momentum
 33
 34        randomization: str
 35            type of randomization applied at each step
 36            "strat" - stratified subsampling (default)
 37            "shuffle" - random subsampling
 38
 39        mass: float
 40            mass on velocity, for `method` == "momentum"
 41
 42        decay: float
 43            coefficient of decrease of the learning rate for
 44            `method` == "poly" and `method` == "exp"
 45
 46        tolerance: float
 47            early stopping parameter (convergence of loss function)
 48
 49        verbose: int
 50            controls verbosity of gradient descent
 51            0 - nothing is printed
 52            1 - a progress bar is printed
 53            2 - successive loss function values are printed
 54
 55    """
 56
 57    # construct the object -----
 58
 59    def __init__(
 60        self,
 61        type_optim="sgd",
 62        num_iters=100,
 63        learning_rate=0.01,
 64        batch_prop=1.0,
 65        learning_method="momentum",
 66        randomization="strat",
 67        mass=0.9,
 68        decay=0.1,
 69        tolerance=1e-3,
 70        verbose=1,
 71    ):
 72        self.type_optim = type_optim
 73        self.num_iters = num_iters
 74        self.learning_rate = learning_rate
 75        self.batch_prop = batch_prop
 76        self.learning_method = learning_method
 77        self.randomization = randomization
 78        self.mass = mass
 79        self.decay = decay
 80        self.tolerance = tolerance
 81        self.verbose = verbose
 82        self.opt = None
 83
 84    def fit(self, loss_func, response, x0, q=None, **kwargs):
 85        """Fit GLM model to training data (X, y).
 86
 87        Args:
 88
 89            loss_func: loss function
 90
 91            response: array-like, shape = [n_samples]
 92            target variable (used for subsampling)
 93
 94            x0: array-like, shape = [n_features]
 95                initial value provided to the optimizer
 96
 97            **kwargs: additional parameters to be passed to
 98                    loss function
 99
100        Returns:
101
102            self: object
103
104        """
105
106        if self.type_optim == "scd":
107            self.results = scd(
108                loss_func,
109                response=response,
110                x=x0,
111                num_iters=self.num_iters,
112                batch_prop=self.batch_prop,
113                learning_rate=self.learning_rate,
114                learning_method=self.learning_method,
115                mass=self.mass,
116                decay=self.decay,
117                randomization=self.randomization,
118                tolerance=self.tolerance,
119                verbose=self.verbose,
120                **kwargs
121            )
122
123        if self.type_optim == "sgd":
124            self.results = sgd(
125                loss_func,
126                response=response,
127                x=x0,
128                num_iters=self.num_iters,
129                batch_prop=self.batch_prop,
130                learning_rate=self.learning_rate,
131                learning_method=self.learning_method,
132                mass=self.mass,
133                decay=self.decay,
134                randomization=self.randomization,
135                tolerance=self.tolerance,
136                verbose=self.verbose,
137                **kwargs
138            )
139
140        return self
141
142    def one_hot_encode(self, y, n_classes):
143        return one_hot_encode(y, n_classes)

Optimizer class

Attributes:

type_optim: str
    type of optimizer, (currently) either 'sgd' (stochastic minibatch gradient descent)
    or 'scd' (stochastic minibatch coordinate descent)

num_iters: int
    number of iterations of the optimizer

learning_rate: float
    step size

batch_prop: float
    proportion of the initial data used at each optimization step

learning_method: str
    "poly" - learning rate decreasing as a polynomial function
    of # of iterations
    "exp" - learning rate decreasing as an exponential function
    of # of iterations
    "momentum" - gradient descent using momentum (constructor default)

randomization: str
    type of randomization applied at each step
    "strat" - stratified subsampling (default)
    "shuffle" - random subsampling

mass: float
    mass on the velocity term, used when `learning_method` == "momentum"

decay: float
    coefficient controlling the decrease of the learning rate, used when
    `learning_method` == "poly" or `learning_method` == "exp"

tolerance: float
    early stopping parameter (convergence of loss function)

verbose: int
    controls verbosity of gradient descent
    0 - nothing is printed
    1 - a progress bar is printed
    2 - successive loss function values are printed
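The "poly" and "exp" learning methods above both shrink the step size as iterations progress. The sketch below only illustrates the typical shape of such schedules; the exact decay formulas used internally by `Optimizer` are not reproduced here, `decay` simply plays the role of the decrease coefficient:

```python
import numpy as np

learning_rate, decay, num_iters = 0.01, 0.1, 100
iters = np.arange(1, num_iters + 1)

# illustrative decay curves, not the library's exact formulas
poly_schedule = learning_rate / (1.0 + decay * iters)   # polynomial-type decrease
exp_schedule = learning_rate * np.exp(-decay * iters)   # exponential-type decrease

print(poly_schedule[:5])
print(exp_schedule[:5])
```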
def fit(self, loss_func, response, x0, q=None, **kwargs):
 84    def fit(self, loss_func, response, x0, q=None, **kwargs):
 85        """Fit GLM model to training data (X, y).
 86
 87        Args:
 88
 89            loss_func: loss function
 90
 91            response: array-like, shape = [n_samples]
 92            target variable (used for subsampling)
 93
 94            x0: array-like, shape = [n_features]
 95                initial value provided to the optimizer
 96
 97            **kwargs: additional parameters to be passed to
 98                    loss function
 99
100        Returns:
101
102            self: object
103
104        """
105
106        if self.type_optim == "scd":
107            self.results = scd(
108                loss_func,
109                response=response,
110                x=x0,
111                num_iters=self.num_iters,
112                batch_prop=self.batch_prop,
113                learning_rate=self.learning_rate,
114                learning_method=self.learning_method,
115                mass=self.mass,
116                decay=self.decay,
117                randomization=self.randomization,
118                tolerance=self.tolerance,
119                verbose=self.verbose,
120                **kwargs
121            )
122
123        if self.type_optim == "sgd":
124            self.results = sgd(
125                loss_func,
126                response=response,
127                x=x0,
128                num_iters=self.num_iters,
129                batch_prop=self.batch_prop,
130                learning_rate=self.learning_rate,
131                learning_method=self.learning_method,
132                mass=self.mass,
133                decay=self.decay,
134                randomization=self.randomization,
135                tolerance=self.tolerance,
136                verbose=self.verbose,
137                **kwargs
138            )
139
140        return self

Fit the optimizer to training data.

Args:

loss_func: loss function

response: array-like, shape = [n_samples]
    target variable (used for subsampling)

x0: array-like, shape = [n_features]
    initial value provided to the optimizer

**kwargs: additional parameters to be passed to
        loss function

Returns:

self: object
class QuantileRegressor(sklearn.base.BaseEstimator, sklearn.base.RegressorMixin):
 37class QuantileRegressor(BaseEstimator, RegressorMixin):
 38    """
 39    Quantile Regressor.
 40
 41    Parameters:
 42
 43        obj: base model (regression model)
 44            The base regressor from which to build a
 45            quantile regressor.
 46
 47        level: int, default=95
 48            The level of the quantiles to compute.
 49
 50        scoring: str, default="predictions"
 51            The scoring to use for the optimization and constructing
 52            prediction intervals (predictions, residuals, conformal,
 53              studentized, conformal-studentized).
 54
 55    Attributes:
 56
 57        obj_ : base model (regression model)
 58            The base regressor from which to build a
 59            quantile regressor.
 60
 61        offset_multipliers_ : list
 62            The multipliers for the offset.
 63
 64        scoring_residuals_ : list
 65            The residuals for the scoring.
 66
 67        student_multiplier_ : float
 68            The multiplier for the student.
 69
 70    """
 71
 72    def __init__(self, obj, level=95, scoring="predictions"):
 73        assert scoring in (
 74            "predictions",
 75            "residuals",
 76            "conformal",
 77            "studentized",
 78            "conformal-studentized",
 79        ), "scoring must be 'predictions' or 'residuals'"
 80        self.obj = obj
 81        low_risk_level = (1 - level / 100) / 2
 82        self.quantiles = [low_risk_level, 0.5, 1 - low_risk_level]
 83        self.scoring = scoring
 84        self.offset_multipliers_ = None
 85        self.obj_ = None
 86        self.scoring_residuals_ = None
 87        self.student_multiplier_ = None
 88
 89    def _compute_quantile_loss(self, residuals, quantile):
 90        """
 91        Compute the quantile loss for a given set of residuals and quantile.
 92        """
 93        return np.mean(
 94            residuals
 95            * (quantile * (residuals >= 0) + (quantile - 1) * (residuals < 0))
 96        )
 97
 98    def _optimize_multiplier(
 99        self,
100        y,
101        base_predictions,
102        prev_predictions,
103        scoring_residuals=None,
104        quantile=0.5,
105    ):
106        """
107        Optimize the multiplier for a given quantile.
108        """
109        if not 0 < quantile < 1:
110            raise ValueError("Quantile should be between 0 and 1.")
111
112        n = len(y)
113
114        def objective(log_multiplier):
115            """
116            Objective function for optimization.
117            """
118            # Convert to positive multiplier using exp
119            multiplier = np.exp(log_multiplier[0])
120            if self.scoring == "predictions":
121                assert (
122                    base_predictions is not None
123                ), "base_predictions must be not None"
124                # Calculate predictions
125                if prev_predictions is None:
126                    # For first quantile, subtract from conditional expectation
127                    predictions = base_predictions - multiplier * np.abs(
128                        base_predictions
129                    )
130                else:
131                    # For other quantiles, add to previous quantile
132                    offset = multiplier * np.abs(base_predictions)
133                    predictions = prev_predictions + offset
134            elif self.scoring in ("residuals", "conformal"):
135                assert (
136                    scoring_residuals is not None
137                ), "scoring_residuals must be not None"
138                # print("scoring_residuals", scoring_residuals)
139                # Calculate predictions
140                if prev_predictions is None:
141                    # For first quantile, subtract from conditional expectation
142                    predictions = base_predictions - multiplier * np.std(
143                        scoring_residuals
144                    ) / np.sqrt(len(scoring_residuals))
145                    # print("predictions", predictions)
146                else:
147                    # For other quantiles, add to previous quantile
148                    offset = (
149                        multiplier
150                        * np.std(scoring_residuals)
151                        / np.sqrt(len(scoring_residuals))
152                    )
153                    predictions = prev_predictions + offset
154            elif self.scoring in ("studentized", "conformal-studentized"):
155                assert (
156                    scoring_residuals is not None
157                ), "scoring_residuals must be not None"
158                # Calculate predictions
159                if prev_predictions is None:
160                    # For first quantile, subtract from conditional expectation
161                    predictions = (
162                        base_predictions - multiplier * self.student_multiplier_
163                    )
164                    # print("predictions", predictions)
165                else:
166                    # For other quantiles, add to previous quantile
167                    offset = multiplier * self.student_multiplier_
168                    predictions = prev_predictions + offset
169            else:
170                raise ValueError("Invalid argument 'scoring'")
171
172            return self._compute_quantile_loss(y - predictions, quantile)
173
174        # Optimize in log space for numerical stability
175        # bounds = [(-10, 10)]  # log space bounds
176        bounds = [(-100, 100)]  # log space bounds
177        result = differential_evolution(
178            objective,
179            bounds,
180            # popsize=15,
181            # maxiter=100,
182            # tol=1e-4,
183            popsize=25,
184            maxiter=200,
185            tol=1e-6,
186            disp=False,
187        )
188
189        return np.exp(result.x[0])
190
191    def fit(self, X, y):
192        """Fit the model to the data.
193
194        Parameters:
195
196            X: {array-like}, shape = [n_samples, n_features]
197                Training vectors, where n_samples is the number of samples and
198                n_features is the number of features.
199            y: array-like, shape = [n_samples]
200                Target values.
201        """
202        self.obj_ = clone(self.obj)
203
204        if self.scoring in ("predictions", "residuals"):
205
206            self.obj_.fit(X, y)
207            base_predictions = self.obj_.predict(X)
208            scoring_residuals = y - base_predictions
209            self.scoring_residuals_ = scoring_residuals
210
211        elif self.scoring == "conformal":
212
213            X_train, X_calib, y_train, y_calib = train_test_split(
214                X, y, test_size=0.5, random_state=42
215            )
216            self.obj_.fit(X_train, y_train)
217            scoring_residuals = y_calib - self.obj_.predict(
218                X_calib
219            )  # These are calibration predictions
220            self.scoring_residuals_ = scoring_residuals
221            # Update base_predictions to use training predictions for optimization
222            self.obj_.fit(X_calib, y_calib)
223            base_predictions = self.obj_.predict(X_calib)
224
225        elif self.scoring in ("studentized", "conformal-studentized"):
226
227            # Calculate student multiplier
228            if self.scoring == "conformal-studentized":
229                X_train, X_calib, y_train, y_calib = train_test_split(
230                    X, y, test_size=0.5, random_state=42
231                )
232                self.obj_.fit(X_train, y_train)
233                scoring_residuals = y_calib - self.obj_.predict(X_calib)
234                # Calculate studentized multiplier using calibration data
235                self.student_multiplier_ = np.std(y_calib, ddof=1) / np.sqrt(
236                    len(y_calib) - 1
237                )
238                self.obj_.fit(X_calib, y_calib)
239                base_predictions = self.obj_.predict(X_calib)
240            else:  # regular studentized
241                self.obj_.fit(X, y)
242                base_predictions = self.obj_.predict(X)
243                scoring_residuals = y - base_predictions
244                self.student_multiplier_ = np.std(y, ddof=1) / np.sqrt(
245                    len(y) - 1
246                )
247
248        # Initialize storage for multipliers
249        self.offset_multipliers_ = []
250        # Keep track of current predictions for each quantile
251        current_predictions = None
252
253        # Fit each quantile sequentially
254        for i, quantile in enumerate(self.quantiles):
255
256            if self.scoring == "predictions":
257
258                multiplier = self._optimize_multiplier(
259                    y=y,
260                    base_predictions=base_predictions,
261                    prev_predictions=current_predictions,
262                    quantile=quantile,
263                )
264
265                self.offset_multipliers_.append(multiplier)
266
267                # Update current predictions
268                if current_predictions is None:
269                    # First quantile (lowest)
270                    current_predictions = (
271                        base_predictions - multiplier * np.abs(base_predictions)
272                    )
273                else:
274                    # Subsequent quantiles
275                    offset = multiplier * np.abs(base_predictions)
276                    current_predictions = current_predictions + offset
277
278            elif self.scoring == "residuals":
279
280                multiplier = self._optimize_multiplier(
281                    y=y,
282                    base_predictions=base_predictions,
283                    scoring_residuals=scoring_residuals,
284                    prev_predictions=current_predictions,
285                    quantile=quantile,
286                )
287
288                self.offset_multipliers_.append(multiplier)
289
290                # Update current predictions
291                if current_predictions is None:
292                    # First quantile (lowest)
293                    current_predictions = (
294                        base_predictions
295                        - multiplier
296                        * np.std(scoring_residuals)
297                        / np.sqrt(len(scoring_residuals))
298                    )
299                else:
300                    # Subsequent quantiles
301                    offset = (
302                        multiplier
303                        * np.std(scoring_residuals)
304                        / np.sqrt(len(scoring_residuals))
305                    )
306                    current_predictions = current_predictions + offset
307
308            elif self.scoring == "conformal":
309
310                multiplier = self._optimize_multiplier(
311                    y=y_calib,
312                    base_predictions=base_predictions,
313                    scoring_residuals=scoring_residuals,
314                    prev_predictions=current_predictions,
315                    quantile=quantile,
316                )
317
318                self.offset_multipliers_.append(multiplier)
319
320                # Update current predictions
321                if current_predictions is None:
322                    # First quantile (lowest)
323                    current_predictions = (
324                        base_predictions
325                        - multiplier
326                        * np.std(scoring_residuals)
327                        / np.sqrt(len(scoring_residuals))
328                    )
329                else:
330                    # Subsequent quantiles
331                    offset = (
332                        multiplier
333                        * np.std(scoring_residuals)
334                        / np.sqrt(len(scoring_residuals))
335                    )
336                    current_predictions = current_predictions + offset
337
338            elif self.scoring in ("studentized", "conformal-studentized"):
339
340                multiplier = self._optimize_multiplier(
341                    y=y_calib if self.scoring == "conformal-studentized" else y,
342                    base_predictions=base_predictions,
343                    scoring_residuals=scoring_residuals,
344                    prev_predictions=current_predictions,
345                    quantile=quantile,
346                )
347
348                self.offset_multipliers_.append(multiplier)
349
350                # Update current predictions
351                if current_predictions is None:
352                    current_predictions = (
353                        base_predictions - multiplier * self.student_multiplier_
354                    )
355                else:
356                    offset = multiplier * self.student_multiplier_
357                    current_predictions = current_predictions + offset
358
359        return self
360
361    def predict(self, X, return_pi=False):
362        """Predict the target variable.
363
364        Parameters:
365
366            X: {array-like}, shape = [n_samples, n_features]
367                Training vectors, where n_samples is the number of samples and
368                n_features is the number of features.
369
370            return_pi: bool, default=True
371                Whether to return the prediction intervals.
372        """
373        if self.obj_ is None or self.offset_multipliers_ is None:
374            raise ValueError("Model not fitted yet.")
375
376        base_predictions = self.obj_.predict(X)
377        all_predictions = []
378
379        if self.scoring == "predictions":
380
381            # Generate first quantile
382            current_predictions = base_predictions - self.offset_multipliers_[
383                0
384            ] * np.abs(base_predictions)
385            all_predictions.append(current_predictions)
386
387            # Generate remaining quantiles
388            for multiplier in self.offset_multipliers_[1:]:
389                offset = multiplier * np.abs(base_predictions)
390                current_predictions = current_predictions + offset
391                all_predictions.append(current_predictions)
392
393        elif self.scoring in ("residuals", "conformal"):
394
395            # Generate first quantile
396            current_predictions = base_predictions - self.offset_multipliers_[
397                0
398            ] * np.std(self.scoring_residuals_) / np.sqrt(
399                len(self.scoring_residuals_)
400            )
401            all_predictions.append(current_predictions)
402
403            # Generate remaining quantiles
404            for multiplier in self.offset_multipliers_[1:]:
405                offset = (
406                    multiplier
407                    * np.std(self.scoring_residuals_)
408                    / np.sqrt(len(self.scoring_residuals_))
409                )
410                current_predictions = current_predictions + offset
411                all_predictions.append(current_predictions)
412
413        elif self.scoring in ("studentized", "conformal-studentized"):
414            # Generate first quantile
415            current_predictions = (
416                base_predictions
417                - self.offset_multipliers_[0] * self.student_multiplier_
418            )
419            all_predictions.append(current_predictions)
420
421            # Generate remaining quantiles
422            for multiplier in self.offset_multipliers_[1:]:
423                offset = multiplier * self.student_multiplier_
424                current_predictions = current_predictions + offset
425                all_predictions.append(current_predictions)
426
427        if not return_pi:
428            return np.asarray(all_predictions[1])
429
430        DescribeResult = namedtuple(
431            "DescribeResult", ["mean", "lower", "upper", "median"]
432        )
433        return DescribeResult(
434            mean=base_predictions,
435            lower=np.asarray(all_predictions[0]),
436            median=np.asarray(all_predictions[1]),
437            upper=np.asarray(all_predictions[2]),
438        )

Quantile Regressor.

Parameters:

obj: base model (regression model)
    The base regressor from which to build a
    quantile regressor.

level: int, default=95
    The level of the quantiles to compute.

scoring: str, default="predictions"
    The scoring to use for the optimization and constructing
    prediction intervals (predictions, residuals, conformal,
      studentized, conformal-studentized).

Attributes:

obj_ : base model (regression model)
    The base regressor from which to build a
    quantile regressor.

offset_multipliers_ : list
    The multipliers for the offset.

scoring_residuals_ : list
    The residuals for the scoring.

student_multiplier_ : float
    The multiplier for the student.
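The following is a minimal usage sketch (not taken from the library's documentation): the Ridge base learner, the California housing data and scoring="residuals" are illustrative assumptions; the calls follow the signatures shown above.

```python
# Illustrative sketch only: base learner, dataset and scoring choice are assumptions.
import numpy as np
import nnetsauce as ns
from sklearn.linear_model import Ridge
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

X, y = fetch_california_housing(return_X_y=True, as_frame=False)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=13
)

qr = ns.QuantileRegressor(obj=Ridge(), level=95, scoring="residuals")
qr.fit(X_train, y_train)

# return_pi=True returns a result with mean, lower, median and upper attributes
res = qr.predict(X_test, return_pi=True)
coverage = np.mean((y_test >= res.lower) & (y_test <= res.upper))
print(f"empirical coverage at level=95: {coverage:.3f}")
```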
def fit(self, X, y):
191    def fit(self, X, y):
192        """Fit the model to the data.
193
194        Parameters:
195
196            X: {array-like}, shape = [n_samples, n_features]
197                Training vectors, where n_samples is the number of samples and
198                n_features is the number of features.
199            y: array-like, shape = [n_samples]
200                Target values.
201        """
202        self.obj_ = clone(self.obj)
203
204        if self.scoring in ("predictions", "residuals"):
205
206            self.obj_.fit(X, y)
207            base_predictions = self.obj_.predict(X)
208            scoring_residuals = y - base_predictions
209            self.scoring_residuals_ = scoring_residuals
210
211        elif self.scoring == "conformal":
212
213            X_train, X_calib, y_train, y_calib = train_test_split(
214                X, y, test_size=0.5, random_state=42
215            )
216            self.obj_.fit(X_train, y_train)
217            scoring_residuals = y_calib - self.obj_.predict(
218                X_calib
219            )  # These are calibration predictions
220            self.scoring_residuals_ = scoring_residuals
221            # Update base_predictions to use training predictions for optimization
222            self.obj_.fit(X_calib, y_calib)
223            base_predictions = self.obj_.predict(X_calib)
224
225        elif self.scoring in ("studentized", "conformal-studentized"):
226
227            # Calculate student multiplier
228            if self.scoring == "conformal-studentized":
229                X_train, X_calib, y_train, y_calib = train_test_split(
230                    X, y, test_size=0.5, random_state=42
231                )
232                self.obj_.fit(X_train, y_train)
233                scoring_residuals = y_calib - self.obj_.predict(X_calib)
234                # Calculate studentized multiplier using calibration data
235                self.student_multiplier_ = np.std(y_calib, ddof=1) / np.sqrt(
236                    len(y_calib) - 1
237                )
238                self.obj_.fit(X_calib, y_calib)
239                base_predictions = self.obj_.predict(X_calib)
240            else:  # regular studentized
241                self.obj_.fit(X, y)
242                base_predictions = self.obj_.predict(X)
243                scoring_residuals = y - base_predictions
244                self.student_multiplier_ = np.std(y, ddof=1) / np.sqrt(
245                    len(y) - 1
246                )
247
248        # Initialize storage for multipliers
249        self.offset_multipliers_ = []
250        # Keep track of current predictions for each quantile
251        current_predictions = None
252
253        # Fit each quantile sequentially
254        for i, quantile in enumerate(self.quantiles):
255
256            if self.scoring == "predictions":
257
258                multiplier = self._optimize_multiplier(
259                    y=y,
260                    base_predictions=base_predictions,
261                    prev_predictions=current_predictions,
262                    quantile=quantile,
263                )
264
265                self.offset_multipliers_.append(multiplier)
266
267                # Update current predictions
268                if current_predictions is None:
269                    # First quantile (lowest)
270                    current_predictions = (
271                        base_predictions - multiplier * np.abs(base_predictions)
272                    )
273                else:
274                    # Subsequent quantiles
275                    offset = multiplier * np.abs(base_predictions)
276                    current_predictions = current_predictions + offset
277
278            elif self.scoring == "residuals":
279
280                multiplier = self._optimize_multiplier(
281                    y=y,
282                    base_predictions=base_predictions,
283                    scoring_residuals=scoring_residuals,
284                    prev_predictions=current_predictions,
285                    quantile=quantile,
286                )
287
288                self.offset_multipliers_.append(multiplier)
289
290                # Update current predictions
291                if current_predictions is None:
292                    # First quantile (lowest)
293                    current_predictions = (
294                        base_predictions
295                        - multiplier
296                        * np.std(scoring_residuals)
297                        / np.sqrt(len(scoring_residuals))
298                    )
299                else:
300                    # Subsequent quantiles
301                    offset = (
302                        multiplier
303                        * np.std(scoring_residuals)
304                        / np.sqrt(len(scoring_residuals))
305                    )
306                    current_predictions = current_predictions + offset
307
308            elif self.scoring == "conformal":
309
310                multiplier = self._optimize_multiplier(
311                    y=y_calib,
312                    base_predictions=base_predictions,
313                    scoring_residuals=scoring_residuals,
314                    prev_predictions=current_predictions,
315                    quantile=quantile,
316                )
317
318                self.offset_multipliers_.append(multiplier)
319
320                # Update current predictions
321                if current_predictions is None:
322                    # First quantile (lowest)
323                    current_predictions = (
324                        base_predictions
325                        - multiplier
326                        * np.std(scoring_residuals)
327                        / np.sqrt(len(scoring_residuals))
328                    )
329                else:
330                    # Subsequent quantiles
331                    offset = (
332                        multiplier
333                        * np.std(scoring_residuals)
334                        / np.sqrt(len(scoring_residuals))
335                    )
336                    current_predictions = current_predictions + offset
337
338            elif self.scoring in ("studentized", "conformal-studentized"):
339
340                multiplier = self._optimize_multiplier(
341                    y=y_calib if self.scoring == "conformal-studentized" else y,
342                    base_predictions=base_predictions,
343                    scoring_residuals=scoring_residuals,
344                    prev_predictions=current_predictions,
345                    quantile=quantile,
346                )
347
348                self.offset_multipliers_.append(multiplier)
349
350                # Update current predictions
351                if current_predictions is None:
352                    current_predictions = (
353                        base_predictions - multiplier * self.student_multiplier_
354                    )
355                else:
356                    offset = multiplier * self.student_multiplier_
357                    current_predictions = current_predictions + offset
358
359        return self

Fit the model to the data.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number of samples and
    n_features is the number of features.
y: array-like, shape = [n_samples]
    Target values.
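To make the fitting loop above easier to follow, here is a toy illustration (made-up numbers, not library output) of how the per-quantile multipliers translate into cumulative offsets when scoring="predictions":

```python
import numpy as np

# Toy numbers only: one multiplier per quantile, applied cumulatively as in fit/predict.
base = np.array([10.0, 20.0, 30.0])       # base model predictions
multipliers = [0.20, 0.15, 0.15]          # illustrative optimized multipliers

lower = base - multipliers[0] * np.abs(base)    # first (lowest) quantile
median = lower + multipliers[1] * np.abs(base)  # next quantile, shifted upward
upper = median + multipliers[2] * np.abs(base)  # highest quantile

# With non-negative multipliers the cumulative offsets keep lower <= median <= upper.
print(lower, median, upper)
```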
def predict(self, X, return_pi=False):
361    def predict(self, X, return_pi=False):
362        """Predict the target variable.
363
364        Parameters:
365
366            X: {array-like}, shape = [n_samples, n_features]
367                Training vectors, where n_samples is the number of samples and
368                n_features is the number of features.
369
370            return_pi: bool, default=False
371                Whether to return the prediction intervals.
372        """
373        if self.obj_ is None or self.offset_multipliers_ is None:
374            raise ValueError("Model not fitted yet.")
375
376        base_predictions = self.obj_.predict(X)
377        all_predictions = []
378
379        if self.scoring == "predictions":
380
381            # Generate first quantile
382            current_predictions = base_predictions - self.offset_multipliers_[
383                0
384            ] * np.abs(base_predictions)
385            all_predictions.append(current_predictions)
386
387            # Generate remaining quantiles
388            for multiplier in self.offset_multipliers_[1:]:
389                offset = multiplier * np.abs(base_predictions)
390                current_predictions = current_predictions + offset
391                all_predictions.append(current_predictions)
392
393        elif self.scoring in ("residuals", "conformal"):
394
395            # Generate first quantile
396            current_predictions = base_predictions - self.offset_multipliers_[
397                0
398            ] * np.std(self.scoring_residuals_) / np.sqrt(
399                len(self.scoring_residuals_)
400            )
401            all_predictions.append(current_predictions)
402
403            # Generate remaining quantiles
404            for multiplier in self.offset_multipliers_[1:]:
405                offset = (
406                    multiplier
407                    * np.std(self.scoring_residuals_)
408                    / np.sqrt(len(self.scoring_residuals_))
409                )
410                current_predictions = current_predictions + offset
411                all_predictions.append(current_predictions)
412
413        elif self.scoring in ("studentized", "conformal-studentized"):
414            # Generate first quantile
415            current_predictions = (
416                base_predictions
417                - self.offset_multipliers_[0] * self.student_multiplier_
418            )
419            all_predictions.append(current_predictions)
420
421            # Generate remaining quantiles
422            for multiplier in self.offset_multipliers_[1:]:
423                offset = multiplier * self.student_multiplier_
424                current_predictions = current_predictions + offset
425                all_predictions.append(current_predictions)
426
427        if not return_pi:
428            return np.asarray(all_predictions[1])
429
430        DescribeResult = namedtuple(
431            "DescribeResult", ["mean", "lower", "upper", "median"]
432        )
433        return DescribeResult(
434            mean=base_predictions,
435            lower=np.asarray(all_predictions[0]),
436            median=np.asarray(all_predictions[1]),
437            upper=np.asarray(all_predictions[2]),
438        )

Predict the target variable.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number of samples and
    n_features is the number of features.

return_pi: bool, default=False
    Whether to return the prediction intervals.
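A short follow-on (assuming a fitted QuantileRegressor `qr` and a test matrix `X_test` as in the earlier sketch) contrasting the two return modes:

```python
# return_pi=False (the default) returns only the median quantile as an array;
# return_pi=True exposes mean, lower, median and upper predictions.
median_only = qr.predict(X_test)
intervals = qr.predict(X_test, return_pi=True)
print(intervals.lower[:3], intervals.median[:3], intervals.upper[:3])
```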
class QuantileClassifier(sklearn.base.BaseEstimator, sklearn.base.ClassifierMixin):
 43class QuantileClassifier(BaseEstimator, ClassifierMixin):
 44    """
 45    Quantile Classifier.
 46
 47    Parameters:
 48
 49        obj: base model (classification model)
 50            The base classifier from which to build a
 51            quantile classifier.
 52
 53        level: int, default=95
 54            The level of the quantiles to compute.
 55
 56        scoring: str, default="predictions"
 57            The scoring to use for the optimization and constructing
 58            prediction intervals (predictions, residuals, conformal,
 59              studentized, conformal-studentized).
 60
 61    Attributes:
 62
 63        obj_ : base model (classification model)
 64            The base classifier from which to build a
 65            quantile classifier.
 66
 67        offset_multipliers_ : list
 68            The multipliers for the offset.
 69
 70        scoring_residuals_ : list
 71            The residuals for the scoring.
 72
 73        student_multiplier_ : float
 74            The multiplier for the student.
 75
 76
 77    """
 78
 79    def __init__(self, obj, level=95, scoring="predictions"):
 80        assert scoring in (
 81            "predictions",
 82            "residuals",
 83            "conformal",
 84            "studentized",
 85            "conformal-studentized",
 86        ), "scoring must be one of 'predictions', 'residuals', 'conformal', 'studentized', 'conformal-studentized'"
 87        self.obj = obj
 88        quantileregressor = QuantileRegressor(self.obj)
 89        quantileregressor.predict = partial(
 90            quantileregressor.predict, return_pi=False
 91        )
 92        self.obj_ = SimpleMultitaskClassifier(quantileregressor)
 93
 94    def fit(self, X, y, **kwargs):
 95        self.obj_.fit(X, y, **kwargs)
 96
 97    def predict(self, X, **kwargs):
 98        return self.obj_.predict(X, **kwargs)
 99
100    def predict_proba(self, X, **kwargs):
101        return self.obj_.predict_proba(X, **kwargs)

Quantile Classifier.

Parameters:

obj: base model (classification model)
    The base classifier from which to build a
    quantile classifier.

level: int, default=95
    The level of the quantiles to compute.

scoring: str, default="predictions"
    The scoring to use for the optimization and constructing
    prediction intervals (predictions, residuals, conformal,
      studentized, conformal-studentized).

Attributes:

obj_ : base model (classification model)
    The base classifier from which to build a
    quantile classifier.

offset_multipliers_ : list
    The multipliers for the offset.

scoring_residuals_ : list
    The residuals for the scoring.

student_multiplier_ : float
    The multiplier for the student.
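A minimal sketch of using the class as defined above. Note that the constructor hands `obj` to QuantileRegressor before wrapping it in SimpleMultitaskClassifier, so a regression-capable base estimator is passed in; Ridge and the breast cancer data are illustrative assumptions.

```python
# Illustrative sketch only: base estimator and dataset are assumptions.
import nnetsauce as ns
from sklearn.linear_model import Ridge
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=13
)

clf = ns.QuantileClassifier(obj=Ridge(), level=95, scoring="predictions")
clf.fit(X_train, y_train)
print(clf.predict(X_test)[:5])
print(clf.predict_proba(X_test)[:5])
```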
def fit(self, X, y, **kwargs):
94    def fit(self, X, y, **kwargs):
95        self.obj_.fit(X, y, **kwargs)
def predict(self, X, **kwargs):
97    def predict(self, X, **kwargs):
98        return self.obj_.predict(X, **kwargs)
def predict_proba(self, X, **kwargs):
100    def predict_proba(self, X, **kwargs):
101        return self.obj_.predict_proba(X, **kwargs)
class RandomBagRegressor(nnetsauce.randombag.bag.RandomBag, sklearn.base.RegressorMixin):
 18class RandomBagRegressor(RandomBag, RegressorMixin):
 19    """Randomized 'Bagging' Regression model
 20
 21    Parameters:
 22
 23        obj: object
 24            any object containing a method fit (obj.fit()) and a method predict
 25            (obj.predict())
 26
 27        n_estimators: int
 28            number of boosting iterations
 29
 30        n_hidden_features: int
 31            number of nodes in the hidden layer
 32
 33        activation_name: str
 34            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 35
 36        a: float
 37            hyperparameter for 'prelu' or 'elu' activation function
 38
 39        nodes_sim: str
 40            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 41            'uniform'
 42
 43        bias: boolean
 44            indicates if the hidden layer contains a bias term (True) or not
 45            (False)
 46
 47        dropout: float
 48            regularization parameter; (random) percentage of nodes dropped out
 49            of the training
 50
 51        direct_link: boolean
 52            indicates if the original predictors are included (True) in model's
 53            fitting or not (False)
 54
 55        n_clusters: int
 56            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 57                no clustering)
 58
 59        cluster_encode: bool
 60            defines how the variable containing clusters is treated (default is one-hot)
 61            if `False`, then labels are used, without one-hot encoding
 62
 63        type_clust: str
 64            type of clustering method: currently k-means ('kmeans') or Gaussian
 65            Mixture Model ('gmm')
 66
 67        type_scaling: a tuple of 3 strings
 68            scaling methods for inputs, hidden layer, and clustering respectively
 69            (and when relevant).
 70            Currently available: standardization ('std') or MinMax scaling ('minmax')
 71
 72        col_sample: float
 73            percentage of covariates randomly chosen for training
 74
 75        row_sample: float
 76            percentage of rows chosen for training, by stratified bootstrapping
 77
 78        seed: int
 79            reproducibility seed for nodes_sim=='uniform'
 80
 81        backend: str
 82            "cpu" or "gpu" or "tpu"
 83
 84    Attributes:
 85
 86        voter_: dict
 87            dictionary containing all the fitted base-learners
 88
 89
 90    Examples:
 91
 92    ```python
 93    import numpy as np
 94    import nnetsauce as ns
 95    from sklearn.datasets import fetch_california_housing
 96    from sklearn.tree import DecisionTreeRegressor
 97    from sklearn.model_selection import train_test_split
 98
 99    X, y = fetch_california_housing(return_X_y=True, as_frame=False)
100
101    # split data into training set and test set
102    X_train, X_test, y_train, y_test = train_test_split(X, y,
103                                                        test_size=0.2, random_state=13)
104
105    # Requires further tuning
106    obj = DecisionTreeRegressor(max_depth=3, random_state=123)
107    obj2 = ns.RandomBagRegressor(obj=obj, direct_link=False,
108                                n_estimators=50,
109                                col_sample=0.9, row_sample=0.9,
110                                dropout=0, n_clusters=0, verbose=1)
111
112    obj2.fit(X_train, y_train)
113
114    print(np.sqrt(obj2.score(X_test, y_test))) # RMSE
115
116    ```
117
118    """
119
120    # construct the object -----
121
122    def __init__(
123        self,
124        obj,
125        n_estimators=10,
126        n_hidden_features=1,
127        activation_name="relu",
128        a=0.01,
129        nodes_sim="sobol",
130        bias=True,
131        dropout=0,
132        direct_link=False,
133        n_clusters=2,
134        cluster_encode=True,
135        type_clust="kmeans",
136        type_scaling=("std", "std", "std"),
137        col_sample=1,
138        row_sample=1,
139        n_jobs=None,
140        seed=123,
141        verbose=1,
142        backend="cpu",
143    ):
144        super().__init__(
145            obj=obj,
146            n_estimators=n_estimators,
147            n_hidden_features=n_hidden_features,
148            activation_name=activation_name,
149            a=a,
150            nodes_sim=nodes_sim,
151            bias=bias,
152            dropout=dropout,
153            direct_link=direct_link,
154            n_clusters=n_clusters,
155            cluster_encode=cluster_encode,
156            type_clust=type_clust,
157            type_scaling=type_scaling,
158            col_sample=col_sample,
159            row_sample=row_sample,
160            seed=seed,
161            backend=backend,
162        )
163
164        self.type_fit = "regression"
165        self.verbose = verbose
166        self.n_jobs = n_jobs
167        self.voter_ = {}
168
169    def fit(self, X, y, **kwargs):
170        """Fit Random 'Bagging' model to training data (X, y).
171
172        Args:
173
174            X: {array-like}, shape = [n_samples, n_features]
175                Training vectors, where n_samples is the number
176                of samples and n_features is the number of features.
177
178            y: array-like, shape = [n_samples]
179                Target values.
180
181            **kwargs: additional parameters to be passed to
182                    self.cook_training_set or self.obj.fit
183
184        Returns:
185
186            self: object
187
188        """
189
190        base_learner = CustomRegressor(
191            self.obj,
192            n_hidden_features=self.n_hidden_features,
193            activation_name=self.activation_name,
194            a=self.a,
195            nodes_sim=self.nodes_sim,
196            bias=self.bias,
197            dropout=self.dropout,
198            direct_link=self.direct_link,
199            n_clusters=self.n_clusters,
200            type_clust=self.type_clust,
201            type_scaling=self.type_scaling,
202            col_sample=self.col_sample,
203            row_sample=self.row_sample,
204            seed=self.seed,
205        )
206
207        # 1 - Sequential training -----
208
209        if self.n_jobs is None:
210            self.voter_ = rbagloop_regression(
211                base_learner, X, y, self.n_estimators, self.verbose, self.seed
212            )
213
214            self.n_estimators = len(self.voter_)
215
216            return self
217
218        # 2 - Parallel training -----
219        # buggy
220        # if self.n_jobs is not None:
221        def fit_estimators(m):
222            base_learner__ = deepcopy(base_learner)
223            base_learner__.set_params(seed=self.seed + m * 1000)
224            base_learner__.fit(X, y, **kwargs)
225            return base_learner__
226
227        if self.verbose == 1:
228            voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
229                delayed(fit_estimators)(m)
230                for m in tqdm(range(self.n_estimators))
231            )
232        else:
233            voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
234                delayed(fit_estimators)(m) for m in range(self.n_estimators)
235            )
236
237        self.voter_ = {i: elt for i, elt in enumerate(voters_list)}
238
239        self.n_estimators = len(self.voter_)
240
241        return self
242
243    def predict(self, X, weights=None, **kwargs):
244        """Predict for test data X.
245
246        Args:
247
248            X: {array-like}, shape = [n_samples, n_features]
249                Training vectors, where n_samples is the number
250                of samples and n_features is the number of features.
251
252            **kwargs: additional parameters to be passed to
253                    self.cook_test_set
254
255        Returns:
256
257            estimates for test data: {array-like}
258
259        """
260
261        def calculate_preds(voter, weights=None):
262            ensemble_preds = 0
263
264            n_iter = len(voter)
265
266            assert n_iter > 0, "no estimator found in `RandomBag` ensemble"
267
268            if weights is None:
269                for idx, elt in voter.items():
270                    ensemble_preds += elt.predict(X)
271
272                return ensemble_preds / n_iter
273
274            # if weights is not None:
275            for idx, elt in voter.items():
276                ensemble_preds += weights[idx] * elt.predict(X)
277
278            return ensemble_preds
279
280        # end calculate_preds ----
281
282        if weights is None:
283            return calculate_preds(self.voter_)
284
285        # if weights is not None:
286        self.weights = weights
287
288        return calculate_preds(self.voter_, weights)

Randomized 'Bagging' Regression model

Parameters:

obj: object
    any object containing a method fit (obj.fit()) and a method predict
    (obj.predict())

n_estimators: int
    number of boosting iterations

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

col_sample: float
    percentage of covariates randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

seed: int
    reproducibility seed for nodes_sim=='uniform'

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

voter_: dict
    dictionary containing all the fitted base-learners

Examples:

import numpy as np
import nnetsauce as ns
from sklearn.datasets import fetch_california_housing
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split

X, y = fetch_california_housing(return_X_y=True, as_frame=False)

# split data into training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.2, random_state=13)

# Requires further tuning
obj = DecisionTreeRegressor(max_depth=3, random_state=123)
obj2 = ns.RandomBagRegressor(obj=obj, direct_link=False,
                            n_estimators=50,
                            col_sample=0.9, row_sample=0.9,
                            dropout=0, n_clusters=0, verbose=1)

obj2.fit(X_train, y_train)

print(np.sqrt(obj2.score(X_test, y_test))) # RMSE
def fit(self, X, y, **kwargs):
169    def fit(self, X, y, **kwargs):
170        """Fit Random 'Bagging' model to training data (X, y).
171
172        Args:
173
174            X: {array-like}, shape = [n_samples, n_features]
175                Training vectors, where n_samples is the number
176                of samples and n_features is the number of features.
177
178            y: array-like, shape = [n_samples]
179                Target values.
180
181            **kwargs: additional parameters to be passed to
182                    self.cook_training_set or self.obj.fit
183
184        Returns:
185
186            self: object
187
188        """
189
190        base_learner = CustomRegressor(
191            self.obj,
192            n_hidden_features=self.n_hidden_features,
193            activation_name=self.activation_name,
194            a=self.a,
195            nodes_sim=self.nodes_sim,
196            bias=self.bias,
197            dropout=self.dropout,
198            direct_link=self.direct_link,
199            n_clusters=self.n_clusters,
200            type_clust=self.type_clust,
201            type_scaling=self.type_scaling,
202            col_sample=self.col_sample,
203            row_sample=self.row_sample,
204            seed=self.seed,
205        )
206
207        # 1 - Sequential training -----
208
209        if self.n_jobs is None:
210            self.voter_ = rbagloop_regression(
211                base_learner, X, y, self.n_estimators, self.verbose, self.seed
212            )
213
214            self.n_estimators = len(self.voter_)
215
216            return self
217
218        # 2 - Parallel training -----
219        # buggy
220        # if self.n_jobs is not None:
221        def fit_estimators(m):
222            base_learner__ = deepcopy(base_learner)
223            base_learner__.set_params(seed=self.seed + m * 1000)
224            base_learner__.fit(X, y, **kwargs)
225            return base_learner__
226
227        if self.verbose == 1:
228            voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
229                delayed(fit_estimators)(m)
230                for m in tqdm(range(self.n_estimators))
231            )
232        else:
233            voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
234                delayed(fit_estimators)(m) for m in range(self.n_estimators)
235            )
236
237        self.voter_ = {i: elt for i, elt in enumerate(voters_list)}
238
239        self.n_estimators = len(self.voter_)
240
241        return self

Fit Random 'Bagging' model to training data (X, y).

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
def predict(self, X, weights=None, **kwargs):
243    def predict(self, X, weights=None, **kwargs):
244        """Predict for test data X.
245
246        Args:
247
248            X: {array-like}, shape = [n_samples, n_features]
249                Training vectors, where n_samples is the number
250                of samples and n_features is the number of features.
251
252            **kwargs: additional parameters to be passed to
253                    self.cook_test_set
254
255        Returns:
256
257            estimates for test data: {array-like}
258
259        """
260
261        def calculate_preds(voter, weights=None):
262            ensemble_preds = 0
263
264            n_iter = len(voter)
265
266            assert n_iter > 0, "no estimator found in `RandomBag` ensemble"
267
268            if weights is None:
269                for idx, elt in voter.items():
270                    ensemble_preds += elt.predict(X)
271
272                return ensemble_preds / n_iter
273
274            # if weights is not None:
275            for idx, elt in voter.items():
276                ensemble_preds += weights[idx] * elt.predict(X)
277
278            return ensemble_preds
279
280        # end calculate_preds ----
281
282        if weights is None:
283            return calculate_preds(self.voter_)
284
285        # if weights is not None:
286        self.weights = weights
287
288        return calculate_preds(self.voter_, weights)

Predict for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

estimates for test data: {array-like}
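Although the docstring above does not list it, predict also accepts a `weights` argument indexed by estimator; when weights are provided, predictions are combined as a weighted sum without renormalization, so weights summing to 1 yield a weighted average. A hedged sketch, assuming the fitted `obj2` ensemble and `X_test` from the class example:

```python
import numpy as np

# Assumes obj2 is the fitted RandomBagRegressor from the example above.
# Uniform weights summing to 1 reproduce the default (unweighted) average.
uniform_weights = np.full(obj2.n_estimators, 1.0 / obj2.n_estimators)
preds_default = obj2.predict(X_test)
preds_weighted = obj2.predict(X_test, weights=uniform_weights)
print(np.allclose(preds_default, preds_weighted))  # expected: True
```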
class RandomBagClassifier(nnetsauce.randombag.bag.RandomBag, sklearn.base.ClassifierMixin):
 18class RandomBagClassifier(RandomBag, ClassifierMixin):
 19    """Randomized 'Bagging' Classification model
 20
 21    Parameters:
 22
 23        obj: object
 24            any object containing a method fit (obj.fit()) and a method predict
 25            (obj.predict())
 26
 27        n_estimators: int
 28            number of boosting iterations
 29
 30        n_hidden_features: int
 31            number of nodes in the hidden layer
 32
 33        activation_name: str
 34            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 35
 36        a: float
 37            hyperparameter for 'prelu' or 'elu' activation function
 38
 39        nodes_sim: str
 40            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 41            'uniform'
 42
 43        bias: boolean
 44            indicates if the hidden layer contains a bias term (True) or not
 45            (False)
 46
 47        dropout: float
 48            regularization parameter; (random) percentage of nodes dropped out
 49            of the training
 50
 51        direct_link: boolean
 52            indicates if the original predictors are included (True) in model's
 53            fitting or not (False)
 54
 55        n_clusters: int
 56            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 57                no clustering)
 58
 59        cluster_encode: bool
 60            defines how the variable containing clusters is treated (default is one-hot)
 61            if `False`, then labels are used, without one-hot encoding
 62
 63        type_clust: str
 64            type of clustering method: currently k-means ('kmeans') or Gaussian
 65            Mixture Model ('gmm')
 66
 67        type_scaling: a tuple of 3 strings
 68            scaling methods for inputs, hidden layer, and clustering respectively
 69            (and when relevant).
 70            Currently available: standardization ('std') or MinMax scaling ('minmax')
 71
 72        col_sample: float
 73            percentage of covariates randomly chosen for training
 74
 75        row_sample: float
 76            percentage of rows chosen for training, by stratified bootstrapping
 77
 78        seed: int
 79            reproducibility seed for nodes_sim=='uniform'
 80
 81        backend: str
 82            "cpu" or "gpu" or "tpu"
 83
 84    Attributes:
 85
 86        voter_: dict
 87            dictionary containing all the fitted base-learners
 88
 89
 90    Examples:
 91
 92    See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/randombag_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/randombag_classification.py)
 93
 94    ```python
 95    import nnetsauce as ns
 96    from sklearn.datasets import load_breast_cancer
 97    from sklearn.tree import DecisionTreeClassifier
 98    from sklearn.model_selection import train_test_split
 99    from sklearn import metrics
100    from time import time
101
102
103    breast_cancer = load_breast_cancer()
104    Z = breast_cancer.data
105    t = breast_cancer.target
106    np.random.seed(123)
107    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)
108
109    # decision tree
110    clf = DecisionTreeClassifier(max_depth=2, random_state=123)
111    fit_obj = ns.RandomBagClassifier(clf, n_hidden_features=2,
112                                    direct_link=True,
113                                    n_estimators=100,
114                                    col_sample=0.9, row_sample=0.9,
115                                    dropout=0.3, n_clusters=0, verbose=1)
116
117    start = time()
118    fit_obj.fit(X_train, y_train)
119    print(f"Elapsed {time() - start}")
120
121    print(fit_obj.score(X_test, y_test))
122    print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
123
124    start = time()
125    preds = fit_obj.predict(X_test)
126    print(f"Elapsed {time() - start}")
127    print(metrics.classification_report(preds, y_test))
128    ```
129
130    """
131
132    # construct the object -----
133    _estimator_type = "classifier"
134
135    def __init__(
136        self,
137        obj,
138        n_estimators=10,
139        n_hidden_features=1,
140        activation_name="relu",
141        a=0.01,
142        nodes_sim="sobol",
143        bias=True,
144        dropout=0,
145        direct_link=False,
146        n_clusters=2,
147        cluster_encode=True,
148        type_clust="kmeans",
149        type_scaling=("std", "std", "std"),
150        col_sample=1,
151        row_sample=1,
152        n_jobs=None,
153        seed=123,
154        verbose=1,
155        backend="cpu",
156    ):
157        super().__init__(
158            obj=obj,
159            n_estimators=n_estimators,
160            n_hidden_features=n_hidden_features,
161            activation_name=activation_name,
162            a=a,
163            nodes_sim=nodes_sim,
164            bias=bias,
165            dropout=dropout,
166            direct_link=direct_link,
167            n_clusters=n_clusters,
168            cluster_encode=cluster_encode,
169            type_clust=type_clust,
170            type_scaling=type_scaling,
171            col_sample=col_sample,
172            row_sample=row_sample,
173            seed=seed,
174            backend=backend,
175        )
176
177        self.type_fit = "classification"
178        self.verbose = verbose
179        self.n_jobs = n_jobs
180        self.voter_ = {}
181
182    def fit(self, X, y, **kwargs):
183        """Fit Random 'Bagging' model to training data (X, y).
184
185        Args:
186
187            X: {array-like}, shape = [n_samples, n_features]
188                Training vectors, where n_samples is the number
189                of samples and n_features is the number of features.
190
191            y: array-like, shape = [n_samples]
192                Target values.
193
194            **kwargs: additional parameters to be passed to
195                    self.cook_training_set or self.obj.fit
196
197        Returns:
198
199            self: object
200
201        """
202
203        assert mx.is_factor(y), "y must contain only integers"
204
205        self.n_classes_ = len(np.unique(y))  # for compatibility with sklearn
206
207        # training
208        self.n_classes = len(np.unique(y))
209
210        base_learner = CustomClassifier(
211            self.obj,
212            n_hidden_features=self.n_hidden_features,
213            activation_name=self.activation_name,
214            a=self.a,
215            nodes_sim=self.nodes_sim,
216            bias=self.bias,
217            dropout=self.dropout,
218            direct_link=self.direct_link,
219            n_clusters=self.n_clusters,
220            type_clust=self.type_clust,
221            type_scaling=self.type_scaling,
222            col_sample=self.col_sample,
223            row_sample=self.row_sample,
224            seed=self.seed,
225            cv_calibration=None,
226        )
227
228        # 1 - Sequential training -----
229
230        if self.n_jobs is None:
231            self.voter_ = rbagloop_classification(
232                base_learner, X, y, self.n_estimators, self.verbose, self.seed
233            )
234
235            self.n_estimators = len(self.voter_)
236
237            return self
238
239        # 2 - Parallel training -----
240        # buggy
241        # if self.n_jobs is not None:
242        def fit_estimators(m):
243            base_learner__ = deepcopy(base_learner)
244            base_learner__.set_params(seed=self.seed + m * 1000)
245            base_learner__.fit(X, y, **kwargs)
246            return base_learner__
247
248        if self.verbose == 1:
249            voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
250                delayed(fit_estimators)(m)
251                for m in tqdm(range(self.n_estimators))
252            )
253        else:
254            voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
255                delayed(fit_estimators)(m) for m in range(self.n_estimators)
256            )
257
258        self.voter_ = {idx: elt for idx, elt in enumerate(voters_list)}
259
260        self.n_estimators = len(self.voter_)
261        self.classes_ = np.unique(y)
262        return self
263
264    def predict(self, X, weights=None, **kwargs):
265        """Predict test data X.
266
267        Args:
268
269            X: {array-like}, shape = [n_samples, n_features]
270                Training vectors, where n_samples is the number
271                of samples and n_features is the number of features.
272
273            **kwargs: additional parameters to be passed to
274                    self.cook_test_set
275
276        Returns:
277
278            model predictions: {array-like}
279
280        """
281        return self.predict_proba(X, weights, **kwargs).argmax(axis=1)
282
283    def predict_proba(self, X, weights=None, **kwargs):
284        """Predict probabilities for test data X.
285
286        Args:
287
288            X: {array-like}, shape = [n_samples, n_features]
289                Training vectors, where n_samples is the number
290                of samples and n_features is the number of features.
291
292            **kwargs: additional parameters to be passed to
293                    self.cook_test_set
294
295        Returns:
296
297            probability estimates for test data: {array-like}
298
299        """
300
301        def calculate_probas(voter, weights=None, verbose=None):
302            ensemble_proba = 0
303
304            n_iter = len(voter)
305
306            assert n_iter > 0, "no estimator found in `RandomBag` ensemble"
307
308            if weights is None:
309                for idx, elt in voter.items():
310                    try:
311                        ensemble_proba += elt.predict_proba(X)
312
313                        # if verbose == 1:
314                        #    pbar.update(idx)
315
316                    except:
317                        continue
318
319                # if verbose == 1:
320                #    pbar.update(n_iter)
321
322                return ensemble_proba / n_iter
323
324            # if weights is not None:
325            for idx, elt in voter.items():
326                ensemble_proba += weights[idx] * elt.predict_proba(X)
327
328                # if verbose == 1:
329                #    pbar.update(idx)
330
331            # if verbose == 1:
332            #    pbar.update(n_iter)
333
334            return ensemble_proba
335
336        # end calculate_probas ----
337
338        if self.n_jobs is None:
339            # if self.verbose == 1:
340            #    pbar = Progbar(self.n_estimators)
341
342            if weights is None:
343                return calculate_probas(self.voter_, verbose=self.verbose)
344
345            # if weights is not None:
346            self.weights = weights
347
348            return calculate_probas(self.voter_, weights, verbose=self.verbose)
349
350        # if self.n_jobs is not None:
351        def predict_estimator(m):
352            try:
353                return self.voter_[m].predict_proba(X)
354            except:
355                pass
356
357        if self.verbose == 1:
358            preds = Parallel(n_jobs=self.n_jobs, prefer="threads")(
359                delayed(predict_estimator)(m)
360                for m in tqdm(range(self.n_estimators))
361            )
362
363        else:
364            preds = Parallel(n_jobs=self.n_jobs, prefer="threads")(
365                delayed(predict_estimator)(m) for m in range(self.n_estimators)
366            )
367
368        ensemble_proba = 0
369
370        if weights is None:
371            for i in range(self.n_estimators):
372                ensemble_proba += preds[i]
373
374            return ensemble_proba / self.n_estimators
375
376        for i in range(self.n_estimators):
377            ensemble_proba += weights[i] * preds[i]
378
379        return ensemble_proba
380
381    @property
382    def _estimator_type(self):
383        return "classifier"

Randomized 'Bagging' Classification model

Parameters:

obj: object
    any object containing a method fit (obj.fit()) and a method predict
    (obj.predict())

n_estimators: int
    number of boosting iterations

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

col_sample: float
    percentage of covariates randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

seed: int
    reproducibility seed for nodes_sim=='uniform'

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

voter_: dict
    dictionary containing all the fitted base-learners

Examples:

See also https://github.com/Techtonique/nnetsauce/blob/master/examples/randombag_classification.py

import numpy as np
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time


breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)

# decision tree
clf = DecisionTreeClassifier(max_depth=2, random_state=123)
fit_obj = ns.RandomBagClassifier(clf, n_hidden_features=2,
                                direct_link=True,
                                n_estimators=100,
                                col_sample=0.9, row_sample=0.9,
                                dropout=0.3, n_clusters=0, verbose=1)

start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")

print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))

start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
def fit(self, X, y, **kwargs):
182    def fit(self, X, y, **kwargs):
183        """Fit Random 'Bagging' model to training data (X, y).
184
185        Args:
186
187            X: {array-like}, shape = [n_samples, n_features]
188                Training vectors, where n_samples is the number
189                of samples and n_features is the number of features.
190
191            y: array-like, shape = [n_samples]
192                Target values.
193
194            **kwargs: additional parameters to be passed to
195                    self.cook_training_set or self.obj.fit
196
197        Returns:
198
199            self: object
200
201        """
202
203        assert mx.is_factor(y), "y must contain only integers"
204
205        self.n_classes_ = len(np.unique(y))  # for compatibility with sklearn
206
207        # training
208        self.n_classes = len(np.unique(y))
209
210        base_learner = CustomClassifier(
211            self.obj,
212            n_hidden_features=self.n_hidden_features,
213            activation_name=self.activation_name,
214            a=self.a,
215            nodes_sim=self.nodes_sim,
216            bias=self.bias,
217            dropout=self.dropout,
218            direct_link=self.direct_link,
219            n_clusters=self.n_clusters,
220            type_clust=self.type_clust,
221            type_scaling=self.type_scaling,
222            col_sample=self.col_sample,
223            row_sample=self.row_sample,
224            seed=self.seed,
225            cv_calibration=None,
226        )
227
228        # 1 - Sequential training -----
229
230        if self.n_jobs is None:
231            self.voter_ = rbagloop_classification(
232                base_learner, X, y, self.n_estimators, self.verbose, self.seed
233            )
234
235            self.n_estimators = len(self.voter_)
236
237            return self
238
239        # 2 - Parallel training -----
240        # buggy
241        # if self.n_jobs is not None:
242        def fit_estimators(m):
243            base_learner__ = deepcopy(base_learner)
244            base_learner__.set_params(seed=self.seed + m * 1000)
245            base_learner__.fit(X, y, **kwargs)
246            return base_learner__
247
248        if self.verbose == 1:
249            voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
250                delayed(fit_estimators)(m)
251                for m in tqdm(range(self.n_estimators))
252            )
253        else:
254            voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
255                delayed(fit_estimators)(m) for m in range(self.n_estimators)
256            )
257
258        self.voter_ = {idx: elt for idx, elt in enumerate(voters_list)}
259
260        self.n_estimators = len(self.voter_)
261        self.classes_ = np.unique(y)
262        return self

Fit Random 'Bagging' model to training data (X, y).

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
def predict(self, X, weights=None, **kwargs):
264    def predict(self, X, weights=None, **kwargs):
265        """Predict test data X.
266
267        Args:
268
269            X: {array-like}, shape = [n_samples, n_features]
270                Training vectors, where n_samples is the number
271                of samples and n_features is the number of features.
272
273            **kwargs: additional parameters to be passed to
274                    self.cook_test_set
275
276        Returns:
277
278            model predictions: {array-like}
279
280        """
281        return self.predict_proba(X, weights, **kwargs).argmax(axis=1)

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
def predict_proba(self, X, weights=None, **kwargs):
283    def predict_proba(self, X, weights=None, **kwargs):
284        """Predict probabilities for test data X.
285
286        Args:
287
288            X: {array-like}, shape = [n_samples, n_features]
289                Training vectors, where n_samples is the number
290                of samples and n_features is the number of features.
291
292            **kwargs: additional parameters to be passed to
293                    self.cook_test_set
294
295        Returns:
296
297            probability estimates for test data: {array-like}
298
299        """
300
301        def calculate_probas(voter, weights=None, verbose=None):
302            ensemble_proba = 0
303
304            n_iter = len(voter)
305
306            assert n_iter > 0, "no estimator found in `RandomBag` ensemble"
307
308            if weights is None:
309                for idx, elt in voter.items():
310                    try:
311                        ensemble_proba += elt.predict_proba(X)
312
313                        # if verbose == 1:
314                        #    pbar.update(idx)
315
316                    except:
317                        continue
318
319                # if verbose == 1:
320                #    pbar.update(n_iter)
321
322                return ensemble_proba / n_iter
323
324            # if weights is not None:
325            for idx, elt in voter.items():
326                ensemble_proba += weights[idx] * elt.predict_proba(X)
327
328                # if verbose == 1:
329                #    pbar.update(idx)
330
331            # if verbose == 1:
332            #    pbar.update(n_iter)
333
334            return ensemble_proba
335
336        # end calculate_probas ----
337
338        if self.n_jobs is None:
339            # if self.verbose == 1:
340            #    pbar = Progbar(self.n_estimators)
341
342            if weights is None:
343                return calculate_probas(self.voter_, verbose=self.verbose)
344
345            # if weights is not None:
346            self.weights = weights
347
348            return calculate_probas(self.voter_, weights, verbose=self.verbose)
349
350        # if self.n_jobs is not None:
351        def predict_estimator(m):
352            try:
353                return self.voter_[m].predict_proba(X)
354            except:
355                pass
356
357        if self.verbose == 1:
358            preds = Parallel(n_jobs=self.n_jobs, prefer="threads")(
359                delayed(predict_estimator)(m)
360                for m in tqdm(range(self.n_estimators))
361            )
362
363        else:
364            preds = Parallel(n_jobs=self.n_jobs, prefer="threads")(
365                delayed(predict_estimator)(m) for m in range(self.n_estimators)
366            )
367
368        ensemble_proba = 0
369
370        if weights is None:
371            for i in range(self.n_estimators):
372                ensemble_proba += preds[i]
373
374            return ensemble_proba / self.n_estimators
375
376        for i in range(self.n_estimators):
377            ensemble_proba += weights[i] * preds[i]
378
379        return ensemble_proba

Predict probabilities for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

probability estimates for test data: {array-like}
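predict_proba (and hence predict) likewise accepts per-estimator `weights`; since weighted probabilities are a plain weighted sum, the weights should sum to 1 for each output row to remain a valid probability distribution. A hedged sketch, assuming the fitted `fit_obj` and `X_test` from the class example:

```python
import numpy as np

# Assumes fit_obj is the fitted RandomBagClassifier from the example above.
weights = np.full(fit_obj.n_estimators, 1.0 / fit_obj.n_estimators)
probas = fit_obj.predict_proba(X_test, weights=weights)
print(probas.sum(axis=1)[:5])  # each row should sum to (approximately) 1
```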
class RegressorUpdater(sklearn.base.BaseEstimator, sklearn.base.RegressorMixin):
 16class RegressorUpdater(BaseEstimator, RegressorMixin):
 17    """
 18    Update a regression model with new observations
 19
 20    Parameters
 21    ----------
 22    regr: object
 23        A regression model with a coef_ attribute
 24    alpha: float
 25        Updating factor's exponent
 26
 27    Attributes
 28    ----------
 29    n_obs_: int
 30        Number of observations
 31    coef_: np.ndarray
 32        Coefficients of the model
 33    updating_factor_: float
 34        Updating factor
 35
 36    """
 37
 38    def __init__(self, regr, alpha=0.5):
 39        self.regr = regr
 40        self.alpha = alpha
 41        self.n_obs_ = None
 42        self.coef_ = None
 43        self.updating_factor_ = None
 44        try:
 45            self.coef_ = self.regr.coef_
 46            if isinstance(self.regr, Base):
 47                self.n_obs_ = self.regr.scaler_.n_samples_seen_
 48        except AttributeError:
 49            pass
 50
 51    def fit(self, X, y, **kwargs):
 52
 53        if isinstance(
 54            self.regr, CustomRegressor
 55        ):  # nnetsauce model not deep ---
 56            if check_is_fitted(self.regr) == False:
 57                self.regr.fit(X, y, **kwargs)
 58                self.n_obs_ = X.shape[0]
 59                if hasattr(self.regr, "coef_"):
 60                    self.coef_ = self.regr.coef_
 61                return self
 62            self.n_obs_ = self.regr.scaler_.n_samples_seen_
 63            if hasattr(self.regr, "coef_"):
 64                self.coef_ = self.regr.coef_
 65            return self
 66
 67        if (
 68            hasattr(self.regr, "coef_") == False
 69        ):  # sklearn model or CustomRegressor model ---
 70            self.regr.fit(X, y)
 71            self.n_obs_ = X.shape[0]
 72            self.regr.fit(X, y)
 73            if hasattr(self.regr, "stacked_obj"):
 74                self.coef_ = self.regr.stacked_obj.coef_
 75            else:
 76                self.coef_ = self.regr.coef_
 77            return self
 78        self.n_obs_ = X.shape[0]
 79        if hasattr(self.regr, "coef_"):
 80            self.coef_ = self.regr.coef_
 81        return self
 82
 83    def predict(self, X):
 84        # assert hasattr(self.regr, "coef_"), "model must have coef_ attribute"
 85        return self.regr.predict(X)
 86
 87    def partial_fit(self, X, y):
 88
 89        assert hasattr(
 90            self.regr, "coef_"
 91        ), "model must be fitted first (i.e have 'coef_' attribute)"
 92        assert (
 93            self.n_obs_ is not None
 94        ), "model must be fitted first (i.e have 'n_obs_' attribute)"
 95
 96        if len(X.shape) == 1:
 97            X = X.reshape(1, -1)
 98
 99        assert X.shape[0] == 1, "X must have one row"
100
101        self.updating_factor_ = self.n_obs_ ** (-self.alpha)
102
103        if isinstance(self.regr, Base):  # nnetsauce model ---
104
105            newX = deepcopy(X)
106
107            if isinstance(
108                self.regr, CustomRegressor
109            ):  # other nnetsauce model (CustomRegressor) ---
110                newX = self.regr.cook_test_set(X=X)
111                if isinstance(X, pd.DataFrame):
112                    newx = newX.values.ravel()
113                else:
114                    newx = newX.ravel()
115
116        else:  # an sklearn model ---
117
118            if isinstance(X, pd.DataFrame):
119                newx = X.values.ravel()
120            else:
121                newx = X.ravel()
122
123        new_coef = self.regr.coef_ + self.updating_factor_ * np.dot(
124            newx, y - np.dot(newx, self.regr.coef_)
125        )
126        self.regr.coef_ = _update_mean(self.regr.coef_, self.n_obs_, new_coef)
127        self.coef_ = deepcopy(self.regr.coef_)
128        self.n_obs_ += 1
129        return self

Update a regression model with new observations

Parameters

regr: object
    A regression model with a coef_ attribute

alpha: float
    Updating factor's exponent

Attributes

n_obs_: int
    Number of observations

coef_: np.ndarray
    Coefficients of the model

updating_factor_: float
    Updating factor

def fit(self, X, y, **kwargs):
51    def fit(self, X, y, **kwargs):
52
53        if isinstance(
54            self.regr, CustomRegressor
55        ):  # nnetsauce model not deep ---
56            if check_is_fitted(self.regr) == False:
57                self.regr.fit(X, y, **kwargs)
58                self.n_obs_ = X.shape[0]
59                if hasattr(self.regr, "coef_"):
60                    self.coef_ = self.regr.coef_
61                return self
62            self.n_obs_ = self.regr.scaler_.n_samples_seen_
63            if hasattr(self.regr, "coef_"):
64                self.coef_ = self.regr.coef_
65            return self
66
67        if (
68            hasattr(self.regr, "coef_") == False
69        ):  # sklearn model or CustomRegressor model ---
70            self.regr.fit(X, y)
71            self.n_obs_ = X.shape[0]
72            self.regr.fit(X, y)
73            if hasattr(self.regr, "stacked_obj"):
74                self.coef_ = self.regr.stacked_obj.coef_
75            else:
76                self.coef_ = self.regr.coef_
77            return self
78        self.n_obs_ = X.shape[0]
79        if hasattr(self.regr, "coef_"):
80            self.coef_ = self.regr.coef_
81        return self
def predict(self, X):
83    def predict(self, X):
84        # assert hasattr(self.regr, "coef_"), "model must have coef_ attribute"
85        return self.regr.predict(X)
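
A short illustration (not part of the library source) of the intended workflow: fit the wrapped model once, then stream single observations through `partial_fit`, whose update strength decays as `n_obs_ ** (-alpha)`.

```python
# Sketch: online coefficient updates with RegressorUpdater (illustrative data;
# any fitted model exposing coef_ works, here sklearn's LinearRegression).
import nnetsauce as ns
import numpy as np
from sklearn.linear_model import LinearRegression

rng = np.random.default_rng(123)
X = rng.normal(size=(100, 3))
y = X @ np.array([1.0, -2.0, 0.5]) + 0.1 * rng.normal(size=100)

updater = ns.RegressorUpdater(regr=LinearRegression(), alpha=0.5)
updater.fit(X[:90], y[:90])            # initial fit sets coef_ and n_obs_

for i in range(90, 100):               # stream the remaining observations
    updater.partial_fit(X[i], y[i])    # update strength decays as n_obs_**(-alpha)

print(updater.coef_)
print(updater.predict(X[-5:]))
```
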
class ClassifierUpdater(sklearn.base.BaseEstimator, sklearn.base.ClassifierMixin):
 16class ClassifierUpdater(BaseEstimator, ClassifierMixin):
 17    """
 18    Update a classification model with new observations
 19
 20    Parameters
 21    ----------
 22    clf: object
 23        A classification model with a coef_ attribute
 24    alpha: float
 25        Updating factor's exponent
 26
 27    Attributes
 28    ----------
 29    n_obs_: int
 30        Number of observations
 31    coef_: np.ndarray
 32        Coefficients of the model
 33    updating_factor_: float
 34        Updating factor
 35
 36    """
 37
 38    _estimator_type = "classifier"
 39
 40    def __init__(self, clf, alpha=0.5):
 41        self.clf = clf
 42        self.alpha = alpha
 43        self.n_obs_ = None
 44        self.coef_ = None
 45        self.updating_factor_ = None
 46        try:
 47            self.coef_ = self.clf.coef_
 48            if isinstance(self.clf, Base):
 49                self.n_obs_ = self.clf.scaler_.n_samples_seen_
 50        except AttributeError:
 51            pass
 52
 53    def fit(self, X, y, **kwargs):
 54
 55        raise NotImplementedError(
 56            "fit method is not implemented for ClassifierUpdater"
 57        )
 58
 59        if isinstance(
 60            self.clf, CustomClassifier
 61        ):  # nnetsauce model not deep ---
 62            if check_is_fitted(self.clf) == False:
 63                self.clf.fit(X, y, **kwargs)
 64                self.n_obs_ = X.shape[0]
 65                if hasattr(self.clf, "coef_"):
 66                    self.coef_ = self.clf.coef_
 67                return self
 68            self.n_obs_ = self.clf.scaler_.n_samples_seen_
 69            if hasattr(self.clf, "coef_"):
 70                self.coef_ = self.clf.coef_
 71            return self
 72
 73        if (
 74            hasattr(self.clf, "coef_") == False
 75        ):  # sklearn model or CustomClassifier model ---
 76            self.clf.fit(X, y)
 77            self.n_obs_ = X.shape[0]
 78            self.clf.fit(X, y)
 79            if hasattr(self.clf, "stacked_obj"):
 80                self.coef_ = self.clf.stacked_obj.coef_
 81            else:
 82                self.coef_ = self.clf.coef_
 83            return self
 84        self.n_obs_ = X.shape[0]
 85        if hasattr(self.clf, "coef_"):
 86            self.coef_ = self.clf.coef_
 87        return self
 88
 89    def predict(self, X):
 90
 91        raise NotImplementedError(
 92            "predict method is not implemented for ClassifierUpdater"
 93        )
 94        # assert hasattr(self.clf, "coef_"), "model must have coef_ attribute"
 95        return self.clf.predict(X)
 96
 97    def partial_fit(self, X, y):
 98
 99        raise NotImplementedError(
100            "partial_fit method is not implemented for ClassifierUpdater"
101        )
102
103        assert hasattr(
104            self.clf, "coef_"
105        ), "model must be fitted first (i.e have 'coef_' attribute)"
106        assert (
107            self.n_obs_ is not None
108        ), "model must be fitted first (i.e have 'n_obs_' attribute)"
109
110        if len(X.shape) == 1:
111            X = X.reshape(1, -1)
112
113        assert X.shape[0] == 1, "X must have one row"
114
115        self.updating_factor_ = self.n_obs_ ** (-self.alpha)
116
117        if isinstance(self.clf, Base):  # nnetsauce model ---
118
119            newX = deepcopy(X)
120
121            if isinstance(
122                self.clf, CustomClassifier
123            ):  # other nnetsauce model (CustomClassifier) ---
124                newX = self.clf.cook_test_set(X=X)
125                if isinstance(X, pd.DataFrame):
126                    newx = newX.values.ravel()
127                else:
128                    newx = newX.ravel()
129
130        else:  # an sklearn model ---
131
132            if isinstance(X, pd.DataFrame):
133                newx = X.values.ravel()
134            else:
135                newx = X.ravel()
136
137        new_coef = self.clf.coef_ + self.updating_factor_ * np.dot(
138            newx, y - np.dot(newx, self.clf.coef_)
139        )
140        self.clf.coef_ = _update_mean(self.clf.coef_, self.n_obs_, new_coef)
141        self.coef_ = deepcopy(self.clf.coef_)
142        self.n_obs_ += 1
143        return self

Update a classification model with new observations

Parameters

clf: object
    A classification model with a coef_ attribute

alpha: float
    Updating factor's exponent

Attributes

n_obs_: int
    Number of observations

coef_: np.ndarray
    Coefficients of the model

updating_factor_: float
    Updating factor

Note: as the source above shows, `fit`, `predict`, and `partial_fit` currently raise NotImplementedError for ClassifierUpdater.

def fit(self, X, y, **kwargs):
53    def fit(self, X, y, **kwargs):
54
55        raise NotImplementedError(
56            "fit method is not implemented for ClassifierUpdater"
57        )
58
59        if isinstance(
60            self.clf, CustomClassifier
61        ):  # nnetsauce model not deep ---
62            if check_is_fitted(self.clf) == False:
63                self.clf.fit(X, y, **kwargs)
64                self.n_obs_ = X.shape[0]
65                if hasattr(self.clf, "coef_"):
66                    self.coef_ = self.clf.coef_
67                return self
68            self.n_obs_ = self.clf.scaler_.n_samples_seen_
69            if hasattr(self.clf, "coef_"):
70                self.coef_ = self.clf.coef_
71            return self
72
73        if (
74            hasattr(self.clf, "coef_") == False
75        ):  # sklearn model or CustomClassifier model ---
76            self.clf.fit(X, y)
77            self.n_obs_ = X.shape[0]
78            self.clf.fit(X, y)
79            if hasattr(self.clf, "stacked_obj"):
80                self.coef_ = self.clf.stacked_obj.coef_
81            else:
82                self.coef_ = self.clf.coef_
83            return self
84        self.n_obs_ = X.shape[0]
85        if hasattr(self.clf, "coef_"):
86            self.coef_ = self.clf.coef_
87        return self
def predict(self, X):
89    def predict(self, X):
90
91        raise NotImplementedError(
92            "predict method is not implemented for ClassifierUpdater"
93        )
94        # assert hasattr(self.clf, "coef_"), "model must have coef_ attribute"
95        return self.clf.predict(X)
class RidgeRegressor(sklearn.base.BaseEstimator, sklearn.base.RegressorMixin):
 24class RidgeRegressor(BaseEstimator, RegressorMixin):
 25    """Ridge.
 26
 27    Attributes:
 28
 29        reg_lambda: float
 30            regularization parameter.
 31
 32        backend: str
 33            type of backend; must be in ('cpu', 'gpu', 'tpu')
 34
 35    """
 36
 37    def __init__(self, reg_lambda=0.1, backend="cpu"):
 38        assert backend in (
 39            "cpu",
 40            "gpu",
 41            "tpu",
 42        ), "`backend` must be in ('cpu', 'gpu', 'tpu')"
 43
 44        sys_platform = platform.system()
 45
 46        if (sys_platform == "Windows") and (backend in ("gpu", "tpu")):
 47            warnings.warn(
 48                "No GPU/TPU computing on Windows yet, backend set to 'cpu'"
 49            )
 50            backend = "cpu"
 51
 52        self.reg_lambda = reg_lambda
 53        self.backend = backend
 54        self.coef_ = None
 55
 56    def fit(self, X, y, **kwargs):
 57        """Fit Ridge regression model to training data (X, y)
 58
 59        Args:
 60
 61            X: {array-like}, shape = [n_samples, n_features]
 62                Training vectors, where n_samples is the number
 63                of samples and n_features is the number of features.
 64
 65            y: array-like, shape = [n_samples]
 66                Target values.
 67
 68            **kwargs: additional parameters to be passed to self.cook_training_set.
 69
 70        Returns:
 71
 72            self: object.
 73
 74        """
 75        self.ym, centered_y = mo.center_response(y)
 76        self.xm = X.mean(axis=0)
 77        self.xsd = X.std(axis=0)
 78        self.xsd[self.xsd == 0] = 1  # avoid division by zero
 79        X_ = (X - self.xm[None, :]) / self.xsd[None, :]
 80
 81        if self.backend == "cpu":
 82            if len(centered_y.shape) <= 1:
 83                eye_term = np.sqrt(self.reg_lambda) * np.eye(X.shape[1])
 84                X_ = np.row_stack((X_, eye_term))
 85                y_ = np.concatenate((centered_y, np.zeros(X.shape[1])))
 86                beta_info = get_beta(X_, y_)
 87                self.coef_ = beta_info[0]
 88            else:
 89                try:
 90                    eye_term = np.sqrt(self.reg_lambda) * np.eye(X.shape[1])
 91                    X_ = np.row_stack((X_, eye_term))
 92                    y_ = np.row_stack(
 93                        (
 94                            centered_y,
 95                            np.zeros((eye_term.shape[0], centered_y.shape[1])),
 96                        )
 97                    )
 98                    beta_info = get_beta(X_, y_)
 99                    self.coef_ = beta_info[0]
100                except Exception:
101                    x = inv(
102                        mo.crossprod(X_) + self.reg_lambda * np.eye(X_.shape[1])
103                    )
104                    hat_matrix = mo.tcrossprod(x, X_)
105                    self.coef_ = mo.safe_sparse_dot(hat_matrix, centered_y)
106            return self
107
108        x = jinv(
109            mo.crossprod(X_, backend=self.backend)
110            + self.reg_lambda * jnp.eye(X_.shape[1])
111        )
112
113        hat_matrix = mo.tcrossprod(x, X_, backend=self.backend)
114        self.coef_ = mo.safe_sparse_dot(
115            hat_matrix, centered_y, backend=self.backend
116        )
117        return self
118
119    def predict(self, X, **kwargs):
120        """Predict test data X.
121
122        Args:
123
124            X: {array-like}, shape = [n_samples, n_features]
125                Training vectors, where n_samples is the number
126                of samples and n_features is the number of features.
127
128            **kwargs: additional parameters to be passed to `predict_proba`
129
130        Returns:
131
132            model predictions: {array-like}
133
134        """
135        X_ = (X - self.xm[None, :]) / self.xsd[None, :]
136
137        if self.backend == "cpu":
138            if isinstance(self.ym, float):
139                return self.ym + mo.safe_sparse_dot(X_, self.coef_)
140            return self.ym[None, :] + mo.safe_sparse_dot(X_, self.coef_)
141
142        # if self.backend in ("gpu", "tpu"):
143        if isinstance(self.ym, float):
144            return self.ym + mo.safe_sparse_dot(
145                X_, self.coef_, backend=self.backend
146            )
147        return self.ym[None, :] + mo.safe_sparse_dot(
148            X_, self.coef_, backend=self.backend
149        )

Ridge.

Attributes:

reg_lambda: float
    regularization parameter.

backend: str
    type of backend; must be in ('cpu', 'gpu', 'tpu')
def fit(self, X, y, **kwargs):
 56    def fit(self, X, y, **kwargs):
 57        """Fit Ridge regression model to training data (X, y)
 58
 59        Args:
 60
 61            X: {array-like}, shape = [n_samples, n_features]
 62                Training vectors, where n_samples is the number
 63                of samples and n_features is the number of features.
 64
 65            y: array-like, shape = [n_samples]
 66                Target values.
 67
 68            **kwargs: additional parameters to be passed to self.cook_training_set.
 69
 70        Returns:
 71
 72            self: object.
 73
 74        """
 75        self.ym, centered_y = mo.center_response(y)
 76        self.xm = X.mean(axis=0)
 77        self.xsd = X.std(axis=0)
 78        self.xsd[self.xsd == 0] = 1  # avoid division by zero
 79        X_ = (X - self.xm[None, :]) / self.xsd[None, :]
 80
 81        if self.backend == "cpu":
 82            if len(centered_y.shape) <= 1:
 83                eye_term = np.sqrt(self.reg_lambda) * np.eye(X.shape[1])
 84                X_ = np.row_stack((X_, eye_term))
 85                y_ = np.concatenate((centered_y, np.zeros(X.shape[1])))
 86                beta_info = get_beta(X_, y_)
 87                self.coef_ = beta_info[0]
 88            else:
 89                try:
 90                    eye_term = np.sqrt(self.reg_lambda) * np.eye(X.shape[1])
 91                    X_ = np.row_stack((X_, eye_term))
 92                    y_ = np.row_stack(
 93                        (
 94                            centered_y,
 95                            np.zeros((eye_term.shape[0], centered_y.shape[1])),
 96                        )
 97                    )
 98                    beta_info = get_beta(X_, y_)
 99                    self.coef_ = beta_info[0]
100                except Exception:
101                    x = inv(
102                        mo.crossprod(X_) + self.reg_lambda * np.eye(X_.shape[1])
103                    )
104                    hat_matrix = mo.tcrossprod(x, X_)
105                    self.coef_ = mo.safe_sparse_dot(hat_matrix, centered_y)
106            return self
107
108        x = jinv(
109            mo.crossprod(X_, backend=self.backend)
110            + self.reg_lambda * jnp.eye(X_.shape[1])
111        )
112
113        hat_matrix = mo.tcrossprod(x, X_, backend=self.backend)
114        self.coef_ = mo.safe_sparse_dot(
115            hat_matrix, centered_y, backend=self.backend
116        )
117        return self

Fit Ridge regression model to training data (X, y)

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to self.cook_training_set.

Returns:

self: object.
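
To see why the row-stacking in `fit` produces ridge coefficients, here is a small self-contained check on synthetic data: appending `sqrt(reg_lambda) * I` to the standardized design matrix and zeros to the centered response makes ordinary least squares minimize the ridge objective.

```python
# Check that the augmented least-squares system used above is equivalent to
# the ridge closed form (X'X + lambda*I)^(-1) X'y (illustrative data only).
import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(50, 4))
y = rng.normal(size=50)
lam = 0.1

X_aug = np.vstack([X, np.sqrt(lam) * np.eye(4)])     # row-stack sqrt(lambda)*I
y_aug = np.concatenate([y, np.zeros(4)])             # pad response with zeros
beta_aug, *_ = np.linalg.lstsq(X_aug, y_aug, rcond=None)

beta_ridge = np.linalg.solve(X.T @ X + lam * np.eye(4), X.T @ y)
print(np.allclose(beta_aug, beta_ridge))             # True
```
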
def predict(self, X, **kwargs):
119    def predict(self, X, **kwargs):
120        """Predict test data X.
121
122        Args:
123
124            X: {array-like}, shape = [n_samples, n_features]
125                Training vectors, where n_samples is the number
126                of samples and n_features is the number of features.
127
128            **kwargs: additional parameters to be passed to `predict_proba`
129
130        Returns:
131
132            model predictions: {array-like}
133
134        """
135        X_ = (X - self.xm[None, :]) / self.xsd[None, :]
136
137        if self.backend == "cpu":
138            if isinstance(self.ym, float):
139                return self.ym + mo.safe_sparse_dot(X_, self.coef_)
140            return self.ym[None, :] + mo.safe_sparse_dot(X_, self.coef_)
141
142        # if self.backend in ("gpu", "tpu"):
143        if isinstance(self.ym, float):
144            return self.ym + mo.safe_sparse_dot(
145                X_, self.coef_, backend=self.backend
146            )
147        return self.ym[None, :] + mo.safe_sparse_dot(
148            X_, self.coef_, backend=self.backend
149        )

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to `predict_proba`

Returns:

model predictions: {array-like}
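
A minimal usage sketch (the `reg_lambda` value is illustrative):

```python
# Minimal usage sketch for RidgeRegressor (reg_lambda chosen arbitrarily).
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

reg = ns.RidgeRegressor(reg_lambda=0.1)
reg.fit(X_train, y_train)
preds = reg.predict(X_test)
print(np.sqrt(np.mean((preds - y_test) ** 2)))  # test RMSE
```
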
class Ridge2Regressor(nnetsauce.ridge2.ridge2.Ridge2, sklearn.base.RegressorMixin):
 23class Ridge2Regressor(Ridge2, RegressorMixin):
 24    """Ridge regression with 2 regularization parameters derived from class Ridge
 25
 26    Parameters:
 27
 28        n_hidden_features: int
 29            number of nodes in the hidden layer
 30
 31        activation_name: str
 32            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 33
 34        a: float
 35            hyperparameter for 'prelu' or 'elu' activation function
 36
 37        nodes_sim: str
 38            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 39            'uniform'
 40
 41        bias: boolean
 42            indicates if the hidden layer contains a bias term (True) or not
 43            (False)
 44
 45        dropout: float
 46            regularization parameter; (random) percentage of nodes dropped out
 47            of the training
 48
 49        n_clusters: int
 50            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 51                no clustering)
 52
 53        cluster_encode: bool
 54            defines how the variable containing clusters is treated (default is one-hot)
 55            if `False`, then labels are used, without one-hot encoding
 56
 57        type_clust: str
 58            type of clustering method: currently k-means ('kmeans') or Gaussian
 59            Mixture Model ('gmm')
 60
 61        type_scaling: a tuple of 3 strings
 62            scaling methods for inputs, hidden layer, and clustering respectively
 63            (and when relevant).
 64            Currently available: standardization ('std') or MinMax scaling ('minmax')
 65
 66        lambda1: float
 67            regularization parameter on direct link
 68
 69        lambda2: float
 70            regularization parameter on hidden layer
 71
 72        seed: int
 73            reproducibility seed for nodes_sim=='uniform'
 74
 75        backend: str
 76            'cpu' or 'gpu' or 'tpu'
 77
 78    Attributes:
 79
 80        beta_: {array-like}
 81            regression coefficients
 82
 83        y_mean_: float
 84            average response
 85
 86    """
 87
 88    # construct the object -----
 89
 90    def __init__(
 91        self,
 92        n_hidden_features=5,
 93        activation_name="relu",
 94        a=0.01,
 95        nodes_sim="sobol",
 96        bias=True,
 97        dropout=0,
 98        n_clusters=2,
 99        cluster_encode=True,
100        type_clust="kmeans",
101        type_scaling=("std", "std", "std"),
102        lambda1=0.1,
103        lambda2=0.1,
104        seed=123,
105        backend="cpu",
106    ):
107        super().__init__(
108            n_hidden_features=n_hidden_features,
109            activation_name=activation_name,
110            a=a,
111            nodes_sim=nodes_sim,
112            bias=bias,
113            dropout=dropout,
114            n_clusters=n_clusters,
115            cluster_encode=cluster_encode,
116            type_clust=type_clust,
117            type_scaling=type_scaling,
118            lambda1=lambda1,
119            lambda2=lambda2,
120            seed=seed,
121            backend=backend,
122        )
123
124        self.type_fit = "regression"
125
126    def fit(self, X, y, **kwargs):
127        """Fit Ridge model to training data (X, y).
128
129        Args:
130
131            X: {array-like}, shape = [n_samples, n_features]
132                Training vectors, where n_samples is the number
133                of samples and n_features is the number of features.
134
135            y: array-like, shape = [n_samples]
136                Target values.
137
138            **kwargs: additional parameters to be passed to
139                    self.cook_training_set or self.obj.fit
140
141        Returns:
142
143            self: object
144
145        """
146
147        sys_platform = platform.system()
148
149        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
150
151        n_X, p_X = X.shape
152        n_Z, p_Z = scaled_Z.shape
153
154        if self.n_clusters > 0:
155            if self.encode_clusters == True:
156                n_features = p_X + self.n_clusters
157            else:
158                n_features = p_X + 1
159        else:
160            n_features = p_X
161
162        X_ = scaled_Z[:, 0:n_features]
163        Phi_X_ = scaled_Z[:, n_features:p_Z]
164
165        B = mo.crossprod(x=X_, backend=self.backend) + self.lambda1 * np.diag(
166            np.repeat(1, n_features)
167        )
168        C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend)
169        D = mo.crossprod(
170            x=Phi_X_, backend=self.backend
171        ) + self.lambda2 * np.diag(np.repeat(1, Phi_X_.shape[1]))
172
173        if sys_platform in ("Linux", "Darwin"):
174            B_inv = pinv(B) if self.backend == "cpu" else jpinv(B)
175        else:
176            B_inv = pinv(B)
177
178        W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend)
179        S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend)
180
181        if sys_platform in ("Linux", "Darwin"):
182            S_inv = pinv(S_mat) if self.backend == "cpu" else jpinv(S_mat)
183        else:
184            S_inv = pinv(S_mat)
185
186        Y = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend)
187        inv = mo.rbind(
188            mo.cbind(
189                x=B_inv + mo.crossprod(x=W, y=Y, backend=self.backend),
190                y=-np.transpose(Y),
191                backend=self.backend,
192            ),
193            mo.cbind(x=-Y, y=S_inv, backend=self.backend),
194            backend=self.backend,
195        )
196
197        self.beta_ = mo.safe_sparse_dot(
198            a=inv,
199            b=mo.crossprod(x=scaled_Z, y=centered_y, backend=self.backend),
200            backend=self.backend,
201        )
202
203        return self
204
205    def predict(self, X, **kwargs):
206        """Predict test data X.
207
208        Args:
209
210            X: {array-like}, shape = [n_samples, n_features]
211                Training vectors, where n_samples is the number
212                of samples and n_features is the number of features.
213
214            **kwargs: additional parameters to be passed to
215                    self.cook_test_set
216
217        Returns:
218
219            model predictions: {array-like}
220
221        """
222
223        if len(X.shape) == 1:
224            n_features = X.shape[0]
225            new_X = mo.rbind(
226                x=X.reshape(1, n_features),
227                y=np.ones(n_features).reshape(1, n_features),
228                backend=self.backend,
229            )
230
231            return (
232                self.y_mean_
233                + mo.safe_sparse_dot(
234                    a=self.cook_test_set(new_X, **kwargs),
235                    b=self.beta_,
236                    backend=self.backend,
237                )
238            )[0]
239
240        return self.y_mean_ + mo.safe_sparse_dot(
241            a=self.cook_test_set(X, **kwargs),
242            b=self.beta_,
243            backend=self.backend,
244        )

Ridge regression with 2 regularization parameters derived from class Ridge

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

lambda1: float
    regularization parameter on direct link

lambda2: float
    regularization parameter on hidden layer

seed: int
    reproducibility seed for nodes_sim=='uniform'

backend: str
    'cpu' or 'gpu' or 'tpu'

Attributes:

beta_: {array-like}
    regression coefficients

y_mean_: float
    average response
def fit(self, X, y, **kwargs):
126    def fit(self, X, y, **kwargs):
127        """Fit Ridge model to training data (X, y).
128
129        Args:
130
131            X: {array-like}, shape = [n_samples, n_features]
132                Training vectors, where n_samples is the number
133                of samples and n_features is the number of features.
134
135            y: array-like, shape = [n_samples]
136                Target values.
137
138            **kwargs: additional parameters to be passed to
139                    self.cook_training_set or self.obj.fit
140
141        Returns:
142
143            self: object
144
145        """
146
147        sys_platform = platform.system()
148
149        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
150
151        n_X, p_X = X.shape
152        n_Z, p_Z = scaled_Z.shape
153
154        if self.n_clusters > 0:
155            if self.encode_clusters == True:
156                n_features = p_X + self.n_clusters
157            else:
158                n_features = p_X + 1
159        else:
160            n_features = p_X
161
162        X_ = scaled_Z[:, 0:n_features]
163        Phi_X_ = scaled_Z[:, n_features:p_Z]
164
165        B = mo.crossprod(x=X_, backend=self.backend) + self.lambda1 * np.diag(
166            np.repeat(1, n_features)
167        )
168        C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend)
169        D = mo.crossprod(
170            x=Phi_X_, backend=self.backend
171        ) + self.lambda2 * np.diag(np.repeat(1, Phi_X_.shape[1]))
172
173        if sys_platform in ("Linux", "Darwin"):
174            B_inv = pinv(B) if self.backend == "cpu" else jpinv(B)
175        else:
176            B_inv = pinv(B)
177
178        W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend)
179        S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend)
180
181        if sys_platform in ("Linux", "Darwin"):
182            S_inv = pinv(S_mat) if self.backend == "cpu" else jpinv(S_mat)
183        else:
184            S_inv = pinv(S_mat)
185
186        Y = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend)
187        inv = mo.rbind(
188            mo.cbind(
189                x=B_inv + mo.crossprod(x=W, y=Y, backend=self.backend),
190                y=-np.transpose(Y),
191                backend=self.backend,
192            ),
193            mo.cbind(x=-Y, y=S_inv, backend=self.backend),
194            backend=self.backend,
195        )
196
197        self.beta_ = mo.safe_sparse_dot(
198            a=inv,
199            b=mo.crossprod(x=scaled_Z, y=centered_y, backend=self.backend),
200            backend=self.backend,
201        )
202
203        return self

Fit Ridge model to training data (X, y).

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
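
The coefficient solve above inverts the penalized block Gram matrix via a Schur complement. The following standalone sketch, on small synthetic matrices, verifies the block-inverse identity being used (it mirrors the roles of `B`, `C`, `D`, `W`, `S_mat` in `fit`, but is not the library's code):

```python
# Standalone check of the Schur-complement block inverse used above
# (illustrative matrices only; B, C, D mirror the roles in fit()).
import numpy as np

rng = np.random.default_rng(1)
Z = rng.normal(size=(20, 5))
X_part, Phi_part = Z[:, :3], Z[:, 3:]

B = X_part.T @ X_part + 0.1 * np.eye(3)      # direct-link block + lambda1*I
C = Phi_part.T @ X_part                      # cross block
D = Phi_part.T @ Phi_part + 0.1 * np.eye(2)  # hidden-layer block + lambda2*I

B_inv = np.linalg.pinv(B)
W = C @ B_inv
S = D - W @ C.T                              # Schur complement of B
S_inv = np.linalg.pinv(S)
Y = S_inv @ W

inv_blockwise = np.block([[B_inv + W.T @ Y, -Y.T],
                          [-Y,              S_inv]])
M = np.block([[B, C.T],
              [C, D]])
print(np.allclose(inv_blockwise, np.linalg.inv(M)))  # True
```
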
def predict(self, X, **kwargs):
205    def predict(self, X, **kwargs):
206        """Predict test data X.
207
208        Args:
209
210            X: {array-like}, shape = [n_samples, n_features]
211                Training vectors, where n_samples is the number
212                of samples and n_features is the number of features.
213
214            **kwargs: additional parameters to be passed to
215                    self.cook_test_set
216
217        Returns:
218
219            model predictions: {array-like}
220
221        """
222
223        if len(X.shape) == 1:
224            n_features = X.shape[0]
225            new_X = mo.rbind(
226                x=X.reshape(1, n_features),
227                y=np.ones(n_features).reshape(1, n_features),
228                backend=self.backend,
229            )
230
231            return (
232                self.y_mean_
233                + mo.safe_sparse_dot(
234                    a=self.cook_test_set(new_X, **kwargs),
235                    b=self.beta_,
236                    backend=self.backend,
237                )
238            )[0]
239
240        return self.y_mean_ + mo.safe_sparse_dot(
241            a=self.cook_test_set(X, **kwargs),
242            b=self.beta_,
243            backend=self.backend,
244        )

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
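
A minimal usage sketch (hyperparameter values are illustrative):

```python
# Minimal usage sketch for Ridge2Regressor (hyperparameters chosen arbitrarily).
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

reg = ns.Ridge2Regressor(n_hidden_features=10, lambda1=0.1, lambda2=0.1, n_clusters=2)
reg.fit(X_train, y_train)
preds = reg.predict(X_test)
print(np.sqrt(np.mean((preds - y_test) ** 2)))  # test RMSE
```
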
class Ridge2Classifier(nnetsauce.ridge2.ridge2.Ridge2, sklearn.base.ClassifierMixin):
 18class Ridge2Classifier(Ridge2, ClassifierMixin):
 19    """Multinomial logit classification with 2 regularization parameters
 20
 21    Parameters:
 22
 23        n_hidden_features: int
 24            number of nodes in the hidden layer
 25
 26        activation_name: str
 27            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 28
 29        a: float
 30            hyperparameter for 'prelu' or 'elu' activation function
 31
 32        nodes_sim: str
 33            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 34            'uniform'
 35
 36        bias: boolean
 37            indicates if the hidden layer contains a bias term (True) or not
 38            (False)
 39
 40        dropout: float
 41            regularization parameter; (random) percentage of nodes dropped out
 42            of the training
 43
 44        direct_link: boolean
 45            indicates if the original predictors are included (True) in model's
 46            fitting or not (False)
 47
 48        n_clusters: int
 49            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 50                no clustering)
 51
 52        cluster_encode: bool
 53            defines how the variable containing clusters is treated (default is one-hot)
 54            if `False`, then labels are used, without one-hot encoding
 55
 56        type_clust: str
 57            type of clustering method: currently k-means ('kmeans') or Gaussian
 58            Mixture Model ('gmm')
 59
 60        type_scaling: a tuple of 3 strings
 61            scaling methods for inputs, hidden layer, and clustering respectively
 62            (and when relevant).
 63            Currently available: standardization ('std') or MinMax scaling ('minmax')
 64
 65        lambda1: float
 66            regularization parameter on direct link
 67
 68        lambda2: float
 69            regularization parameter on hidden layer
 70
 71        solver: str
 72            optimization function "L-BFGS-B",  "Newton-CG",
 73            "trust-ncg", "L-BFGS-B-lstsq", "Newton-CG-lstsq",
 74            "trust-ncg-lstsq" (see scipy.optimize.minimize)
 75            When using "L-BFGS-B-lstsq", "Newton-CG-lstsq", or "trust-ncg-lstsq",
 76            the initial value for the optimization is set to the least squares solution
 77
 78        seed: int
 79            reproducibility seed for nodes_sim=='uniform'
 80
 81        backend: str
 82            "cpu" or "gpu" or "tpu"
 83
 84    Attributes:
 85
 86        beta_: {array-like}
 87            regression coefficients
 88
 89        classes_: {array-like}
 90            unique classes in the target variable
 91
 92        minloglik_: float
 93            minimum value of the negative log-likelihood
 94
 95    Examples:
 96
 97    See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/ridge_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/ridge_classification.py)
 98
 99    ```python
100    import nnetsauce as ns
101    import numpy as np
102    from sklearn.datasets import load_breast_cancer
103    from sklearn.model_selection import train_test_split
104    from time import time
105
106
107    breast_cancer = load_breast_cancer()
108    X = breast_cancer.data
109    y = breast_cancer.target
110
111    # split data into training test and test set
112    np.random.seed(123)
113    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
114
115    # create the model with nnetsauce
116    fit_obj = ns.Ridge2Classifier(lambda1 = 6.90185578e+04,
117                                lambda2 = 3.17392781e+02,
118                                n_hidden_features=95,
119                                n_clusters=2,
120                                dropout = 3.62817383e-01,
121                                type_clust = "gmm")
122
123    # fit the model on training set
124    start = time()
125    fit_obj.fit(X_train, y_train)
126    print(f"Elapsed {time() - start}")
127
128    # get the accuracy on test set
129    start = time()
130    print(fit_obj.score(X_test, y_test))
131    print(f"Elapsed {time() - start}")
132
133    # get area under the curve on test set (auc)
134    print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
135    ```
136
137
138    """
139
140    _estimator_type = "classifier"
141
142    # construct the object -----
143
144    def __init__(
145        self,
146        n_hidden_features=5,
147        activation_name="relu",
148        a=0.01,
149        nodes_sim="sobol",
150        bias=True,
151        dropout=0,
152        direct_link=True,
153        n_clusters=2,
154        cluster_encode=True,
155        type_clust="kmeans",
156        type_scaling=("std", "std", "std"),
157        lambda1=0.1,
158        lambda2=0.1,
159        solver="L-BFGS-B",
160        seed=123,
161        backend="cpu",
162    ):
163        super().__init__(
164            n_hidden_features=n_hidden_features,
165            activation_name=activation_name,
166            a=a,
167            nodes_sim=nodes_sim,
168            bias=bias,
169            dropout=dropout,
170            direct_link=direct_link,
171            n_clusters=n_clusters,
172            cluster_encode=cluster_encode,
173            type_clust=type_clust,
174            type_scaling=type_scaling,
175            lambda1=lambda1,
176            lambda2=lambda2,
177            seed=seed,
178            backend=backend,
179        )
180
181        self.type_fit = "classification"
182        self.solver = solver
183        self.beta_ = None
184        self.classes_ = None
185        self.minloglik_ = None
186
187    def loglik(self, X, Y, **kwargs):
188        """Log-likelihood for training data (X, Y).
189
190        Args:
191
192            X: {array-like}, shape = [n_samples, n_features]
193                Training vectors, where n_samples is the number
194                of samples and n_features is the number of features.
195
196            Y: array-like, shape = [n_samples]
197                One-hot encode target values.
198
199            **kwargs: additional parameters to be passed to
200                    self.cook_training_set or self.obj.fit
201
202        Returns:
203
204        """
205
206        def loglik_grad_hess(Y, X, B, XB, hessian=True, **kwargs):
207            # nobs, n_classes
208            n, K = Y.shape
209
210            # total number of covariates
211            p = X.shape[1]
212
213            # initial number of covariates
214            init_p = p - self.n_hidden_features
215
216            max_double = 709.0
217            XB[XB > max_double] = max_double
218            exp_XB = np.exp(XB)
219            probs = exp_XB / exp_XB.sum(axis=1)[:, None]
220
221            # gradient -----
222            # (Y - p) -> (n, K)
223            # X -> (n, p)
224            # (K, n) %*% (n, p) -> (K, p)
225            if hessian is False:
226                grad = (
227                    -mo.safe_sparse_dot(
228                        a=(Y - probs).T, b=X, backend=self.backend
229                    )
230                    / n
231                )
232                grad += self.lambda1 * B[0:init_p, :].sum(axis=0)[:, None]
233                grad += self.lambda2 * B[init_p:p, :].sum(axis=0)[:, None]
234
235                return grad.flatten()
236
237            # hessian -----
238            if hessian is True:
239                Kp = K * p
240                hess = np.zeros((Kp, Kp), float)
241                for k1 in range(K):
242                    x_index = range(k1 * p, (k1 + 1) * p)
243                    for k2 in range(k1, K):
244                        y_index = range(k2 * p, (k2 + 1) * p)
245                        H_sub = (
246                            -mo.safe_sparse_dot(
247                                a=X.T,
248                                b=(probs[:, k1] * probs[:, k2])[:, None] * X,
249                                backend=self.backend,
250                            )
251                            / n
252                        )  # do not store
253                        hess[np.ix_(x_index, y_index)] = hess[
254                            np.ix_(y_index, x_index)
255                        ] = H_sub
256
257                return hess + (self.lambda1 + self.lambda2) * np.identity(Kp)
258
259        # total number of covariates
260        p = X.shape[1]
261
262        # initial number of covariates
263        init_p = p - self.n_hidden_features
264
265        # log-likelihood (1st return)
266        def loglik_func(x):
267            # (p, K)
268            B = x.reshape(Y.shape[1], p).T
269
270            # (n, K)
271            XB = mo.safe_sparse_dot(X, B, backend=self.backend)
272
273            res = -(np.sum(Y * XB, axis=1) - logsumexp(XB)).mean()
274
275            res += (
276                0.5
277                * self.lambda1
278                * mo.squared_norm(B[0:init_p, :], backend=self.backend)
279            )
280            res += (
281                0.5
282                * self.lambda2
283                * mo.squared_norm(B[init_p:p, :], backend=self.backend)
284            )
285
286            return res
287
288        # gradient of log-likelihood
289        def grad_func(x):
290            # (p, K)
291            B = x.reshape(Y.shape[1], p).T
292
293            return loglik_grad_hess(
294                Y=Y,
295                X=X,
296                B=B,
297                XB=mo.safe_sparse_dot(X, B, backend=self.backend),
298                hessian=False,
299                **kwargs
300            )
301
302        # hessian of log-likelihood
303        def hessian_func(x):
304            # (p, K)
305            B = x.reshape(Y.shape[1], p).T
306
307            return loglik_grad_hess(
308                Y=Y,
309                X=X,
310                B=B,
311                XB=mo.safe_sparse_dot(X, B, backend=self.backend),
312                hessian=True,
313                **kwargs
314            )
315
316        return loglik_func, grad_func, hessian_func
317
318    # newton-cg
319    # L-BFGS-B
320    def fit(self, X, y, **kwargs):
321        """Fit Ridge model to training data (X, y).
322
323        for beta: regression coeffs (beta11, ..., beta1p, ..., betaK1, ..., betaKp)
324        for K classes and p covariates.
325
326        Args:
327
328            X: {array-like}, shape = [n_samples, n_features]
329                Training vectors, where n_samples is the number
330                of samples and n_features is the number of features.
331
332            y: array-like, shape = [n_samples]
333                Target values.
334
335            **kwargs: additional parameters to be passed to
336                    self.cook_training_set or self.obj.fit
337
338        Returns:
339
340            self: object
341
342        """
343
344        assert mx.is_factor(y), "y must contain only integers"
345
346        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
347
348        self.n_classes = len(np.unique(y))
349        self.classes_ = np.unique(y)  # for compatibility with sklearn
350        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
351
352        Y = mo.one_hot_encode2(output_y, self.n_classes)
353
354        # optimize for beta, minimize self.loglik (maximize loglik) -----
355        loglik_func, grad_func, hessian_func = self.loglik(X=scaled_Z, Y=Y)
356
357        if self.solver == "L-BFGS-B":
358            opt = minimize(
359                fun=loglik_func,
360                x0=np.zeros(scaled_Z.shape[1] * self.n_classes),
361                jac=grad_func,
362                method=self.solver,
363            )
364            self.beta_ = opt.x
365            self.minloglik_ = opt.fun
366
367        if self.solver in ("Newton-CG", "trust-ncg"):
368            opt = minimize(
369                fun=loglik_func,
370                x0=np.zeros(scaled_Z.shape[1] * self.n_classes),
371                jac=grad_func,
372                hess=hessian_func,
373                method=self.solver,
374            )
375            self.beta_ = opt.x
376            self.minloglik_ = opt.fun
377
378        if self.solver == "L-BFGS-B-lstsq":
379            opt = minimize(
380                fun=loglik_func,
381                x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten(
382                    order="F"
383                ),
384                jac=grad_func,
385                method="L-BFGS-B",
386            )
387            self.beta_ = opt.x
388            self.minloglik_ = opt.fun
389
390        if self.solver == "Newton-CG-lstsq":
391            opt = minimize(
392                fun=loglik_func,
393                x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten(
394                    order="F"
395                ),
396                jac=grad_func,
397                hess=hessian_func,
398                method="Newton-CG",
399            )
400            self.beta_ = opt.x
401            self.minloglik_ = opt.fun
402
403        if self.solver == "trust-ncg-lstsq":
404            opt = minimize(
405                fun=loglik_func,
406                x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten(
407                    order="F"
408                ),
409                jac=grad_func,
410                hess=hessian_func,
411                method="trust-ncg",
412            )
413            self.beta_ = opt.x
414            self.minloglik_ = opt.fun
415
416        self.classes_ = np.unique(y)
417
418        return self
419
420    def predict(self, X, **kwargs):
421        """Predict test data X.
422
423        Args:
424
425            X: {array-like}, shape = [n_samples, n_features]
426                Training vectors, where n_samples is the number
427                of samples and n_features is the number of features.
428
429            **kwargs: additional parameters to be passed to
430                    self.cook_test_set
431
432        Returns:
433
434            model predictions: {array-like}
435        """
436
437        return np.argmax(self.predict_proba(X, **kwargs), axis=1)
438
439    def predict_proba(self, X, **kwargs):
440        """Predict probabilities for test data X.
441
442        Args:
443
444            X: {array-like}, shape = [n_samples, n_features]
445                Training vectors, where n_samples is the number
446                of samples and n_features is the number of features.
447
448            **kwargs: additional parameters to be passed to
449                    self.cook_test_set
450
451        Returns:
452
453            probability estimates for test data: {array-like}
454
455        """
456        if len(X.shape) == 1:
457            n_features = X.shape[0]
458            new_X = mo.rbind(
459                X.reshape(1, n_features),
460                np.ones(n_features).reshape(1, n_features),
461            )
462
463            Z = self.cook_test_set(new_X, **kwargs)
464
465        else:
466            Z = self.cook_test_set(X, **kwargs)
467
468        ZB = mo.safe_sparse_dot(
469            a=Z,
470            b=self.beta_.reshape(
471                self.n_classes,
472                X.shape[1] + self.n_hidden_features + self.n_clusters,
473            ).T,
474            backend=self.backend,
475        )
476
477        exp_ZB = np.exp(ZB)
478
479        return exp_ZB / exp_ZB.sum(axis=1)[:, None]
480
481    @property
482    def _estimator_type(self):
483        return "classifier"

Multinomial logit classification with 2 regularization parameters

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

lambda1: float
    regularization parameter on direct link

lambda2: float
    regularization parameter on hidden layer

solver: str
    optimization function "L-BFGS-B",  "Newton-CG",
    "trust-ncg", "L-BFGS-B-lstsq", "Newton-CG-lstsq",
    "trust-ncg-lstsq" (see scipy.optimize.minimize)
    When using "L-BFGS-B-lstsq", "Newton-CG-lstsq", or "trust-ncg-lstsq",
    the initial value for the optimization is set to the least squares solution

seed: int
    reproducibility seed for nodes_sim=='uniform'

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

beta_: {array-like}
    regression coefficients

classes_: {array-like}
    unique classes in the target variable

minloglik_: float
    minimum value of the negative log-likelihood

Examples:

See also https://github.com/Techtonique/nnetsauce/blob/master/examples/ridge_classification.py

```python
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from time import time


breast_cancer = load_breast_cancer()
X = breast_cancer.data
y = breast_cancer.target

# split data into training test and test set
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# create the model with nnetsauce
fit_obj = ns.Ridge2Classifier(lambda1 = 6.90185578e+04,
                            lambda2 = 3.17392781e+02,
                            n_hidden_features=95,
                            n_clusters=2,
                            dropout = 3.62817383e-01,
                            type_clust = "gmm")

# fit the model on training set
start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")

# get the accuracy on test set
start = time()
print(fit_obj.score(X_test, y_test))
print(f"Elapsed {time() - start}")

# get area under the curve on test set (auc)
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
```
def fit(self, X, y, **kwargs):
320    def fit(self, X, y, **kwargs):
321        """Fit Ridge model to training data (X, y).
322
323        for beta: regression coeffs (beta11, ..., beta1p, ..., betaK1, ..., betaKp)
324        for K classes and p covariates.
325
326        Args:
327
328            X: {array-like}, shape = [n_samples, n_features]
329                Training vectors, where n_samples is the number
330                of samples and n_features is the number of features.
331
332            y: array-like, shape = [n_samples]
333                Target values.
334
335            **kwargs: additional parameters to be passed to
336                    self.cook_training_set or self.obj.fit
337
338        Returns:
339
340            self: object
341
342        """
343
344        assert mx.is_factor(y), "y must contain only integers"
345
346        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
347
348        self.n_classes = len(np.unique(y))
349        self.classes_ = np.unique(y)  # for compatibility with sklearn
350        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
351
352        Y = mo.one_hot_encode2(output_y, self.n_classes)
353
354        # optimize for beta, minimize self.loglik (maximize loglik) -----
355        loglik_func, grad_func, hessian_func = self.loglik(X=scaled_Z, Y=Y)
356
357        if self.solver == "L-BFGS-B":
358            opt = minimize(
359                fun=loglik_func,
360                x0=np.zeros(scaled_Z.shape[1] * self.n_classes),
361                jac=grad_func,
362                method=self.solver,
363            )
364            self.beta_ = opt.x
365            self.minloglik_ = opt.fun
366
367        if self.solver in ("Newton-CG", "trust-ncg"):
368            opt = minimize(
369                fun=loglik_func,
370                x0=np.zeros(scaled_Z.shape[1] * self.n_classes),
371                jac=grad_func,
372                hess=hessian_func,
373                method=self.solver,
374            )
375            self.beta_ = opt.x
376            self.minloglik_ = opt.fun
377
378        if self.solver == "L-BFGS-B-lstsq":
379            opt = minimize(
380                fun=loglik_func,
381                x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten(
382                    order="F"
383                ),
384                jac=grad_func,
385                method="L-BFGS-B",
386            )
387            self.beta_ = opt.x
388            self.minloglik_ = opt.fun
389
390        if self.solver == "Newton-CG-lstsq":
391            opt = minimize(
392                fun=loglik_func,
393                x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten(
394                    order="F"
395                ),
396                jac=grad_func,
397                hess=hessian_func,
398                method="Newton-CG",
399            )
400            self.beta_ = opt.x
401            self.minloglik_ = opt.fun
402
403        if self.solver == "trust-ncg-lstsq":
404            opt = minimize(
405                fun=loglik_func,
406                x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten(
407                    order="F"
408                ),
409                jac=grad_func,
410                hess=hessian_func,
411                method="trust-ncg",
412            )
413            self.beta_ = opt.x
414            self.minloglik_ = opt.fun
415
416        self.classes_ = np.unique(y)
417
418        return self

Fit Ridge model to training data (X, y).

for beta: regression coeffs (beta11, ..., beta1p, ..., betaK1, ..., betaKp) for K classes and p covariates.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
def predict(self, X, **kwargs):
420    def predict(self, X, **kwargs):
421        """Predict test data X.
422
423        Args:
424
425            X: {array-like}, shape = [n_samples, n_features]
426                Training vectors, where n_samples is the number
427                of samples and n_features is the number of features.
428
429            **kwargs: additional parameters to be passed to
430                    self.cook_test_set
431
432        Returns:
433
434            model predictions: {array-like}
435        """
436
437        return np.argmax(self.predict_proba(X, **kwargs), axis=1)

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
def predict_proba(self, X, **kwargs):
439    def predict_proba(self, X, **kwargs):
440        """Predict probabilities for test data X.
441
442        Args:
443
444            X: {array-like}, shape = [n_samples, n_features]
445                Training vectors, where n_samples is the number
446                of samples and n_features is the number of features.
447
448            **kwargs: additional parameters to be passed to
449                    self.cook_test_set
450
451        Returns:
452
453            probability estimates for test data: {array-like}
454
455        """
456        if len(X.shape) == 1:
457            n_features = X.shape[0]
458            new_X = mo.rbind(
459                X.reshape(1, n_features),
460                np.ones(n_features).reshape(1, n_features),
461            )
462
463            Z = self.cook_test_set(new_X, **kwargs)
464
465        else:
466            Z = self.cook_test_set(X, **kwargs)
467
468        ZB = mo.safe_sparse_dot(
469            a=Z,
470            b=self.beta_.reshape(
471                self.n_classes,
472                X.shape[1] + self.n_hidden_features + self.n_clusters,
473            ).T,
474            backend=self.backend,
475        )
476
477        exp_ZB = np.exp(ZB)
478
479        return exp_ZB / exp_ZB.sum(axis=1)[:, None]

Predict probabilities for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

probability estimates for test data: {array-like}
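
In predict_proba above, the flat coefficient vector `beta_` is reshaped into an (n_classes, n_features_total) matrix, the linear predictor ZB is formed, and the last two lines normalize it row-wise; this is a plain softmax, and predict() then takes the row-wise argmax. A standalone NumPy illustration (the max-subtraction below is a numerical-stability guard added for this sketch only; the method exponentiates ZB directly):

```python
import numpy as np

def softmax_rows(ZB):
    # row-wise softmax, equivalent to exp(ZB) / exp(ZB).sum(axis=1)[:, None]
    exp_ZB = np.exp(ZB - ZB.max(axis=1, keepdims=True))
    return exp_ZB / exp_ZB.sum(axis=1, keepdims=True)

ZB = np.array([[2.0, 0.5, -1.0],
               [0.1, 0.2, 0.3]])
probs = softmax_rows(ZB)
print(probs)             # each row sums to 1
print(probs.argmax(1))   # what predict() returns: the most probable class per row
```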
class Ridge2MultitaskClassifier(nnetsauce.ridge2.ridge2.Ridge2, sklearn.base.ClassifierMixin):
 23class Ridge2MultitaskClassifier(Ridge2, ClassifierMixin):
 24    """Multitask Ridge classification with 2 regularization parameters
 25
 26    Parameters:
 27
 28        n_hidden_features: int
 29            number of nodes in the hidden layer
 30
 31        activation_name: str
 32            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 33
 34        a: float
 35            hyperparameter for 'prelu' or 'elu' activation function
 36
 37        nodes_sim: str
 38            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 39            'uniform'
 40
 41        bias: boolean
 42            indicates if the hidden layer contains a bias term (True) or not
 43            (False)
 44
 45        dropout: float
 46            regularization parameter; (random) percentage of nodes dropped out
 47            of the training
 48
 49        n_clusters: int
 50            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 51                no clustering)
 52
 53        cluster_encode: bool
 54            defines how the variable containing clusters is treated (default is one-hot)
 55            if `False`, then labels are used, without one-hot encoding
 56
 57        type_clust: str
 58            type of clustering method: currently k-means ('kmeans') or Gaussian
 59            Mixture Model ('gmm')
 60
 61        type_scaling: a tuple of 3 strings
 62            scaling methods for inputs, hidden layer, and clustering respectively
 63            (and when relevant).
 64            Currently available: standardization ('std') or MinMax scaling ('minmax')
 65
 66        lambda1: float
 67            regularization parameter on direct link
 68
 69        lambda2: float
 70            regularization parameter on hidden layer
 71
 72        seed: int
 73            reproducibility seed for nodes_sim=='uniform'
 74
 75        backend: str
 76            "cpu" or "gpu" or "tpu"
 77
 78    Attributes:
 79
 80        beta_: {array-like}
 81            regression coefficients
 82
 83    Examples:
 84
 85    See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/ridgemtask_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/ridgemtask_classification.py)
 86
 87    ```python
 88    import nnetsauce as ns
 89    import numpy as np
 90    from sklearn.datasets import load_breast_cancer
 91    from sklearn.model_selection import train_test_split
 92    from sklearn import metrics
 93    from time import time
 94
 95    breast_cancer = load_breast_cancer()
 96    Z = breast_cancer.data
 97    t = breast_cancer.target
 98    np.random.seed(123)
 99    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)
100
101    fit_obj = ns.Ridge2MultitaskClassifier(n_hidden_features=int(9.83730469e+01),
102                                    dropout=4.31054687e-01,
103                                    n_clusters=int(1.71484375e+00),
104                                    lambda1=1.24023438e+01, lambda2=7.30263672e+03)
105
106    start = time()
107    fit_obj.fit(X_train, y_train)
108    print(f"Elapsed {time() - start}")
109
110    print(fit_obj.score(X_test, y_test))
111    print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
112
113    start = time()
114    preds = fit_obj.predict(X_test)
115    print(f"Elapsed {time() - start}")
116    print(metrics.classification_report(preds, y_test))
117    ```
118
119    """
120
121    # construct the object -----
122    _estimator_type = "classifier"
123
124    def __init__(
125        self,
126        n_hidden_features=5,
127        activation_name="relu",
128        a=0.01,
129        nodes_sim="sobol",
130        bias=True,
131        dropout=0,
132        n_clusters=2,
133        cluster_encode=True,
134        type_clust="kmeans",
135        type_scaling=("std", "std", "std"),
136        lambda1=0.1,
137        lambda2=0.1,
138        seed=123,
139        backend="cpu",
140    ):
141        super().__init__(
142            n_hidden_features=n_hidden_features,
143            activation_name=activation_name,
144            a=a,
145            nodes_sim=nodes_sim,
146            bias=bias,
147            dropout=dropout,
148            n_clusters=n_clusters,
149            cluster_encode=cluster_encode,
150            type_clust=type_clust,
151            type_scaling=type_scaling,
152            lambda1=lambda1,
153            lambda2=lambda2,
154            seed=seed,
155            backend=backend,
156        )
157
158        self.type_fit = "classification"
159
160    def fit(self, X, y, **kwargs):
161        """Fit Ridge model to training data (X, y).
162
163        Args:
164
165            X: {array-like}, shape = [n_samples, n_features]
166                Training vectors, where n_samples is the number
167                of samples and n_features is the number of features.
168
169            y: array-like, shape = [n_samples]
170                Target values.
171
172            **kwargs: additional parameters to be passed to
173                    self.cook_training_set or self.obj.fit
174
175        Returns:
176
177            self: object
178
179        """
180
181        sys_platform = platform.system()
182
183        assert mx.is_factor(y), "y must contain only integers"
184
185        self.classes_ = np.unique(y)  # for compatibility with sklearn
186        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
187
188        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
189
190        n_X, p_X = X.shape
191        n_Z, p_Z = scaled_Z.shape
192
193        self.n_classes = len(np.unique(y))
194
195        # multitask response
196        Y = mo.one_hot_encode2(output_y, self.n_classes)
197
198        if self.n_clusters > 0:
199            if self.encode_clusters == True:
200                n_features = p_X + self.n_clusters
201            else:
202                n_features = p_X + 1
203        else:
204            n_features = p_X
205
206        X_ = scaled_Z[:, 0:n_features]
207        Phi_X_ = scaled_Z[:, n_features:p_Z]
208
209        B = mo.crossprod(x=X_, backend=self.backend) + self.lambda1 * np.diag(
210            np.repeat(1, X_.shape[1])
211        )
212        C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend)
213        D = mo.crossprod(
214            x=Phi_X_, backend=self.backend
215        ) + self.lambda2 * np.diag(np.repeat(1, Phi_X_.shape[1]))
216
217        if sys_platform in ("Linux", "Darwin"):
218            B_inv = pinv(B) if self.backend == "cpu" else jpinv(B)
219        else:
220            B_inv = pinv(B)
221
222        W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend)
223        S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend)
224
225        if sys_platform in ("Linux", "Darwin"):
226            S_inv = pinv(S_mat) if self.backend == "cpu" else jpinv(S_mat)
227        else:
228            S_inv = pinv(S_mat)
229
230        Y2 = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend)
231        inv = mo.rbind(
232            mo.cbind(
233                x=B_inv + mo.crossprod(x=W, y=Y2, backend=self.backend),
234                y=-np.transpose(Y2),
235                backend=self.backend,
236            ),
237            mo.cbind(x=-Y2, y=S_inv, backend=self.backend),
238            backend=self.backend,
239        )
240
241        self.beta_ = mo.safe_sparse_dot(
242            a=inv,
243            b=mo.crossprod(x=scaled_Z, y=Y, backend=self.backend),
244            backend=self.backend,
245        )
246        self.classes_ = np.unique(y)
247        return self
248
249    def predict(self, X, **kwargs):
250        """Predict test data X.
251
252        Args:
253
254            X: {array-like}, shape = [n_samples, n_features]
255                Training vectors, where n_samples is the number
256                of samples and n_features is the number of features.
257
258            **kwargs: additional parameters to be passed to
259                    self.cook_test_set
260
261        Returns:
262
263            model predictions: {array-like}
264
265        """
266
267        return np.argmax(self.predict_proba(X, **kwargs), axis=1)
268
269    def predict_proba(self, X, **kwargs):
270        """Predict probabilities for test data X.
271
272        Args:
273
274            X: {array-like}, shape = [n_samples, n_features]
275                Training vectors, where n_samples is the number
276                of samples and n_features is the number of features.
277
278            **kwargs: additional parameters to be passed to
279                    self.cook_test_set
280
281        Returns:
282
283            probability estimates for test data: {array-like}
284
285        """
286
287        if len(X.shape) == 1:
288            n_features = X.shape[0]
289            new_X = mo.rbind(
290                x=X.reshape(1, n_features),
291                y=np.ones(n_features).reshape(1, n_features),
292                backend=self.backend,
293            )
294
295            Z = self.cook_test_set(new_X, **kwargs)
296
297        else:
298            Z = self.cook_test_set(X, **kwargs)
299
300        ZB = mo.safe_sparse_dot(a=Z, b=self.beta_, backend=self.backend)
301
302        exp_ZB = np.exp(ZB)
303
304        return exp_ZB / exp_ZB.sum(axis=1)[:, None]
305
306    def score(self, X, y, scoring=None):
307        """Scoring function for classification.
308
309        Args:
310
311            X: {array-like}, shape = [n_samples, n_features]
312                Training vectors, where n_samples is the number
313                of samples and n_features is the number of features.
314
315            y: array-like, shape = [n_samples]
316                Target values.
317
318            scoring: str
319                scoring method (default is accuracy)
320
321        Returns:
322
323            score: float
324        """
325
326        if scoring is None:
327            scoring = "accuracy"
328
329        if scoring == "accuracy":
330            return skm2.accuracy_score(y, self.predict(X))
331
332        if scoring == "f1":
333            return skm2.f1_score(y, self.predict(X))
334
335        if scoring == "precision":
336            return skm2.precision_score(y, self.predict(X))
337
338        if scoring == "recall":
339            return skm2.recall_score(y, self.predict(X))
340
341        if scoring == "roc_auc":
342            return skm2.roc_auc_score(y, self.predict(X))
343
344        if scoring == "log_loss":
345            return skm2.log_loss(y, self.predict_proba(X))
346
347        if scoring == "balanced_accuracy":
348            return skm2.balanced_accuracy_score(y, self.predict(X))
349
350        if scoring == "average_precision":
351            return skm2.average_precision_score(y, self.predict(X))
352
353        if scoring == "neg_brier_score":
354            return -skm2.brier_score_loss(y, self.predict_proba(X))
355
356        if scoring == "neg_log_loss":
357            return -skm2.log_loss(y, self.predict_proba(X))
358
359    @property
360    def _estimator_type(self):
361        return "classifier"

Multitask Ridge classification with 2 regularization parameters

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

lambda1: float
    regularization parameter on direct link

lambda2: float
    regularization parameter on hidden layer

seed: int
    reproducibility seed for nodes_sim=='uniform'

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

beta_: {array-like}
    regression coefficients

Examples:

See also https://github.com/Techtonique/nnetsauce/blob/master/examples/ridgemtask_classification.py

import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time

breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)

fit_obj = ns.Ridge2MultitaskClassifier(n_hidden_features=int(9.83730469e+01),
                                dropout=4.31054687e-01,
                                n_clusters=int(1.71484375e+00),
                                lambda1=1.24023438e+01, lambda2=7.30263672e+03)

start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")

print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))

start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
def fit(self, X, y, **kwargs):
160    def fit(self, X, y, **kwargs):
161        """Fit Ridge model to training data (X, y).
162
163        Args:
164
165            X: {array-like}, shape = [n_samples, n_features]
166                Training vectors, where n_samples is the number
167                of samples and n_features is the number of features.
168
169            y: array-like, shape = [n_samples]
170                Target values.
171
172            **kwargs: additional parameters to be passed to
173                    self.cook_training_set or self.obj.fit
174
175        Returns:
176
177            self: object
178
179        """
180
181        sys_platform = platform.system()
182
183        assert mx.is_factor(y), "y must contain only integers"
184
185        self.classes_ = np.unique(y)  # for compatibility with sklearn
186        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
187
188        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
189
190        n_X, p_X = X.shape
191        n_Z, p_Z = scaled_Z.shape
192
193        self.n_classes = len(np.unique(y))
194
195        # multitask response
196        Y = mo.one_hot_encode2(output_y, self.n_classes)
197
198        if self.n_clusters > 0:
199            if self.encode_clusters == True:
200                n_features = p_X + self.n_clusters
201            else:
202                n_features = p_X + 1
203        else:
204            n_features = p_X
205
206        X_ = scaled_Z[:, 0:n_features]
207        Phi_X_ = scaled_Z[:, n_features:p_Z]
208
209        B = mo.crossprod(x=X_, backend=self.backend) + self.lambda1 * np.diag(
210            np.repeat(1, X_.shape[1])
211        )
212        C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend)
213        D = mo.crossprod(
214            x=Phi_X_, backend=self.backend
215        ) + self.lambda2 * np.diag(np.repeat(1, Phi_X_.shape[1]))
216
217        if sys_platform in ("Linux", "Darwin"):
218            B_inv = pinv(B) if self.backend == "cpu" else jpinv(B)
219        else:
220            B_inv = pinv(B)
221
222        W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend)
223        S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend)
224
225        if sys_platform in ("Linux", "Darwin"):
226            S_inv = pinv(S_mat) if self.backend == "cpu" else jpinv(S_mat)
227        else:
228            S_inv = pinv(S_mat)
229
230        Y2 = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend)
231        inv = mo.rbind(
232            mo.cbind(
233                x=B_inv + mo.crossprod(x=W, y=Y2, backend=self.backend),
234                y=-np.transpose(Y2),
235                backend=self.backend,
236            ),
237            mo.cbind(x=-Y2, y=S_inv, backend=self.backend),
238            backend=self.backend,
239        )
240
241        self.beta_ = mo.safe_sparse_dot(
242            a=inv,
243            b=mo.crossprod(x=scaled_Z, y=Y, backend=self.backend),
244            backend=self.backend,
245        )
246        self.classes_ = np.unique(y)
247        return self

Fit Ridge model to training data (X, y).

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
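
Unlike the solver-based fit earlier on this page, this fit has a closed form: beta_ = (Z'Z + Lambda)^{-1} Z'Y, where Lambda applies lambda1 to the direct-link block and lambda2 to the hidden-feature block. The code builds the inverse blockwise via the Schur complement S = D - C B^{-1} C'. A self-contained toy check (names mirror the code; this is an illustration on random data, not the library routine) comparing the blockwise inverse with a direct pseudo-inverse:

```python
import numpy as np
from numpy.linalg import pinv

rng = np.random.default_rng(123)
n, p, q = 50, 3, 4                           # samples, direct features, hidden features
X_ = rng.normal(size=(n, p))
Phi_X_ = rng.normal(size=(n, q))
lambda1, lambda2 = 0.1, 0.2

B = X_.T @ X_ + lambda1 * np.eye(p)          # penalized direct-link block
C = Phi_X_.T @ X_
D = Phi_X_.T @ Phi_X_ + lambda2 * np.eye(q)  # penalized hidden-feature block

B_inv = pinv(B)
W = C @ B_inv
S_inv = pinv(D - W @ C.T)                    # inverse of the Schur complement
Y2 = S_inv @ W
inv_block = np.block([[B_inv + W.T @ Y2, -Y2.T],
                      [-Y2,              S_inv]])

Z = np.hstack([X_, Phi_X_])
Lambda = np.diag([lambda1] * p + [lambda2] * q)
print(np.allclose(inv_block, pinv(Z.T @ Z + Lambda)))  # True, up to numerical tolerance
```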
def predict(self, X, **kwargs):
249    def predict(self, X, **kwargs):
250        """Predict test data X.
251
252        Args:
253
254            X: {array-like}, shape = [n_samples, n_features]
255                Training vectors, where n_samples is the number
256                of samples and n_features is the number of features.
257
258            **kwargs: additional parameters to be passed to
259                    self.cook_test_set
260
261        Returns:
262
263            model predictions: {array-like}
264
265        """
266
267        return np.argmax(self.predict_proba(X, **kwargs), axis=1)

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
def predict_proba(self, X, **kwargs):
269    def predict_proba(self, X, **kwargs):
270        """Predict probabilities for test data X.
271
272        Args:
273
274            X: {array-like}, shape = [n_samples, n_features]
275                Training vectors, where n_samples is the number
276                of samples and n_features is the number of features.
277
278            **kwargs: additional parameters to be passed to
279                    self.cook_test_set
280
281        Returns:
282
283            probability estimates for test data: {array-like}
284
285        """
286
287        if len(X.shape) == 1:
288            n_features = X.shape[0]
289            new_X = mo.rbind(
290                x=X.reshape(1, n_features),
291                y=np.ones(n_features).reshape(1, n_features),
292                backend=self.backend,
293            )
294
295            Z = self.cook_test_set(new_X, **kwargs)
296
297        else:
298            Z = self.cook_test_set(X, **kwargs)
299
300        ZB = mo.safe_sparse_dot(a=Z, b=self.beta_, backend=self.backend)
301
302        exp_ZB = np.exp(ZB)
303
304        return exp_ZB / exp_ZB.sum(axis=1)[:, None]

Predict probabilities for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

probability estimates for test data: {array-like}
def score(self, X, y, scoring=None):
306    def score(self, X, y, scoring=None):
307        """Scoring function for classification.
308
309        Args:
310
311            X: {array-like}, shape = [n_samples, n_features]
312                Training vectors, where n_samples is the number
313                of samples and n_features is the number of features.
314
315            y: array-like, shape = [n_samples]
316                Target values.
317
318            scoring: str
319                scoring method (default is accuracy)
320
321        Returns:
322
323            score: float
324        """
325
326        if scoring is None:
327            scoring = "accuracy"
328
329        if scoring == "accuracy":
330            return skm2.accuracy_score(y, self.predict(X))
331
332        if scoring == "f1":
333            return skm2.f1_score(y, self.predict(X))
334
335        if scoring == "precision":
336            return skm2.precision_score(y, self.predict(X))
337
338        if scoring == "recall":
339            return skm2.recall_score(y, self.predict(X))
340
341        if scoring == "roc_auc":
342            return skm2.roc_auc_score(y, self.predict(X))
343
344        if scoring == "log_loss":
345            return skm2.log_loss(y, self.predict_proba(X))
346
347        if scoring == "balanced_accuracy":
348            return skm2.balanced_accuracy_score(y, self.predict(X))
349
350        if scoring == "average_precision":
351            return skm2.average_precision_score(y, self.predict(X))
352
353        if scoring == "neg_brier_score":
354            return -skm2.brier_score_loss(y, self.predict_proba(X))
355
356        if scoring == "neg_log_loss":
357            return -skm2.log_loss(y, self.predict_proba(X))

Scoring function for classification.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

scoring: str
    scoring method (default is accuracy)

Returns:

score: float
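
The score method above is a thin dispatcher over scikit-learn metrics, defaulting to accuracy when `scoring` is None. A short usage sketch reusing the breast-cancer data from the class docstring (the hyperparameter values here are arbitrary, not tuned):

```python
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

clf = ns.Ridge2MultitaskClassifier(n_hidden_features=10, n_clusters=2)
clf.fit(X_train, y_train)

# metric names are the literal strings handled by score()
for metric in ("accuracy", "f1", "roc_auc", "balanced_accuracy"):
    print(metric, clf.score(X_test, y_test, scoring=metric))
```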
class SubSampler:
 6class SubSampler:
 7    """Subsampling class.
 8
 9    Attributes:
10
11       y: array-like, shape = [n_samples]
12           Target values.
13
14       row_sample: double
15           subsampling fraction
16
17       n_samples: int
18            subsampling by using the number of rows (supersedes row_sample)
19
20       seed: int
21           reproducibility seed
22
23       n_jobs: int
24            number of jobs to run in parallel
25
26       verbose: bool
27            print progress messages and bars
28    """
29
30    def __init__(
31        self,
32        y,
33        row_sample=0.8,
34        n_samples=None,
35        seed=123,
36        n_jobs=None,
37        verbose=False,
38    ):
39        self.y = y
40        self.n_samples = n_samples
41        if self.n_samples is None:
42            assert (
43                row_sample < 1 and row_sample >= 0
44            ), "'row_sample' must satisfy 0 <= row_sample < 1"
45            self.row_sample = row_sample
46        else:
47            assert self.n_samples < len(y), "'n_samples' must be < len(y)"
48            self.row_sample = self.n_samples / len(y)
49        self.seed = seed
50        self.indices = None
51        self.n_jobs = n_jobs
52        self.verbose = verbose
53
54    def subsample(self):
55        """Returns indices of subsampled input data.
56
57        Examples:
58
59        <ul>
60            <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240105_subsampling.ipynb">20240105_subsampling.ipynb</a> </li>
61            <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240131_subsampling_nsamples.ipynb">20240131_subsampling_nsamples.ipynb</a> </li>
62        </ul>
63
64        """
65        self.indices = dosubsample(
66            y=self.y,
67            row_sample=self.row_sample,
68            seed=self.seed,
69            n_jobs=self.n_jobs,
70            verbose=self.verbose,
71        )
72        return self.indices

Subsampling class.

Attributes:

y: array-like, shape = [n_samples]
    Target values.

row_sample: double
    subsampling fraction

n_samples: int
    subsampling by using the number of rows (supersedes row_sample)

seed: int
    reproducibility seed

n_jobs: int
    number of jobs to run in parallel

verbose: bool
    print progress messages and bars
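
A minimal usage sketch for the class above: construct the sampler from the target vector, draw the retained row indices with subsample(), and subset the training data. (Whether the draw is class-aware depends on the `dosubsample` helper, which is not shown on this page.)

```python
import numpy as np
import nnetsauce as ns

rng = np.random.default_rng(123)
X = rng.normal(size=(100, 5))
y = rng.integers(0, 2, size=100)

sampler = ns.SubSampler(y=y, row_sample=0.5, seed=123)
idx = sampler.subsample()       # indices of the retained rows
X_sub, y_sub = X[idx], y[idx]
print(len(idx), X_sub.shape)
```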

def subsample(self):
54    def subsample(self):
55        """Returns indices of subsampled input data.
56
57        Examples:
58
59        <ul>
60            <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240105_subsampling.ipynb">20240105_subsampling.ipynb</a> </li>
61            <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240131_subsampling_nsamples.ipynb">20240131_subsampling_nsamples.ipynb</a> </li>
62        </ul>
63
64        """
65        self.indices = dosubsample(
66            y=self.y,
67            row_sample=self.row_sample,
68            seed=self.seed,
69            n_jobs=self.n_jobs,
70            verbose=self.verbose,
71        )
72        return self.indices

Returns indices of subsampled input data.

Examples: