nnetsauce

from .base.base import Base
from .base.baseRegressor import BaseRegressor
from .boosting.adaBoostClassifier import AdaBoostClassifier
from .custom.customClassifier import CustomClassifier
from .custom.customRegressor import CustomRegressor
from .datasets import Downloader
from .deep.deepClassifier import DeepClassifier
from .deep.deepRegressor import DeepRegressor
from .deep.deepMTS import DeepMTS
from .glm.glmClassifier import GLMClassifier
from .glm.glmRegressor import GLMRegressor
from .kernel.kernel import KernelRidge
from .lazypredict.lazydeepClassifier import LazyDeepClassifier, LazyClassifier
from .lazypredict.lazydeepRegressor import LazyDeepRegressor, LazyRegressor
from .lazypredict.lazydeepMTS import LazyDeepMTS, LazyMTS
from .mts.mts import MTS
from .mts.mlarch import MLARCH
from .mts.classical import ClassicalMTS
from .multitask.multitaskClassifier import MultitaskClassifier
from .multitask.simplemultitaskClassifier import SimpleMultitaskClassifier
from .neuralnet.neuralnetregression import NeuralNetRegressor
from .neuralnet.neuralnetclassification import NeuralNetClassifier
from .optimizers.optimizer import Optimizer
from .predictioninterval import PredictionInterval
from .quantile.quantileregression import QuantileRegressor
from .quantile.quantileclassification import QuantileClassifier
from .randombag.randomBagClassifier import RandomBagClassifier
from .randombag.randomBagRegressor import RandomBagRegressor
from .ridge.ridge import RidgeRegressor
from .ridge2.ridge2Classifier import Ridge2Classifier
from .ridge2.ridge2Regressor import Ridge2Regressor
from .ridge2.ridge2MultitaskClassifier import Ridge2MultitaskClassifier
from .rvfl.bayesianrvflRegressor import BayesianRVFLRegressor
from .rvfl.bayesianrvfl2Regressor import BayesianRVFL2Regressor
from .sampling import SubSampler
from .updater import RegressorUpdater, ClassifierUpdater
from .votingregressor import MedianVotingRegressor

__all__ = [
    "AdaBoostClassifier",
    "Base",
    "BaseRegressor",
    "BayesianRVFLRegressor",
    "BayesianRVFL2Regressor",
    "ClassicalMTS",
    "CustomClassifier",
    "CustomRegressor",
    "DeepClassifier",
    "DeepRegressor",
    "DeepMTS",
    "Downloader",
    "GLMClassifier",
    "GLMRegressor",
    "KernelRidge",
    "LazyClassifier",
    "LazyRegressor",
    "LazyDeepClassifier",
    "LazyDeepRegressor",
    "LazyMTS",
    "LazyDeepMTS",
    "MLARCH",
    "MedianVotingRegressor",
    "MTS",
    "MultitaskClassifier",
    "NeuralNetRegressor",
    "NeuralNetClassifier",
    "PredictionInterval",
    "SimpleMultitaskClassifier",
    "Optimizer",
    "QuantileRegressor",
    "QuantileClassifier",
    "RandomBagRegressor",
    "RandomBagClassifier",
    "RegressorUpdater",
    "ClassifierUpdater",
    "RidgeRegressor",
    "Ridge2Regressor",
    "Ridge2Classifier",
    "Ridge2MultitaskClassifier",
    "SubSampler",
]
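
Everything listed in `__all__` above is re-exported at the top level of the package, so these estimators can be used directly after `import nnetsauce as ns`. A minimal, illustrative sketch (assuming `CustomClassifier` follows the scikit-learn-style `fit`/`predict` interface used throughout this page):

```python
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

# wrap a scikit-learn estimator in a (quasi-)randomized hidden-layer classifier
clf = ns.CustomClassifier(LogisticRegression(solver="liblinear"),
                          n_hidden_features=5, n_clusters=2)
clf.fit(X_train, y_train)
print(metrics.accuracy_score(y_test, clf.predict(X_test)))
```
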
class AdaBoostClassifier(nnetsauce.boosting.bst.Boosting, sklearn.base.ClassifierMixin):
 21class AdaBoostClassifier(Boosting, ClassifierMixin):
 22    """AdaBoost Classification (SAMME) model class derived from class Boosting
 23
 24    Parameters:
 25
 26        obj: object
 27            any object containing a method fit (obj.fit()) and a method predict
 28            (obj.predict())
 29
 30        n_estimators: int
 31            number of boosting iterations
 32
 33        learning_rate: float
 34            learning rate of the boosting procedure
 35
 36        n_hidden_features: int
 37            number of nodes in the hidden layer
 38
 39        reg_lambda: float
 40            regularization parameter for weights
 41
 42        reg_alpha: float
 43            controls the compromise between the l1 and l2 norms of the weights
 44
 45        activation_name: str
 46            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 47
 48        a: float
 49            hyperparameter for 'prelu' or 'elu' activation function
 50
 51        nodes_sim: str
 52            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 53            'uniform'
 54
 55        bias: boolean
 56            indicates if the hidden layer contains a bias term (True) or not
 57            (False)
 58
 59        dropout: float
 60            regularization parameter; (random) percentage of nodes dropped out
 61            of the training
 62
 63        direct_link: boolean
 64            indicates if the original predictors are included (True) in model's
 65            fitting or not (False)
 66
 67        n_clusters: int
 68            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 69                no clustering)
 70
 71        cluster_encode: bool
 72            defines how the variable containing clusters is treated (default is one-hot)
 73            if `False`, then labels are used, without one-hot encoding
 74
 75        type_clust: str
 76            type of clustering method: currently k-means ('kmeans') or Gaussian
 77            Mixture Model ('gmm')
 78
 79        type_scaling: a tuple of 3 strings
 80            scaling methods for inputs, hidden layer, and clustering respectively
 81            (and when relevant).
 82            Currently available: standardization ('std') or MinMax scaling ('minmax')
 83
 84        col_sample: float
 85            percentage of covariates randomly chosen for training
 86
 87        row_sample: float
 88            percentage of rows chosen for training, by stratified bootstrapping
 89
 90        seed: int
 91            reproducibility seed for nodes_sim=='uniform'
 92
 93        verbose: int
 94            0 for no output, 1 for a progress bar (default is 1)
 95
 96        method: str
 97            type of AdaBoost method: 'SAMME' (discrete) or 'SAMME.R' (real)
 98
 99        backend: str
100            "cpu" or "gpu" or "tpu"
101
102    Attributes:
103
104        alpha_: list
105            AdaBoost coefficients alpha_m
106
107        base_learners_: dict
108            a dictionary containing the base learners
109
110    Examples:
111
112    See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/adaboost_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/adaboost_classification.py)
113
114    ```python
115    import nnetsauce as ns
116    import numpy as np
117    from sklearn.datasets import load_breast_cancer
118    from sklearn.linear_model import LogisticRegression
119    from sklearn.model_selection import train_test_split
120    from sklearn import metrics
121    from time import time
122
123    breast_cancer = load_breast_cancer()
124    Z = breast_cancer.data
125    t = breast_cancer.target
126    np.random.seed(123)
127    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)
128
129    # SAMME.R
130    clf = LogisticRegression(solver='liblinear', multi_class = 'ovr',
131                            random_state=123)
132    fit_obj = ns.AdaBoostClassifier(clf,
133                                    n_hidden_features=int(11.22338867),
134                                    direct_link=True,
135                                    n_estimators=250, learning_rate=0.01126343,
136                                    col_sample=0.72684326, row_sample=0.86429443,
137                                    dropout=0.63078613, n_clusters=2,
138                                    type_clust="gmm",
139                                    verbose=1, seed = 123,
140                                    method="SAMME.R")
141
142    start = time()
143    fit_obj.fit(X_train, y_train)
144    print(f"Elapsed {time() - start}")
145
146    start = time()
147    print(fit_obj.score(X_test, y_test))
148    print(f"Elapsed {time() - start}")
149
150    preds = fit_obj.predict(X_test)
151
152    print(metrics.classification_report(preds, y_test))
153
154    ```
155
156    """
157
158    # construct the object -----
159
160    def __init__(
161        self,
162        obj,
163        n_estimators=10,
164        learning_rate=0.1,
165        n_hidden_features=1,
166        reg_lambda=0,
167        reg_alpha=0.5,
168        activation_name="relu",
169        a=0.01,
170        nodes_sim="sobol",
171        bias=True,
172        dropout=0,
173        direct_link=False,
174        n_clusters=2,
175        cluster_encode=True,
176        type_clust="kmeans",
177        type_scaling=("std", "std", "std"),
178        col_sample=1,
179        row_sample=1,
180        seed=123,
181        verbose=1,
182        method="SAMME",
183        backend="cpu",
184    ):
185        self.type_fit = "classification"
186        self.verbose = verbose
187        self.method = method
188        self.reg_lambda = reg_lambda
189        self.reg_alpha = reg_alpha
190
191        super().__init__(
192            obj=obj,
193            n_estimators=n_estimators,
194            learning_rate=learning_rate,
195            n_hidden_features=n_hidden_features,
196            activation_name=activation_name,
197            a=a,
198            nodes_sim=nodes_sim,
199            bias=bias,
200            dropout=dropout,
201            direct_link=direct_link,
202            n_clusters=n_clusters,
203            cluster_encode=cluster_encode,
204            type_clust=type_clust,
205            type_scaling=type_scaling,
206            col_sample=col_sample,
207            row_sample=row_sample,
208            seed=seed,
209            backend=backend,
210        )
211
212        self.alpha_ = []
213        self.base_learners_ = dict.fromkeys(range(n_estimators))
214
215    def fit(self, X, y, sample_weight=None, **kwargs):
216        """Fit Boosting model to training data (X, y).
217
218        Parameters:
219
220            X: {array-like}, shape = [n_samples, n_features]
221                Training vectors, where n_samples is the number
222                of samples and n_features is the number of features.
223
224            y: array-like, shape = [n_samples]
225                Target values.
226
227            **kwargs: additional parameters to be passed to
228                    self.cook_training_set or self.obj.fit
229
230        Returns:
231
232             self: object
233        """
234
235        assert mx.is_factor(y), "y must contain only integers"
236
237        assert self.method in (
238            "SAMME",
239            "SAMME.R",
240        ), "`method` must be either 'SAMME' or 'SAMME.R'"
241
242        assert (self.reg_lambda <= 1) & (
243            self.reg_lambda >= 0
244        ), "must have self.reg_lambda <= 1 &  self.reg_lambda >= 0"
245
246        assert (self.reg_alpha <= 1) & (
247            self.reg_alpha >= 0
248        ), "must have self.reg_alpha <= 1 &  self.reg_alpha >= 0"
249
250        # training
251        n, p = X.shape
252        self.n_classes = len(np.unique(y))
253        self.classes_ = np.unique(y)  # for compatibility with sklearn
254        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
255
256        if sample_weight is None:
257            w_m = np.repeat(1.0 / n, n)
258        else:
259            w_m = np.asarray(sample_weight)
260
261        base_learner = CustomClassifier(
262            self.obj,
263            n_hidden_features=self.n_hidden_features,
264            activation_name=self.activation_name,
265            a=self.a,
266            nodes_sim=self.nodes_sim,
267            bias=self.bias,
268            dropout=self.dropout,
269            direct_link=self.direct_link,
270            n_clusters=self.n_clusters,
271            type_clust=self.type_clust,
272            type_scaling=self.type_scaling,
273            col_sample=self.col_sample,
274            row_sample=self.row_sample,
275            seed=self.seed,
276        )
277
278        if self.verbose == 1:
279            pbar = Progbar(self.n_estimators)
280
281        if self.method == "SAMME":
282            err_m = 1e6
283            err_bound = 1 - 1 / self.n_classes
284            self.alpha_.append(1.0)
285            x_range_n = range(n)
286
287            for m in range(self.n_estimators):
288                preds = base_learner.fit(
289                    X, y, sample_weight=w_m.ravel(), **kwargs
290                ).predict(X)
291
292                self.base_learners_.update({m: deepcopy(base_learner)})
293
294                cond = [y[i] != preds[i] for i in x_range_n]
295
296                err_m = max(
297                    sum([elt[0] * elt[1] for elt in zip(cond, w_m)]),
298                    2.220446049250313e-16,
299                )  # sum(w_m) == 1
300
301                if self.reg_lambda > 0:
302                    err_m += self.reg_lambda * (
303                        (1 - self.reg_alpha) * 0.5 * sum([x**2 for x in w_m])
304                        + self.reg_alpha * sum([abs(x) for x in w_m])
305                    )
306
307                err_m = min(err_m, err_bound)
308
309                alpha_m = self.learning_rate * log(
310                    (self.n_classes - 1) * (1 - err_m) / err_m
311                )
312
313                self.alpha_.append(alpha_m)
314
315                w_m_temp = [exp(alpha_m * cond[i]) for i in x_range_n]
316
317                sum_w_m = sum(w_m_temp)
318
319                w_m = np.asarray([w_m_temp[i] / sum_w_m for i in x_range_n])
320
321                base_learner.set_params(seed=self.seed + (m + 1) * 1000)
322
323                if self.verbose == 1:
324                    pbar.update(m)
325
326            if self.verbose == 1:
327                pbar.update(self.n_estimators)
328
329            self.n_estimators = len(self.base_learners_)
330            self.classes_ = np.unique(y)
331
332            return self
333
334        if self.method == "SAMME.R":
335            Y = mo.one_hot_encode2(y, self.n_classes)
336
337            if sample_weight is None:
338                w_m = np.repeat(1.0 / n, n)  # (N, 1)
339
340            else:
341                w_m = np.asarray(sample_weight)
342
343            for m in range(self.n_estimators):
344                probs = base_learner.fit(
345                    X, y, sample_weight=w_m.ravel(), **kwargs
346                ).predict_proba(X)
347
348                np.clip(a=probs, a_min=2.220446049250313e-16, a_max=1.0, out=probs)
349
350                self.base_learners_.update({m: deepcopy(base_learner)})
351
352                w_m *= np.exp(
353                    -1.0
354                    * self.learning_rate
355                    * (1.0 - 1.0 / self.n_classes)
356                    * xlogy(Y, probs).sum(axis=1)
357                )
358
359                w_m /= np.sum(w_m)
360
361                base_learner.set_params(seed=self.seed + (m + 1) * 1000)
362
363                if self.verbose == 1:
364                    pbar.update(m)
365
366            if self.verbose == 1:
367                pbar.update(self.n_estimators)
368
369            self.n_estimators = len(self.base_learners_)
370            self.classes_ = np.unique(y)
371
372            return self
373
374    def predict(self, X, **kwargs):
375        """Predict test data X.
376
377        Parameters:
378
379            X: {array-like}, shape = [n_samples, n_features]
380                Training vectors, where n_samples is the number
381                of samples and n_features is the number of features.
382
383            **kwargs: additional parameters to be passed to
384                  self.cook_test_set
385
386        Returns:
387
388            model predictions: {array-like}
389        """
390        return self.predict_proba(X, **kwargs).argmax(axis=1)
391
392    def predict_proba(self, X, **kwargs):
393        """Predict probabilities for test data X.
394
395        Parameters:
396
397            X: {array-like}, shape = [n_samples, n_features]
398                Training vectors, where n_samples is the number
399                of samples and n_features is the number of features.
400
401            **kwargs: additional parameters to be passed to
402                  self.cook_test_set
403
404        Returns:
405
406            probability estimates for test data: {array-like}
407
408        """
409
410        n_iter = len(self.base_learners_)
411
412        if self.method == "SAMME":
413            ensemble_learner = np.zeros((X.shape[0], self.n_classes))
414
415            # if self.verbose == 1:
416            #    pbar = Progbar(n_iter)
417
418            for idx, base_learner in self.base_learners_.items():
419                preds = base_learner.predict(X, **kwargs)
420
421                ensemble_learner += self.alpha_[idx] * mo.one_hot_encode2(
422                    preds, self.n_classes
423                )
424
425                # if self.verbose == 1:
426                #    pbar.update(idx)
427
428            # if self.verbose == 1:
429            #    pbar.update(n_iter)
430
431            expit_ensemble_learner = expit(ensemble_learner)
432
433            sum_ensemble = expit_ensemble_learner.sum(axis=1)
434
435            return expit_ensemble_learner / sum_ensemble[:, None]
436
437        # if self.method == "SAMME.R":
438        ensemble_learner = 0
439
440        # if self.verbose == 1:
441        #    pbar = Progbar(n_iter)
442
443        for idx, base_learner in self.base_learners_.items():
444            probs = base_learner.predict_proba(X, **kwargs)
445
446            np.clip(a=probs, a_min=2.220446049250313e-16, a_max=1.0, out=probs)
447
448            log_preds_proba = np.log(probs)
449
450            ensemble_learner += log_preds_proba - log_preds_proba.mean(axis=1)[:, None]
451
452            # if self.verbose == 1:
453            #    pbar.update(idx)
454
455        ensemble_learner *= self.n_classes - 1
456
457        # if self.verbose == 1:
458        #    pbar.update(n_iter)
459
460        expit_ensemble_learner = expit(ensemble_learner)
461
462        sum_ensemble = expit_ensemble_learner.sum(axis=1)
463
464        return expit_ensemble_learner / sum_ensemble[:, None]
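
To make the discrete SAMME branch of `fit` above more concrete, here is a small self-contained NumPy sketch of a single boosting round, mirroring the `err_m`, `alpha_m` and `w_m` computations in the source (the optional `reg_lambda`/`reg_alpha` penalty on the weights is omitted, and the numbers are illustrative):

```python
import numpy as np

learning_rate, n_classes = 0.1, 3
y = np.array([0, 1, 2, 1, 0])            # true labels
preds = np.array([0, 2, 2, 1, 1])        # predictions of the current base learner
w_m = np.repeat(1.0 / len(y), len(y))    # current sample weights, summing to 1

cond = (y != preds).astype(float)                      # 1 where the base learner errs
err_m = max(float(cond @ w_m), 2.220446049250313e-16)  # weighted training error
err_m = min(err_m, 1.0 - 1.0 / n_classes)              # bounded as in the source
alpha_m = learning_rate * np.log((n_classes - 1) * (1 - err_m) / err_m)

w_m = np.exp(alpha_m * cond)   # as in the source: recomputed from the error indicator
w_m /= w_m.sum()               # renormalized so that the weights sum to 1
print(alpha_m, w_m)
```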

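Similarly, the SAMME.R branch of `predict_proba` aggregates centered log-probabilities over the stored base learners, squashes the result through `expit`, and renormalizes each row. A self-contained sketch with two made-up base-learner probability matrices (3 samples, 2 classes):

```python
import numpy as np
from scipy.special import expit

n_classes = 2
probs_per_learner = [
    np.array([[0.9, 0.1], [0.4, 0.6], [0.2, 0.8]]),
    np.array([[0.7, 0.3], [0.5, 0.5], [0.1, 0.9]]),
]

ensemble = 0.0
for probs in probs_per_learner:
    probs = np.clip(probs, 2.220446049250313e-16, 1.0)
    log_p = np.log(probs)
    ensemble = ensemble + (log_p - log_p.mean(axis=1)[:, None])  # centered log-probabilities

ensemble *= n_classes - 1
scores = expit(ensemble)
print(scores / scores.sum(axis=1)[:, None])  # each row sums to 1
```
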
class Base(sklearn.base.BaseEstimator):
 46class Base(BaseEstimator):
 47    """Base model from which all the other classes inherit.
 48
 49    This class contains the most important data preprocessing/feature engineering methods.
 50
 51    Parameters:
 52
 53        n_hidden_features: int
 54            number of nodes in the hidden layer
 55
 56        activation_name: str
 57            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 58
 59        a: float
 60            hyperparameter for 'prelu' or 'elu' activation function
 61
 62        nodes_sim: str
 63            type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
 64            'uniform'
 65
 66        bias: boolean
 67            indicates if the hidden layer contains a bias term (True) or
 68            not (False)
 69
 70        dropout: float
 71            regularization parameter; (random) percentage of nodes dropped out
 72            of the training
 73
 74        direct_link: boolean
 75            indicates if the original features are included (True) in model's
 76            fitting or not (False)
 77
 78        n_clusters: int
 79            number of clusters for type_clust='kmeans' or type_clust='gmm'
 80            clustering (could be 0: no clustering)
 81
 82        cluster_encode: bool
 83            defines how the variable containing clusters is treated (default is one-hot);
 84            if `False`, then labels are used, without one-hot encoding
 85
 86        type_clust: str
 87            type of clustering method: currently k-means ('kmeans') or Gaussian
 88            Mixture Model ('gmm')
 89
 90        type_scaling: a tuple of 3 strings
 91            scaling methods for inputs, hidden layer, and clustering respectively
 92            (and when relevant).
 93            Currently available: standardization ('std'), MinMax scaling ('minmax'), robust scaling ('robust') or max absolute scaling ('maxabs')
 94
 95        col_sample: float
 96            percentage of features randomly chosen for training
 97
 98        row_sample: float
 99            percentage of rows chosen for training, by stratified bootstrapping
100
101        seed: int
102            reproducibility seed for nodes_sim=='uniform', clustering and dropout
103
104        backend: str
105            "cpu" or "gpu" or "tpu"
106
107    """
108
109    # construct the object -----
110
111    def __init__(
112        self,
113        n_hidden_features=5,
114        activation_name="relu",
115        a=0.01,
116        nodes_sim="sobol",
117        bias=True,
118        dropout=0,
119        direct_link=True,
120        n_clusters=2,
121        cluster_encode=True,
122        type_clust="kmeans",
123        type_scaling=("std", "std", "std"),
124        col_sample=1,
125        row_sample=1,
126        seed=123,
127        backend="cpu",
128    ):
129        # input checks -----
130
131        sys_platform = platform.system()
132
133        if (sys_platform == "Windows") and (backend in ("gpu", "tpu")):
134            warnings.warn("No GPU/TPU computing on Windows yet, backend set to 'cpu'")
135            backend = "cpu"
136
137        assert activation_name in (
138            "relu",
139            "tanh",
140            "sigmoid",
141            "prelu",
142            "elu",
143        ), "'activation_name' must be in ('relu', 'tanh', 'sigmoid','prelu', 'elu')"
144
145        assert nodes_sim in (
146            "sobol",
147            "hammersley",
148            "uniform",
149            "halton",
150        ), "'nodes_sim' must be in ('sobol', 'hammersley', 'uniform', 'halton')"
151
152        assert type_clust in (
153            "kmeans",
154            "gmm",
155        ), "'type_clust' must be in ('kmeans', 'gmm')"
156
157        assert (len(type_scaling) == 3) & all(
158            type_scaling[i] in ("minmax", "std", "robust", "maxabs")
159            for i in range(len(type_scaling))
160        ), "'type_scaling' must have length 3, and available scaling methods are 'minmax' scaling, standardization ('std'), robust scaling ('robust') and max absolute ('maxabs')"
161
162        assert (col_sample >= 0) & (
163            col_sample <= 1
164        ), "'col_sample' must be comprised between 0 and 1 (both included)"
165
166        assert backend in (
167            "cpu",
168            "gpu",
169            "tpu",
170        ), "must have 'backend' in ('cpu', 'gpu', 'tpu')"
171
172        self.n_hidden_features = n_hidden_features
173        self.activation_name = activation_name
174        self.a = a
175        self.nodes_sim = nodes_sim
176        self.bias = bias
177        self.seed = seed
178        self.backend = backend
179        self.dropout = dropout
180        self.direct_link = direct_link
181        self.cluster_encode = cluster_encode
182        self.type_clust = type_clust
183        self.type_scaling = type_scaling
184        self.col_sample = col_sample
185        self.row_sample = row_sample
186        self.n_clusters = n_clusters
187        if isinstance(self, RegressorMixin):
188            self.type_fit = "regression"
189        elif isinstance(self, ClassifierMixin):
190            self.type_fit = "classification"
191        self.subsampler_ = None
192        self.index_col_ = None
193        self.index_row_ = True
194        self.clustering_obj_ = None
195        self.clustering_scaler_ = None
196        self.nn_scaler_ = None
197        self.scaler_ = None
198        self.encoder_ = None
199        self.W_ = None
200        self.X_ = None
201        self.y_ = None
202        self.y_mean_ = None
203        self.beta_ = None
204
205        # activation function -----
206        if sys_platform in ("Linux", "Darwin"):
207            activation_options = {
208                "relu": ac.relu if (self.backend == "cpu") else jnn.relu,
209                "tanh": np.tanh if (self.backend == "cpu") else jnp.tanh,
210                "sigmoid": (ac.sigmoid if (self.backend == "cpu") else jnn.sigmoid),
211                "prelu": partial(ac.prelu, a=a),
212                "elu": (
213                    partial(ac.elu, a=a)
214                    if (self.backend == "cpu")
215                    else partial(jnn.elu, a=a)
216                ),
217            }
218        else:  # on Windows currently, no JAX
219            activation_options = {
220                "relu": (ac.relu if (self.backend == "cpu") else NotImplementedError),
221                "tanh": (np.tanh if (self.backend == "cpu") else NotImplementedError),
222                "sigmoid": (
223                    ac.sigmoid if (self.backend == "cpu") else NotImplementedError
224                ),
225                "prelu": partial(ac.prelu, a=a),
226                "elu": (
227                    partial(ac.elu, a=a)
228                    if (self.backend == "cpu")
229                    else NotImplementedError
230                ),
231            }
232        self.activation_func = activation_options[activation_name]
233
234    # "preprocessing" methods to be inherited -----
235
236    def encode_clusters(self, X=None, predict=False, scaler=None, **kwargs):  #
237        """Create new covariates with kmeans or GMM clustering
238
239        Parameters:
240
241            X: {array-like}, shape = [n_samples, n_features]
242                Training vectors, where n_samples is the number
243                of samples and n_features is the number of features.
244
245            predict: boolean
246                is False on training set and True on test set
247
248            scaler: {object} of class StandardScaler, MinMaxScaler, RobustScaler or MaxAbsScaler
249                if scaler has already been fitted on training data (online training), it can be passed here
250
251            **kwargs:
252                additional parameters to be passed to the
253                clustering method
254
255        Returns:
256
257            Clusters' matrix, one-hot encoded: {array-like}
258
259        """
260
261        np.random.seed(self.seed)
262
263        if X is None:
264            X = self.X_
265
266        if isinstance(X, pd.DataFrame):
267            X = copy.deepcopy(X.values.astype(float))
268
269        if len(X.shape) == 1:
270            X = X.reshape(1, -1)
271
272        if predict is False:  # encode training set
273
274            # scale input data before clustering
275            self.clustering_scaler_, scaled_X = mo.scale_covariates(
276                X, choice=self.type_scaling[2], scaler=self.clustering_scaler_
277            )
278
279            self.clustering_obj_, X_clustered = mo.cluster_covariates(
280                scaled_X,
281                self.n_clusters,
282                self.seed,
283                type_clust=self.type_clust,
284                **kwargs
285            )
286
287            if self.cluster_encode == True:
288                return mo.one_hot_encode(X_clustered, self.n_clusters).astype(
289                    np.float16
290                )
291
292            return X_clustered.astype(np.float16)
293
294        # if predict == True, encode test set
295        X_clustered = self.clustering_obj_.predict(self.clustering_scaler_.transform(X))
296
297        if self.cluster_encode == True:
298            return mo.one_hot_encode(X_clustered, self.n_clusters).astype(np.float16)
299
300        return X_clustered.astype(np.float16)
301
302    def create_layer(self, scaled_X, W=None):
303        """Create hidden layer.
304
305        Parameters:
306
307            scaled_X: {array-like}, shape = [n_samples, n_features]
308                Training vectors, where n_samples is the number
309                of samples and n_features is the number of features
310
311            W: {array-like}, shape = [n_features, hidden_features]
312                if provided, constructs the hidden layer with W; otherwise computed internally
313
314        Returns:
315
316            Hidden layer matrix: {array-like}
317
318        """
319
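        # In short: this returns dropout(g(scaled_X W)) when bias is False, or
        # dropout(g([1, scaled_X] W)) when bias is True, where g is
        # self.activation_func and W is either supplied by the caller or
        # simulated via 'sobol', 'hammersley', 'halton' or 'uniform' draws
        # (see nodes_sim), with shape (n_features [+ 1], n_hidden_features).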
320        n_features = scaled_X.shape[1]
321
322        # hash_sim = {
323        #         "sobol": generate_sobol,
324        #         "hammersley": generate_hammersley,
325        #         "uniform": generate_uniform,
326        #         "halton": generate_halton
327        #     }
328
329        if self.bias is False:  # no bias term in the hidden layer
330            if W is None:
331                if self.nodes_sim == "sobol":
332                    self.W_ = generate_sobol(
333                        n_dims=n_features,
334                        n_points=self.n_hidden_features,
335                        seed=self.seed,
336                    )
337                elif self.nodes_sim == "hammersley":
338                    self.W_ = generate_hammersley(
339                        n_dims=n_features,
340                        n_points=self.n_hidden_features,
341                        seed=self.seed,
342                    )
343                elif self.nodes_sim == "uniform":
344                    self.W_ = generate_uniform(
345                        n_dims=n_features,
346                        n_points=self.n_hidden_features,
347                        seed=self.seed,
348                    )
349                else:
350                    self.W_ = generate_halton(
351                        n_dims=n_features,
352                        n_points=self.n_hidden_features,
353                        seed=self.seed,
354                    )
355
356                # self.W_ = hash_sim[self.nodes_sim](
357                #             n_dims=n_features,
358                #             n_points=self.n_hidden_features,
359                #             seed=self.seed,
360                #         )
361
362                assert (
363                    scaled_X.shape[1] == self.W_.shape[0]
364                ), "check dimensions of covariates X and matrix W"
365
366                return mo.dropout(
367                    x=self.activation_func(
368                        mo.safe_sparse_dot(a=scaled_X, b=self.W_, backend=self.backend)
369                    ),
370                    drop_prob=self.dropout,
371                    seed=self.seed,
372                )
373
374            # W is not none
375            assert (
376                scaled_X.shape[1] == W.shape[0]
377            ), "check dimensions of covariates X and matrix W"
378
379            # self.W_ = W
380            return mo.dropout(
381                x=self.activation_func(
382                    mo.safe_sparse_dot(a=scaled_X, b=W, backend=self.backend)
383                ),
384                drop_prob=self.dropout,
385                seed=self.seed,
386            )
387
388        # with bias term in the hidden layer
389        if W is None:
390            n_features_1 = n_features + 1
391
392            if self.nodes_sim == "sobol":
393                self.W_ = generate_sobol(
394                    n_dims=n_features_1,
395                    n_points=self.n_hidden_features,
396                    seed=self.seed,
397                )
398            elif self.nodes_sim == "hammersley":
399                self.W_ = generate_hammersley(
400                    n_dims=n_features_1,
401                    n_points=self.n_hidden_features,
402                    seed=self.seed,
403                )
404            elif self.nodes_sim == "uniform":
405                self.W_ = generate_uniform(
406                    n_dims=n_features_1,
407                    n_points=self.n_hidden_features,
408                    seed=self.seed,
409                )
410            else:
411                self.W_ = generate_halton(
412                    n_dims=n_features_1,
413                    n_points=self.n_hidden_features,
414                    seed=self.seed,
415                )
416
417            # self.W_ = hash_sim[self.nodes_sim](
418            #         n_dims=n_features_1,
419            #         n_points=self.n_hidden_features,
420            #         seed=self.seed,
421            #     )
422
423            return mo.dropout(
424                x=self.activation_func(
425                    mo.safe_sparse_dot(
426                        a=mo.cbind(
427                            np.ones(scaled_X.shape[0]),
428                            scaled_X,
429                            backend=self.backend,
430                        ),
431                        b=self.W_,
432                        backend=self.backend,
433                    )
434                ),
435                drop_prob=self.dropout,
436                seed=self.seed,
437            )
438
439        # W is not None
440        # self.W_ = W
441        return mo.dropout(
442            x=self.activation_func(
443                mo.safe_sparse_dot(
444                    a=mo.cbind(
445                        np.ones(scaled_X.shape[0]),
446                        scaled_X,
447                        backend=self.backend,
448                    ),
449                    b=W,
450                    backend=self.backend,
451                )
452            ),
453            drop_prob=self.dropout,
454            seed=self.seed,
455        )
456
457    def cook_training_set(self, y=None, X=None, W=None, **kwargs):
458        """Create new hidden features for training set, with hidden layer, center the response.
459
460        Parameters:
461
462            y: array-like, shape = [n_samples]
463                Target values
464
465            X: {array-like}, shape = [n_samples, n_features]
466                Training vectors, where n_samples is the number
467                of samples and n_features is the number of features
468
469            W: {array-like}, shape = [n_features, hidden_features]
470                if provided, constructs the hidden layer via W
471
472        Returns:
473
474            (centered response, direct link + hidden layer matrix): {tuple}
475
476        """
477
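        # Overview of what follows: optionally sub-sample columns (col_sample < 1),
        # append one-hot-encoded cluster memberships (n_clusters > 0), add the
        # hidden-layer features (n_hidden_features > 0), scale the result
        # according to type_scaling, then center y (regression) and, when
        # row_sample < 1, sub-sample the rows via SubSampler.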
478        # either X and y are stored or not
479        # assert ((y is None) & (X is None)) | ((y is not None) & (X is not None))
480        if self.n_hidden_features > 0:  # has a hidden layer
481            assert (
482                len(self.type_scaling) >= 2
483            ), "must have len(self.type_scaling) >= 2 when self.n_hidden_features > 0"
484
485        if X is None:
486
487            if self.col_sample == 1:
488                input_X = self.X_
489            else:
490                n_features = self.X_.shape[1]
491                new_n_features = int(np.ceil(n_features * self.col_sample))
492                assert (
493                    new_n_features >= 1
494                ), "check class attribute 'col_sample' and the number of covariates provided for X"
495                np.random.seed(self.seed)
496                index_col = np.random.choice(
497                    range(n_features), size=new_n_features, replace=False
498                )
499                self.index_col_ = index_col
500                input_X = self.X_[:, self.index_col_]
501
502        else:  # X is not None # keep X vs self.X_
503
504            if isinstance(X, pd.DataFrame):
505                X = copy.deepcopy(X.values.astype(float))
506
507            if self.col_sample == 1:
508                input_X = X
509            else:
510                n_features = X.shape[1]
511                new_n_features = int(np.ceil(n_features * self.col_sample))
512                assert (
513                    new_n_features >= 1
514                ), "check class attribute 'col_sample' and the number of covariates provided for X"
515                np.random.seed(self.seed)
516                index_col = np.random.choice(
517                    range(n_features), size=new_n_features, replace=False
518                )
519                self.index_col_ = index_col
520                input_X = X[:, self.index_col_]
521
522        if self.n_clusters <= 0:
523            # data without any clustering: self.n_clusters is None -----
524
525            if self.n_hidden_features > 0:  # with hidden layer
526
527                self.nn_scaler_, scaled_X = mo.scale_covariates(
528                    input_X, choice=self.type_scaling[1], scaler=self.nn_scaler_
529                )
530                Phi_X = (
531                    self.create_layer(scaled_X)
532                    if W is None
533                    else self.create_layer(scaled_X, W=W)
534                )
535                Z = (
536                    mo.cbind(input_X, Phi_X, backend=self.backend)
537                    if self.direct_link is True
538                    else Phi_X
539                )
540                self.scaler_, scaled_Z = mo.scale_covariates(
541                    Z, choice=self.type_scaling[0], scaler=self.scaler_
542                )
543            else:  # no hidden layer
544                Z = input_X
545                self.scaler_, scaled_Z = mo.scale_covariates(
546                    Z, choice=self.type_scaling[0], scaler=self.scaler_
547                )
548
549        else:
550
551            # data with clustering: self.n_clusters is not None ----- # keep
552
553            augmented_X = mo.cbind(
554                input_X,
555                self.encode_clusters(input_X, **kwargs),
556                backend=self.backend,
557            )
558
559            if self.n_hidden_features > 0:  # with hidden layer
560
561                self.nn_scaler_, scaled_X = mo.scale_covariates(
562                    augmented_X,
563                    choice=self.type_scaling[1],
564                    scaler=self.nn_scaler_,
565                )
566                Phi_X = (
567                    self.create_layer(scaled_X)
568                    if W is None
569                    else self.create_layer(scaled_X, W=W)
570                )
571                Z = (
572                    mo.cbind(augmented_X, Phi_X, backend=self.backend)
573                    if self.direct_link is True
574                    else Phi_X
575                )
576                self.scaler_, scaled_Z = mo.scale_covariates(
577                    Z, choice=self.type_scaling[0], scaler=self.scaler_
578                )
579            else:  # no hidden layer
580                Z = augmented_X
581                self.scaler_, scaled_Z = mo.scale_covariates(
582                    Z, choice=self.type_scaling[0], scaler=self.scaler_
583                )
584
585        # Returning model inputs -----
586        if mx.is_factor(y) is False:  # regression
587            # center y
588            if y is None:
589                self.y_mean_, centered_y = mo.center_response(self.y_)
590            else:
591                self.y_mean_, centered_y = mo.center_response(y)
592
593            # y is subsampled
594            if self.row_sample < 1:
595                n, p = Z.shape
596
597                self.subsampler_ = (
598                    SubSampler(y=self.y_, row_sample=self.row_sample, seed=self.seed)
599                    if y is None
600                    else SubSampler(y=y, row_sample=self.row_sample, seed=self.seed)
601                )
602
603                self.index_row_ = self.subsampler_.subsample()
604
605                n_row_sample = len(self.index_row_)
606                # regression
607                return (
608                    centered_y[self.index_row_].reshape(n_row_sample),
609                    self.scaler_.transform(
610                        Z[self.index_row_, :].reshape(n_row_sample, p)
611                    ),
612                )
613            # y is not subsampled
614            # regression
615            return (centered_y, self.scaler_.transform(Z))
616
617        # classification
618        # y is subsampled
619        if self.row_sample < 1:
620            n, p = Z.shape
621
622            self.subsampler_ = (
623                SubSampler(y=self.y_, row_sample=self.row_sample, seed=self.seed)
624                if y is None
625                else SubSampler(y=y, row_sample=self.row_sample, seed=self.seed)
626            )
627
628            self.index_row_ = self.subsampler_.subsample()
629
630            n_row_sample = len(self.index_row_)
631            # classification
632            return (
633                y[self.index_row_].reshape(n_row_sample),
634                self.scaler_.transform(Z[self.index_row_, :].reshape(n_row_sample, p)),
635            )
636        # y is not subsampled
637        # classification
638        return (y, self.scaler_.transform(Z))
639
640    def cook_test_set(self, X, **kwargs):
641        """Transform data from test set, with hidden layer.
642
643        Parameters:
644
645            X: {array-like}, shape = [n_samples, n_features]
646                Training vectors, where n_samples is the number
647                of samples and n_features is the number of features
648
 649            **kwargs: additional parameters to be passed to self.encode_clusters
650
651        Returns:
652
653            Transformed test set : {array-like}
654        """
655
656        if isinstance(X, pd.DataFrame):
657            X = copy.deepcopy(X.values.astype(float))
658
659        if len(X.shape) == 1:
660            X = X.reshape(1, -1)
661
662        if (
663            self.n_clusters == 0
664        ):  # data without clustering: self.n_clusters is None -----
665            if self.n_hidden_features > 0:
666                # if hidden layer
667                scaled_X = (
668                    self.nn_scaler_.transform(X)
669                    if (self.col_sample == 1)
670                    else self.nn_scaler_.transform(X[:, self.index_col_])
671                )
672                Phi_X = self.create_layer(scaled_X, self.W_)
673                if self.direct_link == True:
674                    return self.scaler_.transform(
675                        mo.cbind(scaled_X, Phi_X, backend=self.backend)
676                    )
677                # when self.direct_link == False
678                return self.scaler_.transform(Phi_X)
679            # if no hidden layer # self.n_hidden_features == 0
680            return self.scaler_.transform(X)
681
682        # data with clustering: self.n_clusters > 0 -----
683        if self.col_sample == 1:
684            predicted_clusters = self.encode_clusters(X=X, predict=True, **kwargs)
685            augmented_X = mo.cbind(X, predicted_clusters, backend=self.backend)
686        else:
687            predicted_clusters = self.encode_clusters(
688                X=X[:, self.index_col_], predict=True, **kwargs
689            )
690            augmented_X = mo.cbind(
691                X[:, self.index_col_], predicted_clusters, backend=self.backend
692            )
693
694        if self.n_hidden_features > 0:  # if hidden layer
695            scaled_X = self.nn_scaler_.transform(augmented_X)
696            Phi_X = self.create_layer(scaled_X, self.W_)
697            if self.direct_link == True:
698                return self.scaler_.transform(
699                    mo.cbind(augmented_X, Phi_X, backend=self.backend)
700                )
701            return self.scaler_.transform(Phi_X)
702
703        # if no hidden layer
704        return self.scaler_.transform(augmented_X)
705
706    def cross_val_score(
707        self,
708        X,
709        y,
710        cv=5,
711        scoring="accuracy",
712        random_state=42,
713        n_jobs=-1,
714        epsilon=0.5,
715        penalized=True,
716        objective="abs",
717        **kwargs
718    ):
719        """
720        Penalized Cross-validation score for a model.
721
722        Parameters:
723
724            X: {array-like}, shape = [n_samples, n_features]
725                Training vectors, where n_samples is the number
726                of samples and n_features is the number of features
727
728            y: array-like, shape = [n_samples]
729                Target values
730
738            cv: int
739                Number of folds
740
741            scoring: str
742                Scoring metric
743
744            random_state: int
745                Random state
746
747            n_jobs: int
748                Number of jobs to run in parallel
749
750            epsilon: float
751                Penalty parameter
752
753            penalized: bool
754                Whether to obtain penalized cross-validation score or not
755
756            objective: str
757                'abs': Minimize the absolute difference between cross-validation score and validation score
758                'relative': Minimize the relative difference between cross-validation score and validation score
759        Returns:
760
761            A namedtuple with the following fields:
762                - cv_score: float
763                    cross-validation score
764                - val_score: float
765                    validation score
766                - penalized_score: float
767                    penalized cross-validation score: for objective='abs', |cv_score - val_score| + epsilon*(1/val_score + 1/cv_score);
768                    for objective='relative', |cv_score/val_score - 1| + epsilon*(1/val_score + 1/cv_score).
769                    If a higher scoring metric is better, minimize the result; if a lower one is better, maximize it.
770        """
771        if scoring == "accuracy":
772            scoring_func = accuracy_score
773        elif scoring == "balanced_accuracy":
774            scoring_func = balanced_accuracy_score
775        elif scoring == "f1":
776            scoring_func = f1_score
777        elif scoring == "roc_auc":
778            scoring_func = roc_auc_score
779        elif scoring == "r2":
780            scoring_func = r2_score
781        elif scoring == "mse":
782            scoring_func = mean_squared_error
783        elif scoring == "mae":
784            scoring_func = mean_absolute_error
785        elif scoring == "mape":
786            scoring_func = mean_absolute_percentage_error
787        elif scoring == "rmse":
788
789            def scoring_func(y_true, y_pred):
790                return np.sqrt(mean_squared_error(y_true, y_pred))
791
792        X_train, X_val, y_train, y_val = train_test_split(
793            X, y, test_size=0.2, random_state=random_state
794        )
795
796        res = cross_val_score(
797            self, X_train, y_train, cv=cv, scoring=scoring, n_jobs=n_jobs
798        )  # cross-validation error
799
800        if penalized == False:
801            return res
802
803        DescribeResult = namedtuple(
804            "DescribeResult", ["cv_score", "val_score", "penalized_score"]
805        )
806
807        numerator = res.mean()
808
809        # Evaluate on the held-out validation split
810        preds_val = self.fit(X_train, y_train).predict(X_val)
811        # `scoring` is a string, so call the corresponding metric function directly
812        denominator = scoring_func(y_val, preds_val)  # validation error
815
816        # penalizes small scores via the epsilon term (assumes higher scores are better)
817        if objective == "abs":
818            penalized_score = np.abs(numerator - denominator) + epsilon * (
819                1 / denominator + 1 / numerator
820            )
821        elif objective == "relative":
822            ratio = numerator / denominator
823            penalized_score = np.abs(ratio - 1) + epsilon * (
824                1 / denominator + 1 / numerator
825            )
826
827        return DescribeResult(
828            cv_score=numerator,
829            val_score=denominator,
830            penalized_score=penalized_score,
831        )
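
A quick way to see the penalized score in action is to call `cross_val_score` on any estimator deriving from `Base`. A minimal sketch, not taken from the package's own examples, assuming scikit-learn's `load_diabetes` data, the documented `BaseRegressor` defaults, and that the estimator is scikit-learn compatible enough for `sklearn.model_selection.cross_val_score`:

```python
# Hedged sketch: penalized cross-validation on a small regression problem.
import nnetsauce as ns
from sklearn.datasets import load_diabetes

X, y = load_diabetes(return_X_y=True)

reg = ns.BaseRegressor(n_hidden_features=10, n_clusters=2, seed=123)

# penalized=True (the default) returns a namedtuple with fields
# cv_score, val_score and penalized_score
res = reg.cross_val_score(X, y, cv=5, scoring="r2", epsilon=0.5, objective="abs")
print(res.cv_score, res.val_score, res.penalized_score)
```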

Base model from which all the other classes inherit.

This class contains the most important data preprocessing/feature engineering methods.

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or
    not (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original features are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for type_clust='kmeans' or type_clust='gmm'
    clustering (could be 0: no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot);
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax') or robust scaling ('robust') or  max absolute scaling ('maxabs')

col_sample: float
    percentage of features randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

seed: int
    reproducibility seed for nodes_sim=='uniform', clustering and dropout

backend: str
    "cpu" or "gpu" or "tpu"
def encode_clusters(self, X=None, predict=False, scaler=None, **kwargs):
236    def encode_clusters(self, X=None, predict=False, scaler=None, **kwargs):  #
237        """Create new covariates with kmeans or GMM clustering
238
239        Parameters:
240
241            X: {array-like}, shape = [n_samples, n_features]
242                Training vectors, where n_samples is the number
243                of samples and n_features is the number of features.
244
245            predict: boolean
246                is False on training set and True on test set
247
248            scaler: {object} of class StandardScaler, MinMaxScaler, RobustScaler or MaxAbsScaler
249                if scaler has already been fitted on training data (online training), it can be passed here
250
251            **kwargs:
252                additional parameters to be passed to the
253                clustering method
254
255        Returns:
256
257            Clusters' matrix, one-hot encoded: {array-like}
258
259        """
260
261        np.random.seed(self.seed)
262
263        if X is None:
264            X = self.X_
265
266        if isinstance(X, pd.DataFrame):
267            X = copy.deepcopy(X.values.astype(float))
268
269        if len(X.shape) == 1:
270            X = X.reshape(1, -1)
271
272        if predict is False:  # encode training set
273
274            # scale input data before clustering
275            self.clustering_scaler_, scaled_X = mo.scale_covariates(
276                X, choice=self.type_scaling[2], scaler=self.clustering_scaler_
277            )
278
279            self.clustering_obj_, X_clustered = mo.cluster_covariates(
280                scaled_X,
281                self.n_clusters,
282                self.seed,
283                type_clust=self.type_clust,
284                **kwargs
285            )
286
287            if self.cluster_encode == True:
288                return mo.one_hot_encode(X_clustered, self.n_clusters).astype(
289                    np.float16
290                )
291
292            return X_clustered.astype(np.float16)
293
294        # if predict == True, encode test set
295        X_clustered = self.clustering_obj_.predict(self.clustering_scaler_.transform(X))
296
297        if self.cluster_encode == True:
298            return mo.one_hot_encode(X_clustered, self.n_clusters).astype(np.float16)
299
300        return X_clustered.astype(np.float16)

Create new covariates with kmeans or GMM clustering

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

predict: boolean
    is False on training set and True on test set

scaler: {object} of class StandardScaler, MinMaxScaler, RobustScaler or MaxAbsScaler
    if scaler has already been fitted on training data (online training), it can be passed here

**kwargs:
    additional parameters to be passed to the
    clustering method

Returns:

Clusters' matrix, one-hot encoded: {array-like}
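
An illustration of what this cluster encoding produces, using scikit-learn directly rather than the internal `mo.cluster_covariates` / `mo.one_hot_encode` helpers (a hedged sketch of the idea, not the method's implementation): cluster labels are learned on scaled training data, then one-hot encoded and appended as extra covariates; on the test set, the already-fitted scaler and clustering model are simply applied.

```python
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

rng = np.random.default_rng(123)
X_train, X_test = rng.normal(size=(100, 4)), rng.normal(size=(10, 4))

# scale inputs before clustering (type_scaling[2] == 'std')
scaler = StandardScaler().fit(X_train)
km = KMeans(n_clusters=2, random_state=123).fit(scaler.transform(X_train))

def one_hot(labels, n_clusters):
    # one column per cluster, as float16 like the library's encoding
    return np.eye(n_clusters, dtype=np.float16)[labels]

clusters_train = one_hot(km.labels_, 2)                           # training set
clusters_test = one_hot(km.predict(scaler.transform(X_test)), 2)  # test set
```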
def create_layer(self, scaled_X, W=None):
302    def create_layer(self, scaled_X, W=None):
303        """Create hidden layer.
304
305        Parameters:
306
307            scaled_X: {array-like}, shape = [n_samples, n_features]
308                Training vectors, where n_samples is the number
309                of samples and n_features is the number of features
310
311            W: {array-like}, shape = [n_features, hidden_features]
312                if provided, constructs the hidden layer with W; otherwise computed internally
313
314        Returns:
315
316            Hidden layer matrix: {array-like}
317
318        """
319
320        n_features = scaled_X.shape[1]
321
322        # hash_sim = {
323        #         "sobol": generate_sobol,
324        #         "hammersley": generate_hammersley,
325        #         "uniform": generate_uniform,
326        #         "halton": generate_halton
327        #     }
328
329        if self.bias is False:  # no bias term in the hidden layer
330            if W is None:
331                if self.nodes_sim == "sobol":
332                    self.W_ = generate_sobol(
333                        n_dims=n_features,
334                        n_points=self.n_hidden_features,
335                        seed=self.seed,
336                    )
337                elif self.nodes_sim == "hammersley":
338                    self.W_ = generate_hammersley(
339                        n_dims=n_features,
340                        n_points=self.n_hidden_features,
341                        seed=self.seed,
342                    )
343                elif self.nodes_sim == "uniform":
344                    self.W_ = generate_uniform(
345                        n_dims=n_features,
346                        n_points=self.n_hidden_features,
347                        seed=self.seed,
348                    )
349                else:
350                    self.W_ = generate_halton(
351                        n_dims=n_features,
352                        n_points=self.n_hidden_features,
353                        seed=self.seed,
354                    )
355
356                # self.W_ = hash_sim[self.nodes_sim](
357                #             n_dims=n_features,
358                #             n_points=self.n_hidden_features,
359                #             seed=self.seed,
360                #         )
361
362                assert (
363                    scaled_X.shape[1] == self.W_.shape[0]
364                ), "check dimensions of covariates X and matrix W"
365
366                return mo.dropout(
367                    x=self.activation_func(
368                        mo.safe_sparse_dot(a=scaled_X, b=self.W_, backend=self.backend)
369                    ),
370                    drop_prob=self.dropout,
371                    seed=self.seed,
372                )
373
374            # W is not none
375            assert (
376                scaled_X.shape[1] == W.shape[0]
377            ), "check dimensions of covariates X and matrix W"
378
379            # self.W_ = W
380            return mo.dropout(
381                x=self.activation_func(
382                    mo.safe_sparse_dot(a=scaled_X, b=W, backend=self.backend)
383                ),
384                drop_prob=self.dropout,
385                seed=self.seed,
386            )
387
388        # with bias term in the hidden layer
389        if W is None:
390            n_features_1 = n_features + 1
391
392            if self.nodes_sim == "sobol":
393                self.W_ = generate_sobol(
394                    n_dims=n_features_1,
395                    n_points=self.n_hidden_features,
396                    seed=self.seed,
397                )
398            elif self.nodes_sim == "hammersley":
399                self.W_ = generate_hammersley(
400                    n_dims=n_features_1,
401                    n_points=self.n_hidden_features,
402                    seed=self.seed,
403                )
404            elif self.nodes_sim == "uniform":
405                self.W_ = generate_uniform(
406                    n_dims=n_features_1,
407                    n_points=self.n_hidden_features,
408                    seed=self.seed,
409                )
410            else:
411                self.W_ = generate_halton(
412                    n_dims=n_features_1,
413                    n_points=self.n_hidden_features,
414                    seed=self.seed,
415                )
416
417            # self.W_ = hash_sim[self.nodes_sim](
418            #         n_dims=n_features_1,
419            #         n_points=self.n_hidden_features,
420            #         seed=self.seed,
421            #     )
422
423            return mo.dropout(
424                x=self.activation_func(
425                    mo.safe_sparse_dot(
426                        a=mo.cbind(
427                            np.ones(scaled_X.shape[0]),
428                            scaled_X,
429                            backend=self.backend,
430                        ),
431                        b=self.W_,
432                        backend=self.backend,
433                    )
434                ),
435                drop_prob=self.dropout,
436                seed=self.seed,
437            )
438
439        # W is not None
440        # self.W_ = W
441        return mo.dropout(
442            x=self.activation_func(
443                mo.safe_sparse_dot(
444                    a=mo.cbind(
445                        np.ones(scaled_X.shape[0]),
446                        scaled_X,
447                        backend=self.backend,
448                    ),
449                    b=W,
450                    backend=self.backend,
451                )
452            ),
453            drop_prob=self.dropout,
454            seed=self.seed,
455        )

Create hidden layer.

Parameters:

scaled_X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features

W: {array-like}, shape = [n_features, hidden_features]
    if provided, constructs the hidden layer with W; otherwise computed internally

Returns:

Hidden layer matrix: {array-like}
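
Conceptually, the hidden layer is a fixed random projection followed by a nonlinearity: `Phi = g([1, X] W)` when `bias=True`, with `W` drawn from a quasi-random (Sobol, Hammersley, Halton) or uniform sequence. A hedged sketch of that computation, using plain uniform weights instead of the library's generators and omitting dropout:

```python
import numpy as np

rng = np.random.default_rng(123)
scaled_X = rng.normal(size=(100, 4))
n_hidden_features = 5

# +1 column in W for the bias term
W = rng.uniform(size=(scaled_X.shape[1] + 1, n_hidden_features))
X_with_bias = np.column_stack([np.ones(scaled_X.shape[0]), scaled_X])

Phi_X = np.maximum(X_with_bias @ W, 0.0)  # 'relu' activation
print(Phi_X.shape)  # (100, 5)
```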
def cook_training_set(self, y=None, X=None, W=None, **kwargs):
457    def cook_training_set(self, y=None, X=None, W=None, **kwargs):
458        """Create new hidden features for training set, with hidden layer, center the response.
459
460        Parameters:
461
462            y: array-like, shape = [n_samples]
463                Target values
464
465            X: {array-like}, shape = [n_samples, n_features]
466                Training vectors, where n_samples is the number
467                of samples and n_features is the number of features
468
469            W: {array-like}, shape = [n_features, hidden_features]
470                if provided, constructs the hidden layer via W
471
472        Returns:
473
474            (centered response, direct link + hidden layer matrix): {tuple}
475
476        """
477
478        # either X and y are stored or not
479        # assert ((y is None) & (X is None)) | ((y is not None) & (X is not None))
480        if self.n_hidden_features > 0:  # has a hidden layer
481            assert (
482                len(self.type_scaling) >= 2
483            ), "must have len(self.type_scaling) >= 2 when self.n_hidden_features > 0"
484
485        if X is None:
486
487            if self.col_sample == 1:
488                input_X = self.X_
489            else:
490                n_features = self.X_.shape[1]
491                new_n_features = int(np.ceil(n_features * self.col_sample))
492                assert (
493                    new_n_features >= 1
494                ), "check class attribute 'col_sample' and the number of covariates provided for X"
495                np.random.seed(self.seed)
496                index_col = np.random.choice(
497                    range(n_features), size=new_n_features, replace=False
498                )
499                self.index_col_ = index_col
500                input_X = self.X_[:, self.index_col_]
501
502        else:  # X is not None # keep X vs self.X_
503
504            if isinstance(X, pd.DataFrame):
505                X = copy.deepcopy(X.values.astype(float))
506
507            if self.col_sample == 1:
508                input_X = X
509            else:
510                n_features = X.shape[1]
511                new_n_features = int(np.ceil(n_features * self.col_sample))
512                assert (
513                    new_n_features >= 1
514                ), "check class attribute 'col_sample' and the number of covariates provided for X"
515                np.random.seed(self.seed)
516                index_col = np.random.choice(
517                    range(n_features), size=new_n_features, replace=False
518                )
519                self.index_col_ = index_col
520                input_X = X[:, self.index_col_]
521
522        if self.n_clusters <= 0:
523            # data without any clustering: self.n_clusters <= 0 -----
524
525            if self.n_hidden_features > 0:  # with hidden layer
526
527                self.nn_scaler_, scaled_X = mo.scale_covariates(
528                    input_X, choice=self.type_scaling[1], scaler=self.nn_scaler_
529                )
530                Phi_X = (
531                    self.create_layer(scaled_X)
532                    if W is None
533                    else self.create_layer(scaled_X, W=W)
534                )
535                Z = (
536                    mo.cbind(input_X, Phi_X, backend=self.backend)
537                    if self.direct_link is True
538                    else Phi_X
539                )
540                self.scaler_, scaled_Z = mo.scale_covariates(
541                    Z, choice=self.type_scaling[0], scaler=self.scaler_
542                )
543            else:  # no hidden layer
544                Z = input_X
545                self.scaler_, scaled_Z = mo.scale_covariates(
546                    Z, choice=self.type_scaling[0], scaler=self.scaler_
547                )
548
549        else:
550
551            # data with clustering: self.n_clusters > 0 -----
552
553            augmented_X = mo.cbind(
554                input_X,
555                self.encode_clusters(input_X, **kwargs),
556                backend=self.backend,
557            )
558
559            if self.n_hidden_features > 0:  # with hidden layer
560
561                self.nn_scaler_, scaled_X = mo.scale_covariates(
562                    augmented_X,
563                    choice=self.type_scaling[1],
564                    scaler=self.nn_scaler_,
565                )
566                Phi_X = (
567                    self.create_layer(scaled_X)
568                    if W is None
569                    else self.create_layer(scaled_X, W=W)
570                )
571                Z = (
572                    mo.cbind(augmented_X, Phi_X, backend=self.backend)
573                    if self.direct_link is True
574                    else Phi_X
575                )
576                self.scaler_, scaled_Z = mo.scale_covariates(
577                    Z, choice=self.type_scaling[0], scaler=self.scaler_
578                )
579            else:  # no hidden layer
580                Z = augmented_X
581                self.scaler_, scaled_Z = mo.scale_covariates(
582                    Z, choice=self.type_scaling[0], scaler=self.scaler_
583                )
584
585        # Returning model inputs -----
586        if mx.is_factor(y) is False:  # regression
587            # center y
588            if y is None:
589                self.y_mean_, centered_y = mo.center_response(self.y_)
590            else:
591                self.y_mean_, centered_y = mo.center_response(y)
592
593            # y is subsampled
594            if self.row_sample < 1:
595                n, p = Z.shape
596
597                self.subsampler_ = (
598                    SubSampler(y=self.y_, row_sample=self.row_sample, seed=self.seed)
599                    if y is None
600                    else SubSampler(y=y, row_sample=self.row_sample, seed=self.seed)
601                )
602
603                self.index_row_ = self.subsampler_.subsample()
604
605                n_row_sample = len(self.index_row_)
606                # regression
607                return (
608                    centered_y[self.index_row_].reshape(n_row_sample),
609                    self.scaler_.transform(
610                        Z[self.index_row_, :].reshape(n_row_sample, p)
611                    ),
612                )
613            # y is not subsampled
614            # regression
615            return (centered_y, self.scaler_.transform(Z))
616
617        # classification
618        # y is subsampled
619        if self.row_sample < 1:
620            n, p = Z.shape
621
622            self.subsampler_ = (
623                SubSampler(y=self.y_, row_sample=self.row_sample, seed=self.seed)
624                if y is None
625                else SubSampler(y=y, row_sample=self.row_sample, seed=self.seed)
626            )
627
628            self.index_row_ = self.subsampler_.subsample()
629
630            n_row_sample = len(self.index_row_)
631            # classification
632            return (
633                y[self.index_row_].reshape(n_row_sample),
634                self.scaler_.transform(Z[self.index_row_, :].reshape(n_row_sample, p)),
635            )
636        # y is not subsampled
637        # classification
638        return (y, self.scaler_.transform(Z))

Create new hidden features for training set, with hidden layer, center the response.

Parameters:

y: array-like, shape = [n_samples]
    Target values

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features

W: {array-like}, shape = [n_features, hidden_features]
    if provided, constructs the hidden layer via W

Returns:

(centered response, direct link + hidden layer matrix): {tuple}
def cook_test_set(self, X, **kwargs):
640    def cook_test_set(self, X, **kwargs):
641        """Transform data from test set, with hidden layer.
642
643        Parameters:
644
645            X: {array-like}, shape = [n_samples, n_features]
646                Test vectors, where n_samples is the number
647                of samples and n_features is the number of features
648
649            **kwargs: additional parameters to be passed to self.encode_clusters
650
651        Returns:
652
653            Transformed test set : {array-like}
654        """
655
656        if isinstance(X, pd.DataFrame):
657            X = copy.deepcopy(X.values.astype(float))
658
659        if len(X.shape) == 1:
660            X = X.reshape(1, -1)
661
662        if (
663            self.n_clusters == 0
664        ):  # data without clustering: self.n_clusters == 0 -----
665            if self.n_hidden_features > 0:
666                # if hidden layer
667                scaled_X = (
668                    self.nn_scaler_.transform(X)
669                    if (self.col_sample == 1)
670                    else self.nn_scaler_.transform(X[:, self.index_col_])
671                )
672                Phi_X = self.create_layer(scaled_X, self.W_)
673                if self.direct_link == True:
674                    return self.scaler_.transform(
675                        mo.cbind(scaled_X, Phi_X, backend=self.backend)
676                    )
677                # when self.direct_link == False
678                return self.scaler_.transform(Phi_X)
679            # if no hidden layer # self.n_hidden_features == 0
680            return self.scaler_.transform(X)
681
682        # data with clustering: self.n_clusters > 0 -----
683        if self.col_sample == 1:
684            predicted_clusters = self.encode_clusters(X=X, predict=True, **kwargs)
685            augmented_X = mo.cbind(X, predicted_clusters, backend=self.backend)
686        else:
687            predicted_clusters = self.encode_clusters(
688                X=X[:, self.index_col_], predict=True, **kwargs
689            )
690            augmented_X = mo.cbind(
691                X[:, self.index_col_], predicted_clusters, backend=self.backend
692            )
693
694        if self.n_hidden_features > 0:  # if hidden layer
695            scaled_X = self.nn_scaler_.transform(augmented_X)
696            Phi_X = self.create_layer(scaled_X, self.W_)
697            if self.direct_link == True:
698                return self.scaler_.transform(
699                    mo.cbind(augmented_X, Phi_X, backend=self.backend)
700                )
701            return self.scaler_.transform(Phi_X)
702
703        # if no hidden layer
704        return self.scaler_.transform(augmented_X)

Transform data from test set, with hidden layer.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features

**kwargs: additional parameters to be passed to self.encode_clusters

Returns:

Transformed test set : {array-like}
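
A hedged sketch of the `cook_training_set` / `cook_test_set` round trip (assuming scikit-learn's `load_diabetes` data and the documented `BaseRegressor` defaults): the training call fits the scalers, clustering and hidden-layer weights, and the test call reuses them.

```python
import nnetsauce as ns
from sklearn.datasets import load_diabetes

X, y = load_diabetes(return_X_y=True)
X_train, y_train, X_test = X[:350], y[:350], X[350:]

reg = ns.BaseRegressor(n_hidden_features=5, n_clusters=2, seed=123)

# centered response + scaled (direct link + clusters + hidden layer) matrix
centered_y, scaled_Z = reg.cook_training_set(y=y_train, X=X_train)
print(centered_y.shape, scaled_Z.shape)

# the test set goes through the same clustering, scaling and hidden layer
scaled_Z_test = reg.cook_test_set(X_test)
print(scaled_Z_test.shape)
```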
class BaseRegressor(nnetsauce.Base, sklearn.base.RegressorMixin):
 15class BaseRegressor(Base, RegressorMixin):
 16    """Random Vector Functional Link Network regression without shrinkage
 17
 18    Parameters:
 19
 20        n_hidden_features: int
 21            number of nodes in the hidden layer
 22
 23        activation_name: str
 24            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 25
 26        a: float
 27            hyperparameter for 'prelu' or 'elu' activation function
 28
 29        nodes_sim: str
 30            type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
 31            'uniform'
 32
 33        bias: boolean
 34            indicates if the hidden layer contains a bias term (True) or
 35            not (False)
 36
 37        dropout: float
 38            regularization parameter; (random) percentage of nodes dropped out
 39            of the training
 40
 41        direct_link: boolean
 42            indicates if the original features are included (True) in model's
 43            fitting or not (False)
 44
 45        n_clusters: int
 46            number of clusters for type_clust='kmeans' or type_clust='gmm'
 47            clustering (could be 0: no clustering)
 48
 49        cluster_encode: bool
 50            defines how the variable containing clusters is treated (default is one-hot);
 51            if `False`, then labels are used, without one-hot encoding
 52
 53        type_clust: str
 54            type of clustering method: currently k-means ('kmeans') or Gaussian
 55            Mixture Model ('gmm')
 56
 57        type_scaling: a tuple of 3 strings
 58            scaling methods for inputs, hidden layer, and clustering respectively
 59            (and when relevant).
 60            Currently available: standardization ('std') or MinMax scaling ('minmax')
 61
 62        col_sample: float
 63            percentage of features randomly chosen for training
 64
 65        row_sample: float
 66            percentage of rows chosen for training, by stratified bootstrapping
 67
 68        seed: int
 69            reproducibility seed for nodes_sim=='uniform', clustering and dropout
 70
 71        backend: str
 72            "cpu" or "gpu" or "tpu"
 73
 74    Attributes:
 75
 76        beta_: vector
 77            regression coefficients
 78
 79        GCV_: float
 80            Generalized Cross-Validation error
 81
 82    """
 83
 84    # construct the object -----
 85
 86    def __init__(
 87        self,
 88        n_hidden_features=5,
 89        activation_name="relu",
 90        a=0.01,
 91        nodes_sim="sobol",
 92        bias=True,
 93        dropout=0,
 94        direct_link=True,
 95        n_clusters=2,
 96        cluster_encode=True,
 97        type_clust="kmeans",
 98        type_scaling=("std", "std", "std"),
 99        col_sample=1,
100        row_sample=1,
101        seed=123,
102        backend="cpu",
103    ):
104        super().__init__(
105            n_hidden_features=n_hidden_features,
106            activation_name=activation_name,
107            a=a,
108            nodes_sim=nodes_sim,
109            bias=bias,
110            dropout=dropout,
111            direct_link=direct_link,
112            n_clusters=n_clusters,
113            cluster_encode=cluster_encode,
114            type_clust=type_clust,
115            type_scaling=type_scaling,
116            col_sample=col_sample,
117            row_sample=row_sample,
118            seed=seed,
119            backend=backend,
120        )
121
122    def fit(self, X, y, **kwargs):
123        """Fit BaseRegressor to training data (X, y)
124
125        Parameters:
126
127            X: {array-like}, shape = [n_samples, n_features]
128                Training vectors, where n_samples is the number
129                of samples and n_features is the number of features
130
131            y: array-like, shape = [n_samples]
132                Target values
133
134            **kwargs: additional parameters to be passed to self.cook_training_set
135
136        Returns:
137
138            self: object
139        """
140
141        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
142
143        fit_obj = lmf.beta_Sigma_hat(X=scaled_Z, y=centered_y, backend=self.backend)
144
145        self.beta_ = fit_obj["beta_hat"]
146
147        self.GCV_ = fit_obj["GCV"]
148
149        return self
150
151    def predict(self, X, **kwargs):
152        """Predict test data X.
153
154        Parameters:
155
156            X: {array-like}, shape = [n_samples, n_features]
157                Test vectors, where n_samples is the number
158                of samples and n_features is the number of features
159
160            **kwargs: additional parameters to be passed to self.cook_test_set
161
162        Returns:
163
164            model predictions: {array-like}
165        """
166
167        if len(X.shape) == 1:
168            n_features = X.shape[0]
169            new_X = mo.rbind(
170                X.reshape(1, n_features),
171                np.ones(n_features).reshape(1, n_features),
172            )
173
174            return (
175                self.y_mean_
176                + mo.safe_sparse_dot(
177                    a=self.cook_test_set(new_X, **kwargs),
178                    b=self.beta_,
179                    backend=self.backend,
180                )
181            )[0]
182
183        return self.y_mean_ + mo.safe_sparse_dot(
184            a=self.cook_test_set(X, **kwargs),
185            b=self.beta_,
186            backend=self.backend,
187        )

Random Vector Functional Link Network regression without shrinkage

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or
    not (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original features are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for type_clust='kmeans' or type_clust='gmm'
    clustering (could be 0: no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot);
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

col_sample: float
    percentage of features randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

seed: int
    reproducibility seed for nodes_sim=='uniform', clustering and dropout

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

beta_: vector
    regression coefficients

GCV_: float
    Generalized Cross-Validation error
def fit(self, X, y, **kwargs):
122    def fit(self, X, y, **kwargs):
123        """Fit BaseRegressor to training data (X, y)
124
125        Parameters:
126
127            X: {array-like}, shape = [n_samples, n_features]
128                Training vectors, where n_samples is the number
129                of samples and n_features is the number of features
130
131            y: array-like, shape = [n_samples]
132                Target values
133
134            **kwargs: additional parameters to be passed to self.cook_training_set
135
136        Returns:
137
138            self: object
139        """
140
141        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
142
143        fit_obj = lmf.beta_Sigma_hat(X=scaled_Z, y=centered_y, backend=self.backend)
144
145        self.beta_ = fit_obj["beta_hat"]
146
147        self.GCV_ = fit_obj["GCV"]
148
149        return self

Fit BaseRegressor to training data (X, y)

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features

y: array-like, shape = [n_samples]
    Target values

**kwargs: additional parameters to be passed to self.cook_training_set

Returns:

self: object
def predict(self, X, **kwargs):
151    def predict(self, X, **kwargs):
152        """Predict test data X.
153
154        Parameters:
155
156            X: {array-like}, shape = [n_samples, n_features]
157                Test vectors, where n_samples is the number
158                of samples and n_features is the number of features
159
160            **kwargs: additional parameters to be passed to self.cook_test_set
161
162        Returns:
163
164            model predictions: {array-like}
165        """
166
167        if len(X.shape) == 1:
168            n_features = X.shape[0]
169            new_X = mo.rbind(
170                X.reshape(1, n_features),
171                np.ones(n_features).reshape(1, n_features),
172            )
173
174            return (
175                self.y_mean_
176                + mo.safe_sparse_dot(
177                    a=self.cook_test_set(new_X, **kwargs),
178                    b=self.beta_,
179                    backend=self.backend,
180                )
181            )[0]
182
183        return self.y_mean_ + mo.safe_sparse_dot(
184            a=self.cook_test_set(X, **kwargs),
185            b=self.beta_,
186            backend=self.backend,
187        )

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features

**kwargs: additional parameters to be passed to self.cook_test_set

Returns:

model predictions: {array-like}
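
A hedged usage sketch for `BaseRegressor` (assuming scikit-learn's `load_diabetes` data; the hyperparameters shown are the documented defaults):

```python
import numpy as np
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123)

reg = ns.BaseRegressor(n_hidden_features=5, direct_link=True, n_clusters=2, seed=123)
reg.fit(X_train, y_train)

print(reg.GCV_)  # Generalized Cross-Validation error on the training set
preds = reg.predict(X_test)
print(np.sqrt(np.mean((y_test - preds) ** 2)))  # out-of-sample RMSE
```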
class BayesianRVFLRegressor(nnetsauce.Base, sklearn.base.RegressorMixin):
 15class BayesianRVFLRegressor(Base, RegressorMixin):
 16    """Bayesian Random Vector Functional Link Network regression with one prior
 17
 18    Parameters:
 19
 20        n_hidden_features: int
 21            number of nodes in the hidden layer
 22
 23        activation_name: str
 24            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 25
 26        a: float
 27            hyperparameter for 'prelu' or 'elu' activation function
 28
 29        nodes_sim: str
 30            type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'
 31
 32        bias: boolean
 33            indicates if the hidden layer contains a bias term (True) or not (False)
 34
 35        dropout: float
 36            regularization parameter; (random) percentage of nodes dropped out
 37            of the training
 38
 39        direct_link: boolean
 40            indicates if the original features are included (True) in model's fitting or not (False)
 41
 42        n_clusters: int
 43            number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)
 44
 45        cluster_encode: bool
 46            defines how the variable containing clusters is treated (default is one-hot)
 47            if `False`, then labels are used, without one-hot encoding
 48
 49        type_clust: str
 50            type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')
 51
 52        type_scaling: a tuple of 3 strings
 53            scaling methods for inputs, hidden layer, and clustering respectively
 54            (and when relevant).
 55            Currently available: standardization ('std') or MinMax scaling ('minmax')
 56
 57        seed: int
 58            reproducibility seed for nodes_sim=='uniform'
 59
 60        s: float
 61            std. dev. of regression parameters in Bayesian Ridge Regression
 62
 63        sigma: float
 64            std. dev. of residuals in Bayesian Ridge Regression
 65
 66        return_std: boolean
 67            if True, uncertainty around predictions is evaluated
 68
 69        backend: str
 70            "cpu" or "gpu" or "tpu"
 71
 72    Attributes:
 73
 74        beta_: array-like
 75            regression coefficients
 76
 77        Sigma_: array-like
 78            covariance of the distribution of fitted parameters
 79
 80        GCV_: float
 81            Generalized cross-validation error
 82
 83        y_mean_: float
 84            average response
 85
 86    Examples:
 87
 88    ```python
 89    TBD
 90    ```
 91
 92    """
 93
 94    # construct the object -----
 95
 96    def __init__(
 97        self,
 98        n_hidden_features=5,
 99        activation_name="relu",
100        a=0.01,
101        nodes_sim="sobol",
102        bias=True,
103        dropout=0,
104        direct_link=True,
105        n_clusters=2,
106        cluster_encode=True,
107        type_clust="kmeans",
108        type_scaling=("std", "std", "std"),
109        seed=123,
110        s=0.1,
111        sigma=0.05,
112        return_std=True,
113        backend="cpu",
114    ):
115        super().__init__(
116            n_hidden_features=n_hidden_features,
117            activation_name=activation_name,
118            a=a,
119            nodes_sim=nodes_sim,
120            bias=bias,
121            dropout=dropout,
122            direct_link=direct_link,
123            n_clusters=n_clusters,
124            cluster_encode=cluster_encode,
125            type_clust=type_clust,
126            type_scaling=type_scaling,
127            seed=seed,
128            backend=backend,
129        )
130        self.s = s
131        self.sigma = sigma
132        self.beta_ = None
133        self.Sigma_ = None
134        self.GCV_ = None
135        self.return_std = return_std
136
137    def fit(self, X, y, **kwargs):
138        """Fit BayesianRVFLRegressor to training data (X, y).
139
140        Parameters:
141
142            X: {array-like}, shape = [n_samples, n_features]
143                Training vectors, where n_samples is the number
144                of samples and n_features is the number of features.
145
146            y: array-like, shape = [n_samples]
147                Target values.
148
149            **kwargs: additional parameters to be passed to
150                    self.cook_training_set
151
152        Returns:
153
154            self: object
155
156        """
157
158        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
159
160        fit_obj = lmf.beta_Sigma_hat_rvfl(
161            X=scaled_Z,
162            y=centered_y,
163            s=self.s,
164            sigma=self.sigma,
165            fit_intercept=False,
166            return_cov=self.return_std,
167            backend=self.backend,
168        )
169
170        self.beta_ = fit_obj["beta_hat"]
171
172        if self.return_std == True:
173            self.Sigma_ = fit_obj["Sigma_hat"]
174
175        self.GCV_ = fit_obj["GCV"]
176
177        return self
178
179    def predict(self, X, return_std=False, **kwargs):
180        """Predict test data X.
181
182        Parameters:
183
184            X: {array-like}, shape = [n_samples, n_features]
185                Training vectors, where n_samples is the number
186                of samples and n_features is the number of features.
187
188            return_std: {boolean}, standard dev. is returned or not
189
190            **kwargs: additional parameters to be passed to
191                    self.cook_test_set
192
193        Returns:
194
195            model predictions: {array-like}
196
197        """
198
199        if len(X.shape) == 1:  # one observation in the test set only
200            n_features = X.shape[0]
201            new_X = mo.rbind(
202                x=X.reshape(1, n_features),
203                y=np.ones(n_features).reshape(1, n_features),
204                backend=self.backend,
205            )
206
207        self.return_std = return_std
208
209        if self.return_std == False:
210            if len(X.shape) == 1:
211                return (
212                    self.y_mean_
213                    + mo.safe_sparse_dot(
214                        a=self.cook_test_set(new_X, **kwargs),
215                        b=self.beta_,
216                        backend=self.backend,
217                    )
218                )[0]
219
220            return self.y_mean_ + mo.safe_sparse_dot(
221                a=self.cook_test_set(X, **kwargs),
222                b=self.beta_,
223                backend=self.backend,
224            )
225
226        else:  # confidence interval required for preds?
227            if len(X.shape) == 1:
228                Z = self.cook_test_set(new_X, **kwargs)
229
230                pred_obj = lmf.beta_Sigma_hat_rvfl(
231                    s=self.s,
232                    sigma=self.sigma,
233                    X_star=Z,
234                    return_cov=True,
235                    beta_hat_=self.beta_,
236                    Sigma_hat_=self.Sigma_,
237                    backend=self.backend,
238                )
239
240                return (
241                    self.y_mean_ + pred_obj["preds"][0],
242                    pred_obj["preds_std"][0],
243                )
244
245            Z = self.cook_test_set(X, **kwargs)
246
247            pred_obj = lmf.beta_Sigma_hat_rvfl(
248                s=self.s,
249                sigma=self.sigma,
250                X_star=Z,
251                return_cov=True,
252                beta_hat_=self.beta_,
253                Sigma_hat_=self.Sigma_,
254                backend=self.backend,
255            )
256
257            return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])

Bayesian Random Vector Functional Link Network regression with one prior

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original features are included (True) in model's fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

seed: int
    reproducibility seed for nodes_sim=='uniform'

s: float
    std. dev. of regression parameters in Bayesian Ridge Regression

sigma: float
    std. dev. of residuals in Bayesian Ridge Regression

return_std: boolean
    if True, uncertainty around predictions is evaluated

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

beta_: array-like
    regression coefficients

Sigma_: array-like
    covariance of the distribution of fitted parameters

GCV_: float
    Generalized cross-validation error

y_mean_: float
    average response

Examples:

TBD
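
Since the docstring example is still marked TBD, here is a hedged usage sketch (assuming scikit-learn's `load_diabetes` data and the documented defaults):

```python
import numpy as np
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123)

reg = ns.BayesianRVFLRegressor(n_hidden_features=5, seed=123)
reg.fit(X_train, y_train)

# return_std=True returns a (posterior mean, posterior std. dev.) pair
mean_preds, std_preds = reg.predict(X_test, return_std=True)

# e.g. an approximate Gaussian 95% prediction interval: mean +/- 1.96 * std
lower, upper = mean_preds - 1.96 * std_preds, mean_preds + 1.96 * std_preds
print(np.mean((y_test >= lower) & (y_test <= upper)))  # empirical coverage
```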
def fit(self, X, y, **kwargs):
137    def fit(self, X, y, **kwargs):
138        """Fit BayesianRVFLRegressor to training data (X, y).
139
140        Parameters:
141
142            X: {array-like}, shape = [n_samples, n_features]
143                Training vectors, where n_samples is the number
144                of samples and n_features is the number of features.
145
146            y: array-like, shape = [n_samples]
147                Target values.
148
149            **kwargs: additional parameters to be passed to
150                    self.cook_training_set
151
152        Returns:
153
154            self: object
155
156        """
157
158        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
159
160        fit_obj = lmf.beta_Sigma_hat_rvfl(
161            X=scaled_Z,
162            y=centered_y,
163            s=self.s,
164            sigma=self.sigma,
165            fit_intercept=False,
166            return_cov=self.return_std,
167            backend=self.backend,
168        )
169
170        self.beta_ = fit_obj["beta_hat"]
171
172        if self.return_std == True:
173            self.Sigma_ = fit_obj["Sigma_hat"]
174
175        self.GCV_ = fit_obj["GCV"]
176
177        return self

Fit BayesianRVFLRegressor to training data (X, y).

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set

Returns:

self: object
def predict(self, X, return_std=False, **kwargs):
179    def predict(self, X, return_std=False, **kwargs):
180        """Predict test data X.
181
182        Parameters:
183
184            X: {array-like}, shape = [n_samples, n_features]
185                Test vectors, where n_samples is the number
186                of samples and n_features is the number of features.
187
188            return_std: {boolean}, standard dev. is returned or not
189
190            **kwargs: additional parameters to be passed to
191                    self.cook_test_set
192
193        Returns:
194
195            model predictions: {array-like}
196
197        """
198
199        if len(X.shape) == 1:  # one observation in the test set only
200            n_features = X.shape[0]
201            new_X = mo.rbind(
202                x=X.reshape(1, n_features),
203                y=np.ones(n_features).reshape(1, n_features),
204                backend=self.backend,
205            )
206
207        self.return_std = return_std
208
209        if self.return_std == False:
210            if len(X.shape) == 1:
211                return (
212                    self.y_mean_
213                    + mo.safe_sparse_dot(
214                        a=self.cook_test_set(new_X, **kwargs),
215                        b=self.beta_,
216                        backend=self.backend,
217                    )
218                )[0]
219
220            return self.y_mean_ + mo.safe_sparse_dot(
221                a=self.cook_test_set(X, **kwargs),
222                b=self.beta_,
223                backend=self.backend,
224            )
225
226        else:  # confidence interval required for preds?
227            if len(X.shape) == 1:
228                Z = self.cook_test_set(new_X, **kwargs)
229
230                pred_obj = lmf.beta_Sigma_hat_rvfl(
231                    s=self.s,
232                    sigma=self.sigma,
233                    X_star=Z,
234                    return_cov=True,
235                    beta_hat_=self.beta_,
236                    Sigma_hat_=self.Sigma_,
237                    backend=self.backend,
238                )
239
240                return (
241                    self.y_mean_ + pred_obj["preds"][0],
242                    pred_obj["preds_std"][0],
243                )
244
245            Z = self.cook_test_set(X, **kwargs)
246
247            pred_obj = lmf.beta_Sigma_hat_rvfl(
248                s=self.s,
249                sigma=self.sigma,
250                X_star=Z,
251                return_cov=True,
252                beta_hat_=self.beta_,
253                Sigma_hat_=self.Sigma_,
254                backend=self.backend,
255            )
256
257            return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

return_std: {boolean}, standard dev. is returned or not

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
class BayesianRVFL2Regressor(nnetsauce.Base, sklearn.base.RegressorMixin):
 15class BayesianRVFL2Regressor(Base, RegressorMixin):
 16    """Bayesian Random Vector Functional Link Network regression with two priors
 17
 18    Parameters:
 19
 20        n_hidden_features: int
 21            number of nodes in the hidden layer
 22
 23        activation_name: str
 24            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 25
 26        a: float
 27            hyperparameter for 'prelu' or 'elu' activation function
 28
 29        nodes_sim: str
 30            type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'
 31
 32        bias: boolean
 33            indicates if the hidden layer contains a bias term (True) or not (False)
 34
 35        dropout: float
 36            regularization parameter; (random) percentage of nodes dropped out
 37            of the training
 38
 39        direct_link: boolean
 40            indicates if the original features are included (True) in model's fitting or not (False)
 41
 42        n_clusters: int
 43            number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)
 44
 45        cluster_encode: bool
 46            defines how the variable containing clusters is treated (default is one-hot)
 47            if `False`, then labels are used, without one-hot encoding
 48
 49        type_clust: str
 50            type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')
 51
 52        type_scaling: a tuple of 3 strings
 53            scaling methods for inputs, hidden layer, and clustering respectively
 54            (and when relevant).
 55            Currently available: standardization ('std') or MinMax scaling ('minmax')
 56
 57        seed: int
 58            reproducibility seed for nodes_sim=='uniform'
 59
 60        s1: float
 61            std. dev. of init. regression parameters in Bayesian Ridge Regression
 62
 63        s2: float
 64            std. dev. of augmented regression parameters in Bayesian Ridge Regression
 65
 66        sigma: float
 67            std. dev. of residuals in Bayesian Ridge Regression
 68
 69        return_std: boolean
 70            if True, uncertainty around predictions is evaluated
 71
 72        backend: str
 73            "cpu" or "gpu" or "tpu"
 74
 75    Attributes:
 76
 77        beta_: array-like
 78            regression coefficients
 79
 80        Sigma_: array-like
 81            covariance of the distribution of fitted parameters
 82
 83        GCV_: float
 84            Generalized cross-validation error
 85
 86        y_mean_: float
 87            average response
 88
 89    Examples:
 90
 91    ```python
 92    TBD
 93    ```
 94
 95    """
 96
 97    # construct the object -----
 98
 99    def __init__(
100        self,
101        n_hidden_features=5,
102        activation_name="relu",
103        a=0.01,
104        nodes_sim="sobol",
105        bias=True,
106        dropout=0,
107        direct_link=True,
108        n_clusters=0,
109        cluster_encode=True,
110        type_clust="kmeans",
111        type_scaling=("std", "std", "std"),
112        seed=123,
113        s1=0.1,
114        s2=0.1,
115        sigma=0.05,
116        return_std=True,
117        backend="cpu",
118    ):
119        super().__init__(
120            n_hidden_features=n_hidden_features,
121            activation_name=activation_name,
122            a=a,
123            nodes_sim=nodes_sim,
124            bias=bias,
125            dropout=dropout,
126            direct_link=direct_link,
127            n_clusters=n_clusters,
128            cluster_encode=cluster_encode,
129            type_clust=type_clust,
130            type_scaling=type_scaling,
131            seed=seed,
132            backend=backend,
133        )
134
135        self.s1 = s1
136        self.s2 = s2
137        self.sigma = sigma
138        self.beta_ = None
139        self.Sigma_ = None
140        self.GCV_ = None
141        self.return_std = return_std
142
143    def fit(self, X, y, **kwargs):
144        """Fit BayesianRVFL2Regressor to training data (X, y)
145
146        Parameters:
147
148            X: {array-like}, shape = [n_samples, n_features]
149                Training vectors, where n_samples is the number
150                of samples and n_features is the number of features
151
152            y: array-like, shape = [n_samples]
153                Target values
154
155            **kwargs: additional parameters to be passed to
156                    self.cook_training_set
157
158        Returns:
159
160            self: object
161
162        """
163
164        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
165
166        n, p = X.shape
167        q = self.n_hidden_features
168
169        if self.direct_link == True:
170            r = p + self.n_clusters
171
172            block11 = (self.s1**2) * np.eye(r)
173            block12 = np.zeros((r, q))
174            block21 = np.zeros((q, r))
175            block22 = (self.s2**2) * np.eye(q)
176
177            Sigma_prior = mo.rbind(
178                x=mo.cbind(x=block11, y=block12, backend=self.backend),
179                y=mo.cbind(x=block21, y=block22, backend=self.backend),
180                backend=self.backend,
181            )
182
183        else:
184            Sigma_prior = (self.s2**2) * np.eye(q)
185
186        fit_obj = lmf.beta_Sigma_hat_rvfl2(
187            X=scaled_Z,
188            y=centered_y,
189            Sigma=Sigma_prior,
190            sigma=self.sigma,
191            fit_intercept=False,
192            return_cov=self.return_std,
193            backend=self.backend,
194        )
195
196        self.beta_ = fit_obj["beta_hat"]
197
198        if self.return_std == True:
199            self.Sigma_ = fit_obj["Sigma_hat"]
200
201        self.GCV_ = fit_obj["GCV"]
202
203        return self
204
205    def predict(self, X, return_std=False, **kwargs):
206        """Predict test data X.
207
208        Parameters:
209
210            X: {array-like}, shape = [n_samples, n_features]
211                Training vectors, where n_samples is the number
212                of samples and n_features is the number of features.
213
214            return_std: {boolean}, standard dev. is returned or not
215
216            **kwargs: additional parameters to be passed to
217                    self.cook_test_set
218
219        Returns:
220
221            model predictions: {array-like}
222
223        """
224
225        if len(X.shape) == 1:  # one observation in the test set only
226            n_features = X.shape[0]
227            new_X = mo.rbind(
228                x=X.reshape(1, n_features),
229                y=np.ones(n_features).reshape(1, n_features),
230                backend=self.backend,
231            )
232
233        self.return_std = return_std
234
235        if self.return_std == False:
236            if len(X.shape) == 1:
237                return (
238                    self.y_mean_
239                    + mo.safe_sparse_dot(
240                        self.cook_test_set(new_X, **kwargs),
241                        self.beta_,
242                        backend=self.backend,
243                    )
244                )[0]
245
246            return self.y_mean_ + mo.safe_sparse_dot(
247                self.cook_test_set(X, **kwargs),
248                self.beta_,
249                backend=self.backend,
250            )
251
252        else:  # confidence interval required for preds?
253            if len(X.shape) == 1:
254                Z = self.cook_test_set(new_X, **kwargs)
255
256                pred_obj = lmf.beta_Sigma_hat_rvfl2(
257                    X_star=Z,
258                    return_cov=self.return_std,
259                    beta_hat_=self.beta_,
260                    Sigma_hat_=self.Sigma_,
261                    backend=self.backend,
262                )
263
264                return (
265                    self.y_mean_ + pred_obj["preds"][0],
266                    pred_obj["preds_std"][0],
267                )
268
269            Z = self.cook_test_set(X, **kwargs)
270
271            pred_obj = lmf.beta_Sigma_hat_rvfl2(
272                X_star=Z,
273                return_cov=self.return_std,
274                beta_hat_=self.beta_,
275                Sigma_hat_=self.Sigma_,
276                backend=self.backend,
277            )
278
279            return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])

Bayesian Random Vector Functional Link Network regression with two priors

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original features are included (True) in model's fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

seed: int
    reproducibility seed for nodes_sim=='uniform'

s1: float
    std. dev. of init. regression parameters in Bayesian Ridge Regression

s2: float
    std. dev. of augmented regression parameters in Bayesian Ridge Regression

sigma: float
    std. dev. of residuals in Bayesian Ridge Regression

return_std: boolean
    if True, uncertainty around predictions is evaluated

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

beta_: array-like
    regression's coefficients

Sigma_: array-like
    covariance of the distribution of fitted parameters

GCV_: float
    Generalized cross-validation error

y_mean_: float
    average response

Examples:

TBD
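
The example above is still marked TBD. Pending an official example, here is a minimal usage sketch based on the constructor and the `fit`/`predict` signatures shown above; the dataset and the train/test split are illustrative choices, not taken from the package's own examples.

```python
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123
)

# two-prior Bayesian RVFL: s1 acts on the original (+ cluster) features,
# s2 on the hidden-layer features
regr = ns.BayesianRVFL2Regressor(n_hidden_features=5, s1=0.1, s2=0.1, sigma=0.05)
regr.fit(X_train, y_train)

print(regr.GCV_)             # generalized cross-validation error

mean = regr.predict(X_test)  # point predictions
mean, std = regr.predict(X_test, return_std=True)  # predictions + std. dev.
```
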
def fit(self, X, y, **kwargs):
143    def fit(self, X, y, **kwargs):
144        """Fit BayesianRVFL2Regressor to training data (X, y)
145
146        Parameters:
147
148            X: {array-like}, shape = [n_samples, n_features]
149                Training vectors, where n_samples is the number
150                of samples and n_features is the number of features
151
152            y: array-like, shape = [n_samples]
153                Target values
154
155            **kwargs: additional parameters to be passed to
156                    self.cook_training_set
157
158        Returns:
159
160            self: object
161
162        """
163
164        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
165
166        n, p = X.shape
167        q = self.n_hidden_features
168
169        if self.direct_link == True:
170            r = p + self.n_clusters
171
172            block11 = (self.s1**2) * np.eye(r)
173            block12 = np.zeros((r, q))
174            block21 = np.zeros((q, r))
175            block22 = (self.s2**2) * np.eye(q)
176
177            Sigma_prior = mo.rbind(
178                x=mo.cbind(x=block11, y=block12, backend=self.backend),
179                y=mo.cbind(x=block21, y=block22, backend=self.backend),
180                backend=self.backend,
181            )
182
183        else:
184            Sigma_prior = (self.s2**2) * np.eye(q)
185
186        fit_obj = lmf.beta_Sigma_hat_rvfl2(
187            X=scaled_Z,
188            y=centered_y,
189            Sigma=Sigma_prior,
190            sigma=self.sigma,
191            fit_intercept=False,
192            return_cov=self.return_std,
193            backend=self.backend,
194        )
195
196        self.beta_ = fit_obj["beta_hat"]
197
198        if self.return_std == True:
199            self.Sigma_ = fit_obj["Sigma_hat"]
200
201        self.GCV_ = fit_obj["GCV"]
202
203        return self

Fit BayesianRVFL2Regressor to training data (X, y)

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features

y: array-like, shape = [n_samples]
    Target values

**kwargs: additional parameters to be passed to
        self.cook_training_set

Returns:

self: object
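
For reference, the prior covariance assembled by `fit` above is block-diagonal when `direct_link=True`: `s1**2` on the coefficients of the original (and cluster) columns, `s2**2` on the coefficients of the hidden-layer columns; when `direct_link=False`, only the hidden-layer block remains. The short NumPy sketch below simply mirrors that construction, with illustrative dimensions.

```python
import numpy as np

p, n_clusters, n_hidden_features = 10, 0, 5  # illustrative dimensions
s1, s2 = 0.1, 0.1                            # prior standard deviations

r, q = p + n_clusters, n_hidden_features
# same block-diagonal prior as built in fit() when direct_link=True
Sigma_prior = np.block(
    [
        [(s1**2) * np.eye(r), np.zeros((r, q))],
        [np.zeros((q, r)), (s2**2) * np.eye(q)],
    ]
)
assert Sigma_prior.shape == (r + q, r + q)
```
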
def predict(self, X, return_std=False, **kwargs):
205    def predict(self, X, return_std=False, **kwargs):
206        """Predict test data X.
207
208        Parameters:
209
210            X: {array-like}, shape = [n_samples, n_features]
211                Training vectors, where n_samples is the number
212                of samples and n_features is the number of features.
213
214            return_std: {boolean}, standard dev. is returned or not
215
216            **kwargs: additional parameters to be passed to
217                    self.cook_test_set
218
219        Returns:
220
221            model predictions: {array-like}
222
223        """
224
225        if len(X.shape) == 1:  # one observation in the test set only
226            n_features = X.shape[0]
227            new_X = mo.rbind(
228                x=X.reshape(1, n_features),
229                y=np.ones(n_features).reshape(1, n_features),
230                backend=self.backend,
231            )
232
233        self.return_std = return_std
234
235        if self.return_std == False:
236            if len(X.shape) == 1:
237                return (
238                    self.y_mean_
239                    + mo.safe_sparse_dot(
240                        self.cook_test_set(new_X, **kwargs),
241                        self.beta_,
242                        backend=self.backend,
243                    )
244                )[0]
245
246            return self.y_mean_ + mo.safe_sparse_dot(
247                self.cook_test_set(X, **kwargs),
248                self.beta_,
249                backend=self.backend,
250            )
251
252        else:  # confidence interval required for preds?
253            if len(X.shape) == 1:
254                Z = self.cook_test_set(new_X, **kwargs)
255
256                pred_obj = lmf.beta_Sigma_hat_rvfl2(
257                    X_star=Z,
258                    return_cov=self.return_std,
259                    beta_hat_=self.beta_,
260                    Sigma_hat_=self.Sigma_,
261                    backend=self.backend,
262                )
263
264                return (
265                    self.y_mean_ + pred_obj["preds"][0],
266                    pred_obj["preds_std"][0],
267                )
268
269            Z = self.cook_test_set(X, **kwargs)
270
271            pred_obj = lmf.beta_Sigma_hat_rvfl2(
272                X_star=Z,
273                return_cov=self.return_std,
274                beta_hat_=self.beta_,
275                Sigma_hat_=self.Sigma_,
276                backend=self.backend,
277            )
278
279            return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

return_std: boolean
    if True, the standard deviation of the predictions is returned as well

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
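
When `return_std=True`, `predict` returns a `(mean, std)` tuple, as the source above shows. A Gaussian prediction interval can be derived from it; the sketch below assumes a fitted `regr` and a test set `X_test` as in the class-level example, with a 95% level chosen for illustration.

```python
from scipy.stats import norm

mean, std = regr.predict(X_test, return_std=True)

level = 95
z = norm.ppf(1 - (100 - level) / 200)  # ~1.96 for a 95% interval
lower, upper = mean - z * std, mean + z * std
```
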
class ClassicalMTS(nnetsauce.Base):
 42class ClassicalMTS(Base):
 43    """Multivariate time series (FactorMTS) forecasting with Factor models
 44
 45    Parameters:
 46
 47        model: type of model: str.
 48            currently, 'VAR', 'VECM', 'ARIMA', 'ETS', 'Theta'
 49
 50    Attributes:
 51
 52        df_: data frame
 53            the input data frame, in case a data.frame is provided to `fit`
 54
 55        level_: int
 56            level of confidence for prediction intervals (default is 95)
 57
 58    Examples:
 59    See examples/classical_mts_timeseries.py
 60    """
 61
 62    # construct the object -----
 63
 64    def __init__(self, model="VAR"):
 65
 66        self.model = model
 67        if self.model == "VAR":
 68            self.obj = VAR
 69        elif self.model == "VECM":
 70            self.obj = VECM
 71        elif self.model == "ARIMA":
 72            self.obj = ARIMA
 73        elif self.model == "ETS":
 74            self.obj = ExponentialSmoothing
 75        elif self.model == "Theta":
 76            self.obj = ThetaModel
 77        else:
 78            raise ValueError("model not recognized")
 79        self.n_series = None
 80        self.replications = None
 81        self.mean_ = None
 82        self.upper_ = None
 83        self.lower_ = None
 84        self.output_dates_ = None
 85        self.alpha_ = None
 86        self.df_ = None
 87        self.residuals_ = []
 88        self.sims_ = None
 89        self.level_ = None
 90
 91    def fit(self, X, **kwargs):
 92        """Fit FactorMTS model to training data X, with optional regressors xreg
 93
 94        Parameters:
 95
 96        X: {array-like}, shape = [n_samples, n_features]
 97            Training time series, where n_samples is the number
 98            of samples and n_features is the number of features;
 99            X must be in increasing order (most recent observations last)
100
101        **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)
102
103        Returns:
104
105        self: object
106        """
107
108        try:
109            self.n_series = X.shape[1]
110        except Exception:
111            self.n_series = 1
112
113        if (isinstance(X, pd.DataFrame) is False) and isinstance(
114            X, pd.Series
115        ) is False:  # input data set is a numpy array
116
117            X = pd.DataFrame(X)
118            if self.n_series > 1:
119                self.series_names = ["series" + str(i) for i in range(X.shape[1])]
120            else:
121                self.series_names = "series0"
122
123        else:  # input data set is a DataFrame or Series with column names
124
125            X_index = None
126            if X.index is not None and len(X.shape) > 1:
127                X_index = X.index
128                X = copy.deepcopy(mo.convert_df_to_numeric(X))
129            if X_index is not None:
130                try:
131                    X.index = X_index
132                except Exception:
133                    pass
134            if isinstance(X, pd.DataFrame):
135                self.series_names = X.columns.tolist()
136            else:
137                self.series_names = X.name
138
139        if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series):
140            self.df_ = X
141            X = X.values
142            self.df_.columns = self.series_names
143            self.input_dates = ts.compute_input_dates(self.df_)
144        else:
145            self.df_ = pd.DataFrame(X, columns=self.series_names)
146
147        if self.model == "Theta":
148            self.obj = self.obj(self.df_, **kwargs).fit()
149        else:
150            self.obj = self.obj(X, **kwargs).fit(**kwargs)
151
152        return self
153
154    def predict(self, h=5, level=95, **kwargs):
155        """Forecast all the time series, h steps ahead
156
157        Parameters:
158
159        h: {integer}
160            Forecasting horizon
161
162        **kwargs: additional parameters to be passed to
163                self.cook_test_set
164
165        Returns:
166
167        model predictions for horizon = h: {array-like}
168
169        """
170
171        self.output_dates_, frequency = ts.compute_output_dates(self.df_, h)
172
173        self.level_ = level
174
175        self.lower_ = None  # do not remove (/!\)
176
177        self.upper_ = None  # do not remove (/!\)
178
179        self.sims_ = None  # do not remove (/!\)
180
181        self.level_ = level
182
183        self.alpha_ = 100 - level
184
185        pi_multiplier = norm.ppf(1 - self.alpha_ / 200)
186
187        # Named tuple for forecast results
188        DescribeResult = namedtuple("DescribeResult", ("mean", "lower", "upper"))
189
190        if self.model == "VAR":
191            mean_forecast, lower_bound, upper_bound = self.obj.forecast_interval(
192                self.obj.endog, steps=h, alpha=self.alpha_ / 100, **kwargs
193            )
194
195        elif self.model == "VECM":
196            forecast_result = self.obj.predict(steps=h)
197            mean_forecast = forecast_result
198            lower_bound, upper_bound = self._compute_confidence_intervals(
199                forecast_result, alpha=self.alpha_ / 100, **kwargs
200            )
201
202        elif self.model == "ARIMA":
203            forecast_result = self.obj.get_forecast(steps=h)
204            mean_forecast = forecast_result.predicted_mean
205            lower_bound = forecast_result.conf_int()[:, 0]
206            upper_bound = forecast_result.conf_int()[:, 1]
207
208        elif self.model == "ETS":
209            forecast_result = self.obj.forecast(steps=h)
210            residuals = self.obj.resid
211            std_errors = np.std(residuals)
212            mean_forecast = forecast_result
213            lower_bound = forecast_result - pi_multiplier * std_errors
214            upper_bound = forecast_result + pi_multiplier * std_errors
215
216        elif self.model == "Theta":
217            try:
218                mean_forecast = self.obj.forecast(steps=h).values
219                forecast_result = self.obj.prediction_intervals(
220                    steps=h, alpha=self.alpha_ / 100, **kwargs
221                )
222                lower_bound = forecast_result["lower"].values
223                upper_bound = forecast_result["upper"].values
224            except Exception:
225                mean_forecast = self.obj.forecast(steps=h)
226                forecast_result = self.obj.prediction_intervals(
227                    steps=h, alpha=self.alpha_ / 100, **kwargs
228                )
229                lower_bound = forecast_result["lower"]
230                upper_bound = forecast_result["upper"]
231
232        else:
233
234            raise ValueError("model not recognized")
235
236        try:
237            self.mean_ = pd.DataFrame(
238                mean_forecast,
239                columns=self.series_names,
240                index=self.output_dates_,
241            )
242            self.lower_ = pd.DataFrame(
243                lower_bound, columns=self.series_names, index=self.output_dates_
244            )
245            self.upper_ = pd.DataFrame(
246                upper_bound, columns=self.series_names, index=self.output_dates_
247            )
248        except Exception:
249            self.mean_ = pd.Series(
250                mean_forecast, name=self.series_names, index=self.output_dates_
251            )
252            self.lower_ = pd.Series(
253                lower_bound, name=self.series_names, index=self.output_dates_
254            )
255            self.upper_ = pd.Series(
256                upper_bound, name=self.series_names, index=self.output_dates_
257            )
258
259        return DescribeResult(mean=self.mean_, lower=self.lower_, upper=self.upper_)
260
261    def _compute_confidence_intervals(self, forecast_result, alpha):
262        """
263        Compute confidence intervals for VECM forecasts.
264        Uses the covariance of residuals to approximate the confidence intervals.
265        """
266        residuals = self.obj.resid
267        cov_matrix = np.cov(residuals.T)  # Covariance matrix of residuals
268        std_errors = np.sqrt(np.diag(cov_matrix))  # Standard errors
269
270        z_value = norm.ppf(1 - alpha / 2)  # Z-score for the given alpha level
271        lower_bound = forecast_result - z_value * std_errors
272        upper_bound = forecast_result + z_value * std_errors
273
274        return lower_bound, upper_bound
275
276    def score(self, X, training_index, testing_index, scoring=None, **kwargs):
277        """Train on training_index, score on testing_index."""
278
279        assert (
280            bool(set(training_index).intersection(set(testing_index))) == False
281        ), "Non-overlapping 'training_index' and 'testing_index' required"
282
283        # Dimensions
284        try:
285            # multivariate time series
286            n, p = X.shape
287        except:
288            # univariate time series
289            n = X.shape[0]
290            p = 1
291
292        # Training and testing sets
293        if p > 1:
294            X_train = X[training_index, :]
295            X_test = X[testing_index, :]
296        else:
297            X_train = X[training_index]
298            X_test = X[testing_index]
299
300        # Horizon
301        h = len(testing_index)
302        assert (
303            len(training_index) + h
304        ) <= n, "Please check lengths of training and testing windows"
305
306        # Fit and predict
307        self.fit(X_train, **kwargs)
308        preds = self.predict(h=h, **kwargs)
309
310        if scoring is None:
311            scoring = "neg_root_mean_squared_error"
312
313        # check inputs
314        assert scoring in (
315            "explained_variance",
316            "neg_mean_absolute_error",
317            "neg_mean_squared_error",
318            "neg_root_mean_squared_error",
319            "neg_mean_squared_log_error",
320            "neg_median_absolute_error",
321            "r2",
322        ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \
323                               'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \
324                               'neg_median_absolute_error', 'r2')"
325
326        scoring_options = {
327            "explained_variance": skm2.explained_variance_score,
328            "neg_mean_absolute_error": skm2.mean_absolute_error,
329            "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2),
330            "neg_root_mean_squared_error": lambda x, y: np.sqrt(np.mean((x - y) ** 2)),
331            "neg_mean_squared_log_error": skm2.mean_squared_log_error,
332            "neg_median_absolute_error": skm2.median_absolute_error,
333            "r2": skm2.r2_score,
334        }
335
336        # if p > 1:
337        #     return tuple(
338        #         [
339        #             scoring_options[scoring](
340        #                 X_test[:, i], preds[:, i]#, **kwargs
341        #             )
342        #             for i in range(p)
343        #         ]
344        #     )
345        # else:
346        return scoring_options[scoring](X_test, preds)
347
348    def plot(self, series=None, type_axis="dates", type_plot="pi"):
349        """Plot time series forecast
350
351        Parameters:
352
353        series: {integer} or {string}
354            series index or name
355
356        """
357
358        assert all(
359            [
360                self.mean_ is not None,
361                self.lower_ is not None,
362                self.upper_ is not None,
363                self.output_dates_ is not None,
364            ]
365        ), "model forecasting must be obtained first (with predict)"
366
367        if series is None:
368            assert (
369                self.n_series == 1
370            ), "please specify series index or name (n_series > 1)"
371            series = 0
372
373        if isinstance(series, str):
374            assert (
375                series in self.series_names
376            ), f"series {series} doesn't exist in the input dataset"
377            series_idx = self.df_.columns.get_loc(series)
378        else:
379            assert isinstance(series, int) and (
380                0 <= series < self.n_series
381            ), f"check series index (< {self.n_series})"
382            series_idx = series
383
384        if isinstance(self.df_, pd.DataFrame):
385            y_all = list(self.df_.iloc[:, series_idx]) + list(
386                self.mean_.iloc[:, series_idx]
387            )
388            y_test = list(self.mean_.iloc[:, series_idx])
389        else:
390            y_all = list(self.df_.values) + list(self.mean_.values)
391            y_test = list(self.mean_.values)
392        n_points_all = len(y_all)
393        n_points_train = self.df_.shape[0]
394
395        if type_axis == "numeric":
396            x_all = [i for i in range(n_points_all)]
397            x_test = [i for i in range(n_points_train, n_points_all)]
398
399        if type_axis == "dates":  # use dates
400            x_all = np.concatenate(
401                (self.input_dates.values, self.output_dates_.values), axis=None
402            )
403            x_test = self.output_dates_.values
404
405        if type_plot == "pi":
406            fig, ax = plt.subplots()
407            ax.plot(x_all, y_all, "-")
408            ax.plot(x_test, y_test, "-", color="orange")
409            try:
410                ax.fill_between(
411                    x_test,
412                    self.lower_.iloc[:, series_idx],
413                    self.upper_.iloc[:, series_idx],
414                    alpha=0.2,
415                    color="orange",
416                )
417            except Exception:
418                ax.fill_between(
419                    x_test,
420                    self.lower_.values,
421                    self.upper_.values,
422                    alpha=0.2,
423                    color="orange",
424                )
425            if self.replications is None:
426                if self.n_series > 1:
427                    plt.title(
428                        f"prediction intervals for {series}",
429                        loc="left",
430                        fontsize=12,
431                        fontweight=0,
432                        color="black",
433                    )
434                else:
435                    plt.title(
436                        f"prediction intervals for input time series",
437                        loc="left",
438                        fontsize=12,
439                        fontweight=0,
440                        color="black",
441                    )
442                plt.show()
443            else:  # self.replications is not None
444                if self.n_series > 1:
445                    plt.title(
446                        f"prediction intervals for {self.replications} simulations of {series}",
447                        loc="left",
448                        fontsize=12,
449                        fontweight=0,
450                        color="black",
451                    )
452                else:
453                    plt.title(
454                        f"prediction intervals for {self.replications} simulations of input time series",
455                        loc="left",
456                        fontsize=12,
457                        fontweight=0,
458                        color="black",
459                    )
460                plt.show()
461
462        if type_plot == "spaghetti":
463            palette = plt.get_cmap("Set1")
464            sims_ix = getsims(self.sims_, series_idx)
465            plt.plot(x_all, y_all, "-")
466            for col_ix in range(
467                sims_ix.shape[1]
468            ):  # avoid this when there are thousands of simulations
469                plt.plot(
470                    x_test,
471                    sims_ix[:, col_ix],
472                    "-",
473                    color=palette(col_ix),
474                    linewidth=1,
475                    alpha=0.9,
476                )
477            plt.plot(x_all, y_all, "-", color="black")
478            plt.plot(x_test, y_test, "-", color="blue")
479            # Add titles
480            if self.n_series > 1:
481                plt.title(
482                    f"{self.replications} simulations of {series}",
483                    loc="left",
484                    fontsize=12,
485                    fontweight=0,
486                    color="black",
487                )
488            else:
489                plt.title(
490                    f"{self.replications} simulations of input time series",
491                    loc="left",
492                    fontsize=12,
493                    fontweight=0,
494                    color="black",
495                )
496            plt.xlabel("Time")
497            plt.ylabel("Values")
498            # Show the graph
499            plt.show()
500
501    def cross_val_score(
502        self,
503        X,
504        scoring="root_mean_squared_error",
505        n_jobs=None,
506        verbose=0,
507        xreg=None,
508        initial_window=5,
509        horizon=3,
510        fixed_window=False,
511        show_progress=True,
512        level=95,
513        **kwargs,
514    ):
515        """Evaluate a score by time series cross-validation.
516
517        Parameters:
518
519            X: {array-like, sparse matrix} of shape (n_samples, n_features)
520                The data to fit.
521
522            scoring: str or a function
523                A str in ('root_mean_squared_error', 'mean_squared_error', 'mean_error',
524                'mean_absolute_error', 'mean_error', 'mean_percentage_error',
525                'mean_absolute_percentage_error',  'winkler_score', 'coverage')
526                Or a function defined as 'coverage' and 'winkler_score' in `utils.timeseries`
527
528            n_jobs: int, default=None
529                Number of jobs to run in parallel.
530
531            verbose: int, default=0
532                The verbosity level.
533
534            xreg: array-like, optional (default=None)
535                Additional (external) regressors to be passed to `fit`
536                xreg must be in 'increasing' order (most recent observations last)
537
538            initial_window: int
539                initial number of consecutive values in each training set sample
540
541            horizon: int
542                number of consecutive values in test set sample
543
544            fixed_window: boolean
545                if False, all training samples start at index 0, and the training
546                window's size is increasing.
547                if True, the training window's size is fixed, and the window is
548                rolling forward
549
550            show_progress: boolean
551                if True, a progress bar is printed
552
553            **kwargs: dict
554                additional parameters to be passed to `fit` and `predict`
555
556        Returns:
557
558            A tuple: descriptive statistics or errors and raw errors
559
560        """
561        tscv = TimeSeriesSplit()
562
563        tscv_obj = tscv.split(
564            X,
565            initial_window=initial_window,
566            horizon=horizon,
567            fixed_window=fixed_window,
568        )
569
570        if isinstance(scoring, str):
571
572            assert scoring in (
573                "root_mean_squared_error",
574                "mean_squared_error",
575                "mean_error",
576                "mean_absolute_error",
577                "mean_percentage_error",
578                "mean_absolute_percentage_error",
579                "winkler_score",
580                "coverage",
581            ), "must have scoring in ('root_mean_squared_error', 'mean_squared_error', 'mean_error', 'mean_absolute_error', 'mean_error', 'mean_percentage_error', 'mean_absolute_percentage_error',  'winkler_score', 'coverage')"
582
583            def err_func(X_test, X_pred, scoring):
584                if (self.replications is not None) or (
585                    self.type_pi == "gaussian"
586                ):  # probabilistic
587                    if scoring == "winkler_score":
588                        return winkler_score(X_pred, X_test, level=level)
589                    elif scoring == "coverage":
590                        return coverage(X_pred, X_test, level=level)
591                    else:
592                        return mean_errors(
593                            pred=X_pred.mean, actual=X_test, scoring=scoring
594                        )
595                else:  # not probabilistic
596                    return mean_errors(pred=X_pred, actual=X_test, scoring=scoring)
597
598        else:  # isinstance(scoring, str) = False
599
600            err_func = scoring
601
602        errors = []
603
604        train_indices = []
605
606        test_indices = []
607
608        for train_index, test_index in tscv_obj:
609            train_indices.append(train_index)
610            test_indices.append(test_index)
611
612        if show_progress is True:
613            iterator = tqdm(zip(train_indices, test_indices), total=len(train_indices))
614        else:
615            iterator = zip(train_indices, test_indices)
616
617        for train_index, test_index in iterator:
618
619            if verbose == 1:
620                print(f"TRAIN: {train_index}")
621                print(f"TEST: {test_index}")
622
623            if isinstance(X, pd.DataFrame):
624                self.fit(X.iloc[train_index, :], xreg=xreg, **kwargs)
625                X_test = X.iloc[test_index, :]
626            else:
627                self.fit(X[train_index, :], xreg=xreg, **kwargs)
628                X_test = X[test_index, :]
629            X_pred = self.predict(h=int(len(test_index)), level=level, **kwargs)
630
631            errors.append(err_func(X_test, X_pred, scoring))
632
633        res = np.asarray(errors)
634
635        return res, describe(res)

Multivariate time series forecasting with classical statistical models ('VAR', 'VECM', 'ARIMA', 'ETS', 'Theta')

Parameters:

model: type of model: str.
    currently, 'VAR', 'VECM', 'ARIMA', 'ETS', 'Theta'

Attributes:

df_: data frame
    the input data frame, in case a data.frame is provided to `fit`

level_: int
    level of confidence for prediction intervals (default is 95)

Examples: See examples/classical_mts_timeseries.py
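
Besides the script referenced above, here is a minimal, self-contained sketch; the synthetic data, the dates and the choice of a VAR model are illustrative assumptions.

```python
import nnetsauce as ns
import numpy as np
import pandas as pd

# synthetic bivariate series, oldest observations first (increasing order)
rng = np.random.default_rng(123)
dates = pd.date_range("2020-01-01", periods=60, freq="MS")
X = pd.DataFrame(
    rng.normal(size=(60, 2)).cumsum(axis=0),
    columns=["series1", "series2"],
    index=dates,
)

mts = ns.ClassicalMTS(model="VAR")
mts.fit(X)
res = mts.predict(h=5, level=95)  # DescribeResult(mean=..., lower=..., upper=...)
print(res.mean)
```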

def fit(self, X, **kwargs):
 91    def fit(self, X, **kwargs):
 92        """Fit FactorMTS model to training data X, with optional regressors xreg
 93
 94        Parameters:
 95
 96        X: {array-like}, shape = [n_samples, n_features]
 97            Training time series, where n_samples is the number
 98            of samples and n_features is the number of features;
 99            X must be in increasing order (most recent observations last)
100
101        **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)
102
103        Returns:
104
105        self: object
106        """
107
108        try:
109            self.n_series = X.shape[1]
110        except Exception:
111            self.n_series = 1
112
113        if (isinstance(X, pd.DataFrame) is False) and isinstance(
114            X, pd.Series
115        ) is False:  # input data set is a numpy array
116
117            X = pd.DataFrame(X)
118            if self.n_series > 1:
119                self.series_names = ["series" + str(i) for i in range(X.shape[1])]
120            else:
121                self.series_names = "series0"
122
123        else:  # input data set is a DataFrame or Series with column names
124
125            X_index = None
126            if X.index is not None and len(X.shape) > 1:
127                X_index = X.index
128                X = copy.deepcopy(mo.convert_df_to_numeric(X))
129            if X_index is not None:
130                try:
131                    X.index = X_index
132                except Exception:
133                    pass
134            if isinstance(X, pd.DataFrame):
135                self.series_names = X.columns.tolist()
136            else:
137                self.series_names = X.name
138
139        if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series):
140            self.df_ = X
141            X = X.values
142            self.df_.columns = self.series_names
143            self.input_dates = ts.compute_input_dates(self.df_)
144        else:
145            self.df_ = pd.DataFrame(X, columns=self.series_names)
146
147        if self.model == "Theta":
148            self.obj = self.obj(self.df_, **kwargs).fit()
149        else:
150            self.obj = self.obj(X, **kwargs).fit(**kwargs)
151
152        return self

Fit the ClassicalMTS model to training data X, with optional regressors xreg

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training time series, where n_samples is the number
    of samples and n_features is the number of features;
    X must be in increasing order (most recent observations last)

**kwargs: for now, additional parameters passed on to kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)

Returns:

self: object

def predict(self, h=5, level=95, **kwargs):
154    def predict(self, h=5, level=95, **kwargs):
155        """Forecast all the time series, h steps ahead
156
157        Parameters:
158
159        h: {integer}
160            Forecasting horizon
161
162        **kwargs: additional parameters to be passed to
163                self.cook_test_set
164
165        Returns:
166
167        model predictions for horizon = h: {array-like}
168
169        """
170
171        self.output_dates_, frequency = ts.compute_output_dates(self.df_, h)
172
173        self.level_ = level
174
175        self.lower_ = None  # do not remove (/!\)
176
177        self.upper_ = None  # do not remove (/!\)
178
179        self.sims_ = None  # do not remove (/!\)
180
181        self.level_ = level
182
183        self.alpha_ = 100 - level
184
185        pi_multiplier = norm.ppf(1 - self.alpha_ / 200)
186
187        # Named tuple for forecast results
188        DescribeResult = namedtuple("DescribeResult", ("mean", "lower", "upper"))
189
190        if self.model == "VAR":
191            mean_forecast, lower_bound, upper_bound = self.obj.forecast_interval(
192                self.obj.endog, steps=h, alpha=self.alpha_ / 100, **kwargs
193            )
194
195        elif self.model == "VECM":
196            forecast_result = self.obj.predict(steps=h)
197            mean_forecast = forecast_result
198            lower_bound, upper_bound = self._compute_confidence_intervals(
199                forecast_result, alpha=self.alpha_ / 100, **kwargs
200            )
201
202        elif self.model == "ARIMA":
203            forecast_result = self.obj.get_forecast(steps=h)
204            mean_forecast = forecast_result.predicted_mean
205            lower_bound = forecast_result.conf_int()[:, 0]
206            upper_bound = forecast_result.conf_int()[:, 1]
207
208        elif self.model == "ETS":
209            forecast_result = self.obj.forecast(steps=h)
210            residuals = self.obj.resid
211            std_errors = np.std(residuals)
212            mean_forecast = forecast_result
213            lower_bound = forecast_result - pi_multiplier * std_errors
214            upper_bound = forecast_result + pi_multiplier * std_errors
215
216        elif self.model == "Theta":
217            try:
218                mean_forecast = self.obj.forecast(steps=h).values
219                forecast_result = self.obj.prediction_intervals(
220                    steps=h, alpha=self.alpha_ / 100, **kwargs
221                )
222                lower_bound = forecast_result["lower"].values
223                upper_bound = forecast_result["upper"].values
224            except Exception:
225                mean_forecast = self.obj.forecast(steps=h)
226                forecast_result = self.obj.prediction_intervals(
227                    steps=h, alpha=self.alpha_ / 100, **kwargs
228                )
229                lower_bound = forecast_result["lower"]
230                upper_bound = forecast_result["upper"]
231
232        else:
233
234            raise ValueError("model not recognized")
235
236        try:
237            self.mean_ = pd.DataFrame(
238                mean_forecast,
239                columns=self.series_names,
240                index=self.output_dates_,
241            )
242            self.lower_ = pd.DataFrame(
243                lower_bound, columns=self.series_names, index=self.output_dates_
244            )
245            self.upper_ = pd.DataFrame(
246                upper_bound, columns=self.series_names, index=self.output_dates_
247            )
248        except Exception:
249            self.mean_ = pd.Series(
250                mean_forecast, name=self.series_names, index=self.output_dates_
251            )
252            self.lower_ = pd.Series(
253                lower_bound, name=self.series_names, index=self.output_dates_
254            )
255            self.upper_ = pd.Series(
256                upper_bound, name=self.series_names, index=self.output_dates_
257            )
258
259        return DescribeResult(mean=self.mean_, lower=self.lower_, upper=self.upper_)

Forecast all the time series, h steps ahead

Parameters:

h: {integer}
    Forecasting horizon

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions for horizon = h: {array-like}
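
Continuing the sketch from the class-level example above: besides the returned named tuple, the forecasts are also stored on the fitted object, indexed by the forecast dates, and `plot` can display one series with its prediction interval (the series name below is the one assumed in that sketch).

```python
res = mts.predict(h=5, level=95)

print(mts.mean_.head())   # point forecasts
print(mts.lower_.head())  # lower bound of the 95% interval
print(mts.upper_.head())  # upper bound of the 95% interval

mts.plot(series="series1", type_plot="pi")  # forecast + prediction interval
```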

def score(self, X, training_index, testing_index, scoring=None, **kwargs):
276    def score(self, X, training_index, testing_index, scoring=None, **kwargs):
277        """Train on training_index, score on testing_index."""
278
279        assert (
280            bool(set(training_index).intersection(set(testing_index))) == False
281        ), "Non-overlapping 'training_index' and 'testing_index' required"
282
283        # Dimensions
284        try:
285            # multivariate time series
286            n, p = X.shape
287        except:
288            # univariate time series
289            n = X.shape[0]
290            p = 1
291
292        # Training and testing sets
293        if p > 1:
294            X_train = X[training_index, :]
295            X_test = X[testing_index, :]
296        else:
297            X_train = X[training_index]
298            X_test = X[testing_index]
299
300        # Horizon
301        h = len(testing_index)
302        assert (
303            len(training_index) + h
304        ) <= n, "Please check lengths of training and testing windows"
305
306        # Fit and predict
307        self.fit(X_train, **kwargs)
308        preds = self.predict(h=h, **kwargs)
309
310        if scoring is None:
311            scoring = "neg_root_mean_squared_error"
312
313        # check inputs
314        assert scoring in (
315            "explained_variance",
316            "neg_mean_absolute_error",
317            "neg_mean_squared_error",
318            "neg_root_mean_squared_error",
319            "neg_mean_squared_log_error",
320            "neg_median_absolute_error",
321            "r2",
322        ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \
323                               'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \
324                               'neg_median_absolute_error', 'r2')"
325
326        scoring_options = {
327            "explained_variance": skm2.explained_variance_score,
328            "neg_mean_absolute_error": skm2.mean_absolute_error,
329            "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2),
330            "neg_root_mean_squared_error": lambda x, y: np.sqrt(np.mean((x - y) ** 2)),
331            "neg_mean_squared_log_error": skm2.mean_squared_log_error,
332            "neg_median_absolute_error": skm2.median_absolute_error,
333            "r2": skm2.r2_score,
334        }
335
336        # if p > 1:
337        #     return tuple(
338        #         [
339        #             scoring_options[scoring](
340        #                 X_test[:, i], preds[:, i]#, **kwargs
341        #             )
342        #             for i in range(p)
343        #         ]
344        #     )
345        # else:
346        return scoring_options[scoring](X_test, preds)

Train on training_index, score on testing_index.

class CustomClassifier(nnetsauce.custom.custom.Custom, sklearn.base.ClassifierMixin):
 16class CustomClassifier(Custom, ClassifierMixin):
 17    """Custom Classification model
 18
 19    Attributes:
 20
 21        obj: object
 22            any object containing a method fit (obj.fit()) and a method predict
 23            (obj.predict())
 24
 25        n_hidden_features: int
 26            number of nodes in the hidden layer
 27
 28        activation_name: str
 29            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 30
 31        a: float
 32            hyperparameter for 'prelu' or 'elu' activation function
 33
 34        nodes_sim: str
 35            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 36            'uniform'
 37
 38        bias: boolean
 39            indicates if the hidden layer contains a bias term (True) or not
 40            (False)
 41
 42        dropout: float
 43            regularization parameter; (random) percentage of nodes dropped out
 44            of the training
 45
 46        direct_link: boolean
 47            indicates if the original predictors are included (True) in model's
 48            fitting or not (False)
 49
 50        n_clusters: int
 51            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 52                no clustering)
 53
 54        cluster_encode: bool
 55            defines how the variable containing clusters is treated (default is one-hot)
 56            if `False`, then labels are used, without one-hot encoding
 57
 58        type_clust: str
 59            type of clustering method: currently k-means ('kmeans') or Gaussian
 60            Mixture Model ('gmm')
 61
 62        type_scaling: a tuple of 3 strings
 63            scaling methods for inputs, hidden layer, and clustering respectively
 64            (and when relevant).
 65            Currently available: standardization ('std') or MinMax scaling ('minmax')
 66
 67        col_sample: float
 68            percentage of covariates randomly chosen for training
 69
 70        row_sample: float
 71            percentage of rows chosen for training, by stratified bootstrapping
 72        
 73        cv_calibration: int, cross-validation generator, or iterable, default=2
 74            Determines the cross-validation splitting strategy. Same as 
 75            `sklearn.calibration.CalibratedClassifierCV`
 76
 77        calibration_method: str
 78            {‘sigmoid’, ‘isotonic’}, default=’sigmoid’
 79            The method to use for calibration. Same as 
 80            `sklearn.calibration.CalibratedClassifierCV`
 81
 82        seed: int
 83            reproducibility seed for nodes_sim=='uniform'
 84
 85        backend: str
 86            "cpu" or "gpu" or "tpu"
 87
 88    Examples:
 89
 90    Note: it's better to use the `DeepClassifier` or `LazyDeepClassifier` classes directly
 91
 92    ```python
 93    import nnetsauce as ns
 94    from sklearn.ensemble import RandomForestClassifier
 95    from sklearn.model_selection import train_test_split
 96    from sklearn.datasets import load_digits
 97    from time import time
 98
 99    digits = load_digits()
100    X = digits.data
101    y = digits.target
102    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
103                                                        random_state=123)
104
105    # layer 1 (base layer) ----
106    layer1_regr = RandomForestClassifier(n_estimators=10, random_state=123)
107
108    start = time()
109
110    layer1_regr.fit(X_train, y_train)
111
112    # Accuracy in layer 1
113    print(layer1_regr.score(X_test, y_test))
114
115    # layer 2 using layer 1 ----
116    layer2_regr = ns.CustomClassifier(obj = layer1_regr, n_hidden_features=5,
117                            direct_link=True, bias=True,
118                            nodes_sim='uniform', activation_name='relu',
119                            n_clusters=2, seed=123)
120    layer2_regr.fit(X_train, y_train)
121
122    # Accuracy in layer 2
123    print(layer2_regr.score(X_test, y_test))
124
125    # layer 3 using layer 2 ----
126    layer3_regr = ns.CustomClassifier(obj = layer2_regr, n_hidden_features=10,
127                            direct_link=True, bias=True, dropout=0.7,
128                            nodes_sim='uniform', activation_name='relu',
129                            n_clusters=2, seed=123)
130    layer3_regr.fit(X_train, y_train)
131
132    # Accuracy in layer 3
133    print(layer3_regr.score(X_test, y_test))
134
135    print(f"Elapsed {time() - start}")
136    ```
137
138    """
139
140    # construct the object -----
141
142    def __init__(
143        self,
144        obj,
145        n_hidden_features=5,
146        activation_name="relu",
147        a=0.01,
148        nodes_sim="sobol",
149        bias=True,
150        dropout=0,
151        direct_link=True,
152        n_clusters=2,
153        cluster_encode=True,
154        type_clust="kmeans",
155        type_scaling=("std", "std", "std"),
156        col_sample=1,
157        row_sample=1,
158        cv_calibration=2,
159        calibration_method="sigmoid",
160        seed=123,
161        backend="cpu",
162    ):
163        super().__init__(
164            obj=obj,
165            n_hidden_features=n_hidden_features,
166            activation_name=activation_name,
167            a=a,
168            nodes_sim=nodes_sim,
169            bias=bias,
170            dropout=dropout,
171            direct_link=direct_link,
172            n_clusters=n_clusters,
173            cluster_encode=cluster_encode,
174            type_clust=type_clust,
175            type_scaling=type_scaling,
176            col_sample=col_sample,
177            row_sample=row_sample,
178            seed=seed,
179            backend=backend,
180        )
181        self.coef_ = None
182        self.intercept_ = None
183        self.type_fit = "classification"
184        self.cv_calibration = cv_calibration
185        self.calibration_method = calibration_method
186        self._estimator_type = "classifier"  # Explicitly mark as classifier
187
188    def __sklearn_clone__(self):
189        """Create a clone of the estimator.
190        
191        This is required for scikit-learn's calibration system to work properly.
192        """
193        # Create a new instance with the same parameters
194        clone = CustomClassifier(
195            obj=self.obj,
196            n_hidden_features=self.n_hidden_features,
197            activation_name=self.activation_name,
198            a=self.a,
199            nodes_sim=self.nodes_sim,
200            bias=self.bias,
201            dropout=self.dropout,
202            direct_link=self.direct_link,
203            n_clusters=self.n_clusters,
204            cluster_encode=self.cluster_encode,
205            type_clust=self.type_clust,
206            type_scaling=self.type_scaling,
207            col_sample=self.col_sample,
208            row_sample=self.row_sample,
209            cv_calibration=self.cv_calibration,
210            calibration_method=self.calibration_method,
211            seed=self.seed,
212            backend=self.backend
213        )
214        return clone
215
216    def fit(self, X, y, sample_weight=None, **kwargs):
217        """Fit custom model to training data (X, y).
218
219        Parameters:
220
221            X: {array-like}, shape = [n_samples, n_features]
222                Training vectors, where n_samples is the number
223                of samples and n_features is the number of features.
224
225            y: array-like, shape = [n_samples]
226                Target values.
227
228            sample_weight: array-like, shape = [n_samples]
229                Sample weights.
230
231            **kwargs: additional parameters to be passed to
232                        self.cook_training_set or self.obj.fit
233
234        Returns:
235
236            self: object
237        """
238
239        if len(X.shape) == 1:
240            if isinstance(X, pd.DataFrame):
241                X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns)
242            else:
243                X = X.reshape(1, -1)
244
245        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
246        self.classes_ = np.unique(y)
247        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
248
249        # Wrap in CalibratedClassifierCV if needed
250        if self.cv_calibration is not None:
251            self.obj = CalibratedClassifierCV(
252                self.obj, 
253                cv=self.cv_calibration,
254                method=self.calibration_method
255            )
256
257        # if sample_weights, else: (must use self.row_index)
258        if sample_weight is not None:
259            self.obj.fit(
260                scaled_Z,
261                output_y,
262                sample_weight=sample_weight[self.index_row_].ravel(),
263                **kwargs
264            )
265            return self
266
267        # if sample_weight is None:
268        self.obj.fit(scaled_Z, output_y, **kwargs)
269        self.classes_ = np.unique(y)  # for compatibility with sklearn
270        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
271
272        if hasattr(self.obj, "coef_"):
273            self.coef_ = self.obj.coef_
274
275        if hasattr(self.obj, "intercept_"):
276            self.intercept_ = self.obj.intercept_
277
278        return self
279
280    def partial_fit(self, X, y, sample_weight=None, **kwargs):
281        """Partial fit custom model to training data (X, y).
282
283        Parameters:
284
285            X: {array-like}, shape = [n_samples, n_features]
286                Subset of training vectors, where n_samples is the number
287                of samples and n_features is the number of features.
288
289            y: array-like, shape = [n_samples]
290                Subset of target values.
291
292            sample_weight: array-like, shape = [n_samples]
293                Sample weights.
294
295            **kwargs: additional parameters to be passed to
296                        self.cook_training_set or self.obj.fit
297
298        Returns:
299
300            self: object
301        """
302
303        if len(X.shape) == 1:
304            if isinstance(X, pd.DataFrame):
305                X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns)
306            else:
307                X = X.reshape(1, -1)
308            y = np.array([y], dtype=np.integer)
309
310        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
311        self.n_classes_ = len(np.unique(y))  # for compatibility with sklearn
312
313        # if sample_weights, else: (must use self.row_index)
314        if sample_weight is not None:
315            try:
316                self.obj.partial_fit(
317                    scaled_Z,
318                    output_y,
319                    sample_weight=sample_weight[self.index_row_].ravel(),
320                    # **kwargs
321                )
322            except:
323                NotImplementedError
324
325            return self
326
327        # if sample_weight is None:
328        #try:
329        self.obj.partial_fit(scaled_Z, output_y)
330        #except:
331        #    raise NotImplementedError
332
333        self.classes_ = np.unique(y)  # for compatibility with sklearn
334        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
335
336        return self
337
338    def predict(self, X, **kwargs):
339        """Predict test data X.
340
341        Parameters:
342
343            X: {array-like}, shape = [n_samples, n_features]
344                Training vectors, where n_samples is the number
345                of samples and n_features is the number of features.
346
347            **kwargs: additional parameters to be passed to
348                    self.cook_test_set
349
350        Returns:
351
352            model predictions: {array-like}
353        """
354
355        if len(X.shape) == 1:
356            n_features = X.shape[0]
357            new_X = mo.rbind(
358                X.reshape(1, n_features),
359                np.ones(n_features).reshape(1, n_features),
360            )
361
362            return (self.obj.predict(self.cook_test_set(new_X, **kwargs), **kwargs))[0]
363
364        return self.obj.predict(self.cook_test_set(X, **kwargs), **kwargs)
365
366    def predict_proba(self, X, **kwargs):
367        """Predict probabilities for test data X.
368
369        Args:
370
371            X: {array-like}, shape = [n_samples, n_features]
372                Training vectors, where n_samples is the number
373                of samples and n_features is the number of features.
374
375            **kwargs: additional parameters to be passed to
376                    self.cook_test_set
377
378        Returns:
379
380            probability estimates for test data: {array-like}
381        """
382
383        if len(X.shape) == 1:
384            n_features = X.shape[0]
385            new_X = mo.rbind(
386                X.reshape(1, n_features),
387                np.ones(n_features).reshape(1, n_features),
388            )
389            return (
390                self.obj.predict_proba(self.cook_test_set(new_X, **kwargs), **kwargs)
391            )[0]
392        return self.obj.predict_proba(self.cook_test_set(X, **kwargs), **kwargs)
393
394    def decision_function(self, X, **kwargs):
395        """Compute the decision function of X.
396
397        Parameters:
398            X: {array-like}, shape = [n_samples, n_features]
399                Samples to compute decision function for.
400
401            **kwargs: additional parameters to be passed to
402                    self.cook_test_set
403
404        Returns:
405            array-like of shape (n_samples,) or (n_samples, n_classes)
406            Decision function of the input samples. The order of outputs is the same
407            as that of the classes passed to fit.
408        """
409        if not hasattr(self.obj, "decision_function"):
410            # If base classifier doesn't have decision_function, use predict_proba
411            proba = self.predict_proba(X, **kwargs)
412            if proba.shape[1] == 2:
413                return proba[:, 1]  # For binary classification
414            return proba  # For multiclass
415
416        if len(X.shape) == 1:
417            n_features = X.shape[0]
418            new_X = mo.rbind(
419                X.reshape(1, n_features),
420                np.ones(n_features).reshape(1, n_features),
421            )
422
423            return (
424                self.obj.decision_function(
425                    self.cook_test_set(new_X, **kwargs), **kwargs
426                )
427            )[0]
428
429        return self.obj.decision_function(self.cook_test_set(X, **kwargs), **kwargs)
430
431    def score(self, X, y, scoring=None):
432        """Scoring function for classification.
433
434        Args:
435
436            X: {array-like}, shape = [n_samples, n_features]
437                Training vectors, where n_samples is the number
438                of samples and n_features is the number of features.
439
440            y: array-like, shape = [n_samples]
441                Target values.
442
443            scoring: str
444                scoring method (default is accuracy)
445
446        Returns:
447
448            score: float
449        """
450
451        if scoring is None:
452            scoring = "accuracy"
453
454        if scoring == "accuracy":
455            return skm2.accuracy_score(y, self.predict(X))
456
457        if scoring == "f1":
458            return skm2.f1_score(y, self.predict(X))
459
460        if scoring == "precision":
461            return skm2.precision_score(y, self.predict(X))
462
463        if scoring == "recall":
464            return skm2.recall_score(y, self.predict(X))
465
466        if scoring == "roc_auc":
467            return skm2.roc_auc_score(y, self.predict(X))
468
469        if scoring == "log_loss":
470            return skm2.log_loss(y, self.predict_proba(X))
471
472        if scoring == "balanced_accuracy":
473            return skm2.balanced_accuracy_score(y, self.predict(X))
474
475        if scoring == "average_precision":
476            return skm2.average_precision_score(y, self.predict(X))
477
478        if scoring == "neg_brier_score":
479            return -skm2.brier_score_loss(y, self.predict_proba(X))
480
481        if scoring == "neg_log_loss":
482            return -skm2.log_loss(y, self.predict_proba(X))

Custom Classification model

Attributes:

obj: object
    any object containing a method fit (obj.fit()) and a method predict
    (obj.predict())

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in the model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

col_sample: float
    percentage of covariates randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

cv_calibration: int, cross-validation generator, or iterable, default=2
    Determines the cross-validation splitting strategy. Same as 
    `sklearn.calibration.CalibratedClassifierCV`

calibration_method: str
    {'sigmoid', 'isotonic'}, default='sigmoid'
    The method to use for calibration. Same as 
    `sklearn.calibration.CalibratedClassifierCV`

seed: int
    reproducibility seed for nodes_sim=='uniform'

backend: str
    "cpu" or "gpu" or "tpu"

Examples:

Note: it's better to use the DeepClassifier or LazyDeepClassifier classes directly

import nnetsauce as ns
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_digits
from time import time

digits = load_digits()
X = digits.data
y = digits.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=123)

# layer 1 (base layer) ----
layer1_regr = RandomForestClassifier(n_estimators=10, random_state=123)

start = time()

layer1_regr.fit(X_train, y_train)

# Accuracy in layer 1
print(layer1_regr.score(X_test, y_test))

# layer 2 using layer 1 ----
layer2_regr = ns.CustomClassifier(obj = layer1_regr, n_hidden_features=5,
                        direct_link=True, bias=True,
                        nodes_sim='uniform', activation_name='relu',
                        n_clusters=2, seed=123)
layer2_regr.fit(X_train, y_train)

# Accuracy in layer 2
print(layer2_regr.score(X_test, y_test))

# layer 3 using layer 2 ----
layer3_regr = ns.CustomClassifier(obj = layer2_regr, n_hidden_features=10,
                        direct_link=True, bias=True, dropout=0.7,
                        nodes_sim='uniform', activation_name='relu',
                        n_clusters=2, seed=123)
layer3_regr.fit(X_train, y_train)

# Accuracy in layer 3
print(layer3_regr.score(X_test, y_test))

print(f"Elapsed {time() - start}")
def fit(self, X, y, sample_weight=None, **kwargs):
216    def fit(self, X, y, sample_weight=None, **kwargs):
217        """Fit custom model to training data (X, y).
218
219        Parameters:
220
221            X: {array-like}, shape = [n_samples, n_features]
222                Training vectors, where n_samples is the number
223                of samples and n_features is the number of features.
224
225            y: array-like, shape = [n_samples]
226                Target values.
227
228            sample_weight: array-like, shape = [n_samples]
229                Sample weights.
230
231            **kwargs: additional parameters to be passed to
232                        self.cook_training_set or self.obj.fit
233
234        Returns:
235
236            self: object
237        """
238
239        if len(X.shape) == 1:
240            if isinstance(X, pd.DataFrame):
241                X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns)
242            else:
243                X = X.reshape(1, -1)
244
245        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
246        self.classes_ = np.unique(y)
247        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
248
249        # Wrap in CalibratedClassifierCV if needed
250        if self.cv_calibration is not None:
251            self.obj = CalibratedClassifierCV(
252                self.obj, 
253                cv=self.cv_calibration,
254                method=self.calibration_method
255            )
256
257        # if sample_weight is not None, else: (must use self.index_row_)
258        if sample_weight is not None:
259            self.obj.fit(
260                scaled_Z,
261                output_y,
262                sample_weight=sample_weight[self.index_row_].ravel(),
263                **kwargs
264            )
265            return self
266
267        # if sample_weight is None:
268        self.obj.fit(scaled_Z, output_y, **kwargs)
269        self.classes_ = np.unique(y)  # for compatibility with sklearn
270        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
271
272        if hasattr(self.obj, "coef_"):
273            self.coef_ = self.obj.coef_
274
275        if hasattr(self.obj, "intercept_"):
276            self.intercept_ = self.obj.intercept_
277
278        return self

Fit custom model to training data (X, y).

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

sample_weight: array-like, shape = [n_samples]
    Sample weights.

**kwargs: additional parameters to be passed to
            self.cook_training_set or self.obj.fit

Returns:

self: object
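As the listing above shows, when `cv_calibration` is not `None` the base learner is wrapped in `sklearn.calibration.CalibratedClassifierCV` before fitting, and `sample_weight` (when provided) is indexed with `self.index_row_` (the rows retained by `row_sample`). A minimal illustrative sketch; the digits data and `LogisticRegression` base learner are assumptions, not part of the original documentation:

```python
import nnetsauce as ns
from sklearn.datasets import load_digits
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

X, y = load_digits(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=123)

# default cv_calibration=2: the base learner is wrapped in CalibratedClassifierCV
clf_cal = ns.CustomClassifier(obj=LogisticRegression(max_iter=1000),
                              n_hidden_features=5, seed=123)
clf_cal.fit(X_train, y_train)

# cv_calibration=None: the base learner is fitted as-is, without calibration
clf_raw = ns.CustomClassifier(obj=LogisticRegression(max_iter=1000),
                              n_hidden_features=5, cv_calibration=None, seed=123)
clf_raw.fit(X_train, y_train)

print(clf_cal.score(X_test, y_test), clf_raw.score(X_test, y_test))
```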
def predict(self, X, **kwargs):
338    def predict(self, X, **kwargs):
339        """Predict test data X.
340
341        Parameters:
342
343            X: {array-like}, shape = [n_samples, n_features]
344                Training vectors, where n_samples is the number
345                of samples and n_features is the number of features.
346
347            **kwargs: additional parameters to be passed to
348                    self.cook_test_set
349
350        Returns:
351
352            model predictions: {array-like}
353        """
354
355        if len(X.shape) == 1:
356            n_features = X.shape[0]
357            new_X = mo.rbind(
358                X.reshape(1, n_features),
359                np.ones(n_features).reshape(1, n_features),
360            )
361
362            return (self.obj.predict(self.cook_test_set(new_X, **kwargs), **kwargs))[0]
363
364        return self.obj.predict(self.cook_test_set(X, **kwargs), **kwargs)

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
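As the listing above shows, a 1-D input is reshaped into a single row before being passed to `cook_test_set`, so a single observation can be predicted directly. A short sketch, reusing the fitted `clf_cal` and `X_test` from the hypothetical example above:

```python
single_obs = X_test[0, :]               # 1-D array of shape (n_features,)
print(clf_cal.predict(single_obs))      # label predicted for that single observation
print(clf_cal.predict(X_test).shape)    # one prediction per row for 2-D inputs
```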
def predict_proba(self, X, **kwargs):
366    def predict_proba(self, X, **kwargs):
367        """Predict probabilities for test data X.
368
369        Args:
370
371            X: {array-like}, shape = [n_samples, n_features]
372                Training vectors, where n_samples is the number
373                of samples and n_features is the number of features.
374
375            **kwargs: additional parameters to be passed to
376                    self.cook_test_set
377
378        Returns:
379
380            probability estimates for test data: {array-like}
381        """
382
383        if len(X.shape) == 1:
384            n_features = X.shape[0]
385            new_X = mo.rbind(
386                X.reshape(1, n_features),
387                np.ones(n_features).reshape(1, n_features),
388            )
389            return (
390                self.obj.predict_proba(self.cook_test_set(new_X, **kwargs), **kwargs)
391            )[0]
392        return self.obj.predict_proba(self.cook_test_set(X, **kwargs), **kwargs)

Predict probabilities for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

probability estimates for test data: {array-like}
def score(self, X, y, scoring=None):
431    def score(self, X, y, scoring=None):
432        """Scoring function for classification.
433
434        Args:
435
436            X: {array-like}, shape = [n_samples, n_features]
437                Training vectors, where n_samples is the number
438                of samples and n_features is the number of features.
439
440            y: array-like, shape = [n_samples]
441                Target values.
442
443            scoring: str
444                scoring method (default is accuracy)
445
446        Returns:
447
448            score: float
449        """
450
451        if scoring is None:
452            scoring = "accuracy"
453
454        if scoring == "accuracy":
455            return skm2.accuracy_score(y, self.predict(X))
456
457        if scoring == "f1":
458            return skm2.f1_score(y, self.predict(X))
459
460        if scoring == "precision":
461            return skm2.precision_score(y, self.predict(X))
462
463        if scoring == "recall":
464            return skm2.recall_score(y, self.predict(X))
465
466        if scoring == "roc_auc":
467            return skm2.roc_auc_score(y, self.predict(X))
468
469        if scoring == "log_loss":
470            return skm2.log_loss(y, self.predict_proba(X))
471
472        if scoring == "balanced_accuracy":
473            return skm2.balanced_accuracy_score(y, self.predict(X))
474
475        if scoring == "average_precision":
476            return skm2.average_precision_score(y, self.predict(X))
477
478        if scoring == "neg_brier_score":
479            return -skm2.brier_score_loss(y, self.predict_proba(X))
480
481        if scoring == "neg_log_loss":
482            return -skm2.log_loss(y, self.predict_proba(X))

Scoring function for classification.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

scoring: str
    scoring method (default is accuracy)

Returns:

score: float
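The `scoring` string is mapped to the corresponding `sklearn.metrics` function, with accuracy as the default; `"log_loss"`, `"neg_log_loss"` and `"neg_brier_score"` are computed from `predict_proba` rather than hard predictions, and binary-only metrics such as `"f1"` assume a two-class problem. A hedged sketch, reusing `clf_cal` and the digits split assumed above:

```python
print(clf_cal.score(X_test, y_test))                              # accuracy (default)
print(clf_cal.score(X_test, y_test, scoring="balanced_accuracy"))
print(clf_cal.score(X_test, y_test, scoring="neg_log_loss"))      # uses predict_proba
```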
class CustomRegressor(nnetsauce.custom.custom.Custom, sklearn.base.RegressorMixin):
 18class CustomRegressor(Custom, RegressorMixin):
 19    """Custom Regression model
 20
 21    This class is used to 'augment' any regression model with transformed features.
 22
 23    Parameters:
 24
 25        obj: object
 26            any object containing a method fit (obj.fit()) and a method predict
 27            (obj.predict())
 28
 29        n_hidden_features: int
 30            number of nodes in the hidden layer
 31
 32        activation_name: str
 33            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 34
 35        a: float
 36            hyperparameter for 'prelu' or 'elu' activation function
 37
 38        nodes_sim: str
 39            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 40            'uniform'
 41
 42        bias: boolean
 43            indicates if the hidden layer contains a bias term (True) or not
 44            (False)
 45
 46        dropout: float
 47            regularization parameter; (random) percentage of nodes dropped out
 48            of the training
 49
 50        direct_link: boolean
 51            indicates if the original predictors are included (True) in model's
 52            fitting or not (False)
 53
 54        n_clusters: int
 55            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 56                no clustering)
 57
 58        cluster_encode: bool
 59            defines how the variable containing clusters is treated (default is one-hot)
 60            if `False`, then labels are used, without one-hot encoding
 61
 62        type_clust: str
 63            type of clustering method: currently k-means ('kmeans') or Gaussian
 64            Mixture Model ('gmm')
 65
 66        type_scaling: a tuple of 3 strings
 67            scaling methods for inputs, hidden layer, and clustering respectively
 68            (and when relevant).
 69            Currently available: standardization ('std') or MinMax scaling ('minmax')
 70
 71        type_pi: str.
 72            type of prediction interval; currently `None` (split or local
 73            conformal without simulation), "kde" or "bootstrap" (simulated split
 74            conformal).
 75
 76        replications: int.
 77            number of replications (if needed) for predictive simulation.
 78            Used only in `self.predict`, for `self.kernel` in ('gaussian',
 79            'tophat') and `self.type_pi = 'kde'`. Default is `None`.
 80
 81        kernel: str.
 82            the kernel to use for kernel density estimation (used for predictive
 83            simulation in `self.predict`, with `method='splitconformal'` and
 84            `type_pi = 'kde'`). Currently, either 'gaussian' or 'tophat'.
 85
 86        type_split: str.
 87            Type of splitting for conformal prediction. None (default), or
 88            "random" (random split of data) or "sequential" (sequential split of data)
 89
 90        col_sample: float
 91            percentage of covariates randomly chosen for training
 92
 93        row_sample: float
 94            percentage of rows chosen for training, by stratified bootstrapping
 95
 96        level: float
 97            confidence level for prediction intervals
 98
 99        pi_method: str
100            method for prediction intervals: 'splitconformal' or 'localconformal'
101
102        seed: int
103            reproducibility seed for nodes_sim=='uniform'
104
105        type_fit: str
106            'regression'
107
108        backend: str
109            "cpu" or "gpu" or "tpu"
110
111    Examples:
112
113    See [https://thierrymoudiki.github.io/blog/2024/03/18/python/conformal-and-bayesian-regression](https://thierrymoudiki.github.io/blog/2024/03/18/python/conformal-and-bayesian-regression)
114
115    """
116
117    # construct the object -----
118
119    def __init__(
120        self,
121        obj,
122        n_hidden_features=5,
123        activation_name="relu",
124        a=0.01,
125        nodes_sim="sobol",
126        bias=True,
127        dropout=0,
128        direct_link=True,
129        n_clusters=2,
130        cluster_encode=True,
131        type_clust="kmeans",
132        type_scaling=("std", "std", "std"),
133        type_pi=None,
134        replications=None,
135        kernel=None,
136        type_split=None,
137        col_sample=1,
138        row_sample=1,
139        level=None,
140        pi_method=None,
141        seed=123,
142        backend="cpu",
143    ):
144        super().__init__(
145            obj=obj,
146            n_hidden_features=n_hidden_features,
147            activation_name=activation_name,
148            a=a,
149            nodes_sim=nodes_sim,
150            bias=bias,
151            dropout=dropout,
152            direct_link=direct_link,
153            n_clusters=n_clusters,
154            cluster_encode=cluster_encode,
155            type_clust=type_clust,
156            type_scaling=type_scaling,
157            col_sample=col_sample,
158            row_sample=row_sample,
159            seed=seed,
160            backend=backend,
161        )
162
163        self.type_fit = "regression"
164        self.type_pi = type_pi
165        self.replications = replications
166        self.kernel = kernel
167        self.type_split = type_split
168        self.level = level
169        self.pi_method = pi_method
170        self.coef_ = None
171        self.intercept_ = None
172        self.X_ = None
173        self.y_ = None
174        self.aic_ = None 
175        self.aicc_ = None
176        self.bic_ = None
177
178    def fit(self, X, y, sample_weight=None, **kwargs):
179        """Fit custom model to training data (X, y).
180
181        Parameters:
182
183            X: {array-like}, shape = [n_samples, n_features]
184                Training vectors, where n_samples is the number
185                of samples and n_features is the number of features.
186
187            y: array-like, shape = [n_samples]
188                Target values.
189
190            sample_weight: array-like, shape = [n_samples]
191                Sample weights.
192
193            **kwargs: additional parameters to be passed to
194                self.cook_training_set or self.obj.fit
195
196        Returns:
197
198            self: object
199
200        """
201
202        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
203
204        if self.level is not None:
205            self.obj = PredictionInterval(
206                obj=self.obj, method=self.pi_method, level=self.level
207            )
208
209        # if sample_weight is not None, else: (must use self.index_row_)
210        if sample_weight is not None:
211            self.obj.fit(
212                scaled_Z,
213                centered_y,
214                sample_weight=sample_weight[self.index_row_].ravel(),
215                **kwargs
216            )
217
218            return self
219
220        self.obj.fit(scaled_Z, centered_y, **kwargs)
221
222        self.X_ = X
223
224        self.y_ = y
225
226        # Compute SSE
227        y_pred = self.predict(X)
228        self.sse_ = np.sum((y - y_pred) ** 2)
229        
230        # Get number of parameters
231        n_params = self.n_hidden_features + X.shape[1]  # hidden features + original features
232        if self.n_clusters > 0:
233            n_params += self.n_clusters  # add clusters if used
234            
235        # Compute information criteria
236        n_samples = X.shape[0]
237        temp = n_samples * np.log(self.sse_/n_samples)
238        self.aic_ = temp + 2 * n_params
239        self.aicc_ = self.aic_ + (2 * n_params * (n_params + 1))/(n_samples - n_params - 1)
240        self.bic_ = temp + n_params * np.log(n_samples)
241
242        if hasattr(self.obj, "coef_"):
243            self.coef_ = self.obj.coef_
244
245        if hasattr(self.obj, "intercept_"):
246            self.intercept_ = self.obj.intercept_
247
248        return self
249
250    def partial_fit(self, X, y, **kwargs):
251        """Partial fit custom model to training data (X, y).
252
253        Parameters:
254
255            X: {array-like}, shape = [n_samples, n_features]
256                Subset of training vectors, where n_samples is the number
257                of samples and n_features is the number of features.
258
259            y: array-like, shape = [n_samples]
260                Subset of target values.
261
262            **kwargs: additional parameters to be passed to
263                self.cook_training_set or self.obj.fit
264
265        Returns:
266
267            self: object
268
269        """
270
271        if len(X.shape) == 1:
272            if isinstance(X, pd.DataFrame):
273                X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns)
274            else:
275                X = X.reshape(1, -1)
276            y = np.array([y])
277
278        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
279
280        self.obj.partial_fit(scaled_Z, centered_y, **kwargs)
281
282        self.X_ = X
283
284        self.y_ = y
285
286        return self
287
288    def predict(self, X, level=95, method=None, **kwargs):
289        """Predict test data X.
290
291        Parameters:
292
293            X: {array-like}, shape = [n_samples, n_features]
294                Training vectors, where n_samples is the number
295                of samples and n_features is the number of features.
296
297            level: int
298                Level of confidence (default = 95)
299
300            method: str
301                `None`, or 'splitconformal', 'localconformal'
302                prediction (if you specify `return_pi = True`)
303
304            **kwargs: additional parameters
305                    `return_pi = True` for conformal prediction,
306                    with `method` in ('splitconformal', 'localconformal')
307                    or `return_std = True` for `self.obj` in
308                    (`sklearn.linear_model.BayesianRidge`,
309                    `sklearn.linear_model.ARDRegression`,
310                    `sklearn.gaussian_process.GaussianProcessRegressor`)
311
312        Returns:
313
314            model predictions:
315                an array if uncertainty quantification is not requested,
316                or a named tuple (mean, standard deviation, lower and
317                upper prediction interval) if `return_std = True`, or
318                prediction intervals (and simulations, if requested)
319                if `return_pi = True`
320
321        """
322
323        if "return_std" in kwargs:
324
325            alpha = 100 - level
326            pi_multiplier = norm.ppf(1 - alpha / 200)
327
328            if len(X.shape) == 1:
329
330                n_features = X.shape[0]
331                new_X = mo.rbind(
332                    X.reshape(1, n_features),
333                    np.ones(n_features).reshape(1, n_features),
334                )
335
336                mean_, std_ = self.obj.predict(
337                    self.cook_test_set(new_X, **kwargs), return_std=True
338                )[0]
339
340                preds = self.y_mean_ + mean_
341                lower = self.y_mean_ + (mean_ - pi_multiplier * std_)
342                upper = self.y_mean_ + (mean_ + pi_multiplier * std_)
343
344                DescribeResults = namedtuple(
345                    "DescribeResults", ["mean", "std", "lower", "upper"]
346                )
347
348                return DescribeResults(preds, std_, lower, upper)
349
350            # len(X.shape) > 1
351            mean_, std_ = self.obj.predict(
352                self.cook_test_set(X, **kwargs), return_std=True
353            )
354
355            preds = self.y_mean_ + mean_
356            lower = self.y_mean_ + (mean_ - pi_multiplier * std_)
357            upper = self.y_mean_ + (mean_ + pi_multiplier * std_)
358
359            DescribeResults = namedtuple(
360                "DescribeResults", ["mean", "std", "lower", "upper"]
361            )
362
363            return DescribeResults(preds, std_, lower, upper)
364
365        if "return_pi" in kwargs:
366            assert method in (
367                "splitconformal",
368                "localconformal",
369            ), "method must be in ('splitconformal', 'localconformal')"
370            self.pi = PredictionInterval(
371                obj=self,
372                method=method,
373                level=level,
374                type_pi=self.type_pi,
375                replications=self.replications,
376                kernel=self.kernel,
377            )
378
379            if len(self.X_.shape) == 1:
380                if isinstance(X, pd.DataFrame):
381                    self.X_ = pd.DataFrame(
382                        self.X_.values.reshape(1, -1), columns=self.X_.columns
383                    )
384                else:
385                    self.X_ = self.X_.reshape(1, -1)
386                self.y_ = np.array([self.y_])
387
388            self.pi.fit(self.X_, self.y_)
389            # self.X_ = None # consumes memory to keep, dangerous to delete (side effect)
390            # self.y_ = None # consumes memory to keep, dangerous to delete (side effect)
391            preds = self.pi.predict(X, return_pi=True)
392            return preds
393
394        # "return_std" not in kwargs
395        if len(X.shape) == 1:
396
397            n_features = X.shape[0]
398            new_X = mo.rbind(
399                X.reshape(1, n_features),
400                np.ones(n_features).reshape(1, n_features),
401            )
402
403            return (
404                self.y_mean_
405                + self.obj.predict(self.cook_test_set(new_X, **kwargs), **kwargs)
406            )[0]
407
408        # len(X.shape) > 1
409        return self.y_mean_ + self.obj.predict(
410            self.cook_test_set(X, **kwargs), **kwargs
411        )
412
413    def score(self, X, y, scoring=None):
414        """Compute the score of the model.
415
416        Parameters:
417
418            X: {array-like}, shape = [n_samples, n_features]
419                Training vectors, where n_samples is the number
420                of samples and n_features is the number of features.
421
422            y: array-like, shape = [n_samples]
423                Target values.
424
425            scoring: str
426                scoring method
427
428        Returns:
429
430            score: float
431
432        """
433
434        if scoring is None:
435            return np.sqrt(np.mean((self.predict(X) - y) ** 2))
436
437        return skm2.get_scorer(scoring)(self, X, y)

Custom Regression model

This class is used to 'augment' any regression model with transformed features.

Parameters:

obj: object
    any object containing a method fit (obj.fit()) and a method predict
    (obj.predict())

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

type_pi: str.
    type of prediction interval; currently `None` (split or local
    conformal without simulation), "kde" or "bootstrap" (simulated split
    conformal).

replications: int.
    number of replications (if needed) for predictive simulation.
    Used only in `self.predict`, for `self.kernel` in ('gaussian',
    'tophat') and `self.type_pi = 'kde'`. Default is `None`.

kernel: str.
    the kernel to use for kernel density estimation (used for predictive
    simulation in `self.predict`, with `method='splitconformal'` and
    `type_pi = 'kde'`). Currently, either 'gaussian' or 'tophat'.

type_split: str.
    Type of splitting for conformal prediction. None (default), or
    "random" (random split of data) or "sequential" (sequential split of data)

col_sample: float
    percentage of covariates randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

level: float
    confidence level for prediction intervals

pi_method: str
    method for prediction intervals: 'splitconformal' or 'localconformal'

seed: int
    reproducibility seed for nodes_sim=='uniform'

type_fit: str
    'regression'

backend: str
    "cpu" or "gpu" or "tpu"

Examples:

See https://thierrymoudiki.github.io/blog/2024/03/18/python/conformal-and-bayesian-regression
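The linked post covers conformal and Bayesian prediction intervals in detail. For a quick, self-contained sketch, the diabetes data and `Ridge` base learner below are illustrative assumptions, not part of the original documentation:

```python
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=123)

reg = ns.CustomRegressor(obj=Ridge(), n_hidden_features=10,
                         direct_link=True, nodes_sim="sobol", seed=123)
reg.fit(X_train, y_train)

print(reg.score(X_test, y_test))       # RMSE, since scoring defaults to None
print(reg.aic_, reg.aicc_, reg.bic_)   # information criteria computed during fit
```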

def fit(self, X, y, sample_weight=None, **kwargs):
178    def fit(self, X, y, sample_weight=None, **kwargs):
179        """Fit custom model to training data (X, y).
180
181        Parameters:
182
183            X: {array-like}, shape = [n_samples, n_features]
184                Training vectors, where n_samples is the number
185                of samples and n_features is the number of features.
186
187            y: array-like, shape = [n_samples]
188                Target values.
189
190            sample_weight: array-like, shape = [n_samples]
191                Sample weights.
192
193            **kwargs: additional parameters to be passed to
194                self.cook_training_set or self.obj.fit
195
196        Returns:
197
198            self: object
199
200        """
201
202        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
203
204        if self.level is not None:
205            self.obj = PredictionInterval(
206                obj=self.obj, method=self.pi_method, level=self.level
207            )
208
209        # if sample_weight is not None, else: (must use self.index_row_)
210        if sample_weight is not None:
211            self.obj.fit(
212                scaled_Z,
213                centered_y,
214                sample_weight=sample_weight[self.index_row_].ravel(),
215                **kwargs
216            )
217
218            return self
219
220        self.obj.fit(scaled_Z, centered_y, **kwargs)
221
222        self.X_ = X
223
224        self.y_ = y
225
226        # Compute SSE
227        y_pred = self.predict(X)
228        self.sse_ = np.sum((y - y_pred) ** 2)
229        
230        # Get number of parameters
231        n_params = self.n_hidden_features + X.shape[1]  # hidden features + original features
232        if self.n_clusters > 0:
233            n_params += self.n_clusters  # add clusters if used
234            
235        # Compute information criteria
236        n_samples = X.shape[0]
237        temp = n_samples * np.log(self.sse_/n_samples)
238        self.aic_ = temp + 2 * n_params
239        self.aicc_ = self.aic_ + (2 * n_params * (n_params + 1))/(n_samples - n_params - 1)
240        self.bic_ = temp + n_params * np.log(n_samples)
241
242        if hasattr(self.obj, "coef_"):
243            self.coef_ = self.obj.coef_
244
245        if hasattr(self.obj, "intercept_"):
246            self.intercept_ = self.obj.intercept_
247
248        return self

Fit custom model to training data (X, y).

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

sample_weight: array-like, shape = [n_samples]
    Sample weights.

**kwargs: additional parameters to be passed to
    self.cook_training_set or self.obj.fit

Returns:

self: object
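As the listing above shows, `fit` also stores goodness-of-fit attributes computed from the in-sample sum of squared errors `sse_`, under a Gaussian-likelihood approximation. With `n` training rows and `k = n_hidden_features + n_features` parameters (plus `n_clusters` when clustering is used):

    aic_  = n * log(sse_ / n) + 2 * k
    aicc_ = aic_ + 2 * k * (k + 1) / (n - k - 1)
    bic_  = n * log(sse_ / n) + k * log(n)

These attributes are available after calling `fit` (see the sketch above).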
def predict(self, X, level=95, method=None, **kwargs):
288    def predict(self, X, level=95, method=None, **kwargs):
289        """Predict test data X.
290
291        Parameters:
292
293            X: {array-like}, shape = [n_samples, n_features]
294                Training vectors, where n_samples is the number
295                of samples and n_features is the number of features.
296
297            level: int
298                Level of confidence (default = 95)
299
300            method: str
301                `None`, or 'splitconformal', 'localconformal'
302                prediction (if you specify `return_pi = True`)
303
304            **kwargs: additional parameters
305                    `return_pi = True` for conformal prediction,
306                    with `method` in ('splitconformal', 'localconformal')
307                    or `return_std = True` for `self.obj` in
308                    (`sklearn.linear_model.BayesianRidge`,
309                    `sklearn.linear_model.ARDRegression`,
310                    `sklearn.gaussian_process.GaussianProcessRegressor`)
311
312        Returns:
313
314            model predictions:
315                an array if uncertainty quantification is not requested,
316                or a named tuple (mean, standard deviation, lower and
317                upper prediction interval) if `return_std = True`, or
318                prediction intervals (and simulations, if requested)
319                if `return_pi = True`
320
321        """
322
323        if "return_std" in kwargs:
324
325            alpha = 100 - level
326            pi_multiplier = norm.ppf(1 - alpha / 200)
327
328            if len(X.shape) == 1:
329
330                n_features = X.shape[0]
331                new_X = mo.rbind(
332                    X.reshape(1, n_features),
333                    np.ones(n_features).reshape(1, n_features),
334                )
335
336                mean_, std_ = self.obj.predict(
337                    self.cook_test_set(new_X, **kwargs), return_std=True
338                )[0]
339
340                preds = self.y_mean_ + mean_
341                lower = self.y_mean_ + (mean_ - pi_multiplier * std_)
342                upper = self.y_mean_ + (mean_ + pi_multiplier * std_)
343
344                DescribeResults = namedtuple(
345                    "DescribeResults", ["mean", "std", "lower", "upper"]
346                )
347
348                return DescribeResults(preds, std_, lower, upper)
349
350            # len(X.shape) > 1
351            mean_, std_ = self.obj.predict(
352                self.cook_test_set(X, **kwargs), return_std=True
353            )
354
355            preds = self.y_mean_ + mean_
356            lower = self.y_mean_ + (mean_ - pi_multiplier * std_)
357            upper = self.y_mean_ + (mean_ + pi_multiplier * std_)
358
359            DescribeResults = namedtuple(
360                "DescribeResults", ["mean", "std", "lower", "upper"]
361            )
362
363            return DescribeResults(preds, std_, lower, upper)
364
365        if "return_pi" in kwargs:
366            assert method in (
367                "splitconformal",
368                "localconformal",
369            ), "method must be in ('splitconformal', 'localconformal')"
370            self.pi = PredictionInterval(
371                obj=self,
372                method=method,
373                level=level,
374                type_pi=self.type_pi,
375                replications=self.replications,
376                kernel=self.kernel,
377            )
378
379            if len(self.X_.shape) == 1:
380                if isinstance(X, pd.DataFrame):
381                    self.X_ = pd.DataFrame(
382                        self.X_.values.reshape(1, -1), columns=self.X_.columns
383                    )
384                else:
385                    self.X_ = self.X_.reshape(1, -1)
386                self.y_ = np.array([self.y_])
387
388            self.pi.fit(self.X_, self.y_)
389            # self.X_ = None # consumes memory to keep, dangerous to delete (side effect)
390            # self.y_ = None # consumes memory to keep, dangerous to delete (side effect)
391            preds = self.pi.predict(X, return_pi=True)
392            return preds
393
394        # "return_std" not in kwargs
395        if len(X.shape) == 1:
396
397            n_features = X.shape[0]
398            new_X = mo.rbind(
399                X.reshape(1, n_features),
400                np.ones(n_features).reshape(1, n_features),
401            )
402
403            return (
404                self.y_mean_
405                + self.obj.predict(self.cook_test_set(new_X, **kwargs), **kwargs)
406            )[0]
407
408        # len(X.shape) > 1
409        return self.y_mean_ + self.obj.predict(
410            self.cook_test_set(X, **kwargs), **kwargs
411        )

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

level: int
    Level of confidence (default = 95)

method: str
    `None`, or 'splitconformal', 'localconformal'
    prediction (if you specify `return_pi = True`)

**kwargs: additional parameters
        `return_pi = True` for conformal prediction,
        with `method` in ('splitconformal', 'localconformal')
        or `return_std = True` for `self.obj` in
        (`sklearn.linear_model.BayesianRidge`,
        `sklearn.linear_model.ARDRegression`,
        `sklearn.gaussian_process.GaussianProcessRegressor`)

Returns:

model predictions:
    an array if uncertainty quantification is not requested,
    or a named tuple (mean, standard deviation, lower and
    upper prediction interval) if `return_std = True`, or
    prediction intervals (and simulations, if requested)
    if `return_pi = True`
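A hedged sketch of both uncertainty-quantification paths, reusing the `X_train`/`X_test` split assumed above; `BayesianRidge` is an illustrative base learner exposing `return_std`:

```python
from sklearn.linear_model import BayesianRidge, Ridge

# Gaussian-type intervals from the base learner's posterior (return_std)
reg_bayes = ns.CustomRegressor(obj=BayesianRidge())
reg_bayes.fit(X_train, y_train)
res = reg_bayes.predict(X_test, return_std=True, level=95)
print(res.mean[:3], res.lower[:3], res.upper[:3])   # DescribeResults named tuple

# split conformal prediction intervals (return_pi), for any base learner
reg_conf = ns.CustomRegressor(obj=Ridge())
reg_conf.fit(X_train, y_train)
pi = reg_conf.predict(X_test, return_pi=True, method="splitconformal", level=95)
```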
def score(self, X, y, scoring=None):
413    def score(self, X, y, scoring=None):
414        """Compute the score of the model.
415
416        Parameters:
417
418            X: {array-like}, shape = [n_samples, n_features]
419                Training vectors, where n_samples is the number
420                of samples and n_features is the number of features.
421
422            y: array-like, shape = [n_samples]
423                Target values.
424
425            scoring: str
426                scoring method
427
428        Returns:
429
430            score: float
431
432        """
433
434        if scoring is None:
435            return np.sqrt(np.mean((self.predict(X) - y) ** 2))
436
437        return skm2.get_scorer(scoring)(self, X, y)

Compute the score of the model.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

scoring: str
    scoring method

Returns:

score: float
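When `scoring` is `None`, `score` returns the root mean squared error; otherwise the string is resolved through `sklearn.metrics.get_scorer`, so any registered scikit-learn scorer name can be used. For instance, reusing `reg` from the sketch above:

```python
print(reg.score(X_test, y_test))                  # RMSE (scoring=None)
print(reg.score(X_test, y_test, scoring="r2"))    # scikit-learn scorer by name
print(reg.score(X_test, y_test, scoring="neg_mean_absolute_error"))
```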
class DeepClassifier(nnetsauce.CustomClassifier, sklearn.base.ClassifierMixin):
 35class DeepClassifier(CustomClassifier, ClassifierMixin):
 36    """
 37    Deep Classifier
 38
 39    Parameters:
 40
 41        obj: an object
 42            A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification
 43
 44        n_layers: int (default=3)
 45            Number of layers. `n_layers = 1` is a simple `CustomClassifier`
 46
 47        verbose : int, optional (default=0)
 48            Monitor progress when fitting.
 49
 50        All the other parameters are nnetsauce `CustomClassifier`'s
 51
 52    Examples:
 53
 54        ```python
 55        import nnetsauce as ns
 56        from sklearn.datasets import load_breast_cancer
 57        from sklearn.model_selection import train_test_split
 58        from sklearn.linear_model import LogisticRegressionCV
 59        data = load_breast_cancer()
 60        X = data.data
 61        y= data.target
 62        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
 63        obj = LogisticRegressionCV()
 64        clf = ns.DeepClassifier(obj)
 65        clf.fit(X_train, y_train)
 66        print(clf.score(X_test, y_test))
 67        ```
 68    """
 69
 70    def __init__(
 71        self,
 72        obj,
 73        # Defining depth
 74        n_layers=3,
 75        verbose=0,
 76        # CustomClassifier attributes
 77        n_hidden_features=5,
 78        activation_name="relu",
 79        a=0.01,
 80        nodes_sim="sobol",
 81        bias=True,
 82        dropout=0,
 83        direct_link=True,
 84        n_clusters=2,
 85        cluster_encode=True,
 86        type_clust="kmeans",
 87        type_scaling=("std", "std", "std"),
 88        col_sample=1,
 89        row_sample=1,
 90        cv_calibration=2,
 91        calibration_method="sigmoid",
 92        seed=123,
 93        backend="cpu",
 94    ):
 95        super().__init__(
 96            obj=obj,
 97            n_hidden_features=n_hidden_features,
 98            activation_name=activation_name,
 99            a=a,
100            nodes_sim=nodes_sim,
101            bias=bias,
102            dropout=dropout,
103            direct_link=direct_link,
104            n_clusters=n_clusters,
105            cluster_encode=cluster_encode,
106            type_clust=type_clust,
107            type_scaling=type_scaling,
108            col_sample=col_sample,
109            row_sample=row_sample,
110            seed=seed,
111            backend=backend,
112        )
113        self.coef_ = None
114        self.intercept_ = None
115        self.type_fit = "classification"
116        self.cv_calibration = cv_calibration
117        self.calibration_method = calibration_method
118        
119        # Only wrap in CalibratedClassifierCV if not already wrapped
120        # if not isinstance(obj, CalibratedClassifierCV):
121        #     self.obj = CalibratedClassifierCV(
122        #         self.obj, 
123        #         cv=self.cv_calibration,
124        #         method=self.calibration_method
125        #     )
126        # else:
127        self.coef_ = None
128        self.intercept_ = None
129        self.type_fit = "classification"
130        self.cv_calibration = cv_calibration
131        self.calibration_method = calibration_method
132        self.obj = obj
133        self._estimator_type = "classifier"  # explicitly mark as a classifier for scikit-learn
134
135        assert n_layers >= 1, "must have n_layers >= 1"
136        self.stacked_obj = obj
137        self.verbose = verbose
138        self.n_layers = n_layers
139        self.classes_ = None
140        self.n_classes_ = None
141
142    def fit(self, X, y, **kwargs):
143        """Fit Classification algorithms to X and y.
144        Parameters
145        ----------
146        X : array-like,
147            Training vectors, where rows correspond to samples
148            and columns to features.
149        y : array-like,
150            Target values (class labels), of length equal to the
151            number of rows of X.
152        **kwargs: dict
153            Additional parameters to be passed to the fit method
154            of the base learner. For example, `sample_weight`.
155
156        Returns
157        -------
158        A fitted object
159        """
160
161        self.classes_ = np.unique(y)
162        self.n_classes_ = len(
163            self.classes_
164        )  # for compatibility with scikit-learn
165
166        if isinstance(X, np.ndarray):
167            X = pd.DataFrame(X)
168
169        # init layer
170        self.stacked_obj = CustomClassifier(
171            obj=self.stacked_obj,
172            n_hidden_features=self.n_hidden_features,
173            activation_name=self.activation_name,
174            a=self.a,
175            nodes_sim=self.nodes_sim,
176            bias=self.bias,
177            dropout=self.dropout,
178            direct_link=self.direct_link,
179            n_clusters=self.n_clusters,
180            cluster_encode=self.cluster_encode,
181            type_clust=self.type_clust,
182            type_scaling=self.type_scaling,
183            col_sample=self.col_sample,
184            row_sample=self.row_sample,
185            cv_calibration=None,
186            calibration_method=None,
187            seed=self.seed,
188            backend=self.backend,
189        )
190
191        if self.verbose > 0:
192            iterator = tqdm(range(self.n_layers - 1))
193        else:
194            iterator = range(self.n_layers - 1)
195
196        for _ in iterator:
197            self.stacked_obj = deepcopy(
198                CustomClassifier(
199                    obj=self.stacked_obj,
200                    n_hidden_features=self.n_hidden_features,
201                    activation_name=self.activation_name,
202                    a=self.a,
203                    nodes_sim=self.nodes_sim,
204                    bias=self.bias,
205                    dropout=self.dropout,
206                    direct_link=self.direct_link,
207                    n_clusters=self.n_clusters,
208                    cluster_encode=self.cluster_encode,
209                    type_clust=self.type_clust,
210                    type_scaling=self.type_scaling,
211                    col_sample=self.col_sample,
212                    row_sample=self.row_sample,
213                    cv_calibration=None,
214                    calibration_method=None,
215                    seed=self.seed,
216                    backend=self.backend,
217                )
218            )
219            self.stacked_obj.fit(X, y, **kwargs)
220        
221        return self
222
223    def partial_fit(self, X, y, **kwargs):
224        """Partially fit Classification algorithms to X and y.
225        Parameters
226        ----------
227        X : array-like,
228            Training vectors, where rows correspond to samples
229            and columns to features.
230        y : array-like,
231            Target values (class labels), of length equal to the
232            number of rows of X.
233        **kwargs: dict
234            Additional parameters to be passed to the fit method
235            of the base learner. For example, `sample_weight`.
236        Returns
237        -------
238        A fitted object
239        """
240        assert hasattr(self, "stacked_obj"), "model must be fitted first"
241        current_obj = self.stacked_obj
242        for _ in range(self.n_layers):
243            try:
244                input_X = current_obj.obj.cook_test_set(X)
245                current_obj.obj.partial_fit(input_X, y, **kwargs)
246                try:
247                    current_obj = current_obj.obj
248                except AttributeError:
249                    pass
250            except ValueError:
251                pass
252        return self
253
254    def predict(self, X):
255        return self.stacked_obj.predict(X)
256
257    def predict_proba(self, X):
258        return self.stacked_obj.predict_proba(X)
259
260    def score(self, X, y, scoring=None):
261        return self.stacked_obj.score(X, y, scoring)
262
263    def cross_val_optim(
264        self,
265        X_train,
266        y_train,
267        X_test=None,
268        y_test=None,
269        scoring="accuracy",
270        surrogate_obj=None,
271        cv=5,
272        n_jobs=None,
273        n_init=10,
274        n_iter=190,
275        abs_tol=1e-3,
276        verbose=2,
277        seed=123,
278        **kwargs,
279    ):
280        """Cross-validation function and hyperparameters' search
281
282        Parameters:
283
284            X_train: array-like,
285                Training vectors, where rows is the number of samples
286                and columns is the number of features.
287
288            y_train: array-like,
289                Training vectors, where rows is the number of samples
290                and columns is the number of features.
291
292            X_test: array-like,
293                Testing vectors, where rows is the number of samples
294                and columns is the number of features.
295
296            y_test: array-like,
297                Testing vectors, where rows is the number of samples
298                and columns is the number of features.
299
300            scoring: str
301                scoring metric; see https://scikit-learn.org/stable/modules/model_evaluation.html#the-scoring-parameter-defining-model-evaluation-rules
302
303            surrogate_obj: an object;
304                An ML model for estimating the uncertainty around the objective function
305
306            cv: int;
307                number of cross-validation folds
308
309            n_jobs: int;
310                number of jobs for parallel execution
311
312            n_init: an integer;
313                number of points in the initial setting, when `x_init` and `y_init` are not provided
314
315            n_iter: an integer;
316                number of iterations of the minimization algorithm
317
318            abs_tol: a float;
319                tolerance for convergence of the optimizer (early stopping based on acquisition function)
320
321            verbose: int
322                controls verbosity
323
324            seed: int
325                reproducibility seed
326
327            **kwargs: dict
328                additional parameters to be passed to the estimator
329
330        Examples:
331
332            ```python
333            ```
334        """
335
336        num_to_activation_name = {1: "relu", 2: "sigmoid", 3: "tanh"}
337        num_to_nodes_sim = {1: "sobol", 2: "uniform", 3: "hammersley"}
338        num_to_type_clust = {1: "kmeans", 2: "gmm"}
339
340        def deepclassifier_cv(
341            X_train,
342            y_train,
343            # Defining depth
344            n_layers=3,
345            # CustomClassifier attributes
346            n_hidden_features=5,
347            activation_name="relu",
348            nodes_sim="sobol",
349            dropout=0,
350            n_clusters=2,
351            type_clust="kmeans",
352            cv=5,
353            n_jobs=None,
354            scoring="accuracy",
355            seed=123,
356        ):
357            self.set_params(
358                **{
359                    "n_layers": n_layers,
360                    # CustomClassifier attributes
361                    "n_hidden_features": n_hidden_features,
362                    "activation_name": activation_name,
363                    "nodes_sim": nodes_sim,
364                    "dropout": dropout,
365                    "n_clusters": n_clusters,
366                    "type_clust": type_clust,
367                    **kwargs,
368                }
369            )
370            return -cross_val_score(
371                estimator=self,
372                X=X_train,
373                y=y_train,
374                scoring=scoring,
375                cv=cv,
376                n_jobs=n_jobs,
377                verbose=0,
378            ).mean()
379
380        # objective function for hyperparams tuning
381        def crossval_objective(xx):
382            return deepclassifier_cv(
383                X_train=X_train,
384                y_train=y_train,
385                # Defining depth
386                n_layers=int(np.ceil(xx[0])),
387                # CustomClassifier attributes
388                n_hidden_features=int(np.ceil(xx[1])),
389                activation_name=num_to_activation_name[np.ceil(xx[2])],
390                nodes_sim=num_to_nodes_sim[int(np.ceil(xx[3]))],
391                dropout=xx[4],
392                n_clusters=int(np.ceil(xx[5])),
393                type_clust=num_to_type_clust[int(np.ceil(xx[6]))],
394                cv=cv,
395                n_jobs=n_jobs,
396                scoring=scoring,
397                seed=seed,
398            )
399
400        if surrogate_obj is None:
401            gp_opt = gp.GPOpt(
402                objective_func=crossval_objective,
403                lower_bound=np.array([0, 3, 0, 0, 0.0, 0, 0]),
404                upper_bound=np.array([5, 100, 3, 3, 0.4, 5, 2]),
405                params_names=[
406                    "n_layers",
407                    # CustomClassifier attributes
408                    "n_hidden_features",
409                    "activation_name",
410                    "nodes_sim",
411                    "dropout",
412                    "n_clusters",
413                    "type_clust",
414                ],
415                method="bayesian",
416                n_init=n_init,
417                n_iter=n_iter,
418                seed=seed,
419            )
420        else:
421            gp_opt = gp.GPOpt(
422                objective_func=crossval_objective,
423                lower_bound=np.array([0, 3, 0, 0, 0.0, 0, 0]),
424                upper_bound=np.array([5, 100, 3, 3, 0.4, 5, 2]),
425                params_names=[
426                    "n_layers",
427                    # CustomClassifier attributes
428                    "n_hidden_features",
429                    "activation_name",
430                    "nodes_sim",
431                    "dropout",
432                    "n_clusters",
433                    "type_clust",
434                ],
435                acquisition="ucb",
436                method="splitconformal",
437                surrogate_obj=ns.PredictionInterval(
438                    obj=surrogate_obj, method="splitconformal"
439                ),
440                n_init=n_init,
441                n_iter=n_iter,
442                seed=seed,
443            )
444
445        res = gp_opt.optimize(verbose=verbose, abs_tol=abs_tol)
446        res.best_params["n_layers"] = int(np.ceil(res.best_params["n_layers"]))
447        res.best_params["n_hidden_features"] = int(
448            np.ceil(res.best_params["n_hidden_features"])
449        )
450        res.best_params["activation_name"] = num_to_activation_name[
451            np.ceil(res.best_params["activation_name"])
452        ]
453        res.best_params["nodes_sim"] = num_to_nodes_sim[
454            int(np.ceil(res.best_params["nodes_sim"]))
455        ]
456        res.best_params["dropout"] = res.best_params["dropout"]
457        res.best_params["n_clusters"] = int(np.ceil(res.best_params["n_clusters"]))
458        res.best_params["type_clust"] = num_to_type_clust[
459            int(np.ceil(res.best_params["type_clust"]))
460        ]
461
462        # out-of-sample error
463        if X_test is not None and y_test is not None:
464            self.set_params(**res.best_params, verbose=0, seed=seed)
465            preds = self.fit(X_train, y_train).predict(X_test)
466            # check error on y_test
467            oos_err = getattr(metrics, scoring + "_score")(y_true=y_test, y_pred=preds)
468            result = namedtuple("result", res._fields + ("test_" + scoring,))
469            return result(*res, oos_err)
470        else:
471            return res
472
473    def lazy_cross_val_optim(
474        self,
475        X_train,
476        y_train,
477        X_test=None,
478        y_test=None,
479        scoring="accuracy",
480        surrogate_objs=None,
481        customize=False,
482        cv=5,
483        n_jobs=None,
484        n_init=10,
485        n_iter=190,
486        abs_tol=1e-3,
487        verbose=1,
488        seed=123,
489    ):
490        """Automated Cross-validation function and hyperparameters' search using multiple surrogates
491
492        Parameters:
493
494            X_train: array-like,
495                Training vectors, where rows is the number of samples
496                and columns is the number of features.
497
498            y_train: array-like,
499                Training target values, with one value per
500                training sample.
501
502            X_test: array-like,
503                Testing vectors, where rows is the number of samples
504                and columns is the number of features.
505
506            y_test: array-like,
507                Testing target values, with one value per
508                testing sample.
509
510            scoring: str
511                scoring metric; see https://scikit-learn.org/stable/modules/model_evaluation.html#the-scoring-parameter-defining-model-evaluation-rules
512
513            surrogate_objs: object names as a list of strings;
514                ML models for estimating the uncertainty around the objective function
515
516            customize: boolean
517                if True, the surrogate is transformed into a quasi-randomized network (default is False)
518
519            cv: int;
520                number of cross-validation folds
521
522            n_jobs: int;
523                number of jobs for parallel execution
524
525            n_init: an integer;
526                number of points in the initial setting, when `x_init` and `y_init` are not provided
527
528            n_iter: an integer;
529                number of iterations of the minimization algorithm
530
531            abs_tol: a float;
532                tolerance for convergence of the optimizer (early stopping based on acquisition function)
533
534            verbose: int
535                controls verbosity
536
537            seed: int
538                reproducibility seed
539
540        Examples:
541
542            ```python
543            ```
544        """
545
546        removed_regressors = [
547            "TheilSenRegressor",
548            "ARDRegression",
549            "CCA",
550            "GaussianProcessRegressor",
551            "GradientBoostingRegressor",
552            "HistGradientBoostingRegressor",
553            "IsotonicRegression",
554            "MultiOutputRegressor",
555            "MultiTaskElasticNet",
556            "MultiTaskElasticNetCV",
557            "MultiTaskLasso",
558            "MultiTaskLassoCV",
559            "OrthogonalMatchingPursuit",
560            "OrthogonalMatchingPursuitCV",
561            "PLSCanonical",
562            "PLSRegression",
563            "RadiusNeighborsRegressor",
564            "RegressorChain",
565            "StackingRegressor",
566            "VotingRegressor",
567        ]
568
569        results = []
570
571        for est in all_estimators():
572
573            if surrogate_objs is None:
574
575                if issubclass(est[1], RegressorMixin) and (
576                    est[0] not in removed_regressors
577                ):
578                    try:
579                        if customize == True:
580                            surr_obj = ns.CustomClassifier(obj=est[1]())
581                        else:
582                            surr_obj = est[1]()
583                        res = self.cross_val_optim(
584                            X_train=X_train,
585                            y_train=y_train,
586                            X_test=X_test,
587                            y_test=y_test,
588                            surrogate_obj=surr_obj,
589                            cv=cv,
590                            n_jobs=n_jobs,
591                            scoring=scoring,
592                            n_init=n_init,
593                            n_iter=n_iter,
594                            abs_tol=abs_tol,
595                            verbose=verbose,
596                            seed=seed,
597                        )
598                        if customize == True:
599                            results.append((f"CustomClassifier({est[0]})", res))
600                        else:
601                            results.append((est[0], res))
602                    except:
603                        pass
604
605            else:
606
607                if (
608                    issubclass(est[1], RegressorMixin)
609                    and (est[0] not in removed_regressors)
610                    and est[0] in surrogate_objs
611                ):
612                    try:
613                        if customize == True:
614                            surr_obj = ns.CustomClassifier(obj=est[1]())
615                        else:
616                            surr_obj = est[1]()
617                        res = self.cross_val_optim(
618                            X_train=X_train,
619                            y_train=y_train,
620                            X_test=X_test,
621                            y_test=y_test,
622                            surrogate_obj=surr_obj,
623                            cv=cv,
624                            n_jobs=n_jobs,
625                            scoring=scoring,
626                            n_init=n_init,
627                            n_iter=n_iter,
628                            abs_tol=abs_tol,
629                            verbose=verbose,
630                            seed=seed,
631                        )
632                        if customize == True:
633                            results.append((f"CustomClassifier({est[0]})", res))
634                        else:
635                            results.append((est[0], res))
636                    except:
637                        pass
638
639        return results
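
The `Examples` block in the docstring above is left empty. The following is a minimal, hedged sketch of how the documented signature could be called, assuming (as the surrounding documentation suggests) that `cross_val_optim` and `lazy_cross_val_optim` are methods of `DeepClassifier`; the dataset, base learner and the two surrogate names are illustrative choices, not taken from the library's own examples.

```python
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegressionCV

# illustrative data and base learner
X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)

clf = ns.DeepClassifier(LogisticRegressionCV())

# restrict the scan to two scikit-learn surrogates (names must match
# scikit-learn estimator class names); each element of `results` is a
# (surrogate name, optimization result) pair
results = clf.lazy_cross_val_optim(
    X_train, y_train, X_test=X_test, y_test=y_test,
    scoring="accuracy",
    surrogate_objs=["RidgeCV", "ElasticNet"],
    cv=5, n_init=10, n_iter=50, verbose=0, seed=123,
)
print(results)
```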

Deep Classifier

Parameters:

obj: an object
    A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification

n_layers: int (default=3)
    Number of layers. `n_layers = 1` is a simple `CustomClassifier`

verbose : int, optional (default=0)
    Monitor progress when fitting.

All the other parameters are nnetsauce `CustomClassifier`'s

Examples:

import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegressionCV
data = load_breast_cancer()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
obj = LogisticRegressionCV()
clf = ns.DeepClassifier(obj)
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))

def fit(self, X, y, **kwargs):
142    def fit(self, X, y, **kwargs):
143        """Fit Classification algorithms to X and y.
144        Parameters
145        ----------
146        X : array-like,
147            Training vectors, where rows is the number of samples
148            and columns is the number of features.
149        y : array-like,
150            Target values, with one value per
151            training sample.
152        **kwargs: dict
153            Additional parameters to be passed to the fit method
154            of the base learner. For example, `sample_weight`.
155
156        Returns
157        -------
158        A fitted object
159        """
160
161        self.classes_ = np.unique(y)
162        self.n_classes_ = len(
163            self.classes_
164        )  # for compatibility with scikit-learn
165
166        if isinstance(X, np.ndarray):
167            X = pd.DataFrame(X)
168
169        # init layer
170        self.stacked_obj = CustomClassifier(
171            obj=self.stacked_obj,
172            n_hidden_features=self.n_hidden_features,
173            activation_name=self.activation_name,
174            a=self.a,
175            nodes_sim=self.nodes_sim,
176            bias=self.bias,
177            dropout=self.dropout,
178            direct_link=self.direct_link,
179            n_clusters=self.n_clusters,
180            cluster_encode=self.cluster_encode,
181            type_clust=self.type_clust,
182            type_scaling=self.type_scaling,
183            col_sample=self.col_sample,
184            row_sample=self.row_sample,
185            cv_calibration=None,
186            calibration_method=None,
187            seed=self.seed,
188            backend=self.backend,
189        )
190
191        if self.verbose > 0:
192            iterator = tqdm(range(self.n_layers - 1))
193        else:
194            iterator = range(self.n_layers - 1)
195
196        for _ in iterator:
197            self.stacked_obj = deepcopy(
198                CustomClassifier(
199                    obj=self.stacked_obj,
200                    n_hidden_features=self.n_hidden_features,
201                    activation_name=self.activation_name,
202                    a=self.a,
203                    nodes_sim=self.nodes_sim,
204                    bias=self.bias,
205                    dropout=self.dropout,
206                    direct_link=self.direct_link,
207                    n_clusters=self.n_clusters,
208                    cluster_encode=self.cluster_encode,
209                    type_clust=self.type_clust,
210                    type_scaling=self.type_scaling,
211                    col_sample=self.col_sample,
212                    row_sample=self.row_sample,
213                    cv_calibration=None,
214                    calibration_method=None,
215                    seed=self.seed,
216                    backend=self.backend,
217                )
218            )
219            self.stacked_obj.fit(X, y, **kwargs)
220        
221        return self

Fit Classification algorithms to X and y.

Parameters

X : array-like
    Training vectors, where rows is the number of samples
    and columns is the number of features.

y : array-like
    Target values, with one value per training sample.

**kwargs: dict
    Additional parameters to be passed to the fit method
    of the base learner. For example, `sample_weight`.

Returns

A fitted object
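
As noted above, extra keyword arguments are forwarded to the base learner's `fit`. A small sketch, reusing `clf`, `X_train` and `y_train` from the example above, with a purely illustrative weighting scheme:

```python
import numpy as np

# hypothetical weights: up-weight the positive class (illustration only)
weights = np.where(y_train == 1, 2.0, 1.0)
clf.fit(X_train, y_train, sample_weight=weights)
```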

def predict(self, X):
254    def predict(self, X):
255        return self.stacked_obj.predict(X)

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
def predict_proba(self, X):
257    def predict_proba(self, X):
258        return self.stacked_obj.predict_proba(X)

Predict probabilities for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

probability estimates for test data: {array-like}
def score(self, X, y, scoring=None):
260    def score(self, X, y, scoring=None):
261        return self.stacked_obj.score(X, y, scoring)

Scoring function for classification.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

scoring: str
    scoring method (default is accuracy)

Returns:

score: float
class DeepRegressor(nnetsauce.CustomRegressor, sklearn.base.RegressorMixin):
 13class DeepRegressor(CustomRegressor, RegressorMixin):
 14    """
 15    Deep Regressor
 16
 17    Parameters:
 18
 19        obj: an object
 20            A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification
 21
 22        verbose : int, optional (default=0)
 23            Monitor progress when fitting.
 24
 25        n_layers: int (default=2)
 26            Number of layers. `n_layers = 1` is a simple `CustomRegressor`
 27
 28        All the other parameters are nnetsauce `CustomRegressor`'s
 29
 30    Examples:
 31
 32        ```python
 33        import nnetsauce as ns
 34        from sklearn.datasets import load_diabetes
 35        from sklearn.model_selection import train_test_split
 36        from sklearn.linear_model import RidgeCV
 37        data = load_diabetes()
 38        X = data.data
 39        y= data.target
 40        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
 41        obj = RidgeCV()
 42        clf = ns.DeepRegressor(obj)
 43        clf.fit(X_train, y_train)
 44        print(clf.score(X_test, y_test))
 45        ```
 46
 47    """
 48
 49    def __init__(
 50        self,
 51        obj,
 52        # Defining depth
 53        n_layers=2,
 54        verbose=0,
 55        # CustomRegressor attributes
 56        n_hidden_features=5,
 57        activation_name="relu",
 58        a=0.01,
 59        nodes_sim="sobol",
 60        bias=True,
 61        dropout=0,
 62        direct_link=True,
 63        n_clusters=2,
 64        cluster_encode=True,
 65        type_clust="kmeans",
 66        type_scaling=("std", "std", "std"),
 67        col_sample=1,
 68        row_sample=1,
 69        level=None,
 70        pi_method="splitconformal",
 71        seed=123,
 72        backend="cpu",
 73    ):
 74        super().__init__(
 75            obj=obj,
 76            n_hidden_features=n_hidden_features,
 77            activation_name=activation_name,
 78            a=a,
 79            nodes_sim=nodes_sim,
 80            bias=bias,
 81            dropout=dropout,
 82            direct_link=direct_link,
 83            n_clusters=n_clusters,
 84            cluster_encode=cluster_encode,
 85            type_clust=type_clust,
 86            type_scaling=type_scaling,
 87            col_sample=col_sample,
 88            row_sample=row_sample,
 89            level=level,
 90            pi_method=pi_method,
 91            seed=seed,
 92            backend=backend,
 93        )
 94
 95        assert n_layers >= 1, "must have n_layers >= 1"
 96
 97        self.stacked_obj = deepcopy(obj)
 98        self.verbose = verbose
 99        self.n_layers = n_layers
100        self.level = level
101        self.pi_method = pi_method
102        self.coef_ = None
103
104    def fit(self, X, y, **kwargs):
105        """Fit Regression algorithms to X and y.
106        Parameters
107        ----------
108        X : array-like,
109            Training vectors, where rows is the number of samples
110            and columns is the number of features.
111        y : array-like,
112            Target values, with one value per
113            training sample.
114        **kwargs: dict
115            Additional parameters to be passed to the fit method
116            of the base learner. For example, `sample_weight`.
117        Returns
118        -------
119        A fitted object
120        """
121
122        if isinstance(X, np.ndarray):
123            X = pd.DataFrame(X)
124
125        # init layer
126        self.stacked_obj = CustomRegressor(
127            obj=self.stacked_obj,
128            n_hidden_features=self.n_hidden_features,
129            activation_name=self.activation_name,
130            a=self.a,
131            nodes_sim=self.nodes_sim,
132            bias=self.bias,
133            dropout=self.dropout,
134            direct_link=self.direct_link,
135            n_clusters=self.n_clusters,
136            cluster_encode=self.cluster_encode,
137            type_clust=self.type_clust,
138            type_scaling=self.type_scaling,
139            col_sample=self.col_sample,
140            row_sample=self.row_sample,
141            seed=self.seed,
142            backend=self.backend,
143        )
144
145        if self.verbose > 0:
146            iterator = tqdm(range(self.n_layers - 1))
147        else:
148            iterator = range(self.n_layers - 1)
149
150        for _ in iterator:
151            self.stacked_obj = deepcopy(
152                CustomRegressor(
153                    obj=self.stacked_obj,
154                    n_hidden_features=self.n_hidden_features,
155                    activation_name=self.activation_name,
156                    a=self.a,
157                    nodes_sim=self.nodes_sim,
158                    bias=self.bias,
159                    dropout=self.dropout,
160                    direct_link=self.direct_link,
161                    n_clusters=self.n_clusters,
162                    cluster_encode=self.cluster_encode,
163                    type_clust=self.type_clust,
164                    type_scaling=self.type_scaling,
165                    col_sample=self.col_sample,
166                    row_sample=self.row_sample,
167                    seed=self.seed,
168                    backend=self.backend,
169                )
170            )
171
172        self.stacked_obj.fit(X, y, **kwargs)
173
174        if self.level is not None:
175            self.stacked_obj = PredictionInterval(
176                obj=self.stacked_obj, method=self.pi_method, level=self.level
177            )
178
179        if hasattr(self.stacked_obj, "clustering_obj_"):
180            self.clustering_obj_ = self.stacked_obj.clustering_obj_
181
182        if hasattr(self.stacked_obj, "coef_"):
183            self.coef_ = self.stacked_obj.coef_
184
185        if hasattr(self.stacked_obj, "scaler_"):
186            self.scaler_ = self.stacked_obj.scaler_
187
188        if hasattr(self.stacked_obj, "nn_scaler_"):
189            self.nn_scaler_ = self.stacked_obj.nn_scaler_
190
191        if hasattr(self.stacked_obj, "clustering_scaler_"):
192            self.clustering_scaler_ = self.stacked_obj.clustering_scaler_
193
194        return self
195
196    def partial_fit(self, X, y, **kwargs):
197        """Fit Regression algorithms to X and y.
198        Parameters
199        ----------
200        X : array-like,
201            Training vectors, where rows is the number of samples
202            and columns is the number of features.
203        y : array-like,
204            Target values, with one value per
205            training sample.
206        **kwargs: dict
207            Additional parameters to be passed to the fit method
208            of the base learner. For example, `sample_weight`.
209        Returns
210        -------
211        A fitted object
212        """
213        assert hasattr(self, "stacked_obj"), "model must be fitted first"
214        current_obj = self.stacked_obj
215        for _ in range(self.n_layers):
216            try:
217                input_X = current_obj.obj.cook_test_set(X)
218                current_obj.obj.partial_fit(input_X, y, **kwargs)
219                try:
220                    current_obj = current_obj.obj
221                except AttributeError:
222                    pass
223            except ValueError as e:
224                print(e)
225                pass
226        return self
227
228    def predict(self, X, **kwargs):
229        if self.level is not None:
230            return self.stacked_obj.predict(X, return_pi=True)
231        return self.stacked_obj.predict(X, **kwargs)
232
233    def score(self, X, y, scoring=None):
234        return self.stacked_obj.score(X, y, scoring)

Deep Regressor

Parameters:

obj: an object
    A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification

verbose : int, optional (default=0)
    Monitor progress when fitting.

n_layers: int (default=2)
    Number of layers. `n_layers = 1` is a simple `CustomRegressor`

All the other parameters are nnetsauce `CustomRegressor`'s

Examples:

import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.linear_model import RidgeCV
data = load_diabetes()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
obj = RidgeCV()
clf = ns.DeepRegressor(obj)
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))

def fit(self, X, y, **kwargs):
104    def fit(self, X, y, **kwargs):
105        """Fit Regression algorithms to X and y.
106        Parameters
107        ----------
108        X : array-like,
109            Training vectors, where rows is the number of samples
110            and columns is the number of features.
111        y : array-like,
112            Target values, with one value per
113            training sample.
114        **kwargs: dict
115            Additional parameters to be passed to the fit method
116            of the base learner. For example, `sample_weight`.
117        Returns
118        -------
119        A fitted object
120        """
121
122        if isinstance(X, np.ndarray):
123            X = pd.DataFrame(X)
124
125        # init layer
126        self.stacked_obj = CustomRegressor(
127            obj=self.stacked_obj,
128            n_hidden_features=self.n_hidden_features,
129            activation_name=self.activation_name,
130            a=self.a,
131            nodes_sim=self.nodes_sim,
132            bias=self.bias,
133            dropout=self.dropout,
134            direct_link=self.direct_link,
135            n_clusters=self.n_clusters,
136            cluster_encode=self.cluster_encode,
137            type_clust=self.type_clust,
138            type_scaling=self.type_scaling,
139            col_sample=self.col_sample,
140            row_sample=self.row_sample,
141            seed=self.seed,
142            backend=self.backend,
143        )
144
145        if self.verbose > 0:
146            iterator = tqdm(range(self.n_layers - 1))
147        else:
148            iterator = range(self.n_layers - 1)
149
150        for _ in iterator:
151            self.stacked_obj = deepcopy(
152                CustomRegressor(
153                    obj=self.stacked_obj,
154                    n_hidden_features=self.n_hidden_features,
155                    activation_name=self.activation_name,
156                    a=self.a,
157                    nodes_sim=self.nodes_sim,
158                    bias=self.bias,
159                    dropout=self.dropout,
160                    direct_link=self.direct_link,
161                    n_clusters=self.n_clusters,
162                    cluster_encode=self.cluster_encode,
163                    type_clust=self.type_clust,
164                    type_scaling=self.type_scaling,
165                    col_sample=self.col_sample,
166                    row_sample=self.row_sample,
167                    seed=self.seed,
168                    backend=self.backend,
169                )
170            )
171
172        self.stacked_obj.fit(X, y, **kwargs)
173
174        if self.level is not None:
175            self.stacked_obj = PredictionInterval(
176                obj=self.stacked_obj, method=self.pi_method, level=self.level
177            )
178
179        if hasattr(self.stacked_obj, "clustering_obj_"):
180            self.clustering_obj_ = self.stacked_obj.clustering_obj_
181
182        if hasattr(self.stacked_obj, "coef_"):
183            self.coef_ = self.stacked_obj.coef_
184
185        if hasattr(self.stacked_obj, "scaler_"):
186            self.scaler_ = self.stacked_obj.scaler_
187
188        if hasattr(self.stacked_obj, "nn_scaler_"):
189            self.nn_scaler_ = self.stacked_obj.nn_scaler_
190
191        if hasattr(self.stacked_obj, "clustering_scaler_"):
192            self.clustering_scaler_ = self.stacked_obj.clustering_scaler_
193
194        return self

Fit Regression algorithms to X and y.

Parameters

X : array-like
    Training vectors, where rows is the number of samples
    and columns is the number of features.

y : array-like
    Target values, with one value per training sample.

**kwargs: dict
    Additional parameters to be passed to the fit method
    of the base learner. For example, `sample_weight`.

Returns

A fitted object

def predict(self, X, **kwargs):
228    def predict(self, X, **kwargs):
229        if self.level is not None:
230            return self.stacked_obj.predict(X, return_pi=True)
231        return self.stacked_obj.predict(X, **kwargs)

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

level: int
    Level of confidence (default = 95)

method: str
    `None`, or 'splitconformal', 'localconformal'
    prediction (if you specify `return_pi = True`)

**kwargs: additional parameters
        `return_pi = True` for conformal prediction,
        with `method` in ('splitconformal', 'localconformal')
        or `return_std = True` for `self.obj` in
        (`sklearn.linear_model.BayesianRidge`,
        `sklearn.linear_model.ARDRegression`,
        `sklearn.gaussian_process.GaussianProcessRegressor`)

Returns:

model predictions:
    an array if no uncertainty quantification is requested,
    or a tuple (mean, standard deviation, lower and upper
    prediction interval) if `return_std = True`, or prediction
    intervals and simulations if `return_pi = True`
def score(self, X, y, scoring=None):
233    def score(self, X, y, scoring=None):
234        return self.stacked_obj.score(X, y, scoring)

Compute the score of the model.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

scoring: str
    scoring method

Returns:

score: float
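
The examples above fit `DeepRegressor` with its defaults. The constructor also exposes `level` and `pi_method` (see the source listing); when `level` is set, `fit` wraps the stacked model in a `PredictionInterval` and `predict` returns interval information. A minimal sketch, with an illustrative dataset and base learner (the exact structure of the returned object is not asserted here):

```python
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.linear_model import RidgeCV

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)

# request 95% split-conformal prediction intervals
reg = ns.DeepRegressor(RidgeCV(), n_layers=2, level=95, pi_method="splitconformal")
reg.fit(X_train, y_train)
print(reg.predict(X_test))  # with `level` set, predictions include interval bounds
```
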
class DeepMTS(nnetsauce.MTS):
 11class DeepMTS(MTS):
 12    """Univariate and multivariate time series (DeepMTS) forecasting with Quasi-Randomized networks (Work in progress)
 13
 14    Parameters:
 15
 16        obj: object.
 17            any object containing a method fit (obj.fit()) and a method predict
 18            (obj.predict()).
 19
 20        n_layers: int.
 21            number of layers in the neural network.
 22
 23        n_hidden_features: int.
 24            number of nodes in the hidden layer.
 25
 26        activation_name: str.
 27            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'.
 28
 29        a: float.
 30            hyperparameter for 'prelu' or 'elu' activation function.
 31
 32        nodes_sim: str.
 33            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 34            'uniform'.
 35
 36        bias: boolean.
 37            indicates if the hidden layer contains a bias term (True) or not
 38            (False).
 39
 40        dropout: float.
 41            regularization parameter; (random) percentage of nodes dropped out
 42            of the training.
 43
 44        direct_link: boolean.
 45            indicates if the original predictors are included (True) in model's fitting or not (False).
 46
 47        n_clusters: int.
 48            number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering).
 49
 50        cluster_encode: bool.
 51            defines how the variable containing clusters is treated (default is one-hot)
 52            if `False`, then labels are used, without one-hot encoding.
 53
 54        type_clust: str.
 55            type of clustering method: currently k-means ('kmeans') or Gaussian
 56            Mixture Model ('gmm').
 57
 58        type_scaling: a tuple of 3 strings.
 59            scaling methods for inputs, hidden layer, and clustering respectively
 60            (and when relevant).
 61            Currently available: standardization ('std') or MinMax scaling ('minmax').
 62
 63        lags: int.
 64            number of lags used for each time series.
 65
 66        type_pi: str.
 67            type of prediction interval; currently:
 68            - "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case
 69            - "kde": based on Kernel Density Estimation of in-sample residuals
 70            - "bootstrap": based on independent bootstrap of in-sample residuals
 71            - "block-bootstrap": based on basic block bootstrap of in-sample residuals
 72            - "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals
 73            - "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals
 74            - "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals
 75            - "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals
 76            - "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals
 77            - "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals
 78
 79        block_size: int.
 80            size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap").
 81            Default is round(3.15*(n_residuals^(1/3)))
 82
 83        replications: int.
 84            number of replications (if needed, for predictive simulation). Default is 'None'.
 85
 86        kernel: str.
 87            the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'.
 88
 89        agg: str.
 90            either "mean" or "median" for simulation of bootstrap aggregating
 91
 92        seed: int.
 93            reproducibility seed for nodes_sim=='uniform' or predictive simulation.
 94
 95        backend: str.
 96            "cpu" or "gpu" or "tpu".
 97
 98        verbose: int.
 99            0: not printing; 1: printing
100
101        show_progress: bool.
102            True: progress bar when fitting each series; False: no progress bar when fitting each series
103
104    Attributes:
105
106        fit_objs_: dict
107            objects adjusted to each individual time series
108
109        y_: {array-like}
110            DeepMTS responses (most recent observations first)
111
112        X_: {array-like}
113            DeepMTS lags
114
115        xreg_: {array-like}
116            external regressors
117
118        y_means_: dict
119            a dictionary of each series mean values
120
121        preds_: {array-like}
122            successive model predictions
123
124        preds_std_: {array-like}
125            standard deviation around the predictions
126
127        return_std_: boolean
128            return uncertainty or not (set in predict)
129
130        df_: data frame
131            the input data frame, in case a data.frame is provided to `fit`
132
133    Examples:
134
135    Example 1:
136
137        ```python
138        import nnetsauce as ns
139        import numpy as np
140        from sklearn import linear_model
141        np.random.seed(123)
142
143        M = np.random.rand(10, 3)
144        M[:,0] = 10*M[:,0]
145        M[:,2] = 25*M[:,2]
146        print(M)
147
148        # Adjust Bayesian Ridge
149        regr4 = linear_model.BayesianRidge()
150        obj_DeepMTS = ns.DeepMTS(regr4, lags = 1, n_hidden_features=5)
151        obj_DeepMTS.fit(M)
152        print(obj_DeepMTS.predict())
153
154        # with credible intervals
155        print(obj_DeepMTS.predict(return_std=True, level=80))
156
157        print(obj_DeepMTS.predict(return_std=True, level=95))
158        ```
159
160    Example 2:
161
162        ```python
163        import nnetsauce as ns
164        import numpy as np
165        from sklearn import linear_model
166
167        dataset = {
168        'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
169        'series1' : [34, 30, 35.6, 33.3, 38.1],
170        'series2' : [4, 5.5, 5.6, 6.3, 5.1],
171        'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
172        df = pd.DataFrame(dataset).set_index('date')
173        print(df)
174
175        # Adjust Bayesian Ridge
176        regr5 = linear_model.BayesianRidge()
177        obj_DeepMTS = ns.DeepMTS(regr5, lags = 1, n_hidden_features=5)
178        obj_DeepMTS.fit(df)
179        print(obj_DeepMTS.predict())
180
181        # with credible intervals
182        print(obj_DeepMTS.predict(return_std=True, level=80))
183
184        print(obj_DeepMTS.predict(return_std=True, level=95))
185        ```
186
187    """
188
189    # construct the object -----
190
191    def __init__(
192        self,
193        obj,
194        n_layers=3,
195        n_hidden_features=5,
196        activation_name="relu",
197        a=0.01,
198        nodes_sim="sobol",
199        bias=True,
200        dropout=0,
201        direct_link=True,
202        n_clusters=2,
203        cluster_encode=True,
204        type_clust="kmeans",
205        type_scaling=("std", "std", "std"),
206        lags=1,
207        type_pi="kde",
208        block_size=None,
209        replications=None,
210        kernel=None,
211        agg="mean",
212        seed=123,
213        backend="cpu",
214        verbose=0,
215        show_progress=True,
216    ):
217        assert int(lags) == lags, "parameter 'lags' should be an integer"
218        assert n_layers >= 1, "must have n_layers >= 1"
219        self.n_layers = int(n_layers)
220
221        if self.n_layers > 1:
222
223            for _ in range(self.n_layers - 1):
224                obj = CustomRegressor(
225                    obj=deepcopy(obj),
226                    n_hidden_features=n_hidden_features,
227                    activation_name=activation_name,
228                    a=a,
229                    nodes_sim=nodes_sim,
230                    bias=bias,
231                    dropout=dropout,
232                    direct_link=direct_link,
233                    n_clusters=n_clusters,
234                    cluster_encode=cluster_encode,
235                    type_clust=type_clust,
236                    type_scaling=type_scaling,
237                    seed=seed,
238                    backend=backend,
239                )
240
241        self.obj = deepcopy(obj)
242        super().__init__(
243            obj=self.obj,
244            n_hidden_features=n_hidden_features,
245            activation_name=activation_name,
246            a=a,
247            nodes_sim=nodes_sim,
248            bias=bias,
249            dropout=dropout,
250            direct_link=direct_link,
251            n_clusters=n_clusters,
252            cluster_encode=cluster_encode,
253            type_clust=type_clust,
254            type_scaling=type_scaling,
255            lags=lags,
256            type_pi=type_pi,
257            block_size=block_size,
258            replications=replications,
259            kernel=kernel,
260            agg=agg,
261            seed=seed,
262            backend=backend,
263            verbose=verbose,
264            show_progress=show_progress,
265        )

Univariate and multivariate time series (DeepMTS) forecasting with Quasi-Randomized networks (Work in progress)

Parameters:

obj: object.
    any object containing a method fit (obj.fit()) and a method predict
    (obj.predict()).

n_layers: int.
    number of layers in the neural network.

n_hidden_features: int.
    number of nodes in the hidden layer.

activation_name: str.
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'.

a: float.
    hyperparameter for 'prelu' or 'elu' activation function.

nodes_sim: str.
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'.

bias: boolean.
    indicates if the hidden layer contains a bias term (True) or not
    (False).

dropout: float.
    regularization parameter; (random) percentage of nodes dropped out
    of the training.

direct_link: boolean.
    indicates if the original predictors are included (True) in model's fitting or not (False).

n_clusters: int.
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering).

cluster_encode: bool.
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding.

type_clust: str.
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm').

type_scaling: a tuple of 3 strings.
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax').

lags: int.
    number of lags used for each time series.

type_pi: str.
    type of prediction interval; currently:
    - "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case
    - "kde": based on Kernel Density Estimation of in-sample residuals
    - "bootstrap": based on independent bootstrap of in-sample residuals
    - "block-bootstrap": based on basic block bootstrap of in-sample residuals
    - "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals
    - "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals
    - "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals
    - "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals
    - "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals
    - "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals

block_size: int.
    size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap").
    Default is round(3.15*(n_residuals^(1/3)))

replications: int.
    number of replications (if needed, for predictive simulation). Default is 'None'.

kernel: str.
    the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'.

agg: str.
    either "mean" or "median" for simulation of bootstrap aggregating

seed: int.
    reproducibility seed for nodes_sim=='uniform' or predictive simulation.

backend: str.
    "cpu" or "gpu" or "tpu".

verbose: int.
    0: not printing; 1: printing

show_progress: bool.
    True: progress bar when fitting each series; False: no progress bar when fitting each series

Attributes:

fit_objs_: dict
    objects adjusted to each individual time series

y_: {array-like}
    DeepMTS responses (most recent observations first)

X_: {array-like}
    DeepMTS lags

xreg_: {array-like}
    external regressors

y_means_: dict
    a dictionary of each series mean values

preds_: {array-like}
    successive model predictions

preds_std_: {array-like}
    standard deviation around the predictions

return_std_: boolean
    return uncertainty or not (set in predict)

df_: data frame
    the input data frame, in case a data.frame is provided to `fit`

Examples:

Example 1:

import nnetsauce as ns
import numpy as np
from sklearn import linear_model
np.random.seed(123)

M = np.random.rand(10, 3)
M[:,0] = 10*M[:,0]
M[:,2] = 25*M[:,2]
print(M)

# Adjust Bayesian Ridge
regr4 = linear_model.BayesianRidge()
obj_DeepMTS = ns.DeepMTS(regr4, lags = 1, n_hidden_features=5)
obj_DeepMTS.fit(M)
print(obj_DeepMTS.predict())

# with credible intervals
print(obj_DeepMTS.predict(return_std=True, level=80))

print(obj_DeepMTS.predict(return_std=True, level=95))

Example 2:

import nnetsauce as ns
import numpy as np
import pandas as pd
from sklearn import linear_model

dataset = {
    'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
    'series1' : [34, 30, 35.6, 33.3, 38.1],
    'series2' : [4, 5.5, 5.6, 6.3, 5.1],
    'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
df = pd.DataFrame(dataset).set_index('date')
print(df)

# Adjust Bayesian Ridge
regr5 = linear_model.BayesianRidge()
obj_DeepMTS = ns.DeepMTS(regr5, lags = 1, n_hidden_features=5)
obj_DeepMTS.fit(df)
print(obj_DeepMTS.predict())

# with credible intervals
print(obj_DeepMTS.predict(return_std=True, level=80))

print(obj_DeepMTS.predict(return_std=True, level=95))
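
The parameter list above also describes simulation-based prediction intervals (`type_pi`, `replications`, `kernel`, `agg`). A hedged sketch using those documented parameters on synthetic data (the settings are illustrative, not a recommendation):

```python
import nnetsauce as ns
import numpy as np
from sklearn import linear_model

np.random.seed(123)
M = np.random.rand(50, 3)  # synthetic multivariate series

regr = linear_model.BayesianRidge()
# sequential split conformal prediction with KDE of standardized calibrated
# residuals, 100 predictive simulations aggregated by their mean
obj_DeepMTS = ns.DeepMTS(
    regr, n_layers=2, lags=2,
    type_pi="scp2-kde", replications=100, kernel="gaussian", agg="mean",
)
obj_DeepMTS.fit(M)
print(obj_DeepMTS.predict())
```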

class Downloader:
 6class Downloader:
 7    """Download datasets from data sources (R-universe for now)"""
 8
 9    def __init__(self):
10        self.pkgname = None
11        self.dataset = None
12        self.source = None
13        self.url = None
14        self.request = None
15
16    def download(
17        self,
18        pkgname="MASS",
19        dataset="Boston",
20        source="https://cran.r-universe.dev/",
21        **kwargs
22    ):
23        """Download datasets from data sources (R-universe for now)
24
25        Examples:
26
27        ```python
28        import nnetsauce as ns
29
30        downloader = ns.Downloader()
31        df = downloader.download(pkgname="MASS", dataset="Boston")
32        ```
33
34        """
35        self.pkgname = pkgname
36        self.dataset = dataset
37        self.source = source
38        self.url = source + pkgname + "/data/" + dataset + "/json"
39        self.request = requests.get(self.url)
40        return pd.DataFrame(self.request.json(), **kwargs)

Download datasets from data sources (R-universe for now)

def download( self, pkgname='MASS', dataset='Boston', source='https://cran.r-universe.dev/', **kwargs):
16    def download(
17        self,
18        pkgname="MASS",
19        dataset="Boston",
20        source="https://cran.r-universe.dev/",
21        **kwargs
22    ):
23        """Download datasets from data sources (R-universe for now)
24
25        Examples:
26
27        ```python
28        import nnetsauce as ns
29
30        downloader = ns.Downloader()
31        df = downloader.download(pkgname="MASS", dataset="Boston")
32        ```
33
34        """
35        self.pkgname = pkgname
36        self.dataset = dataset
37        self.source = source
38        self.url = source + pkgname + "/data/" + dataset + "/json"
39        self.request = requests.get(self.url)
40        return pd.DataFrame(self.request.json(), **kwargs)

Download datasets from data sources (R-universe for now)

Examples:

import nnetsauce as ns

downloader = ns.Downloader()
df = downloader.download(pkgname="MASS", dataset="Boston")
class GLMClassifier(nnetsauce.glm.glm.GLM, sklearn.base.ClassifierMixin):
 16class GLMClassifier(GLM, ClassifierMixin):
 17    """Generalized 'linear' models using quasi-randomized networks (classification)
 18
 19    Parameters:
 20
 21        n_hidden_features: int
 22            number of nodes in the hidden layer
 23
 24        lambda1: float
 25            regularization parameter for GLM coefficients on original features
 26
 27        alpha1: float
 28            controls compromise between l1 and l2 norm of GLM coefficients on original features
 29
 30        lambda2: float
 31            regularization parameter for GLM coefficients on nonlinear features
 32
 33        alpha2: float
 34            controls compromise between l1 and l2 norm of GLM coefficients on nonlinear features
 35
 36        activation_name: str
 37            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 38
 39        a: float
 40            hyperparameter for 'prelu' or 'elu' activation function
 41
 42        nodes_sim: str
 43            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 44            'uniform'
 45
 46        bias: boolean
 47            indicates if the hidden layer contains a bias term (True) or not
 48            (False)
 49
 50        dropout: float
 51            regularization parameter; (random) percentage of nodes dropped out
 52            of the training
 53
 54        direct_link: boolean
 55            indicates if the original predictors are included (True) in model's
 56            fitting or not (False)
 57
 58        n_clusters: int
 59            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 60                no clustering)
 61
 62        cluster_encode: bool
 63            defines how the variable containing clusters is treated (default is one-hot)
 64            if `False`, then labels are used, without one-hot encoding
 65
 66        type_clust: str
 67            type of clustering method: currently k-means ('kmeans') or Gaussian
 68            Mixture Model ('gmm')
 69
 70        type_scaling: a tuple of 3 strings
 71            scaling methods for inputs, hidden layer, and clustering respectively
 72            (and when relevant).
 73            Currently available: standardization ('std') or MinMax scaling ('minmax')
 74
 75        optimizer: object
 76            optimizer, from class nnetsauce.Optimizer
 77
 78        seed: int
 79            reproducibility seed for nodes_sim=='uniform'
 80
 81    Attributes:
 82
 83        beta_: vector
 84            regression coefficients
 85
 86    Examples:
 87
 88    See [https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_classification.py)
 89
 90    """
 91
 92    # construct the object -----
 93
 94    def __init__(
 95        self,
 96        n_hidden_features=5,
 97        lambda1=0.01,
 98        alpha1=0.5,
 99        lambda2=0.01,
100        alpha2=0.5,
101        family="expit",
102        activation_name="relu",
103        a=0.01,
104        nodes_sim="sobol",
105        bias=True,
106        dropout=0,
107        direct_link=True,
108        n_clusters=2,
109        cluster_encode=True,
110        type_clust="kmeans",
111        type_scaling=("std", "std", "std"),
112        optimizer=Optimizer(),
113        seed=123,
114    ):
115        super().__init__(
116            n_hidden_features=n_hidden_features,
117            lambda1=lambda1,
118            alpha1=alpha1,
119            lambda2=lambda2,
120            alpha2=alpha2,
121            activation_name=activation_name,
122            a=a,
123            nodes_sim=nodes_sim,
124            bias=bias,
125            dropout=dropout,
126            direct_link=direct_link,
127            n_clusters=n_clusters,
128            cluster_encode=cluster_encode,
129            type_clust=type_clust,
130            type_scaling=type_scaling,
131            optimizer=optimizer,
132            seed=seed,
133        )
134
135        self.family = family
136
137    def logit_loss(self, Y, row_index, XB):
138        self.n_classes = Y.shape[1]  # len(np.unique(y))
139        # Y = mo.one_hot_encode2(y, self.n_classes)
140        # Y = self.optimizer.one_hot_encode(y, self.n_classes)
141
142        # max_double = 709.0 # only if softmax
143        # XB[XB > max_double] = max_double
144        XB[XB > 709.0] = 709.0
145
146        if row_index is None:
147            return -np.mean(np.sum(Y * XB, axis=1) - logsumexp(XB))
148
149        return -np.mean(np.sum(Y[row_index, :] * XB, axis=1) - logsumexp(XB))
150
151    def expit_erf_loss(self, Y, row_index, XB):
152        # self.n_classes = len(np.unique(y))
153        # Y = mo.one_hot_encode2(y, self.n_classes)
154        # Y = self.optimizer.one_hot_encode(y, self.n_classes)
155        self.n_classes = Y.shape[1]
156
157        if row_index is None:
158            return -np.mean(np.sum(Y * XB, axis=1) - logsumexp(XB))
159
160        return -np.mean(np.sum(Y[row_index, :] * XB, axis=1) - logsumexp(XB))
161
162    def loss_func(
163        self, beta, group_index, X, Y, y, row_index=None, type_loss="logit", **kwargs
164    ):
165        res = {
166            "logit": self.logit_loss,
167            "expit": self.expit_erf_loss,
168            "erf": self.expit_erf_loss,
169        }
170
171        if row_index is None:
172            row_index = range(len(y))
173            XB = self.compute_XB(
174                X,
175                beta=np.reshape(beta, (X.shape[1], self.n_classes), order="F"),
176            )
177
178            return res[type_loss](Y, row_index, XB) + self.compute_penalty(
179                group_index=group_index, beta=beta
180            )
181
182        XB = self.compute_XB(
183            X,
184            beta=np.reshape(beta, (X.shape[1], self.n_classes), order="F"),
185            row_index=row_index,
186        )
187
188        return res[type_loss](Y, row_index, XB) + self.compute_penalty(
189            group_index=group_index, beta=beta
190        )
191
192    def fit(self, X, y, **kwargs):
193        """Fit GLM model to training data (X, y).
194
195        Args:
196
197            X: {array-like}, shape = [n_samples, n_features]
198                Training vectors, where n_samples is the number
199                of samples and n_features is the number of features.
200
201            y: array-like, shape = [n_samples]
202                Target values.
203
204            **kwargs: additional parameters to be passed to
205                    self.cook_training_set or self.obj.fit
206
207        Returns:
208
209            self: object
210
211        """
212
213        assert mx.is_factor(
214            y
215        ), "y must contain only integers"  # change is_factor and subsampling everywhere
216
217        self.classes_ = np.unique(y)  # for compatibility with sklearn
218        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
219
220        self.beta_ = None
221
222        n, p = X.shape
223
224        self.group_index = n * X.shape[1]
225
226        self.n_classes = len(np.unique(y))
227
228        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
229
230        # Y = mo.one_hot_encode2(output_y, self.n_classes)
231        Y = self.optimizer.one_hot_encode(output_y, self.n_classes)
232
233        # initialization
234        beta_ = np.linalg.lstsq(scaled_Z, Y, rcond=None)[0]
235
236        # optimization
237        # fit(self, loss_func, response, x0, **kwargs):
238        # loss_func(self, beta, group_index, X, y,
239        #          row_index=None, type_loss="gaussian",
240        #          **kwargs)
241        self.optimizer.fit(
242            self.loss_func,
243            response=y,
244            x0=beta_.flatten(order="F"),
245            group_index=self.group_index,
246            X=scaled_Z,
247            Y=Y,
248            y=y,
249            type_loss=self.family,
250        )
251
252        self.beta_ = self.optimizer.results[0]
253        self.classes_ = np.unique(y)
254
255        return self
256
257    def predict(self, X, **kwargs):
258        """Predict test data X.
259
260        Args:
261
262            X: {array-like}, shape = [n_samples, n_features]
263                Training vectors, where n_samples is the number
264                of samples and n_features is the number of features.
265
266            **kwargs: additional parameters to be passed to
267                    self.cook_test_set
268
269        Returns:
270
271            model predictions: {array-like}
272
273        """
274
275        return np.argmax(self.predict_proba(X, **kwargs), axis=1)
276
277    def predict_proba(self, X, **kwargs):
278        """Predict probabilities for test data X.
279
280        Args:
281
282            X: {array-like}, shape = [n_samples, n_features]
283                Training vectors, where n_samples is the number
284                of samples and n_features is the number of features.
285
286            **kwargs: additional parameters to be passed to
287                    self.cook_test_set
288
289        Returns:
290
291            probability estimates for test data: {array-like}
292
293        """
294        if len(X.shape) == 1:
295            n_features = X.shape[0]
296            new_X = mo.rbind(
297                X.reshape(1, n_features),
298                np.ones(n_features).reshape(1, n_features),
299            )
300
301            Z = self.cook_test_set(new_X, **kwargs)
302
303        else:
304            Z = self.cook_test_set(X, **kwargs)
305
306        ZB = mo.safe_sparse_dot(
307            Z,
308            self.beta_.reshape(
309                self.n_classes,
310                X.shape[1] + self.n_hidden_features + self.n_clusters,
311            ).T,
312        )
313
314        if self.family == "logit":
315            exp_ZB = np.exp(ZB)
316
317            return exp_ZB / exp_ZB.sum(axis=1)[:, None]
318
319        if self.family == "expit":
320            exp_ZB = expit(ZB)
321
322            return exp_ZB / exp_ZB.sum(axis=1)[:, None]
323
324        if self.family == "erf":
325            exp_ZB = 0.5 * (1 + erf(ZB))
326
327            return exp_ZB / exp_ZB.sum(axis=1)[:, None]
328
329    def score(self, X, y, scoring=None):
330        """Scoring function for classification.
331
332        Args:
333
334            X: {array-like}, shape = [n_samples, n_features]
335                Training vectors, where n_samples is the number
336                of samples and n_features is the number of features.
337
338            y: array-like, shape = [n_samples]
339                Target values.
340
341            scoring: str
342                scoring method (default is accuracy)
343
344        Returns:
345
346            score: float
347        """
348
349        if scoring is None:
350            scoring = "accuracy"
351
352        if scoring == "accuracy":
353            return skm2.accuracy_score(y, self.predict(X))
354
355        if scoring == "f1":
356            return skm2.f1_score(y, self.predict(X))
357
358        if scoring == "precision":
359            return skm2.precision_score(y, self.predict(X))
360
361        if scoring == "recall":
362            return skm2.recall_score(y, self.predict(X))
363
364        if scoring == "roc_auc":
365            return skm2.roc_auc_score(y, self.predict(X))
366
367        if scoring == "log_loss":
368            return skm2.log_loss(y, self.predict_proba(X))
369
370        if scoring == "balanced_accuracy":
371            return skm2.balanced_accuracy_score(y, self.predict(X))
372
373        if scoring == "average_precision":
374            return skm2.average_precision_score(y, self.predict(X))
375
376        if scoring == "neg_brier_score":
377            return -skm2.brier_score_loss(y, self.predict_proba(X))
378
379        if scoring == "neg_log_loss":
380            return -skm2.log_loss(y, self.predict_proba(X))

Generalized 'linear' models using quasi-randomized networks (classification)

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

lambda1: float
    regularization parameter for GLM coefficients on original features

alpha1: float
    controls compromise between l1 and l2 norm of GLM coefficients on original features

lambda2: float
    regularization parameter for GLM coefficients on nonlinear features

alpha2: float
    controls compromise between l1 and l2 norm of GLM coefficients on nonlinear features

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

optimizer: object
    optimizer, from class nnetsauce.Optimizer

seed: int
    reproducibility seed for nodes_sim=='uniform'

Attributes:

beta_: vector
    regression coefficients

Examples:

See https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_classification.py
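
A minimal usage sketch (illustrative, not taken from the linked script): it assumes GLMClassifier is exposed at the package top level like the other estimators and follows the sklearn-style fit/predict/predict_proba interface documented below.

```python
# Hypothetical, minimal usage sketch for GLMClassifier; hyperparameter
# values and the dataset are illustrative only.
import nnetsauce as ns
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)  # y holds integer class labels, as fit() requires
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123
)

clf = ns.GLMClassifier(n_hidden_features=5, seed=123)
clf.fit(X_train, y_train)
print(clf.predict(X_test))        # class labels (argmax of predict_proba)
print(clf.predict_proba(X_test))  # probabilities; each row sums to 1
print(clf.score(X_test, y_test))  # accuracy by default
```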

def fit(self, X, y, **kwargs):
192    def fit(self, X, y, **kwargs):
193        """Fit GLM model to training data (X, y).
194
195        Args:
196
197            X: {array-like}, shape = [n_samples, n_features]
198                Training vectors, where n_samples is the number
199                of samples and n_features is the number of features.
200
201            y: array-like, shape = [n_samples]
202                Target values.
203
204            **kwargs: additional parameters to be passed to
205                    self.cook_training_set or self.obj.fit
206
207        Returns:
208
209            self: object
210
211        """
212
213        assert mx.is_factor(
214            y
215        ), "y must contain only integers"  # change is_factor and subsampling everywhere
216
217        self.classes_ = np.unique(y)  # for compatibility with sklearn
218        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
219
220        self.beta_ = None
221
222        n, p = X.shape
223
224        self.group_index = n * X.shape[1]
225
226        self.n_classes = len(np.unique(y))
227
228        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
229
230        # Y = mo.one_hot_encode2(output_y, self.n_classes)
231        Y = self.optimizer.one_hot_encode(output_y, self.n_classes)
232
233        # initialization
234        beta_ = np.linalg.lstsq(scaled_Z, Y, rcond=None)[0]
235
236        # optimization
237        # fit(self, loss_func, response, x0, **kwargs):
238        # loss_func(self, beta, group_index, X, y,
239        #          row_index=None, type_loss="gaussian",
240        #          **kwargs)
241        self.optimizer.fit(
242            self.loss_func,
243            response=y,
244            x0=beta_.flatten(order="F"),
245            group_index=self.group_index,
246            X=scaled_Z,
247            Y=Y,
248            y=y,
249            type_loss=self.family,
250        )
251
252        self.beta_ = self.optimizer.results[0]
253        self.classes_ = np.unique(y)
254
255        return self

Fit GLM model to training data (X, y).

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
def predict(self, X, **kwargs):
257    def predict(self, X, **kwargs):
258        """Predict test data X.
259
260        Args:
261
262            X: {array-like}, shape = [n_samples, n_features]
263                Training vectors, where n_samples is the number
264                of samples and n_features is the number of features.
265
266            **kwargs: additional parameters to be passed to
267                    self.cook_test_set
268
269        Returns:
270
271            model predictions: {array-like}
272
273        """
274
275        return np.argmax(self.predict_proba(X, **kwargs), axis=1)

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
def predict_proba(self, X, **kwargs):
277    def predict_proba(self, X, **kwargs):
278        """Predict probabilities for test data X.
279
280        Args:
281
282            X: {array-like}, shape = [n_samples, n_features]
283                Training vectors, where n_samples is the number
284                of samples and n_features is the number of features.
285
286            **kwargs: additional parameters to be passed to
287                    self.cook_test_set
288
289        Returns:
290
291            probability estimates for test data: {array-like}
292
293        """
294        if len(X.shape) == 1:
295            n_features = X.shape[0]
296            new_X = mo.rbind(
297                X.reshape(1, n_features),
298                np.ones(n_features).reshape(1, n_features),
299            )
300
301            Z = self.cook_test_set(new_X, **kwargs)
302
303        else:
304            Z = self.cook_test_set(X, **kwargs)
305
306        ZB = mo.safe_sparse_dot(
307            Z,
308            self.beta_.reshape(
309                self.n_classes,
310                X.shape[1] + self.n_hidden_features + self.n_clusters,
311            ).T,
312        )
313
314        if self.family == "logit":
315            exp_ZB = np.exp(ZB)
316
317            return exp_ZB / exp_ZB.sum(axis=1)[:, None]
318
319        if self.family == "expit":
320            exp_ZB = expit(ZB)
321
322            return exp_ZB / exp_ZB.sum(axis=1)[:, None]
323
324        if self.family == "erf":
325            exp_ZB = 0.5 * (1 + erf(ZB))
326
327            return exp_ZB / exp_ZB.sum(axis=1)[:, None]

Predict probabilities for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

probability estimates for test data: {array-like}
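
For intuition, predict_proba maps the linear scores ZB through the family's link (exp for 'logit', expit, or erf) and then normalizes each row so the class probabilities sum to 1. A small standalone illustration of that normalization in plain NumPy, for the 'logit' case:

```python
import numpy as np

ZB = np.array([[0.2, 1.0, -0.5],
               [1.5, -0.3, 0.1]])   # linear scores, one row per sample
exp_ZB = np.exp(ZB)                 # "logit" family link
probs = exp_ZB / exp_ZB.sum(axis=1)[:, None]
print(probs.sum(axis=1))            # [1. 1.] -- each row sums to 1
```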
def score(self, X, y, scoring=None):
329    def score(self, X, y, scoring=None):
330        """Scoring function for classification.
331
332        Args:
333
334            X: {array-like}, shape = [n_samples, n_features]
335                Training vectors, where n_samples is the number
336                of samples and n_features is the number of features.
337
338            y: array-like, shape = [n_samples]
339                Target values.
340
341            scoring: str
342                scoring method (default is accuracy)
343
344        Returns:
345
346            score: float
347        """
348
349        if scoring is None:
350            scoring = "accuracy"
351
352        if scoring == "accuracy":
353            return skm2.accuracy_score(y, self.predict(X))
354
355        if scoring == "f1":
356            return skm2.f1_score(y, self.predict(X))
357
358        if scoring == "precision":
359            return skm2.precision_score(y, self.predict(X))
360
361        if scoring == "recall":
362            return skm2.recall_score(y, self.predict(X))
363
364        if scoring == "roc_auc":
365            return skm2.roc_auc_score(y, self.predict(X))
366
367        if scoring == "log_loss":
368            return skm2.log_loss(y, self.predict_proba(X))
369
370        if scoring == "balanced_accuracy":
371            return skm2.balanced_accuracy_score(y, self.predict(X))
372
373        if scoring == "average_precision":
374            return skm2.average_precision_score(y, self.predict(X))
375
376        if scoring == "neg_brier_score":
377            return -skm2.brier_score_loss(y, self.predict_proba(X))
378
379        if scoring == "neg_log_loss":
380            return -skm2.log_loss(y, self.predict_proba(X))

Scoring function for classification.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

scoring: str
    scoring method (default is accuracy)

Returns:

score: float
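
A short scoring sketch, assuming the fitted clf from the classification sketch above. Note that, as written, the 'f1', 'precision', 'recall' and 'roc_auc' options call the sklearn metrics with their default (binary) settings, so they are meant for binary targets; the metrics below also work for multiclass problems.

```python
print(clf.score(X_test, y_test))                               # default: accuracy
print(clf.score(X_test, y_test, scoring="balanced_accuracy"))
print(clf.score(X_test, y_test, scoring="log_loss"))           # uses predict_proba
```
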
class GLMRegressor(nnetsauce.glm.glm.GLM, sklearn.base.RegressorMixin):
 14class GLMRegressor(GLM, RegressorMixin):
 15    """Generalized 'linear' models using quasi-randomized networks (regression)
 16
 17    Parameters:
 18
 19        n_hidden_features: int
 20            number of nodes in the hidden layer
 21
 22        lambda1: float
 23            regularization parameter for GLM coefficients on original features
 24
 25        alpha1: float
 26            controls the compromise between the l1 and l2 norms of GLM coefficients on original features
 27
 28        lambda2: float
 29            regularization parameter for GLM coefficients on nonlinear features
 30
 31        alpha2: float
 32            controls the compromise between the l1 and l2 norms of GLM coefficients on nonlinear features
 33
 34        family: str
 35            "gaussian", "laplace" or "poisson" (for now)
 36
 37        activation_name: str
 38            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 39
 40        a: float
 41            hyperparameter for 'prelu' or 'elu' activation function
 42
 43        nodes_sim: str
 44            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 45            'uniform'
 46
 47        bias: boolean
 48            indicates if the hidden layer contains a bias term (True) or not
 49            (False)
 50
 51        dropout: float
 52            regularization parameter; (random) percentage of nodes dropped out
 53            of the training
 54
 55        direct_link: boolean
 56            indicates if the original predictors are included (True) in model's
 57            fitting or not (False)
 58
 59        n_clusters: int
 60            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 61                no clustering)
 62
 63        cluster_encode: bool
 64            defines how the variable containing clusters is treated (default is one-hot)
 65            if `False`, then labels are used, without one-hot encoding
 66
 67        type_clust: str
 68            type of clustering method: currently k-means ('kmeans') or Gaussian
 69            Mixture Model ('gmm')
 70
 71        type_scaling: a tuple of 3 strings
 72            scaling methods for inputs, hidden layer, and clustering respectively
 73            (and when relevant).
 74            Currently available: standardization ('std') or MinMax scaling ('minmax')
 75
 76        optimizer: object
 77            optimizer, from class nnetsauce.utils.Optimizer
 78
 79        seed: int
 80            reproducibility seed for nodes_sim=='uniform'
 81
 82    Attributes:
 83
 84        beta_: vector
 85            regression coefficients
 86
 87    Examples:
 88
 89    See [https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py)
 90
 91    """
 92
 93    # construct the object -----
 94
 95    def __init__(
 96        self,
 97        n_hidden_features=5,
 98        lambda1=0.01,
 99        alpha1=0.5,
100        lambda2=0.01,
101        alpha2=0.5,
102        family="gaussian",
103        activation_name="relu",
104        a=0.01,
105        nodes_sim="sobol",
106        bias=True,
107        dropout=0,
108        direct_link=True,
109        n_clusters=2,
110        cluster_encode=True,
111        type_clust="kmeans",
112        type_scaling=("std", "std", "std"),
113        optimizer=Optimizer(),
114        seed=123,
115    ):
116        super().__init__(
117            n_hidden_features=n_hidden_features,
118            lambda1=lambda1,
119            alpha1=alpha1,
120            lambda2=lambda2,
121            alpha2=alpha2,
122            activation_name=activation_name,
123            a=a,
124            nodes_sim=nodes_sim,
125            bias=bias,
126            dropout=dropout,
127            direct_link=direct_link,
128            n_clusters=n_clusters,
129            cluster_encode=cluster_encode,
130            type_clust=type_clust,
131            type_scaling=type_scaling,
132            optimizer=optimizer,
133            seed=seed,
134        )
135
136        self.family = family
137
138    def gaussian_loss(self, y, row_index, XB):
139        return 0.5 * np.mean(np.square(y[row_index] - XB))
140
141    def laplace_loss(self, y, row_index, XB):
142        return 0.5 * np.mean(np.abs(y[row_index] - XB))
143
144    def poisson_loss(self, y, row_index, XB):
145        return -np.mean(y[row_index] * XB - np.exp(XB))
146
147    def loss_func(
148        self, beta, group_index, X, y, row_index=None, type_loss="gaussian", **kwargs
149    ):
150        res = {
151            "gaussian": self.gaussian_loss,
152            "laplace": self.laplace_loss,
153            "poisson": self.poisson_loss,
154        }
155
156        if row_index is None:
157            row_index = range(len(y))
158            XB = self.compute_XB(X, beta=beta)
159
160            return res[type_loss](y, row_index, XB) + self.compute_penalty(
161                group_index=group_index, beta=beta
162            )
163
164        XB = self.compute_XB(X, beta=beta, row_index=row_index)
165
166        return res[type_loss](y, row_index, XB) + self.compute_penalty(
167            group_index=group_index, beta=beta
168        )
169
170    def fit(self, X, y, **kwargs):
171        """Fit GLM model to training data (X, y).
172
173        Args:
174
175            X: {array-like}, shape = [n_samples, n_features]
176                Training vectors, where n_samples is the number
177                of samples and n_features is the number of features.
178
179            y: array-like, shape = [n_samples]
180                Target values.
181
182            **kwargs: additional parameters to be passed to
183                    self.cook_training_set or self.obj.fit
184
185        Returns:
186
187            self: object
188
189        """
190
191        self.beta_ = None
192
193        self.n_iter = 0
194
195        n, self.group_index = X.shape
196
197        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
198
199        n_Z = scaled_Z.shape[0]
200
201        # initialization
202        beta_ = np.linalg.lstsq(scaled_Z, centered_y, rcond=None)[0]
203
204        # optimization
205        # fit(self, loss_func, response, x0, **kwargs):
206        # loss_func(self, beta, group_index, X, y,
207        #          row_index=None, type_loss="gaussian",
208        #          **kwargs)
209        self.optimizer.fit(
210            self.loss_func,
211            response=centered_y,
212            x0=beta_,
213            group_index=self.group_index,
214            X=scaled_Z,
215            y=centered_y,
216            type_loss=self.family,
217            **kwargs
218        )
219
220        self.beta_ = self.optimizer.results[0]
221
222        return self
223
224    def predict(self, X, **kwargs):
225        """Predict test data X.
226
227        Args:
228
229            X: {array-like}, shape = [n_samples, n_features]
230                Training vectors, where n_samples is the number
231                of samples and n_features is the number of features.
232
233            **kwargs: additional parameters to be passed to
234                    self.cook_test_set
235
236        Returns:
237
238            model predictions: {array-like}
239
240        """
241
242        if len(X.shape) == 1:
243            n_features = X.shape[0]
244            new_X = mo.rbind(
245                X.reshape(1, n_features),
246                np.ones(n_features).reshape(1, n_features),
247            )
248
249            return (
250                self.y_mean_ + np.dot(self.cook_test_set(new_X, **kwargs), self.beta_)
251            )[0]
252
253        return self.y_mean_ + np.dot(self.cook_test_set(X, **kwargs), self.beta_)
254
255    def score(self, X, y, scoring=None):
256        """Compute the score of the model.
257
258        Parameters:
259
260            X: {array-like}, shape = [n_samples, n_features]
261                Training vectors, where n_samples is the number
262                of samples and n_features is the number of features.
263
264            y: array-like, shape = [n_samples]
265                Target values.
266
267            scoring: str
268                scoring method
269
270        Returns:
271
272            score: float
273
274        """
275
276        if scoring is None:
277            return np.sqrt(np.mean((self.predict(X) - y) ** 2))
278
279        return skm2.get_scorer(scoring)(self, X, y)

Generalized 'linear' models using quasi-randomized networks (regression)

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

lambda1: float
    regularization parameter for GLM coefficients on original features

alpha1: float
    controls the compromise between the l1 and l2 norms of GLM coefficients on original features

lambda2: float
    regularization parameter for GLM coefficients on nonlinear features

alpha2: float
    controls the compromise between the l1 and l2 norms of GLM coefficients on nonlinear features

family: str
    "gaussian", "laplace" or "poisson" (for now)

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

optimizer: object
    optimizer, from class nnetsauce.utils.Optimizer

seed: int
    reproducibility seed for nodes_sim=='uniform'

Attributes:

beta_: vector
    regression coefficients

Examples:

See https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py
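
A minimal usage sketch (illustrative, not taken from the linked script), assuming GLMRegressor is exposed at the package top level; the family, hyperparameters and dataset are arbitrary choices.

```python
# Hypothetical, minimal usage sketch for GLMRegressor.
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123
)

reg = ns.GLMRegressor(n_hidden_features=5, family="gaussian", seed=123)
reg.fit(X_train, y_train)
print(reg.predict(X_test)[:5])    # y_mean_ + Z @ beta_
print(reg.score(X_test, y_test))  # RMSE when scoring is None
```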

def fit(self, X, y, **kwargs):
170    def fit(self, X, y, **kwargs):
171        """Fit GLM model to training data (X, y).
172
173        Args:
174
175            X: {array-like}, shape = [n_samples, n_features]
176                Training vectors, where n_samples is the number
177                of samples and n_features is the number of features.
178
179            y: array-like, shape = [n_samples]
180                Target values.
181
182            **kwargs: additional parameters to be passed to
183                    self.cook_training_set or self.obj.fit
184
185        Returns:
186
187            self: object
188
189        """
190
191        self.beta_ = None
192
193        self.n_iter = 0
194
195        n, self.group_index = X.shape
196
197        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
198
199        n_Z = scaled_Z.shape[0]
200
201        # initialization
202        beta_ = np.linalg.lstsq(scaled_Z, centered_y, rcond=None)[0]
203
204        # optimization
205        # fit(self, loss_func, response, x0, **kwargs):
206        # loss_func(self, beta, group_index, X, y,
207        #          row_index=None, type_loss="gaussian",
208        #          **kwargs)
209        self.optimizer.fit(
210            self.loss_func,
211            response=centered_y,
212            x0=beta_,
213            group_index=self.group_index,
214            X=scaled_Z,
215            y=centered_y,
216            type_loss=self.family,
217            **kwargs
218        )
219
220        self.beta_ = self.optimizer.results[0]
221
222        return self

Fit GLM model to training data (X, y).

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
def predict(self, X, **kwargs):
224    def predict(self, X, **kwargs):
225        """Predict test data X.
226
227        Args:
228
229            X: {array-like}, shape = [n_samples, n_features]
230                Training vectors, where n_samples is the number
231                of samples and n_features is the number of features.
232
233            **kwargs: additional parameters to be passed to
234                    self.cook_test_set
235
236        Returns:
237
238            model predictions: {array-like}
239
240        """
241
242        if len(X.shape) == 1:
243            n_features = X.shape[0]
244            new_X = mo.rbind(
245                X.reshape(1, n_features),
246                np.ones(n_features).reshape(1, n_features),
247            )
248
249            return (
250                self.y_mean_ + np.dot(self.cook_test_set(new_X, **kwargs), self.beta_)
251            )[0]
252
253        return self.y_mean_ + np.dot(self.cook_test_set(X, **kwargs), self.beta_)

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
def score(self, X, y, scoring=None):
255    def score(self, X, y, scoring=None):
256        """Compute the score of the model.
257
258        Parameters:
259
260            X: {array-like}, shape = [n_samples, n_features]
261                Training vectors, where n_samples is the number
262                of samples and n_features is the number of features.
263
264            y: array-like, shape = [n_samples]
265                Target values.
266
267            scoring: str
268                scoring method
269
270        Returns:
271
272            score: float
273
274        """
275
276        if scoring is None:
277            return np.sqrt(np.mean((self.predict(X) - y) ** 2))
278
279        return skm2.get_scorer(scoring)(self, X, y)

Compute the score of the model.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

scoring: str
    scoring method

Returns:

score: float
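
With scoring=None the method returns the RMSE of the predictions; any other value is treated as a sklearn scorer name and dispatched through sklearn.metrics.get_scorer. Continuing the regression sketch above:

```python
rmse = reg.score(X_test, y_test)              # scoring=None -> RMSE
r2 = reg.score(X_test, y_test, scoring="r2")  # any sklearn scorer name
print(rmse, r2)
```
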
class KernelRidge(sklearn.base.BaseEstimator, sklearn.base.RegressorMixin):
 18class KernelRidge(BaseEstimator, RegressorMixin):
 19    """
 20    Kernel Ridge Regression with optional GPU support, Matérn kernels, and automatic input standardization.
 21
 22    Parameters:
 23    - alpha: float
 24        Regularization parameter.
 25    - kernel: str
 26        Kernel type ("linear", "rbf", or "matern").
 27    - gamma: float
 28        Kernel coefficient for "rbf". Ignored for other kernels.
 29    - nu: float
 30        Smoothness parameter for the Matérn kernel. Default is 1.5.
 31    - length_scale: float
 32        Length scale parameter for the Matérn kernel. Default is 1.0.
 33    - backend: str
 34        "cpu" or "gpu" (uses JAX if "gpu").
 35    """
 36
 37    def __init__(
 38        self,
 39        alpha=1.0,
 40        kernel="rbf",
 41        gamma=None,
 42        nu=1.5,
 43        length_scale=1.0,
 44        backend="cpu",
 45    ):
 46        self.alpha = alpha
 47        self.alpha_ = alpha
 48        self.kernel = kernel
 49        self.gamma = gamma
 50        self.nu = nu
 51        self.length_scale = length_scale
 52        self.backend = backend
 53        self.scaler = StandardScaler()
 54
 55        if backend == "gpu" and not JAX_AVAILABLE:
 56            raise ImportError(
 57                "JAX is not installed. Please install JAX to use the GPU backend."
 58            )
 59
 60    def _linear_kernel(self, X, Y):
 61        return jnp.dot(X, Y.T) if self.backend == "gpu" else np.dot(X, Y.T)
 62
 63    def _rbf_kernel(self, X, Y):
 64        if self.gamma is None:
 65            self.gamma = 1.0 / X.shape[1]
 66        if self.backend == "gpu":
 67            sq_dists = (
 68                jnp.sum(X**2, axis=1)[:, None]
 69                + jnp.sum(Y**2, axis=1)
 70                - 2 * jnp.dot(X, Y.T)
 71            )
 72            return jnp.exp(-self.gamma * sq_dists)
 73        else:
 74            sq_dists = (
 75                np.sum(X**2, axis=1)[:, None]
 76                + np.sum(Y**2, axis=1)
 77                - 2 * np.dot(X, Y.T)
 78            )
 79            return np.exp(-self.gamma * sq_dists)
 80
 81    def _matern_kernel(self, X, Y):
 82        """
 83        Compute the Matérn kernel using JAX for GPU or NumPy for CPU.
 84
 85        Parameters:
 86        - X: array-like, shape (n_samples_X, n_features)
 87        - Y: array-like, shape (n_samples_Y, n_features)
 88
 89        Returns:
 90        - Kernel matrix, shape (n_samples_X, n_samples_Y)
 91        """
 92        if self.backend == "gpu":
 93            # Compute pairwise distances
 94            dists = jnp.sqrt(jnp.sum((X[:, None, :] - Y[None, :, :]) ** 2, axis=2))
 95            scaled_dists = jnp.sqrt(2 * self.nu) * dists / self.length_scale
 96
 97            # Matérn kernel formula
 98            coeff = (2 ** (1 - self.nu)) / jnp.exp(gammaln(self.nu))
 99            matern_kernel = coeff * (scaled_dists**self.nu) * kv(self.nu, scaled_dists)
100            matern_kernel = jnp.where(
101                dists == 0, 1.0, matern_kernel
102            )  # Handle the case where distance is 0
103            return matern_kernel
104        else:
105            # Use NumPy for CPU
106            from scipy.special import (
107                gammaln,
108                kv,
109            )  # Ensure scipy.special is used for CPU
110
111            dists = np.sqrt(np.sum((X[:, None, :] - Y[None, :, :]) ** 2, axis=2))
112            scaled_dists = np.sqrt(2 * self.nu) * dists / self.length_scale
113
114            # Matérn kernel formula
115            coeff = (2 ** (1 - self.nu)) / np.exp(gammaln(self.nu))
116            matern_kernel = coeff * (scaled_dists**self.nu) * kv(self.nu, scaled_dists)
117            matern_kernel = np.where(
118                dists == 0, 1.0, matern_kernel
119            )  # Handle the case where distance is 0
120            return matern_kernel
121
122    def _get_kernel(self, X, Y):
123        if self.kernel == "linear":
124            return self._linear_kernel(X, Y)
125        elif self.kernel == "rbf":
126            return self._rbf_kernel(X, Y)
127        elif self.kernel == "matern":
128            return self._matern_kernel(X, Y)
129        else:
130            raise ValueError(f"Unsupported kernel: {self.kernel}")
131
132    def fit(self, X, y):
133        """
134        Fit the Kernel Ridge Regression model.
135
136        Parameters:
137        - X: array-like, shape (n_samples, n_features)
138            Training data.
139        - y: array-like, shape (n_samples,)
140            Target values.
141        """
142        # Standardize the inputs
143        X = self.scaler.fit_transform(X)
144        self.X_fit_ = X
145
146        # Center the response
147        self.y_mean_ = np.mean(y)
148        y_centered = y - self.y_mean_
149
150        n_samples = X.shape[0]
151
152        # Compute the kernel matrix
153        K = self._get_kernel(X, X)
154        self.K_ = K
155        self.y_fit_ = y_centered
156
157        if isinstance(self.alpha, (list, np.ndarray)):
158            # If alpha is a list or array, compute LOOE for each alpha
159            self.alphas_ = self.alpha  # Store the list of alphas
160            self.dual_coefs_ = []  # Store dual coefficients for each alpha
161            self.looe_ = []  # Store LOOE for each alpha
162
163            for alpha in self.alpha:
164                G = K + alpha * np.eye(n_samples)
165                G_inv = np.linalg.inv(G)
166                diag_G_inv = np.diag(G_inv)
167                dual_coef = np.linalg.solve(G, y_centered)
168                looe = np.sum((dual_coef / diag_G_inv) ** 2)  # Compute LOOE
169                self.dual_coefs_.append(dual_coef)
170                self.looe_.append(looe)
171
172            # Select the best alpha based on the smallest LOOE
173            best_index = np.argmin(self.looe_)
174            self.alpha_ = self.alpha[best_index]
175            self.dual_coef_ = self.dual_coefs_[best_index]
176        else:
177            # If alpha is a single value, proceed as usual
178            if self.backend == "gpu":
179                self.dual_coef_ = jnp.linalg.solve(
180                    K + self.alpha * jnp.eye(n_samples), y_centered
181                )
182            else:
183                self.dual_coef_ = np.linalg.solve(
184                    K + self.alpha * np.eye(n_samples), y_centered
185                )
186
187        return self
188
189    def predict(self, X, probs=False):
190        """
191        Predict using the Kernel Ridge Regression model.
192
193        Parameters:
194        - X: array-like, shape (n_samples, n_features)
195            Test data.
196
197        Returns:
198        - Predicted values, shape (n_samples,).
199        """
200        # Standardize the inputs
201        X = self.scaler.transform(X)
202        K = self._get_kernel(X, self.X_fit_)
203        if self.backend == "gpu":
204            preds = jnp.dot(K, self.dual_coef_) + self.y_mean_
205            if probs:
206                # Compute similarity to self.X_fit_
207                similarities = jnp.dot(
208                    preds, self.X_fit_.T
209                )  # Shape: (n_samples, n_fit_)
210                # Apply softmax to get probabilities
211                return jaxsoftmax(similarities, axis=1)
212            return preds
213        else:
214            preds = np.dot(K, self.dual_coef_) + self.y_mean_
215            if probs:
216                # Compute similarity to self.X_fit_
217                similarities = np.dot(
218                    preds, self.X_fit_.T
219                )  # Shape: (n_samples, n_fit_)
220                # Apply softmax to get probabilities
221                return softmax(similarities, axis=1)
222            return preds
223
224    def partial_fit(self, X, y):
225        """
226        Incrementally fit the Kernel Ridge Regression model with new data using a recursive approach.
227
228        Parameters:
229        - X: array-like, shape (n_samples, n_features)
230            New training data.
231        - y: array-like, shape (n_samples,)
232            New target values.
233
234        Returns:
235        - self: object
236            The updated model.
237        """
238        # Standardize the inputs
239        X = (
240            self.scaler.fit_transform(X)
241            if not hasattr(self, "X_fit_")
242            else self.scaler.transform(X)
243        )
244
245        if not hasattr(self, "X_fit_"):
246            # Initialize with the first batch of data
247            self.X_fit_ = X
248
249            # Center the response
250            self.y_mean_ = np.mean(y)
251            y_centered = y - self.y_mean_
252            self.y_fit_ = y_centered
253
254            n_samples = X.shape[0]
255
256            # Compute the kernel matrix for the initial data
257            self.K_ = self._get_kernel(X, X)
258
259            # Initialize dual coefficients for each alpha
260            if isinstance(self.alpha, (list, np.ndarray)):
261                self.dual_coefs_ = [np.zeros(n_samples) for _ in self.alpha]
262            else:
263                self.dual_coef_ = np.zeros(n_samples)
264        else:
265            # Incrementally update with new data
266            y_centered = y - self.y_mean_  # Center the new batch of responses
267            for x_new, y_new in zip(X, y_centered):
268                x_new = x_new.reshape(1, -1)  # Ensure x_new is 2D
269                k_new = self._get_kernel(self.X_fit_, x_new).flatten()
270
271                # Compute the kernel value for the new data point
272                k_self = self._get_kernel(x_new, x_new).item()
273
274                if isinstance(self.alpha, (list, np.ndarray)):
275                    # Update dual coefficients for each alpha
276                    for idx, alpha in enumerate(self.alpha):
277                        gamma_new = 1 / (k_self + alpha)
278                        residual = y_new - np.dot(self.dual_coefs_[idx], k_new)
279                        self.dual_coefs_[idx] = np.append(
280                            self.dual_coefs_[idx], gamma_new * residual
281                        )
282                else:
283                    # Update dual coefficients for a single alpha
284                    gamma_new = 1 / (k_self + self.alpha)
285                    residual = y_new - np.dot(self.dual_coef_, k_new)
286                    self.dual_coef_ = np.append(self.dual_coef_, gamma_new * residual)
287
288                # Update the kernel matrix
289                self.K_ = np.block(
290                    [[self.K_, k_new[:, None]], [k_new[None, :], np.array([[k_self]])]]
291                )
292
293                # Update the stored data
294                self.X_fit_ = np.vstack([self.X_fit_, x_new])
295                self.y_fit_ = np.append(self.y_fit_, y_new)
296
297        # Select the best alpha based on LOOE after the batch
298        if isinstance(self.alpha, (list, np.ndarray)):
299            self.looe_ = []
300            for idx, alpha in enumerate(self.alpha):
301                G = self.K_ + alpha * np.eye(self.K_.shape[0])
302                G_inv = np.linalg.inv(G)
303                diag_G_inv = np.diag(G_inv)
304                looe = np.sum((self.dual_coefs_[idx] / diag_G_inv) ** 2)
305                self.looe_.append(looe)
306
307            # Select the best alpha
308            best_index = np.argmin(self.looe_)
309            self.alpha_ = self.alpha[best_index]
310            self.dual_coef_ = self.dual_coefs_[best_index]
311
312        return self

Kernel Ridge Regression with optional GPU support, Matérn kernels, and automatic input standardization.

Parameters:

  • alpha: float Regularization parameter.
  • kernel: str Kernel type ("linear", "rbf", or "matern").
  • gamma: float Kernel coefficient for "rbf". Ignored for other kernels.
  • nu: float Smoothness parameter for the Matérn kernel. Default is 1.5.
  • length_scale: float Length scale parameter for the Matérn kernel. Default is 1.0.
  • backend: str "cpu" or "gpu" (uses JAX if "gpu").
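
A minimal usage sketch (illustrative; assumes KernelRidge is exposed at the package top level). Passing a list of alphas makes fit() compute a leave-one-out error (LOOE) for each value and keep the best one in alpha_:

```python
# Hypothetical, minimal usage sketch for KernelRidge.
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123
)

# A grid of regularization parameters: fit() selects the alpha with the
# smallest leave-one-out error and stores it in alpha_.
krr = ns.KernelRidge(alpha=[0.01, 0.1, 1.0, 10.0], kernel="rbf")
krr.fit(X_train, y_train)
print(krr.alpha_)
print(krr.predict(X_test)[:5])
```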
def fit(self, X, y):
132    def fit(self, X, y):
133        """
134        Fit the Kernel Ridge Regression model.
135
136        Parameters:
137        - X: array-like, shape (n_samples, n_features)
138            Training data.
139        - y: array-like, shape (n_samples,)
140            Target values.
141        """
142        # Standardize the inputs
143        X = self.scaler.fit_transform(X)
144        self.X_fit_ = X
145
146        # Center the response
147        self.y_mean_ = np.mean(y)
148        y_centered = y - self.y_mean_
149
150        n_samples = X.shape[0]
151
152        # Compute the kernel matrix
153        K = self._get_kernel(X, X)
154        self.K_ = K
155        self.y_fit_ = y_centered
156
157        if isinstance(self.alpha, (list, np.ndarray)):
158            # If alpha is a list or array, compute LOOE for each alpha
159            self.alphas_ = self.alpha  # Store the list of alphas
160            self.dual_coefs_ = []  # Store dual coefficients for each alpha
161            self.looe_ = []  # Store LOOE for each alpha
162
163            for alpha in self.alpha:
164                G = K + alpha * np.eye(n_samples)
165                G_inv = np.linalg.inv(G)
166                diag_G_inv = np.diag(G_inv)
167                dual_coef = np.linalg.solve(G, y_centered)
168                looe = np.sum((dual_coef / diag_G_inv) ** 2)  # Compute LOOE
169                self.dual_coefs_.append(dual_coef)
170                self.looe_.append(looe)
171
172            # Select the best alpha based on the smallest LOOE
173            best_index = np.argmin(self.looe_)
174            self.alpha_ = self.alpha[best_index]
175            self.dual_coef_ = self.dual_coefs_[best_index]
176        else:
177            # If alpha is a single value, proceed as usual
178            if self.backend == "gpu":
179                self.dual_coef_ = jnp.linalg.solve(
180                    K + self.alpha * jnp.eye(n_samples), y_centered
181                )
182            else:
183                self.dual_coef_ = np.linalg.solve(
184                    K + self.alpha * np.eye(n_samples), y_centered
185                )
186
187        return self

Fit the Kernel Ridge Regression model.

Parameters:

  • X: array-like, shape (n_samples, n_features) Training data.
  • y: array-like, shape (n_samples,) Target values.
def predict(self, X, probs=False):
189    def predict(self, X, probs=False):
190        """
191        Predict using the Kernel Ridge Regression model.
192
193        Parameters:
194        - X: array-like, shape (n_samples, n_features)
195            Test data.
196
197        Returns:
198        - Predicted values, shape (n_samples,).
199        """
200        # Standardize the inputs
201        X = self.scaler.transform(X)
202        K = self._get_kernel(X, self.X_fit_)
203        if self.backend == "gpu":
204            preds = jnp.dot(K, self.dual_coef_) + self.y_mean_
205            if probs:
206                # Compute similarity to self.X_fit_
207                similarities = jnp.dot(
208                    preds, self.X_fit_.T
209                )  # Shape: (n_samples, n_fit_)
210                # Apply softmax to get probabilities
211                return jaxsoftmax(similarities, axis=1)
212            return preds
213        else:
214            preds = np.dot(K, self.dual_coef_) + self.y_mean_
215            if probs:
216                # Compute similarity to self.X_fit_
217                similarities = np.dot(
218                    preds, self.X_fit_.T
219                )  # Shape: (n_samples, n_fit_)
220                # Apply softmax to get probabilities
221                return softmax(similarities, axis=1)
222            return preds

Predict using the Kernel Ridge Regression model.

Parameters:

  • X: array-like, shape (n_samples, n_features) Test data.

Returns:

  • Predicted values, shape (n_samples,).
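
A sketch of incremental fitting with partial_fit: the first call initializes the model on a batch, later calls update the dual coefficients one observation at a time (continuing from the data split in the previous sketch, CPU backend, single alpha):

```python
krr_inc = ns.KernelRidge(alpha=1.0, kernel="rbf")
krr_inc.partial_fit(X_train[:200], y_train[:200])        # first batch initializes the model
krr_inc.partial_fit(X_train[200:300], y_train[200:300])  # recursive updates, point by point
print(krr_inc.predict(X_test)[:5])
```
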
class LazyClassifier(nnetsauce.LazyDeepClassifier):
757class LazyClassifier(LazyDeepClassifier):
758    """
759        Fitting -- almost -- all the classification algorithms with
760        nnetsauce's CustomClassifier and returning their scores (no layers).
761
762    Parameters:
763
764        verbose: int, optional (default=0)
765            Any positive number for verbosity.
766
767        ignore_warnings: bool, optional (default=True)
 768            When set to True, warnings related to algorithms that are not able to run are ignored.
769
770        custom_metric: function, optional (default=None)
771            When function is provided, models are evaluated based on the custom evaluation metric provided.
772
773        predictions: bool, optional (default=False)
 774            When set to True, the predictions of all the models are returned as a dataframe.
775
776        sort_by: string, optional (default='Accuracy')
777            Sort models by a metric. Available options are 'Accuracy', 'Balanced Accuracy', 'ROC AUC', 'F1 Score'
778            or a custom metric identified by its name and provided by custom_metric.
779
780        random_state: int, optional (default=42)
 781            Reproducibility seed.
782
783        estimators: list, optional (default='all')
784            list of Estimators names or just 'all' (default='all')
785
786        preprocess: bool
787            preprocessing is done when set to True
788
789        n_jobs : int, when possible, run in parallel
790            For now, only used by individual models that support it.
791
792        All the other parameters are the same as CustomClassifier's.
793
794    Attributes:
795
796        models_: dict-object
797            Returns a dictionary with each model pipeline as value
798            with key as name of models.
799
800        best_model_: object
801            Returns the best model pipeline based on the sort_by metric.
802
803    Examples:
804
805        import nnetsauce as ns
806        import numpy as np
807        from sklearn import datasets
808        from sklearn.utils import shuffle
809
810        dataset = datasets.load_iris()
811        X = dataset.data
812        y = dataset.target
813        X, y = shuffle(X, y, random_state=123)
814        X = X.astype(np.float32)
815        y = y.astype(np.float32)
816        X_train, X_test = X[:100], X[100:]
817        y_train, y_test = y[:100], y[100:]
818
819        clf = ns.LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
820        models, predictions = clf.fit(X_train, X_test, y_train, y_test)
821        model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test)
822        print(models)
823
824    """
825
826    def __init__(
827        self,
828        verbose=0,
829        ignore_warnings=True,
830        custom_metric=None,
831        predictions=False,
832        sort_by="Accuracy",
833        random_state=42,
834        estimators="all",
835        preprocess=False,
836        n_jobs=None,
837        # CustomClassifier attributes
838        obj=None,
839        n_hidden_features=5,
840        activation_name="relu",
841        a=0.01,
842        nodes_sim="sobol",
843        bias=True,
844        dropout=0,
845        direct_link=True,
846        n_clusters=2,
847        cluster_encode=True,
848        type_clust="kmeans",
849        type_scaling=("std", "std", "std"),
850        col_sample=1,
851        row_sample=1,
852        seed=123,
853        backend="cpu",
854    ):
855        super().__init__(
856            verbose=verbose,
857            ignore_warnings=ignore_warnings,
858            custom_metric=custom_metric,
859            predictions=predictions,
860            sort_by=sort_by,
861            random_state=random_state,
862            estimators=estimators,
863            preprocess=preprocess,
864            n_jobs=n_jobs,
865            n_layers=1,
866            obj=obj,
867            n_hidden_features=n_hidden_features,
868            activation_name=activation_name,
869            a=a,
870            nodes_sim=nodes_sim,
871            bias=bias,
872            dropout=dropout,
873            direct_link=direct_link,
874            n_clusters=n_clusters,
875            cluster_encode=cluster_encode,
876            type_clust=type_clust,
877            type_scaling=type_scaling,
878            col_sample=col_sample,
879            row_sample=row_sample,
880            seed=seed,
881            backend=backend,
882        )

Fitting -- almost -- all the classification algorithms with nnetsauce's CustomClassifier and returning their scores (no layers).

Parameters:

verbose: int, optional (default=0)
    Any positive number for verbosity.

ignore_warnings: bool, optional (default=True)
    When set to True, warnings related to algorithms that are not able to run are ignored.

custom_metric: function, optional (default=None)
    When function is provided, models are evaluated based on the custom evaluation metric provided.

predictions: bool, optional (default=False)
    When set to True, the predictions of all the models are returned as a dataframe.

sort_by: string, optional (default='Accuracy')
    Sort models by a metric. Available options are 'Accuracy', 'Balanced Accuracy', 'ROC AUC', 'F1 Score'
    or a custom metric identified by its name and provided by custom_metric.

random_state: int, optional (default=42)
    Reproducibility seed.

estimators: list, optional (default='all')
    list of Estimators names or just 'all' (default='all')

preprocess: bool
    preprocessing is done when set to True

n_jobs : int, when possible, run in parallel
    For now, only used by individual models that support it.

All the other parameters are the same as CustomClassifier's.

Attributes:

models_: dict-object
    Returns a dictionary with each model pipeline as value
    with key as name of models.

best_model_: object
    Returns the best model pipeline based on the sort_by metric.

Examples:

import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle

dataset = datasets.load_iris()
X = dataset.data
y = dataset.target
X, y = shuffle(X, y, random_state=123)
X = X.astype(np.float32)
y = y.astype(np.float32)
X_train, X_test = X[:100], X[100:]
y_train, y_test = y[:100], y[100:]

clf = ns.LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)
model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test)
print(models)
class LazyRegressor(nnetsauce.LazyDeepRegressor):
646class LazyRegressor(LazyDeepRegressor):
647    """
648        Fitting -- almost -- all the regression algorithms with
649        nnetsauce's CustomRegressor and returning their scores.
650
651    Parameters:
652
653        verbose: int, optional (default=0)
654            Any positive number for verbosity.
655
656        ignore_warnings: bool, optional (default=True)
 657            When set to True, warnings related to algorithms that are not able to run are ignored.
658
659        custom_metric: function, optional (default=None)
660            When function is provided, models are evaluated based on the custom evaluation metric provided.
661
662        predictions: bool, optional (default=False)
 663            When set to True, the predictions of all the models are returned as a dataframe.
664
665        sort_by: string, optional (default='RMSE')
 666            Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken' and 'Custom Metric',
667            or a custom metric identified by its name and provided by custom_metric.
668
669        random_state: int, optional (default=42)
 670            Reproducibility seed.
671
672        estimators: list, optional (default='all')
673            list of Estimators names or just 'all' (default='all')
674
675        preprocess: bool
676            preprocessing is done when set to True
677
678        n_jobs : int, when possible, run in parallel
679            For now, only used by individual models that support it.
680
681        All the other parameters are the same as CustomRegressor's.
682
683    Attributes:
684
685        models_: dict-object
686            Returns a dictionary with each model pipeline as value
687            with key as name of models.
688
689        best_model_: object
690            Returns the best model pipeline based on the sort_by metric.
691
692    Examples:
693
694        import nnetsauce as ns
695        import numpy as np
696        from sklearn import datasets
697        from sklearn.utils import shuffle
698
699        diabetes = datasets.load_diabetes()
700        X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
701        X = X.astype(np.float32)
702
703        offset = int(X.shape[0] * 0.9)
704        X_train, y_train = X[:offset], y[:offset]
705        X_test, y_test = X[offset:], y[offset:]
706
707        reg = ns.LazyRegressor(verbose=0, ignore_warnings=False,
708                            custom_metric=None)
709        models, predictions = reg.fit(X_train, X_test, y_train, y_test)
710        print(models)
711
712    """
713
714    def __init__(
715        self,
716        verbose=0,
717        ignore_warnings=True,
718        custom_metric=None,
719        predictions=False,
720        sort_by="RMSE",
721        random_state=42,
722        estimators="all",
723        preprocess=False,
724        n_jobs=None,
725        # CustomRegressor attributes
726        obj=None,
727        n_hidden_features=5,
728        activation_name="relu",
729        a=0.01,
730        nodes_sim="sobol",
731        bias=True,
732        dropout=0,
733        direct_link=True,
734        n_clusters=2,
735        cluster_encode=True,
736        type_clust="kmeans",
737        type_scaling=("std", "std", "std"),
738        col_sample=1,
739        row_sample=1,
740        seed=123,
741        backend="cpu",
742    ):
743        super().__init__(
744            verbose=verbose,
745            ignore_warnings=ignore_warnings,
746            custom_metric=custom_metric,
747            predictions=predictions,
748            sort_by=sort_by,
749            random_state=random_state,
750            estimators=estimators,
751            preprocess=preprocess,
752            n_jobs=n_jobs,
753            n_layers=1,
754            obj=obj,
755            n_hidden_features=n_hidden_features,
756            activation_name=activation_name,
757            a=a,
758            nodes_sim=nodes_sim,
759            bias=bias,
760            dropout=dropout,
761            direct_link=direct_link,
762            n_clusters=n_clusters,
763            cluster_encode=cluster_encode,
764            type_clust=type_clust,
765            type_scaling=type_scaling,
766            col_sample=col_sample,
767            row_sample=row_sample,
768            seed=seed,
769            backend=backend,
770        )

Fitting -- almost -- all the regression algorithms with nnetsauce's CustomRegressor and returning their scores.

Parameters:

verbose: int, optional (default=0)
    Any positive number for verbosity.

ignore_warnings: bool, optional (default=True)
    When set to True, warnings related to algorithms that are not able to run are ignored.

custom_metric: function, optional (default=None)
    When function is provided, models are evaluated based on the custom evaluation metric provided.

predictions: bool, optional (default=False)
    When set to True, the predictions of all the models are returned as a dataframe.

sort_by: string, optional (default='RMSE')
    Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken' and 'Custom Metric',
    or a custom metric identified by its name and provided by custom_metric.

random_state: int, optional (default=42)
    Reproducibility seed.

estimators: list, optional (default='all')
    list of Estimators names or just 'all' (default='all')

preprocess: bool
    preprocessing is done when set to True

n_jobs : int, when possible, run in parallel
    For now, only used by individual models that support it.

All the other parameters are the same as CustomRegressor's.

Attributes:

models_: dict-object
    Returns a dictionary with each model pipeline as value
    with key as name of models.

best_model_: object
    Returns the best model pipeline based on the sort_by metric.

Examples:

import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle

diabetes = datasets.load_diabetes()
X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
X = X.astype(np.float32)

offset = int(X.shape[0] * 0.9)
X_train, y_train = X[:offset], y[:offset]
X_test, y_test = X[offset:], y[offset:]

reg = ns.LazyRegressor(verbose=0, ignore_warnings=False,
                    custom_metric=None)
models, predictions = reg.fit(X_train, X_test, y_train, y_test)
print(models)
class LazyDeepClassifier(nnetsauce.custom.custom.Custom, sklearn.base.ClassifierMixin):
 94class LazyDeepClassifier(Custom, ClassifierMixin):
 95    """
 96
 97    Fitting -- almost -- all the classification algorithms with layers of
 98    nnetsauce's CustomClassifier and returning their scores.
 99
100    Parameters:
101
102        verbose: int, optional (default=0)
103            Any positive number for verbosity.
104
105        ignore_warnings: bool, optional (default=True)
 106            When set to True, warnings related to algorithms that are not
107            able to run are ignored.
108
109        custom_metric: function, optional (default=None)
110            When function is provided, models are evaluated based on the custom
111              evaluation metric provided.
112
113        predictions: bool, optional (default=False)
 114            When set to True, the predictions of all the models are
 115            returned as a data frame.
116
117        sort_by: string, optional (default='Accuracy')
118            Sort models by a metric. Available options are 'Accuracy',
119            'Balanced Accuracy', 'ROC AUC', 'F1 Score' or a custom metric
120            identified by its name and provided by custom_metric.
121
122        random_state: int, optional (default=42)
 123            Reproducibility seed.
124
125        estimators: list, optional (default='all')
126            list of Estimators names or just 'all' for > 90 classifiers
127            (default='all')
128
129        preprocess: bool, preprocessing is done when set to True
130
131        n_jobs: int, when possible, run in parallel
132            For now, only used by individual models that support it.
133
134        n_layers: int, optional (default=3)
135            Number of layers of CustomClassifiers to be used.
136
137        All the other parameters are the same as CustomClassifier's.
138
139    Attributes:
140
141        models_: dict-object
142            Returns a dictionary with each model pipeline as value
143            with key as name of models.
144
145        best_model_: object
146            Returns the best model pipeline.
147
148    Examples
149
150        ```python
151        import nnetsauce as ns
152        from sklearn.datasets import load_breast_cancer
153        from sklearn.model_selection import train_test_split
154        data = load_breast_cancer()
155        X = data.data
156        y= data.target
157        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2,
158            random_state=123)
159        clf = ns.LazyDeepClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
160        models, predictions = clf.fit(X_train, X_test, y_train, y_test)
161        model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test)
162        print(models)
163        ```
164
165    """
166
167    def __init__(
168        self,
169        verbose=0,
170        ignore_warnings=True,
171        custom_metric=None,
172        predictions=False,
173        sort_by="Accuracy",
174        random_state=42,
175        estimators="all",
176        preprocess=False,
177        n_jobs=None,
178        # Defining depth
179        n_layers=3,
180        # CustomClassifier attributes
181        obj=None,
182        n_hidden_features=5,
183        activation_name="relu",
184        a=0.01,
185        nodes_sim="sobol",
186        bias=True,
187        dropout=0,
188        direct_link=True,
189        n_clusters=2,
190        cluster_encode=True,
191        type_clust="kmeans",
192        type_scaling=("std", "std", "std"),
193        col_sample=1,
194        row_sample=1,
195        seed=123,
196        backend="cpu",
197    ):
198        self.verbose = verbose
199        self.ignore_warnings = ignore_warnings
200        self.custom_metric = custom_metric
201        self.predictions = predictions
202        self.sort_by = sort_by
203        self.models_ = {}
204        self.best_model_ = None
205        self.random_state = random_state
206        self.estimators = estimators
207        self.preprocess = preprocess
208        self.n_layers = n_layers - 1
209        self.n_jobs = n_jobs
210        super().__init__(
211            obj=obj,
212            n_hidden_features=n_hidden_features,
213            activation_name=activation_name,
214            a=a,
215            nodes_sim=nodes_sim,
216            bias=bias,
217            dropout=dropout,
218            direct_link=direct_link,
219            n_clusters=n_clusters,
220            cluster_encode=cluster_encode,
221            type_clust=type_clust,
222            type_scaling=type_scaling,
223            col_sample=col_sample,
224            row_sample=row_sample,
225            seed=seed,
226            backend=backend,
227        )
228
229    def fit(self, X_train, X_test, y_train, y_test):
230        """Fit classifiers to X_train and y_train, predict and score on X_test,
231        y_test.
232
233        Parameters:
234
235            X_train: array-like,
236                Training vectors, where rows is the number of samples
237                and columns is the number of features.
238
239            X_test: array-like,
240                Testing vectors, where rows is the number of samples
241                and columns is the number of features.
242
243            y_train: array-like,
244                Training vectors, where rows is the number of samples
245                and columns is the number of features.
246
247            y_test: array-like,
248                Testing vectors, where rows is the number of samples
249                and columns is the number of features.
250
251        Returns:
252
253            scores: Pandas DataFrame
254                Returns metrics of all the models in a Pandas DataFrame.
255
256            predictions: Pandas DataFrame
257                Returns predictions of all the models in a Pandas DataFrame.
258        """
259        Accuracy = []
260        B_Accuracy = []
261        ROC_AUC = []
262        F1 = []
263        names = []
264        TIME = []
265        predictions = {}
266
267        if self.custom_metric is not None:
268            CUSTOM_METRIC = []
269
270        if isinstance(X_train, np.ndarray):
271            X_train = pd.DataFrame(X_train)
272            X_test = pd.DataFrame(X_test)
273
274        numeric_features = X_train.select_dtypes(include=[np.number]).columns
275        categorical_features = X_train.select_dtypes(include=["object"]).columns
276
277        categorical_low, categorical_high = get_card_split(
278            X_train, categorical_features
279        )
280
281        if self.preprocess is True:
282            preprocessor = ColumnTransformer(
283                transformers=[
284                    ("numeric", numeric_transformer, numeric_features),
285                    (
286                        "categorical_low",
287                        categorical_transformer_low,
288                        categorical_low,
289                    ),
290                    (
291                        "categorical_high",
292                        categorical_transformer_high,
293                        categorical_high,
294                    ),
295                ]
296            )
297
298        # baseline models
299        try:
300            baseline_names = ["RandomForestClassifier", "XGBClassifier"]
301            baseline_models = [RandomForestClassifier(), xgb.XGBClassifier()]
302        except Exception as exception:
303            baseline_names = ["RandomForestClassifier"]
304            baseline_models = [RandomForestClassifier()]
305
306        for name, model in zip(baseline_names, baseline_models):
307            start = time.time()
308            try:
309                model.fit(X_train, y_train)
310                self.models_[name] = model
311                y_pred = model.predict(X_test)
312                accuracy = accuracy_score(y_test, y_pred, normalize=True)
313                b_accuracy = balanced_accuracy_score(y_test, y_pred)
314                f1 = f1_score(y_test, y_pred, average="weighted")
315                try:
316                    roc_auc = roc_auc_score(y_test, y_pred)
317                except Exception as exception:
318                    roc_auc = None
319                    if self.ignore_warnings is False:
320                        print("ROC AUC couldn't be calculated for " + name)
321                        print(exception)
322                names.append(name)
323                Accuracy.append(accuracy)
324                B_Accuracy.append(b_accuracy)
325                ROC_AUC.append(roc_auc)
326                F1.append(f1)
327                TIME.append(time.time() - start)
328                if self.custom_metric is not None:
329                    custom_metric = self.custom_metric(y_test, y_pred)
330                    CUSTOM_METRIC.append(custom_metric)
331                if self.verbose > 0:
332                    if self.custom_metric is not None:
333                        print(
334                            {
335                                "Model": name,
336                                "Accuracy": accuracy,
337                                "Balanced Accuracy": b_accuracy,
338                                "ROC AUC": roc_auc,
339                                "F1 Score": f1,
340                                self.custom_metric.__name__: custom_metric,
341                                "Time taken": time.time() - start,
342                            }
343                        )
344                    else:
345                        print(
346                            {
347                                "Model": name,
348                                "Accuracy": accuracy,
349                                "Balanced Accuracy": b_accuracy,
350                                "ROC AUC": roc_auc,
351                                "F1 Score": f1,
352                                "Time taken": time.time() - start,
353                            }
354                        )
355                if self.predictions:
356                    predictions[name] = y_pred
357            except Exception as exception:
358                 if self.ignore_warnings is False:
359                     print(name + " model failed to execute")
360                     print(exception)
361
362        if self.estimators == "all":
363            self.classifiers = [
364                item
365                for sublist in [
366                    DEEPCLASSIFIERS,
367                    DEEPMULTITASKCLASSIFIERS,
368                    DEEPSIMPLEMULTITASKCLASSIFIERS,
369                ]
370                for item in sublist
371            ]
372        else:
373            self.classifiers = (
374                [
375                    ("DeepCustomClassifier(" + est[0] + ")", est[1])
376                    for est in all_estimators()
377                    if (
378                        issubclass(est[1], ClassifierMixin)
379                        and (est[0] in self.estimators)
380                    )
381                ]
382                + [
383                    (
384                        "DeepMultitaskClassifier(" + est[0] + ")",
385                        partial(MultitaskClassifier, obj=est[1]()),
386                    )
387                    for est in all_estimators()
388                    if (
389                        issubclass(est[1], RegressorMixin)
390                        and (est[0] in self.estimators)
391                    )
392                ]
393                + [
394                    (
395                        "DeepSimpleMultitaskClassifier(" + est[0] + ")",
396                        partial(SimpleMultitaskClassifier, obj=est[1]()),
397                    )
398                    for est in all_estimators()
399                    if (
400                        issubclass(est[1], RegressorMixin)
401                        and (est[0] in self.estimators)
402                    )
403                ]
404            )
405
406        if self.preprocess is True:
407
408            for name, model in tqdm(self.classifiers):  # do parallel exec
409
410                other_args = {}  # use this trick for `random_state` too --> refactor
411                try:
412                    if (
413                        "n_jobs" in model().get_params().keys()
414                        and name.find("LogisticRegression") == -1
415                    ):
416                        other_args["n_jobs"] = self.n_jobs
417                except Exception:
418                    pass
419
420                start = time.time()
421
422                try:
423                    if "random_state" in model().get_params().keys():
424                        layer_clf = CustomClassifier(
425                            obj=model(random_state=self.random_state),
426                            n_hidden_features=self.n_hidden_features,
427                            activation_name=self.activation_name,
428                            a=self.a,
429                            nodes_sim=self.nodes_sim,
430                            bias=self.bias,
431                            dropout=self.dropout,
432                            direct_link=self.direct_link,
433                            n_clusters=self.n_clusters,
434                            cluster_encode=self.cluster_encode,
435                            type_clust=self.type_clust,
436                            type_scaling=self.type_scaling,
437                            col_sample=self.col_sample,
438                            row_sample=self.row_sample,
439                            seed=self.seed,
440                            backend=self.backend,
441                            cv_calibration=None,
442                        )
443
444                    else:
445                        layer_clf = CustomClassifier(
446                            obj=model(),
447                            n_hidden_features=self.n_hidden_features,
448                            activation_name=self.activation_name,
449                            a=self.a,
450                            nodes_sim=self.nodes_sim,
451                            bias=self.bias,
452                            dropout=self.dropout,
453                            direct_link=self.direct_link,
454                            n_clusters=self.n_clusters,
455                            cluster_encode=self.cluster_encode,
456                            type_clust=self.type_clust,
457                            type_scaling=self.type_scaling,
458                            col_sample=self.col_sample,
459                            row_sample=self.row_sample,
460                            seed=self.seed,
461                            backend=self.backend,
462                            cv_calibration=None,
463                        )
464
465                    layer_clf.fit(X_train, y_train)
466
467                    for _ in range(self.n_layers):
468                        layer_clf = deepcopy(
469                            CustomClassifier(
470                                obj=layer_clf,
471                                n_hidden_features=self.n_hidden_features,
472                                activation_name=self.activation_name,
473                                a=self.a,
474                                nodes_sim=self.nodes_sim,
475                                bias=self.bias,
476                                dropout=self.dropout,
477                                direct_link=self.direct_link,
478                                n_clusters=self.n_clusters,
479                                cluster_encode=self.cluster_encode,
480                                type_clust=self.type_clust,
481                                type_scaling=self.type_scaling,
482                                col_sample=self.col_sample,
483                                row_sample=self.row_sample,
484                                seed=self.seed,
485                                backend=self.backend,
486                                cv_calibration=None,
487                            )
488                        )
489
490                    pipe = Pipeline(
491                        [
492                            ("preprocessor", preprocessor),
493                            ("classifier", layer_clf),
494                        ]
495                    )
496
497                    pipe.fit(X_train, y_train)
498                    self.models_[name] = pipe
499                    y_pred = pipe.predict(X_test)
500                    accuracy = accuracy_score(y_test, y_pred, normalize=True)
501                    b_accuracy = balanced_accuracy_score(y_test, y_pred)
502                    f1 = f1_score(y_test, y_pred, average="weighted")
503                    try:
504                        roc_auc = roc_auc_score(y_test, y_pred)
505                    except Exception as exception:
506                        roc_auc = None
507                        if self.ignore_warnings is False:
508                            print("ROC AUC couldn't be calculated for " + name)
509                            print(exception)
510                    names.append(name)
511                    Accuracy.append(accuracy)
512                    B_Accuracy.append(b_accuracy)
513                    ROC_AUC.append(roc_auc)
514                    F1.append(f1)
515                    TIME.append(time.time() - start)
516                    if self.custom_metric is not None:
517                        custom_metric = self.custom_metric(y_test, y_pred)
518                        CUSTOM_METRIC.append(custom_metric)
519                    if self.verbose > 0:
520                        if self.custom_metric is not None:
521                            print(
522                                {
523                                    "Model": name,
524                                    "Accuracy": accuracy,
525                                    "Balanced Accuracy": b_accuracy,
526                                    "ROC AUC": roc_auc,
527                                    "F1 Score": f1,
528                                    self.custom_metric.__name__: custom_metric,
529                                    "Time taken": time.time() - start,
530                                }
531                            )
532                        else:
533                            print(
534                                {
535                                    "Model": name,
536                                    "Accuracy": accuracy,
537                                    "Balanced Accuracy": b_accuracy,
538                                    "ROC AUC": roc_auc,
539                                    "F1 Score": f1,
540                                    "Time taken": time.time() - start,
541                                }
542                            )
543                    if self.predictions:
544                        predictions[name] = y_pred
545                except Exception as exception:
546                     if self.ignore_warnings is False:
547                         print(name + " model failed to execute")
548                         print(exception)
549
550        else:  # no preprocessing
551
552            for name, model in tqdm(self.classifiers):  # do parallel exec
553                start = time.time()
554                try:
555                    if "random_state" in model().get_params().keys():
556                        layer_clf = CustomClassifier(
557                            obj=model(random_state=self.random_state),
558                            n_hidden_features=self.n_hidden_features,
559                            activation_name=self.activation_name,
560                            a=self.a,
561                            nodes_sim=self.nodes_sim,
562                            bias=self.bias,
563                            dropout=self.dropout,
564                            direct_link=self.direct_link,
565                            n_clusters=self.n_clusters,
566                            cluster_encode=self.cluster_encode,
567                            type_clust=self.type_clust,
568                            type_scaling=self.type_scaling,
569                            col_sample=self.col_sample,
570                            row_sample=self.row_sample,
571                            seed=self.seed,
572                            backend=self.backend,
573                            cv_calibration=None,
574                        )
575
576                    else:
577                        layer_clf = CustomClassifier(
578                            obj=model(),
579                            n_hidden_features=self.n_hidden_features,
580                            activation_name=self.activation_name,
581                            a=self.a,
582                            nodes_sim=self.nodes_sim,
583                            bias=self.bias,
584                            dropout=self.dropout,
585                            direct_link=self.direct_link,
586                            n_clusters=self.n_clusters,
587                            cluster_encode=self.cluster_encode,
588                            type_clust=self.type_clust,
589                            type_scaling=self.type_scaling,
590                            col_sample=self.col_sample,
591                            row_sample=self.row_sample,
592                            seed=self.seed,
593                            backend=self.backend,
594                            cv_calibration=None,
595                        )
596
597                    layer_clf.fit(X_train, y_train)
598
599                    for _ in range(self.n_layers):
600                        layer_clf = deepcopy(
601                            CustomClassifier(
602                                obj=layer_clf,
603                                n_hidden_features=self.n_hidden_features,
604                                activation_name=self.activation_name,
605                                a=self.a,
606                                nodes_sim=self.nodes_sim,
607                                bias=self.bias,
608                                dropout=self.dropout,
609                                direct_link=self.direct_link,
610                                n_clusters=self.n_clusters,
611                                cluster_encode=self.cluster_encode,
612                                type_clust=self.type_clust,
613                                type_scaling=self.type_scaling,
614                                col_sample=self.col_sample,
615                                row_sample=self.row_sample,
616                                seed=self.seed,
617                                backend=self.backend,
618                                cv_calibration=None,
619                            )
620                        )
621
622                        # layer_clf.fit(X_train, y_train)
623
624                    layer_clf.fit(X_train, y_train)
625
626                    self.models_[name] = layer_clf
627                    y_pred = layer_clf.predict(X_test)
628                    accuracy = accuracy_score(y_test, y_pred, normalize=True)
629                    b_accuracy = balanced_accuracy_score(y_test, y_pred)
630                    f1 = f1_score(y_test, y_pred, average="weighted")
631                    try:
632                        roc_auc = roc_auc_score(y_test, y_pred)
633                    except Exception as exception:
634                        roc_auc = None
635                        if self.ignore_warnings is False:
636                            print("ROC AUC couldn't be calculated for " + name)
637                            print(exception)
638                    names.append(name)
639                    Accuracy.append(accuracy)
640                    B_Accuracy.append(b_accuracy)
641                    ROC_AUC.append(roc_auc)
642                    F1.append(f1)
643                    TIME.append(time.time() - start)
644                    if self.custom_metric is not None:
645                        custom_metric = self.custom_metric(y_test, y_pred)
646                        CUSTOM_METRIC.append(custom_metric)
647                    if self.verbose > 0:
648                        if self.custom_metric is not None:
649                            print(
650                                {
651                                    "Model": name,
652                                    "Accuracy": accuracy,
653                                    "Balanced Accuracy": b_accuracy,
654                                    "ROC AUC": roc_auc,
655                                    "F1 Score": f1,
656                                    self.custom_metric.__name__: custom_metric,
657                                    "Time taken": time.time() - start,
658                                }
659                            )
660                        else:
661                            print(
662                                {
663                                    "Model": name,
664                                    "Accuracy": accuracy,
665                                    "Balanced Accuracy": b_accuracy,
666                                    "ROC AUC": roc_auc,
667                                    "F1 Score": f1,
668                                    "Time taken": time.time() - start,
669                                }
670                            )
671                    if self.predictions:
672                        predictions[name] = y_pred
673                except Exception as exception:
674                     if self.ignore_warnings is False:
675                         print(name + " model failed to execute")
676                         print(exception)
677
678        if self.custom_metric is None:
679            scores = pd.DataFrame(
680                {
681                    "Model": names,
682                    "Accuracy": Accuracy,
683                    "Balanced Accuracy": B_Accuracy,
684                    "ROC AUC": ROC_AUC,
685                    "F1 Score": F1,
686                    "Time Taken": TIME,
687                }
688            )
689        else:
690            scores = pd.DataFrame(
691                {
692                    "Model": names,
693                    "Accuracy": Accuracy,
694                    "Balanced Accuracy": B_Accuracy,
695                    "ROC AUC": ROC_AUC,
696                    "F1 Score": F1,
697                    "Custom metric": CUSTOM_METRIC,
698                    "Time Taken": TIME,
699                }
700            )
701        scores = scores.sort_values(by=self.sort_by, ascending=False).set_index("Model")
702
703        self.best_model_ = self.models_[scores.index[0]]
704
705        if self.predictions is True:
706
707            return scores, predictions
708
709        return scores
710
711    def get_best_model(self):
712        """
713        This function returns the best model pipeline based on the sort_by metric.
714
715        Returns:
716
717            best_model: object,
718                Returns the best model pipeline based on the sort_by metric.
719
720        """
721        return self.best_model_
722
723    def provide_models(self, X_train, X_test, y_train, y_test):
724        """Returns all the model objects trained. If fit hasn't been called yet,
725        then it's called to return the models.
726
727        Parameters:
728
729        X_train: array-like,
730            Training vectors, where rows is the number of samples
731            and columns is the number of features.
732
733        X_test: array-like,
734            Testing vectors, where rows is the number of samples
735            and columns is the number of features.
736
737        y_train: array-like,
738            Training vectors, where rows is the number of samples
739            and columns is the number of features.
740
741        y_test: array-like,
742            Testing vectors, where rows is the number of samples
743            and columns is the number of features.
744
745        Returns:
746
747            models: dict-object,
748                Returns a dictionary with each model's pipeline as value
749                and key = name of the model.
750        """
751        if len(self.models_.keys()) == 0:
752            self.fit(X_train, X_test, y_train, y_test)
753
754        return self.models_

Fitting -- almost -- all the classification algorithms with layers of nnetsauce's CustomClassifier and returning their scores.

Parameters:

verbose: int, optional (default=0)
    Any positive number for verbosity.

ignore_warnings: bool, optional (default=True)
    When set to True, warnings related to algorithms that are not
    able to run are ignored.

custom_metric: function, optional (default=None)
    When a function is provided, models are evaluated based on the custom
    evaluation metric provided.

predictions: bool, optional (default=False)
    When set to True, the predictions of all the models are
    returned as a data frame.

sort_by: string, optional (default='Accuracy')
    Sort models by a metric. Available options are 'Accuracy',
    'Balanced Accuracy', 'ROC AUC', 'F1 Score' or a custom metric
    identified by its name and provided by custom_metric.

random_state: int, optional (default=42)
    Reproducibility seed.

estimators: list, optional (default='all')
    list of estimator names, or just 'all' for more than 90 classifiers

preprocess: bool, preprocessing is done when set to True

n_jobs: int, when possible, run in parallel
    For now, only used by individual models that support it.

n_layers: int, optional (default=3)
    Number of layers of CustomClassifiers to be used.

All the other parameters are the same as CustomClassifier's.

Attributes:

models_: dict-object
    Returns a dictionary with each model pipeline as value
    with key as name of models.

best_model_: object
    Returns the best model pipeline.

Examples

```python
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
data = load_breast_cancer()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2,
    random_state=123)
clf = ns.LazyDeepClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)
model_dictionary = clf.provide_models(X_train, X_test, y_train, y_test)
print(models)
```

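Beyond the defaults shown above, the `estimators`, `custom_metric`, `sort_by` and `predictions` parameters can be combined to run a smaller, customized benchmark. A minimal sketch, assuming the two scikit-learn estimator names below are available through `sklearn.utils.all_estimators()` and using an illustrative F2-score helper (not part of nnetsauce):

```python
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import fbeta_score
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123
)

# illustrative custom metric; with verbose > 0 its function name is printed,
# and its values appear in the 'Custom metric' column of the scores
def f2_score(y_true, y_pred):
    return fbeta_score(y_true, y_pred, beta=2, average="weighted")

clf = ns.LazyDeepClassifier(
    verbose=0,
    ignore_warnings=True,
    custom_metric=f2_score,
    predictions=True,  # also return per-model test-set predictions
    sort_by="F1 Score",
    estimators=["LogisticRegression", "ExtraTreesClassifier"],  # restrict the benchmark
    n_layers=2,
)
scores, predictions = clf.fit(X_train, X_test, y_train, y_test)
print(scores)
```

With `predictions=True`, `fit` returns the scores DataFrame together with the test-set predictions collected for each model (per the source above, a dictionary keyed by model name).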
def fit(self, X_train, X_test, y_train, y_test):

Fit classifiers to X_train and y_train, predict and score on X_test, y_test.

Parameters:

X_train: array-like,
    Training vectors, where rows is the number of samples
    and columns is the number of features.

X_test: array-like,
    Testing vectors, where rows is the number of samples
    and columns is the number of features.

y_train: array-like,
    Training target values.

y_test: array-like,
    Testing target values.

Returns:

scores: Pandas DataFrame
    Returns metrics of all the models in a Pandas DataFrame.

predictions: Pandas DataFrame
    Returns predictions of all the models in a Pandas DataFrame.
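In the source above, depth is obtained by repeated wrapping: the selected base estimator is placed inside a `CustomClassifier`, and each extra layer wraps the previous `CustomClassifier` as its `obj`. A simplified, standalone sketch of that wrapping idea (the base learner and hyperparameters are illustrative; `LazyDeepClassifier.fit` additionally handles preprocessing, scoring and error handling):

```python
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123
)

n_layers = 3
# first layer: a CustomClassifier wrapping the base scikit-learn estimator
layer_clf = ns.CustomClassifier(obj=LogisticRegression(), n_hidden_features=5, seed=123)
# each additional layer wraps the previous CustomClassifier as its `obj`
for _ in range(n_layers - 1):
    layer_clf = ns.CustomClassifier(obj=layer_clf, n_hidden_features=5, seed=123)

layer_clf.fit(X_train, y_train)
print(accuracy_score(y_test, layer_clf.predict(X_test)))
```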
def provide_models(self, X_train, X_test, y_train, y_test):

Returns all the model objects trained. If fit hasn't been called yet, then it's called to return the models.

Parameters:

X_train: array-like, Training vectors, where rows is the number of samples and columns is the number of features.

X_test: array-like, Testing vectors, where rows is the number of samples and columns is the number of features.

y_train: array-like, Training target values.

y_test: array-like, Testing target values.

Returns:

models: dict-object,
    Returns a dictionary with each model's pipeline as value
    and key = name of the model.
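A short sketch showing how `provide_models`, `get_best_model` and the `best_model_` attribute fit together after a run (dataset and split are illustrative):

```python
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123
)

clf = ns.LazyDeepClassifier(verbose=0, ignore_warnings=True)
clf.fit(X_train, X_test, y_train, y_test)

# dictionary of fitted models/pipelines keyed by model name; fit would only be
# re-run here if it had not been called already
fitted_models = clf.provide_models(X_train, X_test, y_train, y_test)

# best model according to `sort_by` (the same object as clf.best_model_)
best = clf.get_best_model()
print(best)
```

The fitted objects stored in `models_` can then be reused directly, for example to predict on new data.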
class LazyDeepRegressor(nnetsauce.custom.custom.Custom, sklearn.base.RegressorMixin):
 90class LazyDeepRegressor(Custom, RegressorMixin):
 91    """
 92        Fitting -- almost -- all the regression algorithms with layers of
 93        nnetsauce's CustomRegressor and returning their scores.
 94
 95    Parameters:
 96
 97        verbose: int, optional (default=0)
 98            Any positive number for verbosity.
 99
100        ignore_warnings: bool, optional (default=True)
101            When set to True, warnings related to algorithms that are not able to run are ignored.
102
103        custom_metric: function, optional (default=None)
104            When function is provided, models are evaluated based on the custom evaluation metric provided.
105
106        predictions: bool, optional (default=False)
107            When set to True, the predictions of all the models are returned as a data frame.
108
109        sort_by: string, optional (default='RMSE')
110            Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken' and 'Custom Metric',
111            or a custom metric identified by its name and provided by custom_metric.
112
113        random_state: int, optional (default=42)
114            Reproducibility seed.
115
116        estimators: list, optional (default='all')
117            list of estimator names, or just 'all'
118
119        preprocess: bool
120            preprocessing is done when set to True
121
122        n_jobs : int, when possible, run in parallel
123            For now, only used by individual models that support it.
124
125        n_layers: int, optional (default=3)
126            Number of layers of CustomRegressors to be used.
127
128        All the other parameters are the same as CustomRegressor's.
129
130    Attributes:
131
132        models_: dict-object
133            Returns a dictionary with each model pipeline as value
134            with key as name of models.
135
136        best_model_: object
137            Returns the best model pipeline based on the sort_by metric.
138
139    Examples:
140
141        import nnetsauce as ns
142        import numpy as np
143        from sklearn import datasets
144        from sklearn.utils import shuffle
145
146        diabetes = datasets.load_diabetes()
147        X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
148        X = X.astype(np.float32)
149
150        offset = int(X.shape[0] * 0.9)
151        X_train, y_train = X[:offset], y[:offset]
152        X_test, y_test = X[offset:], y[offset:]
153
154        reg = ns.LazyDeepRegressor(verbose=0, ignore_warnings=False, custom_metric=None)
155        models, predictions = reg.fit(X_train, X_test, y_train, y_test)
156        print(models)
157
158    """
159
160    def __init__(
161        self,
162        verbose=0,
163        ignore_warnings=True,
164        custom_metric=None,
165        predictions=False,
166        sort_by="RMSE",
167        random_state=42,
168        estimators="all",
169        preprocess=False,
170        n_jobs=None,
171        # Defining depth
172        n_layers=3,
173        # CustomRegressor attributes
174        obj=None,
175        n_hidden_features=5,
176        activation_name="relu",
177        a=0.01,
178        nodes_sim="sobol",
179        bias=True,
180        dropout=0,
181        direct_link=True,
182        n_clusters=2,
183        cluster_encode=True,
184        type_clust="kmeans",
185        type_scaling=("std", "std", "std"),
186        col_sample=1,
187        row_sample=1,
188        seed=123,
189        backend="cpu",
190    ):
191        self.verbose = verbose
192        self.ignore_warnings = ignore_warnings
193        self.custom_metric = custom_metric
194        self.predictions = predictions
195        self.sort_by = sort_by
196        self.models_ = {}
197        self.best_model_ = None
198        self.random_state = random_state
199        self.estimators = estimators
200        self.preprocess = preprocess
201        self.n_layers = n_layers - 1
202        self.n_jobs = n_jobs
203        super().__init__(
204            obj=obj,
205            n_hidden_features=n_hidden_features,
206            activation_name=activation_name,
207            a=a,
208            nodes_sim=nodes_sim,
209            bias=bias,
210            dropout=dropout,
211            direct_link=direct_link,
212            n_clusters=n_clusters,
213            cluster_encode=cluster_encode,
214            type_clust=type_clust,
215            type_scaling=type_scaling,
216            col_sample=col_sample,
217            row_sample=row_sample,
218            seed=seed,
219            backend=backend,
220        )
221
222    def fit(self, X_train, X_test, y_train, y_test):
223        """Fit Regression algorithms to X_train and y_train, predict and score on X_test, y_test.
224
225        Parameters:
226
227            X_train : array-like,
228                Training vectors, where rows is the number of samples
229                and columns is the number of features.
230
231            X_test : array-like,
232                Testing vectors, where rows is the number of samples
233                and columns is the number of features.
234
235            y_train : array-like,
236                Training vectors, where rows is the number of samples
237                and columns is the number of features.
238
239            y_test : array-like,
240                Testing vectors, where rows is the number of samples
241                and columns is the number of features.
242
243        Returns:
244
245            scores: Pandas DataFrame
246                Returns metrics of all the models in a Pandas DataFrame.
247
248            predictions: Pandas DataFrame
249                Returns predictions of all the models in a Pandas DataFrame.
250
251        """
252        R2 = []
253        ADJR2 = []
254        RMSE = []
255        # WIN = []
256        names = []
257        TIME = []
258        predictions = {}
259
260        if self.custom_metric:
261            CUSTOM_METRIC = []
262
263        if isinstance(X_train, np.ndarray):
264            X_train = pd.DataFrame(X_train)
265            X_test = pd.DataFrame(X_test)
266
267        numeric_features = X_train.select_dtypes(include=[np.number]).columns
268        categorical_features = X_train.select_dtypes(include=["object"]).columns
269
270        categorical_low, categorical_high = get_card_split(
271            X_train, categorical_features
272        )
273
274        if self.preprocess is True:
275            preprocessor = ColumnTransformer(
276                transformers=[
277                    ("numeric", numeric_transformer, numeric_features),
278                    (
279                        "categorical_low",
280                        categorical_transformer_low,
281                        categorical_low,
282                    ),
283                    (
284                        "categorical_high",
285                        categorical_transformer_high,
286                        categorical_high,
287                    ),
288                ]
289            )
290
291        # base models
292        try:
293            baseline_names = ["RandomForestRegressor", "XGBRegressor"]
294            baseline_models = [RandomForestRegressor(), xgb.XGBRegressor()]
295        except Exception as exception:
296            baseline_names = ["RandomForestRegressor"]
297            baseline_models = [RandomForestRegressor()]
298
299        for name, model in zip(baseline_names, baseline_models):
300            start = time.time()
301            try:
302                model.fit(X_train, y_train)
303                self.models_[name] = model
304                y_pred = model.predict(X_test)
305                r_squared = r2_score(y_test, y_pred)
306                adj_rsquared = adjusted_rsquared(
307                    r_squared, X_test.shape[0], X_test.shape[1]
308                )
309                rmse = np.sqrt(np.mean((y_test - y_pred) ** 2))
310
311                names.append(name)
312                R2.append(r_squared)
313                ADJR2.append(adj_rsquared)
314                RMSE.append(rmse)
315                TIME.append(time.time() - start)
316
317                if self.custom_metric:
318                    custom_metric = self.custom_metric(y_test, y_pred)
319                    CUSTOM_METRIC.append(custom_metric)
320
321                if self.verbose > 0:
322                    scores_verbose = {
323                        "Model": name,
324                        "R-Squared": r_squared,
325                        "Adjusted R-Squared": adj_rsquared,
326                        "RMSE": rmse,
327                        "Time taken": time.time() - start,
328                    }
329
330                    if self.custom_metric:
331                        scores_verbose[self.custom_metric.__name__] = custom_metric
332
333                    print(scores_verbose)
334                if self.predictions:
335                    predictions[name] = y_pred
336            except Exception as exception:
337                if self.ignore_warnings is False:
338                    print(name + " model failed to execute")
339                    print(exception)
340
341        if self.estimators == "all":
342            self.regressors = DEEPREGRESSORS
343        else:
344            self.regressors = [
345                ("DeepCustomRegressor(" + est[0] + ")", est[1])
346                for est in all_estimators()
347                if (issubclass(est[1], RegressorMixin) and (est[0] in self.estimators))
348            ]
349
350        if self.preprocess is True:
351
352            for name, model in tqdm(self.regressors):  # do parallel exec
353                start = time.time()
354                try:
355                    if "random_state" in model().get_params().keys():
356                        layer_regr = CustomRegressor(
357                            obj=model(random_state=self.random_state),
358                            n_hidden_features=self.n_hidden_features,
359                            activation_name=self.activation_name,
360                            a=self.a,
361                            nodes_sim=self.nodes_sim,
362                            bias=self.bias,
363                            dropout=self.dropout,
364                            direct_link=self.direct_link,
365                            n_clusters=self.n_clusters,
366                            cluster_encode=self.cluster_encode,
367                            type_clust=self.type_clust,
368                            type_scaling=self.type_scaling,
369                            col_sample=self.col_sample,
370                            row_sample=self.row_sample,
371                            seed=self.seed,
372                            backend=self.backend,
373                        )
374                    else:
375                        layer_regr = CustomRegressor(
376                            obj=model(),
377                            n_hidden_features=self.n_hidden_features,
378                            activation_name=self.activation_name,
379                            a=self.a,
380                            nodes_sim=self.nodes_sim,
381                            bias=self.bias,
382                            dropout=self.dropout,
383                            direct_link=self.direct_link,
384                            n_clusters=self.n_clusters,
385                            cluster_encode=self.cluster_encode,
386                            type_clust=self.type_clust,
387                            type_scaling=self.type_scaling,
388                            col_sample=self.col_sample,
389                            row_sample=self.row_sample,
390                            seed=self.seed,
391                            backend=self.backend,
392                        )
393
394                    for _ in range(self.n_layers):
395                        layer_regr = deepcopy(
396                            CustomRegressor(
397                                obj=layer_regr,
398                                n_hidden_features=self.n_hidden_features,
399                                activation_name=self.activation_name,
400                                a=self.a,
401                                nodes_sim=self.nodes_sim,
402                                bias=self.bias,
403                                dropout=self.dropout,
404                                direct_link=self.direct_link,
405                                n_clusters=self.n_clusters,
406                                cluster_encode=self.cluster_encode,
407                                type_clust=self.type_clust,
408                                type_scaling=self.type_scaling,
409                                col_sample=self.col_sample,
410                                row_sample=self.row_sample,
411                                seed=self.seed,
412                                backend=self.backend,
413                            )
414                        )
415
416                    layer_regr.fit(X_train, y_train)
417
418                    pipe = Pipeline(
419                        steps=[
420                            ("preprocessor", preprocessor),
421                            ("regressor", layer_regr),
422                        ]
423                    )
424
425                    pipe.fit(X_train, y_train)
426
427                    self.models_[name] = pipe
428                    y_pred = pipe.predict(X_test)
429                    r_squared = r2_score(y_test, y_pred)
430                    adj_rsquared = adjusted_rsquared(
431                        r_squared, X_test.shape[0], X_test.shape[1]
432                    )
433                    rmse = np.sqrt(np.mean((y_test - y_pred) ** 2))
434
435                    names.append(name)
436                    R2.append(r_squared)
437                    ADJR2.append(adj_rsquared)
438                    RMSE.append(rmse)
439                    TIME.append(time.time() - start)
440
441                    if self.custom_metric:
442                        custom_metric = self.custom_metric(y_test, y_pred)
443                        CUSTOM_METRIC.append(custom_metric)
444
445                    if self.verbose > 0:
446                        scores_verbose = {
447                            "Model": name,
448                            "R-Squared": r_squared,
449                            "Adjusted R-Squared": adj_rsquared,
450                            "RMSE": rmse,
451                            "Time taken": time.time() - start,
452                        }
453
454                        if self.custom_metric:
455                            scores_verbose[self.custom_metric.__name__] = custom_metric
456
457                        print(scores_verbose)
458                    if self.predictions:
459                        predictions[name] = y_pred
460                except Exception as exception:
461                    if self.ignore_warnings is False:
462                        print(name + " model failed to execute")
463                        print(exception)
464
465        else:  # no preprocessing
466
467            for name, model in tqdm(self.regressors):  # do parallel exec
468                start = time.time()
469                try:
470                    if "random_state" in model().get_params().keys():
471                        layer_regr = CustomRegressor(
472                            obj=model(random_state=self.random_state),
473                            n_hidden_features=self.n_hidden_features,
474                            activation_name=self.activation_name,
475                            a=self.a,
476                            nodes_sim=self.nodes_sim,
477                            bias=self.bias,
478                            dropout=self.dropout,
479                            direct_link=self.direct_link,
480                            n_clusters=self.n_clusters,
481                            cluster_encode=self.cluster_encode,
482                            type_clust=self.type_clust,
483                            type_scaling=self.type_scaling,
484                            col_sample=self.col_sample,
485                            row_sample=self.row_sample,
486                            seed=self.seed,
487                            backend=self.backend,
488                        )
489                    else:
490                        layer_regr = CustomRegressor(
491                            obj=model(),
492                            n_hidden_features=self.n_hidden_features,
493                            activation_name=self.activation_name,
494                            a=self.a,
495                            nodes_sim=self.nodes_sim,
496                            bias=self.bias,
497                            dropout=self.dropout,
498                            direct_link=self.direct_link,
499                            n_clusters=self.n_clusters,
500                            cluster_encode=self.cluster_encode,
501                            type_clust=self.type_clust,
502                            type_scaling=self.type_scaling,
503                            col_sample=self.col_sample,
504                            row_sample=self.row_sample,
505                            seed=self.seed,
506                            backend=self.backend,
507                        )
508
509                    layer_regr.fit(X_train, y_train)
510
511                    for _ in range(self.n_layers):
512                        layer_regr = deepcopy(
513                            CustomRegressor(
514                                obj=layer_regr,
515                                n_hidden_features=self.n_hidden_features,
516                                activation_name=self.activation_name,
517                                a=self.a,
518                                nodes_sim=self.nodes_sim,
519                                bias=self.bias,
520                                dropout=self.dropout,
521                                direct_link=self.direct_link,
522                                n_clusters=self.n_clusters,
523                                cluster_encode=self.cluster_encode,
524                                type_clust=self.type_clust,
525                                type_scaling=self.type_scaling,
526                                col_sample=self.col_sample,
527                                row_sample=self.row_sample,
528                                seed=self.seed,
529                                backend=self.backend,
530                            )
531                        )
532
533                        # layer_regr.fit(X_train, y_train)
534
535                    layer_regr.fit(X_train, y_train)
536
537                    self.models_[name] = layer_regr
538                    y_pred = layer_regr.predict(X_test)
539
540                    r_squared = r2_score(y_test, y_pred)
541                    adj_rsquared = adjusted_rsquared(
542                        r_squared, X_test.shape[0], X_test.shape[1]
543                    )
544                    rmse = np.sqrt(np.mean((y_test - y_pred) ** 2))
545
546                    names.append(name)
547                    R2.append(r_squared)
548                    ADJR2.append(adj_rsquared)
549                    RMSE.append(rmse)
550                    TIME.append(time.time() - start)
551
552                    if self.custom_metric:
553                        custom_metric = self.custom_metric(y_test, y_pred)
554                        CUSTOM_METRIC.append(custom_metric)
555
556                    if self.verbose > 0:
557                        scores_verbose = {
558                            "Model": name,
559                            "R-Squared": r_squared,
560                            "Adjusted R-Squared": adj_rsquared,
561                            "RMSE": rmse,
562                            "Time taken": time.time() - start,
563                        }
564
565                        if self.custom_metric:
566                            scores_verbose[self.custom_metric.__name__] = custom_metric
567
568                        print(scores_verbose)
569                    if self.predictions:
570                        predictions[name] = y_pred
571                except Exception as exception:
572                    if self.ignore_warnings is False:
573                        print(name + " model failed to execute")
574                        print(exception)
575
576        scores = {
577            "Model": names,
578            "Adjusted R-Squared": ADJR2,
579            "R-Squared": R2,
580            "RMSE": RMSE,
581            "Time Taken": TIME,
582        }
583
584        if self.custom_metric:
585            scores["Custom metric"] = CUSTOM_METRIC
586
587        scores = pd.DataFrame(scores)
588        scores = scores.sort_values(by=self.sort_by, ascending=True).set_index("Model")
589
590        self.best_model_ = self.models_[scores.index[0]]
591
592        if self.predictions is True:
593
594            return scores, predictions
595
596        return scores
597
598    def get_best_model(self):
599        """
600        This function returns the best model pipeline based on the sort_by metric.
601
602        Returns:
603
604            best_model: object,
605                Returns the best model pipeline based on the sort_by metric.
606
607        """
608        return self.best_model_
609
610    def provide_models(self, X_train, X_test, y_train, y_test):
611        """
612        This function returns all the model objects trained in fit function.
613        If fit is not called already, then we call fit and then return the models.
614
615        Parameters:
616
617            X_train : array-like,
618                Training vectors, where rows is the number of samples
619                and columns is the number of features.
620
621            X_test : array-like,
622                Testing vectors, where rows is the number of samples
623                and columns is the number of features.
624
625            y_train : array-like,
626                Training target values (one value per
627                training sample).
628
629            y_test : array-like,
630                Testing target values (one value per
631                testing sample), used for scoring.
632
633        Returns:
634
635            models: dict-object,
636                Returns a dictionary with each model pipeline as value
637                with key as name of models.
638
639        """
640        if len(self.models_.keys()) == 0:
641            self.fit(X_train, X_test, y_train, y_test)
642
643        return self.models_

Fitting -- almost -- all the regression algorithms with layers of nnetsauce's CustomRegressor and returning their scores.

Parameters:

verbose: int, optional (default=0)
    Any positive number for verbosity.

ignore_warnings: bool, optional (default=True)
    When set to True, warnings related to algorithms that are not able to run are ignored.

custom_metric: function, optional (default=None)
    When a function is provided, models are also evaluated with this custom evaluation metric.

predictions: bool, optional (default=False)
    When set to True, the predictions of all the models are also returned as a data frame.

sort_by: string, optional (default='RMSE')
    Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken',
    or 'Custom metric' when a custom metric is provided through custom_metric.

random_state: int, optional (default=42)
    Reproducibility seed.

estimators: list, optional (default='all')
    List of estimator names (scikit-learn regressor class names), or just 'all'.

preprocess: bool, optional (default=False)
    Preprocessing of numeric and categorical features is applied when set to True.

n_jobs: int, optional (default=None)
    Number of jobs to run in parallel, when possible. For now, only used by individual models that support it.

n_layers: int, optional (default=3)
    Number of layers of CustomRegressors to be used.

All the other parameters are the same as CustomRegressor's.

Attributes:

models_: dict-object
    Returns a dictionary with each model pipeline as value
    with key as name of models.

best_model_: object
    Returns the best model pipeline based on the sort_by metric.

Examples:

import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle

diabetes = datasets.load_diabetes()
X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
X = X.astype(np.float32)

offset = int(X.shape[0] * 0.9)
X_train, y_train = X[:offset], y[:offset]
X_test, y_test = X[offset:], y[offset:]

reg = ns.LazyDeepRegressor(verbose=0, ignore_warnings=False, custom_metric=None, predictions=True)
models, predictions = reg.fit(X_train, X_test, y_train, y_test)
print(models)
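
As a variation on the example above, here is a hypothetical sketch (not taken from nnetsauce's documentation) that restricts the search to a couple of scikit-learn regressors via the estimators parameter, passes a named custom metric, and sets predictions=True so that fit returns both the scores and the predictions. The metric name mae and the data split are illustrative assumptions.

import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

def mae(y_true, y_pred):
    # custom metric: mean absolute error (lower is better)
    return np.mean(np.abs(y_true - y_pred))

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

reg = ns.LazyDeepRegressor(
    verbose=0,
    ignore_warnings=True,
    custom_metric=mae,                  # reported in a 'Custom metric' column
    predictions=True,                   # fit() then returns (scores, predictions)
    estimators=["Ridge", "ElasticNet"], # scikit-learn regressor class names
    n_layers=2,
)
models, predictions = reg.fit(X_train, X_test, y_train, y_test)
print(models)
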
def fit(self, X_train, X_test, y_train, y_test):

Fit Regression algorithms to X_train and y_train, predict and score on X_test, y_test.

Parameters:

X_train : array-like,
    Training feature matrix, where the rows are the samples
    and the columns are the features.

X_test : array-like,
    Testing feature matrix, where the rows are the samples
    and the columns are the features.

y_train : array-like,
    Training target values (one value per training sample).

y_test : array-like,
    Testing target values (one value per testing sample), used for scoring.

Returns:

scores: Pandas DataFrame
    Metrics of all the models.

predictions: Pandas DataFrame
    Predictions of all the models (only returned when predictions=True).
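
For illustration, a minimal hypothetical sketch of these return values: with the default predictions=False, fit returns only the scores data frame, while the fitted models are kept in models_ and the best one, according to sort_by, is available through get_best_model. The dataset and split below are assumptions made for the sketch.

import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

reg = ns.LazyDeepRegressor(verbose=0, ignore_warnings=True)
scores = reg.fit(X_train, X_test, y_train, y_test)  # predictions=False (default): scores only
print(scores.head())

best = reg.get_best_model()      # best model according to sort_by (default 'RMSE')
print(best.predict(X_test)[:5])  # models stored during fit() are already fitted
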

def provide_models(self, X_train, X_test, y_train, y_test):

Returns all the model objects trained by the fit method. If fit has not been called yet, it is called first and the fitted models are then returned.

Parameters:

X_train : array-like,
    Training feature matrix, where the rows are the samples
    and the columns are the features.

X_test : array-like,
    Testing feature matrix, where the rows are the samples
    and the columns are the features.

y_train : array-like,
    Training target values (one value per training sample).

y_test : array-like,
    Testing target values (one value per testing sample), used for scoring.

Returns:

models: dict,
    A dictionary mapping each model name to its fitted model or pipeline.
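
A hypothetical usage sketch: provide_models can be called directly, in which case it runs fit itself before returning the dictionary of fitted models (model names as keys, fitted models or pipelines as values). The restricted estimators list and the data split are illustrative assumptions.

import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

reg = ns.LazyDeepRegressor(verbose=0, ignore_warnings=True, estimators=["Ridge"])
# fit() has not been called yet, so provide_models() calls it first
models = reg.provide_models(X_train, X_test, y_train, y_test)
for name, model in models.items():
    print(name, "->", type(model).__name__)
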
class LazyMTS(nnetsauce.LazyDeepMTS):
 988class LazyMTS(LazyDeepMTS):
 989    """
 990    Fitting -- almost -- all the regression algorithms to multivariate time series
 991    and returning their scores (no layers).
 992
 993    Parameters:
 994
 995        verbose: int, optional (default=0)
 996            Any positive number for verbosity.
 997
 998        ignore_warnings: bool, optional (default=True)
 999            When set to True, warnings related to algorithms that are not
1000            able to run are ignored.
1001
1002        custom_metric: function, optional (default=None)
1003            When function is provided, models are evaluated based on the custom
1004              evaluation metric provided.
1005
1006        predictions: bool, optional (default=False)
1007            When set to True, the predictions of all the models are returned as a data frame.
1008
1009        sort_by: string, optional (default='RMSE')
1010            Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE',
1011            'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and
1012            provided by custom_metric.
1013
1014        random_state: int, optional (default=42)
1015            Reproducibility seed.
1016
1017        estimators: list, optional (default='all')
1018            list of Estimators (regression algorithms) names or just 'all' (default='all')
1019
1020        preprocess: bool, preprocessing is done when set to True
1021
1022        h: int, optional (default=None)
1023            Number of steps ahead to predict (when used, must be > 0 and < X_test.shape[0]).
1024
1025        All the other parameters are the same as MTS's.
1026
1027    Attributes:
1028
1029        models_: dict-object
1030            Returns a dictionary with each model pipeline as value
1031            with key as name of models.
1032
1033        best_model_: object
1034            Returns the best model pipeline based on the sort_by metric.
1035
1036    Examples:
1037
1038        See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict
1039
1040    """
1041
1042    def __init__(
1043        self,
1044        verbose=0,
1045        ignore_warnings=True,
1046        custom_metric=None,
1047        predictions=False,
1048        sort_by=None,  # leave it as is
1049        random_state=42,
1050        estimators="all",
1051        preprocess=False,
1052        h=None,
1053        # MTS attributes
1054        obj=None,
1055        n_hidden_features=5,
1056        activation_name="relu",
1057        a=0.01,
1058        nodes_sim="sobol",
1059        bias=True,
1060        dropout=0,
1061        direct_link=True,
1062        n_clusters=2,
1063        cluster_encode=True,
1064        type_clust="kmeans",
1065        type_scaling=("std", "std", "std"),
1066        lags=15,
1067        type_pi="scp2-kde",
1068        block_size=None,
1069        replications=None,
1070        kernel=None,
1071        agg="mean",
1072        seed=123,
1073        backend="cpu",
1074        show_progress=False,
1075    ):
1076        super().__init__(
1077            verbose=verbose,
1078            ignore_warnings=ignore_warnings,
1079            custom_metric=custom_metric,
1080            predictions=predictions,
1081            sort_by=sort_by,
1082            random_state=random_state,
1083            estimators=estimators,
1084            preprocess=preprocess,
1085            n_layers=1,
1086            h=h,
1087            obj=obj,
1088            n_hidden_features=n_hidden_features,
1089            activation_name=activation_name,
1090            a=a,
1091            nodes_sim=nodes_sim,
1092            bias=bias,
1093            dropout=dropout,
1094            direct_link=direct_link,
1095            n_clusters=n_clusters,
1096            cluster_encode=cluster_encode,
1097            type_clust=type_clust,
1098            type_scaling=type_scaling,
1099            lags=lags,
1100            type_pi=type_pi,
1101            block_size=block_size,
1102            replications=replications,
1103            kernel=kernel,
1104            agg=agg,
1105            seed=seed,
1106            backend=backend,
1107            show_progress=show_progress,
1108        )

Fitting -- almost -- all the regression algorithms to multivariate time series and returning their scores (no layers).

Parameters:

verbose: int, optional (default=0)
    Any positive number for verbosity.

ignore_warnings: bool, optional (default=True)
    When set to True, warnings related to algorithms that are not
    able to run are ignored.

custom_metric: function, optional (default=None)
    When a function is provided, models are also evaluated with this
    custom evaluation metric.

predictions: bool, optional (default=False)
    When set to True, the predictions of all the models are returned as a data frame.

sort_by: string, optional (default='RMSE')
    Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE',
    'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and
    provided by custom_metric.

random_state: int, optional (default=42)
    Reproducibility seed.

estimators: list, optional (default='all')
    List of estimator (regression algorithm) names, or just 'all'.

preprocess: bool, preprocessing is done when set to True

h: int, optional (default=None)
    Number of steps ahead to predict (when used, must be > 0 and < X_test.shape[0]).

All the other parameters are the same as MTS's.

Attributes:

models_: dict-object
    Returns a dictionary with each model pipeline as value
    with key as name of models.

best_model_: object
    Returns the best model pipeline based on the sort_by metric.

Examples:

See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict
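
In addition to the blog post linked above, here is a minimal hypothetical sketch of LazyMTS on a small synthetic multivariate series (two random walks, a restricted set of scikit-learn regressors, default settings otherwise). The synthetic data and the chosen estimators are assumptions made for illustration only.

import nnetsauce as ns
import numpy as np
import pandas as pd

# Two synthetic series (random walks), purely illustrative
rng = np.random.default_rng(123)
df = pd.DataFrame(np.cumsum(rng.normal(size=(120, 2)), axis=0),
                  columns=["series1", "series2"])
df_train, df_test = df.iloc[:100, :], df.iloc[100:, :]

regr_mts = ns.LazyMTS(verbose=0, ignore_warnings=True,
                      estimators=["Ridge", "ElasticNet"],
                      show_progress=False)
results = regr_mts.fit(df_train, df_test)  # scores (and predictions, when requested)
print(results)
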
class LazyDeepMTS(nnetsauce.MTS):
104class LazyDeepMTS(MTS):
105    """
106
107    Fitting -- almost -- all the regression algorithms with layers of
108    nnetsauce's CustomRegressor to multivariate time series
109    and returning their scores.
110
111    Parameters:
112
113        verbose: int, optional (default=0)
114            Any positive number for verbosity.
115
116        ignore_warnings: bool, optional (default=True)
117            When set to True, warnings related to algorithms that are not
118            able to run are ignored.
119
120        custom_metric: function, optional (default=None)
121            When function is provided, models are evaluated based on the custom
122              evaluation metric provided.
123
124        predictions: bool, optional (default=False)
125            When set to True, the predictions of all the models are returned as a data frame.
126
127        sort_by: string, optional (default='RMSE')
128            Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE',
129            'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and
130            provided by custom_metric.
131
132        random_state: int, optional (default=42)
133            Reproducibility seed.
134
135        estimators: list, optional (default='all')
136            list of Estimators (regression algorithms) names or just 'all' (default='all')
137
138        preprocess: bool, preprocessing is done when set to True
139
140        n_layers: int, optional (default=1)
141            Number of layers in the network. When set to 1, the model is equivalent to an MTS.
142
143        h: int, optional (default=None)
144            Number of steps ahead to predict (when used, must be > 0 and < X_test.shape[0]).
145
146        All the other parameters are the same as MTS's.
147
148    Attributes:
149
150        models_: dict-object
151            Returns a dictionary with each model pipeline as value
152            with key as name of models.
153
154        best_model_: object
155            Returns the best model pipeline based on the sort_by metric.
156
157    Examples:
158
159        See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict
160
161    """
162
163    def __init__(
164        self,
165        verbose=0,
166        ignore_warnings=True,
167        custom_metric=None,
168        predictions=False,
169        sort_by=None,  # leave it as is
170        random_state=42,
171        estimators="all",
172        preprocess=False,
173        n_layers=1,
174        h=None,
175        # MTS attributes
176        obj=None,
177        n_hidden_features=5,
178        activation_name="relu",
179        a=0.01,
180        nodes_sim="sobol",
181        bias=True,
182        dropout=0,
183        direct_link=True,
184        n_clusters=2,
185        cluster_encode=True,
186        type_clust="kmeans",
187        type_scaling=("std", "std", "std"),
188        lags=15,
189        type_pi="scp2-kde",
190        block_size=None,
191        replications=None,
192        kernel=None,
193        agg="mean",
194        seed=123,
195        backend="cpu",
196        show_progress=False,
197    ):
198        self.verbose = verbose
199        self.ignore_warnings = ignore_warnings
200        self.custom_metric = custom_metric
201        self.predictions = predictions
202        self.sort_by = sort_by
203        self.models_ = {}
204        self.best_model_ = None
205        self.random_state = random_state
206        self.estimators = estimators
207        self.preprocess = preprocess
208        self.n_layers = n_layers
209        self.h = h
210        super().__init__(
211            obj=obj,
212            n_hidden_features=n_hidden_features,
213            activation_name=activation_name,
214            a=a,
215            nodes_sim=nodes_sim,
216            bias=bias,
217            dropout=dropout,
218            direct_link=direct_link,
219            n_clusters=n_clusters,
220            cluster_encode=cluster_encode,
221            type_clust=type_clust,
222            type_scaling=type_scaling,
223            seed=seed,
224            backend=backend,
225            lags=lags,
226            type_pi=type_pi,
227            block_size=block_size,
228            replications=replications,
229            kernel=kernel,
230            agg=agg,
231            verbose=verbose,
232            show_progress=show_progress,
233        )
234        if self.replications is not None or self.type_pi == "gaussian":
235            if self.sort_by is None:
236                self.sort_by = "WINKLERSCORE"
237        else:
238            if self.sort_by is None:
239                self.sort_by = "RMSE"
240
241    def fit(self, X_train, X_test, xreg=None, per_series=False, **kwargs):
242        """Fit Regression algorithms to X_train, predict and score on X_test.
243
244        Parameters:
245
246            X_train: array-like or data frame,
247                Training vectors, where rows is the number of samples
248                and columns is the number of features.
249
250            X_test: array-like or data frame,
251                Testing vectors, where rows is the number of samples
252                and columns is the number of features.
253
254            xreg: array-like, optional (default=None)
255                Additional (external) regressors to be passed to self.obj
256                xreg must be in 'increasing' order (most recent observations last)
257
258            per_series: bool, optional (default=False)
259                When set to True, the metrics are computed series by series.
260
261            **kwargs: dict, optional (default=None)
262                Additional parameters to be passed to `fit` method of `obj`.
263
264        Returns:
265
266            scores: Pandas DataFrame
267                Returns metrics of all the models in a Pandas DataFrame.
268
269            predictions: Pandas DataFrame
270                Returns predictions of all the models in a Pandas DataFrame.
271
272        """
273        R2 = []
274        ADJR2 = []
275        ME = []
276        MPL = []
277        RMSE = []
278        MAE = []
279        MPE = []
280        MAPE = []
281        WINKLERSCORE = []
282        COVERAGE = []
283
284        # WIN = []
285        names = []
286        TIME = []
287        predictions = {}
288
289        if self.custom_metric is not None:
290            CUSTOM_METRIC = []
291
292        if self.h is None:
293            assert X_test is not None, "If h is None, X_test must be provided."
294
295        if isinstance(X_train, np.ndarray):
296            X_train = pd.DataFrame(X_train)
297            X_test = pd.DataFrame(X_test)
298
299        self.series_names = X_train.columns.tolist()
300
301        X_train = convert_df_to_numeric(X_train)
302        X_test = convert_df_to_numeric(X_test)
303
304        numeric_features = X_train.select_dtypes(include=[np.number]).columns
305        categorical_features = X_train.select_dtypes(include=["object"]).columns
306
307        categorical_low, categorical_high = get_card_split(
308            X_train, categorical_features
309        )
310
311        if self.preprocess:
312            preprocessor = ColumnTransformer(
313                transformers=[
314                    ("numeric", numeric_transformer, numeric_features),
315                    (
316                        "categorical_low",
317                        categorical_transformer_low,
318                        categorical_low,
319                    ),
320                    (
321                        "categorical_high",
322                        categorical_transformer_high,
323                        categorical_high,
324                    ),
325                ]
326            )
327
328        # baselines (Classical MTS) ----
329        for i, name in enumerate(["ARIMA", "ETS", "Theta", "VAR", "VECM"]):
330            try:
331                start = time.time()
332                regr = ClassicalMTS(model=name)
333                regr.fit(X_train, **kwargs)
334                self.models_[name] = regr
335                if self.h is None:
336                    X_pred = regr.predict(h=X_test.shape[0], **kwargs)
337                else:
338                    assert self.h > 0, "h must be > 0"
339                    X_pred = regr.predict(h=self.h, **kwargs)
340                    try:
341                        X_test = X_test[0 : self.h, :]
342                    except Exception as e:
343                        X_test = X_test.iloc[0 : self.h, :]
344
345                rmse = mean_errors(
346                    actual=X_test,
347                    pred=X_pred,
348                    scoring="root_mean_squared_error",
349                    per_series=per_series,
350                )
351                mae = mean_errors(
352                    actual=X_test,
353                    pred=X_pred,
354                    scoring="mean_absolute_error",
355                    per_series=per_series,
356                )
357                mpl = mean_errors(
358                    actual=X_test,
359                    pred=X_pred,
360                    scoring="mean_pinball_loss",
361                    per_series=per_series,
362                )
363            except Exception:
364
365                continue
366
367            names.append(name)
368            RMSE.append(rmse)
369            MAE.append(mae)
370            MPL.append(mpl)
371
372            if self.custom_metric is not None:
373                try:
374                    if self.h is None:
375                        custom_metric = self.custom_metric(X_test, X_pred)
376                    else:
377                        custom_metric = self.custom_metric(X_test, X_pred)
378                    CUSTOM_METRIC.append(custom_metric)
379                except Exception as e:
380                    custom_metric = np.iinfo(np.float32).max
381                    CUSTOM_METRIC.append(np.iinfo(np.float32).max)
382
383            if (self.replications is not None) or (self.type_pi == "gaussian"):
384                if per_series == False:
385                    winklerscore = winkler_score(obj=X_pred, actual=X_test, level=95)
386                    coveragecalc = coverage(X_pred, X_test, level=95)
387                else:
388                    winklerscore = winkler_score(
389                        obj=X_pred, actual=X_test, level=95, per_series=True
390                    )
391                    coveragecalc = coverage(X_pred, X_test, level=95, per_series=True)
392                WINKLERSCORE.append(winklerscore)
393                COVERAGE.append(coveragecalc)
394            TIME.append(time.time() - start)
395
396        if self.estimators == "all":
397            if self.n_layers <= 1:
398                self.regressors = REGRESSORSMTS
399            else:
400                self.regressors = DEEPREGRESSORSMTS
401        else:
402            if self.n_layers <= 1:
403                self.regressors = [
404                    ("MTS(" + est[0] + ")", est[1])
405                    for est in all_estimators()
406                    if (
407                        issubclass(est[1], RegressorMixin)
408                        and (est[0] in self.estimators)
409                    )
410                ]
411            else:  # self.n_layers > 1
412                self.regressors = [
413                    ("DeepMTS(" + est[0] + ")", est[1])
414                    for est in all_estimators()
415                    if (
416                        issubclass(est[1], RegressorMixin)
417                        and (est[0] in self.estimators)
418                    )
419                ]
420
421        if self.preprocess is True:
422            for name, model in tqdm(self.regressors):  # do parallel exec
423                start = time.time()
424                try:
425                    if "random_state" in model().get_params().keys():
426                        pipe = Pipeline(
427                            steps=[
428                                ("preprocessor", preprocessor),
429                                (
430                                    "regressor",
431                                    DeepMTS(
432                                        obj=model(
433                                            random_state=self.random_state,
434                                            **kwargs,
435                                        ),
436                                        n_layers=self.n_layers,
437                                        n_hidden_features=self.n_hidden_features,
438                                        activation_name=self.activation_name,
439                                        a=self.a,
440                                        nodes_sim=self.nodes_sim,
441                                        bias=self.bias,
442                                        dropout=self.dropout,
443                                        direct_link=self.direct_link,
444                                        n_clusters=self.n_clusters,
445                                        cluster_encode=self.cluster_encode,
446                                        type_clust=self.type_clust,
447                                        type_scaling=self.type_scaling,
448                                        lags=self.lags,
449                                        type_pi=self.type_pi,
450                                        block_size=self.block_size,
451                                        replications=self.replications,
452                                        kernel=self.kernel,
453                                        agg=self.agg,
454                                        seed=self.seed,
455                                        backend=self.backend,
456                                        show_progress=self.show_progress,
457                                    ),
458                                ),
459                            ]
460                        )
461                    else:  # "random_state" not in model().get_params().keys()
462                        pipe = Pipeline(
463                            steps=[
464                                ("preprocessor", preprocessor),
465                                (
466                                    "regressor",
467                                    DeepMTS(
468                                        obj=model(**kwargs),
469                                        n_layers=self.n_layers,
470                                        n_hidden_features=self.n_hidden_features,
471                                        activation_name=self.activation_name,
472                                        a=self.a,
473                                        nodes_sim=self.nodes_sim,
474                                        bias=self.bias,
475                                        dropout=self.dropout,
476                                        direct_link=self.direct_link,
477                                        n_clusters=self.n_clusters,
478                                        cluster_encode=self.cluster_encode,
479                                        type_clust=self.type_clust,
480                                        type_scaling=self.type_scaling,
481                                        lags=self.lags,
482                                        type_pi=self.type_pi,
483                                        block_size=self.block_size,
484                                        replications=self.replications,
485                                        kernel=self.kernel,
486                                        agg=self.agg,
487                                        seed=self.seed,
488                                        backend=self.backend,
489                                        show_progress=self.show_progress,
490                                    ),
491                                ),
492                            ]
493                        )
494
495                    pipe.fit(X_train, **kwargs)
496                    # pipe.fit(X_train, xreg=xreg)
497
498                    self.models_[name] = pipe
499
500                    if self.h is None:
501                        X_pred = pipe["regressor"].predict(h=X_test.shape[0], **kwargs)
502                    else:
503                        assert self.h > 0, "h must be > 0"
504                        X_pred = pipe["regressor"].predict(h=self.h, **kwargs)
505
506                    if (self.replications is not None) or (self.type_pi == "gaussian"):
507                        rmse = mean_errors(
508                            actual=X_test,
509                            pred=X_pred,
510                            scoring="root_mean_squared_error",
511                            per_series=per_series,
512                        )
513                        mae = mean_errors(
514                            actual=X_test,
515                            pred=X_pred,
516                            scoring="mean_absolute_error",
517                            per_series=per_series,
518                        )
519                        mpl = mean_errors(
520                            actual=X_test,
521                            pred=X_pred,
522                            scoring="mean_pinball_loss",
523                            per_series=per_series,
524                        )
525                        winklerscore = winkler_score(
526                            obj=X_pred,
527                            actual=X_test,
528                            level=95,
529                            per_series=per_series,
530                        )
531                        coveragecalc = coverage(
532                            X_pred, X_test, level=95, per_series=per_series
533                        )
534                    else:
535                        rmse = mean_errors(
536                            actual=X_test,
537                            pred=X_pred,
538                            scoring="root_mean_squared_error",
539                            per_series=per_series,
540                        )
541                        mae = mean_errors(
542                            actual=X_test,
543                            pred=X_pred,
544                            scoring="mean_absolute_error",
545                            per_series=per_series,
546                        )
547                        mpl = mean_errors(
548                            actual=X_test,
549                            pred=X_pred,
550                            scoring="mean_pinball_loss",
551                            per_series=per_series,
552                        )
553
554                    names.append(name)
555                    RMSE.append(rmse)
556                    MAE.append(mae)
557                    MPL.append(mpl)
558
559                    if (self.replications is not None) or (self.type_pi == "gaussian"):
560                        WINKLERSCORE.append(winklerscore)
561                        COVERAGE.append(coveragecalc)
562                    TIME.append(time.time() - start)
563
564                    if self.custom_metric is not None:
565                        try:
566                            custom_metric = self.custom_metric(X_test, X_pred)
567                            CUSTOM_METRIC.append(custom_metric)
568                        except Exception as e:
569                            custom_metric = np.finfo(np.float32).max  # sentinel: largest float32 value
570                            CUSTOM_METRIC.append(custom_metric)
571
572                    if self.verbose > 0:
573                        if (self.replications is not None) or (
574                            self.type_pi == "gaussian"
575                        ):
576                            scores_verbose = {
577                                "Model": name,
578                                "RMSE": rmse,
579                                "MAE": mae,
580                                "MPL": mpl,
581                                "WINKLERSCORE": winklerscore,
582                                "COVERAGE": coveragecalc,
583                                "Time taken": time.time() - start,
584                            }
585                        else:
586                            scores_verbose = {
587                                "Model": name,
588                                "RMSE": rmse,
589                                "MAE": mae,
590                                "MPL": mpl,
591                                "Time taken": time.time() - start,
592                            }
593
594                        if self.custom_metric is not None:
595                            scores_verbose["Custom metric"] = custom_metric
596
597                    if self.predictions:
598                        predictions[name] = X_pred
599                except Exception as exception:
600                    if self.ignore_warnings is False:
601                        print(name + " model failed to execute")
602                        print(exception)
603
604        else:  # no preprocessing
605
606            for name, model in tqdm(self.regressors):  # do parallel exec
607                start = time.time()
608                try:
609                    if "random_state" in model().get_params().keys():
610                        pipe = DeepMTS(
611                            obj=model(random_state=self.random_state, **kwargs),
612                            n_layers=self.n_layers,
613                            n_hidden_features=self.n_hidden_features,
614                            activation_name=self.activation_name,
615                            a=self.a,
616                            nodes_sim=self.nodes_sim,
617                            bias=self.bias,
618                            dropout=self.dropout,
619                            direct_link=self.direct_link,
620                            n_clusters=self.n_clusters,
621                            cluster_encode=self.cluster_encode,
622                            type_clust=self.type_clust,
623                            type_scaling=self.type_scaling,
624                            lags=self.lags,
625                            type_pi=self.type_pi,
626                            block_size=self.block_size,
627                            replications=self.replications,
628                            kernel=self.kernel,
629                            agg=self.agg,
630                            seed=self.seed,
631                            backend=self.backend,
632                            show_progress=self.show_progress,
633                        )
634                    else:
635                        pipe = DeepMTS(
636                            obj=model(**kwargs),
637                            n_layers=self.n_layers,
638                            n_hidden_features=self.n_hidden_features,
639                            activation_name=self.activation_name,
640                            a=self.a,
641                            nodes_sim=self.nodes_sim,
642                            bias=self.bias,
643                            dropout=self.dropout,
644                            direct_link=self.direct_link,
645                            n_clusters=self.n_clusters,
646                            cluster_encode=self.cluster_encode,
647                            type_clust=self.type_clust,
648                            type_scaling=self.type_scaling,
649                            lags=self.lags,
650                            type_pi=self.type_pi,
651                            block_size=self.block_size,
652                            replications=self.replications,
653                            kernel=self.kernel,
654                            agg=self.agg,
655                            seed=self.seed,
656                            backend=self.backend,
657                            show_progress=self.show_progress,
658                        )
659
660                    pipe.fit(X_train, xreg, **kwargs)
661                    # pipe.fit(X_train, xreg=xreg) # DO xreg like in `ahead`
662
663                    self.models_[name] = pipe
664
665                    if self.preprocess is True:
666                        if self.h is None:
667                            X_pred = pipe["regressor"].predict(
668                                h=X_test.shape[0], **kwargs
669                            )
670                        else:
671                            assert (
672                                self.h > 0 and self.h <= X_test.shape[0]
673                            ), "h must be > 0 and <= X_test.shape[0]"
674                            X_pred = pipe["regressor"].predict(h=self.h, **kwargs)
675
676                    else:
677
678                        if self.h is None:
679                            X_pred = pipe.predict(
680                                h=X_test.shape[0],
681                                **kwargs,
682                                # X_pred = pipe.predict(h=X_test.shape[0], new_xreg=new_xreg) ## DO xreg like in `ahead`
683                            )
684                        else:
685                            assert (
686                                self.h > 0 and self.h <= X_test.shape[0]
687                            ), "h must be > 0 and <= X_test.shape[0]"
688                            X_pred = pipe.predict(h=self.h, **kwargs)
689
690                    if self.h is None:
691                        if (self.replications is not None) or (
692                            self.type_pi == "gaussian"
693                        ):
694                            rmse = mean_errors(
695                                actual=X_test,
696                                pred=X_pred.mean,
697                                scoring="root_mean_squared_error",
698                                per_series=per_series,
699                            )
700                            mae = mean_errors(
701                                actual=X_test,
702                                pred=X_pred.mean,
703                                scoring="mean_absolute_error",
704                                per_series=per_series,
705                            )
706                            mpl = mean_errors(
707                                actual=X_test,
708                                pred=X_pred.mean,
709                                scoring="mean_pinball_loss",
710                                per_series=per_series,
711                            )
712                            winklerscore = winkler_score(
713                                obj=X_pred,
714                                actual=X_test,
715                                level=95,
716                                per_series=per_series,
717                            )
718                            coveragecalc = coverage(
719                                X_pred, X_test, level=95, per_series=per_series
720                            )
721                        else:  # no prediction interval
722                            rmse = mean_errors(
723                                actual=X_test,
724                                pred=X_pred,
725                                scoring="root_mean_squared_error",
726                                per_series=per_series,
727                            )
728                            mae = mean_errors(
729                                actual=X_test,
730                                pred=X_pred,
731                                scoring="mean_absolute_error",
732                                per_series=per_series,
733                            )
734                            mpl = mean_errors(
735                                actual=X_test,
736                                pred=X_pred,
737                                scoring="mean_pinball_loss",
738                                per_series=per_series,
739                            )
740                    else:  # self.h is not None
741                        if (self.replications is not None) or (
742                            self.type_pi == "gaussian"
743                        ):
744
745                            if isinstance(X_test, pd.DataFrame):
746                                X_test_h = X_test.iloc[0 : self.h, :]
747                                rmse = mean_errors(
748                                    actual=X_test_h,
749                                    pred=X_pred,
750                                    scoring="root_mean_squared_error",
751                                    per_series=per_series,
752                                )
753                                mae = mean_errors(
754                                    actual=X_test_h,
755                                    pred=X_pred,
756                                    scoring="mean_absolute_error",
757                                    per_series=per_series,
758                                )
759                                mpl = mean_errors(
760                                    actual=X_test_h,
761                                    pred=X_pred,
762                                    scoring="mean_pinball_loss",
763                                    per_series=per_series,
764                                )
765                                winklerscore = winkler_score(
766                                    obj=X_pred,
767                                    actual=X_test_h,
768                                    level=95,
769                                    per_series=per_series,
770                                )
771                                coveragecalc = coverage(
772                                    X_pred,
773                                    X_test_h,
774                                    level=95,
775                                    per_series=per_series,
776                                )
777                            else:
778                                X_test_h = X_test[0 : self.h, :]
779                                rmse = mean_errors(
780                                    actual=X_test_h,
781                                    pred=X_pred,
782                                    scoring="root_mean_squared_error",
783                                    per_series=per_series,
784                                )
785                                mae = mean_errors(
786                                    actual=X_test_h,
787                                    pred=X_pred,
788                                    scoring="mean_absolute_error",
789                                    per_series=per_series,
790                                )
791                                mpl = mean_errors(
792                                    actual=X_test_h,
793                                    pred=X_pred,
794                                    scoring="mean_pinball_loss",
795                                    per_series=per_series,
796                                )
797                                winklerscore = winkler_score(
798                                    obj=X_pred,
799                                    actual=X_test_h,
800                                    level=95,
801                                    per_series=per_series,
802                                )
803                                coveragecalc = coverage(
804                                    X_pred,
805                                    X_test_h,
806                                    level=95,
807                                    per_series=per_series,
808                                )
809                        else:  # no prediction interval
810
811                            if isinstance(X_test, pd.DataFrame):
812                                X_test_h = X_test.iloc[0 : self.h, :]
813                                rmse = mean_errors(
814                                    actual=X_test_h,
815                                    pred=X_pred,
816                                    scoring="root_mean_squared_error",
817                                    per_series=per_series,
818                                )
819                                mae = mean_errors(
820                                    actual=X_test_h,
821                                    pred=X_pred,
822                                    scoring="mean_absolute_error",
823                                    per_series=per_series,
824                                )
825                                mpl = mean_errors(
826                                    actual=X_test_h,
827                                    pred=X_pred,
828                                    scoring="mean_pinball_loss",
829                                    per_series=per_series,
830                                )
831                            else:
832                                X_test_h = X_test[0 : self.h, :]
833                                rmse = mean_errors(
834                                    actual=X_test_h,
835                                    pred=X_pred,
836                                    scoring="root_mean_squared_error",
837                                    per_series=per_series,
838                                )
839                                mae = mean_errors(
840                                    actual=X_test_h,
841                                    pred=X_pred,
842                                    scoring="mean_absolute_error",
843                                    per_series=per_series,
844                                )
845                                mpl = mean_errors(actual=X_test_h, pred=X_pred, scoring="mean_pinball_loss", per_series=per_series)
846                    names.append(name)
847                    RMSE.append(rmse)
848                    MAE.append(mae)
849                    MPL.append(mpl)
850                    if (self.replications is not None) or (self.type_pi == "gaussian"):
851                        WINKLERSCORE.append(winklerscore)
852                        COVERAGE.append(coveragecalc)
853                    TIME.append(time.time() - start)
854
855                    if self.custom_metric is not None:
856                        try:
857                            if self.h is None:
858                                custom_metric = self.custom_metric(X_test, X_pred)
859                            else:
860                                custom_metric = self.custom_metric(X_test_h, X_pred)
861                            CUSTOM_METRIC.append(custom_metric)
862                        except Exception as e:
863                            custom_metric = np.finfo(np.float32).max  # sentinel: largest float32 value
864                            CUSTOM_METRIC.append(custom_metric)
865
866                    if self.verbose > 0:
867                        if (self.replications is not None) or (
868                            self.type_pi == "gaussian"
869                        ):
870                            scores_verbose = {
871                                "Model": name,
872                                "RMSE": rmse,
873                                "MAE": mae,
874                                "MPL": mpl,
875                                "WINKLERSCORE": winklerscore,
876                                "COVERAGE": coveragecalc,
877                                "Time taken": time.time() - start,
878                            }
879                        else:
880                            scores_verbose = {
881                                "Model": name,
882                                "RMSE": rmse,
883                                "MAE": mae,
884                                "MPL": mpl,
885                                "Time taken": time.time() - start,
886                            }
887
888                        if self.custom_metric is not None:
889                            scores_verbose["Custom metric"] = custom_metric
890
891                    if self.predictions:
892                        predictions[name] = X_pred
893
894                except Exception as exception:
895                    if self.ignore_warnings is False:
896                        print(name + " model failed to execute")
897                        print(exception)
898
899        if (self.replications is not None) or (self.type_pi == "gaussian"):
900            scores = {
901                "Model": names,
902                "RMSE": RMSE,
903                "MAE": MAE,
904                "MPL": MPL,
905                "WINKLERSCORE": WINKLERSCORE,
906                "COVERAGE": COVERAGE,
907                "Time Taken": TIME,
908            }
909        else:
910            scores = {
911                "Model": names,
912                "RMSE": RMSE,
913                "MAE": MAE,
914                "MPL": MPL,
915                "Time Taken": TIME,
916            }
917
918        if self.custom_metric is not None:
919            scores["Custom metric"] = CUSTOM_METRIC
920
921        if per_series:
922            scores = dict_to_dataframe_series(scores, self.series_names)
923        else:
924            scores = pd.DataFrame(scores)
925
926        try:  # case per_series, can't be sorted
927            scores = scores.sort_values(by=self.sort_by, ascending=True).set_index(
928                "Model"
929            )
930
931            self.best_model_ = self.models_[scores.index[0]]
932        except Exception as e:
933            pass
934
935        if self.predictions is True:
936
937            return scores, predictions
938
939        return scores
940
941    def get_best_model(self):
942        """
943        This function returns the best model pipeline based on the sort_by metric.
944
945        Returns:
946
947            best_model: object,
948                Returns the best model pipeline based on the sort_by metric.
949
950        """
951        return self.best_model_
952
953    def provide_models(self, X_train, X_test):
954        """
955        This function returns all the model objects trained by the fit function.
956        If fit has not been called yet, it is called first, and the models are then returned.
957
958        Parameters:
959
960            X_train : array-like,
961                Training vectors, where rows correspond to samples
962                and columns to features.
963
964            X_test : array-like,
965                Testing vectors, where rows correspond to samples
966                and columns to features.
967
968        Returns:
969
970            models: dict-object,
971                Returns a dictionary with each model pipeline as value
972                with key as name of models.
973
974        """
975        if self.h is None:
976            if len(self.models_.keys()) == 0:
977                self.fit(X_train, X_test)
978        else:
979            if len(self.models_.keys()) == 0:
980                if isinstance(X_test, pd.DataFrame):
981                    self.fit(X_train, X_test.iloc[0 : self.h, :])
982                else:
983                    self.fit(X_train, X_test[0 : self.h, :])
984
985        return self.models_

Fitting -- almost -- all the regression algorithms with layers of nnetsauce's CustomRegressor to multivariate time series and returning their scores.

Parameters:

verbose: int, optional (default=0)
    Any positive number for verbosity.

ignore_warnings: bool, optional (default=True)
    When set to True, warnings related to algorithms that are not
    able to run are ignored.

custom_metric: function, optional (default=None)
    When a function is provided, models are evaluated based on this custom
    evaluation metric.

predictions: bool, optional (default=False)
    When set to True, the predictions of all the models are returned as a data frame.

sort_by: string, optional (default='RMSE')
    Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE',
    'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and
    provided by custom_metric.

random_state: int, optional (default=42)
    Reproducibility seed.

estimators: list, optional (default='all')
    List of names of the estimators (regression algorithms) to use, or just 'all'.

preprocess: bool
    When set to True, preprocessing is applied to the data before fitting.

n_layers: int, optional (default=1)
    Number of layers in the network. When set to 1, the model is equivalent to an MTS.

h: int, optional (default=None)
    Number of steps ahead to predict (when used, must be > 0 and <= X_test.shape[0]).

All the other parameters are the same as MTS's.

Attributes:

models_: dict-object
    Returns a dictionary with each model pipeline as value
    with key as name of models.

best_model_: object
    Returns the best model pipeline based on the sort_by metric.

Examples:

See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict
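
A minimal sketch of the intended workflow (not taken from the linked post; the toy data and parameter values below are placeholders, the parameter names follow the documentation above, and the class is assumed to be exposed as nnetsauce.LazyDeepMTS):

    import nnetsauce as ns
    import numpy as np
    import pandas as pd

    # toy multivariate series: two cumulated random walks (illustration only)
    rng = np.random.default_rng(42)
    df = pd.DataFrame(
        rng.normal(size=(120, 2)).cumsum(axis=0), columns=["series1", "series2"]
    )
    df_train, df_test = df.iloc[:100, :], df.iloc[100:, :]

    # fit -- almost -- all regressors wrapped in a 2-layer DeepMTS and rank them
    regr_mts = ns.LazyDeepMTS(
        verbose=0, ignore_warnings=True, n_layers=2, lags=4, n_hidden_features=5
    )
    scores = regr_mts.fit(df_train, df_test)  # DataFrame of metrics, sorted by sort_by (default 'RMSE')
    print(scores)
    print(regr_mts.get_best_model())  # best pipeline according to sort_by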
def fit(self, X_train, X_test, xreg=None, per_series=False, **kwargs):
241    def fit(self, X_train, X_test, xreg=None, per_series=False, **kwargs):
242        """Fit Regression algorithms to X_train, predict and score on X_test.
243
244        Parameters:
245
246            X_train: array-like or data frame,
247                Training vectors, where rows correspond to samples
248                and columns to features.
249
250            X_test: array-like or data frame,
251                Testing vectors, where rows correspond to samples
252                and columns to features.
253
254            xreg: array-like, optional (default=None)
255                Additional (external) regressors to be passed to self.obj.
256                xreg must be in 'increasing' order (most recent observations last).
257
258            per_series: bool, optional (default=False)
259                When set to True, the metrics are computed series by series.
260
261            **kwargs: dict, optional (default=None)
262                Additional parameters to be passed to the `fit` method of `obj`.
263
264        Returns:
265
266            scores: Pandas DataFrame
267                Returns metrics of all the models in a Pandas DataFrame.
268
269            predictions: Pandas DataFrame
270                Returns predictions of all the models in a Pandas DataFrame.
271
272        """
273        R2 = []
274        ADJR2 = []
275        ME = []
276        MPL = []
277        RMSE = []
278        MAE = []
279        MPE = []
280        MAPE = []
281        WINKLERSCORE = []
282        COVERAGE = []
283
284        # WIN = []
285        names = []
286        TIME = []
287        predictions = {}
288
289        if self.custom_metric is not None:
290            CUSTOM_METRIC = []
291
292        if self.h is None:
293            assert X_test is not None, "If h is None, X_test must be provided."
294
295        if isinstance(X_train, np.ndarray):
296            X_train = pd.DataFrame(X_train)
297            X_test = pd.DataFrame(X_test)
298
299        self.series_names = X_train.columns.tolist()
300
301        X_train = convert_df_to_numeric(X_train)
302        X_test = convert_df_to_numeric(X_test)
303
304        numeric_features = X_train.select_dtypes(include=[np.number]).columns
305        categorical_features = X_train.select_dtypes(include=["object"]).columns
306
307        categorical_low, categorical_high = get_card_split(
308            X_train, categorical_features
309        )
310
311        if self.preprocess:
312            preprocessor = ColumnTransformer(
313                transformers=[
314                    ("numeric", numeric_transformer, numeric_features),
315                    (
316                        "categorical_low",
317                        categorical_transformer_low,
318                        categorical_low,
319                    ),
320                    (
321                        "categorical_high",
322                        categorical_transformer_high,
323                        categorical_high,
324                    ),
325                ]
326            )
327
328        # baselines (Classical MTS) ----
329        for i, name in enumerate(["ARIMA", "ETS", "Theta", "VAR", "VECM"]):
330            try:
331                start = time.time()
332                regr = ClassicalMTS(model=name)
333                regr.fit(X_train, **kwargs)
334                self.models_[name] = regr
335                if self.h is None:
336                    X_pred = regr.predict(h=X_test.shape[0], **kwargs)
337                else:
338                    assert self.h > 0, "h must be > 0"
339                    X_pred = regr.predict(h=self.h, **kwargs)
340                    try:
341                        X_test = X_test[0 : self.h, :]
342                    except Exception as e:
343                        X_test = X_test.iloc[0 : self.h, :]
344
345                rmse = mean_errors(
346                    actual=X_test,
347                    pred=X_pred,
348                    scoring="root_mean_squared_error",
349                    per_series=per_series,
350                )
351                mae = mean_errors(
352                    actual=X_test,
353                    pred=X_pred,
354                    scoring="mean_absolute_error",
355                    per_series=per_series,
356                )
357                mpl = mean_errors(
358                    actual=X_test,
359                    pred=X_pred,
360                    scoring="mean_pinball_loss",
361                    per_series=per_series,
362                )
363            except Exception:
364
365                continue
366
367            names.append(name)
368            RMSE.append(rmse)
369            MAE.append(mae)
370            MPL.append(mpl)
371
372            if self.custom_metric is not None:
373                try:
374                    if self.h is None:
375                        custom_metric = self.custom_metric(X_test, X_pred)
376                    else:
377                        custom_metric = self.custom_metric(X_test, X_pred)  # X_test already truncated to self.h rows above
378                    CUSTOM_METRIC.append(custom_metric)
379                except Exception as e:
380                    custom_metric = np.finfo(np.float32).max  # sentinel: largest float32 value
381                    CUSTOM_METRIC.append(custom_metric)
382
383            if (self.replications is not None) or (self.type_pi == "gaussian"):
384                if not per_series:
385                    winklerscore = winkler_score(obj=X_pred, actual=X_test, level=95)
386                    coveragecalc = coverage(X_pred, X_test, level=95)
387                else:
388                    winklerscore = winkler_score(
389                        obj=X_pred, actual=X_test, level=95, per_series=True
390                    )
391                    coveragecalc = coverage(X_pred, X_test, level=95, per_series=True)
392                WINKLERSCORE.append(winklerscore)
393                COVERAGE.append(coveragecalc)
394            TIME.append(time.time() - start)
395
396        if self.estimators == "all":
397            if self.n_layers <= 1:
398                self.regressors = REGRESSORSMTS
399            else:
400                self.regressors = DEEPREGRESSORSMTS
401        else:
402            if self.n_layers <= 1:
403                self.regressors = [
404                    ("MTS(" + est[0] + ")", est[1])
405                    for est in all_estimators()
406                    if (
407                        issubclass(est[1], RegressorMixin)
408                        and (est[0] in self.estimators)
409                    )
410                ]
411            else:  # self.n_layers > 1
412                self.regressors = [
413                    ("DeepMTS(" + est[0] + ")", est[1])
414                    for est in all_estimators()
415                    if (
416                        issubclass(est[1], RegressorMixin)
417                        and (est[0] in self.estimators)
418                    )
419                ]
420
421        if self.preprocess is True:
422            for name, model in tqdm(self.regressors):  # do parallel exec
423                start = time.time()
424                try:
425                    if "random_state" in model().get_params().keys():
426                        pipe = Pipeline(
427                            steps=[
428                                ("preprocessor", preprocessor),
429                                (
430                                    "regressor",
431                                    DeepMTS(
432                                        obj=model(
433                                            random_state=self.random_state,
434                                            **kwargs,
435                                        ),
436                                        n_layers=self.n_layers,
437                                        n_hidden_features=self.n_hidden_features,
438                                        activation_name=self.activation_name,
439                                        a=self.a,
440                                        nodes_sim=self.nodes_sim,
441                                        bias=self.bias,
442                                        dropout=self.dropout,
443                                        direct_link=self.direct_link,
444                                        n_clusters=self.n_clusters,
445                                        cluster_encode=self.cluster_encode,
446                                        type_clust=self.type_clust,
447                                        type_scaling=self.type_scaling,
448                                        lags=self.lags,
449                                        type_pi=self.type_pi,
450                                        block_size=self.block_size,
451                                        replications=self.replications,
452                                        kernel=self.kernel,
453                                        agg=self.agg,
454                                        seed=self.seed,
455                                        backend=self.backend,
456                                        show_progress=self.show_progress,
457                                    ),
458                                ),
459                            ]
460                        )
461                    else:  # "random_state" not in model().get_params().keys()
462                        pipe = Pipeline(
463                            steps=[
464                                ("preprocessor", preprocessor),
465                                (
466                                    "regressor",
467                                    DeepMTS(
468                                        obj=model(**kwargs),
469                                        n_layers=self.n_layers,
470                                        n_hidden_features=self.n_hidden_features,
471                                        activation_name=self.activation_name,
472                                        a=self.a,
473                                        nodes_sim=self.nodes_sim,
474                                        bias=self.bias,
475                                        dropout=self.dropout,
476                                        direct_link=self.direct_link,
477                                        n_clusters=self.n_clusters,
478                                        cluster_encode=self.cluster_encode,
479                                        type_clust=self.type_clust,
480                                        type_scaling=self.type_scaling,
481                                        lags=self.lags,
482                                        type_pi=self.type_pi,
483                                        block_size=self.block_size,
484                                        replications=self.replications,
485                                        kernel=self.kernel,
486                                        agg=self.agg,
487                                        seed=self.seed,
488                                        backend=self.backend,
489                                        show_progress=self.show_progress,
490                                    ),
491                                ),
492                            ]
493                        )
494
495                    pipe.fit(X_train, **kwargs)
496                    # pipe.fit(X_train, xreg=xreg)
497
498                    self.models_[name] = pipe
499
500                    if self.h is None:
501                        X_pred = pipe["regressor"].predict(h=X_test.shape[0], **kwargs)
502                    else:
503                        assert self.h > 0, "h must be > 0"
504                        X_pred = pipe["regressor"].predict(h=self.h, **kwargs)
505
506                    if (self.replications is not None) or (self.type_pi == "gaussian"):
507                        rmse = mean_errors(
508                            actual=X_test,
509                            pred=X_pred,
510                            scoring="root_mean_squared_error",
511                            per_series=per_series,
512                        )
513                        mae = mean_errors(
514                            actual=X_test,
515                            pred=X_pred,
516                            scoring="mean_absolute_error",
517                            per_series=per_series,
518                        )
519                        mpl = mean_errors(
520                            actual=X_test,
521                            pred=X_pred,
522                            scoring="mean_pinball_loss",
523                            per_series=per_series,
524                        )
525                        winklerscore = winkler_score(
526                            obj=X_pred,
527                            actual=X_test,
528                            level=95,
529                            per_series=per_series,
530                        )
531                        coveragecalc = coverage(
532                            X_pred, X_test, level=95, per_series=per_series
533                        )
534                    else:
535                        rmse = mean_errors(
536                            actual=X_test,
537                            pred=X_pred,
538                            scoring="root_mean_squared_error",
539                            per_series=per_series,
540                        )
541                        mae = mean_errors(
542                            actual=X_test,
543                            pred=X_pred,
544                            scoring="mean_absolute_error",
545                            per_series=per_series,
546                        )
547                        mpl = mean_errors(
548                            actual=X_test,
549                            pred=X_pred,
550                            scoring="mean_pinball_loss",
551                            per_series=per_series,
552                        )
553
554                    names.append(name)
555                    RMSE.append(rmse)
556                    MAE.append(mae)
557                    MPL.append(mpl)
558
559                    if (self.replications is not None) or (self.type_pi == "gaussian"):
560                        WINKLERSCORE.append(winklerscore)
561                        COVERAGE.append(coveragecalc)
562                    TIME.append(time.time() - start)
563
564                    if self.custom_metric is not None:
565                        try:
566                            custom_metric = self.custom_metric(X_test, X_pred)
567                            CUSTOM_METRIC.append(custom_metric)
568                        except Exception as e:
569                            custom_metric = np.finfo(np.float32).max  # sentinel: largest float32 value
570                            CUSTOM_METRIC.append(custom_metric)
571
572                    if self.verbose > 0:
573                        if (self.replications is not None) or (
574                            self.type_pi == "gaussian"
575                        ):
576                            scores_verbose = {
577                                "Model": name,
578                                "RMSE": rmse,
579                                "MAE": mae,
580                                "MPL": mpl,
581                                "WINKLERSCORE": winklerscore,
582                                "COVERAGE": coveragecalc,
583                                "Time taken": time.time() - start,
584                            }
585                        else:
586                            scores_verbose = {
587                                "Model": name,
588                                "RMSE": rmse,
589                                "MAE": mae,
590                                "MPL": mpl,
591                                "Time taken": time.time() - start,
592                            }
593
594                        if self.custom_metric is not None:
595                            scores_verbose["Custom metric"] = custom_metric
596
597                    if self.predictions:
598                        predictions[name] = X_pred
599                except Exception as exception:
600                    if self.ignore_warnings is False:
601                        print(name + " model failed to execute")
602                        print(exception)
603
604        else:  # no preprocessing
605
606            for name, model in tqdm(self.regressors):  # do parallel exec
607                start = time.time()
608                try:
609                    if "random_state" in model().get_params().keys():
610                        pipe = DeepMTS(
611                            obj=model(random_state=self.random_state, **kwargs),
612                            n_layers=self.n_layers,
613                            n_hidden_features=self.n_hidden_features,
614                            activation_name=self.activation_name,
615                            a=self.a,
616                            nodes_sim=self.nodes_sim,
617                            bias=self.bias,
618                            dropout=self.dropout,
619                            direct_link=self.direct_link,
620                            n_clusters=self.n_clusters,
621                            cluster_encode=self.cluster_encode,
622                            type_clust=self.type_clust,
623                            type_scaling=self.type_scaling,
624                            lags=self.lags,
625                            type_pi=self.type_pi,
626                            block_size=self.block_size,
627                            replications=self.replications,
628                            kernel=self.kernel,
629                            agg=self.agg,
630                            seed=self.seed,
631                            backend=self.backend,
632                            show_progress=self.show_progress,
633                        )
634                    else:
635                        pipe = DeepMTS(
636                            obj=model(**kwargs),
637                            n_layers=self.n_layers,
638                            n_hidden_features=self.n_hidden_features,
639                            activation_name=self.activation_name,
640                            a=self.a,
641                            nodes_sim=self.nodes_sim,
642                            bias=self.bias,
643                            dropout=self.dropout,
644                            direct_link=self.direct_link,
645                            n_clusters=self.n_clusters,
646                            cluster_encode=self.cluster_encode,
647                            type_clust=self.type_clust,
648                            type_scaling=self.type_scaling,
649                            lags=self.lags,
650                            type_pi=self.type_pi,
651                            block_size=self.block_size,
652                            replications=self.replications,
653                            kernel=self.kernel,
654                            agg=self.agg,
655                            seed=self.seed,
656                            backend=self.backend,
657                            show_progress=self.show_progress,
658                        )
659
660                    pipe.fit(X_train, xreg, **kwargs)
661                    # pipe.fit(X_train, xreg=xreg) # DO xreg like in `ahead`
662
663                    self.models_[name] = pipe
664
665                    if self.preprocess is True:
666                        if self.h is None:
667                            X_pred = pipe["regressor"].predict(
668                                h=X_test.shape[0], **kwargs
669                            )
670                        else:
671                            assert (
672                                self.h > 0 and self.h <= X_test.shape[0]
673                            ), "h must be > 0 and <= X_test.shape[0]"
674                            X_pred = pipe["regressor"].predict(h=self.h, **kwargs)
675
676                    else:
677
678                        if self.h is None:
679                            X_pred = pipe.predict(
680                                h=X_test.shape[0],
681                                **kwargs,
682                                # X_pred = pipe.predict(h=X_test.shape[0], new_xreg=new_xreg) ## DO xreg like in `ahead`
683                            )
684                        else:
685                            assert (
686                                self.h > 0 and self.h <= X_test.shape[0]
687                            ), "h must be > 0 and <= X_test.shape[0]"
688                            X_pred = pipe.predict(h=self.h, **kwargs)
689
690                    if self.h is None:
691                        if (self.replications is not None) or (
692                            self.type_pi == "gaussian"
693                        ):
694                            rmse = mean_errors(
695                                actual=X_test,
696                                pred=X_pred.mean,
697                                scoring="root_mean_squared_error",
698                                per_series=per_series,
699                            )
700                            mae = mean_errors(
701                                actual=X_test,
702                                pred=X_pred.mean,
703                                scoring="mean_absolute_error",
704                                per_series=per_series,
705                            )
706                            mpl = mean_errors(
707                                actual=X_test,
708                                pred=X_pred.mean,
709                                scoring="mean_pinball_loss",
710                                per_series=per_series,
711                            )
712                            winklerscore = winkler_score(
713                                obj=X_pred,
714                                actual=X_test,
715                                level=95,
716                                per_series=per_series,
717                            )
718                            coveragecalc = coverage(
719                                X_pred, X_test, level=95, per_series=per_series
720                            )
721                        else:  # no prediction interval
722                            rmse = mean_errors(
723                                actual=X_test,
724                                pred=X_pred,
725                                scoring="root_mean_squared_error",
726                                per_series=per_series,
727                            )
728                            mae = mean_errors(
729                                actual=X_test,
730                                pred=X_pred,
731                                scoring="mean_absolute_error",
732                                per_series=per_series,
733                            )
734                            mpl = mean_errors(
735                                actual=X_test,
736                                pred=X_pred,
737                                scoring="mean_pinball_loss",
738                                per_series=per_series,
739                            )
740                    else:  # self.h is not None
741                        if (self.replications is not None) or (
742                            self.type_pi == "gaussian"
743                        ):
744
745                            if isinstance(X_test, pd.DataFrame):
746                                X_test_h = X_test.iloc[0 : self.h, :]
747                                rmse = mean_errors(
748                                    actual=X_test_h,
749                                    pred=X_pred,
750                                    scoring="root_mean_squared_error",
751                                    per_series=per_series,
752                                )
753                                mae = mean_errors(
754                                    actual=X_test_h,
755                                    pred=X_pred,
756                                    scoring="mean_absolute_error",
757                                    per_series=per_series,
758                                )
759                                mpl = mean_errors(
760                                    actual=X_test_h,
761                                    pred=X_pred,
762                                    scoring="mean_pinball_loss",
763                                    per_series=per_series,
764                                )
765                                winklerscore = winkler_score(
766                                    obj=X_pred,
767                                    actual=X_test_h,
768                                    level=95,
769                                    per_series=per_series,
770                                )
771                                coveragecalc = coverage(
772                                    X_pred,
773                                    X_test_h,
774                                    level=95,
775                                    per_series=per_series,
776                                )
777                            else:
778                                X_test_h = X_test[0 : self.h, :]
779                                rmse = mean_errors(
780                                    actual=X_test_h,
781                                    pred=X_pred,
782                                    scoring="root_mean_squared_error",
783                                    per_series=per_series,
784                                )
785                                mae = mean_errors(
786                                    actual=X_test_h,
787                                    pred=X_pred,
788                                    scoring="mean_absolute_error",
789                                    per_series=per_series,
790                                )
791                                mpl = mean_errors(
792                                    actual=X_test_h,
793                                    pred=X_pred,
794                                    scoring="mean_pinball_loss",
795                                    per_series=per_series,
796                                )
797                                winklerscore = winkler_score(
798                                    obj=X_pred,
799                                    actual=X_test_h,
800                                    level=95,
801                                    per_series=per_series,
802                                )
803                                coveragecalc = coverage(
804                                    X_pred,
805                                    X_test_h,
806                                    level=95,
807                                    per_series=per_series,
808                                )
809                        else:  # no prediction interval
810
811                            if isinstance(X_test, pd.DataFrame):
812                                X_test_h = X_test.iloc[0 : self.h, :]
813                                rmse = mean_errors(
814                                    actual=X_test_h,
815                                    pred=X_pred,
816                                    scoring="root_mean_squared_error",
817                                    per_series=per_series,
818                                )
819                                mae = mean_errors(
820                                    actual=X_test_h,
821                                    pred=X_pred,
822                                    scoring="mean_absolute_error",
823                                    per_series=per_series,
824                                )
825                                mpl = mean_errors(
826                                    actual=X_test_h,
827                                    pred=X_pred,
828                                    scoring="mean_pinball_loss",
829                                    per_series=per_series,
830                                )
831                            else:
832                                X_test_h = X_test[0 : self.h, :]
833                                rmse = mean_errors(
834                                    actual=X_test_h,
835                                    pred=X_pred,
836                                    scoring="root_mean_squared_error",
837                                    per_series=per_series,
838                                )
839                                mae = mean_errors(
840                                    actual=X_test_h,
841                                    pred=X_pred,
842                                    scoring="mean_absolute_error",
843                                    per_series=per_series,
844                                )
845                                mpl = mean_errors(actual=X_test_h, pred=X_pred, scoring="mean_pinball_loss", per_series=per_series)
846                    names.append(name)
847                    RMSE.append(rmse)
848                    MAE.append(mae)
849                    MPL.append(mpl)
850                    if (self.replications is not None) or (self.type_pi == "gaussian"):
851                        WINKLERSCORE.append(winklerscore)
852                        COVERAGE.append(coveragecalc)
853                    TIME.append(time.time() - start)
854
855                    if self.custom_metric is not None:
856                        try:
857                            if self.h is None:
858                                custom_metric = self.custom_metric(X_test, X_pred)
859                            else:
860                                custom_metric = self.custom_metric(X_test_h, X_pred)
861                            CUSTOM_METRIC.append(custom_metric)
862                        except Exception as e:
863                            custom_metric = np.finfo(np.float32).max
864                            CUSTOM_METRIC.append(custom_metric)
865
866                    if self.verbose > 0:
867                        if (self.replications is not None) or (
868                            self.type_pi == "gaussian"
869                        ):
870                            scores_verbose = {
871                                "Model": name,
872                                "RMSE": rmse,
873                                "MAE": mae,
874                                "MPL": mpl,
875                                "WINKLERSCORE": winklerscore,
876                                "COVERAGE": coveragecalc,
877                                "Time taken": time.time() - start,
878                            }
879                        else:
880                            scores_verbose = {
881                                "Model": name,
882                                "RMSE": rmse,
883                                "MAE": mae,
884                                "MPL": mpl,
885                                "Time taken": time.time() - start,
886                            }
887
888                        if self.custom_metric is not None:
889                            scores_verbose["Custom metric"] = custom_metric
890
891                    if self.predictions:
892                        predictions[name] = X_pred
893
894                except Exception as exception:
895                    if self.ignore_warnings is False:
896                        print(name + " model failed to execute")
897                        print(exception)
898
899        if (self.replications is not None) or (self.type_pi == "gaussian"):
900            scores = {
901                "Model": names,
902                "RMSE": RMSE,
903                "MAE": MAE,
904                "MPL": MPL,
905                "WINKLERSCORE": WINKLERSCORE,
906                "COVERAGE": COVERAGE,
907                "Time Taken": TIME,
908            }
909        else:
910            scores = {
911                "Model": names,
912                "RMSE": RMSE,
913                "MAE": MAE,
914                "MPL": MPL,
915                "Time Taken": TIME,
916            }
917
918        if self.custom_metric is not None:
919            scores["Custom metric"] = CUSTOM_METRIC
920
921        if per_series:
922            scores = dict_to_dataframe_series(scores, self.series_names)
923        else:
924            scores = pd.DataFrame(scores)
925
926        try:  # case per_series, can't be sorted
927            scores = scores.sort_values(by=self.sort_by, ascending=True).set_index(
928                "Model"
929            )
930
931            self.best_model_ = self.models_[scores.index[0]]
932        except Exception as e:
933            pass
934
935        if self.predictions is True:
936
937            return scores, predictions
938
939        return scores

Fit Regression algorithms to X_train, predict and score on X_test.

Parameters:

X_train: array-like or data frame,
    Training vectors, where the number of rows is the number of samples
    and the number of columns is the number of features.

X_test: array-like or data frame,
    Testing vectors, where the number of rows is the number of samples
    and the number of columns is the number of features.

xreg: array-like, optional (default=None)
    Additional (external) regressors to be passed to self.obj
    xreg must be in 'increasing' order (most recent observations last)

per_series: bool, optional (default=False)
    When set to True, the metrics are computed series by series.

**kwargs: dict, optional (default=None)
    Additional parameters to be passed to the `fit` method of `obj`.

Returns:

scores: Pandas DataFrame
    Metrics of all the models in a Pandas DataFrame.

predictions: Pandas DataFrame
    Predictions of all the models in a Pandas DataFrame
    (only returned when `predictions` is set to True).
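
A minimal usage sketch (hedged: the surrounding class is the lazy MTS benchmarking class, assumed here to be `LazyDeepMTS`; the constructor arguments shown are assumptions, everything else follows the signature documented above):

```python
# minimal sketch: benchmark several base regressors on a time series train/test split
# (assumes ns.LazyDeepMTS accepts `verbose` and `ignore_warnings` keyword arguments)
import numpy as np
import pandas as pd
import nnetsauce as ns

np.random.seed(123)
df = pd.DataFrame(
    np.random.rand(100, 3),
    columns=["series1", "series2", "series3"],
    index=pd.date_range("2020-01-01", periods=100, freq="W"),
)
df_train, df_test = df.iloc[:80, :], df.iloc[80:, :]

obj = ns.LazyDeepMTS(verbose=0, ignore_warnings=True)
scores = obj.fit(df_train, df_test)  # DataFrame of metrics (RMSE, MAE, MPL, ...)
print(scores)
```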
def provide_models(self, X_train, X_test):
953    def provide_models(self, X_train, X_test):
954        """
955        This function returns all the model objects trained in fit function.
956        If fit is not called already, then we call fit and then return the models.
957
958        Parameters:
959
960            X_train : array-like,
961                Training vectors, where rows is the number of samples
962                and columns is the number of features.
963
964            X_test : array-like,
965                Testing vectors, where rows is the number of samples
966                and columns is the number of features.
967
968        Returns:
969
970            models: dict-object,
971                Returns a dictionary with each model pipeline as value
972                with key as name of models.
973
974        """
975        if self.h is None:
976            if len(self.models_.keys()) == 0:
977                self.fit(X_train, X_test)
978        else:
979            if len(self.models_.keys()) == 0:
980                if isinstance(X_test, pd.DataFrame):
981                    self.fit(X_train, X_test.iloc[0 : self.h, :])
982                else:
983                    self.fit(X_train, X_test[0 : self.h, :])
984
985        return self.models_

This function returns all the model objects trained in the fit function. If fit has not been called yet, it is called first and the fitted models are then returned.

Parameters:

X_train : array-like,
    Training vectors, where the number of rows is the number of samples
    and the number of columns is the number of features.

X_test : array-like,
    Testing vectors, where the number of rows is the number of samples
    and the number of columns is the number of features.

Returns:

models: dict-object,
    A dictionary with model names as keys and the corresponding
    fitted model pipelines as values.
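
For instance (an illustrative sketch under the same assumptions as the previous example):

```python
# illustrative sketch: retrieve the fitted model pipelines by name
import numpy as np
import pandas as pd
import nnetsauce as ns

np.random.seed(123)
df = pd.DataFrame(
    np.random.rand(100, 3),
    columns=["series1", "series2", "series3"],
    index=pd.date_range("2020-01-01", periods=100, freq="W"),
)

obj = ns.LazyDeepMTS(verbose=0, ignore_warnings=True)  # assumed constructor arguments
models = obj.provide_models(df.iloc[:80, :], df.iloc[80:, :])  # calls fit first if needed
print(list(models.keys()))  # model names -> fitted pipelines
```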
class MLARCH(nnetsauce.MTS):
 18class MLARCH(MTS):
 19    """Machine Learning with ARCH effects for time series forecasting
 20
 21    Parameters:
 22            
 23        model_mean: object of class nnetsauce.MTS
 24            Model for mean prediction (default: None, uses obj)
 25            
 26        model_sigma: object of class nnetsauce.MTS
 27            Model for residuals volatility prediction (default: None, uses obj)
 28        
 29        model_residuals: object of class nnetsauce.MTS
 30            Model for residuals prediction (default: None, uses obj)
 31    
 32    Examples: 
 33
 34        See examples/mlarch.py
 35                        
 36    """
 37    def __init__(
 38        self,
 39        model_mean,
 40        model_sigma, 
 41        model_residuals
 42    ):
 43        assert isinstance(model_mean, MTS), "model_mean must be an object of class nnetsauce.MTS"
 44        assert isinstance(model_sigma, MTS), "model_sigma must be an object of class nnetsauce.MTS"
 45        assert isinstance(model_residuals, MTS), "model_residuals must be an object of class nnetsauce.MTS"
 46        assert model_sigma.type_pi.startswith("scp") and model_sigma.replications is not None, \
 47        "for now, the models must be conformalized, i.e type_pi must start with 'scp' and replications must be an integer"
 48        assert model_residuals.type_pi.startswith("scp") and model_residuals.replications is not None, \
 49        "for now, the models must be conformalized, i.e type_pi must start with 'scp' and replications must be an integer"        
 50
 51        self.model_mean = model_mean
 52        self.model_sigma = model_sigma
 53        self.model_residuals = model_residuals
 54
 55        self.mean_residuals_ = None
 56        self.mean_residuals_wilcoxon_test_ = None
 57        self.mean_residuals_kss_test_ = None
 58        self.standardized_residuals_ = None
 59
 60
 61    def fit(self, y):
 62        """Fit the MLARCH model to the time series data.
 63
 64        Parameters
 65        ----------
 66        y : array-like of shape (n_samples,)
 67            The target time series to be fitted.
 68
 69        Returns
 70        -------
 71        self : object
 72            Returns self.
 73
 74        Notes
 75        -----
 76        This method:
 77        
 78        1. Fits the mean model to the time series
 79        2. Performs statistical tests on the residuals (Wilcoxon and KPSS)
 80        3. Fits the volatility model to the squared residuals
 81        4. Computes standardized residuals
 82        5. Fits the residuals model to the standardized residuals
 83        """
 84        n = len(y)
 85        self.model_mean.fit(y.reshape(-1, 1)) 
 86        # Wilcoxon signed-rank test on residuals (mean = 0)
 87        self.mean_residuals_wilcoxon_test_ = stats.wilcoxon(self.model_mean.residuals_)
 88        # KPSS test for stationarity on residuals
 89        self.mean_residuals_kss_test_ = kpss(self.model_mean.residuals_, regression='c')
 90        self.model_sigma.fit(np.log(self.model_mean.residuals_.reshape(-1, 1)**2)) 
 91        # n//2 here because the model is conformalized
 92        fitted_sigma = self.model_sigma.residuals_ + np.log(self.model_mean.residuals_**2)[(n//2):,:]
 93        # standardized residuals
 94        self.standardized_residuals_ = self.model_mean.residuals_[(n//2):,:]/np.sqrt(np.exp(fitted_sigma))
 95        self.model_residuals.fit(self.standardized_residuals_.reshape(-1, 1))
 96        return self
 97
 98
 99    def predict(self, h=5, level=95):
100        """Predict (probabilistic) future values of the time series.
101
102        Parameters
103        ----------
104        h : int, default=5
105            The forecast horizon.
106        level : int, default=95
107            The confidence level for prediction intervals.
108
109        Returns
110        -------
111        DescribeResult : namedtuple
112            A named tuple containing:
113
114            - mean : array-like of shape (h,)
115                The mean forecast.
116            - sims : array-like of shape (h, n_replications)
117                The simulated forecasts.
118            - lower : array-like of shape (h,)
119                The lower bound of the prediction interval.
120            - upper : array-like of shape (h,)
121                The upper bound of the prediction interval.
122
123        Notes
124        -----
125        This method:
126        1. Generates mean forecasts using the mean model
127        2. Generates standardized residual forecasts using the residuals model
128        3. Generates volatility forecasts using the sigma model
129        4. Combines these forecasts to generate the final predictions
130        5. Computes prediction intervals at the specified confidence level
131        """
132        DescribeResult = namedtuple(
133                "DescribeResult", ("mean", "sims", "lower", "upper")
134            )
135        mean_forecast = self.model_mean.predict(h=h).values.ravel()
136        preds_z = self.model_residuals.predict(h=h)
137        preds_sigma = self.model_sigma.predict(h=h)
138        sims_z = preds_z.sims
139        sims_sigma = preds_sigma.sims 
140
141        f = []
142        for i in range(len(sims_z)): 
143            f.append(mean_forecast + sims_z[i].values.ravel()*np.sqrt(np.exp(sims_sigma[i].values.ravel())))
144
145        f = np.asarray(f).T
146        mean_f = np.mean(f, axis=1)
147        alpha = 1 - level/100
148        lower_bound = np.quantile(f, alpha/2, axis=1)
149        upper_bound = np.quantile(f, 1-alpha/2, axis=1)
150
151        return DescribeResult(mean_f, f, 
152                              lower_bound, upper_bound)

Machine Learning with ARCH effects for time series forecasting

Parameters:

model_mean: object of class nnetsauce.MTS
    Model used for the conditional mean prediction

model_sigma: object of class nnetsauce.MTS
    Model used for predicting the volatility of the residuals

model_residuals: object of class nnetsauce.MTS
    Model used for predicting the (standardized) residuals

Examples:

See examples/mlarch.py
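
A minimal construction sketch (assumptions: a Ridge base learner and 'scp-kde' conformal intervals; the reference example remains examples/mlarch.py):

```python
# minimal sketch: the volatility and residuals models must be conformalized,
# i.e. type_pi starting with 'scp' and an integer number of replications
import nnetsauce as ns
from sklearn.linear_model import Ridge  # assumed base learner

model_mean = ns.MTS(Ridge(), lags=2)
model_sigma = ns.MTS(Ridge(), lags=2, type_pi="scp-kde", replications=100)
model_residuals = ns.MTS(Ridge(), lags=2, type_pi="scp-kde", replications=100)

obj_mlarch = ns.MLARCH(model_mean, model_sigma, model_residuals)
```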
def fit(self, y):
61    def fit(self, y):
62        """Fit the MLARCH model to the time series data.
63
64        Parameters
65        ----------
66        y : array-like of shape (n_samples,)
67            The target time series to be fitted.
68
69        Returns
70        -------
71        self : object
72            Returns self.
73
74        Notes
75        -----
76        This method:
77        
78        1. Fits the mean model to the time series
79        2. Performs statistical tests on the residuals (Wilcoxon and KPSS)
80        3. Fits the volatility model to the squared residuals
81        4. Computes standardized residuals
82        5. Fits the residuals model to the standardized residuals
83        """
84        n = len(y)
85        self.model_mean.fit(y.reshape(-1, 1)) 
86        # Wilcoxon signed-rank test on residuals (mean = 0)
87        self.mean_residuals_wilcoxon_test_ = stats.wilcoxon(self.model_mean.residuals_)
88        # KPSS test for stationarity on residuals
89        self.mean_residuals_kss_test_ = kpss(self.model_mean.residuals_, regression='c')
90        self.model_sigma.fit(np.log(self.model_mean.residuals_.reshape(-1, 1)**2)) 
91        # n//2 here because the model is conformalized
92        fitted_sigma = self.model_sigma.residuals_ + np.log(self.model_mean.residuals_**2)[(n//2):,:]
93        # standardized residuals
94        self.standardized_residuals_ = self.model_mean.residuals_[(n//2):,:]/np.sqrt(np.exp(fitted_sigma))
95        self.model_residuals.fit(self.standardized_residuals_.reshape(-1, 1))
96        return self

Fit the MLARCH model to the time series data.

Parameters

y : array-like of shape (n_samples,) The target time series to be fitted.

Returns

self : object Returns self.

Notes

This method:

  1. Fits the mean model to the time series
  2. Performs statistical tests on the residuals (Wilcoxon and KPSS)
  3. Fits the volatility model to the squared residuals
  4. Computes standardized residuals
  5. Fits the residuals model to the standardized residuals
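
To make these steps concrete, here is a NumPy-only sketch of the decomposition they implement (illustrative only, not the library's code; a moving average stands in for the volatility model):

```python
# y_t = mu_t + e_t ; log(e_t^2) is modelled, then z_t = e_t / sigma_t (steps 3 and 4 above)
import numpy as np

np.random.seed(123)
e = np.random.randn(200) * np.linspace(0.5, 2.0, 200)  # residuals with changing volatility
log_e2 = np.log(e ** 2)  # target of the volatility model
fitted_log_sigma2 = np.convolve(log_e2, np.ones(10) / 10, mode="same")  # stand-in fit
z = e / np.sqrt(np.exp(fitted_log_sigma2))  # standardized residuals
print(np.std(z))  # scale of the standardized residuals
```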
def predict(self, h=5, level=95):
 99    def predict(self, h=5, level=95):
100        """Predict (probabilistic) future values of the time series.
101
102        Parameters
103        ----------
104        h : int, default=5
105            The forecast horizon.
106        level : int, default=95
107            The confidence level for prediction intervals.
108
109        Returns
110        -------
111        DescribeResult : namedtuple
112            A named tuple containing:
113
114            - mean : array-like of shape (h,)
115                The mean forecast.
116            - sims : array-like of shape (h, n_replications)
117                The simulated forecasts.
118            - lower : array-like of shape (h,)
119                The lower bound of the prediction interval.
120            - upper : array-like of shape (h,)
121                The upper bound of the prediction interval.
122
123        Notes
124        -----
125        This method:
126        1. Generates mean forecasts using the mean model
127        2. Generates standardized residual forecasts using the residuals model
128        3. Generates volatility forecasts using the sigma model
129        4. Combines these forecasts to generate the final predictions
130        5. Computes prediction intervals at the specified confidence level
131        """
132        DescribeResult = namedtuple(
133                "DescribeResult", ("mean", "sims", "lower", "upper")
134            )
135        mean_forecast = self.model_mean.predict(h=h).values.ravel()
136        preds_z = self.model_residuals.predict(h=h)
137        preds_sigma = self.model_sigma.predict(h=h)
138        sims_z = preds_z.sims
139        sims_sigma = preds_sigma.sims 
140
141        f = []
142        for i in range(len(sims_z)): 
143            f.append(mean_forecast + sims_z[i].values.ravel()*np.sqrt(np.exp(sims_sigma[i].values.ravel())))
144
145        f = np.asarray(f).T
146        mean_f = np.mean(f, axis=1)
147        alpha = 1 - level/100
148        lower_bound = np.quantile(f, alpha/2, axis=1)
149        upper_bound = np.quantile(f, 1-alpha/2, axis=1)
150
151        return DescribeResult(mean_f, f, 
152                              lower_bound, upper_bound)

Predict (probabilistic) future values of the time series.

Parameters

h : int, default=5
    The forecast horizon.

level : int, default=95
    The confidence level for prediction intervals.

Returns

DescribeResult : namedtuple
    A named tuple containing:

- mean : array-like of shape (h,)
    The mean forecast.
- sims : array-like of shape (h, n_replications)
    The simulated forecasts.
- lower : array-like of shape (h,)
    The lower bound of the prediction interval.
- upper : array-like of shape (h,)
    The upper bound of the prediction interval.

Notes

This method:

  1. Generates mean forecasts using the mean model
  2. Generates standardized residual forecasts using the residuals model
  3. Generates volatility forecasts using the sigma model
  4. Combines these forecasts to generate the final predictions
  5. Computes prediction intervals at the specified confidence level
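
An end-to-end sketch of the probabilistic forecast (same assumed construction as in the sketches above):

```python
# illustrative sketch: fit MLARCH on a synthetic series and inspect the forecast
import numpy as np
import nnetsauce as ns
from sklearn.linear_model import Ridge  # assumed base learner

np.random.seed(123)
y = np.cumsum(np.random.randn(250))  # random-walk-like univariate series

obj = ns.MLARCH(
    ns.MTS(Ridge(), lags=2),
    ns.MTS(Ridge(), lags=2, type_pi="scp-kde", replications=100),
    ns.MTS(Ridge(), lags=2, type_pi="scp-kde", replications=100),
)
res = obj.fit(y).predict(h=10, level=95)
print(res.mean)   # point forecast
print(res.lower)  # lower bound of the 95% prediction interval
print(res.upper)  # upper bound of the 95% prediction interval
print(res.sims.shape)  # simulated forecast paths, one column per replication
```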
class MedianVotingRegressor(sklearn.ensemble._voting.VotingRegressor):
 6class MedianVotingRegressor(VotingRegressor):
 7    def predict(self, X):
 8        """
 9        Predict using the median of the base regressors' predictions.
10
11        Parameters:
12        X (array-like): Feature matrix for predictions.
13
14        Returns:
15        y_pred (array): Median of predictions from the base regressors.
16        """
17        predictions = np.asarray(
18            [regressor.predict(X) for regressor in self.estimators_]
19        )
20        return np.median(predictions, axis=0)

Prediction voting regressor for unfitted estimators.

A voting regressor is an ensemble meta-estimator that fits several base regressors, each on the whole dataset. Then it averages the individual predictions to form a final prediction.

Read more in the scikit-learn User Guide (Voting Regressor).

New in version 0.21.

Parameters

estimators : list of (str, estimator) tuples Invoking the fit method on the VotingRegressor will fit clones of those original estimators that will be stored in the class attribute self.estimators_. An estimator can be set to 'drop' using set_params().

*Changed in version 0.21:*
`'drop'` is accepted. Using None was deprecated in 0.22 and
support was removed in 0.24.

weights : array-like of shape (n_regressors,), default=None Sequence of weights (float or int) to weight the occurrences of predicted values before averaging. Uses uniform weights if None.

n_jobs : int, default=None The number of jobs to run in parallel for fit. None means 1 unless in a joblib.parallel_backend context. -1 means using all processors. See the Glossary entry for n_jobs for more details.

verbose : bool, default=False If True, the time elapsed while fitting will be printed as it is completed.

*New in version 0.23.*

Attributes

estimators_ : list of regressors The collection of fitted sub-estimators as defined in estimators that are not 'drop'.

named_estimators_ : sklearn.utils.Bunch Attribute to access any fitted sub-estimators by name.

*New in version 0.20.*

n_features_in_ : int Number of features seen during fit. Only defined if the underlying regressor exposes such an attribute when fit.

*New in version 0.24.*

feature_names_in_ : ndarray of shape (n_features_in_,) Names of features seen during fit. Only defined if the underlying estimators expose such an attribute when fit.

*New in version 1.0.*

See Also

VotingClassifier : Soft Voting/Majority Rule classifier.

Examples

>>> import numpy as np
>>> from sklearn.linear_model import LinearRegression
>>> from sklearn.ensemble import RandomForestRegressor
>>> from sklearn.ensemble import VotingRegressor
>>> from sklearn.neighbors import KNeighborsRegressor
>>> r1 = LinearRegression()
>>> r2 = RandomForestRegressor(n_estimators=10, random_state=1)
>>> r3 = KNeighborsRegressor()
>>> X = np.array([[1, 1], [2, 4], [3, 9], [4, 16], [5, 25], [6, 36]])
>>> y = np.array([2, 6, 12, 20, 30, 42])
>>> er = VotingRegressor([('lr', r1), ('rf', r2), ('r3', r3)])
>>> print(er.fit(X, y).predict(X))
[ 6.8...  8.4... 12.5... 17.8... 26...  34...]

In the following example, we drop the 'lr' estimator with VotingRegressor.set_params() and fit the remaining two estimators:

>>> er = er.set_params(lr='drop')
>>> er = er.fit(X, y)
>>> len(er.estimators_)
2
def predict(self, X):
 7    def predict(self, X):
 8        """
 9        Predict using the median of the base regressors' predictions.
10
11        Parameters:
12        X (array-like): Feature matrix for predictions.
13
14        Returns:
15        y_pred (array): Median of predictions from the base regressors.
16        """
17        predictions = np.asarray(
18            [regressor.predict(X) for regressor in self.estimators_]
19        )
20        return np.median(predictions, axis=0)

Predict using the median of the base regressors' predictions.

Parameters: X (array-like): Feature matrix for predictions.

Returns: y_pred (array): Median of predictions from the base regressors.
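
For example (an illustrative sketch; the estimators list follows the inherited VotingRegressor constructor documented above):

```python
# illustrative sketch: element-wise median of the base regressors' predictions
import numpy as np
import nnetsauce as ns
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor

X = np.array([[1, 1], [2, 4], [3, 9], [4, 16], [5, 25], [6, 36]])
y = np.array([2, 6, 12, 20, 30, 42])

er = ns.MedianVotingRegressor([
    ("lr", LinearRegression()),
    ("knn", KNeighborsRegressor(n_neighbors=2)),
    ("tree", DecisionTreeRegressor(random_state=0)),
])
print(er.fit(X, y).predict(X))  # median of the three base predictions, element-wise
```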

class MTS(nnetsauce.Base):
  28class MTS(Base):
  29    """Univariate and multivariate time series (MTS) forecasting with Quasi-Randomized networks
  30
  31    Parameters:
  32
  33        obj: object.
  34            any object containing a method fit (obj.fit()) and a method predict
  35            (obj.predict()).
  36
  37        n_hidden_features: int.
  38            number of nodes in the hidden layer.
  39
  40        activation_name: str.
  41            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'.
  42
  43        a: float.
  44            hyperparameter for 'prelu' or 'elu' activation function.
  45
  46        nodes_sim: str.
  47            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
  48            'uniform'.
  49
  50        bias: boolean.
  51            indicates if the hidden layer contains a bias term (True) or not
  52            (False).
  53
  54        dropout: float.
  55            regularization parameter; (random) percentage of nodes dropped out
  56            of the training.
  57
  58        direct_link: boolean.
  59            indicates if the original predictors are included (True) in model's fitting or not (False).
  60
  61        n_clusters: int.
  62            number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering).
  63
  64        cluster_encode: bool.
  65            defines how the variable containing clusters is treated (default is one-hot)
  66            if `False`, then labels are used, without one-hot encoding.
  67
  68        type_clust: str.
  69            type of clustering method: currently k-means ('kmeans') or Gaussian
  70            Mixture Model ('gmm').
  71
  72        type_scaling: a tuple of 3 strings.
  73            scaling methods for inputs, hidden layer, and clustering respectively
  74            (and when relevant).
  75            Currently available: standardization ('std') or MinMax scaling ('minmax').
  76
  77        lags: int.
  78            number of lags used for each time series.
  79            If string, lags must be one of 'AIC', 'AICc', or 'BIC'.
  80
  81        type_pi: str.
  82            type of prediction interval; currently:
  83            - "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case
  84            - "kde": based on Kernel Density Estimation of in-sample residuals
  85            - "bootstrap": based on independent bootstrap of in-sample residuals
  86            - "block-bootstrap": based on basic block bootstrap of in-sample residuals
  87            - "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals
  88            - "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals
  89            - "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals
  90            - "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals
  91            - "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals
  92            - "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals
  93            - based on copulas of in-sample residuals: 'vine-tll', 'vine-bb1', 'vine-bb6', 'vine-bb7', 'vine-bb8', 'vine-clayton',
  94            'vine-frank', 'vine-gaussian', 'vine-gumbel', 'vine-indep', 'vine-joe', 'vine-student'
  95            - 'scp-vine-tll', 'scp-vine-bb1', 'scp-vine-bb6', 'scp-vine-bb7', 'scp-vine-bb8', 'scp-vine-clayton',
  96            'scp-vine-frank', 'scp-vine-gaussian', 'scp-vine-gumbel', 'scp-vine-indep', 'scp-vine-joe', 'scp-vine-student'
  97            - 'scp2-vine-tll', 'scp2-vine-bb1', 'scp2-vine-bb6', 'scp2-vine-bb7', 'scp2-vine-bb8', 'scp2-vine-clayton',
  98            'scp2-vine-frank', 'scp2-vine-gaussian', 'scp2-vine-gumbel', 'scp2-vine-indep', 'scp2-vine-joe', 'scp2-vine-student'
  99
 100        block_size: int.
 101            size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap").
 102            Default is round(3.15*(n_residuals^1/3))
 103
 104        replications: int.
 105            number of replications (if needed, for predictive simulation). Default is 'None'.
 106
 107        kernel: str.
 108            the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'.
 109
 110        agg: str.
 111            either "mean" or "median" for simulation of bootstrap aggregating
 112
 113        seed: int.
 114            reproducibility seed for nodes_sim=='uniform' or predictive simulation.
 115
 116        backend: str.
 117            "cpu" or "gpu" or "tpu".
 118
 119        verbose: int.
 120            0: not printing; 1: printing
 121
 122        show_progress: bool.
 123            True: progress bar when fitting each series; False: no progress bar when fitting each series
 124
 125    Attributes:
 126
 127        fit_objs_: dict
 128            objects adjusted to each individual time series
 129
 130        y_: {array-like}
 131            MTS responses (most recent observations first)
 132
 133        X_: {array-like}
 134            MTS lags
 135
 136        xreg_: {array-like}
 137            external regressors
 138
 139        y_means_: dict
 140            a dictionary of each series mean values
 141
 142        preds_: {array-like}
 143            successive model predictions
 144
 145        preds_std_: {array-like}
 146            standard deviation around the predictions for Bayesian base learners (`obj`)
 147
 148        gaussian_preds_std_: {array-like}
 149            standard deviation around the predictions for `type_pi='gaussian'`
 150
 151        return_std_: boolean
 152            return uncertainty or not (set in predict)
 153
 154        df_: data frame
 155            the input data frame, in case a data.frame is provided to `fit`
 156
 157        n_obs_: int
 158            number of time series observations (number of rows for multivariate)
 159
 160        level_: int
 161            level of confidence for prediction intervals (default is 95)
 162
 163        residuals_: {array-like}
 164            in-sample residuals (for `type_pi` not conformal prediction) or calibrated residuals
 165            (for `type_pi` in conformal prediction)
 166
 167        residuals_sims_: tuple of {array-like}
 168            simulations of in-sample residuals (for `type_pi` not conformal prediction) or
 169            calibrated residuals (for `type_pi` in conformal prediction)
 170
 171        kde_: A scikit-learn object, see https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KernelDensity.html
 172
 173        residuals_std_dev_: residuals standard deviation
 174
 175    Examples:
 176
 177    Example 1:
 178
 179    ```python
 180    import nnetsauce as ns
 181    import numpy as np
 182    from sklearn import linear_model
 183    np.random.seed(123)
 184
 185    M = np.random.rand(10, 3)
 186    M[:,0] = 10*M[:,0]
 187    M[:,2] = 25*M[:,2]
 188    print(M)
 189
 190    # Adjust Bayesian Ridge
 191    regr4 = linear_model.BayesianRidge()
 192    obj_MTS = ns.MTS(regr4, lags = 1, n_hidden_features=5)
 193    obj_MTS.fit(M)
 194    print(obj_MTS.predict())
 195
 196    # with credible intervals
 197    print(obj_MTS.predict(return_std=True, level=80))
 198
 199    print(obj_MTS.predict(return_std=True, level=95))
 200    ```
 201
 202    Example 2:
 203
 204    ```python
 205    import nnetsauce as ns
 206    import numpy as np
 207    from sklearn import linear_model
 208
 209    dataset = {
 210    'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
 211    'series1' : [34, 30, 35.6, 33.3, 38.1],
 212    'series2' : [4, 5.5, 5.6, 6.3, 5.1],
 213    'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
 214    df = pd.DataFrame(dataset).set_index('date')
 215    print(df)
 216
 217    # Adjust Bayesian Ridge
 218    regr5 = linear_model.BayesianRidge()
 219    obj_MTS = ns.MTS(regr5, lags = 1, n_hidden_features=5)
 220    obj_MTS.fit(df)
 221    print(obj_MTS.predict())
 222
 223    # with credible intervals
 224    print(obj_MTS.predict(return_std=True, level=80))
 225
 226    print(obj_MTS.predict(return_std=True, level=95))
 227    ```
 228    """
 229
 230    # construct the object -----
 231
 232    def __init__(
 233        self,
 234        obj,
 235        n_hidden_features=5,
 236        activation_name="relu",
 237        a=0.01,
 238        nodes_sim="sobol",
 239        bias=True,
 240        dropout=0,
 241        direct_link=True,
 242        n_clusters=2,
 243        cluster_encode=True,
 244        type_clust="kmeans",
 245        type_scaling=("std", "std", "std"),
 246        lags=1,
 247        type_pi="kde",
 248        block_size=None,
 249        replications=None,
 250        kernel="gaussian",
 251        agg="mean",
 252        seed=123,
 253        backend="cpu",
 254        verbose=0,
 255        show_progress=True,
 256    ):
 257
 258        super().__init__(
 259            n_hidden_features=n_hidden_features,
 260            activation_name=activation_name,
 261            a=a,
 262            nodes_sim=nodes_sim,
 263            bias=bias,
 264            dropout=dropout,
 265            direct_link=direct_link,
 266            n_clusters=n_clusters,
 267            cluster_encode=cluster_encode,
 268            type_clust=type_clust,
 269            type_scaling=type_scaling,
 270            seed=seed,
 271            backend=backend,
 272        )
 273
 274        # Add validation for lags parameter
 275        if isinstance(lags, str):
 276            assert lags in (
 277                "AIC",
 278                "AICc",
 279                "BIC",
 280            ), "if string, lags must be one of 'AIC', 'AICc', or 'BIC'"
 281        else:
 282            assert int(lags) == lags, "if numeric, lags parameter should be an integer"
 283
 284        self.obj = obj
 285        self.n_series = None
 286        self.lags = lags
 287        self.type_pi = type_pi
 288        self.block_size = block_size
 289        self.replications = replications
 290        self.kernel = kernel
 291        self.agg = agg
 292        self.verbose = verbose
 293        self.show_progress = show_progress
 294        self.series_names = None
 295        self.input_dates = None
 296        self.fit_objs_ = {}
 297        self.y_ = None  # MTS responses (most recent observations first)
 298        self.X_ = None  # MTS lags
 299        self.xreg_ = None
 300        self.y_means_ = {}
 301        self.mean_ = None
 302        self.median_ = None
 303        self.upper_ = None
 304        self.lower_ = None
 305        self.output_dates_ = None
 306        self.preds_std_ = []
 307        self.gaussian_preds_std_ = None
 308        self.alpha_ = None
 309        self.return_std_ = None
 310        self.df_ = None
 311        self.residuals_ = []
 312        self.abs_calib_residuals_ = None
 313        self.calib_residuals_quantile_ = None
 314        self.residuals_sims_ = None
 315        self.kde_ = None
 316        self.sims_ = None
 317        self.residuals_std_dev_ = None
 318        self.n_obs_ = None
 319        self.level_ = None
 320        self.init_n_series_ = None
 321
 322    def fit(self, X, xreg=None, **kwargs):
 323        """Fit MTS model to training data X, with optional regressors xreg
 324
 325        Parameters:
 326
 327        X: {array-like}, shape = [n_samples, n_features]
 328            Training time series, where n_samples is the number
 329            of samples and n_features is the number of features;
 330            X must be in increasing order (most recent observations last)
 331
 332        xreg: {array-like}, shape = [n_samples, n_features_xreg]
 333            Additional (external) regressors to be passed to self.obj
 334            xreg must be in 'increasing' order (most recent observations last)
 335
 336        **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)
 337
 338        Returns:
 339
 340        self: object
 341        """
 342
 343        self.init_n_series_ = X.shape[1]
 344
 345        # Automatic lag selection if requested
 346        if isinstance(self.lags, str):
 347            max_lags = min(25, X.shape[0] // 4)
 348            best_ic = float("inf")
 349            best_lags = 1
 350
 351            if self.verbose:
 352                print(f"\nSelecting optimal number of lags using {self.lags}...")
 353                iterator = tqdm(range(1, max_lags + 1))
 354            else:
 355                iterator = range(1, max_lags + 1)
 356
 357            for lag in iterator:
 358                # Convert DataFrame to numpy array before reversing
 359                if isinstance(X, pd.DataFrame):
 360                    X_values = X.values[::-1]
 361                else:
 362                    X_values = X[::-1]
 363
 364                # Try current lag value
 365                if self.init_n_series_ > 1:
 366                    mts_input = ts.create_train_inputs(X_values, lag)
 367                else:
 368                    mts_input = ts.create_train_inputs(X_values.reshape(-1, 1), lag)
 369
 370                # Cook training set and fit model
 371                dummy_y, scaled_Z = self.cook_training_set(
 372                    y=np.ones(mts_input[0].shape[0]), X=mts_input[1]
 373                )
 374                residuals_ = []
 375
 376                for i in range(self.init_n_series_):
 377                    y_mean = np.mean(mts_input[0][:, i])
 378                    centered_y_i = mts_input[0][:, i] - y_mean
 379                    self.obj.fit(X=scaled_Z, y=centered_y_i)
 380                    residuals_.append(
 381                        (centered_y_i - self.obj.predict(scaled_Z)).tolist()
 382                    )
 383
 384                self.residuals_ = np.asarray(residuals_).T
 385                ic = self._compute_information_criterion(
 386                    curr_lags=lag, criterion=self.lags
 387                )
 388
 389                if self.verbose:
 390                    print(f"Trying lags={lag}, {self.lags}={ic:.2f}")
 391
 392                if ic < best_ic:
 393                    best_ic = ic
 394                    best_lags = lag
 395
 396            if self.verbose:
 397                print(f"\nSelected {best_lags} lags with {self.lags}={best_ic:.2f}")
 398
 399            self.lags = best_lags
 400
 401        self.input_dates = None
 402        self.df_ = None
 403
 404        if isinstance(X, pd.DataFrame) is False:
 405            # input data set is a numpy array
 406            if xreg is None:
 407                X = pd.DataFrame(X)
 408                self.series_names = ["series" + str(i) for i in range(X.shape[1])]
 409            else:
 410                # xreg is not None
 411                X = mo.cbind(X, xreg)
 412                self.xreg_ = xreg
 413
 414        else:  # input data set is a DataFrame with column names
 415
 416            X_index = None
 417            if X.index is not None:
 418                X_index = X.index
 419            if xreg is None:
 420                X = copy.deepcopy(mo.convert_df_to_numeric(X))
 421            else:
 422                X = copy.deepcopy(mo.cbind(mo.convert_df_to_numeric(X), xreg))
 423                self.xreg_ = xreg
 424            if X_index is not None:
 425                X.index = X_index
 426            self.series_names = X.columns.tolist()
 427
 428        if isinstance(X, pd.DataFrame):
 429            if self.df_ is None:
 430                self.df_ = X
 431                X = X.values
 432            else:
 433                input_dates_prev = pd.DatetimeIndex(self.df_.index.values)
 434                frequency = pd.infer_freq(input_dates_prev)
 435                self.df_ = pd.concat([self.df_, X], axis=0)
 436                self.input_dates = pd.date_range(
 437                    start=input_dates_prev[0],
 438                    periods=len(input_dates_prev) + X.shape[0],
 439                    freq=frequency,
 440                ).values.tolist()
 441                self.df_.index = self.input_dates
 442                X = self.df_.values
 443            self.df_.columns = self.series_names
 444        else:
 445            if self.df_ is None:
 446                self.df_ = pd.DataFrame(X, columns=self.series_names)
 447            else:
 448                self.df_ = pd.concat(
 449                    [self.df_, pd.DataFrame(X, columns=self.series_names)],
 450                    axis=0,
 451                )
 452
 453        self.input_dates = ts.compute_input_dates(self.df_)
 454
 455        try:
 456            # multivariate time series
 457            n, p = X.shape
 458        except:
 459            # univariate time series
 460            n = X.shape[0]
 461            p = 1
 462        self.n_obs_ = n
 463
 464        rep_1_n = np.repeat(1, n)
 465
 466        self.y_ = None
 467        self.X_ = None
 468        self.n_series = p
 469        self.fit_objs_.clear()
 470        self.y_means_.clear()
 471        residuals_ = []
 472        self.residuals_ = None
 473        self.residuals_sims_ = None
 474        self.kde_ = None
 475        self.sims_ = None
 476        self.scaled_Z_ = None
 477        self.centered_y_is_ = []
 478
 479        if self.init_n_series_ > 1:
 480            # multivariate time series
 481            mts_input = ts.create_train_inputs(X[::-1], self.lags)
 482        else:
 483            # univariate time series
 484            mts_input = ts.create_train_inputs(X.reshape(-1, 1)[::-1], self.lags)
 485
 486        self.y_ = mts_input[0]
 487
 488        self.X_ = mts_input[1]
 489
 490        dummy_y, scaled_Z = self.cook_training_set(y=rep_1_n, X=self.X_)
 491
 492        self.scaled_Z_ = scaled_Z
 493
 494        # loop on all the time series and adjust self.obj.fit
 495        if self.verbose > 0:
 496            print(
 497                f"\n Adjusting {type(self.obj).__name__} to multivariate time series... \n"
 498            )
 499
 500        if self.show_progress is True:
 501            iterator = tqdm(range(self.init_n_series_))
 502        else:
 503            iterator = range(self.init_n_series_)
 504
 505        if self.type_pi in (
 506            "gaussian",
 507            "kde",
 508            "bootstrap",
 509            "block-bootstrap",
 510        ) or self.type_pi.startswith("vine"):
 511            for i in iterator:
 512                y_mean = np.mean(self.y_[:, i])
 513                self.y_means_[i] = y_mean
 514                centered_y_i = self.y_[:, i] - y_mean
 515                self.centered_y_is_.append(centered_y_i)
 516                self.obj.fit(X=scaled_Z, y=centered_y_i)
 517                self.fit_objs_[i] = deepcopy(self.obj)
 518                residuals_.append(
 519                    (centered_y_i - self.fit_objs_[i].predict(scaled_Z)).tolist()
 520                )
 521
 522        if self.type_pi.startswith("scp"):
 523            # split conformal prediction
 524            for i in iterator:
 525                n_y = self.y_.shape[0]
 526                n_y_half = n_y // 2
 527                first_half_idx = range(0, n_y_half)
 528                second_half_idx = range(n_y_half, n_y)
 529                y_mean_temp = np.mean(self.y_[first_half_idx, i])
 530                centered_y_i_temp = self.y_[first_half_idx, i] - y_mean_temp
 531                self.obj.fit(X=scaled_Z[first_half_idx, :], y=centered_y_i_temp)
 532                # calibrated residuals actually
 533                residuals_.append(
 534                    (
 535                        self.y_[second_half_idx, i]
 536                        - (y_mean_temp + self.obj.predict(scaled_Z[second_half_idx, :]))
 537                    ).tolist()
 538                )
 539                # fit on the second half
 540                y_mean = np.mean(self.y_[second_half_idx, i])
 541                self.y_means_[i] = y_mean
 542                centered_y_i = self.y_[second_half_idx, i] - y_mean
 543                self.obj.fit(X=scaled_Z[second_half_idx, :], y=centered_y_i)
 544                self.fit_objs_[i] = deepcopy(self.obj)
 545
 546        self.residuals_ = np.asarray(residuals_).T
 547
 548        if self.type_pi == "gaussian":
 549            self.gaussian_preds_std_ = np.std(self.residuals_, axis=0)
 550
 551        if self.type_pi.startswith("scp2"):
 552            # Calculate mean and standard deviation for each column
 553            data_mean = np.mean(self.residuals_, axis=0)
 554            self.residuals_std_dev_ = np.std(self.residuals_, axis=0)
 555            # Center and scale the array using broadcasting
 556            self.residuals_ = (
 557                self.residuals_ - data_mean[np.newaxis, :]
 558            ) / self.residuals_std_dev_[np.newaxis, :]
 559
 560        if self.replications != None and "kde" in self.type_pi:
 561            if self.verbose > 0:
 562                print(f"\n Simulate residuals using {self.kernel} kernel... \n")
 563            assert self.kernel in (
 564                "gaussian",
 565                "tophat",
 566            ), "currently, 'kernel' must be either 'gaussian' or 'tophat'"
 567            kernel_bandwidths = {"bandwidth": np.logspace(-6, 6, 150)}
 568            grid = GridSearchCV(
 569                KernelDensity(kernel=self.kernel, **kwargs),
 570                param_grid=kernel_bandwidths,
 571            )
 572            grid.fit(self.residuals_)
 573
 574            if self.verbose > 0:
 575                print(
 576                    f"\n Best parameters for {self.kernel} kernel: {grid.best_params_} \n"
 577                )
 578
 579            self.kde_ = grid.best_estimator_
 580
 581        return self
 582
 583    def partial_fit(self, X, xreg=None, **kwargs):
 584        """Update the model with new observations X, with optional regressors xreg
 585
 586        Parameters:
 587
 588        X: {array-like}, shape = [n_samples, n_features]
 589            Training time series, where n_samples is the number
 590            of samples and n_features is the number of features;
 591            X must be in increasing order (most recent observations last)
 592
 593        xreg: {array-like}, shape = [n_samples, n_features_xreg]
 594            Additional (external) regressors to be passed to self.obj
 595            xreg must be in 'increasing' order (most recent observations last)
 596
 597        **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)
 598
 599        Returns:
 600
 601        self: object
 602        """
 603
 604        assert self.df_ is not None, "fit() must be called before partial_fit()"
 605
 606        if (isinstance(X, pd.DataFrame) is False) and isinstance(X, pd.Series) is False:
 607            if len(X.shape) == 1:
 608                X = X.reshape(1, -1)
 609
 610            return self.fit(X, xreg, **kwargs)
 611
 612        else:
 613            if len(X.shape) == 1:
 614                X = pd.DataFrame(X.values.reshape(1, -1), columns=self.df_.columns)
 615
 616            return self.fit(X, xreg, **kwargs)
 617
 618    def predict(self, h=5, level=95, **kwargs):
 619        """Forecast all the time series, h steps ahead"""
 620
 621        self.output_dates_, frequency = ts.compute_output_dates(self.df_, h)
 622
 623        self.level_ = level
 624
 625        self.return_std_ = False  # do not remove (/!\)
 626
 627        self.mean_ = None  # do not remove (/!\)
 628
 629        self.mean_ = deepcopy(self.y_)  # do not remove (/!\)
 630
 631        self.lower_ = None  # do not remove (/!\)
 632
 633        self.upper_ = None  # do not remove (/!\)
 634
 635        self.sims_ = None  # do not remove (/!\)
 636
 637        y_means_ = np.asarray([self.y_means_[i] for i in range(self.init_n_series_)])
 638
 639        n_features = self.init_n_series_ * self.lags
 640
 641        self.alpha_ = 100 - level
 642
 643        pi_multiplier = norm.ppf(1 - self.alpha_ / 200)
 644
 645        if "return_std" in kwargs:  # bayesian forecasting
 646            self.return_std_ = True
 647            self.preds_std_ = []
 648            DescribeResult = namedtuple(
 649                "DescribeResult", ("mean", "lower", "upper")
 650            )  # to be updated
 651
 652        if "return_pi" in kwargs:  # split conformal, without simulation
 653            mean_pi_ = []
 654            lower_pi_ = []
 655            upper_pi_ = []
 656            median_pi_ = []
 657            DescribeResult = namedtuple(
 658                "DescribeResult", ("mean", "lower", "upper")
 659            )  # to be updated
 660
 661        if self.kde_ != None and "kde" in self.type_pi:  # kde
 662            target_cols = self.df_.columns[
 663                : self.init_n_series_
 664            ]  # Get target column names
 665            if self.verbose == 1:
 666                self.residuals_sims_ = tuple(
 667                    self.kde_.sample(
 668                        n_samples=h, random_state=self.seed + 100 * i
 669                    )  # Keep full sample
 670                    for i in tqdm(range(self.replications))
 671                )
 672            elif self.verbose == 0:
 673                self.residuals_sims_ = tuple(
 674                    self.kde_.sample(
 675                        n_samples=h, random_state=self.seed + 100 * i
 676                    )  # Keep full sample
 677                    for i in range(self.replications)
 678                )
 679
 680            # Convert to DataFrames after sampling
 681            self.residuals_sims_ = tuple(
 682                pd.DataFrame(
 683                    sim,  # Keep all columns
 684                    columns=target_cols,  # Use original target column names
 685                    index=self.output_dates_,
 686                )
 687                for sim in self.residuals_sims_
 688            )
 689
 690        if self.type_pi in ("bootstrap", "scp-bootstrap", "scp2-bootstrap"):
 691            assert self.replications is not None and isinstance(
 692                self.replications, int
 693            ), "'replications' must be provided and be an integer"
 694            if self.verbose == 1:
 695                self.residuals_sims_ = tuple(
 696                    ts.bootstrap(
 697                        self.residuals_,
 698                        h=h,
 699                        block_size=None,
 700                        seed=self.seed + 100 * i,
 701                    )
 702                    for i in tqdm(range(self.replications))
 703                )
 704            elif self.verbose == 0:
 705                self.residuals_sims_ = tuple(
 706                    ts.bootstrap(
 707                        self.residuals_,
 708                        h=h,
 709                        block_size=None,
 710                        seed=self.seed + 100 * i,
 711                    )
 712                    for i in range(self.replications)
 713                )
 714
 715        if self.type_pi in (
 716            "block-bootstrap",
 717            "scp-block-bootstrap",
 718            "scp2-block-bootstrap",
 719        ):
 720            if self.block_size is None:
 721                self.block_size = int(
 722                    np.ceil(3.15 * (self.residuals_.shape[0] ** (1 / 3)))
 723                )
 724
 725            assert self.replications is not None and isinstance(
 726                self.replications, int
 727            ), "'replications' must be provided and be an integer"
 728            if self.verbose == 1:
 729                self.residuals_sims_ = tuple(
 730                    ts.bootstrap(
 731                        self.residuals_,
 732                        h=h,
 733                        block_size=self.block_size,
 734                        seed=self.seed + 100 * i,
 735                    )
 736                    for i in tqdm(range(self.replications))
 737                )
 738            elif self.verbose == 0:
 739                self.residuals_sims_ = tuple(
 740                    ts.bootstrap(
 741                        self.residuals_,
 742                        h=h,
 743                        block_size=self.block_size,
 744                        seed=self.seed + 100 * i,
 745                    )
 746                    for i in range(self.replications)
 747                )
 748
 749        if "vine" in self.type_pi:
 750            if self.verbose == 1:
 751                self.residuals_sims_ = tuple(
 752                    vinecopula_sample(
 753                        x=self.residuals_,
 754                        n_samples=h,
 755                        method=self.type_pi,
 756                        random_state=self.seed + 100 * i,
 757                    )
 758                    for i in tqdm(range(self.replications))
 759                )
 760            elif self.verbose == 0:
 761                self.residuals_sims_ = tuple(
 762                    vinecopula_sample(
 763                        x=self.residuals_,
 764                        n_samples=h,
 765                        method=self.type_pi,
 766                        random_state=self.seed + 100 * i,
 767                    )
 768                    for i in range(self.replications)
 769                )
 770
 771        mean_ = deepcopy(self.mean_)
 772
 773        for i in range(h):
 774
 775            new_obs = ts.reformat_response(mean_, self.lags)
 776            new_X = new_obs.reshape(1, -1)
 777            cooked_new_X = self.cook_test_set(new_X, **kwargs)
 778
 779            if "return_std" in kwargs:
 780                self.preds_std_.append(
 781                    [
 782                        np.asarray(
 783                            self.fit_objs_[i].predict(cooked_new_X, return_std=True)[1]
 784                        ).item()
 785                        for i in range(self.n_series)
 786                    ]
 787                )
 788
 789            if "return_pi" in kwargs:
 790                for i in range(self.n_series):
 791                    preds_pi = self.fit_objs_[i].predict(cooked_new_X, **kwargs)
 792                    mean_pi_.append(preds_pi.mean[0])
 793                    lower_pi_.append(preds_pi.lower[0])
 794                    upper_pi_.append(preds_pi.upper[0])
 795
 796            predicted_cooked_new_X = np.asarray(
 797                [
 798                    np.asarray(self.fit_objs_[i].predict(cooked_new_X)).item()
 799                    for i in range(self.init_n_series_)
 800                ]
 801            )
 802
 803            preds = np.asarray(y_means_ + predicted_cooked_new_X)
 804
 805            # Create full row with both predictions and external regressors
 806            if self.xreg_ is not None and "xreg" in kwargs:
 807                next_xreg = kwargs["xreg"].iloc[i : i + 1].values.flatten()
 808                full_row = np.concatenate([preds, next_xreg])
 809            else:
 810                full_row = preds
 811
 812            # Create a new row with same number of columns as mean_
 813            new_row = np.zeros((1, mean_.shape[1]))
 814            new_row[0, : full_row.shape[0]] = full_row
 815
 816            # Maintain the full dimensionality by using vstack instead of rbind
 817            mean_ = np.vstack([new_row, mean_[:-1]])
 818
 819        # Final output should only include the target columns
 820        self.mean_ = pd.DataFrame(
 821            mean_[0:h, : self.init_n_series_][::-1],
 822            columns=self.df_.columns[: self.init_n_series_],
 823            index=self.output_dates_,
 824        )
 825
 826        # function's return ----------------------------------------------------------------------
 827        if (
 828            (("return_std" not in kwargs) and ("return_pi" not in kwargs))
 829            and (self.type_pi not in ("gaussian", "scp"))
 830        ) or ("vine" in self.type_pi):
 831
 832            if self.replications is None:
 833                return self.mean_.iloc[:, : self.init_n_series_]
 834
 835            # if "return_std" not in kwargs and self.replications is not None
 836            meanf = []
 837            medianf = []
 838            lower = []
 839            upper = []
 840
 841            if "scp2" in self.type_pi:
 842
 843                if self.verbose == 1:
 844                    self.sims_ = tuple(
 845                        (
 846                            self.mean_
 847                            + self.residuals_sims_[i]
 848                            * self.residuals_std_dev_[np.newaxis, :]
 849                            for i in tqdm(range(self.replications))
 850                        )
 851                    )
 852                elif self.verbose == 0:
 853                    self.sims_ = tuple(
 854                        (
 855                            self.mean_
 856                            + self.residuals_sims_[i]
 857                            * self.residuals_std_dev_[np.newaxis, :]
 858                            for i in range(self.replications)
 859                        )
 860                    )
 861            else:
 862
 863                if self.verbose == 1:
 864                    self.sims_ = tuple(
 865                        (
 866                            self.mean_ + self.residuals_sims_[i]
 867                            for i in tqdm(range(self.replications))
 868                        )
 869                    )
 870                elif self.verbose == 0:
 871                    self.sims_ = tuple(
 872                        (
 873                            self.mean_ + self.residuals_sims_[i]
 874                            for i in range(self.replications)
 875                        )
 876                    )
 877
 878            DescribeResult = namedtuple(
 879                "DescribeResult", ("mean", "sims", "lower", "upper")
 880            )
 881            for ix in range(self.init_n_series_):
 882                sims_ix = getsims(self.sims_, ix)
 883                if self.agg == "mean":
 884                    meanf.append(np.mean(sims_ix, axis=1))
 885                else:
 886                    medianf.append(np.median(sims_ix, axis=1))
 887                lower.append(np.quantile(sims_ix, q=self.alpha_ / 200, axis=1))
 888                upper.append(np.quantile(sims_ix, q=1 - self.alpha_ / 200, axis=1))
 889            self.mean_ = pd.DataFrame(
 890                np.asarray(meanf).T,
 891                columns=self.series_names[: self.init_n_series_],  # self.df_.columns,
 892                index=self.output_dates_,
 893            )
 894
 895            self.lower_ = pd.DataFrame(
 896                np.asarray(lower).T,
 897                columns=self.series_names[: self.init_n_series_],  # self.df_.columns,
 898                index=self.output_dates_,
 899            )
 900
 901            self.upper_ = pd.DataFrame(
 902                np.asarray(upper).T,
 903                columns=self.series_names[: self.init_n_series_],  # self.df_.columns,
 904                index=self.output_dates_,
 905            )
 906
 907            try:
 908                self.median_ = pd.DataFrame(
 909                    np.asarray(medianf).T,
 910                    columns=self.series_names[
 911                        : self.init_n_series_
 912                    ],  # self.df_.columns,
 913                    index=self.output_dates_,
 914                )
 915            except Exception as e:
 916                pass
 917
 918            return DescribeResult(self.mean_, self.sims_, self.lower_, self.upper_)
 919
 920        if (
 921            (("return_std" in kwargs) or ("return_pi" in kwargs))
 922            and (self.type_pi not in ("gaussian", "scp"))
 923        ) or "vine" in self.type_pi:
 924            DescribeResult = namedtuple("DescribeResult", ("mean", "lower", "upper"))
 925
 926            self.mean_ = pd.DataFrame(
 927                np.asarray(self.mean_),
 928                columns=self.series_names,  # self.df_.columns,
 929                index=self.output_dates_,
 930            )
 931
 932            if "return_std" in kwargs:
 933
 934                self.preds_std_ = np.asarray(self.preds_std_)
 935                print("self.preds_std_", self.preds_std_)
 936                print("self.mean_", self.mean_)
 937                print("pi_multiplier", pi_multiplier)
 938
 939                self.lower_ = pd.DataFrame(
 940                    self.mean_.values - pi_multiplier * self.preds_std_,
 941                    columns=self.series_names,  # self.df_.columns,
 942                    index=self.output_dates_,
 943                )
 944
 945                self.upper_ = pd.DataFrame(
 946                    self.mean_.values + pi_multiplier * self.preds_std_,
 947                    columns=self.series_names,  # self.df_.columns,
 948                    index=self.output_dates_,
 949                )
 950
 951            if "return_pi" in kwargs:
 952
 953                self.lower_ = pd.DataFrame(
 954                    np.asarray(lower_pi_).reshape(h, self.n_series)
 955                    + y_means_[np.newaxis, :],
 956                    columns=self.series_names,  # self.df_.columns,
 957                    index=self.output_dates_,
 958                )
 959
 960                self.upper_ = pd.DataFrame(
 961                    np.asarray(upper_pi_).reshape(h, self.n_series)
 962                    + y_means_[np.newaxis, :],
 963                    columns=self.series_names,  # self.df_.columns,
 964                    index=self.output_dates_,
 965                )
 966
 967            res = DescribeResult(self.mean_, self.lower_, self.upper_)
 968
 969            if self.xreg_ is not None:
 970                if len(self.xreg_.shape) > 1:
 971                    res2 = mx.tuple_map(
 972                        res,
 973                        lambda x: mo.delete_last_columns(
 974                            x, num_columns=self.xreg_.shape[1]
 975                        ),
 976                    )
 977                else:
 978                    res2 = mx.tuple_map(
 979                        res, lambda x: mo.delete_last_columns(x, num_columns=1)
 980                    )
 981                return DescribeResult(res2[0], res2[1], res2[2])
 982
 983            return res
 984
 985        if self.type_pi == "gaussian":
 986
 987            DescribeResult = namedtuple("DescribeResult", ("mean", "lower", "upper"))
 988
 989            self.mean_ = pd.DataFrame(
 990                np.asarray(self.mean_),
 991                columns=self.series_names,  # self.df_.columns,
 992                index=self.output_dates_,
 993            )
 994
 995            self.lower_ = pd.DataFrame(
 996                self.mean_.values - pi_multiplier * self.gaussian_preds_std_,
 997                columns=self.series_names,  # self.df_.columns,
 998                index=self.output_dates_,
 999            )
1000
1001            self.upper_ = pd.DataFrame(
1002                self.mean_.values + pi_multiplier * self.gaussian_preds_std_,
1003                columns=self.series_names,  # self.df_.columns,
1004                index=self.output_dates_,
1005            )
1006
1007            res = DescribeResult(self.mean_, self.lower_, self.upper_)
1008
1009            if self.xreg_ is not None:
1010                if len(self.xreg_.shape) > 1:
1011                    res2 = mx.tuple_map(
1012                        res,
1013                        lambda x: mo.delete_last_columns(
1014                            x, num_columns=self.xreg_.shape[1]
1015                        ),
1016                    )
1017                else:
1018                    res2 = mx.tuple_map(
1019                        res, lambda x: mo.delete_last_columns(x, num_columns=1)
1020                    )
1021                return DescribeResult(res2[0], res2[1], res2[2])
1022
1023            return res
1024
1025        # After prediction loop, ensure sims only contain target columns
1026        if self.sims_ is not None:
1027            if self.verbose == 1:
1028                self.sims_ = tuple(
1029                    sim[:h,]  # Only keep the first h rows
1030                    for sim in tqdm(self.sims_)
1031                )
1032            elif self.verbose == 0:
1033                self.sims_ = tuple(
1034                    sim[:h,]  # Only keep the first h rows
1035                    for sim in self.sims_
1036                )
1037
1038            # Convert numpy arrays to DataFrames with proper columns
1039            self.sims_ = tuple(
1040                pd.DataFrame(
1041                    sim,
1042                    columns=self.df_.columns[: self.init_n_series_],
1043                    index=self.output_dates_,
1044                )
1045                for sim in self.sims_
1046            )
1047
1048        if self.type_pi in ("kde", "bootstrap", "block-bootstrap", "vine-copula"):
1049            if self.xreg_ is not None:
1050                # Use getsimsxreg when external regressors are present
1051                target_cols = self.df_.columns[: self.init_n_series_]
1052                self.sims_ = getsimsxreg(self.sims_, self.output_dates_, target_cols)
1053            else:
1054                # Use original getsims for backward compatibility
1055                self.sims_ = getsims(self.sims_)
1056
1057    def score(self, X, training_index, testing_index, scoring=None, **kwargs):
1058        """Train on training_index, score on testing_index."""
1059
1060        assert (
1061            bool(set(training_index).intersection(set(testing_index))) == False
1062        ), "Non-overlapping 'training_index' and 'testing_index' required"
1063
1064        # Dimensions
1065        try:
1066            # multivariate time series
1067            n, p = X.shape
1068        except:
1069            # univariate time series
1070            n = X.shape[0]
1071            p = 1
1072
1073        # Training and testing sets
1074        if p > 1:
1075            X_train = X[training_index, :]
1076            X_test = X[testing_index, :]
1077        else:
1078            X_train = X[training_index]
1079            X_test = X[testing_index]
1080
1081        # Horizon
1082        h = len(testing_index)
1083        assert (
1084            len(training_index) + h
1085        ) <= n, "Please check lengths of training and testing windows"
1086
1087        # Fit and predict
1088        self.fit(X_train, **kwargs)
1089        preds = self.predict(h=h, **kwargs)
1090
1091        if scoring is None:
1092            scoring = "neg_root_mean_squared_error"
1093
1094        # check inputs
1095        assert scoring in (
1096            "explained_variance",
1097            "neg_mean_absolute_error",
1098            "neg_mean_squared_error",
1099            "neg_root_mean_squared_error",
1100            "neg_mean_squared_log_error",
1101            "neg_median_absolute_error",
1102            "r2",
1103        ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \
1104                               'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \
1105                               'neg_median_absolute_error', 'r2')"
1106
1107        scoring_options = {
1108            "explained_variance": skm2.explained_variance_score,
1109            "neg_mean_absolute_error": skm2.mean_absolute_error,
1110            "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2),
1111            "neg_root_mean_squared_error": lambda x, y: np.sqrt(np.mean((x - y) ** 2)),
1112            "neg_mean_squared_log_error": skm2.mean_squared_log_error,
1113            "neg_median_absolute_error": skm2.median_absolute_error,
1114            "r2": skm2.r2_score,
1115        }
1116
1117        return scoring_options[scoring](X_test, preds)
1118
1119    def plot(self, series=None, type_axis="dates", type_plot="pi"):
1120        """Plot time series forecast
1121
1122        Parameters:
1123
1124        series: {integer} or {string}
1125            series index or name
1126
1127        """
1128
1129        assert all(
1130            [
1131                self.mean_ is not None,
1132                self.lower_ is not None,
1133                self.upper_ is not None,
1134                self.output_dates_ is not None,
1135            ]
1136        ), "model forecasting must be obtained first (with predict)"
1137
1138        if series is None:
1139            # assert (
1140            #    self.init_n_series_ == 1
1141            # ), "please specify series index or name (n_series > 1)"
1142            series = 0
1143
1144        if isinstance(series, str):
1145            assert (
1146                series in self.series_names
1147            ), f"series {series} doesn't exist in the input dataset"
1148            series_idx = self.df_.columns.get_loc(series)
1149        else:
1150            assert isinstance(series, int) and (
1151                0 <= series < self.n_series
1152            ), f"check series index (< {self.n_series})"
1153            series_idx = series
1154
1155        y_all = list(self.df_.iloc[:, series_idx]) + list(
1156            self.mean_.iloc[:, series_idx]
1157        )
1158        y_test = list(self.mean_.iloc[:, series_idx])
1159        n_points_all = len(y_all)
1160        n_points_train = self.df_.shape[0]
1161
1162        if type_axis == "numeric":
1163            x_all = [i for i in range(n_points_all)]
1164            x_test = [i for i in range(n_points_train, n_points_all)]
1165
1166        if type_axis == "dates":  # use dates
1167            x_all = np.concatenate(
1168                (self.input_dates.values, self.output_dates_.values), axis=None
1169            )
1170            x_test = self.output_dates_.values
1171
1172        if type_plot == "pi":
1173            fig, ax = plt.subplots()
1174            ax.plot(x_all, y_all, "-")
1175            ax.plot(x_test, y_test, "-", color="orange")
1176            ax.fill_between(
1177                x_test,
1178                self.lower_.iloc[:, series_idx],
1179                self.upper_.iloc[:, series_idx],
1180                alpha=0.2,
1181                color="orange",
1182            )
1183            if self.replications is None:
1184                if self.n_series > 1:
1185                    plt.title(
1186                        f"prediction intervals for {series}",
1187                        loc="left",
1188                        fontsize=12,
1189                        fontweight=0,
1190                        color="black",
1191                    )
1192                else:
1193                    plt.title(
1194                        f"prediction intervals for input time series",
1195                        loc="left",
1196                        fontsize=12,
1197                        fontweight=0,
1198                        color="black",
1199                    )
1200                plt.show()
1201            else:  # self.replications is not None
1202                if self.n_series > 1:
1203                    plt.title(
1204                        f"prediction intervals for {self.replications} simulations of {series}",
1205                        loc="left",
1206                        fontsize=12,
1207                        fontweight=0,
1208                        color="black",
1209                    )
1210                else:
1211                    plt.title(
1212                        f"prediction intervals for {self.replications} simulations of input time series",
1213                        loc="left",
1214                        fontsize=12,
1215                        fontweight=0,
1216                        color="black",
1217                    )
1218                plt.show()
1219
1220        if type_plot == "spaghetti":
1221            palette = plt.get_cmap("Set1")
1222            sims_ix = getsims(self.sims_, series_idx)
1223            plt.plot(x_all, y_all, "-")
1224            for col_ix in range(
1225                sims_ix.shape[1]
1226            ):  # avoid this when there are thousands of simulations
1227                plt.plot(
1228                    x_test,
1229                    sims_ix[:, col_ix],
1230                    "-",
1231                    color=palette(col_ix),
1232                    linewidth=1,
1233                    alpha=0.9,
1234                )
1235            plt.plot(x_all, y_all, "-", color="black")
1236            plt.plot(x_test, y_test, "-", color="blue")
1237            # Add titles
1238            if self.n_series > 1:
1239                plt.title(
1240                    f"{self.replications} simulations of {series}",
1241                    loc="left",
1242                    fontsize=12,
1243                    fontweight=0,
1244                    color="black",
1245                )
1246            else:
1247                plt.title(
1248                    f"{self.replications} simulations of input time series",
1249                    loc="left",
1250                    fontsize=12,
1251                    fontweight=0,
1252                    color="black",
1253                )
1254            plt.xlabel("Time")
1255            plt.ylabel("Values")
1256            # Show the graph
1257            plt.show()
1258
1259    def cross_val_score(
1260        self,
1261        X,
1262        scoring="root_mean_squared_error",
1263        n_jobs=None,
1264        verbose=0,
1265        xreg=None,
1266        initial_window=5,
1267        horizon=3,
1268        fixed_window=False,
1269        show_progress=True,
1270        level=95,
1271        **kwargs,
1272    ):
1273        """Evaluate a score by time series cross-validation.
1274
1275        Parameters:
1276
1277            X: {array-like, sparse matrix} of shape (n_samples, n_features)
1278                The data to fit.
1279
1280            scoring: str or a function
1281                A str in ('root_mean_squared_error', 'mean_squared_error', 'mean_error',
 1282                'mean_absolute_error', 'mean_percentage_error',
 1283                'mean_absolute_percentage_error', 'winkler_score', 'coverage')
 1284                Or a function defined like 'coverage' and 'winkler_score' in `utils.timeseries`
1285
1286            n_jobs: int, default=None
1287                Number of jobs to run in parallel.
1288
1289            verbose: int, default=0
1290                The verbosity level.
1291
1292            xreg: array-like, optional (default=None)
1293                Additional (external) regressors to be passed to `fit`
1294                xreg must be in 'increasing' order (most recent observations last)
1295
1296            initial_window: int
1297                initial number of consecutive values in each training set sample
1298
1299            horizon: int
1300                number of consecutive values in test set sample
1301
1302            fixed_window: boolean
1303                if False, all training samples start at index 0, and the training
1304                window's size is increasing.
1305                if True, the training window's size is fixed, and the window is
1306                rolling forward
1307
1308            show_progress: boolean
1309                if True, a progress bar is printed
1310
1311            **kwargs: dict
1312                additional parameters to be passed to `fit` and `predict`
1313
1314        Returns:
1315
1316            A tuple: descriptive statistics or errors and raw errors
1317
1318        """
1319        tscv = TimeSeriesSplit()
1320
1321        tscv_obj = tscv.split(
1322            X,
1323            initial_window=initial_window,
1324            horizon=horizon,
1325            fixed_window=fixed_window,
1326        )
1327
1328        if isinstance(scoring, str):
1329
1330            assert scoring in (
1331                "root_mean_squared_error",
1332                "mean_squared_error",
1333                "mean_error",
1334                "mean_absolute_error",
1335                "mean_percentage_error",
1336                "mean_absolute_percentage_error",
1337                "winkler_score",
1338                "coverage",
 1339            ), "must have scoring in ('root_mean_squared_error', 'mean_squared_error', 'mean_error', 'mean_absolute_error', 'mean_percentage_error', 'mean_absolute_percentage_error', 'winkler_score', 'coverage')"
1340
1341            def err_func(X_test, X_pred, scoring):
1342                if (self.replications is not None) or (
1343                    self.type_pi == "gaussian"
1344                ):  # probabilistic
1345                    if scoring == "winkler_score":
1346                        return winkler_score(X_pred, X_test, level=level)
1347                    elif scoring == "coverage":
1348                        return coverage(X_pred, X_test, level=level)
1349                    else:
1350                        return mean_errors(
1351                            pred=X_pred.mean, actual=X_test, scoring=scoring
1352                        )
1353                else:  # not probabilistic
1354                    return mean_errors(pred=X_pred, actual=X_test, scoring=scoring)
1355
1356        else:  # isinstance(scoring, str) = False
1357
1358            err_func = scoring
1359
1360        errors = []
1361
1362        train_indices = []
1363
1364        test_indices = []
1365
1366        for train_index, test_index in tscv_obj:
1367            train_indices.append(train_index)
1368            test_indices.append(test_index)
1369
1370        if show_progress is True:
1371            iterator = tqdm(zip(train_indices, test_indices), total=len(train_indices))
1372        else:
1373            iterator = zip(train_indices, test_indices)
1374
1375        for train_index, test_index in iterator:
1376
1377            if verbose == 1:
1378                print(f"TRAIN: {train_index}")
1379                print(f"TEST: {test_index}")
1380
1381            if isinstance(X, pd.DataFrame):
1382                self.fit(X.iloc[train_index, :], xreg=xreg, **kwargs)
1383                X_test = X.iloc[test_index, :]
1384            else:
1385                self.fit(X[train_index, :], xreg=xreg, **kwargs)
1386                X_test = X[test_index, :]
1387            X_pred = self.predict(h=int(len(test_index)), level=level, **kwargs)
1388
1389            errors.append(err_func(X_test, X_pred, scoring))
1390
1391        res = np.asarray(errors)
1392
1393        return res, describe(res)
1394
1395    def _compute_information_criterion(self, curr_lags, criterion="AIC"):
1396        """Compute information criterion using existing residuals
1397
1398        Parameters
1399        ----------
1400        curr_lags : int
1401            Current number of lags being evaluated
1402        criterion : str
1403            One of 'AIC', 'AICc', or 'BIC'
1404
1405        Returns
1406        -------
1407        float
1408            Information criterion value or inf if parameters exceed observations
1409        """
1410        # Get dimensions
1411        n_obs = self.residuals_.shape[0]
1412        n_features = int(self.init_n_series_ * curr_lags)
1413        n_hidden = int(self.n_hidden_features)
1414
1415        # Calculate number of parameters
1416        term1 = int(n_features * n_hidden)
1417        term2 = int(n_hidden * self.init_n_series_)
1418        n_params = term1 + term2
1419
1420        # Check if we have enough observations for the number of parameters
1421        if n_obs <= n_params + 1:
1422            return float("inf")  # Return infinity if too many parameters
1423
1424        # Compute RSS using existing residuals
1425        rss = np.sum(self.residuals_**2)
1426
1427        # Compute criterion
1428        if criterion == "AIC":
1429            ic = n_obs * np.log(rss / n_obs) + 2 * n_params
1430        elif criterion == "AICc":
1431            ic = n_obs * np.log(rss / n_obs) + 2 * n_params * (
1432                n_obs / (n_obs - n_params - 1)
1433            )
1434        else:  # BIC
1435            ic = n_obs * np.log(rss / n_obs) + n_params * np.log(n_obs)
1436
1437        return ic

Univariate and multivariate time series (MTS) forecasting with Quasi-Randomized networks

Parameters:

obj: object.
    any object containing a method fit (obj.fit()) and a method predict
    (obj.predict()).

n_hidden_features: int.
    number of nodes in the hidden layer.

activation_name: str.
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'.

a: float.
    hyperparameter for 'prelu' or 'elu' activation function.

nodes_sim: str.
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'.

bias: boolean.
    indicates if the hidden layer contains a bias term (True) or not
    (False).

dropout: float.
    regularization parameter; (random) percentage of nodes dropped out
    of the training.

direct_link: boolean.
    indicates if the original predictors are included (True) in model's fitting or not (False).

n_clusters: int.
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering).

cluster_encode: bool.
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding.

type_clust: str.
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm').

type_scaling: a tuple of 3 strings.
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax').

lags: int or str.
    number of lags used for each time series.
    If a string, automatic lag selection is performed; lags must then be one of 'AIC', 'AICc' or 'BIC'.

type_pi: str.
    type of prediction interval; currently:
    - "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case
    - "kde": based on Kernel Density Estimation of in-sample residuals
    - "bootstrap": based on independent bootstrap of in-sample residuals
    - "block-bootstrap": based on basic block bootstrap of in-sample residuals
    - "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals
    - "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals
    - "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals
    - "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals
    - "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals
    - "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals
    - based on copulas of in-sample residuals: 'vine-tll', 'vine-bb1', 'vine-bb6', 'vine-bb7', 'vine-bb8', 'vine-clayton',
    'vine-frank', 'vine-gaussian', 'vine-gumbel', 'vine-indep', 'vine-joe', 'vine-student'
    - 'scp-vine-tll', 'scp-vine-bb1', 'scp-vine-bb6', 'scp-vine-bb7', 'scp-vine-bb8', 'scp-vine-clayton',
    'scp-vine-frank', 'scp-vine-gaussian', 'scp-vine-gumbel', 'scp-vine-indep', 'scp-vine-joe', 'scp-vine-student'
    - 'scp2-vine-tll', 'scp2-vine-bb1', 'scp2-vine-bb6', 'scp2-vine-bb7', 'scp2-vine-bb8', 'scp2-vine-clayton',
    'scp2-vine-frank', 'scp2-vine-gaussian', 'scp2-vine-gumbel', 'scp2-vine-indep', 'scp2-vine-joe', 'scp2-vine-student'

block_size: int.
    size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap").
    Default is ceil(3.15*(n_residuals^(1/3)))

replications: int.
    number of replications (if needed, for predictive simulation). Default is 'None'.

kernel: str.
    the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'.

agg: str.
    either "mean" or "median" for simulation of bootstrap aggregating

seed: int.
    reproducibility seed for nodes_sim=='uniform' or predictive simulation.

backend: str.
    "cpu" or "gpu" or "tpu".

verbose: int.
    0: not printing; 1: printing

show_progress: bool.
    True: progress bar when fitting each series; False: no progress bar when fitting each series

Attributes:

fit_objs_: dict
    objects adjusted to each individual time series

y_: {array-like}
    MTS responses (most recent observations first)

X_: {array-like}
    MTS lags

xreg_: {array-like}
    external regressors

y_means_: dict
    a dictionary of each series mean values

preds_: {array-like}
    successive model predictions

preds_std_: {array-like}
    standard deviation around the predictions for Bayesian base learners (`obj`)

gaussian_preds_std_: {array-like}
    standard deviation around the predictions for `type_pi='gaussian'`

return_std_: boolean
    return uncertainty or not (set in predict)

df_: data frame
    the input data frame, in case a data.frame is provided to `fit`

n_obs_: int
    number of time series observations (number of rows for multivariate)

level_: int
    level of confidence for prediction intervals (default is 95)

residuals_: {array-like}
    in-sample residuals (for `type_pi` not conformal prediction) or calibrated residuals
    (for `type_pi` in conformal prediction)

residuals_sims_: tuple of {array-like}
    simulations of in-sample residuals (for `type_pi` not conformal prediction) or
    calibrated residuals (for `type_pi` in conformal prediction)

kde_: A scikit-learn object, see https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KernelDensity.html

residuals_std_dev_: residuals standard deviation

Examples:

Example 1:

import nnetsauce as ns
import numpy as np
from sklearn import linear_model
np.random.seed(123)

M = np.random.rand(10, 3)
M[:,0] = 10*M[:,0]
M[:,2] = 25*M[:,2]
print(M)

# Adjust Bayesian Ridge
regr4 = linear_model.BayesianRidge()
obj_MTS = ns.MTS(regr4, lags = 1, n_hidden_features=5)
obj_MTS.fit(M)
print(obj_MTS.predict())

# with credible intervals
print(obj_MTS.predict(return_std=True, level=80))

print(obj_MTS.predict(return_std=True, level=95))

Example 2:

import nnetsauce as ns
import numpy as np
import pandas as pd
from sklearn import linear_model

dataset = {
'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
'series1' : [34, 30, 35.6, 33.3, 38.1],
'series2' : [4, 5.5, 5.6, 6.3, 5.1],
'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
df = pd.DataFrame(dataset).set_index('date')
print(df)

# Adjust Bayesian Ridge
regr5 = linear_model.BayesianRidge()
obj_MTS = ns.MTS(regr5, lags = 1, n_hidden_features=5)
obj_MTS.fit(df)
print(obj_MTS.predict())

# with credible intervals
print(obj_MTS.predict(return_std=True, level=80))

print(obj_MTS.predict(return_std=True, level=95))
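
Example 3:

An illustrative sketch (probabilistic forecasting with kernel density estimation
of in-sample residuals); it assumes a scikit-learn Ridge base learner and that
matplotlib is available for `plot`.

import nnetsauce as ns
import numpy as np
from sklearn import linear_model
np.random.seed(123)

M = np.random.rand(50, 3)

# simulate predictive distributions with type_pi='kde' and replications
regr6 = linear_model.Ridge()
obj_MTS = ns.MTS(regr6, lags=2, n_hidden_features=5,
                 type_pi="kde", replications=100, kernel="gaussian")
obj_MTS.fit(M)
res = obj_MTS.predict(h=5, level=95)
print(res.mean)   # point forecasts (aggregated simulations)
print(res.lower)  # lower bounds of the prediction intervals
print(res.upper)  # upper bounds of the prediction intervals
obj_MTS.plot(series=0)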
def fit(self, X, xreg=None, **kwargs):
322    def fit(self, X, xreg=None, **kwargs):
323        """Fit MTS model to training data X, with optional regressors xreg
324
325        Parameters:
326
327        X: {array-like}, shape = [n_samples, n_features]
328            Training time series, where n_samples is the number
329            of samples and n_features is the number of features;
330            X must be in increasing order (most recent observations last)
331
332        xreg: {array-like}, shape = [n_samples, n_features_xreg]
333            Additional (external) regressors to be passed to self.obj
334            xreg must be in 'increasing' order (most recent observations last)
335
336        **kwargs: for now, additional parameters to be passed for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)
337
338        Returns:
339
340        self: object
341        """
342
343        self.init_n_series_ = X.shape[1]
344
345        # Automatic lag selection if requested
346        if isinstance(self.lags, str):
347            max_lags = min(25, X.shape[0] // 4)
348            best_ic = float("inf")
349            best_lags = 1
350
351            if self.verbose:
352                print(f"\nSelecting optimal number of lags using {self.lags}...")
353                iterator = tqdm(range(1, max_lags + 1))
354            else:
355                iterator = range(1, max_lags + 1)
356
357            for lag in iterator:
358                # Convert DataFrame to numpy array before reversing
359                if isinstance(X, pd.DataFrame):
360                    X_values = X.values[::-1]
361                else:
362                    X_values = X[::-1]
363
364                # Try current lag value
365                if self.init_n_series_ > 1:
366                    mts_input = ts.create_train_inputs(X_values, lag)
367                else:
368                    mts_input = ts.create_train_inputs(X_values.reshape(-1, 1), lag)
369
370                # Cook training set and fit model
371                dummy_y, scaled_Z = self.cook_training_set(
372                    y=np.ones(mts_input[0].shape[0]), X=mts_input[1]
373                )
374                residuals_ = []
375
376                for i in range(self.init_n_series_):
377                    y_mean = np.mean(mts_input[0][:, i])
378                    centered_y_i = mts_input[0][:, i] - y_mean
379                    self.obj.fit(X=scaled_Z, y=centered_y_i)
380                    residuals_.append(
381                        (centered_y_i - self.obj.predict(scaled_Z)).tolist()
382                    )
383
384                self.residuals_ = np.asarray(residuals_).T
385                ic = self._compute_information_criterion(
386                    curr_lags=lag, criterion=self.lags
387                )
388
389                if self.verbose:
390                    print(f"Trying lags={lag}, {self.lags}={ic:.2f}")
391
392                if ic < best_ic:
393                    best_ic = ic
394                    best_lags = lag
395
396            if self.verbose:
397                print(f"\nSelected {best_lags} lags with {self.lags}={best_ic:.2f}")
398
399            self.lags = best_lags
400
401        self.input_dates = None
402        self.df_ = None
403
404        if isinstance(X, pd.DataFrame) is False:
405            # input data set is a numpy array
406            if xreg is None:
407                X = pd.DataFrame(X)
408                self.series_names = ["series" + str(i) for i in range(X.shape[1])]
409            else:
410                # xreg is not None
411                X = mo.cbind(X, xreg)
412                self.xreg_ = xreg
413
414        else:  # input data set is a DataFrame with column names
415
416            X_index = None
417            if X.index is not None:
418                X_index = X.index
419            if xreg is None:
420                X = copy.deepcopy(mo.convert_df_to_numeric(X))
421            else:
422                X = copy.deepcopy(mo.cbind(mo.convert_df_to_numeric(X), xreg))
423                self.xreg_ = xreg
424            if X_index is not None:
425                X.index = X_index
426            self.series_names = X.columns.tolist()
427
428        if isinstance(X, pd.DataFrame):
429            if self.df_ is None:
430                self.df_ = X
431                X = X.values
432            else:
433                input_dates_prev = pd.DatetimeIndex(self.df_.index.values)
434                frequency = pd.infer_freq(input_dates_prev)
435                self.df_ = pd.concat([self.df_, X], axis=0)
436                self.input_dates = pd.date_range(
437                    start=input_dates_prev[0],
438                    periods=len(input_dates_prev) + X.shape[0],
439                    freq=frequency,
440                ).values.tolist()
441                self.df_.index = self.input_dates
442                X = self.df_.values
443            self.df_.columns = self.series_names
444        else:
445            if self.df_ is None:
446                self.df_ = pd.DataFrame(X, columns=self.series_names)
447            else:
448                self.df_ = pd.concat(
449                    [self.df_, pd.DataFrame(X, columns=self.series_names)],
450                    axis=0,
451                )
452
453        self.input_dates = ts.compute_input_dates(self.df_)
454
455        try:
456            # multivariate time series
457            n, p = X.shape
458        except:
459            # univariate time series
460            n = X.shape[0]
461            p = 1
462        self.n_obs_ = n
463
464        rep_1_n = np.repeat(1, n)
465
466        self.y_ = None
467        self.X_ = None
468        self.n_series = p
469        self.fit_objs_.clear()
470        self.y_means_.clear()
471        residuals_ = []
472        self.residuals_ = None
473        self.residuals_sims_ = None
474        self.kde_ = None
475        self.sims_ = None
476        self.scaled_Z_ = None
477        self.centered_y_is_ = []
478
479        if self.init_n_series_ > 1:
480            # multivariate time series
481            mts_input = ts.create_train_inputs(X[::-1], self.lags)
482        else:
483            # univariate time series
484            mts_input = ts.create_train_inputs(X.reshape(-1, 1)[::-1], self.lags)
485
486        self.y_ = mts_input[0]
487
488        self.X_ = mts_input[1]
489
490        dummy_y, scaled_Z = self.cook_training_set(y=rep_1_n, X=self.X_)
491
492        self.scaled_Z_ = scaled_Z
493
494        # loop on all the time series and adjust self.obj.fit
495        if self.verbose > 0:
496            print(
497                f"\n Adjusting {type(self.obj).__name__} to multivariate time series... \n"
498            )
499
500        if self.show_progress is True:
501            iterator = tqdm(range(self.init_n_series_))
502        else:
503            iterator = range(self.init_n_series_)
504
505        if self.type_pi in (
506            "gaussian",
507            "kde",
508            "bootstrap",
509            "block-bootstrap",
510        ) or self.type_pi.startswith("vine"):
511            for i in iterator:
512                y_mean = np.mean(self.y_[:, i])
513                self.y_means_[i] = y_mean
514                centered_y_i = self.y_[:, i] - y_mean
515                self.centered_y_is_.append(centered_y_i)
516                self.obj.fit(X=scaled_Z, y=centered_y_i)
517                self.fit_objs_[i] = deepcopy(self.obj)
518                residuals_.append(
519                    (centered_y_i - self.fit_objs_[i].predict(scaled_Z)).tolist()
520                )
521
522        if self.type_pi.startswith("scp"):
523            # split conformal prediction
524            for i in iterator:
525                n_y = self.y_.shape[0]
526                n_y_half = n_y // 2
527                first_half_idx = range(0, n_y_half)
528                second_half_idx = range(n_y_half, n_y)
529                y_mean_temp = np.mean(self.y_[first_half_idx, i])
530                centered_y_i_temp = self.y_[first_half_idx, i] - y_mean_temp
531                self.obj.fit(X=scaled_Z[first_half_idx, :], y=centered_y_i_temp)
532                # calibrated residuals actually
533                residuals_.append(
534                    (
535                        self.y_[second_half_idx, i]
536                        - (y_mean_temp + self.obj.predict(scaled_Z[second_half_idx, :]))
537                    ).tolist()
538                )
539                # fit on the second half
540                y_mean = np.mean(self.y_[second_half_idx, i])
541                self.y_means_[i] = y_mean
542                centered_y_i = self.y_[second_half_idx, i] - y_mean
543                self.obj.fit(X=scaled_Z[second_half_idx, :], y=centered_y_i)
544                self.fit_objs_[i] = deepcopy(self.obj)
545
546        self.residuals_ = np.asarray(residuals_).T
547
548        if self.type_pi == "gaussian":
549            self.gaussian_preds_std_ = np.std(self.residuals_, axis=0)
550
551        if self.type_pi.startswith("scp2"):
552            # Calculate mean and standard deviation for each column
553            data_mean = np.mean(self.residuals_, axis=0)
554            self.residuals_std_dev_ = np.std(self.residuals_, axis=0)
555            # Center and scale the array using broadcasting
556            self.residuals_ = (
557                self.residuals_ - data_mean[np.newaxis, :]
558            ) / self.residuals_std_dev_[np.newaxis, :]
559
560        if self.replications != None and "kde" in self.type_pi:
561            if self.verbose > 0:
562                print(f"\n Simulate residuals using {self.kernel} kernel... \n")
563            assert self.kernel in (
564                "gaussian",
565                "tophat",
566            ), "currently, 'kernel' must be either 'gaussian' or 'tophat'"
567            kernel_bandwidths = {"bandwidth": np.logspace(-6, 6, 150)}
568            grid = GridSearchCV(
569                KernelDensity(kernel=self.kernel, **kwargs),
570                param_grid=kernel_bandwidths,
571            )
572            grid.fit(self.residuals_)
573
574            if self.verbose > 0:
575                print(
576                    f"\n Best parameters for {self.kernel} kernel: {grid.best_params_} \n"
577                )
578
579            self.kde_ = grid.best_estimator_
580
581        return self

Fit MTS model to training data X, with optional regressors xreg

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training time series, where n_samples is the number of samples and n_features is the number of features; X must be in increasing order (most recent observations last)

xreg: {array-like}, shape = [n_samples, n_features_xreg]
    Additional (external) regressors to be passed to self.obj; xreg must be in 'increasing' order (most recent observations last)

**kwargs: for now, additional parameters to be passed for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)

Returns:

self: object
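
For illustration, a minimal sketch of automatic lag selection (assuming a
scikit-learn Ridge base learner): passing a criterion name instead of an
integer for `lags` makes `fit` search for the number of lags minimizing
that criterion.

import nnetsauce as ns
import numpy as np
from sklearn import linear_model

np.random.seed(123)
X = np.random.rand(60, 2)

# lags="AIC" triggers the lag-selection loop at the beginning of fit()
obj_MTS = ns.MTS(linear_model.Ridge(), lags="AIC", n_hidden_features=3)
obj_MTS.fit(X)
print(obj_MTS.lags)          # number of lags selected by the criterion
print(obj_MTS.predict(h=3))  # forecasts using the selected lags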

def predict(self, h=5, level=95, **kwargs):
 618    def predict(self, h=5, level=95, **kwargs):
 619        """Forecast all the time series, h steps ahead"""
 620
 621        self.output_dates_, frequency = ts.compute_output_dates(self.df_, h)
 622
 623        self.level_ = level
 624
 625        self.return_std_ = False  # do not remove (/!\)
 626
 627        self.mean_ = None  # do not remove (/!\)
 628
 629        self.mean_ = deepcopy(self.y_)  # do not remove (/!\)
 630
 631        self.lower_ = None  # do not remove (/!\)
 632
 633        self.upper_ = None  # do not remove (/!\)
 634
 635        self.sims_ = None  # do not remove (/!\)
 636
 637        y_means_ = np.asarray([self.y_means_[i] for i in range(self.init_n_series_)])
 638
 639        n_features = self.init_n_series_ * self.lags
 640
 641        self.alpha_ = 100 - level
 642
 643        pi_multiplier = norm.ppf(1 - self.alpha_ / 200)
 644
 645        if "return_std" in kwargs:  # bayesian forecasting
 646            self.return_std_ = True
 647            self.preds_std_ = []
 648            DescribeResult = namedtuple(
 649                "DescribeResult", ("mean", "lower", "upper")
 650            )  # to be updated
 651
 652        if "return_pi" in kwargs:  # split conformal, without simulation
 653            mean_pi_ = []
 654            lower_pi_ = []
 655            upper_pi_ = []
 656            median_pi_ = []
 657            DescribeResult = namedtuple(
 658                "DescribeResult", ("mean", "lower", "upper")
 659            )  # to be updated
 660
 661        if self.kde_ != None and "kde" in self.type_pi:  # kde
 662            target_cols = self.df_.columns[
 663                : self.init_n_series_
 664            ]  # Get target column names
 665            if self.verbose == 1:
 666                self.residuals_sims_ = tuple(
 667                    self.kde_.sample(
 668                        n_samples=h, random_state=self.seed + 100 * i
 669                    )  # Keep full sample
 670                    for i in tqdm(range(self.replications))
 671                )
 672            elif self.verbose == 0:
 673                self.residuals_sims_ = tuple(
 674                    self.kde_.sample(
 675                        n_samples=h, random_state=self.seed + 100 * i
 676                    )  # Keep full sample
 677                    for i in range(self.replications)
 678                )
 679
 680            # Convert to DataFrames after sampling
 681            self.residuals_sims_ = tuple(
 682                pd.DataFrame(
 683                    sim,  # Keep all columns
 684                    columns=target_cols,  # Use original target column names
 685                    index=self.output_dates_,
 686                )
 687                for sim in self.residuals_sims_
 688            )
 689
 690        if self.type_pi in ("bootstrap", "scp-bootstrap", "scp2-bootstrap"):
 691            assert self.replications is not None and isinstance(
 692                self.replications, int
 693            ), "'replications' must be provided and be an integer"
 694            if self.verbose == 1:
 695                self.residuals_sims_ = tuple(
 696                    ts.bootstrap(
 697                        self.residuals_,
 698                        h=h,
 699                        block_size=None,
 700                        seed=self.seed + 100 * i,
 701                    )
 702                    for i in tqdm(range(self.replications))
 703                )
 704            elif self.verbose == 0:
 705                self.residuals_sims_ = tuple(
 706                    ts.bootstrap(
 707                        self.residuals_,
 708                        h=h,
 709                        block_size=None,
 710                        seed=self.seed + 100 * i,
 711                    )
 712                    for i in range(self.replications)
 713                )
 714
 715        if self.type_pi in (
 716            "block-bootstrap",
 717            "scp-block-bootstrap",
 718            "scp2-block-bootstrap",
 719        ):
 720            if self.block_size is None:
 721                self.block_size = int(
 722                    np.ceil(3.15 * (self.residuals_.shape[0] ** (1 / 3)))
 723                )
 724
 725            assert self.replications is not None and isinstance(
 726                self.replications, int
 727            ), "'replications' must be provided and be an integer"
 728            if self.verbose == 1:
 729                self.residuals_sims_ = tuple(
 730                    ts.bootstrap(
 731                        self.residuals_,
 732                        h=h,
 733                        block_size=self.block_size,
 734                        seed=self.seed + 100 * i,
 735                    )
 736                    for i in tqdm(range(self.replications))
 737                )
 738            elif self.verbose == 0:
 739                self.residuals_sims_ = tuple(
 740                    ts.bootstrap(
 741                        self.residuals_,
 742                        h=h,
 743                        block_size=self.block_size,
 744                        seed=self.seed + 100 * i,
 745                    )
 746                    for i in range(self.replications)
 747                )
 748
 749        if "vine" in self.type_pi:
 750            if self.verbose == 1:
 751                self.residuals_sims_ = tuple(
 752                    vinecopula_sample(
 753                        x=self.residuals_,
 754                        n_samples=h,
 755                        method=self.type_pi,
 756                        random_state=self.seed + 100 * i,
 757                    )
 758                    for i in tqdm(range(self.replications))
 759                )
 760            elif self.verbose == 0:
 761                self.residuals_sims_ = tuple(
 762                    vinecopula_sample(
 763                        x=self.residuals_,
 764                        n_samples=h,
 765                        method=self.type_pi,
 766                        random_state=self.seed + 100 * i,
 767                    )
 768                    for i in range(self.replications)
 769                )
 770
 771        mean_ = deepcopy(self.mean_)
 772
 773        for i in range(h):
 774
 775            new_obs = ts.reformat_response(mean_, self.lags)
 776            new_X = new_obs.reshape(1, -1)
 777            cooked_new_X = self.cook_test_set(new_X, **kwargs)
 778
 779            if "return_std" in kwargs:
 780                self.preds_std_.append(
 781                    [
 782                        np.asarray(
 783                            self.fit_objs_[i].predict(cooked_new_X, return_std=True)[1]
 784                        ).item()
 785                        for i in range(self.n_series)
 786                    ]
 787                )
 788
 789            if "return_pi" in kwargs:
 790                for i in range(self.n_series):
 791                    preds_pi = self.fit_objs_[i].predict(cooked_new_X, **kwargs)
 792                    mean_pi_.append(preds_pi.mean[0])
 793                    lower_pi_.append(preds_pi.lower[0])
 794                    upper_pi_.append(preds_pi.upper[0])
 795
 796            predicted_cooked_new_X = np.asarray(
 797                [
 798                    np.asarray(self.fit_objs_[i].predict(cooked_new_X)).item()
 799                    for i in range(self.init_n_series_)
 800                ]
 801            )
 802
 803            preds = np.asarray(y_means_ + predicted_cooked_new_X)
 804
 805            # Create full row with both predictions and external regressors
 806            if self.xreg_ is not None and "xreg" in kwargs:
 807                next_xreg = kwargs["xreg"].iloc[i : i + 1].values.flatten()
 808                full_row = np.concatenate([preds, next_xreg])
 809            else:
 810                full_row = preds
 811
 812            # Create a new row with same number of columns as mean_
 813            new_row = np.zeros((1, mean_.shape[1]))
 814            new_row[0, : full_row.shape[0]] = full_row
 815
 816            # Maintain the full dimensionality by using vstack instead of rbind
 817            mean_ = np.vstack([new_row, mean_[:-1]])
 818
 819        # Final output should only include the target columns
 820        self.mean_ = pd.DataFrame(
 821            mean_[0:h, : self.init_n_series_][::-1],
 822            columns=self.df_.columns[: self.init_n_series_],
 823            index=self.output_dates_,
 824        )
 825
 826        # function's return ----------------------------------------------------------------------
 827        if (
 828            (("return_std" not in kwargs) and ("return_pi" not in kwargs))
 829            and (self.type_pi not in ("gaussian", "scp"))
 830        ) or ("vine" in self.type_pi):
 831
 832            if self.replications is None:
 833                return self.mean_.iloc[:, : self.init_n_series_]
 834
 835            # if "return_std" not in kwargs and self.replications is not None
 836            meanf = []
 837            medianf = []
 838            lower = []
 839            upper = []
 840
 841            if "scp2" in self.type_pi:
 842
 843                if self.verbose == 1:
 844                    self.sims_ = tuple(
 845                        (
 846                            self.mean_
 847                            + self.residuals_sims_[i]
 848                            * self.residuals_std_dev_[np.newaxis, :]
 849                            for i in tqdm(range(self.replications))
 850                        )
 851                    )
 852                elif self.verbose == 0:
 853                    self.sims_ = tuple(
 854                        (
 855                            self.mean_
 856                            + self.residuals_sims_[i]
 857                            * self.residuals_std_dev_[np.newaxis, :]
 858                            for i in range(self.replications)
 859                        )
 860                    )
 861            else:
 862
 863                if self.verbose == 1:
 864                    self.sims_ = tuple(
 865                        (
 866                            self.mean_ + self.residuals_sims_[i]
 867                            for i in tqdm(range(self.replications))
 868                        )
 869                    )
 870                elif self.verbose == 0:
 871                    self.sims_ = tuple(
 872                        (
 873                            self.mean_ + self.residuals_sims_[i]
 874                            for i in range(self.replications)
 875                        )
 876                    )
 877
 878            DescribeResult = namedtuple(
 879                "DescribeResult", ("mean", "sims", "lower", "upper")
 880            )
 881            for ix in range(self.init_n_series_):
 882                sims_ix = getsims(self.sims_, ix)
 883                if self.agg == "mean":
 884                    meanf.append(np.mean(sims_ix, axis=1))
 885                else:
 886                    medianf.append(np.median(sims_ix, axis=1))
 887                lower.append(np.quantile(sims_ix, q=self.alpha_ / 200, axis=1))
 888                upper.append(np.quantile(sims_ix, q=1 - self.alpha_ / 200, axis=1))
 889            self.mean_ = pd.DataFrame(
 890                np.asarray(meanf).T,
 891                columns=self.series_names[: self.init_n_series_],  # self.df_.columns,
 892                index=self.output_dates_,
 893            )
 894
 895            self.lower_ = pd.DataFrame(
 896                np.asarray(lower).T,
 897                columns=self.series_names[: self.init_n_series_],  # self.df_.columns,
 898                index=self.output_dates_,
 899            )
 900
 901            self.upper_ = pd.DataFrame(
 902                np.asarray(upper).T,
 903                columns=self.series_names[: self.init_n_series_],  # self.df_.columns,
 904                index=self.output_dates_,
 905            )
 906
 907            try:
 908                self.median_ = pd.DataFrame(
 909                    np.asarray(medianf).T,
 910                    columns=self.series_names[
 911                        : self.init_n_series_
 912                    ],  # self.df_.columns,
 913                    index=self.output_dates_,
 914                )
 915            except Exception as e:
 916                pass
 917
 918            return DescribeResult(self.mean_, self.sims_, self.lower_, self.upper_)
 919
 920        if (
 921            (("return_std" in kwargs) or ("return_pi" in kwargs))
 922            and (self.type_pi not in ("gaussian", "scp"))
 923        ) or "vine" in self.type_pi:
 924            DescribeResult = namedtuple("DescribeResult", ("mean", "lower", "upper"))
 925
 926            self.mean_ = pd.DataFrame(
 927                np.asarray(self.mean_),
 928                columns=self.series_names,  # self.df_.columns,
 929                index=self.output_dates_,
 930            )
 931
 932            if "return_std" in kwargs:
 933
 934                self.preds_std_ = np.asarray(self.preds_std_)
 935                print("self.preds_std_", self.preds_std_)
 936                print("self.mean_", self.mean_)
 937                print("pi_multiplier", pi_multiplier)
 938
 939                self.lower_ = pd.DataFrame(
 940                    self.mean_.values - pi_multiplier * self.preds_std_,
 941                    columns=self.series_names,  # self.df_.columns,
 942                    index=self.output_dates_,
 943                )
 944
 945                self.upper_ = pd.DataFrame(
 946                    self.mean_.values + pi_multiplier * self.preds_std_,
 947                    columns=self.series_names,  # self.df_.columns,
 948                    index=self.output_dates_,
 949                )
 950
 951            if "return_pi" in kwargs:
 952
 953                self.lower_ = pd.DataFrame(
 954                    np.asarray(lower_pi_).reshape(h, self.n_series)
 955                    + y_means_[np.newaxis, :],
 956                    columns=self.series_names,  # self.df_.columns,
 957                    index=self.output_dates_,
 958                )
 959
 960                self.upper_ = pd.DataFrame(
 961                    np.asarray(upper_pi_).reshape(h, self.n_series)
 962                    + y_means_[np.newaxis, :],
 963                    columns=self.series_names,  # self.df_.columns,
 964                    index=self.output_dates_,
 965                )
 966
 967            res = DescribeResult(self.mean_, self.lower_, self.upper_)
 968
 969            if self.xreg_ is not None:
 970                if len(self.xreg_.shape) > 1:
 971                    res2 = mx.tuple_map(
 972                        res,
 973                        lambda x: mo.delete_last_columns(
 974                            x, num_columns=self.xreg_.shape[1]
 975                        ),
 976                    )
 977                else:
 978                    res2 = mx.tuple_map(
 979                        res, lambda x: mo.delete_last_columns(x, num_columns=1)
 980                    )
 981                return DescribeResult(res2[0], res2[1], res2[2])
 982
 983            return res
 984
 985        if self.type_pi == "gaussian":
 986
 987            DescribeResult = namedtuple("DescribeResult", ("mean", "lower", "upper"))
 988
 989            self.mean_ = pd.DataFrame(
 990                np.asarray(self.mean_),
 991                columns=self.series_names,  # self.df_.columns,
 992                index=self.output_dates_,
 993            )
 994
 995            self.lower_ = pd.DataFrame(
 996                self.mean_.values - pi_multiplier * self.gaussian_preds_std_,
 997                columns=self.series_names,  # self.df_.columns,
 998                index=self.output_dates_,
 999            )
1000
1001            self.upper_ = pd.DataFrame(
1002                self.mean_.values + pi_multiplier * self.gaussian_preds_std_,
1003                columns=self.series_names,  # self.df_.columns,
1004                index=self.output_dates_,
1005            )
1006
1007            res = DescribeResult(self.mean_, self.lower_, self.upper_)
1008
1009            if self.xreg_ is not None:
1010                if len(self.xreg_.shape) > 1:
1011                    res2 = mx.tuple_map(
1012                        res,
1013                        lambda x: mo.delete_last_columns(
1014                            x, num_columns=self.xreg_.shape[1]
1015                        ),
1016                    )
1017                else:
1018                    res2 = mx.tuple_map(
1019                        res, lambda x: mo.delete_last_columns(x, num_columns=1)
1020                    )
1021                return DescribeResult(res2[0], res2[1], res2[2])
1022
1023            return res
1024
1025        # After prediction loop, ensure sims only contain target columns
1026        if self.sims_ is not None:
1027            if self.verbose == 1:
1028                self.sims_ = tuple(
1029                    sim[:h,]  # keep only the first h rows (target columns are selected below)
1030                    for sim in tqdm(self.sims_)
1031                )
1032            elif self.verbose == 0:
1033                self.sims_ = tuple(
1034                    sim[:h,]  # keep only the first h rows (target columns are selected below)
1035                    for sim in self.sims_
1036                )
1037
1038            # Convert numpy arrays to DataFrames with proper columns
1039            self.sims_ = tuple(
1040                pd.DataFrame(
1041                    sim,
1042                    columns=self.df_.columns[: self.init_n_series_],
1043                    index=self.output_dates_,
1044                )
1045                for sim in self.sims_
1046            )
1047
1048        if self.type_pi in ("kde", "bootstrap", "block-bootstrap", "vine-copula"):
1049            if self.xreg_ is not None:
1050                # Use getsimsxreg when external regressors are present
1051                target_cols = self.df_.columns[: self.init_n_series_]
1052                self.sims_ = getsimsxreg(self.sims_, self.output_dates_, target_cols)
1053            else:
1054                # Use original getsims for backward compatibility
1055                self.sims_ = getsims(self.sims_)

Forecast all the time series, h steps ahead
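
A minimal forecasting sketch, assuming a scikit-learn regressor as the base learner; the constructor arguments shown here (`lags`, `type_pi`, `replications`) are illustrative assumptions and may need tuning for real data:

```python
# Minimal sketch of multivariate forecasting with ns.MTS; the constructor
# arguments (lags, type_pi, replications) are illustrative assumptions.
import numpy as np
import pandas as pd
import nnetsauce as ns
from sklearn.linear_model import Ridge

rng = np.random.default_rng(0)
dates = pd.date_range("2020-01-31", periods=36, freq="M")
df = pd.DataFrame(rng.normal(size=(36, 2)), columns=["y1", "y2"], index=dates)

obj = ns.MTS(Ridge(), lags=3, type_pi="kde", replications=100)
obj.fit(df)
res = obj.predict(h=6)   # namedtuple with mean, sims, lower, upper (see the code above)
print(res.mean)
print(res.lower)
print(res.upper)
```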

def score(self, X, training_index, testing_index, scoring=None, **kwargs):
1057    def score(self, X, training_index, testing_index, scoring=None, **kwargs):
1058        """Train on training_index, score on testing_index."""
1059
1060        assert (
1061            bool(set(training_index).intersection(set(testing_index))) == False
1062        ), "Non-overlapping 'training_index' and 'testing_index' required"
1063
1064        # Dimensions
1065        try:
1066            # multivariate time series
1067            n, p = X.shape
1068        except:
1069            # univariate time series
1070            n = X.shape[0]
1071            p = 1
1072
1073        # Training and testing sets
1074        if p > 1:
1075            X_train = X[training_index, :]
1076            X_test = X[testing_index, :]
1077        else:
1078            X_train = X[training_index]
1079            X_test = X[testing_index]
1080
1081        # Horizon
1082        h = len(testing_index)
1083        assert (
1084            len(training_index) + h
1085        ) <= n, "Please check lengths of training and testing windows"
1086
1087        # Fit and predict
1088        self.fit(X_train, **kwargs)
1089        preds = self.predict(h=h, **kwargs)
1090
1091        if scoring is None:
1092            scoring = "neg_root_mean_squared_error"
1093
1094        # check inputs
1095        assert scoring in (
1096            "explained_variance",
1097            "neg_mean_absolute_error",
1098            "neg_mean_squared_error",
1099            "neg_root_mean_squared_error",
1100            "neg_mean_squared_log_error",
1101            "neg_median_absolute_error",
1102            "r2",
1103        ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \
1104                               'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \
1105                               'neg_median_absolute_error', 'r2')"
1106
1107        scoring_options = {
1108            "explained_variance": skm2.explained_variance_score,
1109            "neg_mean_absolute_error": skm2.mean_absolute_error,
1110            "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2),
1111            "neg_root_mean_squared_error": lambda x, y: np.sqrt(np.mean((x - y) ** 2)),
1112            "neg_mean_squared_log_error": skm2.mean_squared_log_error,
1113            "neg_median_absolute_error": skm2.median_absolute_error,
1114            "r2": skm2.r2_score,
1115        }
1116
1117        return scoring_options[scoring](X_test, preds)

Train on training_index, score on testing_index.
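
A small walk-forward scoring sketch (the model construction is an illustrative assumption). Note that, despite the `neg_` prefixes, the mapping in the code above returns plain, positive error values:

```python
# Walk-forward evaluation with `score`: fit on the first 80% of the sample,
# forecast the remaining 20% and compute the RMSE. Model settings are
# illustrative assumptions.
import numpy as np
import nnetsauce as ns
from sklearn.linear_model import Ridge

X = np.random.rand(100, 3)                    # toy (n, p) multivariate series
n = X.shape[0]
training_index = np.arange(0, int(0.8 * n))   # first 80 observations
testing_index = np.arange(int(0.8 * n), n)    # disjoint last 20 observations

obj = ns.MTS(Ridge(), lags=2)
rmse = obj.score(X, training_index, testing_index,
                 scoring="neg_root_mean_squared_error")
print(rmse)
```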

class MultitaskClassifier(nnetsauce.Base, sklearn.base.ClassifierMixin):
 16class MultitaskClassifier(Base, ClassifierMixin):
 17    """Multitask Classification model based on regression models, with shared covariates
 18
 19    Parameters:
 20
 21        obj: object
 22            any object (must be a regression model) containing a method fit (obj.fit())
 23            and a method predict (obj.predict())
 24
 25        n_hidden_features: int
 26            number of nodes in the hidden layer
 27
 28        activation_name: str
 29            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 30
 31        a: float
 32            hyperparameter for 'prelu' or 'elu' activation function
 33
 34        nodes_sim: str
 35            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 36            'uniform'
 37
 38        bias: boolean
 39            indicates if the hidden layer contains a bias term (True) or not
 40            (False)
 41
 42        dropout: float
 43            regularization parameter; (random) percentage of nodes dropped out
 44            of the training
 45
 46        direct_link: boolean
 47            indicates if the original predictors are included (True) in model's
 48            fitting or not (False)
 49
 50        n_clusters: int
 51            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 52                no clustering)
 53
 54        cluster_encode: bool
 55            defines how the variable containing clusters is treated (default is one-hot)
 56            if `False`, then labels are used, without one-hot encoding
 57
 58        type_clust: str
 59            type of clustering method: currently k-means ('kmeans') or Gaussian
 60            Mixture Model ('gmm')
 61
 62        type_scaling: a tuple of 3 strings
 63            scaling methods for inputs, hidden layer, and clustering respectively
 64            (and when relevant).
 65            Currently available: standardization ('std') or MinMax scaling ('minmax')
 66
 67        col_sample: float
 68            percentage of covariates randomly chosen for training
 69
 70        row_sample: float
 71            percentage of rows chosen for training, by stratified bootstrapping
 72
 73        seed: int
 74            reproducibility seed for nodes_sim=='uniform'
 75
 76        backend: str
 77            "cpu" or "gpu" or "tpu"
 78
 79    Attributes:
 80
 81        fit_objs_: dict
 82            regression objects fitted to each class (one per class)
 83
 84        n_classes_: int
 85            number of classes for the classifier
 86
 87    Examples:
 88
 89    See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/mtask_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/mtask_classification.py)
 90
 91    ```python
 92    import nnetsauce as ns
 93    import numpy as np
 94    from sklearn.datasets import load_breast_cancer
 95    from sklearn.linear_model import LinearRegression
 96    from sklearn.model_selection import train_test_split
 97    from sklearn import metrics
 98    from time import time
 99
100    breast_cancer = load_breast_cancer()
101    Z = breast_cancer.data
102    t = breast_cancer.target
103
104    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
105                                                        random_state=123+2*10)
106
107    # Linear Regression is used
108    regr = LinearRegression()
109    fit_obj = ns.MultitaskClassifier(regr, n_hidden_features=5,
110                                n_clusters=2, type_clust="gmm")
111
112    start = time()
113    fit_obj.fit(X_train, y_train)
114    print(f"Elapsed {time() - start}")
115
116    print(fit_obj.score(X_test, y_test))
117    print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
118
119    start = time()
120    preds = fit_obj.predict(X_test)
121    print(f"Elapsed {time() - start}")
122    print(metrics.classification_report(preds, y_test))
123    ```
124
125    """
126
127    # construct the object -----
128
129    def __init__(
130        self,
131        obj,
132        n_hidden_features=5,
133        activation_name="relu",
134        a=0.01,
135        nodes_sim="sobol",
136        bias=True,
137        dropout=0,
138        direct_link=True,
139        n_clusters=2,
140        cluster_encode=True,
141        type_clust="kmeans",
142        type_scaling=("std", "std", "std"),
143        col_sample=1,
144        row_sample=1,
145        seed=123,
146        backend="cpu",
147    ):
148        super().__init__(
149            n_hidden_features=n_hidden_features,
150            activation_name=activation_name,
151            a=a,
152            nodes_sim=nodes_sim,
153            bias=bias,
154            dropout=dropout,
155            direct_link=direct_link,
156            n_clusters=n_clusters,
157            cluster_encode=cluster_encode,
158            type_clust=type_clust,
159            type_scaling=type_scaling,
160            col_sample=col_sample,
161            row_sample=row_sample,
162            seed=seed,
163            backend=backend,
164        )
165
166        self.type_fit = "classification"
167        self.obj = obj
168        self.fit_objs_ = {}
169
170    def fit(self, X, y, sample_weight=None, **kwargs):
171        """Fit MultitaskClassifier to training data (X, y).
172
173        Args:
174
175            X: {array-like}, shape = [n_samples, n_features]
176                Training vectors, where n_samples is the number
177                of samples and n_features is the number of features.
178
179            y: array-like, shape = [n_samples]
180                Target values.
181
182            **kwargs: additional parameters to be passed to
183                    self.cook_training_set or self.obj.fit
184
185        Returns:
186
187            self: object
188
189        """
190
191        assert mx.is_factor(y), "y must contain only integers"
192
193        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
194
195        self.classes_ = np.unique(y)  # for compatibility with sklearn
196        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
197
198        # multitask response
199        Y = mo.one_hot_encode2(output_y, self.n_classes_)
200
201        # if sample_weight is None:
202        for i in range(self.n_classes_):
203            self.fit_objs_[i] = deepcopy(self.obj.fit(scaled_Z, Y[:, i], **kwargs))
204
205        self.classes_ = np.unique(y)
206        return self
207
208    def predict(self, X, **kwargs):
209        """Predict test data X.
210
211        Args:
212
213            X: {array-like}, shape = [n_samples, n_features]
214                Training vectors, where n_samples is the number
215                of samples and n_features is the number of features.
216
217            **kwargs: additional parameters to be passed to
218                    self.cook_test_set
219
220        Returns:
221
222            model predictions: {array-like}
223
224        """
225        return np.argmax(self.predict_proba(X, **kwargs), axis=1)
226
227    def predict_proba(self, X, **kwargs):
228        """Predict probabilities for test data X.
229
230        Args:
231
232            X: {array-like}, shape = [n_samples, n_features]
233                Training vectors, where n_samples is the number
234                of samples and n_features is the number of features.
235
236            **kwargs: additional parameters to be passed to
237                    self.cook_test_set
238
239        Returns:
240
241            probability estimates for test data: {array-like}
242
243        """
244
245        shape_X = X.shape
246
247        probs = np.zeros((shape_X[0], self.n_classes_))
248
249        if len(shape_X) == 1:
250            n_features = shape_X[0]
251
252            new_X = mo.rbind(
253                X.reshape(1, n_features),
254                np.ones(n_features).reshape(1, n_features),
255            )
256
257            Z = self.cook_test_set(new_X, **kwargs)
258
259            # loop on all the classes
260            for i in range(self.n_classes_):
261                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0]
262
263        else:
264            Z = self.cook_test_set(X, **kwargs)
265
266            # loop on all the classes
267            for i in range(self.n_classes_):
268                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)
269
270        expit_raw_probs = expit(probs)
271
272        return expit_raw_probs / expit_raw_probs.sum(axis=1)[:, None]
273
274    def decision_function(self, X, **kwargs):
275        """Compute the decision function of X.
276
277        Parameters:
278            X: {array-like}, shape = [n_samples, n_features]
279                Samples to compute decision function for.
280
281            **kwargs: additional parameters to be passed to
282                    self.cook_test_set
283
284        Returns:
285            array-like of shape (n_samples,) or (n_samples, n_classes)
286            Decision function of the input samples. The order of outputs is the same
287            as that of the classes passed to fit.
288        """
289        if not hasattr(self.obj, "decision_function"):
290            # If base classifier doesn't have decision_function, use predict_proba
291            proba = self.predict_proba(X, **kwargs)
292            if proba.shape[1] == 2:
293                return proba[:, 1]  # For binary classification
294            return proba  # For multiclass
295
296        if len(X.shape) == 1:
297            n_features = X.shape[0]
298            new_X = mo.rbind(
299                X.reshape(1, n_features),
300                np.ones(n_features).reshape(1, n_features),
301            )
302
303            return (
304                self.obj.decision_function(
305                    self.cook_test_set(new_X, **kwargs), **kwargs
306                )
307            )[0]
308
309        return self.obj.decision_function(self.cook_test_set(X, **kwargs), **kwargs)

Multitask Classification model based on regression models, with shared covariates

Parameters:

obj: object
    any object (must be a regression model) containing a method fit (obj.fit())
    and a method predict (obj.predict())

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

col_sample: float
    percentage of covariates randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

seed: int
    reproducibility seed for nodes_sim=='uniform'

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

fit_objs_: dict
    regression objects fitted to each class (one per class)

n_classes_: int
    number of classes for the classifier

Examples:

See also https://github.com/Techtonique/nnetsauce/blob/master/examples/mtask_classification.py

import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time

breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target

X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
                                                    random_state=123+2*10)

# Linear Regression is used
regr = LinearRegression()
fit_obj = ns.MultitaskClassifier(regr, n_hidden_features=5,
                            n_clusters=2, type_clust="gmm")

start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")

print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))

start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
def fit(self, X, y, sample_weight=None, **kwargs):
170    def fit(self, X, y, sample_weight=None, **kwargs):
171        """Fit MultitaskClassifier to training data (X, y).
172
173        Args:
174
175            X: {array-like}, shape = [n_samples, n_features]
176                Training vectors, where n_samples is the number
177                of samples and n_features is the number of features.
178
179            y: array-like, shape = [n_samples]
180                Target values.
181
182            **kwargs: additional parameters to be passed to
183                    self.cook_training_set or self.obj.fit
184
185        Returns:
186
187            self: object
188
189        """
190
191        assert mx.is_factor(y), "y must contain only integers"
192
193        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
194
195        self.classes_ = np.unique(y)  # for compatibility with sklearn
196        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
197
198        # multitask response
199        Y = mo.one_hot_encode2(output_y, self.n_classes_)
200
201        # if sample_weight is None:
202        for i in range(self.n_classes_):
203            self.fit_objs_[i] = deepcopy(self.obj.fit(scaled_Z, Y[:, i], **kwargs))
204
205        self.classes_ = np.unique(y)
206        return self

Fit MultitaskClassifier to training data (X, y).

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
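
A standalone sketch of the decomposition performed by `fit`: the response is one-hot encoded and a copy of the base regressor is fitted on each class column. The helper name below is hypothetical, not part of the nnetsauce API:

```python
# Illustrative re-implementation of the one-regression-per-class idea
# used by MultitaskClassifier.fit; `fit_one_regressor_per_class` is a
# hypothetical helper, not library code.
import numpy as np
from copy import deepcopy
from sklearn.linear_model import LinearRegression

def fit_one_regressor_per_class(base_regr, Z, y):
    classes = np.unique(y)
    Y = np.eye(len(classes))[np.searchsorted(classes, y)]  # one-hot response
    return {i: deepcopy(base_regr).fit(Z, Y[:, i]) for i in range(len(classes))}

Z = np.random.rand(50, 4)             # already-preprocessed covariates
y = np.random.randint(0, 3, size=50)  # integer class labels
fit_objs = fit_one_regressor_per_class(LinearRegression(), Z, y)
print(len(fit_objs))                  # -> 3, one fitted regressor per class
```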
def predict(self, X, **kwargs):
208    def predict(self, X, **kwargs):
209        """Predict test data X.
210
211        Args:
212
213            X: {array-like}, shape = [n_samples, n_features]
214                Training vectors, where n_samples is the number
215                of samples and n_features is the number of features.
216
217            **kwargs: additional parameters to be passed to
218                    self.cook_test_set
219
220        Returns:
221
222            model predictions: {array-like}
223
224        """
225        return np.argmax(self.predict_proba(X, **kwargs), axis=1)

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
def predict_proba(self, X, **kwargs):
227    def predict_proba(self, X, **kwargs):
228        """Predict probabilities for test data X.
229
230        Args:
231
232            X: {array-like}, shape = [n_samples, n_features]
233                Training vectors, where n_samples is the number
234                of samples and n_features is the number of features.
235
236            **kwargs: additional parameters to be passed to
237                    self.cook_test_set
238
239        Returns:
240
241            probability estimates for test data: {array-like}
242
243        """
244
245        shape_X = X.shape
246
247        probs = np.zeros((shape_X[0], self.n_classes_))
248
249        if len(shape_X) == 1:
250            n_features = shape_X[0]
251
252            new_X = mo.rbind(
253                X.reshape(1, n_features),
254                np.ones(n_features).reshape(1, n_features),
255            )
256
257            Z = self.cook_test_set(new_X, **kwargs)
258
259            # loop on all the classes
260            for i in range(self.n_classes_):
261                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0]
262
263        else:
264            Z = self.cook_test_set(X, **kwargs)
265
266            # loop on all the classes
267            for i in range(self.n_classes_):
268                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)
269
270        expit_raw_probs = expit(probs)
271
272        return expit_raw_probs / expit_raw_probs.sum(axis=1)[:, None]

Predict probabilities for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

probability estimates for test data: {array-like}
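
As the code above shows, the raw per-class regression outputs are mapped to probabilities by the logistic function and then renormalized row-wise; a small numeric sketch:

```python
# Row-normalised logistic transform used in predict_proba above.
import numpy as np
from scipy.special import expit

raw = np.array([[2.0, -1.0, 0.5],
                [-0.3, 0.1, 1.2]])        # per-class regression outputs, shape (n, n_classes)
probs = expit(raw)                        # squash into (0, 1)
probs = probs / probs.sum(axis=1)[:, None]
print(probs.sum(axis=1))                  # -> [1. 1.]
```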
class NeuralNetRegressor(sklearn.base.BaseEstimator, sklearn.base.RegressorMixin):
 98class NeuralNetRegressor(BaseEstimator, RegressorMixin):
 99    """
100    (Pretrained) Neural Network Regressor.
101
102    Parameters:
103
104        hidden_layer_sizes : tuple, default=(100,)
105            The number of neurons in each hidden layer.
106        max_iter : int, default=100
107            The maximum number of iterations to train the model.
108        learning_rate : float, default=0.01
109            The learning rate for the optimizer.
110        l1_ratio : float, default=0.5
111            The ratio of L1 regularization.
112        alpha : float, default=1e-6
113            The regularization parameter.
114        activation_name : str, default="relu"
115            The activation function to use.
116        dropout : float, default=0.0
117            The dropout rate.
118        random_state : int, default=None
119            The random state for the random number generator.
120        weights : list, default=None
121            The weights to initialize the model with.
122
123    Attributes:
124
125        weights : list
126            The weights of the model.
127        params : list
128            The parameters of the model.
129        scaler_ : sklearn.preprocessing.StandardScaler
130            The scaler used to standardize the input features.
131        y_mean_ : float
132            The mean of the target variable.
133
134    Methods:
135
136        fit(X, y)
137            Fit the model to the data.
138        predict(X)
139            Predict the target variable.
140        get_weights()
141            Get the weights of the model.
142        set_weights(weights)
143            Set the weights of the model.
144    """
145
146    def __init__(
147        self,
148        hidden_layer_sizes=None,
149        max_iter=100,
150        learning_rate=0.01,
151        l1_ratio=0.5,
152        alpha=1e-6,
153        activation_name="relu",
154        dropout=0,
155        weights=None,
156        random_state=None,
157    ):
158        if weights is None and hidden_layer_sizes is None:
159            hidden_layer_sizes = (100,)  # default value if neither is provided
160        self.hidden_layer_sizes = hidden_layer_sizes
161        self.max_iter = max_iter
162        self.learning_rate = learning_rate
163        self.l1_ratio = l1_ratio
164        self.alpha = alpha
165        self.activation_name = activation_name
166        self.dropout = dropout
167        self.weights = weights
168        self.random_state = random_state
169        self.params = None
170        self.scaler_ = StandardScaler()
171        self.y_mean_ = None
172
173    def _validate_weights(self, input_dim):
174        """Validate that weights dimensions are coherent."""
175        if not self.weights:
176            return False
177
178        try:
179            # Check each layer's weights and biases
180            prev_dim = input_dim
181            for W, b in self.weights:
182                # Check weight matrix dimensions
183                if W.shape[0] != prev_dim:
184                    raise ValueError(
185                        f"Weight matrix input dimension {W.shape[0]} does not match previous layer output dimension {prev_dim}"
186                    )
187                # Check bias dimension matches weight matrix output
188                if W.shape[1] != b.shape[0]:
189                    raise ValueError(
190                        f"Bias dimension {b.shape[0]} does not match weight matrix output dimension {W.shape[1]}"
191                    )
192                prev_dim = W.shape[1]
193
194            # Check final output dimension is 1 for regression
195            if prev_dim != 1:
196                raise ValueError(
197                    f"Final layer output dimension {prev_dim} must be 1 for regression"
198                )
199
200            return True
201        except (AttributeError, IndexError):
202            raise ValueError(
203                "Weights format is invalid. Expected list of (weight, bias) tuples"
204            )
205
206    def fit(self, X, y):
207        # Standardize the input features
208        X = self.scaler_.fit_transform(X)
209        # Ensure y is 2D for consistency
210        y = y.reshape(-1, 1)
211        self.y_mean_ = jnp.mean(y)
212        y = y - self.y_mean_
213        # Validate or initialize weights
214        if self.weights is not None:
215            if self._validate_weights(X.shape[1]):
216                self.params = self.weights
217        else:
218            if self.hidden_layer_sizes is None:
219                raise ValueError(
220                    "Either weights or hidden_layer_sizes must be provided"
221                )
222            self.params = initialize_params(
223                X.shape[1], self.hidden_layer_sizes, self.random_state
224            )
225        loss_fn = partial(loss, l1_ratio=self.l1_ratio, alpha=self.alpha)
226        grad_loss = jit(grad(loss_fn))  # compiled gradient evaluation function
227        perex_grads = jit(
228            vmap(grad_loss, in_axes=(None, 0, 0))
229        )  # fast per-example grads
230        # Training loop
231        for _ in range(self.max_iter):
232            grads = perex_grads(self.params, X, y)
233            # Average gradients across examples
234            grads = jax.tree_map(lambda g: jnp.mean(g, axis=0), grads)
235            # Update parameters
236            self.params = [
237                (W - self.learning_rate * dW, b - self.learning_rate * db)
238                for (W, b), (dW, db) in zip(self.params, grads)
239            ]
240        # Store final weights
241        self.weights = self.params
242        return self
243
244    def get_weights(self):
245        """Return the current weights of the model."""
246        if self.weights is None:
247            raise ValueError("No weights available. Model has not been fitted yet.")
248        return self.weights
249
250    def set_weights(self, weights):
251        """Set the weights of the model manually."""
252        self.weights = weights
253        self.params = weights
254
255    def predict(self, X):
256        X = self.scaler_.transform(X)
257        if self.params is None:
258            raise ValueError("Model has not been fitted yet.")
259        predictions = predict_internal(
260            self.params,
261            X,
262            activation_func=self.activation_name,
263            dropout=self.dropout,
264            seed=self.random_state,
265        )
266        return predictions.reshape(-1) + self.y_mean_

(Pretrained) Neural Network Regressor.

Parameters:

hidden_layer_sizes : tuple, default=(100,)
    The number of neurons in each hidden layer.
max_iter : int, default=100
    The maximum number of iterations to train the model.
learning_rate : float, default=0.01
    The learning rate for the optimizer.
l1_ratio : float, default=0.5
    The ratio of L1 regularization.
alpha : float, default=1e-6
    The regularization parameter.
activation_name : str, default="relu"
    The activation function to use.
dropout : float, default=0.0
    The dropout rate.
random_state : int, default=None
    The random state for the random number generator.
weights : list, default=None
    The weights to initialize the model with.

Attributes:

weights : list
    The weights of the model.
params : list
    The parameters of the model.
scaler_ : sklearn.preprocessing.StandardScaler
    The scaler used to standardize the input features.
y_mean_ : float
    The mean of the target variable.

Methods:

fit(X, y)
    Fit the model to the data.
predict(X)
    Predict the target variable.
get_weights()
    Get the weights of the model.
set_weights(weights)
    Set the weights of the model.
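
A minimal usage sketch based on the signatures above; the dataset and hyperparameters are illustrative assumptions:

```python
# Minimal NeuralNetRegressor usage sketch; hyperparameters are illustrative.
import nnetsauce as ns
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

X, y = make_regression(n_samples=200, n_features=5, noise=0.1, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

regr = ns.NeuralNetRegressor(hidden_layer_sizes=(32, 16), max_iter=200,
                             learning_rate=0.01, random_state=42)
regr.fit(X_train, y_train)
preds = regr.predict(X_test)
weights = regr.get_weights()   # list of (W, b) tuples, reusable via set_weights()
```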
def fit(self, X, y):
206    def fit(self, X, y):
207        # Standardize the input features
208        X = self.scaler_.fit_transform(X)
209        # Ensure y is 2D for consistency
210        y = y.reshape(-1, 1)
211        self.y_mean_ = jnp.mean(y)
212        y = y - self.y_mean_
213        # Validate or initialize weights
214        if self.weights is not None:
215            if self._validate_weights(X.shape[1]):
216                self.params = self.weights
217        else:
218            if self.hidden_layer_sizes is None:
219                raise ValueError(
220                    "Either weights or hidden_layer_sizes must be provided"
221                )
222            self.params = initialize_params(
223                X.shape[1], self.hidden_layer_sizes, self.random_state
224            )
225        loss_fn = partial(loss, l1_ratio=self.l1_ratio, alpha=self.alpha)
226        grad_loss = jit(grad(loss_fn))  # compiled gradient evaluation function
227        perex_grads = jit(
228            vmap(grad_loss, in_axes=(None, 0, 0))
229        )  # fast per-example grads
230        # Training loop
231        for _ in range(self.max_iter):
232            grads = perex_grads(self.params, X, y)
233            # Average gradients across examples
234            grads = jax.tree_map(lambda g: jnp.mean(g, axis=0), grads)
235            # Update parameters
236            self.params = [
237                (W - self.learning_rate * dW, b - self.learning_rate * db)
238                for (W, b), (dW, db) in zip(self.params, grads)
239            ]
240        # Store final weights
241        self.weights = self.params
242        return self
def predict(self, X):
255    def predict(self, X):
256        X = self.scaler_.transform(X)
257        if self.params is None:
258            raise ValueError("Model has not been fitted yet.")
259        predictions = predict_internal(
260            self.params,
261            X,
262            activation_func=self.activation_name,
263            dropout=self.dropout,
264            seed=self.random_state,
265        )
266        return predictions.reshape(-1) + self.y_mean_
class NeuralNetClassifier(sklearn.base.BaseEstimator, sklearn.base.ClassifierMixin):
 10class NeuralNetClassifier(BaseEstimator, ClassifierMixin):
 11    """
 12    (Pretrained) Neural Network Classifier.
 13
 14    Parameters:
 15
 16        hidden_layer_sizes : tuple, default=(100,)
 17            The number of neurons in each hidden layer.
 18        max_iter : int, default=100
 19            The maximum number of iterations to train the model.
 20        learning_rate : float, default=0.01
 21            The learning rate for the optimizer.
 22        l1_ratio : float, default=0.5
 23            The ratio of L1 regularization.
 24        alpha : float, default=1e-6
 25            The regularization parameter.
 26        activation_name : str, default="relu"
 27            The activation function to use.
 28        dropout : float, default=0.0
 29            The dropout rate.
 30        random_state : int, default=None
 31            The random state for the random number generator.
 32        weights : list, default=None
 33            The weights to initialize the model with.
 34
 35    Attributes:
 36
 37        weights : list
 38            The weights of the model.
 39        params : list
 40            The parameters of the model.
 41        scaler_ : sklearn.preprocessing.StandardScaler
 42            The scaler used to standardize the input features.
 43        y_mean_ : float
 44            The mean of the target variable.
 45
 46    Methods:
 47
 48        fit(X, y)
 49            Fit the model to the data.
 50        predict(X)
 51            Predict the target variable.
 52        predict_proba(X)
 53            Predict the probability of the target variable.
 54        get_weights()
 55            Get the weights of the model.
 56        set_weights(weights)
 57            Set the weights of the model.
 58    """
 59
 60    def __init__(
 61        self,
 62        hidden_layer_sizes=(100,),
 63        max_iter=100,
 64        learning_rate=0.01,
 65        weights=None,
 66        l1_ratio=0.5,
 67        alpha=1e-6,
 68        activation_name="relu",
 69        dropout=0.0,
 70        random_state=None,
 71    ):
 72        self.hidden_layer_sizes = hidden_layer_sizes
 73        self.max_iter = max_iter
 74        self.learning_rate = learning_rate
 75        self.weights = weights
 76        self.l1_ratio = l1_ratio
 77        self.alpha = alpha
 78        self.activation_name = activation_name
 79        self.dropout = dropout
 80        self.random_state = random_state
 81        self.regr = None
 82
 83    def fit(self, X, y):
 84        """Fit the model to the data.
 85
 86        Parameters:
 87
 88            X: {array-like}, shape = [n_samples, n_features]
 89                Training vectors, where n_samples is the number of samples and
 90                n_features is the number of features.
 91            y: array-like, shape = [n_samples]
 92                Target values.
 93        """
 94        regressor = NeuralNetRegressor(
 95            hidden_layer_sizes=self.hidden_layer_sizes,
 96            max_iter=self.max_iter,
 97            learning_rate=self.learning_rate,
 98            weights=self.weights,
 99            l1_ratio=self.l1_ratio,
100            alpha=self.alpha,
101            activation_name=self.activation_name,
102            dropout=self.dropout,
103            random_state=self.random_state,
104        )
105        self.regr = SimpleMultitaskClassifier(regressor)
106        self.regr.fit(X, y)
107        self.classes_ = np.unique(y)
108        self.n_classes_ = len(self.classes_)
109        self.n_tasks_ = 1
110        self.n_features_in_ = X.shape[1]
111        self.n_outputs_ = 1
112        self.n_samples_fit_ = X.shape[0]
113        self.n_samples_test_ = X.shape[0]
114        self.n_features_out_ = 1
115        self.n_outputs_ = 1
116        self.n_features_in_ = X.shape[1]
117        self.n_features_out_ = 1
118        self.n_outputs_ = 1
119        return self
120
121    def predict_proba(self, X):
122        """Predict the probability of the target variable.
123
124        Parameters:
125
126            X: {array-like}, shape = [n_samples, n_features]
127                Training vectors, where n_samples is the number of samples and
128                n_features is the number of features.
129        """
130        return self.regr.predict_proba(X)
131
132    def predict(self, X):
133        """Predict the target variable.
134
135        Parameters:
136
137            X: {array-like}, shape = [n_samples, n_features]
138                Training vectors, where n_samples is the number of samples and
139                n_features is the number of features.
140        """
141        return self.regr.predict(X)

(Pretrained) Neural Network Classifier.

Parameters:

hidden_layer_sizes : tuple, default=(100,)
    The number of neurons in each hidden layer.
max_iter : int, default=100
    The maximum number of iterations to train the model.
learning_rate : float, default=0.01
    The learning rate for the optimizer.
l1_ratio : float, default=0.5
    The ratio of L1 regularization.
alpha : float, default=1e-6
    The regularization parameter.
activation_name : str, default="relu"
    The activation function to use.
dropout : float, default=0.0
    The dropout rate.
random_state : int, default=None
    The random state for the random number generator.
weights : list, default=None
    The weights to initialize the model with.

Attributes:

weights : list
    The weights of the model.
params : list
    The parameters of the model.
scaler_ : sklearn.preprocessing.StandardScaler
    The scaler used to standardize the input features.
y_mean_ : float
    The mean of the target variable.

Methods:

fit(X, y)
    Fit the model to the data.
predict(X)
    Predict the target variable.
predict_proba(X)
    Predict the probability of the target variable.
get_weights()
    Get the weights of the model.
set_weights(weights)
    Set the weights of the model.
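
A minimal usage sketch based on the signatures above (as shown in `fit` below, the classifier delegates to a NeuralNetRegressor wrapped in a SimpleMultitaskClassifier); hyperparameters are illustrative assumptions:

```python
# Minimal NeuralNetClassifier usage sketch; hyperparameters are illustrative.
import nnetsauce as ns
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

clf = ns.NeuralNetClassifier(hidden_layer_sizes=(32,), max_iter=200,
                             learning_rate=0.01, random_state=0)
clf.fit(X_train, y_train)
print(clf.predict(X_test)[:5])
print(clf.predict_proba(X_test)[:5])   # one column per class
```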
def fit(self, X, y):
 83    def fit(self, X, y):
 84        """Fit the model to the data.
 85
 86        Parameters:
 87
 88            X: {array-like}, shape = [n_samples, n_features]
 89                Training vectors, where n_samples is the number of samples and
 90                n_features is the number of features.
 91            y: array-like, shape = [n_samples]
 92                Target values.
 93        """
 94        regressor = NeuralNetRegressor(
 95            hidden_layer_sizes=self.hidden_layer_sizes,
 96            max_iter=self.max_iter,
 97            learning_rate=self.learning_rate,
 98            weights=self.weights,
 99            l1_ratio=self.l1_ratio,
100            alpha=self.alpha,
101            activation_name=self.activation_name,
102            dropout=self.dropout,
103            random_state=self.random_state,
104        )
105        self.regr = SimpleMultitaskClassifier(regressor)
106        self.regr.fit(X, y)
107        self.classes_ = np.unique(y)
108        self.n_classes_ = len(self.classes_)
109        self.n_tasks_ = 1
110        self.n_features_in_ = X.shape[1]
111        self.n_outputs_ = 1
112        self.n_samples_fit_ = X.shape[0]
113        self.n_samples_test_ = X.shape[0]
114        self.n_features_out_ = 1
115        self.n_outputs_ = 1
116        self.n_features_in_ = X.shape[1]
117        self.n_features_out_ = 1
118        self.n_outputs_ = 1
119        return self

Fit the model to the data.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number of samples and
    n_features is the number of features.
y: array-like, shape = [n_samples]
    Target values.
def predict_proba(self, X):
121    def predict_proba(self, X):
122        """Predict the probability of the target variable.
123
124        Parameters:
125
126            X: {array-like}, shape = [n_samples, n_features]
127                Training vectors, where n_samples is the number of samples and
128                n_features is the number of features.
129        """
130        return self.regr.predict_proba(X)

Predict the probability of the target variable.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number of samples and
    n_features is the number of features.
def predict(self, X):
132    def predict(self, X):
133        """Predict the target variable.
134
135        Parameters:
136
137            X: {array-like}, shape = [n_samples, n_features]
138                Training vectors, where n_samples is the number of samples and
139                n_features is the number of features.
140        """
141        return self.regr.predict(X)

Predict the target variable.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number of samples and
    n_features is the number of features.
class PredictionInterval(sklearn.base.BaseEstimator, sklearn.base.RegressorMixin):
 19class PredictionInterval(BaseEstimator, RegressorMixin):
 20    """Class PredictionInterval: Obtain prediction intervals.
 21
 22    Parameters:
 23
 24        obj: an object;
 25            fitted object containing methods `fit` and `predict`
 26
 27        method: a string;
 28            method for constructing the prediction intervals.
 29            Currently "splitconformal" (default) and "localconformal"
 30
 31        level: a float;
 32            Confidence level for prediction intervals. Default is 95,
 33            equivalent to a miscoverage error of 5 (%)
 34
 35        replications: an integer;
 36            Number of replications for simulated conformal (default is `None`),
 37            for type_pi = "bootstrap" or "kde"
 38
 39        type_pi: a string;
 40            type of prediction interval: currently `None`
 41            (split conformal without simulation), "kde" or "bootstrap"
 42
 43        type_split: a string;
 44            "random" (random split of data) or "sequential" (sequential split of data)
 45
 46        seed: an integer;
 47            Reproducibility of fit (there's a random split between fitting and calibration data)
 48    """
 49
 50    def __init__(
 51        self,
 52        obj,
 53        method="splitconformal",
 54        level=95,
 55        type_pi=None,
 56        type_split="random",
 57        replications=None,
 58        kernel=None,
 59        agg="mean",
 60        seed=123,
 61    ):
 62
 63        self.obj = obj
 64        self.method = method
 65        self.level = level
 66        self.type_pi = type_pi
 67        self.type_split = type_split
 68        self.replications = replications
 69        self.kernel = kernel
 70        self.agg = agg
 71        self.seed = seed
 72        self.alpha_ = 1 - self.level / 100
 73        self.quantile_ = None
 74        self.icp_ = None
 75        self.calibrated_residuals_ = None
 76        self.scaled_calibrated_residuals_ = None
 77        self.calibrated_residuals_scaler_ = None
 78        self.kde_ = None
 79        self.aic_ = None 
 80        self.aicc_ = None
 81        self.bic_ = None
 82
 83    def fit(self, X, y, sample_weight=None, **kwargs):
 84        """Fit the `method` to training data (X, y).
 85
 86        Args:
 87
 88            X: array-like, shape = [n_samples, n_features];
 89                Training set vectors, where n_samples is the number
 90                of samples and n_features is the number of features.
 91
 92            y: array-like, shape = [n_samples, ]; Target values.
 93
 94            sample_weight: array-like, shape = [n_samples]
 95                Sample weights.
 96
 97        """
 98
 99        if self.type_split == "random":
100
101            X_train, X_calibration, y_train, y_calibration = train_test_split(
102                X, y, test_size=0.5, random_state=self.seed
103            )
104
105        elif self.type_split == "sequential":
106
107            n_x = X.shape[0]
108            n_x_half = n_x // 2
109            first_half_idx = range(0, n_x_half)
110            second_half_idx = range(n_x_half, n_x)
111            X_train = X[first_half_idx, :]
112            X_calibration = X[second_half_idx, :]
113            y_train = y[first_half_idx]
114            y_calibration = y[second_half_idx]
115
116        if self.method == "splitconformal":
117
118            self.obj.fit(X_train, y_train)
119            preds_calibration = self.obj.predict(X_calibration)
120            self.calibrated_residuals_ = y_calibration - preds_calibration
121            absolute_residuals = np.abs(self.calibrated_residuals_)
122            self.calibrated_residuals_scaler_ = StandardScaler(
123                with_mean=True, with_std=True
124            )
125            self.scaled_calibrated_residuals_ = (
126                self.calibrated_residuals_scaler_.fit_transform(
127                    self.calibrated_residuals_.reshape(-1, 1)
128                ).ravel()
129            )
130            try:
131                # numpy version >= 1.22
132                self.quantile_ = np.quantile(
133                    a=absolute_residuals, q=self.level / 100, method="higher"
134                )
135            except Exception:
136                # numpy version < 1.22
137                self.quantile_ = np.quantile(
138                    a=absolute_residuals,
139                    q=self.level / 100,
140                    interpolation="higher",
141                )
142
143        if self.method == "localconformal":
144
145            mad_estimator = ExtraTreesRegressor()
146            normalizer = RegressorNormalizer(self.obj, mad_estimator, AbsErrorErrFunc())
147            nc = RegressorNc(self.obj, AbsErrorErrFunc(), normalizer)
148            self.icp_ = IcpRegressor(nc)
149            self.icp_.fit(X_train, y_train)
150            self.icp_.calibrate(X_calibration, y_calibration)
151
152        return self
153
154    def predict(self, X, return_pi=False):
155        """Obtain predictions and prediction intervals
156
157        Args:
158
159            X: array-like, shape = [n_samples, n_features];
160                Testing set vectors, where n_samples is the number
161                of samples and n_features is the number of features.
162
163            return_pi: boolean
164                Whether the prediction interval is returned or not.
165                Default is False, for compatibility with other _estimators_.
166                If True, a tuple containing the predictions + lower and upper
167                bounds is returned.
168
169        """
170
171        if self.method == "splitconformal":
172            pred = self.obj.predict(X)
173
174        if self.method == "localconformal":
175            pred = self.icp_.predict(X)
176
177        if self.method == "splitconformal":
178
179            if (
180                self.replications is None and self.type_pi is None
181            ):  # type_pi is not used here, no bootstrap or kde
182
183                if return_pi:
184
185                    DescribeResult = namedtuple(
186                        "DescribeResult", ("mean", "lower", "upper")
187                    )
188                    return DescribeResult(
189                        pred, pred - self.quantile_, pred + self.quantile_
190                    )
191
192                else:
193
194                    return pred
195
196            else:  # self.method == "splitconformal" and if self.replications is not None, type_pi must be used
197
198                if self.type_pi is None:
199                    self.type_pi = "kde"
200                    raise Warning("type_pi must be set, setting to 'kde'")
201
202                if self.replications is None:
203                    self.replications = 100
204                    raise Warning("replications must be set, setting to 100")
205
206                assert self.type_pi in (
207                    "bootstrap",
208                    "kde",
209                ), "`self.type_pi` must be in ('bootstrap', 'kde')"
210
211                if self.type_pi == "bootstrap":
212                    np.random.seed(self.seed)
213                    self.residuals_sims_ = np.asarray(
214                        [
215                            np.random.choice(
216                                a=self.scaled_calibrated_residuals_,
217                                size=X.shape[0],
218                            )
219                            for _ in range(self.replications)
220                        ]
221                    ).T
222                    self.sims_ = np.asarray(
223                        [
224                            pred
225                            + self.calibrated_residuals_scaler_.scale_[0]
226                            * self.residuals_sims_[:, i].ravel()
227                            for i in range(self.replications)
228                        ]
229                    ).T
230                elif self.type_pi == "kde":
231                    self.kde_ = gaussian_kde(dataset=self.scaled_calibrated_residuals_)
232                    self.sims_ = np.asarray(
233                        [
234                            pred
235                            + self.calibrated_residuals_scaler_.scale_[0]
236                            * self.kde_.resample(
237                                size=X.shape[0], seed=self.seed + i
238                            ).ravel()
239                            for i in range(self.replications)
240                        ]
241                    ).T
242
243                self.mean_ = np.mean(self.sims_, axis=1)
244                self.lower_ = np.quantile(self.sims_, q=self.alpha_ / 200, axis=1)
245                self.upper_ = np.quantile(self.sims_, q=1 - self.alpha_ / 200, axis=1)
246
247                DescribeResult = namedtuple(
248                    "DescribeResult", ("mean", "sims", "lower", "upper")
249                )
250
251                return DescribeResult(self.mean_, self.sims_, self.lower_, self.upper_)
252
253        if self.method == "localconformal":
254
255            if self.replications is None:
256
257                if return_pi:
258
259                    predictions_bounds = self.icp_.predict(
260                        X, significance=1 - self.level
261                    )
262                    DescribeResult = namedtuple(
263                        "DescribeResult", ("mean", "lower", "upper")
264                    )
265                    return DescribeResult(
266                        pred, predictions_bounds[:, 0], predictions_bounds[:, 1]
267                    )
268
269                else:
270
271                    return pred
272
273            else:  # (self.method == "localconformal") and if self.replications is not None
274
275                raise NotImplementedError(
276                    "When self.method == 'localconformal', there are no simulations"
277                )

Class PredictionInterval: Obtain prediction intervals.

Parameters:

obj: an object;
    fitted object containing methods `fit` and `predict`

method: a string;
    method for constructing the prediction intervals.
    Currently "splitconformal" (default) and "localconformal"

level: a float;
    Confidence level for prediction intervals. Default is 95,
    equivalent to a miscoverage error of 5 (%)

replications: an integer;
    Number of replications for simulated conformal (default is `None`),
    for type_pi = "bootstrap" or "kde"

type_pi: a string;
    type of prediction interval: currently `None`
    (split conformal without simulation), "kde" or "bootstrap"

type_split: a string;
    "random" (random split of data) or "sequential" (sequential split of data)

seed: an integer;
    Reproducibility of fit (there's a random split between fitting and calibration data)
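
A minimal split-conformal usage sketch (the base regressor and dataset are illustrative assumptions); with `return_pi=True`, `predict` returns a `(mean, lower, upper)` namedtuple, as shown in the source below:

```python
# Minimal split-conformal sketch with PredictionInterval; the base
# regressor and dataset are illustrative assumptions.
import nnetsauce as ns
from sklearn.datasets import make_regression
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split

X, y = make_regression(n_samples=300, n_features=4, noise=5.0, random_state=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

pi = ns.PredictionInterval(Ridge(), method="splitconformal", level=95)
pi.fit(X_train, y_train)
mean, lower, upper = pi.predict(X_test, return_pi=True)
print(((y_test >= lower) & (y_test <= upper)).mean())   # empirical coverage, ~0.95
```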
def fit(self, X, y, sample_weight=None, **kwargs):
 83    def fit(self, X, y, sample_weight=None, **kwargs):
 84        """Fit the `method` to training data (X, y).
 85
 86        Args:
 87
 88            X: array-like, shape = [n_samples, n_features];
 89                Training set vectors, where n_samples is the number
 90                of samples and n_features is the number of features.
 91
 92            y: array-like, shape = [n_samples, ]; Target values.
 93
 94            sample_weight: array-like, shape = [n_samples]
 95                Sample weights.
 96
 97        """
 98
 99        if self.type_split == "random":
100
101            X_train, X_calibration, y_train, y_calibration = train_test_split(
102                X, y, test_size=0.5, random_state=self.seed
103            )
104
105        elif self.type_split == "sequential":
106
107            n_x = X.shape[0]
108            n_x_half = n_x // 2
109            first_half_idx = range(0, n_x_half)
110            second_half_idx = range(n_x_half, n_x)
111            X_train = X[first_half_idx, :]
112            X_calibration = X[second_half_idx, :]
113            y_train = y[first_half_idx]
114            y_calibration = y[second_half_idx]
115
116        if self.method == "splitconformal":
117
118            self.obj.fit(X_train, y_train)
119            preds_calibration = self.obj.predict(X_calibration)
120            self.calibrated_residuals_ = y_calibration - preds_calibration
121            absolute_residuals = np.abs(self.calibrated_residuals_)
122            self.calibrated_residuals_scaler_ = StandardScaler(
123                with_mean=True, with_std=True
124            )
125            self.scaled_calibrated_residuals_ = (
126                self.calibrated_residuals_scaler_.fit_transform(
127                    self.calibrated_residuals_.reshape(-1, 1)
128                ).ravel()
129            )
130            try:
131                # numpy version >= 1.22
132                self.quantile_ = np.quantile(
133                    a=absolute_residuals, q=self.level / 100, method="higher"
134                )
135            except Exception:
136                # numpy version < 1.22
137                self.quantile_ = np.quantile(
138                    a=absolute_residuals,
139                    q=self.level / 100,
140                    interpolation="higher",
141                )
142
143        if self.method == "localconformal":
144
145            mad_estimator = ExtraTreesRegressor()
146            normalizer = RegressorNormalizer(self.obj, mad_estimator, AbsErrorErrFunc())
147            nc = RegressorNc(self.obj, AbsErrorErrFunc(), normalizer)
148            self.icp_ = IcpRegressor(nc)
149            self.icp_.fit(X_train, y_train)
150            self.icp_.calibrate(X_calibration, y_calibration)
151
152        return self

Fit the method to training data (X, y).

Args:

X: array-like, shape = [n_samples, n_features];
    Training set vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples, ]; Target values.

sample_weight: array-like, shape = [n_samples]
    Sample weights.
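
The interval half-width computed by `fit` is the `level/100` "higher" empirical quantile of the absolute calibration residuals, for instance:

```python
# The split-conformal half-width: `level/100` "higher" quantile of the
# absolute calibration residuals (the method= keyword needs numpy >= 1.22,
# as noted in the source above).
import numpy as np

calibrated_residuals = np.array([0.2, -1.3, 0.7, -0.1, 2.4, 0.9])
q = np.quantile(np.abs(calibrated_residuals), q=0.95, method="higher")
print(q)   # -> 2.4; intervals are then pred - q, pred + q
```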
def predict(self, X, return_pi=False):
154    def predict(self, X, return_pi=False):
155        """Obtain predictions and prediction intervals
156
157        Args:
158
159            X: array-like, shape = [n_samples, n_features];
160                Testing set vectors, where n_samples is the number
161                of samples and n_features is the number of features.
162
163            return_pi: boolean
164                Whether the prediction interval is returned or not.
165                Default is False, for compatibility with other _estimators_.
166                If True, a tuple containing the predictions + lower and upper
167                bounds is returned.
168
169        """
170
171        if self.method == "splitconformal":
172            pred = self.obj.predict(X)
173
174        if self.method == "localconformal":
175            pred = self.icp_.predict(X)
176
177        if self.method == "splitconformal":
178
179            if (
180                self.replications is None and self.type_pi is None
181            ):  # type_pi is not used here, no bootstrap or kde
182
183                if return_pi:
184
185                    DescribeResult = namedtuple(
186                        "DescribeResult", ("mean", "lower", "upper")
187                    )
188                    return DescribeResult(
189                        pred, pred - self.quantile_, pred + self.quantile_
190                    )
191
192                else:
193
194                    return pred
195
196            else:  # self.method == "splitconformal" and if self.replications is not None, type_pi must be used
197
198                if self.type_pi is None:
199                    self.type_pi = "kde"
200                    warnings.warn("type_pi not set; defaulting to 'kde'")  # 'warnings' import assumed; raising Warning here would abort before the default is used
201
202                if self.replications is None:
203                    self.replications = 100
204                    warnings.warn("replications not set; defaulting to 100")  # 'warnings' import assumed
205
206                assert self.type_pi in (
207                    "bootstrap",
208                    "kde",
209                ), "`self.type_pi` must be in ('bootstrap', 'kde')"
210
211                if self.type_pi == "bootstrap":
212                    np.random.seed(self.seed)
213                    self.residuals_sims_ = np.asarray(
214                        [
215                            np.random.choice(
216                                a=self.scaled_calibrated_residuals_,
217                                size=X.shape[0],
218                            )
219                            for _ in range(self.replications)
220                        ]
221                    ).T
222                    self.sims_ = np.asarray(
223                        [
224                            pred
225                            + self.calibrated_residuals_scaler_.scale_[0]
226                            * self.residuals_sims_[:, i].ravel()
227                            for i in range(self.replications)
228                        ]
229                    ).T
230                elif self.type_pi == "kde":
231                    self.kde_ = gaussian_kde(dataset=self.scaled_calibrated_residuals_)
232                    self.sims_ = np.asarray(
233                        [
234                            pred
235                            + self.calibrated_residuals_scaler_.scale_[0]
236                            * self.kde_.resample(
237                                size=X.shape[0], seed=self.seed + i
238                            ).ravel()
239                            for i in range(self.replications)
240                        ]
241                    ).T
242
243                self.mean_ = np.mean(self.sims_, axis=1)
244                self.lower_ = np.quantile(self.sims_, q=self.alpha_ / 200, axis=1)
245                self.upper_ = np.quantile(self.sims_, q=1 - self.alpha_ / 200, axis=1)
246
247                DescribeResult = namedtuple(
248                    "DescribeResult", ("mean", "sims", "lower", "upper")
249                )
250
251                return DescribeResult(self.mean_, self.sims_, self.lower_, self.upper_)
252
253        if self.method == "localconformal":
254
255            if self.replications is None:
256
257                if return_pi:
258
259                    predictions_bounds = self.icp_.predict(
260                        X, significance=1 - self.level / 100  # level is in percent (e.g. 95), as in the quantile computation above
261                    )
262                    DescribeResult = namedtuple(
263                        "DescribeResult", ("mean", "lower", "upper")
264                    )
265                    return DescribeResult(
266                        pred, predictions_bounds[:, 0], predictions_bounds[:, 1]
267                    )
268
269                else:
270
271                    return pred
272
273            else:  # (self.method == "localconformal") and if self.replications is not None
274
275                raise NotImplementedError(
276                    "When self.method == 'localconformal', there are no simulations"
277                )

Obtain predictions and prediction intervals

Args:

X: array-like, shape = [n_samples, n_features];
    Testing set vectors, where n_samples is the number
    of samples and n_features is the number of features.

return_pi: boolean
    Whether the prediction interval is returned or not.
    Default is False, for compatibility with other _estimators_.
    If True, a tuple containing the predictions + lower and upper
    bounds is returned.
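The split-conformal workflow described above can be exercised end to end. The sketch below is a minimal, illustrative usage example: only the attributes read in `fit`/`predict` (`obj`, `method`, `level`, `seed`) are confirmed by the source shown here, so the exact constructor signature is an assumption, and the base learner and dataset are arbitrary choices.

```python
# Minimal sketch (not the canonical API): split-conformal intervals around a ridge model.
# Assumption: PredictionInterval(obj=..., method="splitconformal", level=95, seed=...)
# mirrors the attributes (self.obj, self.method, self.level, self.seed) used above.
import nnetsauce as ns
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split

X, y = fetch_california_housing(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=13)

pi = ns.PredictionInterval(obj=Ridge(), method="splitconformal", level=95, seed=123)
pi.fit(X_train, y_train)

# with return_pi=True, a namedtuple with mean, lower and upper bounds is returned
res = pi.predict(X_test, return_pi=True)
coverage = np.mean((y_test >= res.lower) & (y_test <= res.upper))
print(f"empirical coverage: {coverage:.3f}")
```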
class SimpleMultitaskClassifier(nnetsauce.Base, sklearn.base.ClassifierMixin):
 18class SimpleMultitaskClassifier(Base, ClassifierMixin):
 19    """Multitask Classification model based on regression models, with shared covariates
 20
 21    Parameters:
 22
 23        obj: object
 24            any object (must be a regression model) containing a method fit (obj.fit())
 25            and a method predict (obj.predict())
 26
 27        seed: int
 28            reproducibility seed
 29
 30    Attributes:
 31
 32        fit_objs_: dict
 33            objects adjusted to each individual time series
 34
 35        n_classes_: int
 36            number of classes for the classifier
 37
 38    Examples:
 39
 40    ```python
 41    import nnetsauce as ns
 42    import numpy as np
 43    from sklearn.datasets import load_breast_cancer
 44    from sklearn.linear_model import LinearRegression
 45    from sklearn.model_selection import train_test_split
 46    from sklearn import metrics
 47    from time import time
 48
 49    breast_cancer = load_breast_cancer()
 50    Z = breast_cancer.data
 51    t = breast_cancer.target
 52
 53    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
 54                                                        random_state=123+2*10)
 55
 56    # Linear Regression is used
 57    regr = LinearRegression()
 58    fit_obj = ns.SimpleMultitaskClassifier(regr)
 59
 60    start = time()
 61    fit_obj.fit(X_train, y_train)
 62    print(f"Elapsed {time() - start}")
 63
 64    print(fit_obj.score(X_test, y_test))
 65    print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
 66
 67    start = time()
 68    preds = fit_obj.predict(X_test)
 69    print(f"Elapsed {time() - start}")
 70    print(metrics.classification_report(preds, y_test))
 71    ```
 72
 73    """
 74
 75    # construct the object -----
 76
 77    def __init__(
 78        self,
 79        obj,
 80    ):
 81        self.type_fit = "classification"
 82        self.obj = obj
 83        self.fit_objs_ = {}
 84        self.X_scaler_ = StandardScaler()
 85        self.scaled_X_ = None
 86
 87    def fit(self, X, y, sample_weight=None, **kwargs):
 88        """Fit SimpleMultitaskClassifier to training data (X, y).
 89
 90        Args:
 91
 92            X: {array-like}, shape = [n_samples, n_features]
 93                Training vectors, where n_samples is the number
 94                of samples and n_features is the number of features.
 95
 96            y: array-like, shape = [n_samples]
 97                Target values.
 98
 99            **kwargs: additional parameters to be passed to
100                    self.cook_training_set or self.obj.fit
101
102        Returns:
103
104            self: object
105
106        """
107
108        assert mx.is_factor(y), "y must contain only integers"
109
110        self.classes_ = np.unique(y)  # for compatibility with sklearn
111        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
112
113        self.scaled_X_ = self.X_scaler_.fit_transform(X)
114
115        # multitask response
116        Y = mo.one_hot_encode2(y, self.n_classes_)
117
118        try: 
119            for i in range(self.n_classes_):
120                self.fit_objs_[i] = deepcopy(
121                    self.obj.fit(self.scaled_X_, Y[:, i], sample_weight=sample_weight, **kwargs)
122                )
123        except Exception:  # base regressor does not accept sample_weight; refit without it
124            for i in range(self.n_classes_):
125                self.fit_objs_[i] = deepcopy(
126                    self.obj.fit(self.scaled_X_, Y[:, i], **kwargs)
127                )
128        return self
129
130    def predict(self, X, **kwargs):
131        """Predict test data X.
132
133        Args:
134
135            X: {array-like}, shape = [n_samples, n_features]
136                Testing set vectors, where n_samples is the number
137                of samples and n_features is the number of features.
138
139            **kwargs: additional parameters
140
141        Returns:
142
143            model predictions: {array-like}
144
145        """
146        return np.argmax(self.predict_proba(X, **kwargs), axis=1)
147
148    def predict_proba(self, X, **kwargs):
149        """Predict probabilities for test data X.
150
151        Args:
152
153            X: {array-like}, shape = [n_samples, n_features]
154                Testing set vectors, where n_samples is the number
155                of samples and n_features is the number of features.
156
157            **kwargs: additional parameters
158
159        Returns:
160
161            probability estimates for test data: {array-like}
162
163        """
164
165        shape_X = X.shape
166
167        probs = np.zeros((shape_X[0], self.n_classes_))
168
169        if len(shape_X) == 1: # one example
170
171            n_features = shape_X[0]
172
173            new_X = mo.rbind(
174                X.reshape(1, n_features),
175                np.ones(n_features).reshape(1, n_features),
176            )
177
178            Z = self.X_scaler_.transform(new_X, **kwargs)
179
180            # one regression prediction per class, used as a raw score
181            for i in range(self.n_classes_):
182                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0]
183
184        else: # multiple rows
185
186            Z = self.X_scaler_.transform(X, **kwargs)
187
188            # one regression prediction per class, used as a raw score
189            for i in range(self.n_classes_):
190                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)
191
192        expit_raw_probs = expit(probs)
193        
194        # Add small epsilon to avoid division by zero
195        row_sums = expit_raw_probs.sum(axis=1)[:, None]
196        row_sums[row_sums < 1e-10] = 1e-10
197        
198        return expit_raw_probs / row_sums
199
200    def decision_function(self, X, **kwargs):
201        """Compute the decision function of X.
202
203        Parameters:
204            X: {array-like}, shape = [n_samples, n_features]
205                Samples to compute decision function for.
206
207            **kwargs: additional parameters to be passed to
208                    self.cook_test_set
209
210        Returns:
211            array-like of shape (n_samples,) or (n_samples, n_classes)
212            Decision function of the input samples. The order of outputs is the same
213            as that of the classes passed to fit.
214        """
215        if not hasattr(self.obj, "decision_function"):
216            # If base classifier doesn't have decision_function, use predict_proba
217            proba = self.predict_proba(X, **kwargs)
218            if proba.shape[1] == 2:
219                return proba[:, 1]  # For binary classification
220            return proba  # For multiclass
221
222        if len(X.shape) == 1:
223            n_features = X.shape[0]
224            new_X = mo.rbind(
225                X.reshape(1, n_features),
226                np.ones(n_features).reshape(1, n_features),
227            )
228
229            return (
230                self.obj.decision_function(
231                    self.cook_test_set(new_X, **kwargs), **kwargs
232                )
233            )[0]
234
235        return self.obj.decision_function(self.cook_test_set(X, **kwargs), **kwargs)

Multitask Classification model based on regression models, with shared covariates

Parameters:

obj: object
    any object (must be a regression model) containing a method fit (obj.fit())
    and a method predict (obj.predict())

seed: int
    reproducibility seed

Attributes:

fit_objs_: dict
    objects adjusted to each individual time series

n_classes_: int
    number of classes for the classifier

Examples:

import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time

breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target

X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
                                                    random_state=123+2*10)

# Linear Regression is used
regr = LinearRegression()
fit_obj = ns.SimpleMultitaskClassifier(regr)

start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")

print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))

start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
def fit(self, X, y, sample_weight=None, **kwargs):
 87    def fit(self, X, y, sample_weight=None, **kwargs):
 88        """Fit SimpleMultitaskClassifier to training data (X, y).
 89
 90        Args:
 91
 92            X: {array-like}, shape = [n_samples, n_features]
 93                Training vectors, where n_samples is the number
 94                of samples and n_features is the number of features.
 95
 96            y: array-like, shape = [n_samples]
 97                Target values.
 98
 99            **kwargs: additional parameters to be passed to
100                    self.cook_training_set or self.obj.fit
101
102        Returns:
103
104            self: object
105
106        """
107
108        assert mx.is_factor(y), "y must contain only integers"
109
110        self.classes_ = np.unique(y)  # for compatibility with sklearn
111        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
112
113        self.scaled_X_ = self.X_scaler_.fit_transform(X)
114
115        # multitask response
116        Y = mo.one_hot_encode2(y, self.n_classes_)
117
118        try: 
119            for i in range(self.n_classes_):
120                self.fit_objs_[i] = deepcopy(
121                    self.obj.fit(self.scaled_X_, Y[:, i], sample_weight=sample_weight, **kwargs)
122                )
123        except Exception:  # base regressor does not accept sample_weight; refit without it
124            for i in range(self.n_classes_):
125                self.fit_objs_[i] = deepcopy(
126                    self.obj.fit(self.scaled_X_, Y[:, i], **kwargs)
127                )
128        return self

Fit SimpleMultitaskClassifier to training data (X, y).

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
def predict(self, X, **kwargs):
130    def predict(self, X, **kwargs):
131        """Predict test data X.
132
133        Args:
134
135            X: {array-like}, shape = [n_samples, n_features]
136                Testing set vectors, where n_samples is the number
137                of samples and n_features is the number of features.
138
139            **kwargs: additional parameters
140
141        Returns:
142
143            model predictions: {array-like}
144
145        """
146        return np.argmax(self.predict_proba(X, **kwargs), axis=1)

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Testing set vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters

Returns:

model predictions: {array-like}
def predict_proba(self, X, **kwargs):
148    def predict_proba(self, X, **kwargs):
149        """Predict probabilities for test data X.
150
151        Args:
152
153            X: {array-like}, shape = [n_samples, n_features]
154                Testing set vectors, where n_samples is the number
155                of samples and n_features is the number of features.
156
157            **kwargs: additional parameters
158
159        Returns:
160
161            probability estimates for test data: {array-like}
162
163        """
164
165        shape_X = X.shape
166
167        probs = np.zeros((shape_X[0], self.n_classes_))
168
169        if len(shape_X) == 1: # one example
170
171            n_features = shape_X[0]
172
173            new_X = mo.rbind(
174                X.reshape(1, n_features),
175                np.ones(n_features).reshape(1, n_features),
176            )
177
178            Z = self.X_scaler_.transform(new_X, **kwargs)
179
180            # one regression prediction per class, used as a raw score
181            for i in range(self.n_classes_):
182                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0]
183
184        else: # multiple rows
185
186            Z = self.X_scaler_.transform(X, **kwargs)
187
188            # one regression prediction per class, used as a raw score
189            for i in range(self.n_classes_):
190                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)
191
192        expit_raw_probs = expit(probs)
193        
194        # Add small epsilon to avoid division by zero
195        row_sums = expit_raw_probs.sum(axis=1)[:, None]
196        row_sums[row_sums < 1e-10] = 1e-10
197        
198        return expit_raw_probs / row_sums

Predict probabilities for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Testing set vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters

Returns:

probability estimates for test data: {array-like}
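To make the normalization step in `predict_proba` concrete, the standalone numpy snippet below reproduces the same arithmetic on a small matrix of raw per-class regression outputs: each value is squashed through `expit`, then each row is rescaled to sum to one, with the same epsilon guard against division by zero. It is an illustration of the transform only, independent of the class.

```python
# Standalone illustration of the expit + row-normalization step used in predict_proba.
import numpy as np
from scipy.special import expit

raw = np.array([[2.1, -0.3, 0.4],      # per-class regression outputs for 2 samples
                [-1.0, 0.2, 1.5]])

p = expit(raw)                          # squash each raw output into (0, 1)
row_sums = p.sum(axis=1, keepdims=True)
row_sums[row_sums < 1e-10] = 1e-10      # guard against division by zero
probs = p / row_sums                    # rows now sum to 1
print(probs.sum(axis=1))                # -> [1. 1.]
```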
class Optimizer:
  9class Optimizer:
 10    """Optimizer class
 11
 12    Attributes:
 13
 14        type_optim: str
 15            type of optimizer, (currently) either 'sgd' (stochastic minibatch gradient descent)
 16            or 'scd' (stochastic minibatch coordinate descent)
 17
 18        num_iters: int
 19            number of iterations of the optimizer
 20
 21        learning_rate: float
 22            step size
 23
 24        batch_prop: float
 25            proportion of the initial data used at each optimization step
 26
 27        learning_method: str
 28            "poly" - learning rate decreasing as a polynomial function
 29            of # of iterations
 30            "exp" - learning rate decreasing as an exponential function
 31            of # of iterations
 32            "momentum" - gradient descent using momentum (default)
 33
 34        randomization: str
 35            type of randomization applied at each step
 36            "strat" - stratified subsampling (default)
 37            "shuffle" - random subsampling
 38
 39        mass: float
 40            mass on velocity, for `method` == "momentum"
 41
 42        decay: float
 43            coefficient of decrease of the learning rate for
 44            `method` == "poly" and `method` == "exp"
 45
 46        tolerance: float
 47            early stopping parameter (convergence of loss function)
 48
 49        verbose: int
 50            controls verbosity of gradient descent
 51            0 - nothing is printed
 52            1 - a progress bar is printed
 53            2 - successive loss function values are printed
 54
 55    """
 56
 57    # construct the object -----
 58
 59    def __init__(
 60        self,
 61        type_optim="sgd",
 62        num_iters=100,
 63        learning_rate=0.01,
 64        batch_prop=1.0,
 65        learning_method="momentum",
 66        randomization="strat",
 67        mass=0.9,
 68        decay=0.1,
 69        tolerance=1e-3,
 70        verbose=1,
 71    ):
 72        self.type_optim = type_optim
 73        self.num_iters = num_iters
 74        self.learning_rate = learning_rate
 75        self.batch_prop = batch_prop
 76        self.learning_method = learning_method
 77        self.randomization = randomization
 78        self.mass = mass
 79        self.decay = decay
 80        self.tolerance = tolerance
 81        self.verbose = verbose
 82        self.opt = None
 83
 84    def fit(self, loss_func, response, x0, **kwargs):
 85        """Fit the optimizer by minimizing a loss function.
 86
 87        Args:
 88
 89            loss_func: loss function
 90
 91            response: array-like, shape = [n_samples]
 92                target variable (used for subsampling)
 93
 94            x0: array-like, shape = [n_features]
 95                initial value provided to the optimizer
 96
 97            **kwargs: additional parameters to be passed to
 98                    loss function
 99
100        Returns:
101
102            self: object
103
104        """
105
106        if self.type_optim == "scd":
107            self.results = scd(
108                loss_func,
109                response=response,
110                x=x0,
111                num_iters=self.num_iters,
112                batch_prop=self.batch_prop,
113                learning_rate=self.learning_rate,
114                learning_method=self.learning_method,
115                mass=self.mass,
116                decay=self.decay,
117                randomization=self.randomization,
118                tolerance=self.tolerance,
119                verbose=self.verbose,
120                **kwargs
121            )
122
123        if self.type_optim == "sgd":
124            self.results = sgd(
125                loss_func,
126                response=response,
127                x=x0,
128                num_iters=self.num_iters,
129                batch_prop=self.batch_prop,
130                learning_rate=self.learning_rate,
131                learning_method=self.learning_method,
132                mass=self.mass,
133                decay=self.decay,
134                randomization=self.randomization,
135                tolerance=self.tolerance,
136                verbose=self.verbose,
137                **kwargs
138            )
139
140        return self
141
142    def one_hot_encode(self, y, n_classes):
143        return one_hot_encode(y, n_classes)

Optimizer class

Attributes:

type_optim: str
    type of optimizer, (currently) either 'sgd' (stochastic minibatch gradient descent)
    or 'scd' (stochastic minibatch coordinate descent)

num_iters: int
    number of iterations of the optimizer

learning_rate: float
    step size

batch_prop: float
    proportion of the initial data used at each optimization step

learning_method: str
    "poly" - learning rate decreasing as a polynomial function
    of # of iterations
    "exp" - learning rate decreasing as an exponential function
    of # of iterations
    "momentum" - gradient descent using momentum (default)

randomization: str
    type of randomization applied at each step
    "strat" - stratified subsampling (default)
    "shuffle" - random subsampling

mass: float
    mass on velocity, for `method` == "momentum"

decay: float
    coefficient of decrease of the learning rate for
    `method` == "poly" and `method` == "exp"

tolerance: float
    early stopping parameter (convergence of loss function)

verbose: int
    controls verbosity of gradient descent
    0 - nothing is printed
    1 - a progress bar is printed
    2 - successive loss function values are printed
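The `learning_rate`, `decay`, `batch_prop` and `learning_method` attributes describe a standard minibatch gradient-descent recipe. The snippet below is an independent numpy sketch of those ideas (a polynomially decaying step size plus random subsampling) on a least-squares problem; it does not call the library's internal `sgd`/`scd` routines, whose exact update rules are not shown here, so treat the decay formula as an assumption made for illustration.

```python
# Independent sketch of minibatch gradient descent with a polynomially decaying
# step size, illustrating the roles of learning_rate, decay and batch_prop.
import numpy as np

rng = np.random.default_rng(123)
X = rng.normal(size=(500, 5))
beta_true = np.array([1.0, -2.0, 0.5, 0.0, 3.0])
y = X @ beta_true + rng.normal(scale=0.1, size=500)

learning_rate, decay, batch_prop, num_iters = 0.1, 0.1, 0.8, 200
x = np.zeros(5)
for it in range(num_iters):
    step = learning_rate / (1.0 + decay * it)            # "poly"-style decay (assumed form)
    idx = rng.choice(500, size=int(batch_prop * 500), replace=False)
    grad = -2.0 * X[idx].T @ (y[idx] - X[idx] @ x) / len(idx)
    x = x - step * grad

print(np.round(x, 2))  # close to beta_true
```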
def fit(self, loss_func, response, x0, **kwargs):
 84    def fit(self, loss_func, response, x0, **kwargs):
 85        """Fit the optimizer by minimizing a loss function.
 86
 87        Args:
 88
 89            loss_func: loss function
 90
 91            response: array-like, shape = [n_samples]
 92                target variable (used for subsampling)
 93
 94            x0: array-like, shape = [n_features]
 95                initial value provided to the optimizer
 96
 97            **kwargs: additional parameters to be passed to
 98                    loss function
 99
100        Returns:
101
102            self: object
103
104        """
105
106        if self.type_optim == "scd":
107            self.results = scd(
108                loss_func,
109                response=response,
110                x=x0,
111                num_iters=self.num_iters,
112                batch_prop=self.batch_prop,
113                learning_rate=self.learning_rate,
114                learning_method=self.learning_method,
115                mass=self.mass,
116                decay=self.decay,
117                randomization=self.randomization,
118                tolerance=self.tolerance,
119                verbose=self.verbose,
120                **kwargs
121            )
122
123        if self.type_optim == "sgd":
124            self.results = sgd(
125                loss_func,
126                response=response,
127                x=x0,
128                num_iters=self.num_iters,
129                batch_prop=self.batch_prop,
130                learning_rate=self.learning_rate,
131                learning_method=self.learning_method,
132                mass=self.mass,
133                decay=self.decay,
134                randomization=self.randomization,
135                tolerance=self.tolerance,
136                verbose=self.verbose,
137                **kwargs
138            )
139
140        return self

Fit the optimizer by minimizing a loss function.

Args:

loss_func: loss function

response: array-like, shape = [n_samples]
    target variable (used for subsampling)

x0: array-like, shape = [n_features]
    initial value provided to the optimizer

**kwargs: additional parameters to be passed to
        loss function

Returns:

self: object
class QuantileRegressor(sklearn.base.BaseEstimator, sklearn.base.RegressorMixin):
 37class QuantileRegressor(BaseEstimator, RegressorMixin):
 38    """
 39    Quantile Regressor.
 40
 41    Parameters:
 42
 43        obj: base model (regression model)
 44            The base regressor from which to build a
 45            quantile regressor.
 46
 47        level: int, default=95
 48            The level of the quantiles to compute.
 49
 50        scoring: str, default="predictions"
 51            The scoring to use for the optimization and constructing
 52            prediction intervals (predictions, residuals, conformal,
 53              studentized, conformal-studentized).
 54
 55    Attributes:
 56
 57        obj_ : base model (regression model)
 58            The base regressor from which to build a
 59            quantile regressor.
 60
 61        offset_multipliers_ : list
 62            The multipliers for the offset.
 63
 64        scoring_residuals_ : list
 65            The residuals for the scoring.
 66
 67        student_multiplier_ : float
 68            The multiplier for the student.
 69
 70
 71    """
 72
 73    def __init__(self, obj, level=95, scoring="predictions"):
 74        assert scoring in (
 75            "predictions",
 76            "residuals",
 77            "conformal",
 78            "studentized",
 79            "conformal-studentized",
 80        ), "scoring must be one of 'predictions', 'residuals', 'conformal', 'studentized', 'conformal-studentized'"
 81        self.obj = obj
 82        low_risk_level = (1 - level / 100) / 2
 83        self.quantiles = [low_risk_level, 0.5, 1 - low_risk_level]
 84        self.scoring = scoring
 85        self.offset_multipliers_ = None
 86        self.obj_ = None
 87        self.scoring_residuals_ = None
 88        self.student_multiplier_ = None
 89
 90    def _compute_quantile_loss(self, residuals: np.ndarray, quantile: float) -> float:
 91        """
 92        Compute the quantile loss for a given set of residuals and quantile.
 93        """
 94        if not 0 < quantile < 1:
 95            raise ValueError("Quantile should be between 0 and 1.")
 96        loss = quantile * (residuals >= 0) + (quantile - 1) * (residuals < 0)
 97        return np.mean(residuals * loss)
 98
 99    def _optimize_multiplier(
100        self,
101        y: np.ndarray,
102        base_predictions: np.ndarray,
103        prev_predictions: np.ndarray = None,
104        scoring_residuals: np.ndarray = None,
105        quantile: float = 0.5,
106    ) -> float:
107        """
108        Optimize the multiplier for a given quantile.
109        """
110        if not 0 < quantile < 1:
111            raise ValueError("Quantile should be between 0 and 1.")
112
113        def objective(log_multiplier):
114            """
115            Objective function for optimization.
116            """
117            # Convert to positive multiplier using exp
118            multiplier = np.exp(log_multiplier[0])
119            if self.scoring == "predictions":
 120                assert base_predictions is not None, "base_predictions must not be None"
121                # Calculate predictions
122                if prev_predictions is None:
123                    # For first quantile, subtract from conditional expectation
124                    predictions = base_predictions - multiplier * np.abs(
125                        base_predictions
126                    )
127                else:
128                    # For other quantiles, add to previous quantile
129                    offset = multiplier * np.abs(base_predictions)
130                    predictions = prev_predictions + offset
131            elif self.scoring in ("residuals", "conformal"):
132                assert (
133                    scoring_residuals is not None
 134                ), "scoring_residuals must not be None"
135                # print("scoring_residuals", scoring_residuals)
136                # Calculate predictions
137                if prev_predictions is None:
138                    # For first quantile, subtract from conditional expectation
139                    predictions = base_predictions - multiplier * np.std(
140                        scoring_residuals
141                    )
142                    # print("predictions", predictions)
143                else:
144                    # For other quantiles, add to previous quantile
145                    offset = multiplier * np.std(scoring_residuals)
146                    predictions = prev_predictions + offset
147            elif self.scoring in ("studentized", "conformal-studentized"):
148                assert (
149                    scoring_residuals is not None
 150                ), "scoring_residuals must not be None"
151                # Calculate predictions
152                if prev_predictions is None:
153                    # For first quantile, subtract from conditional expectation
154                    predictions = (
155                        base_predictions - multiplier * self.student_multiplier_
156                    )
157                    # print("predictions", predictions)
158                else:
159                    # For other quantiles, add to previous quantile
160                    offset = multiplier * self.student_multiplier_
161                    predictions = prev_predictions + offset
162            else:
163                raise ValueError("Invalid argument 'scoring'")
164
165            residuals = y - predictions
166            return self._compute_quantile_loss(residuals, quantile)
167
168        # Optimize in log space for numerical stability
169        # bounds = [(-10, 10)]  # log space bounds
170        bounds = [(-100, 100)]  # log space bounds
171        result = differential_evolution(
172            objective,
173            bounds,
174            # popsize=15,
175            # maxiter=100,
176            # tol=1e-4,
177            popsize=25,
178            maxiter=200,
179            tol=1e-6,
180            disp=False,
181        )
182
183        return np.exp(result.x[0])
184
185    def fit(self, X: np.ndarray, y: np.ndarray):
186        """Fit the model to the data.
187
188        Parameters:
189
190            X: {array-like}, shape = [n_samples, n_features]
191                Training vectors, where n_samples is the number of samples and
192                n_features is the number of features.
193            y: array-like, shape = [n_samples]
194                Target values.
195        """
196        self.obj_ = clone(self.obj)
197        if self.scoring in ("predictions", "residuals"):
198            self.obj_.fit(X, y)
199            base_predictions = self.obj_.predict(X)
200            scoring_residuals = y - base_predictions
201            self.scoring_residuals_ = scoring_residuals
202        elif self.scoring == "conformal":
203            X_train, X_calib, y_train, y_calib = train_test_split(
204                X, y, test_size=0.5, random_state=42
205            )
206            self.obj_.fit(X_train, y_train)
207            scoring_residuals = y_calib - self.obj_.predict(
208                X_calib
209            )  # These are calibration predictions
210            self.scoring_residuals_ = scoring_residuals
211            # Update base_predictions to use training predictions for optimization
212            self.obj_.fit(X_calib, y_calib)
213            base_predictions = self.obj_.predict(X_calib)
214        elif self.scoring in ("studentized", "conformal-studentized"):
215            # Calculate student multiplier
216            if self.scoring == "conformal-studentized":
217                X_train, X_calib, y_train, y_calib = train_test_split(
218                    X, y, test_size=0.5, random_state=42
219                )
220                self.obj_.fit(X_train, y_train)
221                scoring_residuals = y_calib - self.obj_.predict(X_calib)
222                # Calculate studentized multiplier using calibration data
223                self.student_multiplier_ = np.std(y_calib, ddof=1) / np.sqrt(
224                    len(y_calib)
225                )
226                self.obj_.fit(X_calib, y_calib)
227                base_predictions = self.obj_.predict(X_calib)
228            else:  # regular studentized
229                self.obj_.fit(X, y)
230                base_predictions = self.obj_.predict(X)
231                scoring_residuals = y - base_predictions
232                self.student_multiplier_ = np.std(y, ddof=1) / np.sqrt(len(y))
233
234        # Initialize storage for multipliers
235        self.offset_multipliers_ = []
236        # Keep track of current predictions for each quantile
237        current_predictions = None
238
239        # Fit each quantile sequentially
240        for i, quantile in enumerate(self.quantiles):
241            if self.scoring == "predictions":
242                multiplier = self._optimize_multiplier(
243                    y=y,
244                    base_predictions=base_predictions,
245                    prev_predictions=current_predictions,
246                    quantile=quantile,
247                )
248
249                self.offset_multipliers_.append(multiplier)
250
251                # Update current predictions
252                if current_predictions is None:
253                    # First quantile (lowest)
254                    current_predictions = base_predictions - multiplier * np.abs(
255                        base_predictions
256                    )
257                else:
258                    # Subsequent quantiles
259                    offset = multiplier * np.abs(base_predictions)
260                    current_predictions = current_predictions + offset
261            elif self.scoring == "residuals":
262                multiplier = self._optimize_multiplier(
263                    y=y,
264                    base_predictions=base_predictions,
265                    scoring_residuals=scoring_residuals,
266                    prev_predictions=current_predictions,
267                    quantile=quantile,
268                )
269
270                self.offset_multipliers_.append(multiplier)
271
272                # Update current predictions
273                if current_predictions is None:
274                    # First quantile (lowest)
275                    current_predictions = base_predictions - multiplier * np.std(
276                        scoring_residuals
277                    )
278                else:
279                    # Subsequent quantiles
280                    offset = multiplier * np.std(scoring_residuals)
281                    current_predictions = current_predictions + offset
282            elif self.scoring == "conformal":
283                multiplier = self._optimize_multiplier(
284                    y=y_calib,
285                    base_predictions=base_predictions,
286                    scoring_residuals=scoring_residuals,
287                    prev_predictions=current_predictions,
288                    quantile=quantile,
289                )
290
291                self.offset_multipliers_.append(multiplier)
292
293                # Update current predictions
294                if current_predictions is None:
295                    # First quantile (lowest)
296                    current_predictions = base_predictions - multiplier * np.std(
297                        scoring_residuals
298                    )
299                else:
300                    # Subsequent quantiles
301                    offset = multiplier * np.std(scoring_residuals)
302                    current_predictions = current_predictions + offset
303            elif self.scoring in ("studentized", "conformal-studentized"):
304                multiplier = self._optimize_multiplier(
305                    y=y_calib if self.scoring == "conformal-studentized" else y,
306                    base_predictions=base_predictions,
307                    scoring_residuals=scoring_residuals,
308                    prev_predictions=current_predictions,
309                    quantile=quantile,
310                )
311
312                self.offset_multipliers_.append(multiplier)
313
314                # Update current predictions
315                if current_predictions is None:
316                    current_predictions = (
317                        base_predictions - multiplier * self.student_multiplier_
318                    )
319                else:
320                    offset = multiplier * self.student_multiplier_
321                    current_predictions = current_predictions + offset
322
323        return self
324
325    def predict(self, X, return_pi=False):
326        """Predict the target variable.
327
328        Parameters:
329
330            X: {array-like}, shape = [n_samples, n_features]
331                Testing set vectors, where n_samples is the number of samples and
332                n_features is the number of features.
333
334            return_pi: bool, default=False
335                Whether to return the prediction intervals.
336        """
337        if self.obj_ is None or self.offset_multipliers_ is None:
338            raise ValueError("Model not fitted yet.")
339
340        base_predictions = self.obj_.predict(X)
341        all_predictions = []
342
343        if self.scoring == "predictions":
344
345            # Generate first quantile
346            current_predictions = base_predictions - self.offset_multipliers_[
347                0
348            ] * np.abs(base_predictions)
349            all_predictions.append(current_predictions)
350
351            # Generate remaining quantiles
352            for multiplier in self.offset_multipliers_[1:]:
353                offset = multiplier * np.abs(base_predictions)
354                current_predictions = current_predictions + offset
355                all_predictions.append(current_predictions)
356
357        elif self.scoring in ("residuals", "conformal"):
358
359            # Generate first quantile
360            current_predictions = base_predictions - self.offset_multipliers_[
361                0
362            ] * np.std(self.scoring_residuals_)
363            all_predictions.append(current_predictions)
364
365            # Generate remaining quantiles
366            for multiplier in self.offset_multipliers_[1:]:
367                offset = multiplier * np.std(self.scoring_residuals_)
368                current_predictions = current_predictions + offset
369                all_predictions.append(current_predictions)
370
371        elif self.scoring in ("studentized", "conformal-studentized"):
372            # Generate first quantile
373            current_predictions = (
374                base_predictions
375                - self.offset_multipliers_[0] * self.student_multiplier_
376            )
377            all_predictions.append(current_predictions)
378
379            # Generate remaining quantiles
380            for multiplier in self.offset_multipliers_[1:]:
381                offset = multiplier * self.student_multiplier_
382                current_predictions = current_predictions + offset
383                all_predictions.append(current_predictions)
384
385        if not return_pi:
386            return np.asarray(all_predictions[1])
387
388        DescribeResult = namedtuple(
389            "DescribeResult", ["mean", "lower", "upper", "median"]
390        )
391        # return a namedtuple instance holding mean, lower, upper and median predictions
392        return DescribeResult(
393            mean=base_predictions,
394            lower=np.asarray(all_predictions[0]),
395            upper=np.asarray(all_predictions[2]),
396            median=np.asarray(all_predictions[1]),
397        )

Quantile Regressor.

Parameters:

obj: base model (regression model)
    The base regressor from which to build a
    quantile regressor.

level: int, default=95
    The level of the quantiles to compute.

scoring: str, default="predictions"
    The scoring to use for the optimization and constructing
    prediction intervals (predictions, residuals, conformal,
      studentized, conformal-studentized).

Attributes:

obj_ : base model (regression model)
    The base regressor from which to build a
    quantile regressor.

offset_multipliers_ : list
    The multipliers for the offset.

scoring_residuals_ : list
    The residuals for the scoring.

student_multiplier_ : float
    The multiplier for the student.
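A minimal usage sketch follows. Only the constructor arguments shown above (`obj`, `level`, `scoring`) and the `fit`/`predict` signatures are taken from the source; the base learner and dataset are arbitrary choices for illustration.

```python
# Minimal sketch: quantile predictions around a Ridge base learner.
import nnetsauce as ns
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split

X, y = fetch_california_housing(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

qr = ns.QuantileRegressor(obj=Ridge(), level=90, scoring="residuals")
qr.fit(X_train, y_train)

preds = qr.predict(X_test)                # median predictions only
res = qr.predict(X_test, return_pi=True)  # mean / lower / median / upper
print(res.lower[:3], res.median[:3], res.upper[:3])
```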
def fit(self, X: numpy.ndarray, y: numpy.ndarray):
185    def fit(self, X: np.ndarray, y: np.ndarray):
186        """Fit the model to the data.
187
188        Parameters:
189
190            X: {array-like}, shape = [n_samples, n_features]
191                Training vectors, where n_samples is the number of samples and
192                n_features is the number of features.
193            y: array-like, shape = [n_samples]
194                Target values.
195        """
196        self.obj_ = clone(self.obj)
197        if self.scoring in ("predictions", "residuals"):
198            self.obj_.fit(X, y)
199            base_predictions = self.obj_.predict(X)
200            scoring_residuals = y - base_predictions
201            self.scoring_residuals_ = scoring_residuals
202        elif self.scoring == "conformal":
203            X_train, X_calib, y_train, y_calib = train_test_split(
204                X, y, test_size=0.5, random_state=42
205            )
206            self.obj_.fit(X_train, y_train)
207            scoring_residuals = y_calib - self.obj_.predict(
208                X_calib
209            )  # These are calibration predictions
210            self.scoring_residuals_ = scoring_residuals
211            # Update base_predictions to use training predictions for optimization
212            self.obj_.fit(X_calib, y_calib)
213            base_predictions = self.obj_.predict(X_calib)
214        elif self.scoring in ("studentized", "conformal-studentized"):
215            # Calculate student multiplier
216            if self.scoring == "conformal-studentized":
217                X_train, X_calib, y_train, y_calib = train_test_split(
218                    X, y, test_size=0.5, random_state=42
219                )
220                self.obj_.fit(X_train, y_train)
221                scoring_residuals = y_calib - self.obj_.predict(X_calib)
222                # Calculate studentized multiplier using calibration data
223                self.student_multiplier_ = np.std(y_calib, ddof=1) / np.sqrt(
224                    len(y_calib)
225                )
226                self.obj_.fit(X_calib, y_calib)
227                base_predictions = self.obj_.predict(X_calib)
228            else:  # regular studentized
229                self.obj_.fit(X, y)
230                base_predictions = self.obj_.predict(X)
231                scoring_residuals = y - base_predictions
232                self.student_multiplier_ = np.std(y, ddof=1) / np.sqrt(len(y))
233
234        # Initialize storage for multipliers
235        self.offset_multipliers_ = []
236        # Keep track of current predictions for each quantile
237        current_predictions = None
238
239        # Fit each quantile sequentially
240        for i, quantile in enumerate(self.quantiles):
241            if self.scoring == "predictions":
242                multiplier = self._optimize_multiplier(
243                    y=y,
244                    base_predictions=base_predictions,
245                    prev_predictions=current_predictions,
246                    quantile=quantile,
247                )
248
249                self.offset_multipliers_.append(multiplier)
250
251                # Update current predictions
252                if current_predictions is None:
253                    # First quantile (lowest)
254                    current_predictions = base_predictions - multiplier * np.abs(
255                        base_predictions
256                    )
257                else:
258                    # Subsequent quantiles
259                    offset = multiplier * np.abs(base_predictions)
260                    current_predictions = current_predictions + offset
261            elif self.scoring == "residuals":
262                multiplier = self._optimize_multiplier(
263                    y=y,
264                    base_predictions=base_predictions,
265                    scoring_residuals=scoring_residuals,
266                    prev_predictions=current_predictions,
267                    quantile=quantile,
268                )
269
270                self.offset_multipliers_.append(multiplier)
271
272                # Update current predictions
273                if current_predictions is None:
274                    # First quantile (lowest)
275                    current_predictions = base_predictions - multiplier * np.std(
276                        scoring_residuals
277                    )
278                else:
279                    # Subsequent quantiles
280                    offset = multiplier * np.std(scoring_residuals)
281                    current_predictions = current_predictions + offset
282            elif self.scoring == "conformal":
283                multiplier = self._optimize_multiplier(
284                    y=y_calib,
285                    base_predictions=base_predictions,
286                    scoring_residuals=scoring_residuals,
287                    prev_predictions=current_predictions,
288                    quantile=quantile,
289                )
290
291                self.offset_multipliers_.append(multiplier)
292
293                # Update current predictions
294                if current_predictions is None:
295                    # First quantile (lowest)
296                    current_predictions = base_predictions - multiplier * np.std(
297                        scoring_residuals
298                    )
299                else:
300                    # Subsequent quantiles
301                    offset = multiplier * np.std(scoring_residuals)
302                    current_predictions = current_predictions + offset
303            elif self.scoring in ("studentized", "conformal-studentized"):
304                multiplier = self._optimize_multiplier(
305                    y=y_calib if self.scoring == "conformal-studentized" else y,
306                    base_predictions=base_predictions,
307                    scoring_residuals=scoring_residuals,
308                    prev_predictions=current_predictions,
309                    quantile=quantile,
310                )
311
312                self.offset_multipliers_.append(multiplier)
313
314                # Update current predictions
315                if current_predictions is None:
316                    current_predictions = (
317                        base_predictions - multiplier * self.student_multiplier_
318                    )
319                else:
320                    offset = multiplier * self.student_multiplier_
321                    current_predictions = current_predictions + offset
322
323        return self

Fit the model to the data.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number of samples and
    n_features is the number of features.
y: array-like, shape = [n_samples]
    Target values.
def predict(self, X, return_pi=False):
325    def predict(self, X, return_pi=False):
326        """Predict the target variable.
327
328        Parameters:
329
330            X: {array-like}, shape = [n_samples, n_features]
331                Testing set vectors, where n_samples is the number of samples and
332                n_features is the number of features.
333
334            return_pi: bool, default=False
335                Whether to return the prediction intervals.
336        """
337        if self.obj_ is None or self.offset_multipliers_ is None:
338            raise ValueError("Model not fitted yet.")
339
340        base_predictions = self.obj_.predict(X)
341        all_predictions = []
342
343        if self.scoring == "predictions":
344
345            # Generate first quantile
346            current_predictions = base_predictions - self.offset_multipliers_[
347                0
348            ] * np.abs(base_predictions)
349            all_predictions.append(current_predictions)
350
351            # Generate remaining quantiles
352            for multiplier in self.offset_multipliers_[1:]:
353                offset = multiplier * np.abs(base_predictions)
354                current_predictions = current_predictions + offset
355                all_predictions.append(current_predictions)
356
357        elif self.scoring in ("residuals", "conformal"):
358
359            # Generate first quantile
360            current_predictions = base_predictions - self.offset_multipliers_[
361                0
362            ] * np.std(self.scoring_residuals_)
363            all_predictions.append(current_predictions)
364
365            # Generate remaining quantiles
366            for multiplier in self.offset_multipliers_[1:]:
367                offset = multiplier * np.std(self.scoring_residuals_)
368                current_predictions = current_predictions + offset
369                all_predictions.append(current_predictions)
370
371        elif self.scoring in ("studentized", "conformal-studentized"):
372            # Generate first quantile
373            current_predictions = (
374                base_predictions
375                - self.offset_multipliers_[0] * self.student_multiplier_
376            )
377            all_predictions.append(current_predictions)
378
379            # Generate remaining quantiles
380            for multiplier in self.offset_multipliers_[1:]:
381                offset = multiplier * self.student_multiplier_
382                current_predictions = current_predictions + offset
383                all_predictions.append(current_predictions)
384
385        if not return_pi:
386            return np.asarray(all_predictions[1])
387
388        DescribeResult = namedtuple(
389            "DescribeResult", ["mean", "lower", "upper", "median"]
390        )
391        # return a namedtuple instance holding mean, lower, upper and median predictions
392        return DescribeResult(
393            mean=base_predictions,
394            lower=np.asarray(all_predictions[0]),
395            upper=np.asarray(all_predictions[2]),
396            median=np.asarray(all_predictions[1]),
397        )

Predict the target variable.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Testing set vectors, where n_samples is the number of samples and
    n_features is the number of features.

return_pi: bool, default=False
    Whether to return the prediction intervals.
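Since `lower` and `upper` are meant to bracket the target at the requested `level`, a quick sanity check is to measure empirical coverage and interval width on held-out data. The short sketch below continues the hypothetical `qr`, `X_test`, `y_test` objects from the previous example.

```python
# Empirical coverage and sharpness of the [lower, upper] band on held-out data
# (continues the hypothetical qr / X_test / y_test from the sketch above).
import numpy as np

res = qr.predict(X_test, return_pi=True)
inside = (y_test >= res.lower) & (y_test <= res.upper)
print(f"empirical coverage: {inside.mean():.3f}  (nominal level: 0.90)")
print(f"mean interval width: {np.mean(res.upper - res.lower):.3f}")
```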
class QuantileClassifier(sklearn.base.BaseEstimator, sklearn.base.ClassifierMixin):
41class QuantileClassifier(BaseEstimator, ClassifierMixin):
42    """
43    Quantile Classifier.
44
45    Parameters:
46
47        obj: base model (regression model)
48            The base regressor wrapped into a QuantileRegressor and, via
49            SimpleMultitaskClassifier, used to build a quantile classifier.
50
51        level: int, default=95
52            The level of the quantiles to compute.
53
54        scoring: str, default="predictions"
55            The scoring to use for the optimization and constructing
56            prediction intervals (predictions, residuals, conformal,
57              studentized, conformal-studentized).
58
59    Attributes:
60
61        obj_ : base model (classification model)
62            The base classifier from which to build a
63            quantile classifier.
64
65        offset_multipliers_ : list
66            The multipliers for the offset.
67
68        scoring_residuals_ : list
69            The residuals for the scoring.
70
71        student_multiplier_ : float
72            The multiplier for the student.
73
74
75    """
76
77    def __init__(self, obj, level=95, scoring="predictions"):
78        assert scoring in (
79            "predictions",
80            "residuals",
81            "conformal",
82            "studentized",
83            "conformal-studentized",
84        ), "scoring must be one of 'predictions', 'residuals', 'conformal', 'studentized', 'conformal-studentized'"
85        self.obj = obj
86        quantileregressor = QuantileRegressor(self.obj)
87        quantileregressor.predict = partial(quantileregressor.predict, return_pi=False)
88        self.obj_ = SimpleMultitaskClassifier(quantileregressor)
89
90    def fit(self, X, y, **kwargs):
91        self.obj_.fit(X, y, **kwargs)
92
93    def predict(self, X, **kwargs):
94        return self.obj_.predict(X, **kwargs)
95
96    def predict_proba(self, X, **kwargs):
97        return self.obj_.predict_proba(X, **kwargs)

Quantile Classifier.

Parameters:

obj: base model (regression model)
    The base regressor wrapped into a QuantileRegressor and, via
    SimpleMultitaskClassifier, used to build a quantile classifier.

level: int, default=95
    The level of the quantiles to compute.

scoring: str, default="predictions"
    The scoring to use for the optimization and constructing
    prediction intervals (predictions, residuals, conformal,
      studentized, conformal-studentized).

Attributes:

obj_ : base model (classification model)
    The base classifier from which to build a
    quantile classifier.

offset_multipliers_ : list
    The multipliers for the offset.

scoring_residuals_ : list
    The residuals for the scoring.

student_multiplier_ : float
    The multiplier for the student.
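A minimal usage sketch. Per the constructor above, `obj` is wrapped in a `QuantileRegressor` and then in a `SimpleMultitaskClassifier`, so the base learner supplied here is a regression model; the dataset and base learner are arbitrary choices for illustration, not part of the library's documentation.

```python
# Minimal sketch: quantile-based classification with a linear-regression base learner.
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

clf = ns.QuantileClassifier(obj=LinearRegression())
clf.fit(X_train, y_train)

preds = clf.predict(X_test)
probs = clf.predict_proba(X_test)
print(metrics.accuracy_score(y_test, preds))
```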
def fit(self, X, y, **kwargs):
90    def fit(self, X, y, **kwargs):
91        self.obj_.fit(X, y, **kwargs)
def predict(self, X, **kwargs):
93    def predict(self, X, **kwargs):
94        return self.obj_.predict(X, **kwargs)
def predict_proba(self, X, **kwargs):
96    def predict_proba(self, X, **kwargs):
97        return self.obj_.predict_proba(X, **kwargs)
class RandomBagRegressor(nnetsauce.randombag.bag.RandomBag, sklearn.base.RegressorMixin):
 18class RandomBagRegressor(RandomBag, RegressorMixin):
 19    """Randomized 'Bagging' Regression model
 20
 21    Parameters:
 22
 23        obj: object
 24            any object containing a method fit (obj.fit()) and a method predict
 25            (obj.predict())
 26
 27        n_estimators: int
 28            number of base learners in the bagging ensemble
 29
 30        n_hidden_features: int
 31            number of nodes in the hidden layer
 32
 33        activation_name: str
 34            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 35
 36        a: float
 37            hyperparameter for 'prelu' or 'elu' activation function
 38
 39        nodes_sim: str
 40            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 41            'uniform'
 42
 43        bias: boolean
 44            indicates if the hidden layer contains a bias term (True) or not
 45            (False)
 46
 47        dropout: float
 48            regularization parameter; (random) percentage of nodes dropped out
 49            of the training
 50
 51        direct_link: boolean
 52            indicates if the original predictors are included (True) in the model's
 53            fitting or not (False)
 54
 55        n_clusters: int
 56            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 57                no clustering)
 58
 59        cluster_encode: bool
 60            defines how the variable containing clusters is treated (default is one-hot)
 61            if `False`, then labels are used, without one-hot encoding
 62
 63        type_clust: str
 64            type of clustering method: currently k-means ('kmeans') or Gaussian
 65            Mixture Model ('gmm')
 66
 67        type_scaling: a tuple of 3 strings
 68            scaling methods for inputs, hidden layer, and clustering respectively
 69            (and when relevant).
 70            Currently available: standardization ('std') or MinMax scaling ('minmax')
 71
 72        col_sample: float
 73            percentage of covariates randomly chosen for training
 74
 75        row_sample: float
 76            percentage of rows chosen for training, by stratified bootstrapping
 77
 78        seed: int
 79            reproducibility seed for nodes_sim=='uniform'
 80
 81        backend: str
 82            "cpu" or "gpu" or "tpu"
 83
 84    Attributes:
 85
 86        voter_: dict
 87            dictionary containing all the fitted base-learners
 88
 89
 90    Examples:
 91
 92    ```python
 93    import numpy as np
 94    import nnetsauce as ns
 95    from sklearn.datasets import fetch_california_housing
 96    from sklearn.tree import DecisionTreeRegressor
 97    from sklearn.model_selection import train_test_split
 98
 99    X, y = fetch_california_housing(return_X_y=True, as_frame=False)
100
101    # split data into training test and test set
102    X_train, X_test, y_train, y_test = train_test_split(X, y,
103                                                        test_size=0.2, random_state=13)
104
105    # Requires further tuning
106    obj = DecisionTreeRegressor(max_depth=3, random_state=123)
107    obj2 = ns.RandomBagRegressor(obj=obj, direct_link=False,
108                                n_estimators=50,
109                                col_sample=0.9, row_sample=0.9,
110                                dropout=0, n_clusters=0, verbose=1)
111
112    obj2.fit(X_train, y_train)
113
114    print(np.sqrt(obj2.score(X_test, y_test))) # RMSE
115
116    ```
117
118    """
119
120    # construct the object -----
121
122    def __init__(
123        self,
124        obj,
125        n_estimators=10,
126        n_hidden_features=1,
127        activation_name="relu",
128        a=0.01,
129        nodes_sim="sobol",
130        bias=True,
131        dropout=0,
132        direct_link=False,
133        n_clusters=2,
134        cluster_encode=True,
135        type_clust="kmeans",
136        type_scaling=("std", "std", "std"),
137        col_sample=1,
138        row_sample=1,
139        n_jobs=None,
140        seed=123,
141        verbose=1,
142        backend="cpu",
143    ):
144        super().__init__(
145            obj=obj,
146            n_estimators=n_estimators,
147            n_hidden_features=n_hidden_features,
148            activation_name=activation_name,
149            a=a,
150            nodes_sim=nodes_sim,
151            bias=bias,
152            dropout=dropout,
153            direct_link=direct_link,
154            n_clusters=n_clusters,
155            cluster_encode=cluster_encode,
156            type_clust=type_clust,
157            type_scaling=type_scaling,
158            col_sample=col_sample,
159            row_sample=row_sample,
160            seed=seed,
161            backend=backend,
162        )
163
164        self.type_fit = "regression"
165        self.verbose = verbose
166        self.n_jobs = n_jobs
167        self.voter_ = {}
168
169    def fit(self, X, y, **kwargs):
170        """Fit Random 'Bagging' model to training data (X, y).
171
172        Args:
173
174            X: {array-like}, shape = [n_samples, n_features]
175                Training vectors, where n_samples is the number
176                of samples and n_features is the number of features.
177
178            y: array-like, shape = [n_samples]
179                Target values.
180
181            **kwargs: additional parameters to be passed to
182                    self.cook_training_set or self.obj.fit
183
184        Returns:
185
186            self: object
187
188        """
189
190        base_learner = CustomRegressor(
191            self.obj,
192            n_hidden_features=self.n_hidden_features,
193            activation_name=self.activation_name,
194            a=self.a,
195            nodes_sim=self.nodes_sim,
196            bias=self.bias,
197            dropout=self.dropout,
198            direct_link=self.direct_link,
199            n_clusters=self.n_clusters,
200            type_clust=self.type_clust,
201            type_scaling=self.type_scaling,
202            col_sample=self.col_sample,
203            row_sample=self.row_sample,
204            seed=self.seed,
205        )
206
207        # 1 - Sequential training -----
208
209        if self.n_jobs is None:
210            self.voter_ = rbagloop_regression(
211                base_learner, X, y, self.n_estimators, self.verbose, self.seed
212            )
213
214            self.n_estimators = len(self.voter_)
215
216            return self
217
218        # 2 - Parallel training -----
219        # buggy
220        # if self.n_jobs is not None:
221        def fit_estimators(m):
222            base_learner__ = deepcopy(base_learner)
223            base_learner__.set_params(seed=self.seed + m * 1000)
224            base_learner__.fit(X, y, **kwargs)
225            return base_learner__
226
227        if self.verbose == 1:
228            voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
229                delayed(fit_estimators)(m) for m in tqdm(range(self.n_estimators))
230            )
231        else:
232            voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
233                delayed(fit_estimators)(m) for m in range(self.n_estimators)
234            )
235
236        self.voter_ = {i: elt for i, elt in enumerate(voters_list)}
237
238        self.n_estimators = len(self.voter_)
239
240        return self
241
242    def predict(self, X, weights=None, **kwargs):
243        """Predict for test data X.
244
245        Args:
246
247            X: {array-like}, shape = [n_samples, n_features]
248                Training vectors, where n_samples is the number
249                of samples and n_features is the number of features.
250
251            **kwargs: additional parameters to be passed to
252                    self.cook_test_set
253
254        Returns:
255
256            estimates for test data: {array-like}
257
258        """
259
260        def calculate_preds(voter, weights=None):
261            ensemble_preds = 0
262
263            n_iter = len(voter)
264
265            assert n_iter > 0, "no estimator found in `RandomBag` ensemble"
266
267            if weights is None:
268                for idx, elt in voter.items():
269                    ensemble_preds += elt.predict(X)
270
271                return ensemble_preds / n_iter
272
273            # if weights is not None:
274            for idx, elt in voter.items():
275                ensemble_preds += weights[idx] * elt.predict(X)
276
277            return ensemble_preds
278
279        # end calculate_preds ----
280
281        if weights is None:
282            return calculate_preds(self.voter_)
283
284        # if weights is not None:
285        self.weights = weights
286
287        return calculate_preds(self.voter_, weights)

Randomized 'Bagging' Regression model

Parameters:

obj: object
    any object containing a method fit (obj.fit()) and a method predict
    (obj.predict())

n_estimators: int
    number of bagging iterations (base learners in the ensemble)

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

col_sample: float
    percentage of covariates randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

seed: int
    reproducibility seed for nodes_sim=='uniform'

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

voter_: dict
    dictionary containing all the fitted base-learners

Examples:

import numpy as np
import nnetsauce as ns
from sklearn.datasets import fetch_california_housing
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split

X, y = fetch_california_housing(return_X_y=True, as_frame=False)

# split data into training test and test set
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.2, random_state=13)

# Requires further tuning
obj = DecisionTreeRegressor(max_depth=3, random_state=123)
obj2 = ns.RandomBagRegressor(obj=obj, direct_link=False,
                            n_estimators=50,
                            col_sample=0.9, row_sample=0.9,
                            dropout=0, n_clusters=0, verbose=1)

obj2.fit(X_train, y_train)

print(np.sqrt(obj2.score(X_test, y_test))) # RMSE
def fit(self, X, y, **kwargs):
169    def fit(self, X, y, **kwargs):
170        """Fit Random 'Bagging' model to training data (X, y).
171
172        Args:
173
174            X: {array-like}, shape = [n_samples, n_features]
175                Training vectors, where n_samples is the number
176                of samples and n_features is the number of features.
177
178            y: array-like, shape = [n_samples]
179                Target values.
180
181            **kwargs: additional parameters to be passed to
182                    self.cook_training_set or self.obj.fit
183
184        Returns:
185
186            self: object
187
188        """
189
190        base_learner = CustomRegressor(
191            self.obj,
192            n_hidden_features=self.n_hidden_features,
193            activation_name=self.activation_name,
194            a=self.a,
195            nodes_sim=self.nodes_sim,
196            bias=self.bias,
197            dropout=self.dropout,
198            direct_link=self.direct_link,
199            n_clusters=self.n_clusters,
200            type_clust=self.type_clust,
201            type_scaling=self.type_scaling,
202            col_sample=self.col_sample,
203            row_sample=self.row_sample,
204            seed=self.seed,
205        )
206
207        # 1 - Sequential training -----
208
209        if self.n_jobs is None:
210            self.voter_ = rbagloop_regression(
211                base_learner, X, y, self.n_estimators, self.verbose, self.seed
212            )
213
214            self.n_estimators = len(self.voter_)
215
216            return self
217
218        # 2 - Parallel training -----
219        # buggy
220        # if self.n_jobs is not None:
221        def fit_estimators(m):
222            base_learner__ = deepcopy(base_learner)
223            base_learner__.set_params(seed=self.seed + m * 1000)
224            base_learner__.fit(X, y, **kwargs)
225            return base_learner__
226
227        if self.verbose == 1:
228            voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
229                delayed(fit_estimators)(m) for m in tqdm(range(self.n_estimators))
230            )
231        else:
232            voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
233                delayed(fit_estimators)(m) for m in range(self.n_estimators)
234            )
235
236        self.voter_ = {i: elt for i, elt in enumerate(voters_list)}
237
238        self.n_estimators = len(self.voter_)
239
240        return self

Fit Random 'Bagging' model to training data (X, y).

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
def predict(self, X, weights=None, **kwargs):
242    def predict(self, X, weights=None, **kwargs):
243        """Predict for test data X.
244
245        Args:
246
247            X: {array-like}, shape = [n_samples, n_features]
248                Training vectors, where n_samples is the number
249                of samples and n_features is the number of features.
250
251            **kwargs: additional parameters to be passed to
252                    self.cook_test_set
253
254        Returns:
255
256            estimates for test data: {array-like}
257
258        """
259
260        def calculate_preds(voter, weights=None):
261            ensemble_preds = 0
262
263            n_iter = len(voter)
264
265            assert n_iter > 0, "no estimator found in `RandomBag` ensemble"
266
267            if weights is None:
268                for idx, elt in voter.items():
269                    ensemble_preds += elt.predict(X)
270
271                return ensemble_preds / n_iter
272
273            # if weights is not None:
274            for idx, elt in voter.items():
275                ensemble_preds += weights[idx] * elt.predict(X)
276
277            return ensemble_preds
278
279        # end calculate_preds ----
280
281        if weights is None:
282            return calculate_preds(self.voter_)
283
284        # if weights is not None:
285        self.weights = weights
286
287        return calculate_preds(self.voter_, weights)

Predict for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

estimates for test data: {array-like}
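
The `weights` argument is not described above; when supplied, the source combines the fitted base learners' predictions as a weighted sum instead of a plain average. A hypothetical continuation of the `RandomBagRegressor` example above, using uniform weights (which reproduces the plain average):

```python
import numpy as np

# uniform weights over the fitted base learners; any non-negative
# weights (typically summing to 1) could be used instead
weights = np.repeat(1.0 / obj2.n_estimators, obj2.n_estimators)
preds_weighted = obj2.predict(X_test, weights=weights)
```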
class RandomBagClassifier(nnetsauce.randombag.bag.RandomBag, sklearn.base.ClassifierMixin):
 18class RandomBagClassifier(RandomBag, ClassifierMixin):
 19    """Randomized 'Bagging' Classification model
 20
 21    Parameters:
 22
 23        obj: object
 24            any object containing a method fit (obj.fit()) and a method predict
 25            (obj.predict())
 26
 27        n_estimators: int
 28            number of bagging iterations (base learners in the ensemble)
 29
 30        n_hidden_features: int
 31            number of nodes in the hidden layer
 32
 33        activation_name: str
 34            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 35
 36        a: float
 37            hyperparameter for 'prelu' or 'elu' activation function
 38
 39        nodes_sim: str
 40            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 41            'uniform'
 42
 43        bias: boolean
 44            indicates if the hidden layer contains a bias term (True) or not
 45            (False)
 46
 47        dropout: float
 48            regularization parameter; (random) percentage of nodes dropped out
 49            of the training
 50
 51        direct_link: boolean
 52            indicates if the original predictors are included (True) in model's
 53            fitting or not (False)
 54
 55        n_clusters: int
 56            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 57                no clustering)
 58
 59        cluster_encode: bool
 60            defines how the variable containing clusters is treated (default is one-hot)
 61            if `False`, then labels are used, without one-hot encoding
 62
 63        type_clust: str
 64            type of clustering method: currently k-means ('kmeans') or Gaussian
 65            Mixture Model ('gmm')
 66
 67        type_scaling: a tuple of 3 strings
 68            scaling methods for inputs, hidden layer, and clustering respectively
 69            (and when relevant).
 70            Currently available: standardization ('std') or MinMax scaling ('minmax')
 71
 72        col_sample: float
 73            percentage of covariates randomly chosen for training
 74
 75        row_sample: float
 76            percentage of rows chosen for training, by stratified bootstrapping
 77
 78        seed: int
 79            reproducibility seed for nodes_sim=='uniform'
 80
 81        backend: str
 82            "cpu" or "gpu" or "tpu"
 83
 84    Attributes:
 85
 86        voter_: dict
 87            dictionary containing all the fitted base-learners
 88
 89
 90    Examples:
 91
 92    See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/randombag_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/randombag_classification.py)
 93
 94    ```python
 95    import nnetsauce as ns
 96    from sklearn.datasets import load_breast_cancer
 97    from sklearn.tree import DecisionTreeClassifier
 98    from sklearn.model_selection import train_test_split
 99    from sklearn import metrics
100    from time import time
101
102
103    breast_cancer = load_breast_cancer()
104    Z = breast_cancer.data
105    t = breast_cancer.target
106    np.random.seed(123)
107    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)
108
109    # decision tree
110    clf = DecisionTreeClassifier(max_depth=2, random_state=123)
111    fit_obj = ns.RandomBagClassifier(clf, n_hidden_features=2,
112                                    direct_link=True,
113                                    n_estimators=100,
114                                    col_sample=0.9, row_sample=0.9,
115                                    dropout=0.3, n_clusters=0, verbose=1)
116
117    start = time()
118    fit_obj.fit(X_train, y_train)
119    print(f"Elapsed {time() - start}")
120
121    print(fit_obj.score(X_test, y_test))
122    print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
123
124    start = time()
125    preds = fit_obj.predict(X_test)
126    print(f"Elapsed {time() - start}")
127    print(metrics.classification_report(preds, y_test))
128    ```
129
130    """
131
132    # construct the object -----
133
134    def __init__(
135        self,
136        obj,
137        n_estimators=10,
138        n_hidden_features=1,
139        activation_name="relu",
140        a=0.01,
141        nodes_sim="sobol",
142        bias=True,
143        dropout=0,
144        direct_link=False,
145        n_clusters=2,
146        cluster_encode=True,
147        type_clust="kmeans",
148        type_scaling=("std", "std", "std"),
149        col_sample=1,
150        row_sample=1,
151        n_jobs=None,
152        seed=123,
153        verbose=1,
154        backend="cpu",
155    ):
156        super().__init__(
157            obj=obj,
158            n_estimators=n_estimators,
159            n_hidden_features=n_hidden_features,
160            activation_name=activation_name,
161            a=a,
162            nodes_sim=nodes_sim,
163            bias=bias,
164            dropout=dropout,
165            direct_link=direct_link,
166            n_clusters=n_clusters,
167            cluster_encode=cluster_encode,
168            type_clust=type_clust,
169            type_scaling=type_scaling,
170            col_sample=col_sample,
171            row_sample=row_sample,
172            seed=seed,
173            backend=backend,
174        )
175
176        self.type_fit = "classification"
177        self.verbose = verbose
178        self.n_jobs = n_jobs
179        self.voter_ = {}
180
181    def fit(self, X, y, **kwargs):
182        """Fit Random 'Bagging' model to training data (X, y).
183
184        Args:
185
186            X: {array-like}, shape = [n_samples, n_features]
187                Training vectors, where n_samples is the number
188                of samples and n_features is the number of features.
189
190            y: array-like, shape = [n_samples]
191                Target values.
192
193            **kwargs: additional parameters to be passed to
194                    self.cook_training_set or self.obj.fit
195
196        Returns:
197
198            self: object
199
200        """
201
202        assert mx.is_factor(y), "y must contain only integers"
203
204        self.n_classes_ = len(np.unique(y))  # for compatibility with sklearn
205
206        # training
207        self.n_classes = len(np.unique(y))
208
209        base_learner = CustomClassifier(
210            self.obj,
211            n_hidden_features=self.n_hidden_features,
212            activation_name=self.activation_name,
213            a=self.a,
214            nodes_sim=self.nodes_sim,
215            bias=self.bias,
216            dropout=self.dropout,
217            direct_link=self.direct_link,
218            n_clusters=self.n_clusters,
219            type_clust=self.type_clust,
220            type_scaling=self.type_scaling,
221            col_sample=self.col_sample,
222            row_sample=self.row_sample,
223            seed=self.seed,
224            cv_calibration=None
225        )
226
227        # 1 - Sequential training -----
228
229        if self.n_jobs is None:
230            self.voter_ = rbagloop_classification(
231                base_learner, X, y, self.n_estimators, self.verbose, self.seed
232            )
233
234            self.n_estimators = len(self.voter_)
235
236            return self
237
238        # 2 - Parallel training -----
239        # buggy
240        # if self.n_jobs is not None:
241        def fit_estimators(m):
242            base_learner__ = deepcopy(base_learner)
243            base_learner__.set_params(seed=self.seed + m * 1000)
244            base_learner__.fit(X, y, **kwargs)
245            return base_learner__
246
247        if self.verbose == 1:
248            voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
249                delayed(fit_estimators)(m) for m in tqdm(range(self.n_estimators))
250            )
251        else:
252            voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
253                delayed(fit_estimators)(m) for m in range(self.n_estimators)
254            )
255
256        self.voter_ = {idx: elt for idx, elt in enumerate(voters_list)}
257
258        self.n_estimators = len(self.voter_)
259        self.classes_ = np.unique(y)
260        return self
261
262    def predict(self, X, weights=None, **kwargs):
263        """Predict test data X.
264
265        Args:
266
267            X: {array-like}, shape = [n_samples, n_features]
268                Training vectors, where n_samples is the number
269                of samples and n_features is the number of features.
270
271            **kwargs: additional parameters to be passed to
272                    self.cook_test_set
273
274        Returns:
275
276            model predictions: {array-like}
277
278        """
279        return self.predict_proba(X, weights, **kwargs).argmax(axis=1)
280
281    def predict_proba(self, X, weights=None, **kwargs):
282        """Predict probabilities for test data X.
283
284        Args:
285
286            X: {array-like}, shape = [n_samples, n_features]
287                Training vectors, where n_samples is the number
288                of samples and n_features is the number of features.
289
290            **kwargs: additional parameters to be passed to
291                    self.cook_test_set
292
293        Returns:
294
295            probability estimates for test data: {array-like}
296
297        """
298
299        def calculate_probas(voter, weights=None, verbose=None):
300            ensemble_proba = 0
301
302            n_iter = len(voter)
303
304            assert n_iter > 0, "no estimator found in `RandomBag` ensemble"
305
306            if weights is None:
307                for idx, elt in voter.items():
308                    try:
309                        ensemble_proba += elt.predict_proba(X)
310
311                        # if verbose == 1:
312                        #    pbar.update(idx)
313
314                    except:
315                        continue
316
317                # if verbose == 1:
318                #    pbar.update(n_iter)
319
320                return ensemble_proba / n_iter
321
322            # if weights is not None:
323            for idx, elt in voter.items():
324                ensemble_proba += weights[idx] * elt.predict_proba(X)
325
326                # if verbose == 1:
327                #    pbar.update(idx)
328
329            # if verbose == 1:
330            #    pbar.update(n_iter)
331
332            return ensemble_proba
333
334        # end calculate_probas ----
335
336        if self.n_jobs is None:
337            # if self.verbose == 1:
338            #    pbar = Progbar(self.n_estimators)
339
340            if weights is None:
341                return calculate_probas(self.voter_, verbose=self.verbose)
342
343            # if weights is not None:
344            self.weights = weights
345
346            return calculate_probas(self.voter_, weights, verbose=self.verbose)
347
348        # if self.n_jobs is not None:
349        def predict_estimator(m):
350            try:
351                return self.voter_[m].predict_proba(X)
352            except:
353                pass
354
355        if self.verbose == 1:
356            preds = Parallel(n_jobs=self.n_jobs, prefer="threads")(
357                delayed(predict_estimator)(m) for m in tqdm(range(self.n_estimators))
358            )
359
360        else:
361            preds = Parallel(n_jobs=self.n_jobs, prefer="threads")(
362                delayed(predict_estimator)(m) for m in range(self.n_estimators)
363            )
364
365        ensemble_proba = 0
366
367        if weights is None:
368            for i in range(self.n_estimators):
369                ensemble_proba += preds[i]
370
371            return ensemble_proba / self.n_estimators
372
373        for i in range(self.n_estimators):
374            ensemble_proba += weights[i] * preds[i]
375
376        return ensemble_proba

Randomized 'Bagging' Classification model

Parameters:

obj: object
    any object containing a method fit (obj.fit()) and a method predict
    (obj.predict())

n_estimators: int
    number of bagging iterations (base learners in the ensemble)

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

col_sample: float
    percentage of covariates randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

seed: int
    reproducibility seed for nodes_sim=='uniform'

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

voter_: dict
    dictionary containing all the fitted base-learners

Examples:

See also https://github.com/Techtonique/nnetsauce/blob/master/examples/randombag_classification.py

import numpy as np
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time


breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)

# decision tree
clf = DecisionTreeClassifier(max_depth=2, random_state=123)
fit_obj = ns.RandomBagClassifier(clf, n_hidden_features=2,
                                direct_link=True,
                                n_estimators=100,
                                col_sample=0.9, row_sample=0.9,
                                dropout=0.3, n_clusters=0, verbose=1)

start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")

print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))

start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
def fit(self, X, y, **kwargs):
181    def fit(self, X, y, **kwargs):
182        """Fit Random 'Bagging' model to training data (X, y).
183
184        Args:
185
186            X: {array-like}, shape = [n_samples, n_features]
187                Training vectors, where n_samples is the number
188                of samples and n_features is the number of features.
189
190            y: array-like, shape = [n_samples]
191                Target values.
192
193            **kwargs: additional parameters to be passed to
194                    self.cook_training_set or self.obj.fit
195
196        Returns:
197
198            self: object
199
200        """
201
202        assert mx.is_factor(y), "y must contain only integers"
203
204        self.n_classes_ = len(np.unique(y))  # for compatibility with sklearn
205
206        # training
207        self.n_classes = len(np.unique(y))
208
209        base_learner = CustomClassifier(
210            self.obj,
211            n_hidden_features=self.n_hidden_features,
212            activation_name=self.activation_name,
213            a=self.a,
214            nodes_sim=self.nodes_sim,
215            bias=self.bias,
216            dropout=self.dropout,
217            direct_link=self.direct_link,
218            n_clusters=self.n_clusters,
219            type_clust=self.type_clust,
220            type_scaling=self.type_scaling,
221            col_sample=self.col_sample,
222            row_sample=self.row_sample,
223            seed=self.seed,
224            cv_calibration=None
225        )
226
227        # 1 - Sequential training -----
228
229        if self.n_jobs is None:
230            self.voter_ = rbagloop_classification(
231                base_learner, X, y, self.n_estimators, self.verbose, self.seed
232            )
233
234            self.n_estimators = len(self.voter_)
235
236            return self
237
238        # 2 - Parallel training -----
239        # buggy
240        # if self.n_jobs is not None:
241        def fit_estimators(m):
242            base_learner__ = deepcopy(base_learner)
243            base_learner__.set_params(seed=self.seed + m * 1000)
244            base_learner__.fit(X, y, **kwargs)
245            return base_learner__
246
247        if self.verbose == 1:
248            voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
249                delayed(fit_estimators)(m) for m in tqdm(range(self.n_estimators))
250            )
251        else:
252            voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
253                delayed(fit_estimators)(m) for m in range(self.n_estimators)
254            )
255
256        self.voter_ = {idx: elt for idx, elt in enumerate(voters_list)}
257
258        self.n_estimators = len(self.voter_)
259        self.classes_ = np.unique(y)
260        return self

Fit Random 'Bagging' model to training data (X, y).

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
def predict(self, X, weights=None, **kwargs):
262    def predict(self, X, weights=None, **kwargs):
263        """Predict test data X.
264
265        Args:
266
267            X: {array-like}, shape = [n_samples, n_features]
268                Training vectors, where n_samples is the number
269                of samples and n_features is the number of features.
270
271            **kwargs: additional parameters to be passed to
272                    self.cook_test_set
273
274        Returns:
275
276            model predictions: {array-like}
277
278        """
279        return self.predict_proba(X, weights, **kwargs).argmax(axis=1)

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
def predict_proba(self, X, weights=None, **kwargs):
281    def predict_proba(self, X, weights=None, **kwargs):
282        """Predict probabilities for test data X.
283
284        Args:
285
286            X: {array-like}, shape = [n_samples, n_features]
287                Training vectors, where n_samples is the number
288                of samples and n_features is the number of features.
289
290            **kwargs: additional parameters to be passed to
291                    self.cook_test_set
292
293        Returns:
294
295            probability estimates for test data: {array-like}
296
297        """
298
299        def calculate_probas(voter, weights=None, verbose=None):
300            ensemble_proba = 0
301
302            n_iter = len(voter)
303
304            assert n_iter > 0, "no estimator found in `RandomBag` ensemble"
305
306            if weights is None:
307                for idx, elt in voter.items():
308                    try:
309                        ensemble_proba += elt.predict_proba(X)
310
311                        # if verbose == 1:
312                        #    pbar.update(idx)
313
314                    except:
315                        continue
316
317                # if verbose == 1:
318                #    pbar.update(n_iter)
319
320                return ensemble_proba / n_iter
321
322            # if weights is not None:
323            for idx, elt in voter.items():
324                ensemble_proba += weights[idx] * elt.predict_proba(X)
325
326                # if verbose == 1:
327                #    pbar.update(idx)
328
329            # if verbose == 1:
330            #    pbar.update(n_iter)
331
332            return ensemble_proba
333
334        # end calculate_probas ----
335
336        if self.n_jobs is None:
337            # if self.verbose == 1:
338            #    pbar = Progbar(self.n_estimators)
339
340            if weights is None:
341                return calculate_probas(self.voter_, verbose=self.verbose)
342
343            # if weights is not None:
344            self.weights = weights
345
346            return calculate_probas(self.voter_, weights, verbose=self.verbose)
347
348        # if self.n_jobs is not None:
349        def predict_estimator(m):
350            try:
351                return self.voter_[m].predict_proba(X)
352            except:
353                pass
354
355        if self.verbose == 1:
356            preds = Parallel(n_jobs=self.n_jobs, prefer="threads")(
357                delayed(predict_estimator)(m) for m in tqdm(range(self.n_estimators))
358            )
359
360        else:
361            preds = Parallel(n_jobs=self.n_jobs, prefer="threads")(
362                delayed(predict_estimator)(m) for m in range(self.n_estimators)
363            )
364
365        ensemble_proba = 0
366
367        if weights is None:
368            for i in range(self.n_estimators):
369                ensemble_proba += preds[i]
370
371            return ensemble_proba / self.n_estimators
372
373        for i in range(self.n_estimators):
374            ensemble_proba += weights[i] * preds[i]
375
376        return ensemble_proba

Predict probabilities for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

probability estimates for test data: {array-like}
class RegressorUpdater(sklearn.base.BaseEstimator, sklearn.base.RegressorMixin):
 16class RegressorUpdater(BaseEstimator, RegressorMixin):
 17    """
 18    Update a regression model with new observations
 19
 20    Parameters
 21    ----------
 22    regr: object
 23        A regression model with a coef_ attribute
 24    alpha: float
 25        Updating factor's exponent
 26
 27    Attributes
 28    ----------
 29    n_obs_: int
 30        Number of observations
 31    coef_: np.ndarray
 32        Coefficients of the model
 33    updating_factor_: float
 34        Updating factor
 35
 36    """
 37
 38    def __init__(self, regr, alpha=0.5):
 39        self.regr = regr
 40        self.alpha = alpha
 41        self.n_obs_ = None
 42        self.coef_ = None
 43        self.updating_factor_ = None
 44        try:
 45            self.coef_ = self.regr.coef_
 46            if isinstance(self.regr, Base):
 47                self.n_obs_ = self.regr.scaler_.n_samples_seen_
 48        except AttributeError:
 49            pass
 50
 51    def fit(self, X, y, **kwargs):
 52
 53        if isinstance(self.regr, CustomRegressor):  # nnetsauce model not deep ---
 54            if check_is_fitted(self.regr) == False:
 55                self.regr.fit(X, y, **kwargs)
 56                self.n_obs_ = X.shape[0]
 57                if hasattr(self.regr, "coef_"):
 58                    self.coef_ = self.regr.coef_
 59                return self
 60            self.n_obs_ = self.regr.scaler_.n_samples_seen_
 61            if hasattr(self.regr, "coef_"):
 62                self.coef_ = self.regr.coef_
 63            return self
 64
 65        if (
 66            hasattr(self.regr, "coef_") == False
 67        ):  # sklearn model or CustomRegressor model ---
 68            self.regr.fit(X, y)
 69            self.n_obs_ = X.shape[0]
 70            self.regr.fit(X, y)
 71            if hasattr(self.regr, "stacked_obj"):
 72                self.coef_ = self.regr.stacked_obj.coef_
 73            else:
 74                self.coef_ = self.regr.coef_
 75            return self
 76        self.n_obs_ = X.shape[0]
 77        if hasattr(self.regr, "coef_"):
 78            self.coef_ = self.regr.coef_
 79        return self
 80
 81    def predict(self, X):
 82        # assert hasattr(self.regr, "coef_"), "model must have coef_ attribute"
 83        return self.regr.predict(X)
 84
 85    def partial_fit(self, X, y):
 86
 87        assert hasattr(
 88            self.regr, "coef_"
 89        ), "model must be fitted first (i.e have 'coef_' attribute)"
 90        assert (
 91            self.n_obs_ is not None
 92        ), "model must be fitted first (i.e have 'n_obs_' attribute)"
 93
 94        if len(X.shape) == 1:
 95            X = X.reshape(1, -1)
 96
 97        assert X.shape[0] == 1, "X must have one row"
 98
 99        self.updating_factor_ = self.n_obs_ ** (-self.alpha)
100
101        if isinstance(self.regr, Base):  # nnetsauce model ---
102
103            newX = deepcopy(X)
104
105            if isinstance(
106                self.regr, CustomRegressor
107            ):  # other nnetsauce model (CustomRegressor) ---
108                newX = self.regr.cook_test_set(X=X)
109                if isinstance(X, pd.DataFrame):
110                    newx = newX.values.ravel()
111                else:
112                    newx = newX.ravel()
113
114        else:  # an sklearn model ---
115
116            if isinstance(X, pd.DataFrame):
117                newx = X.values.ravel()
118            else:
119                newx = X.ravel()
120
121        new_coef = self.regr.coef_ + self.updating_factor_ * np.dot(
122            newx, y - np.dot(newx, self.regr.coef_)
123        )
124        self.regr.coef_ = _update_mean(self.regr.coef_, self.n_obs_, new_coef)
125        self.coef_ = deepcopy(self.regr.coef_)
126        self.n_obs_ += 1
127        return self

Update a regression model with new observations

Parameters

regr: object
    A regression model with a coef_ attribute

alpha: float
    Updating factor's exponent

Attributes

n_obs_: int
    Number of observations

coef_: np.ndarray
    Coefficients of the model

updating_factor_: float
    Updating factor
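
Examples:

From the source, `partial_fit` processes one observation at a time: it applies a correction of the form `coef_ + n_obs_**(-alpha) * x * (y - x @ coef_)` and blends it with the previous coefficients through a running mean. A minimal sketch with a scikit-learn `LinearRegression` (which exposes `coef_`); the data below is illustrative only.

```python
import numpy as np
import nnetsauce as ns
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression

X, y = make_regression(n_samples=100, n_features=5, noise=5.0, random_state=42)

# fit on the first 90 observations, then update with the remaining ones
regr = LinearRegression().fit(X[:90], y[:90])
updater = ns.RegressorUpdater(regr, alpha=0.5)
updater.fit(X[:90], y[:90])            # records n_obs_ and coef_

for i in range(90, 100):
    updater.partial_fit(X[i], y[i])    # online update of coef_

print(updater.coef_)
print(updater.predict(X[-5:]))
```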

def fit(self, X, y, **kwargs):
51    def fit(self, X, y, **kwargs):
52
53        if isinstance(self.regr, CustomRegressor):  # nnetsauce model not deep ---
54            if check_is_fitted(self.regr) == False:
55                self.regr.fit(X, y, **kwargs)
56                self.n_obs_ = X.shape[0]
57                if hasattr(self.regr, "coef_"):
58                    self.coef_ = self.regr.coef_
59                return self
60            self.n_obs_ = self.regr.scaler_.n_samples_seen_
61            if hasattr(self.regr, "coef_"):
62                self.coef_ = self.regr.coef_
63            return self
64
65        if (
66            hasattr(self.regr, "coef_") == False
67        ):  # sklearn model or CustomRegressor model ---
68            self.regr.fit(X, y)
69            self.n_obs_ = X.shape[0]
70            self.regr.fit(X, y)
71            if hasattr(self.regr, "stacked_obj"):
72                self.coef_ = self.regr.stacked_obj.coef_
73            else:
74                self.coef_ = self.regr.coef_
75            return self
76        self.n_obs_ = X.shape[0]
77        if hasattr(self.regr, "coef_"):
78            self.coef_ = self.regr.coef_
79        return self
def predict(self, X):
81    def predict(self, X):
82        # assert hasattr(self.regr, "coef_"), "model must have coef_ attribute"
83        return self.regr.predict(X)
class ClassifierUpdater(sklearn.base.BaseEstimator, sklearn.base.ClassifierMixin):
 16class ClassifierUpdater(BaseEstimator, ClassifierMixin):
 17    """
 18    Update a classification model with new observations
 19
 20    Parameters
 21    ----------
 22    clf: object
 23        A classification model with a coef_ attribute
 24    alpha: float
 25        Updating factor's exponent
 26
 27    Attributes
 28    ----------
 29    n_obs_: int
 30        Number of observations
 31    coef_: np.ndarray
 32        Coefficients of the model
 33    updating_factor_: float
 34        Updating factor
 35
 36    """
 37
 38    def __init__(self, clf, alpha=0.5):
 39        self.clf = clf
 40        self.alpha = alpha
 41        self.n_obs_ = None
 42        self.coef_ = None
 43        self.updating_factor_ = None
 44        try:
 45            self.coef_ = self.clf.coef_
 46            if isinstance(self.clf, Base):
 47                self.n_obs_ = self.clf.scaler_.n_samples_seen_
 48        except AttributeError:
 49            pass
 50
 51    def fit(self, X, y, **kwargs):
 52
 53        raise NotImplementedError("fit method is not implemented for ClassifierUpdater")
 54
 55        if isinstance(self.clf, CustomClassifier):  # nnetsauce model not deep ---
 56            if check_is_fitted(self.clf) == False:
 57                self.clf.fit(X, y, **kwargs)
 58                self.n_obs_ = X.shape[0]
 59                if hasattr(self.clf, "coef_"):
 60                    self.coef_ = self.clf.coef_
 61                return self
 62            self.n_obs_ = self.clf.scaler_.n_samples_seen_
 63            if hasattr(self.clf, "coef_"):
 64                self.coef_ = self.clf.coef_
 65            return self
 66
 67        if (
 68            hasattr(self.clf, "coef_") == False
 69        ):  # sklearn model or CustomClassifier model ---
 70            self.clf.fit(X, y)
 71            self.n_obs_ = X.shape[0]
 72            self.clf.fit(X, y)
 73            if hasattr(self.clf, "stacked_obj"):
 74                self.coef_ = self.clf.stacked_obj.coef_
 75            else:
 76                self.coef_ = self.clf.coef_
 77            return self
 78        self.n_obs_ = X.shape[0]
 79        if hasattr(self.clf, "coef_"):
 80            self.coef_ = self.clf.coef_
 81        return self
 82
 83    def predict(self, X):
 84
 85        raise NotImplementedError(
 86            "predict method is not implemented for ClassifierUpdater"
 87        )
 88        # assert hasattr(self.clf, "coef_"), "model must have coef_ attribute"
 89        return self.clf.predict(X)
 90
 91    def partial_fit(self, X, y):
 92
 93        raise NotImplementedError(
 94            "partial_fit method is not implemented for ClassifierUpdater"
 95        )
 96
 97        assert hasattr(
 98            self.clf, "coef_"
 99        ), "model must be fitted first (i.e have 'coef_' attribute)"
100        assert (
101            self.n_obs_ is not None
102        ), "model must be fitted first (i.e have 'n_obs_' attribute)"
103
104        if len(X.shape) == 1:
105            X = X.reshape(1, -1)
106
107        assert X.shape[0] == 1, "X must have one row"
108
109        self.updating_factor_ = self.n_obs_ ** (-self.alpha)
110
111        if isinstance(self.clf, Base):  # nnetsauce model ---
112
113            newX = deepcopy(X)
114
115            if isinstance(
116                self.clf, CustomClassifier
117            ):  # other nnetsauce model (CustomClassifier) ---
118                newX = self.clf.cook_test_set(X=X)
119                if isinstance(X, pd.DataFrame):
120                    newx = newX.values.ravel()
121                else:
122                    newx = newX.ravel()
123
124        else:  # an sklearn model ---
125
126            if isinstance(X, pd.DataFrame):
127                newx = X.values.ravel()
128            else:
129                newx = X.ravel()
130
131        new_coef = self.clf.coef_ + self.updating_factor_ * np.dot(
132            newx, y - np.dot(newx, self.clf.coef_)
133        )
134        self.clf.coef_ = _update_mean(self.clf.coef_, self.n_obs_, new_coef)
135        self.coef_ = deepcopy(self.clf.coef_)
136        self.n_obs_ += 1
137        return self

Update a classification model with new observations

Parameters

clf: object
    A classification model with a coef_ attribute

alpha: float
    Updating factor's exponent

Attributes

n_obs_: int
    Number of observations

coef_: np.ndarray
    Coefficients of the model

updating_factor_: float
    Updating factor

def fit(self, X, y, **kwargs):
51    def fit(self, X, y, **kwargs):
52
53        raise NotImplementedError("fit method is not implemented for ClassifierUpdater")
54
55        if isinstance(self.clf, CustomClassifier):  # nnetsauce model not deep ---
56            if check_is_fitted(self.clf) == False:
57                self.clf.fit(X, y, **kwargs)
58                self.n_obs_ = X.shape[0]
59                if hasattr(self.clf, "coef_"):
60                    self.coef_ = self.clf.coef_
61                return self
62            self.n_obs_ = self.clf.scaler_.n_samples_seen_
63            if hasattr(self.clf, "coef_"):
64                self.coef_ = self.clf.coef_
65            return self
66
67        if (
68            hasattr(self.clf, "coef_") == False
69        ):  # sklearn model or CustomClassifier model ---
70            self.clf.fit(X, y)
71            self.n_obs_ = X.shape[0]
72            self.clf.fit(X, y)
73            if hasattr(self.clf, "stacked_obj"):
74                self.coef_ = self.clf.stacked_obj.coef_
75            else:
76                self.coef_ = self.clf.coef_
77            return self
78        self.n_obs_ = X.shape[0]
79        if hasattr(self.clf, "coef_"):
80            self.coef_ = self.clf.coef_
81        return self
def predict(self, X):
83    def predict(self, X):
84
85        raise NotImplementedError(
86            "predict method is not implemented for ClassifierUpdater"
87        )
88        # assert hasattr(self.clf, "coef_"), "model must have coef_ attribute"
89        return self.clf.predict(X)
class RidgeRegressor(nnetsauce.Base, sklearn.base.RegressorMixin):
 20class RidgeRegressor(Base, RegressorMixin):
 21    """Basic Ridge Regression model.
 22
 23    Parameters:
 24        lambda_: float or array-like
 25            Ridge regularization parameter(s). Default is 0.
 26    """
 27
 28    def __init__(
 29        self,
 30        lambda_=0.0,
 31        n_hidden_features=0,
 32        activation_name="relu",
 33        a=0.01,
 34        nodes_sim="sobol",
 35        bias=True,
 36        dropout=0,
 37        direct_link=True,
 38        n_clusters=0,
 39        cluster_encode=True,
 40        type_clust="kmeans",
 41        type_scaling=("std", "std", "std"),
 42        col_sample=1,
 43        row_sample=1,
 44        seed=123,
 45        backend="cpu",
 46    ):
 47        super().__init__(
 48            n_hidden_features=n_hidden_features,
 49            activation_name=activation_name,
 50            a=a,
 51            nodes_sim=nodes_sim,
 52            bias=bias,
 53            dropout=dropout,
 54            direct_link=direct_link,
 55            n_clusters=n_clusters,
 56            cluster_encode=cluster_encode,
 57            type_clust=type_clust,
 58            type_scaling=type_scaling,
 59            col_sample=col_sample,
 60            row_sample=row_sample,
 61            seed=seed,
 62            backend=backend,
 63        )
 64        self.lambda_ = lambda_
 65        self.scale_ = {}
 66
 67    def _center_scale_xy(self, X, y):
 68        """Center X and y, scale X."""
 69        n = X.shape[0]
 70
 71        # Center X and y
 72        X_mean = np.mean(X, axis=0)
 73        y_mean = np.mean(y)
 74        X_centered = X - X_mean
 75        y_centered = y - y_mean
 76
 77        # Scale X
 78        X_scale = np.sqrt(np.sum(X_centered**2, axis=0) / n)
 79        # Avoid division by zero
 80        X_scale = np.where(X_scale == 0, 1.0, X_scale)
 81        X_scaled = X_centered / X_scale
 82
 83        return X_scaled, y_centered, X_mean, y_mean, X_scale
 84
 85    def fit(self, X, y):
 86        """Fit Ridge regression model.
 87
 88        Parameters:
 89            X : array-like of shape (n_samples, n_features)
 90                Training data
 91            y : array-like of shape (n_samples,)
 92                Target values
 93
 94        Returns:
 95            self : returns an instance of self.
 96        """
 97        # Ensure numpy arrays
 98        X = np.asarray(X)
 99        y = np.asarray(y)
100        print(f"\nInput shapes - X: {X.shape}, y: {y.shape}")
101        print(f"First few X values: {X[:2]}")
102        print(f"First few y values: {y[:2]}")
103
104        if y.ndim == 2:
105            y = y.ravel()
106
107        # Center and scale
108        X_scaled, y_centered, self.X_mean_, self.y_mean_, self.X_scale_ = (
109            self._center_scale_xy(X, y)
110        )
111
112        # SVD decomposition
113        U, d, Vt = np.linalg.svd(X_scaled, full_matrices=False)
114
115        # Compute coefficients
116        rhs = np.dot(U.T, y_centered)
117        d2 = d**2
118
119        print(f"d2 shape: {d2.shape}")
120        print(f"rhs shape: {rhs.shape}")
121        print(f"Vt shape: {Vt.shape}")
122
123        if np.isscalar(self.lambda_):
124            div = d2 + self.lambda_
125            a = (d * rhs) / div
126            print(f"\nSingle lambda case:")
127            print(f"lambda: {self.lambda_}")
128            print(f"div shape: {div.shape}")
129            print(f"a shape: {a.shape}")
130            self.coef_ = np.dot(Vt.T, a) / self.X_scale_
131            print(f"coef shape: {self.coef_.shape}")
132        else:
133            coefs = []
134            print(f"\nMultiple lambda case:")
135            for lambda_ in self.lambda_:
136                print(f"lambda: {lambda_}")
137                div = d2 + lambda_
138                print(f"div shape: {div.shape}")
139                a = (d * rhs) / div
140                print(f"a shape: {a.shape}")
141                coef = np.dot(Vt.T, a) / self.X_scale_
142                print(f"coef shape: {coef.shape}")
143                coefs.append(coef)
144            self.coef_ = np.array(coefs).T
145            print(f"final coefs shape: {self.coef_.shape}")
146
147        # Compute GCV, HKB and LW criteria
148        y_pred = self.predict(X)
149        try:
150            resid = y - y_pred
151        except Exception as e:
152            resid = y[:, np.newaxis] - y_pred
153        n, p = X.shape
154        if resid.ndim == 1:
155            s2 = np.sum(resid**2) / (n - p)
156        else:
157            s2 = np.sum(resid**2, axis=0) / (n - p)
158
159        self.HKB_ = (p - 2) * s2 / np.sum(self.coef_**2)
160        self.LW_ = (p - 2) * s2 * n / np.sum(y_pred**2)
161
162        if np.isscalar(self.lambda_):
163            div = d2 + self.lambda_
164            self.GCV_ = np.sum((y - y_pred) ** 2) / (n - np.sum(d2 / div)) ** 2
165        else:
166            self.GCV_ = []
167            for lambda_ in self.lambda_:
168                div = d2 + lambda_
169                try:
170                    gcv = np.sum((y - y_pred) ** 2) / (n - np.sum(d2 / div)) ** 2
171                except Exception as e:
172                    gcv = (
173                        np.sum((y[:, np.newaxis] - y_pred) ** 2)
174                        / (n - np.sum(d2 / div)) ** 2
175                    )
176                self.GCV_.append(gcv)
177            self.GCV_ = np.array(self.GCV_)
178
179        return self
180
181    def predict(self, X):
182        """Predict using the Ridge regression model.
183
184        Parameters:
185            X : array-like of shape (n_samples, n_features)
186                Samples to predict for
187
188        Returns:
189            y_pred : array-like of shape (n_samples,)
190                Returns predicted values.
191        """
192        # Ensure X is 2D
193        X = np.asarray(X)
194        if X.ndim == 1:
195            X = X.reshape(1, -1)
196            
197        # Center and scale X
198        X_scaled = (X - self.X_mean_) / self.X_scale_
199
200        if self.backend == "cpu":
201            if np.isscalar(self.lambda_):
202                return (
203                    mo.safe_sparse_dot(X_scaled, self.coef_, backend=self.backend)
204                    + self.y_mean_
205                )
206            else:
207                return np.array(
208                    [
209                        mo.safe_sparse_dot(X_scaled, coef, backend=self.backend) + self.y_mean_
210                        for coef in self.coef_.T
211                    ]
212                ).T
213        else:
214            if np.isscalar(self.lambda_):
215                return (
216                    mo.safe_sparse_dot(X_scaled, self.coef_, backend=self.backend)
217                    + self.y_mean_
218                )
219            else:
220                return jnp.array(
221                    [
222                        mo.safe_sparse_dot(X_scaled, coef, backend=self.backend) + self.y_mean_
223                        for coef in self.coef_.T
224                    ]
225                ).T
226
227    def decision_function(self, X):
228        """Compute the decision function of X.
229
230        Parameters:
231            X : array-like of shape (n_samples, n_features)
232                Samples
233
234        Returns:
235            decision : array-like of shape (n_samples,) or (n_samples, n_lambdas)
236                Decision function of the input samples. The order of outputs is the same
237                as that of the provided lambda_ values. For a single lambda, returns
238                array of shape (n_samples,). For multiple lambdas, returns array of shape
239                (n_samples, n_lambdas).
240        """
241        X = self.cook_test_set(X)
242
243        if self.backend == "cpu":
244            if np.isscalar(self.lambda_):
245                return mo.safe_sparse_dot(X, self.coef_, backend=self.backend)
246            else:
247                return np.array(
248                    [
249                        mo.safe_sparse_dot(X, coef, backend=self.backend)
250                        for coef in self.coef_.T
251                    ]
252                ).T
253        else:
254            if np.isscalar(self.lambda_):
255                return mo.safe_sparse_dot(X, self.coef_, backend=self.backend)
256            else:
257                return jnp.array(
258                    [
259                        mo.safe_sparse_dot(X, coef, backend=self.backend)
260                        for coef in self.coef_.T
261                    ]
262                ).T

Basic Ridge Regression model.

Parameters:

lambda_: float or array-like
    Ridge regularization parameter(s). Default is 0.

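After `fit`, the model stores the ridge coefficients in `coef_` and per-lambda selection criteria in `GCV_`, `HKB_` and `LW_`. Below is a minimal usage sketch, not taken from the package's own examples; it assumes the class is exposed as `ns.RidgeRegressor` and that `lambda_` can be passed to the constructor as a scalar or an array-like.

```python
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

# single regularization parameter (lambda_ as a constructor argument is an assumption)
reg = ns.RidgeRegressor(lambda_=1.0)
reg.fit(X_train, y_train)
print(reg.predict(X_test).shape)        # (n_samples,)

# several regularization parameters fitted in one call
reg_path = ns.RidgeRegressor(lambda_=np.array([0.01, 0.1, 1.0, 10.0]))
reg_path.fit(X_train, y_train)
print(reg_path.predict(X_test).shape)   # (n_samples, n_lambdas)
print(reg_path.GCV_)                    # one GCV value per lambda
```
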
def fit(self, X, y):
 85    def fit(self, X, y):
 86        """Fit Ridge regression model.
 87
 88        Parameters:
 89            X : array-like of shape (n_samples, n_features)
 90                Training data
 91            y : array-like of shape (n_samples,)
 92                Target values
 93
 94        Returns:
 95            self : returns an instance of self.
 96        """
 97        # Ensure numpy arrays
 98        X = np.asarray(X)
 99        y = np.asarray(y)
100        print(f"\nInput shapes - X: {X.shape}, y: {y.shape}")
101        print(f"First few X values: {X[:2]}")
102        print(f"First few y values: {y[:2]}")
103
104        if y.ndim == 2:
105            y = y.ravel()
106
107        # Center and scale
108        X_scaled, y_centered, self.X_mean_, self.y_mean_, self.X_scale_ = (
109            self._center_scale_xy(X, y)
110        )
111
112        # SVD decomposition
113        U, d, Vt = np.linalg.svd(X_scaled, full_matrices=False)
114
115        # Compute coefficients
116        rhs = np.dot(U.T, y_centered)
117        d2 = d**2
118
119        print(f"d2 shape: {d2.shape}")
120        print(f"rhs shape: {rhs.shape}")
121        print(f"Vt shape: {Vt.shape}")
122
123        if np.isscalar(self.lambda_):
124            div = d2 + self.lambda_
125            a = (d * rhs) / div
126            print(f"\nSingle lambda case:")
127            print(f"lambda: {self.lambda_}")
128            print(f"div shape: {div.shape}")
129            print(f"a shape: {a.shape}")
130            self.coef_ = np.dot(Vt.T, a) / self.X_scale_
131            print(f"coef shape: {self.coef_.shape}")
132        else:
133            coefs = []
134            print(f"\nMultiple lambda case:")
135            for lambda_ in self.lambda_:
136                print(f"lambda: {lambda_}")
137                div = d2 + lambda_
138                print(f"div shape: {div.shape}")
139                a = (d * rhs) / div
140                print(f"a shape: {a.shape}")
141                coef = np.dot(Vt.T, a) / self.X_scale_
142                print(f"coef shape: {coef.shape}")
143                coefs.append(coef)
144            self.coef_ = np.array(coefs).T
145            print(f"final coefs shape: {self.coef_.shape}")
146
147        # Compute GCV, HKB and LW criteria
148        y_pred = self.predict(X)
149        try:
150            resid = y - y_pred
151        except Exception as e:
152            resid = y[:, np.newaxis] - y_pred
153        n, p = X.shape
154        if resid.ndim == 1:
155            s2 = np.sum(resid**2) / (n - p)
156        else:
157            s2 = np.sum(resid**2, axis=0) / (n - p)
158
159        self.HKB_ = (p - 2) * s2 / np.sum(self.coef_**2)
160        self.LW_ = (p - 2) * s2 * n / np.sum(y_pred**2)
161
162        if np.isscalar(self.lambda_):
163            div = d2 + self.lambda_
164            self.GCV_ = np.sum((y - y_pred) ** 2) / (n - np.sum(d2 / div)) ** 2
165        else:
166            self.GCV_ = []
167            for lambda_ in self.lambda_:
168                div = d2 + lambda_
169                try:
170                    gcv = np.sum((y - y_pred) ** 2) / (n - np.sum(d2 / div)) ** 2
171                except Exception as e:
172                    gcv = (
173                        np.sum((y[:, np.newaxis] - y_pred) ** 2)
174                        / (n - np.sum(d2 / div)) ** 2
175                    )
176                self.GCV_.append(gcv)
177            self.GCV_ = np.array(self.GCV_)
178
179        return self

Fit Ridge regression model.

Parameters:

X : array-like of shape (n_samples, n_features)
    Training data

y : array-like of shape (n_samples,)
    Target values

Returns:

self : returns an instance of self.

def predict(self, X):
181    def predict(self, X):
182        """Predict using the Ridge regression model.
183
184        Parameters:
185            X : array-like of shape (n_samples, n_features)
186                Samples to predict for
187
188        Returns:
189            y_pred : array-like of shape (n_samples,)
190                Returns predicted values.
191        """
192        # Ensure X is 2D
193        X = np.asarray(X)
194        if X.ndim == 1:
195            X = X.reshape(1, -1)
196            
197        # Center and scale X
198        X_scaled = (X - self.X_mean_) / self.X_scale_
199
200        if self.backend == "cpu":
201            if np.isscalar(self.lambda_):
202                return (
203                    mo.safe_sparse_dot(X_scaled, self.coef_, backend=self.backend)
204                    + self.y_mean_
205                )
206            else:
207                return np.array(
208                    [
209                        mo.safe_sparse_dot(X_scaled, coef, backend=self.backend) + self.y_mean_
210                        for coef in self.coef_.T
211                    ]
212                ).T
213        else:
214            if np.isscalar(self.lambda_):
215                return (
216                    mo.safe_sparse_dot(X_scaled, self.coef_, backend=self.backend)
217                    + self.y_mean_
218                )
219            else:
220                return jnp.array(
221                    [
222                        mo.safe_sparse_dot(X_scaled, coef, backend=self.backend) + self.y_mean_
223                        for coef in self.coef_.T
224                    ]
225                ).T

Predict using the Ridge regression model.

Parameters:

X : array-like of shape (n_samples, n_features)
    Samples to predict for

Returns:

y_pred : array-like of shape (n_samples,)
    Returns predicted values.
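
When `lambda_` is an array, the predictions have one column per regularization value (shape `(n_samples, n_lambdas)`). A common follow-up, sketched below under the same assumptions as the sketch above (hypothetical `lambda_` constructor argument), is to keep the column whose GCV criterion is smallest.

```python
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_diabetes

X, y = load_diabetes(return_X_y=True)
reg = ns.RidgeRegressor(lambda_=np.logspace(-2, 2, 10))  # lambda_ argument is an assumption
reg.fit(X, y)

best = int(np.argmin(reg.GCV_))      # index of the lambda with the lowest GCV
y_hat = reg.predict(X)[:, best]      # predictions for that lambda only
print(reg.lambda_[best], y_hat.shape)
```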

class Ridge2Regressor(nnetsauce.ridge2.ridge2.Ridge2, sklearn.base.RegressorMixin):
 23class Ridge2Regressor(Ridge2, RegressorMixin):
 24    """Ridge regression with 2 regularization parameters derived from class Ridge2
 25
 26    Parameters:
 27
 28        n_hidden_features: int
 29            number of nodes in the hidden layer
 30
 31        activation_name: str
 32            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 33
 34        a: float
 35            hyperparameter for 'prelu' or 'elu' activation function
 36
 37        nodes_sim: str
 38            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 39            'uniform'
 40
 41        bias: boolean
 42            indicates if the hidden layer contains a bias term (True) or not
 43            (False)
 44
 45        dropout: float
 46            regularization parameter; (random) percentage of nodes dropped out
 47            of the training
 48
 49        n_clusters: int
 50            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 51                no clustering)
 52
 53        cluster_encode: bool
 54            defines how the variable containing clusters is treated (default is one-hot)
 55            if `False`, then labels are used, without one-hot encoding
 56
 57        type_clust: str
 58            type of clustering method: currently k-means ('kmeans') or Gaussian
 59            Mixture Model ('gmm')
 60
 61        type_scaling: a tuple of 3 strings
 62            scaling methods for inputs, hidden layer, and clustering respectively
 63            (and when relevant).
 64            Currently available: standardization ('std') or MinMax scaling ('minmax')
 65
 66        lambda1: float
 67            regularization parameter on direct link
 68
 69        lambda2: float
 70            regularization parameter on hidden layer
 71
 72        seed: int
 73            reproducibility seed for nodes_sim=='uniform'
 74
 75        backend: str
 76            'cpu' or 'gpu' or 'tpu'
 77
 78    Attributes:
 79
 80        beta_: {array-like}
 81            regression coefficients
 82
 83        y_mean_: float
 84            average response
 85
 86    """
 87
 88    # construct the object -----
 89
 90    def __init__(
 91        self,
 92        n_hidden_features=5,
 93        activation_name="relu",
 94        a=0.01,
 95        nodes_sim="sobol",
 96        bias=True,
 97        dropout=0,
 98        n_clusters=2,
 99        cluster_encode=True,
100        type_clust="kmeans",
101        type_scaling=("std", "std", "std"),
102        lambda1=0.1,
103        lambda2=0.1,
104        seed=123,
105        backend="cpu",
106    ):
107        super().__init__(
108            n_hidden_features=n_hidden_features,
109            activation_name=activation_name,
110            a=a,
111            nodes_sim=nodes_sim,
112            bias=bias,
113            dropout=dropout,
114            n_clusters=n_clusters,
115            cluster_encode=cluster_encode,
116            type_clust=type_clust,
117            type_scaling=type_scaling,
118            lambda1=lambda1,
119            lambda2=lambda2,
120            seed=seed,
121            backend=backend,
122        )
123
124        self.type_fit = "regression"
125
126    def fit(self, X, y, **kwargs):
127        """Fit Ridge model to training data (X, y).
128
129        Args:
130
131            X: {array-like}, shape = [n_samples, n_features]
132                Training vectors, where n_samples is the number
133                of samples and n_features is the number of features.
134
135            y: array-like, shape = [n_samples]
136                Target values.
137
138            **kwargs: additional parameters to be passed to
139                    self.cook_training_set or self.obj.fit
140
141        Returns:
142
143            self: object
144
145        """
146
147        sys_platform = platform.system()
148
149        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
150
151        n_X, p_X = X.shape
152        n_Z, p_Z = scaled_Z.shape
153
154        if self.n_clusters > 0:
155            if self.encode_clusters == True:
156                n_features = p_X + self.n_clusters
157            else:
158                n_features = p_X + 1
159        else:
160            n_features = p_X
161
162        X_ = scaled_Z[:, 0:n_features]
163        Phi_X_ = scaled_Z[:, n_features:p_Z]
164
165        B = mo.crossprod(x=X_, backend=self.backend) + self.lambda1 * np.diag(
166            np.repeat(1, n_features)
167        )
168        C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend)
169        D = mo.crossprod(x=Phi_X_, backend=self.backend) + self.lambda2 * np.diag(
170            np.repeat(1, Phi_X_.shape[1])
171        )
172
173        if sys_platform in ("Linux", "Darwin"):
174            B_inv = pinv(B) if self.backend == "cpu" else jpinv(B)
175        else:
176            B_inv = pinv(B)
177
178        W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend)
179        S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend)
180
181        if sys_platform in ("Linux", "Darwin"):
182            S_inv = pinv(S_mat) if self.backend == "cpu" else jpinv(S_mat)
183        else:
184            S_inv = pinv(S_mat)
185
186        Y = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend)
187        inv = mo.rbind(
188            mo.cbind(
189                x=B_inv + mo.crossprod(x=W, y=Y, backend=self.backend),
190                y=-np.transpose(Y),
191                backend=self.backend,
192            ),
193            mo.cbind(x=-Y, y=S_inv, backend=self.backend),
194            backend=self.backend,
195        )
196
197        self.beta_ = mo.safe_sparse_dot(
198            a=inv,
199            b=mo.crossprod(x=scaled_Z, y=centered_y, backend=self.backend),
200            backend=self.backend,
201        )
202
203        return self
204
205    def predict(self, X, **kwargs):
206        """Predict test data X.
207
208        Args:
209
210            X: {array-like}, shape = [n_samples, n_features]
211                Training vectors, where n_samples is the number
212                of samples and n_features is the number of features.
213
214            **kwargs: additional parameters to be passed to
215                    self.cook_test_set
216
217        Returns:
218
219            model predictions: {array-like}
220
221        """
222
223        if len(X.shape) == 1:
224            n_features = X.shape[0]
225            new_X = mo.rbind(
226                x=X.reshape(1, n_features),
227                y=np.ones(n_features).reshape(1, n_features),
228                backend=self.backend,
229            )
230
231            return (
232                self.y_mean_
233                + mo.safe_sparse_dot(
234                    a=self.cook_test_set(new_X, **kwargs),
235                    b=self.beta_,
236                    backend=self.backend,
237                )
238            )[0]
239
240        return self.y_mean_ + mo.safe_sparse_dot(
241            a=self.cook_test_set(X, **kwargs),
242            b=self.beta_,
243            backend=self.backend,
244        )

Ridge regression with 2 regularization parameters derived from class Ridge2

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

lambda1: float
    regularization parameter on direct link

lambda2: float
    regularization parameter on hidden layer

seed: int
    reproducibility seed for nodes_sim=='uniform'

backend: str
    'cpu' or 'gpu' or 'tpu'

Attributes:

beta_: {array-like}
    regression coefficients

y_mean_: float
    average response
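
No worked example accompanies the regressor in this docstring; the following is a minimal, hypothetical sketch in the spirit of the classifier examples further down (hyperparameter values are illustrative, not tuned).

```python
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

X, y = load_diabetes(return_X_y=True)
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

reg = ns.Ridge2Regressor(n_hidden_features=25,
                         lambda1=0.5,    # penalty on the direct link
                         lambda2=10.0,   # penalty on the hidden layer
                         n_clusters=2)
reg.fit(X_train, y_train)
print(mean_squared_error(y_test, reg.predict(X_test)) ** 0.5)  # test RMSE
```
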
def fit(self, X, y, **kwargs):
126    def fit(self, X, y, **kwargs):
127        """Fit Ridge model to training data (X, y).
128
129        Args:
130
131            X: {array-like}, shape = [n_samples, n_features]
132                Training vectors, where n_samples is the number
133                of samples and n_features is the number of features.
134
135            y: array-like, shape = [n_samples]
136                Target values.
137
138            **kwargs: additional parameters to be passed to
139                    self.cook_training_set or self.obj.fit
140
141        Returns:
142
143            self: object
144
145        """
146
147        sys_platform = platform.system()
148
149        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
150
151        n_X, p_X = X.shape
152        n_Z, p_Z = scaled_Z.shape
153
154        if self.n_clusters > 0:
155            if self.encode_clusters == True:
156                n_features = p_X + self.n_clusters
157            else:
158                n_features = p_X + 1
159        else:
160            n_features = p_X
161
162        X_ = scaled_Z[:, 0:n_features]
163        Phi_X_ = scaled_Z[:, n_features:p_Z]
164
165        B = mo.crossprod(x=X_, backend=self.backend) + self.lambda1 * np.diag(
166            np.repeat(1, n_features)
167        )
168        C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend)
169        D = mo.crossprod(x=Phi_X_, backend=self.backend) + self.lambda2 * np.diag(
170            np.repeat(1, Phi_X_.shape[1])
171        )
172
173        if sys_platform in ("Linux", "Darwin"):
174            B_inv = pinv(B) if self.backend == "cpu" else jpinv(B)
175        else:
176            B_inv = pinv(B)
177
178        W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend)
179        S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend)
180
181        if sys_platform in ("Linux", "Darwin"):
182            S_inv = pinv(S_mat) if self.backend == "cpu" else jpinv(S_mat)
183        else:
184            S_inv = pinv(S_mat)
185
186        Y = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend)
187        inv = mo.rbind(
188            mo.cbind(
189                x=B_inv + mo.crossprod(x=W, y=Y, backend=self.backend),
190                y=-np.transpose(Y),
191                backend=self.backend,
192            ),
193            mo.cbind(x=-Y, y=S_inv, backend=self.backend),
194            backend=self.backend,
195        )
196
197        self.beta_ = mo.safe_sparse_dot(
198            a=inv,
199            b=mo.crossprod(x=scaled_Z, y=centered_y, backend=self.backend),
200            backend=self.backend,
201        )
202
203        return self

Fit Ridge model to training data (X, y).

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
def predict(self, X, **kwargs):
205    def predict(self, X, **kwargs):
206        """Predict test data X.
207
208        Args:
209
210            X: {array-like}, shape = [n_samples, n_features]
211                Training vectors, where n_samples is the number
212                of samples and n_features is the number of features.
213
214            **kwargs: additional parameters to be passed to
215                    self.cook_test_set
216
217        Returns:
218
219            model predictions: {array-like}
220
221        """
222
223        if len(X.shape) == 1:
224            n_features = X.shape[0]
225            new_X = mo.rbind(
226                x=X.reshape(1, n_features),
227                y=np.ones(n_features).reshape(1, n_features),
228                backend=self.backend,
229            )
230
231            return (
232                self.y_mean_
233                + mo.safe_sparse_dot(
234                    a=self.cook_test_set(new_X, **kwargs),
235                    b=self.beta_,
236                    backend=self.backend,
237                )
238            )[0]
239
240        return self.y_mean_ + mo.safe_sparse_dot(
241            a=self.cook_test_set(X, **kwargs),
242            b=self.beta_,
243            backend=self.backend,
244        )

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
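
In `fit` above, the inverse of the penalized Gram matrix of `[X_, Phi_X_]` is assembled block-wise from `B`, `C` and `D` via the Schur complement `S = D - C B^{-1} C^T`, instead of being computed directly. The numpy sketch below (illustrative only, on random matrices) checks the block-inverse identity the code relies on.

```python
import numpy as np
from numpy.linalg import pinv

rng = np.random.default_rng(0)
X_, Phi_ = rng.normal(size=(50, 3)), rng.normal(size=(50, 4))
lam1, lam2 = 0.1, 0.2

B = X_.T @ X_ + lam1 * np.eye(3)       # direct-link block
C = Phi_.T @ X_                        # cross block
D = Phi_.T @ Phi_ + lam2 * np.eye(4)   # hidden-layer block

B_inv = pinv(B)
W = C @ B_inv
S_inv = pinv(D - W @ C.T)              # inverse of the Schur complement
Y = S_inv @ W
inv_blocks = np.block([[B_inv + W.T @ Y, -Y.T],
                       [-Y, S_inv]])

full = np.block([[B, C.T], [C, D]])
print(np.allclose(inv_blocks, pinv(full)))  # True, up to numerical error
```
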
class Ridge2Classifier(nnetsauce.ridge2.ridge2.Ridge2, sklearn.base.ClassifierMixin):
 18class Ridge2Classifier(Ridge2, ClassifierMixin):
 19    """Multinomial logit classification with 2 regularization parameters
 20
 21    Parameters:
 22
 23        n_hidden_features: int
 24            number of nodes in the hidden layer
 25
 26        activation_name: str
 27            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 28
 29        a: float
 30            hyperparameter for 'prelu' or 'elu' activation function
 31
 32        nodes_sim: str
 33            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 34            'uniform'
 35
 36        bias: boolean
 37            indicates if the hidden layer contains a bias term (True) or not
 38            (False)
 39
 40        dropout: float
 41            regularization parameter; (random) percentage of nodes dropped out
 42            of the training
 43
 44        direct_link: boolean
 45            indicates if the original predictors are included (True) in model's
 46            fitting or not (False)
 47
 48        n_clusters: int
 49            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 50                no clustering)
 51
 52        cluster_encode: bool
 53            defines how the variable containing clusters is treated (default is one-hot)
 54            if `False`, then labels are used, without one-hot encoding
 55
 56        type_clust: str
 57            type of clustering method: currently k-means ('kmeans') or Gaussian
 58            Mixture Model ('gmm')
 59
 60        type_scaling: a tuple of 3 strings
 61            scaling methods for inputs, hidden layer, and clustering respectively
 62            (and when relevant).
 63            Currently available: standardization ('std') or MinMax scaling ('minmax')
 64
 65        lambda1: float
 66            regularization parameter on direct link
 67
 68        lambda2: float
 69            regularization parameter on hidden layer
 70
 71        solver: str
 72            optimization function "L-BFGS-B",  "Newton-CG",
 73            "trust-ncg", "L-BFGS-B-lstsq", "Newton-CG-lstsq",
 74            "trust-ncg-lstsq" (see scipy.optimize.minimize)
 75            When using "L-BFGS-B-lstsq", "Newton-CG-lstsq", or "trust-ncg-lstsq",
 76            the initial value for the optimization is set to the least squares solution
 77
 78        seed: int
 79            reproducibility seed for nodes_sim=='uniform'
 80
 81        backend: str
 82            "cpu" or "gpu" or "tpu"
 83
 84    Attributes:
 85
 86        beta_: {array-like}
 87            regression coefficients
 88
 89        classes_: {array-like}
 90            unique classes in the target variable
 91
 92        minloglik_: float
 93            minimum value of the negative log-likelihood
 94
 95    Examples:
 96
 97    See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/ridge_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/ridge_classification.py)
 98
 99    ```python
100    import nnetsauce as ns
101    import numpy as np
102    from sklearn.datasets import load_breast_cancer
103    from sklearn.model_selection import train_test_split
104    from time import time
105
106
107    breast_cancer = load_breast_cancer()
108    X = breast_cancer.data
109    y = breast_cancer.target
110
111    # split data into training set and test set
112    np.random.seed(123)
113    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
114
115    # create the model with nnetsauce
116    fit_obj = ns.Ridge2Classifier(lambda1 = 6.90185578e+04,
117                                lambda2 = 3.17392781e+02,
118                                n_hidden_features=95,
119                                n_clusters=2,
120                                dropout = 3.62817383e-01,
121                                type_clust = "gmm")
122
123    # fit the model on training set
124    start = time()
125    fit_obj.fit(X_train, y_train)
126    print(f"Elapsed {time() - start}")
127
128    # get the accuracy on test set
129    start = time()
130    print(fit_obj.score(X_test, y_test))
131    print(f"Elapsed {time() - start}")
132
133    # get area under the curve on test set (auc)
134    print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
135    ```
136
137
138    """
139
140    # construct the object -----
141
142    def __init__(
143        self,
144        n_hidden_features=5,
145        activation_name="relu",
146        a=0.01,
147        nodes_sim="sobol",
148        bias=True,
149        dropout=0,
150        direct_link=True,
151        n_clusters=2,
152        cluster_encode=True,
153        type_clust="kmeans",
154        type_scaling=("std", "std", "std"),
155        lambda1=0.1,
156        lambda2=0.1,
157        solver="L-BFGS-B",
158        seed=123,
159        backend="cpu",
160    ):
161        super().__init__(
162            n_hidden_features=n_hidden_features,
163            activation_name=activation_name,
164            a=a,
165            nodes_sim=nodes_sim,
166            bias=bias,
167            dropout=dropout,
168            direct_link=direct_link,
169            n_clusters=n_clusters,
170            cluster_encode=cluster_encode,
171            type_clust=type_clust,
172            type_scaling=type_scaling,
173            lambda1=lambda1,
174            lambda2=lambda2,
175            seed=seed,
176            backend=backend,
177        )
178
179        self.type_fit = "classification"
180        self.solver = solver
181        self.beta_ = None
182        self.classes_ = None
183        self.minloglik_ = None
184
185    def loglik(self, X, Y, **kwargs):
186        """Log-likelihood for training data (X, Y).
187
188        Args:
189
190            X: {array-like}, shape = [n_samples, n_features]
191                Training vectors, where n_samples is the number
192                of samples and n_features is the number of features.
193
194            Y: array-like, shape = [n_samples, n_classes]
195                One-hot encoded target values.
196
197            **kwargs: additional parameters to be passed to
198                    self.cook_training_set or self.obj.fit
199
200        Returns:
201
202        """
203
204        def loglik_grad_hess(Y, X, B, XB, hessian=True, **kwargs):
205            # nobs, n_classes
206            n, K = Y.shape
207
208            # total number of covariates
209            p = X.shape[1]
210
211            # initial number of covariates
212            init_p = p - self.n_hidden_features
213
214            max_double = 709.0
215            XB[XB > max_double] = max_double
216            exp_XB = np.exp(XB)
217            probs = exp_XB / exp_XB.sum(axis=1)[:, None]
218
219            # gradient -----
220            # (Y - p) -> (n, K)
221            # X -> (n, p)
222            # (K, n) %*% (n, p) -> (K, p)
223            if hessian is False:
224                grad = (
225                    -mo.safe_sparse_dot(a=(Y - probs).T, b=X, backend=self.backend) / n
226                )
227                grad += self.lambda1 * B[0:init_p, :].sum(axis=0)[:, None]
228                grad += self.lambda2 * B[init_p:p, :].sum(axis=0)[:, None]
229
230                return grad.flatten()
231
232            # hessian -----
233            if hessian is True:
234                Kp = K * p
235                hess = np.zeros((Kp, Kp), float)
236                for k1 in range(K):
237                    x_index = range(k1 * p, (k1 + 1) * p)
238                    for k2 in range(k1, K):
239                        y_index = range(k2 * p, (k2 + 1) * p)
240                        H_sub = (
241                            -mo.safe_sparse_dot(
242                                a=X.T,
243                                b=(probs[:, k1] * probs[:, k2])[:, None] * X,
244                                backend=self.backend,
245                            )
246                            / n
247                        )  # do not store
248                        hess[np.ix_(x_index, y_index)] = hess[
249                            np.ix_(y_index, x_index)
250                        ] = H_sub
251
252                return hess + (self.lambda1 + self.lambda2) * np.identity(Kp)
253
254        # total number of covariates
255        p = X.shape[1]
256
257        # initial number of covariates
258        init_p = p - self.n_hidden_features
259
260        # log-likelihood (1st return)
261        def loglik_func(x):
262            # (p, K)
263            B = x.reshape(Y.shape[1], p).T
264
265            # (n, K)
266            XB = mo.safe_sparse_dot(X, B, backend=self.backend)
267
268            res = -(np.sum(Y * XB, axis=1) - logsumexp(XB)).mean()
269
270            res += (
271                0.5
272                * self.lambda1
273                * mo.squared_norm(B[0:init_p, :], backend=self.backend)
274            )
275            res += (
276                0.5
277                * self.lambda2
278                * mo.squared_norm(B[init_p:p, :], backend=self.backend)
279            )
280
281            return res
282
283        # gradient of log-likelihood
284        def grad_func(x):
285            # (p, K)
286            B = x.reshape(Y.shape[1], p).T
287
288            return loglik_grad_hess(
289                Y=Y,
290                X=X,
291                B=B,
292                XB=mo.safe_sparse_dot(X, B, backend=self.backend),
293                hessian=False,
294                **kwargs
295            )
296
297        # hessian of log-likelihood
298        def hessian_func(x):
299            # (p, K)
300            B = x.reshape(Y.shape[1], p).T
301
302            return loglik_grad_hess(
303                Y=Y,
304                X=X,
305                B=B,
306                XB=mo.safe_sparse_dot(X, B, backend=self.backend),
307                hessian=True,
308                **kwargs
309            )
310
311        return loglik_func, grad_func, hessian_func
312
313    # newton-cg
314    # L-BFGS-B
315    def fit(self, X, y, **kwargs):
316        """Fit Ridge model to training data (X, y).
317
318        for beta: regression coeffs (beta11, ..., beta1p, ..., betaK1, ..., betaKp)
319        for K classes and p covariates.
320
321        Args:
322
323            X: {array-like}, shape = [n_samples, n_features]
324                Training vectors, where n_samples is the number
325                of samples and n_features is the number of features.
326
327            y: array-like, shape = [n_samples]
328                Target values.
329
330            **kwargs: additional parameters to be passed to
331                    self.cook_training_set or self.obj.fit
332
333        Returns:
334
335            self: object
336
337        """
338
339        assert mx.is_factor(y), "y must contain only integers"
340
341        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
342
343        self.n_classes = len(np.unique(y))
344        self.classes_ = np.unique(y)  # for compatibility with sklearn
345        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
346
347        Y = mo.one_hot_encode2(output_y, self.n_classes)
348
349        # optimize for beta, minimize self.loglik (maximize loglik) -----
350        loglik_func, grad_func, hessian_func = self.loglik(X=scaled_Z, Y=Y)
351
352        if self.solver == "L-BFGS-B":
353            opt = minimize(
354                fun=loglik_func,
355                x0=np.zeros(scaled_Z.shape[1] * self.n_classes),
356                jac=grad_func,
357                method=self.solver,
358            )
359            self.beta_ = opt.x
360            self.minloglik_ = opt.fun
361
362        if self.solver in ("Newton-CG", "trust-ncg"):
363            opt = minimize(
364                fun=loglik_func,
365                x0=np.zeros(scaled_Z.shape[1] * self.n_classes),
366                jac=grad_func,
367                hess=hessian_func,
368                method=self.solver,
369            )
370            self.beta_ = opt.x
371            self.minloglik_ = opt.fun
372
373        if self.solver == "L-BFGS-B-lstsq":
374            opt = minimize(
375                fun=loglik_func,
376                x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten(order="F"),
377                jac=grad_func,
378                method="L-BFGS-B",
379            )
380            self.beta_ = opt.x
381            self.minloglik_ = opt.fun
382
383        if self.solver == "Newton-CG-lstsq":
384            opt = minimize(
385                fun=loglik_func,
386                x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten(order="F"),
387                jac=grad_func,
388                hess=hessian_func,
389                method="Newton-CG",
390            )
391            self.beta_ = opt.x
392            self.minloglik_ = opt.fun
393
394        if self.solver == "trust-ncg-lstsq":
395            opt = minimize(
396                fun=loglik_func,
397                x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten(order="F"),
398                jac=grad_func,
399                hess=hessian_func,
400                method="trust-ncg",
401            )
402            self.beta_ = opt.x
403            self.minloglik_ = opt.fun
404
405        self.classes_ = np.unique(y)
406
407        return self
408
409    def predict(self, X, **kwargs):
410        """Predict test data X.
411
412        Args:
413
414            X: {array-like}, shape = [n_samples, n_features]
415                Training vectors, where n_samples is the number
416                of samples and n_features is the number of features.
417
418            **kwargs: additional parameters to be passed to
419                    self.cook_test_set
420
421        Returns:
422
423            model predictions: {array-like}
424        """
425
426        return np.argmax(self.predict_proba(X, **kwargs), axis=1)
427
428    def predict_proba(self, X, **kwargs):
429        """Predict probabilities for test data X.
430
431        Args:
432
433            X: {array-like}, shape = [n_samples, n_features]
434                Training vectors, where n_samples is the number
435                of samples and n_features is the number of features.
436
437            **kwargs: additional parameters to be passed to
438                    self.cook_test_set
439
440        Returns:
441
442            probability estimates for test data: {array-like}
443
444        """
445        if len(X.shape) == 1:
446            n_features = X.shape[0]
447            new_X = mo.rbind(
448                X.reshape(1, n_features),
449                np.ones(n_features).reshape(1, n_features),
450            )
451
452            Z = self.cook_test_set(new_X, **kwargs)
453
454        else:
455            Z = self.cook_test_set(X, **kwargs)
456
457        ZB = mo.safe_sparse_dot(
458            a=Z,
459            b=self.beta_.reshape(
460                self.n_classes,
461                X.shape[1] + self.n_hidden_features + self.n_clusters,
462            ).T,
463            backend=self.backend,
464        )
465
466        exp_ZB = np.exp(ZB)
467
468        return exp_ZB / exp_ZB.sum(axis=1)[:, None]

Multinomial logit classification with 2 regularization parameters

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

lambda1: float
    regularization parameter on direct link

lambda2: float
    regularization parameter on hidden layer

solver: str
    optimization function "L-BFGS-B",  "Newton-CG",
    "trust-ncg", "L-BFGS-B-lstsq", "Newton-CG-lstsq",
    "trust-ncg-lstsq" (see scipy.optimize.minimize)
    When using "L-BFGS-B-lstsq", "Newton-CG-lstsq", or "trust-ncg-lstsq",
    the initial value for the optimization is set to the least squares solution

seed: int
    reproducibility seed for nodes_sim=='uniform'

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

beta_: {array-like}
    regression coefficients

classes_: {array-like}
    unique classes in the target variable

minloglik_: float
    minimum value of the negative log-likelihood

Examples:

See also https://github.com/Techtonique/nnetsauce/blob/master/examples/ridge_classification.py

import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from time import time


breast_cancer = load_breast_cancer()
X = breast_cancer.data
y = breast_cancer.target

# split data into training set and test set
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# create the model with nnetsauce
fit_obj = ns.Ridge2Classifier(lambda1 = 6.90185578e+04,
                            lambda2 = 3.17392781e+02,
                            n_hidden_features=95,
                            n_clusters=2,
                            dropout = 3.62817383e-01,
                            type_clust = "gmm")

# fit the model on training set
start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")

# get the accuracy on test set
start = time()
print(fit_obj.score(X_test, y_test))
print(f"Elapsed {time() - start}")

# get area under the curve on test set (auc)
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
def fit(self, X, y, **kwargs):
315    def fit(self, X, y, **kwargs):
316        """Fit Ridge model to training data (X, y).
317
318        for beta: regression coeffs (beta11, ..., beta1p, ..., betaK1, ..., betaKp)
319        for K classes and p covariates.
320
321        Args:
322
323            X: {array-like}, shape = [n_samples, n_features]
324                Training vectors, where n_samples is the number
325                of samples and n_features is the number of features.
326
327            y: array-like, shape = [n_samples]
328                Target values.
329
330            **kwargs: additional parameters to be passed to
331                    self.cook_training_set or self.obj.fit
332
333        Returns:
334
335            self: object
336
337        """
338
339        assert mx.is_factor(y), "y must contain only integers"
340
341        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
342
343        self.n_classes = len(np.unique(y))
344        self.classes_ = np.unique(y)  # for compatibility with sklearn
345        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
346
347        Y = mo.one_hot_encode2(output_y, self.n_classes)
348
349        # optimize for beta, minimize self.loglik (maximize loglik) -----
350        loglik_func, grad_func, hessian_func = self.loglik(X=scaled_Z, Y=Y)
351
352        if self.solver == "L-BFGS-B":
353            opt = minimize(
354                fun=loglik_func,
355                x0=np.zeros(scaled_Z.shape[1] * self.n_classes),
356                jac=grad_func,
357                method=self.solver,
358            )
359            self.beta_ = opt.x
360            self.minloglik_ = opt.fun
361
362        if self.solver in ("Newton-CG", "trust-ncg"):
363            opt = minimize(
364                fun=loglik_func,
365                x0=np.zeros(scaled_Z.shape[1] * self.n_classes),
366                jac=grad_func,
367                hess=hessian_func,
368                method=self.solver,
369            )
370            self.beta_ = opt.x
371            self.minloglik_ = opt.fun
372
373        if self.solver == "L-BFGS-B-lstsq":
374            opt = minimize(
375                fun=loglik_func,
376                x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten(order="F"),
377                jac=grad_func,
378                method="L-BFGS-B",
379            )
380            self.beta_ = opt.x
381            self.minloglik_ = opt.fun
382
383        if self.solver == "Newton-CG-lstsq":
384            opt = minimize(
385                fun=loglik_func,
386                x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten(order="F"),
387                jac=grad_func,
388                hess=hessian_func,
389                method="Newton-CG",
390            )
391            self.beta_ = opt.x
392            self.minloglik_ = opt.fun
393
394        if self.solver == "trust-ncg-lstsq":
395            opt = minimize(
396                fun=loglik_func,
397                x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten(order="F"),
398                jac=grad_func,
399                hess=hessian_func,
400                method="trust-ncg",
401            )
402            self.beta_ = opt.x
403            self.minloglik_ = opt.fun
404
405        self.classes_ = np.unique(y)
406
407        return self

Fit Ridge model to training data (X, y).

The coefficient vector beta is laid out as (beta11, ..., beta1p, ..., betaK1, ..., betaKp) for K classes and p covariates.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
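
The `*-lstsq` solver variants only change the optimizer's starting point (the least-squares solution obtained with `np.linalg.lstsq`), which can speed up convergence on some datasets. A brief, hypothetical comparison sketch:

```python
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

for solver in ("L-BFGS-B", "L-BFGS-B-lstsq"):
    clf = ns.Ridge2Classifier(solver=solver, n_hidden_features=20)
    clf.fit(X_train, y_train)
    # minloglik_ holds the minimized (penalized) negative log-likelihood
    print(solver, clf.score(X_test, y_test), clf.minloglik_)
```
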
def predict(self, X, **kwargs):
409    def predict(self, X, **kwargs):
410        """Predict test data X.
411
412        Args:
413
414            X: {array-like}, shape = [n_samples, n_features]
415                Training vectors, where n_samples is the number
416                of samples and n_features is the number of features.
417
418            **kwargs: additional parameters to be passed to
419                    self.cook_test_set
420
421        Returns:
422
423            model predictions: {array-like}
424        """
425
426        return np.argmax(self.predict_proba(X, **kwargs), axis=1)

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
def predict_proba(self, X, **kwargs):
428    def predict_proba(self, X, **kwargs):
429        """Predict probabilities for test data X.
430
431        Args:
432
433            X: {array-like}, shape = [n_samples, n_features]
434                Training vectors, where n_samples is the number
435                of samples and n_features is the number of features.
436
437            **kwargs: additional parameters to be passed to
438                    self.cook_test_set
439
440        Returns:
441
442            probability estimates for test data: {array-like}
443
444        """
445        if len(X.shape) == 1:
446            n_features = X.shape[0]
447            new_X = mo.rbind(
448                X.reshape(1, n_features),
449                np.ones(n_features).reshape(1, n_features),
450            )
451
452            Z = self.cook_test_set(new_X, **kwargs)
453
454        else:
455            Z = self.cook_test_set(X, **kwargs)
456
457        ZB = mo.safe_sparse_dot(
458            a=Z,
459            b=self.beta_.reshape(
460                self.n_classes,
461                X.shape[1] + self.n_hidden_features + self.n_clusters,
462            ).T,
463            backend=self.backend,
464        )
465
466        exp_ZB = np.exp(ZB)
467
468        return exp_ZB / exp_ZB.sum(axis=1)[:, None]

Predict probabilities for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

probability estimates for test data: {array-like}
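
The returned probabilities are a plain softmax of the per-class linear scores, so each row sums to one and `predict` is simply the row-wise argmax. A quick, hypothetical check:

```python
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer

X, y = load_breast_cancer(return_X_y=True)
clf = ns.Ridge2Classifier().fit(X, y)

proba = clf.predict_proba(X)
print(np.allclose(proba.sum(axis=1), 1.0))                    # rows sum to one
print(np.array_equal(clf.predict(X), proba.argmax(axis=1)))   # argmax matches predict
```
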
class Ridge2MultitaskClassifier(nnetsauce.ridge2.ridge2.Ridge2, sklearn.base.ClassifierMixin):
 23class Ridge2MultitaskClassifier(Ridge2, ClassifierMixin):
 24    """Multitask Ridge classification with 2 regularization parameters
 25
 26    Parameters:
 27
 28        n_hidden_features: int
 29            number of nodes in the hidden layer
 30
 31        activation_name: str
 32            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 33
 34        a: float
 35            hyperparameter for 'prelu' or 'elu' activation function
 36
 37        nodes_sim: str
 38            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 39            'uniform'
 40
 41        bias: boolean
 42            indicates if the hidden layer contains a bias term (True) or not
 43            (False)
 44
 45        dropout: float
 46            regularization parameter; (random) percentage of nodes dropped out
 47            of the training
 48
 49        n_clusters: int
 50            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 51                no clustering)
 52
 53        cluster_encode: bool
 54            defines how the variable containing clusters is treated (default is one-hot)
 55            if `False`, then labels are used, without one-hot encoding
 56
 57        type_clust: str
 58            type of clustering method: currently k-means ('kmeans') or Gaussian
 59            Mixture Model ('gmm')
 60
 61        type_scaling: a tuple of 3 strings
 62            scaling methods for inputs, hidden layer, and clustering respectively
 63            (and when relevant).
 64            Currently available: standardization ('std') or MinMax scaling ('minmax')
 65
 66        lambda1: float
 67            regularization parameter on direct link
 68
 69        lambda2: float
 70            regularization parameter on hidden layer
 71
 72        seed: int
 73            reproducibility seed for nodes_sim=='uniform'
 74
 75        backend: str
 76            "cpu" or "gpu" or "tpu"
 77
 78    Attributes:
 79
 80        beta_: {array-like}
 81            regression coefficients
 82
 83    Examples:
 84
 85    See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/ridgemtask_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/ridgemtask_classification.py)
 86
 87    ```python
 88    import nnetsauce as ns
 89    import numpy as np
 90    from sklearn.datasets import load_breast_cancer
 91    from sklearn.model_selection import train_test_split
 92    from sklearn import metrics
 93    from time import time
 94
 95    breast_cancer = load_breast_cancer()
 96    Z = breast_cancer.data
 97    t = breast_cancer.target
 98    np.random.seed(123)
 99    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)
100
101    fit_obj = ns.Ridge2MultitaskClassifier(n_hidden_features=int(9.83730469e+01),
102                                    dropout=4.31054687e-01,
103                                    n_clusters=int(1.71484375e+00),
104                                    lambda1=1.24023438e+01, lambda2=7.30263672e+03)
105
106    start = time()
107    fit_obj.fit(X_train, y_train)
108    print(f"Elapsed {time() - start}")
109
110    print(fit_obj.score(X_test, y_test))
111    print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
112
113    start = time()
114    preds = fit_obj.predict(X_test)
115    print(f"Elapsed {time() - start}")
116    print(metrics.classification_report(preds, y_test))
117    ```
118
119    """
120
121    # construct the object -----
122
123    def __init__(
124        self,
125        n_hidden_features=5,
126        activation_name="relu",
127        a=0.01,
128        nodes_sim="sobol",
129        bias=True,
130        dropout=0,
131        n_clusters=2,
132        cluster_encode=True,
133        type_clust="kmeans",
134        type_scaling=("std", "std", "std"),
135        lambda1=0.1,
136        lambda2=0.1,
137        seed=123,
138        backend="cpu",
139    ):
140        super().__init__(
141            n_hidden_features=n_hidden_features,
142            activation_name=activation_name,
143            a=a,
144            nodes_sim=nodes_sim,
145            bias=bias,
146            dropout=dropout,
147            n_clusters=n_clusters,
148            cluster_encode=cluster_encode,
149            type_clust=type_clust,
150            type_scaling=type_scaling,
151            lambda1=lambda1,
152            lambda2=lambda2,
153            seed=seed,
154            backend=backend,
155        )
156
157        self.type_fit = "classification"
158
159    def fit(self, X, y, **kwargs):
160        """Fit Ridge model to training data (X, y).
161
162        Args:
163
164            X: {array-like}, shape = [n_samples, n_features]
165                Training vectors, where n_samples is the number
166                of samples and n_features is the number of features.
167
168            y: array-like, shape = [n_samples]
169                Target values.
170
171            **kwargs: additional parameters to be passed to
172                    self.cook_training_set or self.obj.fit
173
174        Returns:
175
176            self: object
177
178        """
179
180        sys_platform = platform.system()
181
182        assert mx.is_factor(y), "y must contain only integers"
183
184        self.classes_ = np.unique(y)  # for compatibility with sklearn
185        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
186
187        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
188
189        n_X, p_X = X.shape
190        n_Z, p_Z = scaled_Z.shape
191
192        self.n_classes = len(np.unique(y))
193
194        # multitask response
195        Y = mo.one_hot_encode2(output_y, self.n_classes)
196
197        if self.n_clusters > 0:
198            if self.encode_clusters == True:
199                n_features = p_X + self.n_clusters
200            else:
201                n_features = p_X + 1
202        else:
203            n_features = p_X
204
205        X_ = scaled_Z[:, 0:n_features]
206        Phi_X_ = scaled_Z[:, n_features:p_Z]
207
208        B = mo.crossprod(x=X_, backend=self.backend) + self.lambda1 * np.diag(
209            np.repeat(1, X_.shape[1])
210        )
211        C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend)
212        D = mo.crossprod(x=Phi_X_, backend=self.backend) + self.lambda2 * np.diag(
213            np.repeat(1, Phi_X_.shape[1])
214        )
215
216        if sys_platform in ("Linux", "Darwin"):
217            B_inv = pinv(B) if self.backend == "cpu" else jpinv(B)
218        else:
219            B_inv = pinv(B)
220
221        W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend)
222        S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend)
223
224        if sys_platform in ("Linux", "Darwin"):
225            S_inv = pinv(S_mat) if self.backend == "cpu" else jpinv(S_mat)
226        else:
227            S_inv = pinv(S_mat)
228
229        Y2 = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend)
230        inv = mo.rbind(
231            mo.cbind(
232                x=B_inv + mo.crossprod(x=W, y=Y2, backend=self.backend),
233                y=-np.transpose(Y2),
234                backend=self.backend,
235            ),
236            mo.cbind(x=-Y2, y=S_inv, backend=self.backend),
237            backend=self.backend,
238        )
239
240        self.beta_ = mo.safe_sparse_dot(
241            a=inv,
242            b=mo.crossprod(x=scaled_Z, y=Y, backend=self.backend),
243            backend=self.backend,
244        )
245        self.classes_ = np.unique(y)
246        return self
247
248    def predict(self, X, **kwargs):
249        """Predict test data X.
250
251        Args:
252
253            X: {array-like}, shape = [n_samples, n_features]
254                Training vectors, where n_samples is the number
255                of samples and n_features is the number of features.
256
257            **kwargs: additional parameters to be passed to
258                    self.cook_test_set
259
260        Returns:
261
262            model predictions: {array-like}
263
264        """
265
266        return np.argmax(self.predict_proba(X, **kwargs), axis=1)
267
268    def predict_proba(self, X, **kwargs):
269        """Predict probabilities for test data X.
270
271        Args:
272
273            X: {array-like}, shape = [n_samples, n_features]
274                Training vectors, where n_samples is the number
275                of samples and n_features is the number of features.
276
277            **kwargs: additional parameters to be passed to
278                    self.cook_test_set
279
280        Returns:
281
282            probability estimates for test data: {array-like}
283
284        """
285
286        if len(X.shape) == 1:
287            n_features = X.shape[0]
288            new_X = mo.rbind(
289                x=X.reshape(1, n_features),
290                y=np.ones(n_features).reshape(1, n_features),
291                backend=self.backend,
292            )
293
294            Z = self.cook_test_set(new_X, **kwargs)
295
296        else:
297            Z = self.cook_test_set(X, **kwargs)
298
299        ZB = mo.safe_sparse_dot(a=Z, b=self.beta_, backend=self.backend)
300
301        exp_ZB = np.exp(ZB)
302
303        return exp_ZB / exp_ZB.sum(axis=1)[:, None]
304
305    def score(self, X, y, scoring=None):
306        """Scoring function for classification.
307
308        Args:
309
310            X: {array-like}, shape = [n_samples, n_features]
311                Training vectors, where n_samples is the number
312                of samples and n_features is the number of features.
313
314            y: array-like, shape = [n_samples]
315                Target values.
316
317            scoring: str
318                scoring method (default is accuracy)
319
320        Returns:
321
322            score: float
323        """
324
325        if scoring is None:
326            scoring = "accuracy"
327
328        if scoring == "accuracy":
329            return skm2.accuracy_score(y, self.predict(X))
330
331        if scoring == "f1":
332            return skm2.f1_score(y, self.predict(X))
333
334        if scoring == "precision":
335            return skm2.precision_score(y, self.predict(X))
336
337        if scoring == "recall":
338            return skm2.recall_score(y, self.predict(X))
339
340        if scoring == "roc_auc":
341            return skm2.roc_auc_score(y, self.predict(X))
342
343        if scoring == "log_loss":
344            return skm2.log_loss(y, self.predict_proba(X))
345
346        if scoring == "balanced_accuracy":
347            return skm2.balanced_accuracy_score(y, self.predict(X))
348
349        if scoring == "average_precision":
350            return skm2.average_precision_score(y, self.predict(X))
351
352        if scoring == "neg_brier_score":
353            return -skm2.brier_score_loss(y, self.predict_proba(X))
354
355        if scoring == "neg_log_loss":
356            return -skm2.log_loss(y, self.predict_proba(X))

Multitask Ridge classification with 2 regularization parameters

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

lambda1: float
    regularization parameter on direct link

lambda2: float
    regularization parameter on hidden layer

seed: int
    reproducibility seed for nodes_sim=='uniform'

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

beta_: {array-like}
    regression coefficients

Examples:

See also https://github.com/Techtonique/nnetsauce/blob/master/examples/ridgemtask_classification.py

import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time

breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)

fit_obj = ns.Ridge2MultitaskClassifier(n_hidden_features=int(9.83730469e+01),
                                dropout=4.31054687e-01,
                                n_clusters=int(1.71484375e+00),
                                lambda1=1.24023438e+01, lambda2=7.30263672e+03)

start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")

print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))

start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(y_test, preds))
def fit(self, X, y, **kwargs):
159    def fit(self, X, y, **kwargs):
160        """Fit Ridge model to training data (X, y).
161
162        Args:
163
164            X: {array-like}, shape = [n_samples, n_features]
165                Training vectors, where n_samples is the number
166                of samples and n_features is the number of features.
167
168            y: array-like, shape = [n_samples]
169                Target values.
170
171            **kwargs: additional parameters to be passed to
172                    self.cook_training_set or self.obj.fit
173
174        Returns:
175
176            self: object
177
178        """
179
180        sys_platform = platform.system()
181
182        assert mx.is_factor(y), "y must contain only integers"
183
184        self.classes_ = np.unique(y)  # for compatibility with sklearn
185        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
186
187        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
188
189        n_X, p_X = X.shape
190        n_Z, p_Z = scaled_Z.shape
191
192        self.n_classes = len(np.unique(y))
193
194        # multitask response
195        Y = mo.one_hot_encode2(output_y, self.n_classes)
196
197        if self.n_clusters > 0:
198            if self.encode_clusters == True:
199                n_features = p_X + self.n_clusters
200            else:
201                n_features = p_X + 1
202        else:
203            n_features = p_X
204
205        X_ = scaled_Z[:, 0:n_features]
206        Phi_X_ = scaled_Z[:, n_features:p_Z]
207
208        B = mo.crossprod(x=X_, backend=self.backend) + self.lambda1 * np.diag(
209            np.repeat(1, X_.shape[1])
210        )
211        C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend)
212        D = mo.crossprod(x=Phi_X_, backend=self.backend) + self.lambda2 * np.diag(
213            np.repeat(1, Phi_X_.shape[1])
214        )
215
216        if sys_platform in ("Linux", "Darwin"):
217            B_inv = pinv(B) if self.backend == "cpu" else jpinv(B)
218        else:
219            B_inv = pinv(B)
220
221        W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend)
222        S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend)
223
224        if sys_platform in ("Linux", "Darwin"):
225            S_inv = pinv(S_mat) if self.backend == "cpu" else jpinv(S_mat)
226        else:
227            S_inv = pinv(S_mat)
228
229        Y2 = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend)
230        inv = mo.rbind(
231            mo.cbind(
232                x=B_inv + mo.crossprod(x=W, y=Y2, backend=self.backend),
233                y=-np.transpose(Y2),
234                backend=self.backend,
235            ),
236            mo.cbind(x=-Y2, y=S_inv, backend=self.backend),
237            backend=self.backend,
238        )
239
240        self.beta_ = mo.safe_sparse_dot(
241            a=inv,
242            b=mo.crossprod(x=scaled_Z, y=Y, backend=self.backend),
243            backend=self.backend,
244        )
245        self.classes_ = np.unique(y)
246        return self

Fit Ridge model to training data (X, y).

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
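
For readers unpacking the fit source above: the model appears to solve a single ridge system over the concatenated direct-link and hidden-layer columns, with lambda1 penalizing the former and lambda2 the latter, and it inverts the penalized Gram matrix blockwise through a Schur complement. Below is a minimal NumPy sketch of that identity (illustrative only; B, C, D, W, Y2 mirror the names used in the source and are not a public API):

import numpy as np

rng = np.random.default_rng(0)
n, p, q = 50, 3, 5
X_ = rng.standard_normal((n, p))    # direct-link columns
Phi_ = rng.standard_normal((n, q))  # hidden-layer columns
lambda1, lambda2 = 10.0, 100.0

Z = np.hstack([X_, Phi_])
penalty = np.diag(np.concatenate([np.repeat(lambda1, p), np.repeat(lambda2, q)]))

# direct inverse of the penalized Gram matrix Z'Z + diag(lambda1*I, lambda2*I)
direct = np.linalg.inv(Z.T @ Z + penalty)

# blockwise inverse via the Schur complement, mirroring fit()
B = X_.T @ X_ + lambda1 * np.eye(p)
C = Phi_.T @ X_
D = Phi_.T @ Phi_ + lambda2 * np.eye(q)
B_inv = np.linalg.inv(B)
W = C @ B_inv                        # C B^{-1}
S_inv = np.linalg.inv(D - W @ C.T)   # inverse of the Schur complement
Y2 = S_inv @ W
blockwise = np.block([[B_inv + W.T @ Y2, -Y2.T],
                      [-Y2, S_inv]])

assert np.allclose(direct, blockwise)

fit then multiplies this inverse by crossprod(scaled_Z, Y), where Y is the one-hot encoded response, to obtain beta_.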
def predict(self, X, **kwargs):
248    def predict(self, X, **kwargs):
249        """Predict test data X.
250
251        Args:
252
253            X: {array-like}, shape = [n_samples, n_features]
254                Training vectors, where n_samples is the number
255                of samples and n_features is the number of features.
256
257            **kwargs: additional parameters to be passed to
258                    self.cook_test_set
259
260        Returns:
261
262            model predictions: {array-like}
263
264        """
265
266        return np.argmax(self.predict_proba(X, **kwargs), axis=1)

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
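
A minimal usage sketch, reusing fit_obj, X_test and np from the class example above: as the source shows, predict is simply the argmax over the columns of predict_proba.

preds = fit_obj.predict(X_test)
probs = fit_obj.predict_proba(X_test)
assert (preds == np.argmax(probs, axis=1)).all()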
def predict_proba(self, X, **kwargs):
268    def predict_proba(self, X, **kwargs):
269        """Predict probabilities for test data X.
270
271        Args:
272
273            X: {array-like}, shape = [n_samples, n_features]
274                Training vectors, where n_samples is the number
275                of samples and n_features is the number of features.
276
277            **kwargs: additional parameters to be passed to
278                    self.cook_test_set
279
280        Returns:
281
282            probability estimates for test data: {array-like}
283
284        """
285
286        if len(X.shape) == 1:
287            n_features = X.shape[0]
288            new_X = mo.rbind(
289                x=X.reshape(1, n_features),
290                y=np.ones(n_features).reshape(1, n_features),
291                backend=self.backend,
292            )
293
294            Z = self.cook_test_set(new_X, **kwargs)
295
296        else:
297            Z = self.cook_test_set(X, **kwargs)
298
299        ZB = mo.safe_sparse_dot(a=Z, b=self.beta_, backend=self.backend)
300
301        exp_ZB = np.exp(ZB)
302
303        return exp_ZB / exp_ZB.sum(axis=1)[:, None]

Predict probabilities for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

probability estimates for test data: {array-like}
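
As the source above shows, the returned matrix is a row-wise softmax of Z @ beta_, so each row is nonnegative and sums to one. A quick check, reusing fit_obj and X_test from the class example:

probs = fit_obj.predict_proba(X_test)
print(probs.shape)                          # (n_samples, n_classes)
assert np.allclose(probs.sum(axis=1), 1.0)  # softmax rows sum to one
assert (probs >= 0).all()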
def score(self, X, y, scoring=None):
305    def score(self, X, y, scoring=None):
306        """Scoring function for classification.
307
308        Args:
309
310            X: {array-like}, shape = [n_samples, n_features]
311                Training vectors, where n_samples is the number
312                of samples and n_features is the number of features.
313
314            y: array-like, shape = [n_samples]
315                Target values.
316
317            scoring: str
318                scoring method (default is accuracy)
319
320        Returns:
321
322            score: float
323        """
324
325        if scoring is None:
326            scoring = "accuracy"
327
328        if scoring == "accuracy":
329            return skm2.accuracy_score(y, self.predict(X))
330
331        if scoring == "f1":
332            return skm2.f1_score(y, self.predict(X))
333
334        if scoring == "precision":
335            return skm2.precision_score(y, self.predict(X))
336
337        if scoring == "recall":
338            return skm2.recall_score(y, self.predict(X))
339
340        if scoring == "roc_auc":
341            return skm2.roc_auc_score(y, self.predict(X))
342
343        if scoring == "log_loss":
344            return skm2.log_loss(y, self.predict_proba(X))
345
346        if scoring == "balanced_accuracy":
347            return skm2.balanced_accuracy_score(y, self.predict(X))
348
349        if scoring == "average_precision":
350            return skm2.average_precision_score(y, self.predict(X))
351
352        if scoring == "neg_brier_score":
353            return -skm2.brier_score_loss(y, self.predict_proba(X))
354
355        if scoring == "neg_log_loss":
356            return -skm2.log_loss(y, self.predict_proba(X))

Scoring function for classification.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

scoring: str
    scoring method among 'accuracy' (default), 'f1', 'precision', 'recall',
    'roc_auc', 'log_loss', 'balanced_accuracy', 'average_precision',
    'neg_brier_score' and 'neg_log_loss'

Returns:

score: float
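
A short sketch, reusing fit_obj, X_test and y_test from the class example above; each scoring string dispatches to the corresponding scikit-learn metric computed on predict (or on predict_proba for the loss- and Brier-based scores).

from sklearn import metrics

acc = fit_obj.score(X_test, y_test)                   # default: accuracy
f1 = fit_obj.score(X_test, y_test, scoring="f1")
# cross-check against the underlying scikit-learn metrics
print(acc, metrics.accuracy_score(y_test, fit_obj.predict(X_test)))
print(f1, metrics.f1_score(y_test, fit_obj.predict(X_test)))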
class SubSampler:
 6class SubSampler:
 7    """Subsampling class.
 8
 9    Attributes:
10
11       y: array-like, shape = [n_samples]
12           Target values.
13
14       row_sample: double
15           subsampling fraction
16
17       n_samples: int
18            subsampling by using the number of rows (supersedes row_sample)
19
20       seed: int
21           reproducibility seed
22
23       n_jobs: int
24            number of jobs to run in parallel
25
26       verbose: bool
27            print progress messages and bars
28    """
29
30    def __init__(
31        self,
32        y,
33        row_sample=0.8,
34        n_samples=None,
35        seed=123,
36        n_jobs=None,
37        verbose=False,
38    ):
39        self.y = y
40        self.n_samples = n_samples
41        if self.n_samples is None:
42            assert (
43                row_sample < 1 and row_sample >= 0
44            ), "'row_sample' must satisfy 0 <= row_sample < 1"
45            self.row_sample = row_sample
46        else:
47            assert self.n_samples < len(y), "'n_samples' must be < len(y)"
48            self.row_sample = self.n_samples / len(y)
49        self.seed = seed
50        self.indices = None
51        self.n_jobs = n_jobs
52        self.verbose = verbose
53
54    def subsample(self):
55        """Returns indices of subsampled input data.
56
57        Examples:
58
59        <ul>
60            <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240105_subsampling.ipynb">20240105_subsampling.ipynb</a> </li>
61            <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240131_subsampling_nsamples.ipynb">20240131_subsampling_nsamples.ipynb</a> </li>
62        </ul>
63
64        """
65        self.indices = dosubsample(
66            y=self.y,
67            row_sample=self.row_sample,
68            seed=self.seed,
69            n_jobs=self.n_jobs,
70            verbose=self.verbose,
71        )
72        return self.indices

Subsampling class.

Attributes:

y: array-like, shape = [n_samples]
    Target values.

row_sample: double
    subsampling fraction

n_samples: int
    subsampling by using the number of rows (supersedes row_sample)

seed: int
    reproducibility seed

n_jobs: int
    number of jobs to run in parallel

verbose: bool
    print progress messages and bars
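
A minimal usage sketch (illustrative; X_train and y_train stand for any training split, as in the classifier example earlier): subsample() returns row indices that can then be used to subset the data.

import nnetsauce as ns

sub = ns.SubSampler(y=y_train, row_sample=0.5, seed=123)
idx = sub.subsample()                  # indices of the retained rows
X_small, y_small = X_train[idx, :], y_train[idx]
print(len(idx), "rows kept out of", len(y_train))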

def subsample(self):
54    def subsample(self):
55        """Returns indices of subsampled input data.
56
57        Examples:
58
59        <ul>
60            <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240105_subsampling.ipynb">20240105_subsampling.ipynb</a> </li>
61            <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240131_subsampling_nsamples.ipynb">20240131_subsampling_nsamples.ipynb</a> </li>
62        </ul>
63
64        """
65        self.indices = dosubsample(
66            y=self.y,
67            row_sample=self.row_sample,
68            seed=self.seed,
69            n_jobs=self.n_jobs,
70            verbose=self.verbose,
71        )
72        return self.indices

Returns indices of subsampled input data.

Examples: