nnetsauce

 1from .base.base import Base
 2from .base.baseRegressor import BaseRegressor
 3from .boosting.adaBoostClassifier import AdaBoostClassifier
 4from .custom.customClassifier import CustomClassifier
 5from .custom.customRegressor import CustomRegressor
 6from .custom.customBackpropRegressor import CustomBackPropRegressor
 7from .datasets import Downloader
 8from .deep.deepClassifier import DeepClassifier
 9from .deep.deepRegressor import DeepRegressor
10from .deep.deepMTS import DeepMTS
11from .glm.glmClassifier import GLMClassifier
12from .glm.glmRegressor import GLMRegressor
13from .kernel.kernel import KernelRidge
14from .lazypredict.lazydeepClassifier import LazyDeepClassifier, LazyClassifier
15from .lazypredict.lazydeepRegressor import LazyDeepRegressor, LazyRegressor
16from .lazypredict.lazydeepClassifier import LazyDeepClassifier
17from .lazypredict.lazydeepRegressor import LazyDeepRegressor
18from .lazypredict.lazydeepMTS import LazyDeepMTS, LazyMTS
19from .mts.mts import MTS
20from .mts.mlarch import MLARCH
21from .mts.classical import ClassicalMTS
22from .multitask.multitaskClassifier import MultitaskClassifier
23from .multitask.simplemultitaskClassifier import SimpleMultitaskClassifier
24from .neuralnet.neuralnetregression import NeuralNetRegressor
25from .neuralnet.neuralnetclassification import NeuralNetClassifier
26from .optimizers.optimizer import Optimizer
27from .predictioninterval import PredictionInterval
28from .quantile.quantileregression import QuantileRegressor
29from .quantile.quantileclassification import QuantileClassifier
30from .randombag.randomBagClassifier import RandomBagClassifier
31from .randombag.randomBagRegressor import RandomBagRegressor
32from .ridge.ridge import RidgeRegressor
33from .ridge2.ridge2Classifier import Ridge2Classifier
34from .ridge2.ridge2Regressor import Ridge2Regressor
35from .ridge2.ridge2MultitaskClassifier import Ridge2MultitaskClassifier
36from .ridge2.elasticNet2Regressor import ElasticNet2Regressor
37from .rvfl.bayesianrvflRegressor import BayesianRVFLRegressor
38from .rvfl.bayesianrvfl2Regressor import BayesianRVFL2Regressor
39from .sampling import SubSampler
40from .updater import RegressorUpdater, ClassifierUpdater
41from .votingregressor import MedianVotingRegressor
42
43__all__ = [
44    "AdaBoostClassifier",
45    "Base",
46    "BaseRegressor",
47    "BayesianRVFLRegressor",
48    "BayesianRVFL2Regressor",
49    "ClassicalMTS",
50    "CustomClassifier",
51    "CustomRegressor",
52    "CustomBackPropRegressor",
53    "DeepClassifier",
54    "DeepRegressor",
55    "DeepMTS",
56    "Downloader",
57    "ElasticNet2Regressor",
58    "GLMClassifier",
59    "GLMRegressor",
60    "KernelRidge",
61    "LazyClassifier",
62    "LazyRegressor",
63    "LazyDeepClassifier",
64    "LazyDeepRegressor",
65    "LazyMTS",
66    "LazyDeepMTS",
67    "MLARCH",
68    "MedianVotingRegressor",
69    "MTS",
70    "MultitaskClassifier",
71    "NeuralNetRegressor",
72    "NeuralNetClassifier",
73    "PredictionInterval",
74    "SimpleMultitaskClassifier",
75    "Optimizer",
76    "QuantileRegressor",
77    "QuantileClassifier",
78    "RandomBagRegressor",
79    "RandomBagClassifier",
80    "RegressorUpdater",
81    "ClassifierUpdater",
82    "RidgeRegressor",
83    "Ridge2Regressor",
84    "Ridge2Classifier",
85    "Ridge2MultitaskClassifier",
86    "SubSampler",
87]
class AdaBoostClassifier(nnetsauce.boosting.bst.Boosting, sklearn.base.ClassifierMixin):
 21class AdaBoostClassifier(Boosting, ClassifierMixin):
 22    """AdaBoost Classification (SAMME) model class derived from class Boosting
 23
 24    Parameters:
 25
 26        obj: object
 27            any object containing a method fit (obj.fit()) and a method predict
 28            (obj.predict())
 29
 30        n_estimators: int
 31            number of boosting iterations
 32
 33        learning_rate: float
 34            learning rate of the boosting procedure
 35
 36        n_hidden_features: int
 37            number of nodes in the hidden layer
 38
 39        reg_lambda: float
 40            regularization parameter for weights
 41
 42        reg_alpha: float
 43            controls compromize between l1 and l2 norm of weights
 44
 45        activation_name: str
 46            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 47
 48        a: float
 49            hyperparameter for 'prelu' or 'elu' activation function
 50
 51        nodes_sim: str
 52            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 53            'uniform'
 54
 55        bias: boolean
 56            indicates if the hidden layer contains a bias term (True) or not
 57            (False)
 58
 59        dropout: float
 60            regularization parameter; (random) percentage of nodes dropped out
 61            of the training
 62
 63        direct_link: boolean
 64            indicates if the original predictors are included (True) in model's
 65            fitting or not (False)
 66
 67        n_clusters: int
 68            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 69                no clustering)
 70
 71        cluster_encode: bool
 72            defines how the variable containing clusters is treated (default is one-hot)
 73            if `False`, then labels are used, without one-hot encoding
 74
 75        type_clust: str
 76            type of clustering method: currently k-means ('kmeans') or Gaussian
 77            Mixture Model ('gmm')
 78
 79        type_scaling: a tuple of 3 strings
 80            scaling methods for inputs, hidden layer, and clustering respectively
 81            (and when relevant).
 82            Currently available: standardization ('std') or MinMax scaling ('minmax')
 83
 84        col_sample: float
 85            percentage of covariates randomly chosen for training
 86
 87        row_sample: float
 88            percentage of rows chosen for training, by stratified bootstrapping
 89
 90        seed: int
 91            reproducibility seed for nodes_sim=='uniform'
 92
 93        verbose: int
 94            0 for no output, 1 for a progress bar (default is 1)
 95
 96        method: str
 97            type of Adaboost method, 'SAMME' (discrete) or 'SAMME.R' (real)
 98
 99        backend: str
100            "cpu" or "gpu" or "tpu"
101
102    Attributes:
103
104        alpha_: list
105            AdaBoost coefficients alpha_m
106
107        base_learners_: dict
108            a dictionary containing the base learners
109
110    Examples:
111
112    See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/adaboost_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/adaboost_classification.py)
113
114    ```python
115    import nnetsauce as ns
116    import numpy as np
117    from sklearn.datasets import load_breast_cancer
118    from sklearn.linear_model import LogisticRegression
119    from sklearn.model_selection import train_test_split
120    from sklearn import metrics
121    from time import time
122
123    breast_cancer = load_breast_cancer()
124    Z = breast_cancer.data
125    t = breast_cancer.target
126    np.random.seed(123)
127    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)
128
129    # SAMME.R
130    clf = LogisticRegression(solver='liblinear', multi_class = 'ovr',
131                            random_state=123)
132    fit_obj = ns.AdaBoostClassifier(clf,
133                                    n_hidden_features=int(11.22338867),
134                                    direct_link=True,
135                                    n_estimators=250, learning_rate=0.01126343,
136                                    col_sample=0.72684326, row_sample=0.86429443,
137                                    dropout=0.63078613, n_clusters=2,
138                                    type_clust="gmm",
139                                    verbose=1, seed = 123,
140                                    method="SAMME.R")
141
142    start = time()
143    fit_obj.fit(X_train, y_train)
144    print(f"Elapsed {time() - start}")
145
146    start = time()
147    print(fit_obj.score(X_test, y_test))
148    print(f"Elapsed {time() - start}")
149
150    preds = fit_obj.predict(X_test)
151
152    print(metrics.classification_report(preds, y_test))
153
154    ```
155
156    """
157
158    # construct the object -----
159    _estimator_type = "classifier"
160
161    def __init__(
162        self,
163        obj,
164        n_estimators=10,
165        learning_rate=0.1,
166        n_hidden_features=1,
167        reg_lambda=0,
168        reg_alpha=0.5,
169        activation_name="relu",
170        a=0.01,
171        nodes_sim="sobol",
172        bias=True,
173        dropout=0,
174        direct_link=False,
175        n_clusters=2,
176        cluster_encode=True,
177        type_clust="kmeans",
178        type_scaling=("std", "std", "std"),
179        col_sample=1,
180        row_sample=1,
181        seed=123,
182        verbose=1,
183        method="SAMME",
184        backend="cpu",
185    ):
186        self.type_fit = "classification"
187        self.verbose = verbose
188        self.method = method
189        self.reg_lambda = reg_lambda
190        self.reg_alpha = reg_alpha
191
192        super().__init__(
193            obj=obj,
194            n_estimators=n_estimators,
195            learning_rate=learning_rate,
196            n_hidden_features=n_hidden_features,
197            activation_name=activation_name,
198            a=a,
199            nodes_sim=nodes_sim,
200            bias=bias,
201            dropout=dropout,
202            direct_link=direct_link,
203            n_clusters=n_clusters,
204            cluster_encode=cluster_encode,
205            type_clust=type_clust,
206            type_scaling=type_scaling,
207            col_sample=col_sample,
208            row_sample=row_sample,
209            seed=seed,
210            backend=backend,
211        )
212
213        self.alpha_ = []
214        self.base_learners_ = dict.fromkeys(range(n_estimators))
215
216    def fit(self, X, y, sample_weight=None, **kwargs):
217        """Fit Boosting model to training data (X, y).
218
219        Parameters:
220
221            X: {array-like}, shape = [n_samples, n_features]
222                Training vectors, where n_samples is the number
223                of samples and n_features is the number of features.
224
225            y: array-like, shape = [n_samples]
226                Target values.
227
228            **kwargs: additional parameters to be passed to
229                    self.cook_training_set or self.obj.fit
230
231        Returns:
232
233             self: object
234        """
235
236        assert mx.is_factor(y), "y must contain only integers"
237
238        assert self.method in (
239            "SAMME",
240            "SAMME.R",
241        ), "`method` must be either 'SAMME' or 'SAMME.R'"
242
243        assert (self.reg_lambda <= 1) & (
244            self.reg_lambda >= 0
245        ), "must have self.reg_lambda <= 1 &  self.reg_lambda >= 0"
246
247        assert (self.reg_alpha <= 1) & (
248            self.reg_alpha >= 0
249        ), "must have self.reg_alpha <= 1 &  self.reg_alpha >= 0"
250
251        # training
252        n, p = X.shape
253        self.n_classes = len(np.unique(y))
254        self.classes_ = np.unique(y)  # for compatibility with sklearn
255        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
256
257        if sample_weight is None:
258            w_m = np.repeat(1.0 / n, n)
259        else:
260            w_m = np.asarray(sample_weight)
261
262        base_learner = CustomClassifier(
263            self.obj,
264            n_hidden_features=self.n_hidden_features,
265            activation_name=self.activation_name,
266            a=self.a,
267            nodes_sim=self.nodes_sim,
268            bias=self.bias,
269            dropout=self.dropout,
270            direct_link=self.direct_link,
271            n_clusters=self.n_clusters,
272            type_clust=self.type_clust,
273            type_scaling=self.type_scaling,
274            col_sample=self.col_sample,
275            row_sample=self.row_sample,
276            seed=self.seed,
277        )
278
279        if self.verbose == 1:
280            pbar = Progbar(self.n_estimators)
281
282        if self.method == "SAMME":
283            err_m = 1e6
284            err_bound = 1 - 1 / self.n_classes
285            self.alpha_.append(1.0)
286            x_range_n = range(n)
287
288            for m in range(self.n_estimators):
289                preds = base_learner.fit(
290                    X, y, sample_weight=w_m.ravel(), **kwargs
291                ).predict(X)
292
293                self.base_learners_.update({m: deepcopy(base_learner)})
294
295                cond = [y[i] != preds[i] for i in x_range_n]
296
297                err_m = max(
298                    sum([elt[0] * elt[1] for elt in zip(cond, w_m)]),
299                    2.220446049250313e-16,
300                )  # sum(w_m) == 1
301
302                if self.reg_lambda > 0:
303                    err_m += self.reg_lambda * (
304                        (1 - self.reg_alpha) * 0.5 * sum([x**2 for x in w_m])
305                        + self.reg_alpha * sum([abs(x) for x in w_m])
306                    )
307
308                err_m = min(err_m, err_bound)
309
310                alpha_m = self.learning_rate * log(
311                    (self.n_classes - 1) * (1 - err_m) / err_m
312                )
313
314                self.alpha_.append(alpha_m)
315
316                w_m_temp = [exp(alpha_m * cond[i]) for i in x_range_n]
317
318                sum_w_m = sum(w_m_temp)
319
320                w_m = np.asarray([w_m_temp[i] / sum_w_m for i in x_range_n])
321
322                base_learner.set_params(seed=self.seed + (m + 1) * 1000)
323
324                if self.verbose == 1:
325                    pbar.update(m)
326
327            if self.verbose == 1:
328                pbar.update(self.n_estimators)
329
330            self.n_estimators = len(self.base_learners_)
331            self.classes_ = np.unique(y)
332
333            return self
334
335        if self.method == "SAMME.R":
336            Y = mo.one_hot_encode2(y, self.n_classes)
337
338            if sample_weight is None:
339                w_m = np.repeat(1.0 / n, n)  # (N, 1)
340
341            else:
342                w_m = np.asarray(sample_weight)
343
344            for m in range(self.n_estimators):
345                probs = base_learner.fit(
346                    X, y, sample_weight=w_m.ravel(), **kwargs
347                ).predict_proba(X)
348
349                np.clip(a=probs, a_min=2.220446049250313e-16, a_max=1.0, out=probs)
350
351                self.base_learners_.update({m: deepcopy(base_learner)})
352
353                w_m *= np.exp(
354                    -1.0
355                    * self.learning_rate
356                    * (1.0 - 1.0 / self.n_classes)
357                    * xlogy(Y, probs).sum(axis=1)
358                )
359
360                w_m /= np.sum(w_m)
361
362                base_learner.set_params(seed=self.seed + (m + 1) * 1000)
363
364                if self.verbose == 1:
365                    pbar.update(m)
366
367            if self.verbose == 1:
368                pbar.update(self.n_estimators)
369
370            self.n_estimators = len(self.base_learners_)
371            self.classes_ = np.unique(y)
372
373            return self
374
375    def predict(self, X, **kwargs):
376        """Predict test data X.
377
378        Parameters:
379
380            X: {array-like}, shape = [n_samples, n_features]
381                Training vectors, where n_samples is the number
382                of samples and n_features is the number of features.
383
384            **kwargs: additional parameters to be passed to
385                  self.cook_test_set
386
387        Returns:
388
389            model predictions: {array-like}
390        """
391        return self.predict_proba(X, **kwargs).argmax(axis=1)
392
393    def predict_proba(self, X, **kwargs):
394        """Predict probabilities for test data X.
395
396        Parameters:
397
398            X: {array-like}, shape = [n_samples, n_features]
399                Training vectors, where n_samples is the number
400                of samples and n_features is the number of features.
401
402            **kwargs: additional parameters to be passed to
403                  self.cook_test_set
404
405        Returns:
406
407            probability estimates for test data: {array-like}
408
409        """
410
411        n_iter = len(self.base_learners_)
412
413        if self.method == "SAMME":
414            ensemble_learner = np.zeros((X.shape[0], self.n_classes))
415
416            # if self.verbose == 1:
417            #    pbar = Progbar(n_iter)
418
419            for idx, base_learner in self.base_learners_.items():
420                preds = base_learner.predict(X, **kwargs)
421
422                ensemble_learner += self.alpha_[idx] * mo.one_hot_encode2(
423                    preds, self.n_classes
424                )
425
426                # if self.verbose == 1:
427                #    pbar.update(idx)
428
429            # if self.verbose == 1:
430            #    pbar.update(n_iter)
431
432            expit_ensemble_learner = expit(ensemble_learner)
433
434            sum_ensemble = expit_ensemble_learner.sum(axis=1)
435
436            return expit_ensemble_learner / sum_ensemble[:, None]
437
438        # if self.method == "SAMME.R":
439        ensemble_learner = 0
440
441        # if self.verbose == 1:
442        #    pbar = Progbar(n_iter)
443
444        for idx, base_learner in self.base_learners_.items():
445            probs = base_learner.predict_proba(X, **kwargs)
446
447            np.clip(a=probs, a_min=2.220446049250313e-16, a_max=1.0, out=probs)
448
449            log_preds_proba = np.log(probs)
450
451            ensemble_learner += log_preds_proba - log_preds_proba.mean(axis=1)[:, None]
452
453            # if self.verbose == 1:
454            #    pbar.update(idx)
455
456        ensemble_learner *= self.n_classes - 1
457
458        # if self.verbose == 1:
459        #    pbar.update(n_iter)
460
461        expit_ensemble_learner = expit(ensemble_learner)
462
463        sum_ensemble = expit_ensemble_learner.sum(axis=1)
464
465        return expit_ensemble_learner / sum_ensemble[:, None]
466
467    @property
468    def _estimator_type(self):
469        return "classifier"            

AdaBoost Classification (SAMME) model class derived from class Boosting

Parameters:

obj: object
    any object containing a method fit (obj.fit()) and a method predict
    (obj.predict())

n_estimators: int
    number of boosting iterations

learning_rate: float
    learning rate of the boosting procedure

n_hidden_features: int
    number of nodes in the hidden layer

reg_lambda: float
    regularization parameter for weights

reg_alpha: float
    controls compromize between l1 and l2 norm of weights

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

col_sample: float
    percentage of covariates randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

seed: int
    reproducibility seed for nodes_sim=='uniform'

verbose: int
    0 for no output, 1 for a progress bar (default is 1)

method: str
    type of Adaboost method, 'SAMME' (discrete) or 'SAMME.R' (real)

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

alpha_: list
    AdaBoost coefficients alpha_m

base_learners_: dict
    a dictionary containing the base learners

Examples:

See also https://github.com/Techtonique/nnetsauce/blob/master/examples/adaboost_classification.py

import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time

breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)

# SAMME.R
clf = LogisticRegression(solver='liblinear', multi_class = 'ovr',
                        random_state=123)
fit_obj = ns.AdaBoostClassifier(clf,
                                n_hidden_features=int(11.22338867),
                                direct_link=True,
                                n_estimators=250, learning_rate=0.01126343,
                                col_sample=0.72684326, row_sample=0.86429443,
                                dropout=0.63078613, n_clusters=2,
                                type_clust="gmm",
                                verbose=1, seed = 123,
                                method="SAMME.R")

start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")

start = time()
print(fit_obj.score(X_test, y_test))
print(f"Elapsed {time() - start}")

preds = fit_obj.predict(X_test)

print(metrics.classification_report(preds, y_test))
def fit(self, X, y, sample_weight=None, **kwargs):
216    def fit(self, X, y, sample_weight=None, **kwargs):
217        """Fit Boosting model to training data (X, y).
218
219        Parameters:
220
221            X: {array-like}, shape = [n_samples, n_features]
222                Training vectors, where n_samples is the number
223                of samples and n_features is the number of features.
224
225            y: array-like, shape = [n_samples]
226                Target values.
227
228            **kwargs: additional parameters to be passed to
229                    self.cook_training_set or self.obj.fit
230
231        Returns:
232
233             self: object
234        """
235
236        assert mx.is_factor(y), "y must contain only integers"
237
238        assert self.method in (
239            "SAMME",
240            "SAMME.R",
241        ), "`method` must be either 'SAMME' or 'SAMME.R'"
242
243        assert (self.reg_lambda <= 1) & (
244            self.reg_lambda >= 0
245        ), "must have self.reg_lambda <= 1 &  self.reg_lambda >= 0"
246
247        assert (self.reg_alpha <= 1) & (
248            self.reg_alpha >= 0
249        ), "must have self.reg_alpha <= 1 &  self.reg_alpha >= 0"
250
251        # training
252        n, p = X.shape
253        self.n_classes = len(np.unique(y))
254        self.classes_ = np.unique(y)  # for compatibility with sklearn
255        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
256
257        if sample_weight is None:
258            w_m = np.repeat(1.0 / n, n)
259        else:
260            w_m = np.asarray(sample_weight)
261
262        base_learner = CustomClassifier(
263            self.obj,
264            n_hidden_features=self.n_hidden_features,
265            activation_name=self.activation_name,
266            a=self.a,
267            nodes_sim=self.nodes_sim,
268            bias=self.bias,
269            dropout=self.dropout,
270            direct_link=self.direct_link,
271            n_clusters=self.n_clusters,
272            type_clust=self.type_clust,
273            type_scaling=self.type_scaling,
274            col_sample=self.col_sample,
275            row_sample=self.row_sample,
276            seed=self.seed,
277        )
278
279        if self.verbose == 1:
280            pbar = Progbar(self.n_estimators)
281
282        if self.method == "SAMME":
283            err_m = 1e6
284            err_bound = 1 - 1 / self.n_classes
285            self.alpha_.append(1.0)
286            x_range_n = range(n)
287
288            for m in range(self.n_estimators):
289                preds = base_learner.fit(
290                    X, y, sample_weight=w_m.ravel(), **kwargs
291                ).predict(X)
292
293                self.base_learners_.update({m: deepcopy(base_learner)})
294
295                cond = [y[i] != preds[i] for i in x_range_n]
296
297                err_m = max(
298                    sum([elt[0] * elt[1] for elt in zip(cond, w_m)]),
299                    2.220446049250313e-16,
300                )  # sum(w_m) == 1
301
302                if self.reg_lambda > 0:
303                    err_m += self.reg_lambda * (
304                        (1 - self.reg_alpha) * 0.5 * sum([x**2 for x in w_m])
305                        + self.reg_alpha * sum([abs(x) for x in w_m])
306                    )
307
308                err_m = min(err_m, err_bound)
309
310                alpha_m = self.learning_rate * log(
311                    (self.n_classes - 1) * (1 - err_m) / err_m
312                )
313
314                self.alpha_.append(alpha_m)
315
316                w_m_temp = [exp(alpha_m * cond[i]) for i in x_range_n]
317
318                sum_w_m = sum(w_m_temp)
319
320                w_m = np.asarray([w_m_temp[i] / sum_w_m for i in x_range_n])
321
322                base_learner.set_params(seed=self.seed + (m + 1) * 1000)
323
324                if self.verbose == 1:
325                    pbar.update(m)
326
327            if self.verbose == 1:
328                pbar.update(self.n_estimators)
329
330            self.n_estimators = len(self.base_learners_)
331            self.classes_ = np.unique(y)
332
333            return self
334
335        if self.method == "SAMME.R":
336            Y = mo.one_hot_encode2(y, self.n_classes)
337
338            if sample_weight is None:
339                w_m = np.repeat(1.0 / n, n)  # (N, 1)
340
341            else:
342                w_m = np.asarray(sample_weight)
343
344            for m in range(self.n_estimators):
345                probs = base_learner.fit(
346                    X, y, sample_weight=w_m.ravel(), **kwargs
347                ).predict_proba(X)
348
349                np.clip(a=probs, a_min=2.220446049250313e-16, a_max=1.0, out=probs)
350
351                self.base_learners_.update({m: deepcopy(base_learner)})
352
353                w_m *= np.exp(
354                    -1.0
355                    * self.learning_rate
356                    * (1.0 - 1.0 / self.n_classes)
357                    * xlogy(Y, probs).sum(axis=1)
358                )
359
360                w_m /= np.sum(w_m)
361
362                base_learner.set_params(seed=self.seed + (m + 1) * 1000)
363
364                if self.verbose == 1:
365                    pbar.update(m)
366
367            if self.verbose == 1:
368                pbar.update(self.n_estimators)
369
370            self.n_estimators = len(self.base_learners_)
371            self.classes_ = np.unique(y)
372
373            return self

Fit Boosting model to training data (X, y).

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

 self: object
def predict(self, X, **kwargs):
375    def predict(self, X, **kwargs):
376        """Predict test data X.
377
378        Parameters:
379
380            X: {array-like}, shape = [n_samples, n_features]
381                Training vectors, where n_samples is the number
382                of samples and n_features is the number of features.
383
384            **kwargs: additional parameters to be passed to
385                  self.cook_test_set
386
387        Returns:
388
389            model predictions: {array-like}
390        """
391        return self.predict_proba(X, **kwargs).argmax(axis=1)

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
      self.cook_test_set

Returns:

model predictions: {array-like}
def predict_proba(self, X, **kwargs):
393    def predict_proba(self, X, **kwargs):
394        """Predict probabilities for test data X.
395
396        Parameters:
397
398            X: {array-like}, shape = [n_samples, n_features]
399                Training vectors, where n_samples is the number
400                of samples and n_features is the number of features.
401
402            **kwargs: additional parameters to be passed to
403                  self.cook_test_set
404
405        Returns:
406
407            probability estimates for test data: {array-like}
408
409        """
410
411        n_iter = len(self.base_learners_)
412
413        if self.method == "SAMME":
414            ensemble_learner = np.zeros((X.shape[0], self.n_classes))
415
416            # if self.verbose == 1:
417            #    pbar = Progbar(n_iter)
418
419            for idx, base_learner in self.base_learners_.items():
420                preds = base_learner.predict(X, **kwargs)
421
422                ensemble_learner += self.alpha_[idx] * mo.one_hot_encode2(
423                    preds, self.n_classes
424                )
425
426                # if self.verbose == 1:
427                #    pbar.update(idx)
428
429            # if self.verbose == 1:
430            #    pbar.update(n_iter)
431
432            expit_ensemble_learner = expit(ensemble_learner)
433
434            sum_ensemble = expit_ensemble_learner.sum(axis=1)
435
436            return expit_ensemble_learner / sum_ensemble[:, None]
437
438        # if self.method == "SAMME.R":
439        ensemble_learner = 0
440
441        # if self.verbose == 1:
442        #    pbar = Progbar(n_iter)
443
444        for idx, base_learner in self.base_learners_.items():
445            probs = base_learner.predict_proba(X, **kwargs)
446
447            np.clip(a=probs, a_min=2.220446049250313e-16, a_max=1.0, out=probs)
448
449            log_preds_proba = np.log(probs)
450
451            ensemble_learner += log_preds_proba - log_preds_proba.mean(axis=1)[:, None]
452
453            # if self.verbose == 1:
454            #    pbar.update(idx)
455
456        ensemble_learner *= self.n_classes - 1
457
458        # if self.verbose == 1:
459        #    pbar.update(n_iter)
460
461        expit_ensemble_learner = expit(ensemble_learner)
462
463        sum_ensemble = expit_ensemble_learner.sum(axis=1)
464
465        return expit_ensemble_learner / sum_ensemble[:, None]

Predict probabilities for test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
      self.cook_test_set

Returns:

probability estimates for test data: {array-like}
class Base(sklearn.base.BaseEstimator):
 46class Base(BaseEstimator):
 47    """Base model from which all the other classes inherit.
 48
 49    This class contains the most important data preprocessing/feature engineering methods.
 50
 51    Parameters:
 52
 53        n_hidden_features: int
 54            number of nodes in the hidden layer
 55
 56        activation_name: str
 57            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 58
 59        a: float
 60            hyperparameter for 'prelu' or 'elu' activation function
 61
 62        nodes_sim: str
 63            type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
 64            'uniform'
 65
 66        bias: boolean
 67            indicates if the hidden layer contains a bias term (True) or
 68            not (False)
 69
 70        dropout: float
 71            regularization parameter; (random) percentage of nodes dropped out
 72            of the training
 73
 74        direct_link: boolean
 75            indicates if the original features are included (True) in model's
 76            fitting or not (False)
 77
 78        n_clusters: int
 79            number of clusters for type_clust='kmeans' or type_clust='gmm'
 80            clustering (could be 0: no clustering)
 81
 82        cluster_encode: bool
 83            defines how the variable containing clusters is treated (default is one-hot);
 84            if `False`, then labels are used, without one-hot encoding
 85
 86        type_clust: str
 87            type of clustering method: currently k-means ('kmeans') or Gaussian
 88            Mixture Model ('gmm')
 89
 90        type_scaling: a tuple of 3 strings
 91            scaling methods for inputs, hidden layer, and clustering respectively
 92            (and when relevant).
 93            Currently available: standardization ('std') or MinMax scaling ('minmax') or robust scaling ('robust') or  max absolute scaling ('maxabs')
 94
 95        col_sample: float
 96            percentage of features randomly chosen for training
 97
 98        row_sample: float
 99            percentage of rows chosen for training, by stratified bootstrapping
100
101        seed: int
102            reproducibility seed for nodes_sim=='uniform', clustering and dropout
103
104        backend: str
105            "cpu" or "gpu" or "tpu"
106
107    """
108
109    # construct the object -----
110
111    def __init__(
112        self,
113        n_hidden_features=5,
114        activation_name="relu",
115        a=0.01,
116        nodes_sim="sobol",
117        bias=True,
118        dropout=0,
119        direct_link=True,
120        n_clusters=2,
121        cluster_encode=True,
122        type_clust="kmeans",
123        type_scaling=("std", "std", "std"),
124        col_sample=1,
125        row_sample=1,
126        seed=123,
127        backend="cpu",
128    ):
129        # input checks -----
130
131        sys_platform = platform.system()
132
133        if (sys_platform == "Windows") and (backend in ("gpu", "tpu")):
134            warnings.warn("No GPU/TPU computing on Windows yet, backend set to 'cpu'")
135            backend = "cpu"
136
137        assert activation_name in (
138            "relu",
139            "tanh",
140            "sigmoid",
141            "prelu",
142            "elu",
143        ), "'activation_name' must be in ('relu', 'tanh', 'sigmoid','prelu', 'elu')"
144
145        assert nodes_sim in (
146            "sobol",
147            "hammersley",
148            "uniform",
149            "halton",
150        ), "'nodes_sim' must be in ('sobol', 'hammersley', 'uniform', 'halton')"
151
152        assert type_clust in (
153            "kmeans",
154            "gmm",
155        ), "'type_clust' must be in ('kmeans', 'gmm')"
156
157        assert (len(type_scaling) == 3) & all(
158            type_scaling[i] in ("minmax", "std", "robust", "maxabs")
159            for i in range(len(type_scaling))
160        ), "'type_scaling' must have length 3, and available scaling methods are 'minmax' scaling, standardization ('std'), robust scaling ('robust') and max absolute ('maxabs')"
161
162        assert (col_sample >= 0) & (
163            col_sample <= 1
164        ), "'col_sample' must be comprised between 0 and 1 (both included)"
165
166        assert backend in (
167            "cpu",
168            "gpu",
169            "tpu",
170        ), "must have 'backend' in ('cpu', 'gpu', 'tpu')"
171
172        self.n_hidden_features = n_hidden_features
173        self.activation_name = activation_name
174        self.a = a
175        self.nodes_sim = nodes_sim
176        self.bias = bias
177        self.seed = seed
178        self.backend = backend
179        self.dropout = dropout
180        self.direct_link = direct_link
181        self.cluster_encode = cluster_encode
182        self.type_clust = type_clust
183        self.type_scaling = type_scaling
184        self.col_sample = col_sample
185        self.row_sample = row_sample
186        self.n_clusters = n_clusters
187        if isinstance(self, RegressorMixin):
188            self.type_fit = "regression"
189        elif isinstance(self, ClassifierMixin):
190            self.type_fit = "classification"
191        self.subsampler_ = None
192        self.index_col_ = None
193        self.index_row_ = True
194        self.clustering_obj_ = None
195        self.clustering_scaler_ = None
196        self.nn_scaler_ = None
197        self.scaler_ = None
198        self.encoder_ = None
199        self.W_ = None
200        self.X_ = None
201        self.y_ = None
202        self.y_mean_ = None
203        self.beta_ = None
204
205        # activation function -----
206        if sys_platform in ("Linux", "Darwin"):
207            activation_options = {
208                "relu": ac.relu if (self.backend == "cpu") else jnn.relu,
209                "tanh": np.tanh if (self.backend == "cpu") else jnp.tanh,
210                "sigmoid": (ac.sigmoid if (self.backend == "cpu") else jnn.sigmoid),
211                "prelu": partial(ac.prelu, a=a),
212                "elu": (
213                    partial(ac.elu, a=a)
214                    if (self.backend == "cpu")
215                    else partial(jnn.elu, a=a)
216                ),
217            }
218        else:  # on Windows currently, no JAX
219            activation_options = {
220                "relu": (ac.relu if (self.backend == "cpu") else NotImplementedError),
221                "tanh": (np.tanh if (self.backend == "cpu") else NotImplementedError),
222                "sigmoid": (
223                    ac.sigmoid if (self.backend == "cpu") else NotImplementedError
224                ),
225                "prelu": partial(ac.prelu, a=a),
226                "elu": (
227                    partial(ac.elu, a=a)
228                    if (self.backend == "cpu")
229                    else NotImplementedError
230                ),
231            }
232        self.activation_func = activation_options[activation_name]
233
234    # "preprocessing" methods to be inherited -----
235
236    def encode_clusters(self, X=None, predict=False, scaler=None, **kwargs):  #
237        """Create new covariates with kmeans or GMM clustering
238
239        Parameters:
240
241            X: {array-like}, shape = [n_samples, n_features]
242                Training vectors, where n_samples is the number
243                of samples and n_features is the number of features.
244
245            predict: boolean
246                is False on training set and True on test set
247
248            scaler: {object} of class StandardScaler, MinMaxScaler, RobustScaler or MaxAbsScaler
249                if scaler has already been fitted on training data (online training), it can be passed here
250
251            **kwargs:
252                additional parameters to be passed to the
253                clustering method
254
255        Returns:
256
257            Clusters' matrix, one-hot encoded: {array-like}
258
259        """
260
261        np.random.seed(self.seed)
262
263        if X is None:
264            X = self.X_
265
266        if isinstance(X, pd.DataFrame):
267            X = copy.deepcopy(X.values.astype(float))
268
269        if len(X.shape) == 1:
270            X = X.reshape(1, -1)
271
272        if predict is False:  # encode training set
273
274            # scale input data before clustering
275            self.clustering_scaler_, scaled_X = mo.scale_covariates(
276                X, choice=self.type_scaling[2], scaler=self.clustering_scaler_
277            )
278
279            self.clustering_obj_, X_clustered = mo.cluster_covariates(
280                scaled_X,
281                self.n_clusters,
282                self.seed,
283                type_clust=self.type_clust,
284                **kwargs
285            )
286
287            if self.cluster_encode == True:
288                return mo.one_hot_encode(X_clustered, self.n_clusters).astype(
289                    np.float16
290                )
291
292            return X_clustered.astype(np.float16)
293
294        # if predict == True, encode test set
295        X_clustered = self.clustering_obj_.predict(self.clustering_scaler_.transform(X))
296
297        if self.cluster_encode == True:
298            return mo.one_hot_encode(X_clustered, self.n_clusters).astype(np.float16)
299
300        return X_clustered.astype(np.float16)
301
302    def create_layer(self, scaled_X, W=None):
303        """Create hidden layer.
304
305        Parameters:
306
307            scaled_X: {array-like}, shape = [n_samples, n_features]
308                Training vectors, where n_samples is the number
309                of samples and n_features is the number of features
310
311            W: {array-like}, shape = [n_features, hidden_features]
312                if provided, constructs the hidden layer with W; otherwise computed internally
313
314        Returns:
315
316            Hidden layer matrix: {array-like}
317
318        """
319
320        n_features = scaled_X.shape[1]
321
322        # hash_sim = {
323        #         "sobol": generate_sobol,
324        #         "hammersley": generate_hammersley,
325        #         "uniform": generate_uniform,
326        #         "halton": generate_halton
327        #     }
328
329        if self.bias is False:  # no bias term in the hidden layer
330            if W is None:
331                if self.nodes_sim == "sobol":
332                    self.W_ = generate_sobol(
333                        n_dims=n_features,
334                        n_points=self.n_hidden_features,
335                        seed=self.seed,
336                    )
337                elif self.nodes_sim == "hammersley":
338                    self.W_ = generate_hammersley(
339                        n_dims=n_features,
340                        n_points=self.n_hidden_features,
341                        seed=self.seed,
342                    )
343                elif self.nodes_sim == "uniform":
344                    self.W_ = generate_uniform(
345                        n_dims=n_features,
346                        n_points=self.n_hidden_features,
347                        seed=self.seed,
348                    )
349                else:
350                    self.W_ = generate_halton(
351                        n_dims=n_features,
352                        n_points=self.n_hidden_features,
353                        seed=self.seed,
354                    )
355
356                assert (
357                    scaled_X.shape[1] == self.W_.shape[0]
358                ), "check dimensions of covariates X and matrix W"
359
360                return mo.dropout(
361                    x=self.activation_func(
362                        mo.safe_sparse_dot(a=scaled_X, b=self.W_, backend=self.backend)
363                    ),
364                    drop_prob=self.dropout,
365                    seed=self.seed,
366                )
367
368            # W is not none
369            assert (
370                scaled_X.shape[1] == W.shape[0]
371            ), "check dimensions of covariates X and matrix W"
372
373            # self.W_ = W
374            return mo.dropout(
375                x=self.activation_func(
376                    mo.safe_sparse_dot(a=scaled_X, b=W, backend=self.backend)
377                ),
378                drop_prob=self.dropout,
379                seed=self.seed,
380            )
381
382        # with bias term in the hidden layer
383        if W is None:
384            n_features_1 = n_features + 1
385
386            if self.nodes_sim == "sobol":
387                self.W_ = generate_sobol(
388                    n_dims=n_features_1,
389                    n_points=self.n_hidden_features,
390                    seed=self.seed,
391                )
392            elif self.nodes_sim == "hammersley":
393                self.W_ = generate_hammersley(
394                    n_dims=n_features_1,
395                    n_points=self.n_hidden_features,
396                    seed=self.seed,
397                )
398            elif self.nodes_sim == "uniform":
399                self.W_ = generate_uniform(
400                    n_dims=n_features_1,
401                    n_points=self.n_hidden_features,
402                    seed=self.seed,
403                )
404            else:
405                self.W_ = generate_halton(
406                    n_dims=n_features_1,
407                    n_points=self.n_hidden_features,
408                    seed=self.seed,
409                )
410
411            # self.W_ = hash_sim[self.nodes_sim](
412            #         n_dims=n_features_1,
413            #         n_points=self.n_hidden_features,
414            #         seed=self.seed,
415            #     )
416
417            return mo.dropout(
418                x=self.activation_func(
419                    mo.safe_sparse_dot(
420                        a=mo.cbind(
421                            np.ones(scaled_X.shape[0]),
422                            scaled_X,
423                            backend=self.backend,
424                        ),
425                        b=self.W_,
426                        backend=self.backend,
427                    )
428                ),
429                drop_prob=self.dropout,
430                seed=self.seed,
431            )
432
433        # W is not None
434        # self.W_ = W
435        return mo.dropout(
436            x=self.activation_func(
437                mo.safe_sparse_dot(
438                    a=mo.cbind(
439                        np.ones(scaled_X.shape[0]),
440                        scaled_X,
441                        backend=self.backend,
442                    ),
443                    b=W,
444                    backend=self.backend,
445                )
446            ),
447            drop_prob=self.dropout,
448            seed=self.seed,
449        )
450
451    def cook_training_set(self, y=None, X=None, W=None, **kwargs):
452        """Create new hidden features for training set, with hidden layer, center the response.
453
454        Parameters:
455
456            y: array-like, shape = [n_samples]
457                Target values
458
459            X: {array-like}, shape = [n_samples, n_features]
460                Training vectors, where n_samples is the number
461                of samples and n_features is the number of features
462
463            W: {array-like}, shape = [n_features, hidden_features]
464                if provided, constructs the hidden layer via W
465
466        Returns:
467
468            (centered response, direct link + hidden layer matrix): {tuple}
469
470        """
471
472        # either X and y are stored or not
473        # assert ((y is None) & (X is None)) | ((y is not None) & (X is not None))
474        if self.n_hidden_features > 0:  # has a hidden layer
475            assert (
476                len(self.type_scaling) >= 2
477            ), "must have len(self.type_scaling) >= 2 when self.n_hidden_features > 0"
478
479        if X is None:
480
481            if self.col_sample == 1:
482                input_X = self.X_
483            else:
484                n_features = self.X_.shape[1]
485                new_n_features = int(np.ceil(n_features * self.col_sample))
486                assert (
487                    new_n_features >= 1
488                ), "check class attribute 'col_sample' and the number of covariates provided for X"
489                np.random.seed(self.seed)
490                index_col = np.random.choice(
491                    range(n_features), size=new_n_features, replace=False
492                )
493                self.index_col_ = index_col
494                input_X = self.X_[:, self.index_col_]
495
496        else:  # X is not None # keep X vs self.X_
497
498            if isinstance(X, pd.DataFrame):
499                X = copy.deepcopy(X.values.astype(float))
500
501            if self.col_sample == 1:
502                input_X = X
503            else:
504                n_features = X.shape[1]
505                new_n_features = int(np.ceil(n_features * self.col_sample))
506                assert (
507                    new_n_features >= 1
508                ), "check class attribute 'col_sample' and the number of covariates provided for X"
509                np.random.seed(self.seed)
510                index_col = np.random.choice(
511                    range(n_features), size=new_n_features, replace=False
512                )
513                self.index_col_ = index_col
514                input_X = X[:, self.index_col_]
515
516        if self.n_clusters <= 0:
517            # data without any clustering: self.n_clusters is None -----
518
519            if self.n_hidden_features > 0:  # with hidden layer
520
521                self.nn_scaler_, scaled_X = mo.scale_covariates(
522                    input_X, choice=self.type_scaling[1], scaler=self.nn_scaler_
523                )
524                Phi_X = (
525                    self.create_layer(scaled_X)
526                    if W is None
527                    else self.create_layer(scaled_X, W=W)
528                )
529                Z = (
530                    mo.cbind(input_X, Phi_X, backend=self.backend)
531                    if self.direct_link is True
532                    else Phi_X
533                )
534                self.scaler_, scaled_Z = mo.scale_covariates(
535                    Z, choice=self.type_scaling[0], scaler=self.scaler_
536                )
537            else:  # no hidden layer
538                Z = input_X
539                self.scaler_, scaled_Z = mo.scale_covariates(
540                    Z, choice=self.type_scaling[0], scaler=self.scaler_
541                )
542
543        else:
544
545            # data with clustering: self.n_clusters is not None ----- # keep
546
547            augmented_X = mo.cbind(
548                input_X,
549                self.encode_clusters(input_X, **kwargs),
550                backend=self.backend,
551            )
552
553            if self.n_hidden_features > 0:  # with hidden layer
554
555                self.nn_scaler_, scaled_X = mo.scale_covariates(
556                    augmented_X,
557                    choice=self.type_scaling[1],
558                    scaler=self.nn_scaler_,
559                )
560                Phi_X = (
561                    self.create_layer(scaled_X)
562                    if W is None
563                    else self.create_layer(scaled_X, W=W)
564                )
565                Z = (
566                    mo.cbind(augmented_X, Phi_X, backend=self.backend)
567                    if self.direct_link is True
568                    else Phi_X
569                )
570                self.scaler_, scaled_Z = mo.scale_covariates(
571                    Z, choice=self.type_scaling[0], scaler=self.scaler_
572                )
573            else:  # no hidden layer
574                Z = augmented_X
575                self.scaler_, scaled_Z = mo.scale_covariates(
576                    Z, choice=self.type_scaling[0], scaler=self.scaler_
577                )
578
579        # Returning model inputs -----
580        if mx.is_factor(y) is False:  # regression
581            # center y
582            if y is None:
583                self.y_mean_, centered_y = mo.center_response(self.y_)
584            else:
585                self.y_mean_, centered_y = mo.center_response(y)
586
587            # y is subsampled
588            if self.row_sample < 1:
589                n, p = Z.shape
590
591                self.subsampler_ = (
592                    SubSampler(y=self.y_, row_sample=self.row_sample, seed=self.seed)
593                    if y is None
594                    else SubSampler(y=y, row_sample=self.row_sample, seed=self.seed)
595                )
596
597                self.index_row_ = self.subsampler_.subsample()
598
599                n_row_sample = len(self.index_row_)
600                # regression
601                return (
602                    centered_y[self.index_row_].reshape(n_row_sample),
603                    self.scaler_.transform(
604                        Z[self.index_row_, :].reshape(n_row_sample, p)
605                    ),
606                )
607            # y is not subsampled
608            # regression
609            return (centered_y, self.scaler_.transform(Z))
610
611        # classification
612        # y is subsampled
613        if self.row_sample < 1:
614            n, p = Z.shape
615
616            self.subsampler_ = (
617                SubSampler(y=self.y_, row_sample=self.row_sample, seed=self.seed)
618                if y is None
619                else SubSampler(y=y, row_sample=self.row_sample, seed=self.seed)
620            )
621
622            self.index_row_ = self.subsampler_.subsample()
623
624            n_row_sample = len(self.index_row_)
625            # classification
626            return (
627                y[self.index_row_].reshape(n_row_sample),
628                self.scaler_.transform(Z[self.index_row_, :].reshape(n_row_sample, p)),
629            )
630        # y is not subsampled
631        # classification
632        return (y, self.scaler_.transform(Z))
633
634    def cook_test_set(self, X, **kwargs):
635        """Transform data from test set, with hidden layer.
636
637        Parameters:
638
639            X: {array-like}, shape = [n_samples, n_features]
640                Training vectors, where n_samples is the number
641                of samples and n_features is the number of features
642
643            **kwargs: additional parameters to be passed to self.encode_cluster
644
645        Returns:
646
647            Transformed test set : {array-like}
648        """
649
650        if isinstance(X, pd.DataFrame):
651            X = copy.deepcopy(X.values.astype(float))
652
653        if len(X.shape) == 1:
654            X = X.reshape(1, -1)
655
656        if (
657            self.n_clusters == 0
658        ):  # data without clustering: self.n_clusters is None -----
659            if self.n_hidden_features > 0:
660                # if hidden layer
661                scaled_X = (
662                    self.nn_scaler_.transform(X)
663                    if (self.col_sample == 1)
664                    else self.nn_scaler_.transform(X[:, self.index_col_])
665                )
666                Phi_X = self.create_layer(scaled_X, self.W_)
667                if self.direct_link == True:
668                    return self.scaler_.transform(
669                        mo.cbind(scaled_X, Phi_X, backend=self.backend)
670                    )
671                # when self.direct_link == False
672                return self.scaler_.transform(Phi_X)
673            # if no hidden layer # self.n_hidden_features == 0
674            return self.scaler_.transform(X)
675
676        # data with clustering: self.n_clusters > 0 -----
677        if self.col_sample == 1:
678            predicted_clusters = self.encode_clusters(X=X, predict=True, **kwargs)
679            augmented_X = mo.cbind(X, predicted_clusters, backend=self.backend)
680        else:
681            predicted_clusters = self.encode_clusters(
682                X=X[:, self.index_col_], predict=True, **kwargs
683            )
684            augmented_X = mo.cbind(
685                X[:, self.index_col_], predicted_clusters, backend=self.backend
686            )
687
688        if self.n_hidden_features > 0:  # if hidden layer
689            scaled_X = self.nn_scaler_.transform(augmented_X)
690            Phi_X = self.create_layer(scaled_X, self.W_)
691            if self.direct_link == True:
692                return self.scaler_.transform(
693                    mo.cbind(augmented_X, Phi_X, backend=self.backend)
694                )
695            return self.scaler_.transform(Phi_X)
696
697        # if no hidden layer
698        return self.scaler_.transform(augmented_X)
699
700    def cross_val_score(
701        self,
702        X,
703        y,
704        cv=5,
705        scoring="accuracy",
706        random_state=42,
707        n_jobs=-1,
708        epsilon=0.5,
709        penalized=True,
710        objective="abs",
711        **kwargs
712    ):
713        """
714        Penalized Cross-validation score for a model.
715
716        Parameters:
717
718            X: {array-like}, shape = [n_samples, n_features]
719                Training vectors, where n_samples is the number
720                of samples and n_features is the number of features
721
722            y: array-like, shape = [n_samples]
723                Target values
724
725            (Note: no separate test set is passed to this method; a 20%
726            validation split is carved out of (X, y) internally with
727            train_test_split, using the given random_state.)
728
732            cv: int
733                Number of folds
734
735            scoring: str
736                Scoring metric
737
738            random_state: int
739                Random state
740
741            n_jobs: int
742                Number of jobs to run in parallel
743
744            epsilon: float
745                Penalty parameter
746
747            penalized: bool
748                Whether to obtain penalized cross-validation score or not
749
750            objective: str
751                'abs': Minimize the absolute difference between cross-validation score and validation score
752                'relative': Minimize the relative difference between cross-validation score and validation score
753        Returns:
754
755            A namedtuple with the following fields:
756                - cv_score: float
757                    cross-validation score
758                - val_score: float
759                    validation score
760                - penalized_score: float
761                    penalized cross-validation score: |cv_score - val_score| + epsilon*(1/val_score + 1/cv_score)
762                    for objective='abs', or |cv_score/val_score - 1| + epsilon*(1/val_score + 1/cv_score) for objective='relative'.
763                    If a higher scoring metric is better, minimize this value; if a lower one is better, maximize it.
764        """
765        if scoring == "accuracy":
766            scoring_func = accuracy_score
767        elif scoring == "balanced_accuracy":
768            scoring_func = balanced_accuracy_score
769        elif scoring == "f1":
770            scoring_func = f1_score
771        elif scoring == "roc_auc":
772            scoring_func = roc_auc_score
773        elif scoring == "r2":
774            scoring_func = r2_score
775        elif scoring == "mse":
776            scoring_func = mean_squared_error
777        elif scoring == "mae":
778            scoring_func = mean_absolute_error
779        elif scoring == "mape":
780            scoring_func = mean_absolute_percentage_error
781        elif scoring == "rmse":
782
783            def scoring_func(y_true, y_pred):
784                return np.sqrt(mean_squared_error(y_true, y_pred))
785
786        X_train, X_val, y_train, y_val = train_test_split(
787            X, y, test_size=0.2, random_state=random_state
788        )
789
790        res = cross_val_score(
791            self, X_train, y_train, cv=cv, scoring=scoring, n_jobs=n_jobs
792        )  # cross-validation error
793
794        if penalized == False:
795            return res
796
797        DescribeResult = namedtuple(
798            "DescribeResult", ["cv_score", "val_score", "penalized_score"]
799        )
800
801        numerator = res.mean()
802
803        # Evaluate on the held-out validation split
804        preds_val = self.fit(X_train, y_train).predict(X_val)
805        try:
806            denominator = scoring(y_val, preds_val)  # in case scoring is a callable
807        except Exception:  # scoring is a string here; use the resolved scoring_func
808            denominator = scoring_func(y_val, preds_val)  # validation error
809
810        # if higher is better
811        if objective == "abs":
812            penalized_score = np.abs(numerator - denominator) + epsilon * (
813                1 / denominator + 1 / numerator
814            )
815        elif objective == "relative":
816            ratio = numerator / denominator
817            penalized_score = np.abs(ratio - 1) + epsilon * (
818                1 / denominator + 1 / numerator
819            )
820
821        return DescribeResult(
822            cv_score=numerator,
823            val_score=denominator,
824            penalized_score=penalized_score,
825        )
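
As an orientation, here is a hedged usage sketch of the penalized cross-validation score defined above. It is not an official example: the dataset, the CustomClassifier wrapper around a scikit-learn base learner, and all hyperparameter values are illustrative assumptions.

```python
# Hedged sketch (not from the package docs): penalized CV score for a
# CustomClassifier wrapping a scikit-learn base learner.
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression

X, y = load_breast_cancer(return_X_y=True)

clf = ns.CustomClassifier(obj=LogisticRegression(max_iter=1000),
                          n_hidden_features=5, n_clusters=2)

# With penalized=True (the default), a namedtuple is returned.
res = clf.cross_val_score(X, y, cv=5, scoring="accuracy",
                          epsilon=0.5, objective="abs")
print(res.cv_score, res.val_score, res.penalized_score)
```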

Base model from which all the other classes inherit.

This class contains the most important data preprocessing/feature engineering methods.

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or
    not (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original features are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for type_clust='kmeans' or type_clust='gmm'
    clustering (could be 0: no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot);
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std'), MinMax scaling ('minmax'), robust scaling ('robust') or max absolute scaling ('maxabs')

col_sample: float
    percentage of features randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

seed: int
    reproducibility seed for nodes_sim=='uniform', clustering and dropout

backend: str
    "cpu" or "gpu" or "tpu"
def encode_clusters(self, X=None, predict=False, scaler=None, **kwargs):
236    def encode_clusters(self, X=None, predict=False, scaler=None, **kwargs):  #
237        """Create new covariates with kmeans or GMM clustering
238
239        Parameters:
240
241            X: {array-like}, shape = [n_samples, n_features]
242                Training vectors, where n_samples is the number
243                of samples and n_features is the number of features.
244
245            predict: boolean
246                is False on training set and True on test set
247
248            scaler: {object} of class StandardScaler, MinMaxScaler, RobustScaler or MaxAbsScaler
249                if scaler has already been fitted on training data (online training), it can be passed here
250
251            **kwargs:
252                additional parameters to be passed to the
253                clustering method
254
255        Returns:
256
257            Clusters' matrix, one-hot encoded: {array-like}
258
259        """
260
261        np.random.seed(self.seed)
262
263        if X is None:
264            X = self.X_
265
266        if isinstance(X, pd.DataFrame):
267            X = copy.deepcopy(X.values.astype(float))
268
269        if len(X.shape) == 1:
270            X = X.reshape(1, -1)
271
272        if predict is False:  # encode training set
273
274            # scale input data before clustering
275            self.clustering_scaler_, scaled_X = mo.scale_covariates(
276                X, choice=self.type_scaling[2], scaler=self.clustering_scaler_
277            )
278
279            self.clustering_obj_, X_clustered = mo.cluster_covariates(
280                scaled_X,
281                self.n_clusters,
282                self.seed,
283                type_clust=self.type_clust,
284                **kwargs
285            )
286
287            if self.cluster_encode == True:
288                return mo.one_hot_encode(X_clustered, self.n_clusters).astype(
289                    np.float16
290                )
291
292            return X_clustered.astype(np.float16)
293
294        # if predict == True, encode test set
295        X_clustered = self.clustering_obj_.predict(self.clustering_scaler_.transform(X))
296
297        if self.cluster_encode == True:
298            return mo.one_hot_encode(X_clustered, self.n_clusters).astype(np.float16)
299
300        return X_clustered.astype(np.float16)

Create new covariates with kmeans or GMM clustering

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

predict: boolean
    is False on training set and True on test set

scaler: {object} of class StandardScaler, MinMaxScaler, RobustScaler or MaxAbsScaler
    if scaler has already been fitted on training data (online training), it can be passed here

**kwargs:
    additional parameters to be passed to the
    clustering method

Returns:

Clusters' matrix, one-hot encoded: {array-like}
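
A hedged sketch of calling this method directly (in normal use, cook_training_set and cook_test_set call it for you). It assumes that a freshly constructed Base object carries every attribute encode_clusters needs; the shapes in the comments hold only under the assumed settings.

```python
# Illustrative only: one-hot cluster covariates from k-means clustering.
import numpy as np
import nnetsauce as ns

rng = np.random.default_rng(0)
X_train = rng.normal(size=(50, 3))
X_new = rng.normal(size=(10, 3))

base = ns.Base(n_clusters=3, type_clust="kmeans", cluster_encode=True)

# predict=False: fit the clustering scaler and the clustering object
clusters_train = base.encode_clusters(X=X_train)             # expected shape (50, 3)
# predict=True: reuse the fitted objects on unseen rows
clusters_new = base.encode_clusters(X=X_new, predict=True)   # expected shape (10, 3)
```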
def create_layer(self, scaled_X, W=None):
302    def create_layer(self, scaled_X, W=None):
303        """Create hidden layer.
304
305        Parameters:
306
307            scaled_X: {array-like}, shape = [n_samples, n_features]
308                Training vectors, where n_samples is the number
309                of samples and n_features is the number of features
310
311            W: {array-like}, shape = [n_features, hidden_features]
312                if provided, constructs the hidden layer with W; otherwise computed internally
313
314        Returns:
315
316            Hidden layer matrix: {array-like}
317
318        """
319
320        n_features = scaled_X.shape[1]
321
322        # hash_sim = {
323        #         "sobol": generate_sobol,
324        #         "hammersley": generate_hammersley,
325        #         "uniform": generate_uniform,
326        #         "halton": generate_halton
327        #     }
328
329        if self.bias is False:  # no bias term in the hidden layer
330            if W is None:
331                if self.nodes_sim == "sobol":
332                    self.W_ = generate_sobol(
333                        n_dims=n_features,
334                        n_points=self.n_hidden_features,
335                        seed=self.seed,
336                    )
337                elif self.nodes_sim == "hammersley":
338                    self.W_ = generate_hammersley(
339                        n_dims=n_features,
340                        n_points=self.n_hidden_features,
341                        seed=self.seed,
342                    )
343                elif self.nodes_sim == "uniform":
344                    self.W_ = generate_uniform(
345                        n_dims=n_features,
346                        n_points=self.n_hidden_features,
347                        seed=self.seed,
348                    )
349                else:
350                    self.W_ = generate_halton(
351                        n_dims=n_features,
352                        n_points=self.n_hidden_features,
353                        seed=self.seed,
354                    )
355
356                assert (
357                    scaled_X.shape[1] == self.W_.shape[0]
358                ), "check dimensions of covariates X and matrix W"
359
360                return mo.dropout(
361                    x=self.activation_func(
362                        mo.safe_sparse_dot(a=scaled_X, b=self.W_, backend=self.backend)
363                    ),
364                    drop_prob=self.dropout,
365                    seed=self.seed,
366                )
367
368            # W is not none
369            assert (
370                scaled_X.shape[1] == W.shape[0]
371            ), "check dimensions of covariates X and matrix W"
372
373            # self.W_ = W
374            return mo.dropout(
375                x=self.activation_func(
376                    mo.safe_sparse_dot(a=scaled_X, b=W, backend=self.backend)
377                ),
378                drop_prob=self.dropout,
379                seed=self.seed,
380            )
381
382        # with bias term in the hidden layer
383        if W is None:
384            n_features_1 = n_features + 1
385
386            if self.nodes_sim == "sobol":
387                self.W_ = generate_sobol(
388                    n_dims=n_features_1,
389                    n_points=self.n_hidden_features,
390                    seed=self.seed,
391                )
392            elif self.nodes_sim == "hammersley":
393                self.W_ = generate_hammersley(
394                    n_dims=n_features_1,
395                    n_points=self.n_hidden_features,
396                    seed=self.seed,
397                )
398            elif self.nodes_sim == "uniform":
399                self.W_ = generate_uniform(
400                    n_dims=n_features_1,
401                    n_points=self.n_hidden_features,
402                    seed=self.seed,
403                )
404            else:
405                self.W_ = generate_halton(
406                    n_dims=n_features_1,
407                    n_points=self.n_hidden_features,
408                    seed=self.seed,
409                )
410
411            # self.W_ = hash_sim[self.nodes_sim](
412            #         n_dims=n_features_1,
413            #         n_points=self.n_hidden_features,
414            #         seed=self.seed,
415            #     )
416
417            return mo.dropout(
418                x=self.activation_func(
419                    mo.safe_sparse_dot(
420                        a=mo.cbind(
421                            np.ones(scaled_X.shape[0]),
422                            scaled_X,
423                            backend=self.backend,
424                        ),
425                        b=self.W_,
426                        backend=self.backend,
427                    )
428                ),
429                drop_prob=self.dropout,
430                seed=self.seed,
431            )
432
433        # W is not None
434        # self.W_ = W
435        return mo.dropout(
436            x=self.activation_func(
437                mo.safe_sparse_dot(
438                    a=mo.cbind(
439                        np.ones(scaled_X.shape[0]),
440                        scaled_X,
441                        backend=self.backend,
442                    ),
443                    b=W,
444                    backend=self.backend,
445                )
446            ),
447            drop_prob=self.dropout,
448            seed=self.seed,
449        )

Create hidden layer.

Parameters:

scaled_X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features

W: {array-like}, shape = [n_features, hidden_features]
    if provided, constructs the hidden layer with W; otherwise computed internally

Returns:

Hidden layer matrix: {array-like}
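
A hedged sketch of building the hidden layer directly from already-scaled inputs (normally done inside cook_training_set / cook_test_set). It assumes the constructor fully initializes the activation function and node-simulation settings used by create_layer; shapes in the comments are expectations under these assumptions.

```python
# Illustrative only: Phi(X) = g(cbind(1, X) W) with quasi-random weights W.
import numpy as np
import nnetsauce as ns
from sklearn.preprocessing import StandardScaler

rng = np.random.default_rng(123)
X = rng.normal(size=(20, 4))
scaled_X = StandardScaler().fit_transform(X)

base = ns.Base(n_hidden_features=7, activation_name="relu",
               nodes_sim="sobol", bias=True, dropout=0)

H = base.create_layer(scaled_X)                    # expected shape (20, 7); stores W_ on the object
H_again = base.create_layer(scaled_X, W=base.W_)   # reuse the same weights explicitly
```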
def cook_training_set(self, y=None, X=None, W=None, **kwargs):
451    def cook_training_set(self, y=None, X=None, W=None, **kwargs):
452        """Create new hidden features for training set, with hidden layer, center the response.
453
454        Parameters:
455
456            y: array-like, shape = [n_samples]
457                Target values
458
459            X: {array-like}, shape = [n_samples, n_features]
460                Training vectors, where n_samples is the number
461                of samples and n_features is the number of features
462
463            W: {array-like}, shape = [n_features, hidden_features]
464                if provided, constructs the hidden layer via W
465
466        Returns:
467
468            (centered response, direct link + hidden layer matrix): {tuple}
469
470        """
471
472        # either X and y are stored or not
473        # assert ((y is None) & (X is None)) | ((y is not None) & (X is not None))
474        if self.n_hidden_features > 0:  # has a hidden layer
475            assert (
476                len(self.type_scaling) >= 2
477            ), "must have len(self.type_scaling) >= 2 when self.n_hidden_features > 0"
478
479        if X is None:
480
481            if self.col_sample == 1:
482                input_X = self.X_
483            else:
484                n_features = self.X_.shape[1]
485                new_n_features = int(np.ceil(n_features * self.col_sample))
486                assert (
487                    new_n_features >= 1
488                ), "check class attribute 'col_sample' and the number of covariates provided for X"
489                np.random.seed(self.seed)
490                index_col = np.random.choice(
491                    range(n_features), size=new_n_features, replace=False
492                )
493                self.index_col_ = index_col
494                input_X = self.X_[:, self.index_col_]
495
496        else:  # X is not None # keep X vs self.X_
497
498            if isinstance(X, pd.DataFrame):
499                X = copy.deepcopy(X.values.astype(float))
500
501            if self.col_sample == 1:
502                input_X = X
503            else:
504                n_features = X.shape[1]
505                new_n_features = int(np.ceil(n_features * self.col_sample))
506                assert (
507                    new_n_features >= 1
508                ), "check class attribute 'col_sample' and the number of covariates provided for X"
509                np.random.seed(self.seed)
510                index_col = np.random.choice(
511                    range(n_features), size=new_n_features, replace=False
512                )
513                self.index_col_ = index_col
514                input_X = X[:, self.index_col_]
515
516        if self.n_clusters <= 0:
517            # data without any clustering: self.n_clusters <= 0 -----
518
519            if self.n_hidden_features > 0:  # with hidden layer
520
521                self.nn_scaler_, scaled_X = mo.scale_covariates(
522                    input_X, choice=self.type_scaling[1], scaler=self.nn_scaler_
523                )
524                Phi_X = (
525                    self.create_layer(scaled_X)
526                    if W is None
527                    else self.create_layer(scaled_X, W=W)
528                )
529                Z = (
530                    mo.cbind(input_X, Phi_X, backend=self.backend)
531                    if self.direct_link is True
532                    else Phi_X
533                )
534                self.scaler_, scaled_Z = mo.scale_covariates(
535                    Z, choice=self.type_scaling[0], scaler=self.scaler_
536                )
537            else:  # no hidden layer
538                Z = input_X
539                self.scaler_, scaled_Z = mo.scale_covariates(
540                    Z, choice=self.type_scaling[0], scaler=self.scaler_
541                )
542
543        else:
544
545            # data with clustering: self.n_clusters > 0 -----
546
547            augmented_X = mo.cbind(
548                input_X,
549                self.encode_clusters(input_X, **kwargs),
550                backend=self.backend,
551            )
552
553            if self.n_hidden_features > 0:  # with hidden layer
554
555                self.nn_scaler_, scaled_X = mo.scale_covariates(
556                    augmented_X,
557                    choice=self.type_scaling[1],
558                    scaler=self.nn_scaler_,
559                )
560                Phi_X = (
561                    self.create_layer(scaled_X)
562                    if W is None
563                    else self.create_layer(scaled_X, W=W)
564                )
565                Z = (
566                    mo.cbind(augmented_X, Phi_X, backend=self.backend)
567                    if self.direct_link is True
568                    else Phi_X
569                )
570                self.scaler_, scaled_Z = mo.scale_covariates(
571                    Z, choice=self.type_scaling[0], scaler=self.scaler_
572                )
573            else:  # no hidden layer
574                Z = augmented_X
575                self.scaler_, scaled_Z = mo.scale_covariates(
576                    Z, choice=self.type_scaling[0], scaler=self.scaler_
577                )
578
579        # Returning model inputs -----
580        if mx.is_factor(y) is False:  # regression
581            # center y
582            if y is None:
583                self.y_mean_, centered_y = mo.center_response(self.y_)
584            else:
585                self.y_mean_, centered_y = mo.center_response(y)
586
587            # y is subsampled
588            if self.row_sample < 1:
589                n, p = Z.shape
590
591                self.subsampler_ = (
592                    SubSampler(y=self.y_, row_sample=self.row_sample, seed=self.seed)
593                    if y is None
594                    else SubSampler(y=y, row_sample=self.row_sample, seed=self.seed)
595                )
596
597                self.index_row_ = self.subsampler_.subsample()
598
599                n_row_sample = len(self.index_row_)
600                # regression
601                return (
602                    centered_y[self.index_row_].reshape(n_row_sample),
603                    self.scaler_.transform(
604                        Z[self.index_row_, :].reshape(n_row_sample, p)
605                    ),
606                )
607            # y is not subsampled
608            # regression
609            return (centered_y, self.scaler_.transform(Z))
610
611        # classification
612        # y is subsampled
613        if self.row_sample < 1:
614            n, p = Z.shape
615
616            self.subsampler_ = (
617                SubSampler(y=self.y_, row_sample=self.row_sample, seed=self.seed)
618                if y is None
619                else SubSampler(y=y, row_sample=self.row_sample, seed=self.seed)
620            )
621
622            self.index_row_ = self.subsampler_.subsample()
623
624            n_row_sample = len(self.index_row_)
625            # classification
626            return (
627                y[self.index_row_].reshape(n_row_sample),
628                self.scaler_.transform(Z[self.index_row_, :].reshape(n_row_sample, p)),
629            )
630        # y is not subsampled
631        # classification
632        return (y, self.scaler_.transform(Z))

Create new hidden features for training set, with hidden layer, center the response.

Parameters:

y: array-like, shape = [n_samples]
    Target values

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features

W: {array-like}, shape = [n_features, hidden_features]
    if provided, constructs the hidden layer via W

Returns:

(centered response, direct link + hidden layer matrix): {tuple}
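
A hedged sketch of what this method returns for a regression target when called directly (normally, fit calls it internally). The estimator, data and column counts below are assumptions made for illustration.

```python
# Illustrative only: centered response and scaled design matrix Z.
import numpy as np
import nnetsauce as ns

rng = np.random.default_rng(1)
X = rng.normal(size=(30, 4))
y = rng.normal(size=30)

model = ns.BaseRegressor(n_hidden_features=5, n_clusters=2, direct_link=True)
centered_y, scaled_Z = model.cook_training_set(y=y, X=X)

print(centered_y.shape)  # (30,): y minus its mean
print(scaled_Z.shape)    # expected (30, 11): 4 inputs + 2 one-hot clusters + 5 hidden features
```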
def cook_test_set(self, X, **kwargs):
634    def cook_test_set(self, X, **kwargs):
635        """Transform data from test set, with hidden layer.
636
637        Parameters:
638
639            X: {array-like}, shape = [n_samples, n_features]
640                Test vectors, where n_samples is the number
641                of samples and n_features is the number of features
642
643            **kwargs: additional parameters to be passed to self.encode_clusters
644
645        Returns:
646
647            Transformed test set : {array-like}
648        """
649
650        if isinstance(X, pd.DataFrame):
651            X = copy.deepcopy(X.values.astype(float))
652
653        if len(X.shape) == 1:
654            X = X.reshape(1, -1)
655
656        if (
657            self.n_clusters == 0
658        ):  # data without clustering: self.n_clusters == 0 -----
659            if self.n_hidden_features > 0:
660                # if hidden layer
661                scaled_X = (
662                    self.nn_scaler_.transform(X)
663                    if (self.col_sample == 1)
664                    else self.nn_scaler_.transform(X[:, self.index_col_])
665                )
666                Phi_X = self.create_layer(scaled_X, self.W_)
667                if self.direct_link == True:
668                    return self.scaler_.transform(
669                        mo.cbind(scaled_X, Phi_X, backend=self.backend)
670                    )
671                # when self.direct_link == False
672                return self.scaler_.transform(Phi_X)
673            # if no hidden layer # self.n_hidden_features == 0
674            return self.scaler_.transform(X)
675
676        # data with clustering: self.n_clusters > 0 -----
677        if self.col_sample == 1:
678            predicted_clusters = self.encode_clusters(X=X, predict=True, **kwargs)
679            augmented_X = mo.cbind(X, predicted_clusters, backend=self.backend)
680        else:
681            predicted_clusters = self.encode_clusters(
682                X=X[:, self.index_col_], predict=True, **kwargs
683            )
684            augmented_X = mo.cbind(
685                X[:, self.index_col_], predicted_clusters, backend=self.backend
686            )
687
688        if self.n_hidden_features > 0:  # if hidden layer
689            scaled_X = self.nn_scaler_.transform(augmented_X)
690            Phi_X = self.create_layer(scaled_X, self.W_)
691            if self.direct_link == True:
692                return self.scaler_.transform(
693                    mo.cbind(augmented_X, Phi_X, backend=self.backend)
694                )
695            return self.scaler_.transform(Phi_X)
696
697        # if no hidden layer
698        return self.scaler_.transform(augmented_X)

Transform data from test set, with hidden layer.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features

**kwargs: additional parameters to be passed to self.encode_clusters

Returns:

Transformed test set : {array-like}
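
A companion sketch for the test-set transform: once fit (via cook_training_set) has fitted the scalers, the clustering objects and the hidden-layer weights, unseen rows can be mapped into the same feature space. Again, the estimator and the shapes are illustrative assumptions.

```python
# Illustrative only: cook the training set through fit, then transform new rows.
import numpy as np
import nnetsauce as ns

rng = np.random.default_rng(1)
X, y = rng.normal(size=(30, 4)), rng.normal(size=30)
X_new = rng.normal(size=(5, 4))

model = ns.BaseRegressor(n_hidden_features=5, n_clusters=2)
model.fit(X, y)                        # fit calls cook_training_set internally
Z_new = model.cook_test_set(X_new)     # reuses fitted scalers, clusters and W_
print(Z_new.shape)                     # expected (5, 11) under these settings
```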
class BaseRegressor(nnetsauce.Base, sklearn.base.RegressorMixin):
 15class BaseRegressor(Base, RegressorMixin):
 16    """Random Vector Functional Link Network regression without shrinkage
 17
 18    Parameters:
 19
 20        n_hidden_features: int
 21            number of nodes in the hidden layer
 22
 23        activation_name: str
 24            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 25
 26        a: float
 27            hyperparameter for 'prelu' or 'elu' activation function
 28
 29        nodes_sim: str
 30            type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
 31            'uniform'
 32
 33        bias: boolean
 34            indicates if the hidden layer contains a bias term (True) or
 35            not (False)
 36
 37        dropout: float
 38            regularization parameter; (random) percentage of nodes dropped out
 39            of the training
 40
 41        direct_link: boolean
 42            indicates if the original features are included (True) in model's
 43            fitting or not (False)
 44
 45        n_clusters: int
 46            number of clusters for type_clust='kmeans' or type_clust='gmm'
 47            clustering (could be 0: no clustering)
 48
 49        cluster_encode: bool
 50            defines how the variable containing clusters is treated (default is one-hot);
 51            if `False`, then labels are used, without one-hot encoding
 52
 53        type_clust: str
 54            type of clustering method: currently k-means ('kmeans') or Gaussian
 55            Mixture Model ('gmm')
 56
 57        type_scaling: a tuple of 3 strings
 58            scaling methods for inputs, hidden layer, and clustering respectively
 59            (and when relevant).
 60            Currently available: standardization ('std') or MinMax scaling ('minmax')
 61
 62        col_sample: float
 63            percentage of features randomly chosen for training
 64
 65        row_sample: float
 66            percentage of rows chosen for training, by stratified bootstrapping
 67
 68        seed: int
 69            reproducibility seed for nodes_sim=='uniform', clustering and dropout
 70
 71        backend: str
 72            "cpu" or "gpu" or "tpu"
 73
 74    Attributes:
 75
 76        beta_: vector
 77            regression coefficients
 78
 79        GCV_: float
 80            Generalized Cross-Validation error
 81
 82    """
 83
 84    # construct the object -----
 85
 86    def __init__(
 87        self,
 88        n_hidden_features=5,
 89        activation_name="relu",
 90        a=0.01,
 91        nodes_sim="sobol",
 92        bias=True,
 93        dropout=0,
 94        direct_link=True,
 95        n_clusters=2,
 96        cluster_encode=True,
 97        type_clust="kmeans",
 98        type_scaling=("std", "std", "std"),
 99        col_sample=1,
100        row_sample=1,
101        seed=123,
102        backend="cpu",
103    ):
104        super().__init__(
105            n_hidden_features=n_hidden_features,
106            activation_name=activation_name,
107            a=a,
108            nodes_sim=nodes_sim,
109            bias=bias,
110            dropout=dropout,
111            direct_link=direct_link,
112            n_clusters=n_clusters,
113            cluster_encode=cluster_encode,
114            type_clust=type_clust,
115            type_scaling=type_scaling,
116            col_sample=col_sample,
117            row_sample=row_sample,
118            seed=seed,
119            backend=backend,
120        )
121
122    def fit(self, X, y, **kwargs):
123        """Fit BaseRegressor to training data (X, y)
124
125        Parameters:
126
127            X: {array-like}, shape = [n_samples, n_features]
128                Training vectors, where n_samples is the number
129                of samples and n_features is the number of features
130
131            y: array-like, shape = [n_samples]
132                Target values
133
134            **kwargs: additional parameters to be passed to self.cook_training_set
135
136        Returns:
137
138            self: object
139        """
140
141        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
142
143        fit_obj = lmf.beta_Sigma_hat(X=scaled_Z, y=centered_y, backend=self.backend)
144
145        self.beta_ = fit_obj["beta_hat"]
146
147        self.GCV_ = fit_obj["GCV"]
148
149        return self
150
151    def predict(self, X, **kwargs):
152        """Predict test data X.
153
154        Parameters:
155
156            X: {array-like}, shape = [n_samples, n_features]
157                Test vectors, where n_samples is the number
158                of samples and n_features is the number of features
159
160            **kwargs: additional parameters to be passed to self.cook_test_set
161
162        Returns:
163
164            model predictions: {array-like}
165        """
166
167        if len(X.shape) == 1:
168            n_features = X.shape[0]
169            new_X = mo.rbind(
170                X.reshape(1, n_features),
171                np.ones(n_features).reshape(1, n_features),
172            )
173
174            return (
175                self.y_mean_
176                + mo.safe_sparse_dot(
177                    a=self.cook_test_set(new_X, **kwargs),
178                    b=self.beta_,
179                    backend=self.backend,
180                )
181            )[0]
182
183        return self.y_mean_ + mo.safe_sparse_dot(
184            a=self.cook_test_set(X, **kwargs),
185            b=self.beta_,
186            backend=self.backend,
187        )

Random Vector Functional Link Network regression without shrinkage

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or
    not (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original features are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for type_clust='kmeans' or type_clust='gmm'
    clustering (could be 0: no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot);
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

col_sample: float
    percentage of features randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

seed: int
    reproducibility seed for nodes_sim=='uniform', clustering and dropout

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

beta_: vector
    regression coefficients

GCV_: float
    Generalized Cross-Validation error
def fit(self, X, y, **kwargs):
122    def fit(self, X, y, **kwargs):
123        """Fit BaseRegressor to training data (X, y)
124
125        Parameters:
126
127            X: {array-like}, shape = [n_samples, n_features]
128                Training vectors, where n_samples is the number
129                of samples and n_features is the number of features
130
131            y: array-like, shape = [n_samples]
132                Target values
133
134            **kwargs: additional parameters to be passed to self.cook_training_set
135
136        Returns:
137
138            self: object
139        """
140
141        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
142
143        fit_obj = lmf.beta_Sigma_hat(X=scaled_Z, y=centered_y, backend=self.backend)
144
145        self.beta_ = fit_obj["beta_hat"]
146
147        self.GCV_ = fit_obj["GCV"]
148
149        return self

Fit BaseRegressor to training data (X, y)

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features

y: array-like, shape = [n_samples]
    Target values

**kwargs: additional parameters to be passed to self.cook_training_set

Returns:

self: object
def predict(self, X, **kwargs):
151    def predict(self, X, **kwargs):
152        """Predict test data X.
153
154        Parameters:
155
156            X: {array-like}, shape = [n_samples, n_features]
157                Test vectors, where n_samples is the number
158                of samples and n_features is the number of features
159
160            **kwargs: additional parameters to be passed to self.cook_test_set
161
162        Returns:
163
164            model predictions: {array-like}
165        """
166
167        if len(X.shape) == 1:
168            n_features = X.shape[0]
169            new_X = mo.rbind(
170                X.reshape(1, n_features),
171                np.ones(n_features).reshape(1, n_features),
172            )
173
174            return (
175                self.y_mean_
176                + mo.safe_sparse_dot(
177                    a=self.cook_test_set(new_X, **kwargs),
178                    b=self.beta_,
179                    backend=self.backend,
180                )
181            )[0]
182
183        return self.y_mean_ + mo.safe_sparse_dot(
184            a=self.cook_test_set(X, **kwargs),
185            b=self.beta_,
186            backend=self.backend,
187        )

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features

**kwargs: additional parameters to be passed to self.cook_test_set

Returns:

model predictions: {array-like}
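
A hedged end-to-end sketch for this regressor on synthetic data; hyperparameter values and expected shapes are illustrative assumptions, not library defaults or documented results.

```python
# Illustrative only: fit/predict with BaseRegressor on synthetic data.
import numpy as np
import nnetsauce as ns

rng = np.random.default_rng(42)
X = rng.normal(size=(100, 5))
y = X @ rng.normal(size=5) + 0.1 * rng.normal(size=100)

reg = ns.BaseRegressor(n_hidden_features=10, n_clusters=2, seed=42)
reg.fit(X[:80], y[:80])

preds = reg.predict(X[80:])
print(preds.shape)   # expected (20,)
print(reg.GCV_)      # generalized cross-validation error from the fit
```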
class BayesianRVFLRegressor(nnetsauce.Base, sklearn.base.RegressorMixin):
 15class BayesianRVFLRegressor(Base, RegressorMixin):
 16    """Bayesian Random Vector Functional Link Network regression with one prior
 17
 18    Parameters:
 19
 20        n_hidden_features: int
 21            number of nodes in the hidden layer
 22
 23        activation_name: str
 24            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 25
 26        a: float
 27            hyperparameter for 'prelu' or 'elu' activation function
 28
 29        nodes_sim: str
 30            type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'
 31
 32        bias: boolean
 33            indicates if the hidden layer contains a bias term (True) or not (False)
 34
 35        dropout: float
 36            regularization parameter; (random) percentage of nodes dropped out
 37            of the training
 38
 39        direct_link: boolean
 40            indicates if the original features are included (True) in model's fitting or not (False)
 41
 42        n_clusters: int
 43            number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)
 44
 45        cluster_encode: bool
 46            defines how the variable containing clusters is treated (default is one-hot)
 47            if `False`, then labels are used, without one-hot encoding
 48
 49        type_clust: str
 50            type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')
 51
 52        type_scaling: a tuple of 3 strings
 53            scaling methods for inputs, hidden layer, and clustering respectively
 54            (and when relevant).
 55            Currently available: standardization ('std') or MinMax scaling ('minmax')
 56
 57        seed: int
 58            reproducibility seed for nodes_sim=='uniform'
 59
 60        s: float
 61            std. dev. of regression parameters in Bayesian Ridge Regression
 62
 63        sigma: float
 64            std. dev. of residuals in Bayesian Ridge Regression
 65
 66        return_std: boolean
 67            if True, uncertainty around predictions is evaluated
 68
 69        backend: str
 70            "cpu" or "gpu" or "tpu"
 71
 72    Attributes:
 73
 74        beta_: array-like
 75            regression's coefficients
 76
 77        Sigma_: array-like
 78            covariance of the distribution of fitted parameters
 79
 80        GCV_: float
 81            Generalized cross-validation error
 82
 83        y_mean_: float
 84            average response
 85
 86    Examples:
 87
 88    ```python
 89    TBD
 90    ```
 91
 92    """
 93
 94    # construct the object -----
 95
 96    def __init__(
 97        self,
 98        n_hidden_features=5,
 99        activation_name="relu",
100        a=0.01,
101        nodes_sim="sobol",
102        bias=True,
103        dropout=0,
104        direct_link=True,
105        n_clusters=2,
106        cluster_encode=True,
107        type_clust="kmeans",
108        type_scaling=("std", "std", "std"),
109        seed=123,
110        s=0.1,
111        sigma=0.05,
112        return_std=True,
113        backend="cpu",
114    ):
115        super().__init__(
116            n_hidden_features=n_hidden_features,
117            activation_name=activation_name,
118            a=a,
119            nodes_sim=nodes_sim,
120            bias=bias,
121            dropout=dropout,
122            direct_link=direct_link,
123            n_clusters=n_clusters,
124            cluster_encode=cluster_encode,
125            type_clust=type_clust,
126            type_scaling=type_scaling,
127            seed=seed,
128            backend=backend,
129        )
130        self.s = s
131        self.sigma = sigma
132        self.beta_ = None
133        self.Sigma_ = None
134        self.GCV_ = None
135        self.return_std = return_std
136
137    def fit(self, X, y, **kwargs):
138        """Fit BayesianRVFLRegressor to training data (X, y).
139
140        Parameters:
141
142            X: {array-like}, shape = [n_samples, n_features]
143                Training vectors, where n_samples is the number
144                of samples and n_features is the number of features.
145
146            y: array-like, shape = [n_samples]
147                Target values.
148
149            **kwargs: additional parameters to be passed to
150                    self.cook_training_set
151
152        Returns:
153
154            self: object
155
156        """
157
158        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
159
160        fit_obj = lmf.beta_Sigma_hat_rvfl(
161            X=scaled_Z,
162            y=centered_y,
163            s=self.s,
164            sigma=self.sigma,
165            fit_intercept=False,
166            return_cov=self.return_std,
167            backend=self.backend,
168        )
169
170        self.beta_ = fit_obj["beta_hat"]
171
172        if self.return_std == True:
173            self.Sigma_ = fit_obj["Sigma_hat"]
174
175        self.GCV_ = fit_obj["GCV"]
176
177        return self
178
179    def predict(self, X, return_std=False, **kwargs):
180        """Predict test data X.
181
182        Parameters:
183
184            X: {array-like}, shape = [n_samples, n_features]
185                Training vectors, where n_samples is the number
186                of samples and n_features is the number of features.
187
188            return_std: {boolean}, standard dev. is returned or not
189
190            **kwargs: additional parameters to be passed to
191                    self.cook_test_set
192
193        Returns:
194
195            model predictions: {array-like}
196
197        """
198
199        if len(X.shape) == 1:  # one observation in the test set only
200            n_features = X.shape[0]
201            new_X = mo.rbind(
202                x=X.reshape(1, n_features),
203                y=np.ones(n_features).reshape(1, n_features),
204                backend=self.backend,
205            )
206
207        self.return_std = return_std
208
209        if self.return_std == False:
210            if len(X.shape) == 1:
211                return (
212                    self.y_mean_
213                    + mo.safe_sparse_dot(
214                        a=self.cook_test_set(new_X, **kwargs),
215                        b=self.beta_,
216                        backend=self.backend,
217                    )
218                )[0]
219
220            return self.y_mean_ + mo.safe_sparse_dot(
221                a=self.cook_test_set(X, **kwargs),
222                b=self.beta_,
223                backend=self.backend,
224            )
225
226        else:  # return_std == True: return standard deviations along with preds
227            if len(X.shape) == 1:
228                Z = self.cook_test_set(new_X, **kwargs)
229
230                pred_obj = lmf.beta_Sigma_hat_rvfl(
231                    s=self.s,
232                    sigma=self.sigma,
233                    X_star=Z,
234                    return_cov=True,
235                    beta_hat_=self.beta_,
236                    Sigma_hat_=self.Sigma_,
237                    backend=self.backend,
238                )
239
240                return (
241                    self.y_mean_ + pred_obj["preds"][0],
242                    pred_obj["preds_std"][0],
243                )
244
245            Z = self.cook_test_set(X, **kwargs)
246
247            pred_obj = lmf.beta_Sigma_hat_rvfl(
248                s=self.s,
249                sigma=self.sigma,
250                X_star=Z,
251                return_cov=True,
252                beta_hat_=self.beta_,
253                Sigma_hat_=self.Sigma_,
254                backend=self.backend,
255            )
256
257            return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])

Bayesian Random Vector Functional Link Network regression with one prior

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original features are included (True) in model's fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

seed: int
    reproducibility seed for nodes_sim=='uniform'

s: float
    std. dev. of regression parameters in Bayesian Ridge Regression

sigma: float
    std. dev. of residuals in Bayesian Ridge Regression

return_std: boolean
    if True, uncertainty around predictions is evaluated

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

beta_: array-like
    regression's coefficients

Sigma_: array-like
    covariance of the distribution of fitted parameters

GCV_: float
    Generalized cross-validation error

y_mean_: float
    average response

Examples:

TBD
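
Since the example slot above is still marked TBD, here is a hedged sketch of the intended usage, including the return_std=True path. Data, hyperparameters and expected shapes are assumptions made for illustration.

```python
# Illustrative only: Bayesian RVFL fit, then predictions with uncertainty.
import numpy as np
import nnetsauce as ns

rng = np.random.default_rng(7)
X = rng.normal(size=(60, 3))
y = np.sin(X[:, 0]) + 0.1 * rng.normal(size=60)

reg = ns.BayesianRVFLRegressor(n_hidden_features=10, s=0.1, sigma=0.05)
reg.fit(X[:50], y[:50])

point_preds = reg.predict(X[50:])                         # point predictions only
preds, preds_std = reg.predict(X[50:], return_std=True)   # plus posterior std. dev.
print(preds.shape, preds_std.shape)                       # expected (10,), (10,)
```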
def fit(self, X, y, **kwargs):
137    def fit(self, X, y, **kwargs):
138        """Fit BayesianRVFLRegressor to training data (X, y).
139
140        Parameters:
141
142            X: {array-like}, shape = [n_samples, n_features]
143                Training vectors, where n_samples is the number
144                of samples and n_features is the number of features.
145
146            y: array-like, shape = [n_samples]
147                Target values.
148
149            **kwargs: additional parameters to be passed to
150                    self.cook_training_set
151
152        Returns:
153
154            self: object
155
156        """
157
158        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
159
160        fit_obj = lmf.beta_Sigma_hat_rvfl(
161            X=scaled_Z,
162            y=centered_y,
163            s=self.s,
164            sigma=self.sigma,
165            fit_intercept=False,
166            return_cov=self.return_std,
167            backend=self.backend,
168        )
169
170        self.beta_ = fit_obj["beta_hat"]
171
172        if self.return_std == True:
173            self.Sigma_ = fit_obj["Sigma_hat"]
174
175        self.GCV_ = fit_obj["GCV"]
176
177        return self

Fit BayesianRVFLRegressor to training data (X, y).

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set

Returns:

self: object
def predict(self, X, return_std=False, **kwargs):
179    def predict(self, X, return_std=False, **kwargs):
180        """Predict test data X.
181
182        Parameters:
183
184            X: {array-like}, shape = [n_samples, n_features]
185                Test vectors, where n_samples is the number
186                of samples and n_features is the number of features.
187
188            return_std: {boolean}, standard dev. is returned or not
189
190            **kwargs: additional parameters to be passed to
191                    self.cook_test_set
192
193        Returns:
194
195            model predictions: {array-like}
196
197        """
198
199        if len(X.shape) == 1:  # one observation in the test set only
200            n_features = X.shape[0]
201            new_X = mo.rbind(
202                x=X.reshape(1, n_features),
203                y=np.ones(n_features).reshape(1, n_features),
204                backend=self.backend,
205            )
206
207        self.return_std = return_std
208
209        if self.return_std == False:
210            if len(X.shape) == 1:
211                return (
212                    self.y_mean_
213                    + mo.safe_sparse_dot(
214                        a=self.cook_test_set(new_X, **kwargs),
215                        b=self.beta_,
216                        backend=self.backend,
217                    )
218                )[0]
219
220            return self.y_mean_ + mo.safe_sparse_dot(
221                a=self.cook_test_set(X, **kwargs),
222                b=self.beta_,
223                backend=self.backend,
224            )
225
226        else:  # return_std == True: return standard deviations along with preds
227            if len(X.shape) == 1:
228                Z = self.cook_test_set(new_X, **kwargs)
229
230                pred_obj = lmf.beta_Sigma_hat_rvfl(
231                    s=self.s,
232                    sigma=self.sigma,
233                    X_star=Z,
234                    return_cov=True,
235                    beta_hat_=self.beta_,
236                    Sigma_hat_=self.Sigma_,
237                    backend=self.backend,
238                )
239
240                return (
241                    self.y_mean_ + pred_obj["preds"][0],
242                    pred_obj["preds_std"][0],
243                )
244
245            Z = self.cook_test_set(X, **kwargs)
246
247            pred_obj = lmf.beta_Sigma_hat_rvfl(
248                s=self.s,
249                sigma=self.sigma,
250                X_star=Z,
251                return_cov=True,
252                beta_hat_=self.beta_,
253                Sigma_hat_=self.Sigma_,
254                backend=self.backend,
255            )
256
257            return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

return_std: {boolean}, standard dev. is returned or not

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
class BayesianRVFL2Regressor(nnetsauce.Base, sklearn.base.RegressorMixin):
 15class BayesianRVFL2Regressor(Base, RegressorMixin):
 16    """Bayesian Random Vector Functional Link Network regression with two priors
 17
 18    Parameters:
 19
 20        n_hidden_features: int
 21            number of nodes in the hidden layer
 22
 23        activation_name: str
 24            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 25
 26        a: float
 27            hyperparameter for 'prelu' or 'elu' activation function
 28
 29        nodes_sim: str
 30            type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'
 31
 32        bias: boolean
 33            indicates if the hidden layer contains a bias term (True) or not (False)
 34
 35        dropout: float
 36            regularization parameter; (random) percentage of nodes dropped out
 37            of the training
 38
 39        direct_link: boolean
 40            indicates if the original features are included (True) in model's fitting or not (False)
 41
 42        n_clusters: int
 43            number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)
 44
 45        cluster_encode: bool
 46            defines how the variable containing clusters is treated (default is one-hot)
 47            if `False`, then labels are used, without one-hot encoding
 48
 49        type_clust: str
 50            type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')
 51
 52        type_scaling: a tuple of 3 strings
 53            scaling methods for inputs, hidden layer, and clustering respectively
 54            (and when relevant).
 55            Currently available: standardization ('std') or MinMax scaling ('minmax')
 56
 57        seed: int
 58            reproducibility seed for nodes_sim=='uniform'
 59
 60        s1: float
 61            std. dev. of init. regression parameters in Bayesian Ridge Regression
 62
 63        s2: float
 64            std. dev. of augmented regression parameters in Bayesian Ridge Regression
 65
 66        sigma: float
 67            std. dev. of residuals in Bayesian Ridge Regression
 68
 69        return_std: boolean
 70            if True, uncertainty around predictions is evaluated
 71
 72        backend: str
 73            "cpu" or "gpu" or "tpu"
 74
 75    Attributes:
 76
 77        beta_: array-like
 78            regression's coefficients
 79
 80        Sigma_: array-like
 81            covariance of the distribution of fitted parameters
 82
 83        GCV_: float
 84            Generalized cross-validation error
 85
 86        y_mean_: float
 87            average response
 88
 89    Examples:
 90
 91    ```python
 92    TBD
 93    ```
 94
 95    """
 96
 97    # construct the object -----
 98
 99    def __init__(
100        self,
101        n_hidden_features=5,
102        activation_name="relu",
103        a=0.01,
104        nodes_sim="sobol",
105        bias=True,
106        dropout=0,
107        direct_link=True,
108        n_clusters=0,
109        cluster_encode=True,
110        type_clust="kmeans",
111        type_scaling=("std", "std", "std"),
112        seed=123,
113        s1=0.1,
114        s2=0.1,
115        sigma=0.05,
116        return_std=True,
117        backend="cpu",
118    ):
119        super().__init__(
120            n_hidden_features=n_hidden_features,
121            activation_name=activation_name,
122            a=a,
123            nodes_sim=nodes_sim,
124            bias=bias,
125            dropout=dropout,
126            direct_link=direct_link,
127            n_clusters=n_clusters,
128            cluster_encode=cluster_encode,
129            type_clust=type_clust,
130            type_scaling=type_scaling,
131            seed=seed,
132            backend=backend,
133        )
134
135        self.s1 = s1
136        self.s2 = s2
137        self.sigma = sigma
138        self.beta_ = None
139        self.Sigma_ = None
140        self.GCV_ = None
141        self.return_std = return_std
142
143    def fit(self, X, y, **kwargs):
144        """Fit BayesianRVFL2Regressor to training data (X, y)
145
146        Parameters:
147
148            X: {array-like}, shape = [n_samples, n_features]
149                Training vectors, where n_samples is the number
150                of samples and n_features is the number of features
151
152            y: array-like, shape = [n_samples]
153                Target values
154
155            **kwargs: additional parameters to be passed to
156                    self.cook_training_set
157
158        Returns:
159
160            self: object
161
162        """
163
164        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
165
166        n, p = X.shape
167        q = self.n_hidden_features
168
169        if self.direct_link == True:
170            r = p + self.n_clusters
171
172            block11 = (self.s1**2) * np.eye(r)
173            block12 = np.zeros((r, q))
174            block21 = np.zeros((q, r))
175            block22 = (self.s2**2) * np.eye(q)
176
177            Sigma_prior = mo.rbind(
178                x=mo.cbind(x=block11, y=block12, backend=self.backend),
179                y=mo.cbind(x=block21, y=block22, backend=self.backend),
180                backend=self.backend,
181            )
182
183        else:
184            Sigma_prior = (self.s2**2) * np.eye(q)
185
186        fit_obj = lmf.beta_Sigma_hat_rvfl2(
187            X=scaled_Z,
188            y=centered_y,
189            Sigma=Sigma_prior,
190            sigma=self.sigma,
191            fit_intercept=False,
192            return_cov=self.return_std,
193            backend=self.backend,
194        )
195
196        self.beta_ = fit_obj["beta_hat"]
197
198        if self.return_std == True:
199            self.Sigma_ = fit_obj["Sigma_hat"]
200
201        self.GCV_ = fit_obj["GCV"]
202
203        return self
204
205    def predict(self, X, return_std=False, **kwargs):
206        """Predict test data X.
207
208        Parameters:
209
210            X: {array-like}, shape = [n_samples, n_features]
211                Training vectors, where n_samples is the number
212                of samples and n_features is the number of features.
213
214            return_std: {boolean}, standard dev. is returned or not
215
216            **kwargs: additional parameters to be passed to
217                    self.cook_test_set
218
219        Returns:
220
221            model predictions: {array-like}
222
223        """
224
225        if len(X.shape) == 1:  # one observation in the test set only
226            n_features = X.shape[0]
227            new_X = mo.rbind(
228                x=X.reshape(1, n_features),
229                y=np.ones(n_features).reshape(1, n_features),
230                backend=self.backend,
231            )
232
233        self.return_std = return_std
234
235        if self.return_std == False:
236            if len(X.shape) == 1:
237                return (
238                    self.y_mean_
239                    + mo.safe_sparse_dot(
240                        self.cook_test_set(new_X, **kwargs),
241                        self.beta_,
242                        backend=self.backend,
243                    )
244                )[0]
245
246            return self.y_mean_ + mo.safe_sparse_dot(
247                self.cook_test_set(X, **kwargs),
248                self.beta_,
249                backend=self.backend,
250            )
251
252        else:  # confidence interval required for preds?
253            if len(X.shape) == 1:
254                Z = self.cook_test_set(new_X, **kwargs)
255
256                pred_obj = lmf.beta_Sigma_hat_rvfl2(
257                    X_star=Z,
258                    return_cov=self.return_std,
259                    beta_hat_=self.beta_,
260                    Sigma_hat_=self.Sigma_,
261                    backend=self.backend,
262                )
263
264                return (
265                    self.y_mean_ + pred_obj["preds"][0],
266                    pred_obj["preds_std"][0],
267                )
268
269            Z = self.cook_test_set(X, **kwargs)
270
271            pred_obj = lmf.beta_Sigma_hat_rvfl2(
272                X_star=Z,
273                return_cov=self.return_std,
274                beta_hat_=self.beta_,
275                Sigma_hat_=self.Sigma_,
276                backend=self.backend,
277            )
278
279            return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])

Bayesian Random Vector Functional Link Network regression with two priors

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original features are included (True) in model's fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

seed: int
    reproducibility seed for nodes_sim=='uniform'

s1: float
    std. dev. of init. regression parameters in Bayesian Ridge Regression

s2: float
    std. dev. of augmented regression parameters in Bayesian Ridge Regression

sigma: float
    std. dev. of residuals in Bayesian Ridge Regression

return_std: boolean
    if True, uncertainty around predictions is evaluated

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

beta_: array-like
    regression's coefficients

Sigma_: array-like
    covariance of the distribution of fitted parameters

GCV_: float
    Generalized cross-validation error

y_mean_: float
    average response

Examples:

TBD
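
Until the official example is filled in, here is a minimal, hypothetical usage sketch (the dataset and hyperparameter values are illustrative only, not part of the library's documentation):

```python
# Hypothetical usage sketch for BayesianRVFL2Regressor; values are illustrative.
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123)

reg = ns.BayesianRVFL2Regressor(n_hidden_features=5, s1=0.1, s2=0.1, sigma=0.05)
reg.fit(X_train, y_train)

print(reg.GCV_)  # generalized cross-validation error (see Attributes above)
mean, std = reg.predict(X_test, return_std=True)  # predictive mean and std. dev.
```
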
def fit(self, X, y, **kwargs):
143    def fit(self, X, y, **kwargs):
144        """Fit BayesianRVFL2Regressor to training data (X, y)
145
146        Parameters:
147
148            X: {array-like}, shape = [n_samples, n_features]
149                Training vectors, where n_samples is the number
150                of samples and n_features is the number of features
151
152            y: array-like, shape = [n_samples]
153                Target values
154
155            **kwargs: additional parameters to be passed to
156                    self.cook_training_set
157
158        Returns:
159
160            self: object
161
162        """
163
164        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
165
166        n, p = X.shape
167        q = self.n_hidden_features
168
169        if self.direct_link == True:
170            r = p + self.n_clusters
171
172            block11 = (self.s1**2) * np.eye(r)
173            block12 = np.zeros((r, q))
174            block21 = np.zeros((q, r))
175            block22 = (self.s2**2) * np.eye(q)
176
177            Sigma_prior = mo.rbind(
178                x=mo.cbind(x=block11, y=block12, backend=self.backend),
179                y=mo.cbind(x=block21, y=block22, backend=self.backend),
180                backend=self.backend,
181            )
182
183        else:
184            Sigma_prior = (self.s2**2) * np.eye(q)
185
186        fit_obj = lmf.beta_Sigma_hat_rvfl2(
187            X=scaled_Z,
188            y=centered_y,
189            Sigma=Sigma_prior,
190            sigma=self.sigma,
191            fit_intercept=False,
192            return_cov=self.return_std,
193            backend=self.backend,
194        )
195
196        self.beta_ = fit_obj["beta_hat"]
197
198        if self.return_std == True:
199            self.Sigma_ = fit_obj["Sigma_hat"]
200
201        self.GCV_ = fit_obj["GCV"]
202
203        return self

Fit BayesianRVFL2Regressor to training data (X, y)

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features

y: array-like, shape = [n_samples]
    Target values

**kwargs: additional parameters to be passed to
        self.cook_training_set

Returns:

self: object
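
As the source above shows, when `direct_link=True` the Gaussian prior placed on the coefficients is block-diagonal: variance `s1**2` on the original (and clustered) features, `s2**2` on the hidden features; otherwise it is simply `s2**2 * I`. A standalone numpy sketch with illustrative dimensions (not library code):

```python
import numpy as np

p, n_clusters, q = 4, 2, 5   # illustrative: inputs, clusters, hidden features
s1, s2 = 0.1, 0.1            # prior standard deviations (the defaults)
r = p + n_clusters

# Same block structure as the prior covariance assembled in fit() when direct_link=True
Sigma_prior = np.block([
    [s1**2 * np.eye(r), np.zeros((r, q))],
    [np.zeros((q, r)), s2**2 * np.eye(q)],
])
print(Sigma_prior.shape)     # (11, 11) == (r + q, r + q)
```
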
def predict(self, X, return_std=False, **kwargs):
205    def predict(self, X, return_std=False, **kwargs):
206        """Predict test data X.
207
208        Parameters:
209
210            X: {array-like}, shape = [n_samples, n_features]
211                Training vectors, where n_samples is the number
212                of samples and n_features is the number of features.
213
214            return_std: {boolean}, standard dev. is returned or not
215
216            **kwargs: additional parameters to be passed to
217                    self.cook_test_set
218
219        Returns:
220
221            model predictions: {array-like}
222
223        """
224
225        if len(X.shape) == 1:  # one observation in the test set only
226            n_features = X.shape[0]
227            new_X = mo.rbind(
228                x=X.reshape(1, n_features),
229                y=np.ones(n_features).reshape(1, n_features),
230                backend=self.backend,
231            )
232
233        self.return_std = return_std
234
235        if self.return_std == False:
236            if len(X.shape) == 1:
237                return (
238                    self.y_mean_
239                    + mo.safe_sparse_dot(
240                        self.cook_test_set(new_X, **kwargs),
241                        self.beta_,
242                        backend=self.backend,
243                    )
244                )[0]
245
246            return self.y_mean_ + mo.safe_sparse_dot(
247                self.cook_test_set(X, **kwargs),
248                self.beta_,
249                backend=self.backend,
250            )
251
252        else:  # confidence interval required for preds?
253            if len(X.shape) == 1:
254                Z = self.cook_test_set(new_X, **kwargs)
255
256                pred_obj = lmf.beta_Sigma_hat_rvfl2(
257                    X_star=Z,
258                    return_cov=self.return_std,
259                    beta_hat_=self.beta_,
260                    Sigma_hat_=self.Sigma_,
261                    backend=self.backend,
262                )
263
264                return (
265                    self.y_mean_ + pred_obj["preds"][0],
266                    pred_obj["preds_std"][0],
267                )
268
269            Z = self.cook_test_set(X, **kwargs)
270
271            pred_obj = lmf.beta_Sigma_hat_rvfl2(
272                X_star=Z,
273                return_cov=self.return_std,
274                beta_hat_=self.beta_,
275                Sigma_hat_=self.Sigma_,
276                backend=self.backend,
277            )
278
279            return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

return_std: {boolean}, standard dev. is returned or not

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
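
Concretely, `predict` returns a plain array of point predictions when `return_std=False`, and a `(mean, standard deviation)` tuple when `return_std=True`. Continuing the illustrative sketch above:

```python
point_preds = reg.predict(X_test)                             # array of means
mean_preds, std_preds = reg.predict(X_test, return_std=True)  # tuple (means, std. devs.)
```

Note that calling `predict` also overwrites `self.return_std`, so the value passed last is the one a subsequent `fit` uses to decide whether to compute `Sigma_`.
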
class ClassicalMTS(nnetsauce.Base):
 42class ClassicalMTS(Base):
 43    """Multivariate time series (FactorMTS) forecasting with Factor models
 44
 45    Parameters:
 46
 47        model: type of model: str.
 48            currently, 'VAR', 'VECM', 'ARIMA', 'ETS', 'Theta'
 49
 50    Attributes:
 51
 52        df_: data frame
 53            the input data frame, in case a data.frame is provided to `fit`
 54
 55        level_: int
 56            level of confidence for prediction intervals (default is 95)
 57
 58    Examples:
 59    See examples/classical_mts_timeseries.py
 60    """
 61
 62    # construct the object -----
 63
 64    def __init__(self, model="VAR"):
 65
 66        self.model = model
 67        if self.model == "VAR":
 68            self.obj = VAR
 69        elif self.model == "VECM":
 70            self.obj = VECM
 71        elif self.model == "ARIMA":
 72            self.obj = ARIMA
 73        elif self.model == "ETS":
 74            self.obj = ExponentialSmoothing
 75        elif self.model == "Theta":
 76            self.obj = ThetaModel
 77        else:
 78            raise ValueError("model not recognized")
 79        self.n_series = None
 80        self.replications = None
 81        self.mean_ = None
 82        self.upper_ = None
 83        self.lower_ = None
 84        self.output_dates_ = None
 85        self.alpha_ = None
 86        self.df_ = None
 87        self.residuals_ = []
 88        self.sims_ = None
 89        self.level_ = None
 90
 91    def fit(self, X, **kwargs):
 92        """Fit FactorMTS model to training data X, with optional regressors xreg
 93
 94        Parameters:
 95
 96        X: {array-like}, shape = [n_samples, n_features]
 97            Training time series, where n_samples is the number
 98            of samples and n_features is the number of features;
 99            X must be in increasing order (most recent observations last)
100
101        **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)
102
103        Returns:
104
105        self: object
106        """
107
108        try:
109            self.n_series = X.shape[1]
110        except Exception:
111            self.n_series = 1
112
113        if (isinstance(X, pd.DataFrame) is False) and isinstance(
114            X, pd.Series
115        ) is False:  # input data set is a numpy array
116
117            X = pd.DataFrame(X)
118            if self.n_series > 1:
119                self.series_names = ["series" + str(i) for i in range(X.shape[1])]
120            else:
121                self.series_names = "series0"
122
123        else:  # input data set is a DataFrame or Series with column names
124
125            X_index = None
126            if X.index is not None and len(X.shape) > 1:
127                X_index = X.index
128                X = copy.deepcopy(mo.convert_df_to_numeric(X))
129            if X_index is not None:
130                try:
131                    X.index = X_index
132                except Exception:
133                    pass
134            if isinstance(X, pd.DataFrame):
135                self.series_names = X.columns.tolist()
136            else:
137                self.series_names = X.name
138
139        if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series):
140            self.df_ = X
141            X = X.values
142            self.df_.columns = self.series_names
143            self.input_dates = ts.compute_input_dates(self.df_)
144        else:
145            self.df_ = pd.DataFrame(X, columns=self.series_names)
146
147        if self.model == "Theta":
148            self.obj = self.obj(self.df_, **kwargs).fit()
149        else:
150            self.obj = self.obj(X, **kwargs).fit(**kwargs)
151
152        return self
153
154    def predict(self, h=5, level=95, **kwargs):
155        """Forecast all the time series, h steps ahead
156
157        Parameters:
158
159        h: {integer}
160            Forecasting horizon
161
162        **kwargs: additional parameters to be passed to
163                self.cook_test_set
164
165        Returns:
166
167        model predictions for horizon = h: {array-like}
168
169        """
170
171        self.output_dates_, frequency = ts.compute_output_dates(self.df_, h)
172
173        self.level_ = level
174
175        self.lower_ = None  # do not remove (/!\)
176
177        self.upper_ = None  # do not remove (/!\)
178
179        self.sims_ = None  # do not remove (/!\)
180
181        self.level_ = level
182
183        self.alpha_ = 100 - level
184
185        pi_multiplier = norm.ppf(1 - self.alpha_ / 200)
186
187        # Named tuple for forecast results
188        DescribeResult = namedtuple("DescribeResult", ("mean", "lower", "upper"))
189
190        if self.model == "VAR":
191            mean_forecast, lower_bound, upper_bound = self.obj.forecast_interval(
192                self.obj.endog, steps=h, alpha=self.alpha_ / 100, **kwargs
193            )
194
195        elif self.model == "VECM":
196            forecast_result = self.obj.predict(steps=h)
197            mean_forecast = forecast_result
198            lower_bound, upper_bound = self._compute_confidence_intervals(
199                forecast_result, alpha=self.alpha_ / 100, **kwargs
200            )
201
202        elif self.model == "ARIMA":
203            forecast_result = self.obj.get_forecast(steps=h)
204            mean_forecast = forecast_result.predicted_mean
205            lower_bound = forecast_result.conf_int()[:, 0]
206            upper_bound = forecast_result.conf_int()[:, 1]
207
208        elif self.model == "ETS":
209            forecast_result = self.obj.forecast(steps=h)
210            residuals = self.obj.resid
211            std_errors = np.std(residuals)
212            mean_forecast = forecast_result
213            lower_bound = forecast_result - pi_multiplier * std_errors
214            upper_bound = forecast_result + pi_multiplier * std_errors
215
216        elif self.model == "Theta":
217            try:
218                mean_forecast = self.obj.forecast(steps=h).values
219                forecast_result = self.obj.prediction_intervals(
220                    steps=h, alpha=self.alpha_ / 100, **kwargs
221                )
222                lower_bound = forecast_result["lower"].values
223                upper_bound = forecast_result["upper"].values
224            except Exception:
225                mean_forecast = self.obj.forecast(steps=h)
226                forecast_result = self.obj.prediction_intervals(
227                    steps=h, alpha=self.alpha_ / 100, **kwargs
228                )
229                lower_bound = forecast_result["lower"]
230                upper_bound = forecast_result["upper"]
231
232        else:
233
234            raise ValueError("model not recognized")
235
236        try:
237            self.mean_ = pd.DataFrame(
238                mean_forecast,
239                columns=self.series_names,
240                index=self.output_dates_,
241            )
242            self.lower_ = pd.DataFrame(
243                lower_bound, columns=self.series_names, index=self.output_dates_
244            )
245            self.upper_ = pd.DataFrame(
246                upper_bound, columns=self.series_names, index=self.output_dates_
247            )
248        except Exception:
249            self.mean_ = pd.Series(
250                mean_forecast, name=self.series_names, index=self.output_dates_
251            )
252            self.lower_ = pd.Series(
253                lower_bound, name=self.series_names, index=self.output_dates_
254            )
255            self.upper_ = pd.Series(
256                upper_bound, name=self.series_names, index=self.output_dates_
257            )
258
259        return DescribeResult(mean=self.mean_, lower=self.lower_, upper=self.upper_)
260
261    def _compute_confidence_intervals(self, forecast_result, alpha):
262        """
263        Compute confidence intervals for VECM forecasts.
264        Uses the covariance of residuals to approximate the confidence intervals.
265        """
266        residuals = self.obj.resid
267        cov_matrix = np.cov(residuals.T)  # Covariance matrix of residuals
268        std_errors = np.sqrt(np.diag(cov_matrix))  # Standard errors
269
270        z_value = norm.ppf(1 - alpha / 2)  # Z-score for the given alpha level
271        lower_bound = forecast_result - z_value * std_errors
272        upper_bound = forecast_result + z_value * std_errors
273
274        return lower_bound, upper_bound
275
276    def score(self, X, training_index, testing_index, scoring=None, **kwargs):
277        """Train on training_index, score on testing_index."""
278
279        assert (
280            bool(set(training_index).intersection(set(testing_index))) == False
281        ), "Non-overlapping 'training_index' and 'testing_index' required"
282
283        # Dimensions
284        try:
285            # multivariate time series
286            n, p = X.shape
287        except:
288            # univariate time series
289            n = X.shape[0]
290            p = 1
291
292        # Training and testing sets
293        if p > 1:
294            X_train = X[training_index, :]
295            X_test = X[testing_index, :]
296        else:
297            X_train = X[training_index]
298            X_test = X[testing_index]
299
300        # Horizon
301        h = len(testing_index)
302        assert (
303            len(training_index) + h
304        ) <= n, "Please check lengths of training and testing windows"
305
306        # Fit and predict
307        self.fit(X_train, **kwargs)
308        preds = self.predict(h=h, **kwargs)
309
310        if scoring is None:
311            scoring = "neg_root_mean_squared_error"
312
313        # check inputs
314        assert scoring in (
315            "explained_variance",
316            "neg_mean_absolute_error",
317            "neg_mean_squared_error",
318            "neg_root_mean_squared_error",
319            "neg_mean_squared_log_error",
320            "neg_median_absolute_error",
321            "r2",
322        ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \
323                               'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \
324                               'neg_median_absolute_error', 'r2')"
325
326        scoring_options = {
327            "explained_variance": skm2.explained_variance_score,
328            "neg_mean_absolute_error": skm2.mean_absolute_error,
329            "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2),
330            "neg_root_mean_squared_error": lambda x, y: np.sqrt(np.mean((x - y) ** 2)),
331            "neg_mean_squared_log_error": skm2.mean_squared_log_error,
332            "neg_median_absolute_error": skm2.median_absolute_error,
333            "r2": skm2.r2_score,
334        }
335
336        # if p > 1:
337        #     return tuple(
338        #         [
339        #             scoring_options[scoring](
340        #                 X_test[:, i], preds[:, i]#, **kwargs
341        #             )
342        #             for i in range(p)
343        #         ]
344        #     )
345        # else:
346        return scoring_options[scoring](X_test, preds)
347
348    def plot(self, series=None, type_axis="dates", type_plot="pi"):
349        """Plot time series forecast
350
351        Parameters:
352
353        series: {integer} or {string}
354            series index or name
355
356        """
357
358        assert all(
359            [
360                self.mean_ is not None,
361                self.lower_ is not None,
362                self.upper_ is not None,
363                self.output_dates_ is not None,
364            ]
365        ), "model forecasting must be obtained first (with predict)"
366
367        if series is None:
368            assert (
369                self.n_series == 1
370            ), "please specify series index or name (n_series > 1)"
371            series = 0
372
373        if isinstance(series, str):
374            assert (
375                series in self.series_names
376            ), f"series {series} doesn't exist in the input dataset"
377            series_idx = self.df_.columns.get_loc(series)
378        else:
379            assert isinstance(series, int) and (
380                0 <= series < self.n_series
381            ), f"check series index (< {self.n_series})"
382            series_idx = series
383
384        if isinstance(self.df_, pd.DataFrame):
385            y_all = list(self.df_.iloc[:, series_idx]) + list(
386                self.mean_.iloc[:, series_idx]
387            )
388            y_test = list(self.mean_.iloc[:, series_idx])
389        else:
390            y_all = list(self.df_.values) + list(self.mean_.values)
391            y_test = list(self.mean_.values)
392        n_points_all = len(y_all)
393        n_points_train = self.df_.shape[0]
394
395        if type_axis == "numeric":
396            x_all = [i for i in range(n_points_all)]
397            x_test = [i for i in range(n_points_train, n_points_all)]
398
399        if type_axis == "dates":  # use dates
400            x_all = np.concatenate(
401                (self.input_dates.values, self.output_dates_.values), axis=None
402            )
403            x_test = self.output_dates_.values
404
405        if type_plot == "pi":
406            fig, ax = plt.subplots()
407            ax.plot(x_all, y_all, "-")
408            ax.plot(x_test, y_test, "-", color="orange")
409            try:
410                ax.fill_between(
411                    x_test,
412                    self.lower_.iloc[:, series_idx],
413                    self.upper_.iloc[:, series_idx],
414                    alpha=0.2,
415                    color="orange",
416                )
417            except Exception:
418                ax.fill_between(
419                    x_test,
420                    self.lower_.values,
421                    self.upper_.values,
422                    alpha=0.2,
423                    color="orange",
424                )
425            if self.replications is None:
426                if self.n_series > 1:
427                    plt.title(
428                        f"prediction intervals for {series}",
429                        loc="left",
430                        fontsize=12,
431                        fontweight=0,
432                        color="black",
433                    )
434                else:
435                    plt.title(
436                        f"prediction intervals for input time series",
437                        loc="left",
438                        fontsize=12,
439                        fontweight=0,
440                        color="black",
441                    )
442                plt.show()
443            else:  # self.replications is not None
444                if self.n_series > 1:
445                    plt.title(
446                        f"prediction intervals for {self.replications} simulations of {series}",
447                        loc="left",
448                        fontsize=12,
449                        fontweight=0,
450                        color="black",
451                    )
452                else:
453                    plt.title(
454                        f"prediction intervals for {self.replications} simulations of input time series",
455                        loc="left",
456                        fontsize=12,
457                        fontweight=0,
458                        color="black",
459                    )
460                plt.show()
461
462        if type_plot == "spaghetti":
463            palette = plt.get_cmap("Set1")
464            sims_ix = getsims(self.sims_, series_idx)
465            plt.plot(x_all, y_all, "-")
466            for col_ix in range(
467                sims_ix.shape[1]
468            ):  # avoid this when there are thousands of simulations
469                plt.plot(
470                    x_test,
471                    sims_ix[:, col_ix],
472                    "-",
473                    color=palette(col_ix),
474                    linewidth=1,
475                    alpha=0.9,
476                )
477            plt.plot(x_all, y_all, "-", color="black")
478            plt.plot(x_test, y_test, "-", color="blue")
479            # Add titles
480            if self.n_series > 1:
481                plt.title(
482                    f"{self.replications} simulations of {series}",
483                    loc="left",
484                    fontsize=12,
485                    fontweight=0,
486                    color="black",
487                )
488            else:
489                plt.title(
490                    f"{self.replications} simulations of input time series",
491                    loc="left",
492                    fontsize=12,
493                    fontweight=0,
494                    color="black",
495                )
496            plt.xlabel("Time")
497            plt.ylabel("Values")
498            # Show the graph
499            plt.show()
500
501    def cross_val_score(
502        self,
503        X,
504        scoring="root_mean_squared_error",
505        n_jobs=None,
506        verbose=0,
507        xreg=None,
508        initial_window=5,
509        horizon=3,
510        fixed_window=False,
511        show_progress=True,
512        level=95,
513        **kwargs,
514    ):
515        """Evaluate a score by time series cross-validation.
516
517        Parameters:
518
519            X: {array-like, sparse matrix} of shape (n_samples, n_features)
520                The data to fit.
521
522            scoring: str or a function
523                A str in ('root_mean_squared_error', 'mean_squared_error', 'mean_error',
524                'mean_absolute_error', 'mean_percentage_error',
525                'mean_absolute_percentage_error',  'winkler_score', 'coverage')
526                Or a function defined as 'coverage' and 'winkler_score' in `utils.timeseries`
527
528            n_jobs: int, default=None
529                Number of jobs to run in parallel.
530
531            verbose: int, default=0
532                The verbosity level.
533
534            xreg: array-like, optional (default=None)
535                Additional (external) regressors to be passed to `fit`
536                xreg must be in 'increasing' order (most recent observations last)
537
538            initial_window: int
539                initial number of consecutive values in each training set sample
540
541            horizon: int
542                number of consecutive values in test set sample
543
544            fixed_window: boolean
545                if False, all training samples start at index 0, and the training
546                window's size is increasing.
547                if True, the training window's size is fixed, and the window is
548                rolling forward
549
550            show_progress: boolean
551                if True, a progress bar is printed
552
553            **kwargs: dict
554                additional parameters to be passed to `fit` and `predict`
555
556        Returns:
557
558            A tuple: descriptive statistics or errors and raw errors
559
560        """
561        tscv = TimeSeriesSplit()
562
563        tscv_obj = tscv.split(
564            X,
565            initial_window=initial_window,
566            horizon=horizon,
567            fixed_window=fixed_window,
568        )
569
570        if isinstance(scoring, str):
571
572            assert scoring in (
573                "root_mean_squared_error",
574                "mean_squared_error",
575                "mean_error",
576                "mean_absolute_error",
577                "mean_percentage_error",
578                "mean_absolute_percentage_error",
579                "winkler_score",
580                "coverage",
581            ), "must have scoring in ('root_mean_squared_error', 'mean_squared_error', 'mean_error', 'mean_absolute_error', 'mean_error', 'mean_percentage_error', 'mean_absolute_percentage_error',  'winkler_score', 'coverage')"
582
583            def err_func(X_test, X_pred, scoring):
584                if (self.replications is not None) or (
585                    self.type_pi == "gaussian"
586                ):  # probabilistic
587                    if scoring == "winkler_score":
588                        return winkler_score(X_pred, X_test, level=level)
589                    elif scoring == "coverage":
590                        return coverage(X_pred, X_test, level=level)
591                    else:
592                        return mean_errors(
593                            pred=X_pred.mean, actual=X_test, scoring=scoring
594                        )
595                else:  # not probabilistic
596                    return mean_errors(pred=X_pred, actual=X_test, scoring=scoring)
597
598        else:  # isinstance(scoring, str) = False
599
600            err_func = scoring
601
602        errors = []
603
604        train_indices = []
605
606        test_indices = []
607
608        for train_index, test_index in tscv_obj:
609            train_indices.append(train_index)
610            test_indices.append(test_index)
611
612        if show_progress is True:
613            iterator = tqdm(zip(train_indices, test_indices), total=len(train_indices))
614        else:
615            iterator = zip(train_indices, test_indices)
616
617        for train_index, test_index in iterator:
618
619            if verbose == 1:
620                print(f"TRAIN: {train_index}")
621                print(f"TEST: {test_index}")
622
623            if isinstance(X, pd.DataFrame):
624                self.fit(X.iloc[train_index, :], xreg=xreg, **kwargs)
625                X_test = X.iloc[test_index, :]
626            else:
627                self.fit(X[train_index, :], xreg=xreg, **kwargs)
628                X_test = X[test_index, :]
629            X_pred = self.predict(h=int(len(test_index)), level=level, **kwargs)
630
631            errors.append(err_func(X_test, X_pred, scoring))
632
633        res = np.asarray(errors)
634
635        return res, describe(res)

Multivariate time series forecasting with classical statistical models (VAR, VECM, ARIMA, ETS, Theta)

Parameters:

model: type of model: str.
    currently, 'VAR', 'VECM', 'ARIMA', 'ETS', 'Theta'

Attributes:

df_: data frame
    the input data frame, in case a data.frame is provided to `fit`

level_: int
    level of confidence for prediction intervals (default is 95)

Examples: See examples/classical_mts_timeseries.py
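
In addition to the maintained example above, here is a minimal hypothetical sketch (synthetic data; assumes statsmodels is installed for the VAR backend):

```python
# Hypothetical sketch; synthetic data, illustrative settings.
import numpy as np
import pandas as pd
import nnetsauce as ns

dates = pd.date_range("2020-01-31", periods=60, freq="M")
rng = np.random.default_rng(0)
df = pd.DataFrame(rng.normal(size=(60, 2)).cumsum(axis=0),
                  columns=["series1", "series2"], index=dates)

obj = ns.ClassicalMTS(model="VAR")
obj.fit(df)
res = obj.predict(h=5, level=95)
print(res.mean)   # point forecasts; res.lower / res.upper give the 95% bounds
```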

def fit(self, X, **kwargs):
 91    def fit(self, X, **kwargs):
 92        """Fit FactorMTS model to training data X, with optional regressors xreg
 93
 94        Parameters:
 95
 96        X: {array-like}, shape = [n_samples, n_features]
 97            Training time series, where n_samples is the number
 98            of samples and n_features is the number of features;
 99            X must be in increasing order (most recent observations last)
100
101        **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)
102
103        Returns:
104
105        self: object
106        """
107
108        try:
109            self.n_series = X.shape[1]
110        except Exception:
111            self.n_series = 1
112
113        if (isinstance(X, pd.DataFrame) is False) and isinstance(
114            X, pd.Series
115        ) is False:  # input data set is a numpy array
116
117            X = pd.DataFrame(X)
118            if self.n_series > 1:
119                self.series_names = ["series" + str(i) for i in range(X.shape[1])]
120            else:
121                self.series_names = "series0"
122
123        else:  # input data set is a DataFrame or Series with column names
124
125            X_index = None
126            if X.index is not None and len(X.shape) > 1:
127                X_index = X.index
128                X = copy.deepcopy(mo.convert_df_to_numeric(X))
129            if X_index is not None:
130                try:
131                    X.index = X_index
132                except Exception:
133                    pass
134            if isinstance(X, pd.DataFrame):
135                self.series_names = X.columns.tolist()
136            else:
137                self.series_names = X.name
138
139        if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series):
140            self.df_ = X
141            X = X.values
142            self.df_.columns = self.series_names
143            self.input_dates = ts.compute_input_dates(self.df_)
144        else:
145            self.df_ = pd.DataFrame(X, columns=self.series_names)
146
147        if self.model == "Theta":
148            self.obj = self.obj(self.df_, **kwargs).fit()
149        else:
150            self.obj = self.obj(X, **kwargs).fit(**kwargs)
151
152        return self

Fit the chosen classical model to training data X, with optional regressors xreg

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training time series, where n_samples is the number
    of samples and n_features is the number of features;
    X must be in increasing order (most recent observations last)

**kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)

Returns:

self: object
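
After `fit`, the training data and the dates inferred from its index are kept on the object. Continuing the VAR sketch above (attribute names as in the source):

```python
obj.df_.head()        # the input DataFrame (columns are auto-named if X was a numpy array)
obj.input_dates[:3]   # dates inferred from the index of df_
obj.n_series          # 2
```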

def predict(self, h=5, level=95, **kwargs):
154    def predict(self, h=5, level=95, **kwargs):
155        """Forecast all the time series, h steps ahead
156
157        Parameters:
158
159        h: {integer}
160            Forecasting horizon
161
162        **kwargs: additional parameters to be passed to
163                self.cook_test_set
164
165        Returns:
166
167        model predictions for horizon = h: {array-like}
168
169        """
170
171        self.output_dates_, frequency = ts.compute_output_dates(self.df_, h)
172
173        self.level_ = level
174
175        self.lower_ = None  # do not remove (/!\)
176
177        self.upper_ = None  # do not remove (/!\)
178
179        self.sims_ = None  # do not remove (/!\)
180
181        self.level_ = level
182
183        self.alpha_ = 100 - level
184
185        pi_multiplier = norm.ppf(1 - self.alpha_ / 200)
186
187        # Named tuple for forecast results
188        DescribeResult = namedtuple("DescribeResult", ("mean", "lower", "upper"))
189
190        if self.model == "VAR":
191            mean_forecast, lower_bound, upper_bound = self.obj.forecast_interval(
192                self.obj.endog, steps=h, alpha=self.alpha_ / 100, **kwargs
193            )
194
195        elif self.model == "VECM":
196            forecast_result = self.obj.predict(steps=h)
197            mean_forecast = forecast_result
198            lower_bound, upper_bound = self._compute_confidence_intervals(
199                forecast_result, alpha=self.alpha_ / 100, **kwargs
200            )
201
202        elif self.model == "ARIMA":
203            forecast_result = self.obj.get_forecast(steps=h)
204            mean_forecast = forecast_result.predicted_mean
205            lower_bound = forecast_result.conf_int()[:, 0]
206            upper_bound = forecast_result.conf_int()[:, 1]
207
208        elif self.model == "ETS":
209            forecast_result = self.obj.forecast(steps=h)
210            residuals = self.obj.resid
211            std_errors = np.std(residuals)
212            mean_forecast = forecast_result
213            lower_bound = forecast_result - pi_multiplier * std_errors
214            upper_bound = forecast_result + pi_multiplier * std_errors
215
216        elif self.model == "Theta":
217            try:
218                mean_forecast = self.obj.forecast(steps=h).values
219                forecast_result = self.obj.prediction_intervals(
220                    steps=h, alpha=self.alpha_ / 100, **kwargs
221                )
222                lower_bound = forecast_result["lower"].values
223                upper_bound = forecast_result["upper"].values
224            except Exception:
225                mean_forecast = self.obj.forecast(steps=h)
226                forecast_result = self.obj.prediction_intervals(
227                    steps=h, alpha=self.alpha_ / 100, **kwargs
228                )
229                lower_bound = forecast_result["lower"]
230                upper_bound = forecast_result["upper"]
231
232        else:
233
234            raise ValueError("model not recognized")
235
236        try:
237            self.mean_ = pd.DataFrame(
238                mean_forecast,
239                columns=self.series_names,
240                index=self.output_dates_,
241            )
242            self.lower_ = pd.DataFrame(
243                lower_bound, columns=self.series_names, index=self.output_dates_
244            )
245            self.upper_ = pd.DataFrame(
246                upper_bound, columns=self.series_names, index=self.output_dates_
247            )
248        except Exception:
249            self.mean_ = pd.Series(
250                mean_forecast, name=self.series_names, index=self.output_dates_
251            )
252            self.lower_ = pd.Series(
253                lower_bound, name=self.series_names, index=self.output_dates_
254            )
255            self.upper_ = pd.Series(
256                upper_bound, name=self.series_names, index=self.output_dates_
257            )
258
259        return DescribeResult(mean=self.mean_, lower=self.lower_, upper=self.upper_)

Forecast all the time series, h steps ahead

Parameters:

h: {integer}
    Forecasting horizon

**kwargs: additional parameters to be passed to self.cook_test_set

Returns:

model predictions for horizon = h: {array-like}
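
The return value is a named tuple, and the same quantities are stored on the object. Continuing the VAR sketch above (assumes matplotlib is available for the plot call):

```python
res = obj.predict(h=10, level=80)
res.mean     # point forecasts, also available as obj.mean_
res.lower    # lower bound of the 80% prediction interval, also obj.lower_
res.upper    # upper bound, also obj.upper_
obj.plot(series="series1")   # plot the forecast with its prediction interval
```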

def score(self, X, training_index, testing_index, scoring=None, **kwargs):
276    def score(self, X, training_index, testing_index, scoring=None, **kwargs):
277        """Train on training_index, score on testing_index."""
278
279        assert (
280            bool(set(training_index).intersection(set(testing_index))) == False
281        ), "Non-overlapping 'training_index' and 'testing_index' required"
282
283        # Dimensions
284        try:
285            # multivariate time series
286            n, p = X.shape
287        except:
288            # univariate time series
289            n = X.shape[0]
290            p = 1
291
292        # Training and testing sets
293        if p > 1:
294            X_train = X[training_index, :]
295            X_test = X[testing_index, :]
296        else:
297            X_train = X[training_index]
298            X_test = X[testing_index]
299
300        # Horizon
301        h = len(testing_index)
302        assert (
303            len(training_index) + h
304        ) <= n, "Please check lengths of training and testing windows"
305
306        # Fit and predict
307        self.fit(X_train, **kwargs)
308        preds = self.predict(h=h, **kwargs)
309
310        if scoring is None:
311            scoring = "neg_root_mean_squared_error"
312
313        # check inputs
314        assert scoring in (
315            "explained_variance",
316            "neg_mean_absolute_error",
317            "neg_mean_squared_error",
318            "neg_root_mean_squared_error",
319            "neg_mean_squared_log_error",
320            "neg_median_absolute_error",
321            "r2",
322        ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \
323                               'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \
324                               'neg_median_absolute_error', 'r2')"
325
326        scoring_options = {
327            "explained_variance": skm2.explained_variance_score,
328            "neg_mean_absolute_error": skm2.mean_absolute_error,
329            "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2),
330            "neg_root_mean_squared_error": lambda x, y: np.sqrt(np.mean((x - y) ** 2)),
331            "neg_mean_squared_log_error": skm2.mean_squared_log_error,
332            "neg_median_absolute_error": skm2.median_absolute_error,
333            "r2": skm2.r2_score,
334        }
335
336        # if p > 1:
337        #     return tuple(
338        #         [
339        #             scoring_options[scoring](
340        #                 X_test[:, i], preds[:, i]#, **kwargs
341        #             )
342        #             for i in range(p)
343        #         ]
344        #     )
345        # else:
346        return scoring_options[scoring](X_test, preds)

Train on training_index, score on testing_index.

class CustomClassifier(nnetsauce.custom.custom.Custom, sklearn.base.ClassifierMixin):
 16class CustomClassifier(Custom, ClassifierMixin):
 17    """Custom Classification model
 18
 19    Attributes:
 20
 21        obj: object
 22            any object containing a method fit (obj.fit()) and a method predict
 23            (obj.predict())
 24
 25        n_hidden_features: int
 26            number of nodes in the hidden layer
 27
 28        activation_name: str
 29            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 30
 31        a: float
 32            hyperparameter for 'prelu' or 'elu' activation function
 33
 34        nodes_sim: str
 35            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 36            'uniform'
 37
 38        bias: boolean
 39            indicates if the hidden layer contains a bias term (True) or not
 40            (False)
 41
 42        dropout: float
 43            regularization parameter; (random) percentage of nodes dropped out
 44            of the training
 45
 46        direct_link: boolean
 47            indicates if the original predictors are included (True) in model's
 48            fitting or not (False)
 49
 50        n_clusters: int
 51            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 52                no clustering)
 53
 54        cluster_encode: bool
 55            defines how the variable containing clusters is treated (default is one-hot)
 56            if `False`, then labels are used, without one-hot encoding
 57
 58        type_clust: str
 59            type of clustering method: currently k-means ('kmeans') or Gaussian
 60            Mixture Model ('gmm')
 61
 62        type_scaling: a tuple of 3 strings
 63            scaling methods for inputs, hidden layer, and clustering respectively
 64            (and when relevant).
 65            Currently available: standardization ('std') or MinMax scaling ('minmax')
 66
 67        col_sample: float
 68            percentage of covariates randomly chosen for training
 69
 70        row_sample: float
 71            percentage of rows chosen for training, by stratified bootstrapping
 72        
 73        cv_calibration: int, cross-validation generator, or iterable, default=2
 74            Determines the cross-validation splitting strategy. Same as 
 75            `sklearn.calibration.CalibratedClassifierCV`
 76
 77        calibration_method: str
 78            {'sigmoid', 'isotonic'}, default='sigmoid'
 79            The method to use for calibration. Same as 
 80            `sklearn.calibration.CalibratedClassifierCV`
 81
 82        seed: int
 83            reproducibility seed for nodes_sim=='uniform'
 84
 85        backend: str
 86            "cpu" or "gpu" or "tpu"
 87
 88    Examples:
 89
 90    Note: it's better to use the `DeepClassifier` or `LazyDeepClassifier` classes directly
 91
 92    ```python
 93    import nnetsauce as ns
 94    from sklearn.ensemble import RandomForestClassifier
 95    from sklearn.model_selection import train_test_split
 96    from sklearn.datasets import load_digits
 97    from time import time
 98
 99    digits = load_digits()
100    X = digits.data
101    y = digits.target
102    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
103                                                        random_state=123)
104
105    # layer 1 (base layer) ----
106    layer1_regr = RandomForestClassifier(n_estimators=10, random_state=123)
107
108    start = time()
109
110    layer1_regr.fit(X_train, y_train)
111
112    # Accuracy in layer 1
113    print(layer1_regr.score(X_test, y_test))
114
115    # layer 2 using layer 1 ----
116    layer2_regr = ns.CustomClassifier(obj = layer1_regr, n_hidden_features=5,
117                            direct_link=True, bias=True,
118                            nodes_sim='uniform', activation_name='relu',
119                            n_clusters=2, seed=123)
120    layer2_regr.fit(X_train, y_train)
121
122    # Accuracy in layer 2
123    print(layer2_regr.score(X_test, y_test))
124
125    # layer 3 using layer 2 ----
126    layer3_regr = ns.CustomClassifier(obj = layer2_regr, n_hidden_features=10,
127                            direct_link=True, bias=True, dropout=0.7,
128                            nodes_sim='uniform', activation_name='relu',
129                            n_clusters=2, seed=123)
130    layer3_regr.fit(X_train, y_train)
131
132    # Accuracy in layer 3
133    print(layer3_regr.score(X_test, y_test))
134
135    print(f"Elapsed {time() - start}")
136    ```
137
138    """
139
140    # construct the object -----
141    _estimator_type = "classifier" 
142
143    def __init__(
144        self,
145        obj,
146        n_hidden_features=5,
147        activation_name="relu",
148        a=0.01,
149        nodes_sim="sobol",
150        bias=True,
151        dropout=0,
152        direct_link=True,
153        n_clusters=2,
154        cluster_encode=True,
155        type_clust="kmeans",
156        type_scaling=("std", "std", "std"),
157        col_sample=1,
158        row_sample=1,
159        cv_calibration=2,
160        calibration_method="sigmoid",
161        seed=123,
162        backend="cpu",
163    ):
164        super().__init__(
165            obj=obj,
166            n_hidden_features=n_hidden_features,
167            activation_name=activation_name,
168            a=a,
169            nodes_sim=nodes_sim,
170            bias=bias,
171            dropout=dropout,
172            direct_link=direct_link,
173            n_clusters=n_clusters,
174            cluster_encode=cluster_encode,
175            type_clust=type_clust,
176            type_scaling=type_scaling,
177            col_sample=col_sample,
178            row_sample=row_sample,
179            seed=seed,
180            backend=backend,
181        )
182        self.coef_ = None
183        self.intercept_ = None
184        self.type_fit = "classification"
185        self.cv_calibration = cv_calibration
186        self.calibration_method = calibration_method
187
188    def __sklearn_clone__(self):
189        """Create a clone of the estimator.
190        
191        This is required for scikit-learn's calibration system to work properly.
192        """
193        # Create a new instance with the same parameters
194        clone = CustomClassifier(
195            obj=self.obj,
196            n_hidden_features=self.n_hidden_features,
197            activation_name=self.activation_name,
198            a=self.a,
199            nodes_sim=self.nodes_sim,
200            bias=self.bias,
201            dropout=self.dropout,
202            direct_link=self.direct_link,
203            n_clusters=self.n_clusters,
204            cluster_encode=self.cluster_encode,
205            type_clust=self.type_clust,
206            type_scaling=self.type_scaling,
207            col_sample=self.col_sample,
208            row_sample=self.row_sample,
209            cv_calibration=self.cv_calibration,
210            calibration_method=self.calibration_method,
211            seed=self.seed,
212            backend=self.backend
213        )
214        return clone
215
216    def fit(self, X, y, sample_weight=None, **kwargs):
217        """Fit custom model to training data (X, y).
218
219        Parameters:
220
221            X: {array-like}, shape = [n_samples, n_features]
222                Training vectors, where n_samples is the number
223                of samples and n_features is the number of features.
224
225            y: array-like, shape = [n_samples]
226                Target values.
227
228            sample_weight: array-like, shape = [n_samples]
229                Sample weights.
230
231            **kwargs: additional parameters to be passed to
232                        self.cook_training_set or self.obj.fit
233
234        Returns:
235
236            self: object
237        """
238
239        if len(X.shape) == 1:
240            if isinstance(X, pd.DataFrame):
241                X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns)
242            else:
243                X = X.reshape(1, -1)
244
245        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
246        self.classes_ = np.unique(y)
247        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
248
249        # Wrap in CalibratedClassifierCV if needed
250        if self.cv_calibration is not None:
251            self.obj = CalibratedClassifierCV(
252                self.obj, 
253                cv=self.cv_calibration,
254                method=self.calibration_method
255            )
256
257        # if sample_weights, else: (must use self.row_index)
258        if sample_weight is not None:
259            self.obj.fit(
260                scaled_Z,
261                output_y,
262                sample_weight=sample_weight[self.index_row_].ravel(),
263                **kwargs
264            )
265            return self
266
267        # if sample_weight is None:
268        self.obj.fit(scaled_Z, output_y, **kwargs)
269        self.classes_ = np.unique(y)  # for compatibility with sklearn
270        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
271
272        if hasattr(self.obj, "coef_"):
273            self.coef_ = self.obj.coef_
274
275        if hasattr(self.obj, "intercept_"):
276            self.intercept_ = self.obj.intercept_
277
278        return self
279
280    def partial_fit(self, X, y, sample_weight=None, **kwargs):
281        """Partial fit custom model to training data (X, y).
282
283        Parameters:
284
285            X: {array-like}, shape = [n_samples, n_features]
286                Subset of training vectors, where n_samples is the number
287                of samples and n_features is the number of features.
288
289            y: array-like, shape = [n_samples]
290                Subset of target values.
291
292            sample_weight: array-like, shape = [n_samples]
293                Sample weights.
294
295            **kwargs: additional parameters to be passed to
296                        self.cook_training_set or self.obj.fit
297
298        Returns:
299
300            self: object
301        """
302
303        if len(X.shape) == 1:
304            if isinstance(X, pd.DataFrame):
305                X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns)
306            else:
307                X = X.reshape(1, -1)
308            y = np.array([y], dtype=np.integer)
309
310        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
311        self.n_classes_ = len(np.unique(y))  # for compatibility with sklearn
312
313        # if sample_weights, else: (must use self.row_index)
314        if sample_weight is not None:
315            try:
316                self.obj.partial_fit(
317                    scaled_Z,
318                    output_y,
319                    sample_weight=sample_weight[self.index_row_].ravel(),
320                    # **kwargs
321                )
322            except:
323                NotImplementedError
324
325            return self
326
327        # if sample_weight is None:
328        #try:
329        self.obj.partial_fit(scaled_Z, output_y)
330        #except:
331        #    raise NotImplementedError
332
333        self.classes_ = np.unique(y)  # for compatibility with sklearn
334        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
335
336        return self
337
338    def predict(self, X, **kwargs):
339        """Predict test data X.
340
341        Parameters:
342
343            X: {array-like}, shape = [n_samples, n_features]
344                Training vectors, where n_samples is the number
345                of samples and n_features is the number of features.
346
347            **kwargs: additional parameters to be passed to
348                    self.cook_test_set
349
350        Returns:
351
352            model predictions: {array-like}
353        """
354
355        if len(X.shape) == 1:
356            n_features = X.shape[0]
357            new_X = mo.rbind(
358                X.reshape(1, n_features),
359                np.ones(n_features).reshape(1, n_features),
360            )
361
362            return (self.obj.predict(self.cook_test_set(new_X, **kwargs), **kwargs))[0]
363
364        return self.obj.predict(self.cook_test_set(X, **kwargs), **kwargs)
365
366    def predict_proba(self, X, **kwargs):
367        """Predict probabilities for test data X.
368
369        Args:
370
371            X: {array-like}, shape = [n_samples, n_features]
372                Training vectors, where n_samples is the number
373                of samples and n_features is the number of features.
374
375            **kwargs: additional parameters to be passed to
376                    self.cook_test_set
377
378        Returns:
379
380            probability estimates for test data: {array-like}
381        """
382
383        if len(X.shape) == 1:
384            n_features = X.shape[0]
385            new_X = mo.rbind(
386                X.reshape(1, n_features),
387                np.ones(n_features).reshape(1, n_features),
388            )
389            return (
390                self.obj.predict_proba(self.cook_test_set(new_X, **kwargs), **kwargs)
391            )[0]
392        return self.obj.predict_proba(self.cook_test_set(X, **kwargs), **kwargs)
393
394    def decision_function(self, X, **kwargs):
395        """Compute the decision function of X.
396
397        Parameters:
398            X: {array-like}, shape = [n_samples, n_features]
399                Samples to compute decision function for.
400
401            **kwargs: additional parameters to be passed to
402                    self.cook_test_set
403
404        Returns:
405            array-like of shape (n_samples,) or (n_samples, n_classes)
406            Decision function of the input samples. The order of outputs is the same
407            as that of the classes passed to fit.
408        """
409        if not hasattr(self.obj, "decision_function"):
410            # If base classifier doesn't have decision_function, use predict_proba
411            proba = self.predict_proba(X, **kwargs)
412            if proba.shape[1] == 2:
413                return proba[:, 1]  # For binary classification
414            return proba  # For multiclass
415
416        if len(X.shape) == 1:
417            n_features = X.shape[0]
418            new_X = mo.rbind(
419                X.reshape(1, n_features),
420                np.ones(n_features).reshape(1, n_features),
421            )
422
423            return (
424                self.obj.decision_function(
425                    self.cook_test_set(new_X, **kwargs), **kwargs
426                )
427            )[0]
428
429        return self.obj.decision_function(self.cook_test_set(X, **kwargs), **kwargs)
430
431    def score(self, X, y, scoring=None):
432        """Scoring function for classification.
433
434        Args:
435
436            X: {array-like}, shape = [n_samples, n_features]
437                Training vectors, where n_samples is the number
438                of samples and n_features is the number of features.
439
440            y: array-like, shape = [n_samples]
441                Target values.
442
443            scoring: str
444                scoring method (default is accuracy)
445
446        Returns:
447
448            score: float
449        """
450
451        if scoring is None:
452            scoring = "accuracy"
453
454        if scoring == "accuracy":
455            return skm2.accuracy_score(y, self.predict(X))
456
457        if scoring == "f1":
458            return skm2.f1_score(y, self.predict(X))
459
460        if scoring == "precision":
461            return skm2.precision_score(y, self.predict(X))
462
463        if scoring == "recall":
464            return skm2.recall_score(y, self.predict(X))
465
466        if scoring == "roc_auc":
467            return skm2.roc_auc_score(y, self.predict(X))
468
469        if scoring == "log_loss":
470            return skm2.log_loss(y, self.predict_proba(X))
471
472        if scoring == "balanced_accuracy":
473            return skm2.balanced_accuracy_score(y, self.predict(X))
474
475        if scoring == "average_precision":
476            return skm2.average_precision_score(y, self.predict(X))
477
478        if scoring == "neg_brier_score":
479            return -skm2.brier_score_loss(y, self.predict_proba(X))
480
481        if scoring == "neg_log_loss":
482            return -skm2.log_loss(y, self.predict_proba(X))
483
484    @property
485    def _estimator_type(self):
486        return "classifier"

Custom Classification model

Attributes:

obj: object
    any object containing a method fit (obj.fit()) and a method predict
    (obj.predict())

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

col_sample: float
    percentage of covariates randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

cv_calibration: int, cross-validation generator, or iterable, default=2
    Determines the cross-validation splitting strategy. Same as 
    `sklearn.calibration.CalibratedClassifierCV`

calibration_method: str
    {'sigmoid', 'isotonic'}, default='sigmoid'
    The method to use for calibration. Same as 
    `sklearn.calibration.CalibratedClassifierCV`

seed: int
    reproducibility seed for nodes_sim=='uniform'

backend: str
    "cpu" or "gpu" or "tpu"

Examples:

Note: it's better to use the DeepClassifier or LazyDeepClassifier classes directly

import nnetsauce as ns
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_digits
from time import time

digits = load_digits()
X = digits.data
y = digits.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=123)

# layer 1 (base layer) ----
layer1_regr = RandomForestClassifier(n_estimators=10, random_state=123)

start = time()

layer1_regr.fit(X_train, y_train)

# Accuracy in layer 1
print(layer1_regr.score(X_test, y_test))

# layer 2 using layer 1 ----
layer2_regr = ns.CustomClassifier(obj = layer1_regr, n_hidden_features=5,
                        direct_link=True, bias=True,
                        nodes_sim='uniform', activation_name='relu',
                        n_clusters=2, seed=123)
layer2_regr.fit(X_train, y_train)

# Accuracy in layer 2
print(layer2_regr.score(X_test, y_test))

# layer 3 using layer 2 ----
layer3_regr = ns.CustomClassifier(obj = layer2_regr, n_hidden_features=10,
                        direct_link=True, bias=True, dropout=0.7,
                        nodes_sim='uniform', activation_name='relu',
                        n_clusters=2, seed=123)
layer3_regr.fit(X_train, y_train)

# Accuracy in layer 3
print(layer3_regr.score(X_test, y_test))

print(f"Elapsed {time() - start}")
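
A second, minimal sketch exercising the calibration options listed above. Assumption (not covered by the library's own example): `cv_calibration` and `calibration_method` are accepted as constructor keywords, as suggested by the attribute list and by `fit`, which wraps `obj` in scikit-learn's `CalibratedClassifierCV` when `cv_calibration` is set.

import nnetsauce as ns
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=123)

# cv_calibration / calibration_method taken from the attribute list above;
# the remaining keywords mirror the layered example
clf = ns.CustomClassifier(obj=LogisticRegression(max_iter=1000),
                          n_hidden_features=5, n_clusters=2,
                          cv_calibration=3, calibration_method="isotonic",
                          seed=123)
clf.fit(X_train, y_train)

print(clf.predict_proba(X_test)[:3])  # calibrated class probabilities
print(clf.score(X_test, y_test))      # accuracy by default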
def fit(self, X, y, sample_weight=None, **kwargs):
216    def fit(self, X, y, sample_weight=None, **kwargs):
217        """Fit custom model to training data (X, y).
218
219        Parameters:
220
221            X: {array-like}, shape = [n_samples, n_features]
222                Training vectors, where n_samples is the number
223                of samples and n_features is the number of features.
224
225            y: array-like, shape = [n_samples]
226                Target values.
227
228            sample_weight: array-like, shape = [n_samples]
229                Sample weights.
230
231            **kwargs: additional parameters to be passed to
232                        self.cook_training_set or self.obj.fit
233
234        Returns:
235
236            self: object
237        """
238
239        if len(X.shape) == 1:
240            if isinstance(X, pd.DataFrame):
241                X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns)
242            else:
243                X = X.reshape(1, -1)
244
245        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
246        self.classes_ = np.unique(y)
247        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
248
249        # Wrap in CalibratedClassifierCV if needed
250        if self.cv_calibration is not None:
251            self.obj = CalibratedClassifierCV(
252                self.obj, 
253                cv=self.cv_calibration,
254                method=self.calibration_method
255            )
256
257        # if sample_weights, else: (must use self.row_index)
258        if sample_weight is not None:
259            self.obj.fit(
260                scaled_Z,
261                output_y,
262                sample_weight=sample_weight[self.index_row_].ravel(),
263                **kwargs
264            )
265            return self
266
267        # if sample_weight is None:
268        self.obj.fit(scaled_Z, output_y, **kwargs)
269        self.classes_ = np.unique(y)  # for compatibility with sklearn
270        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
271
272        if hasattr(self.obj, "coef_"):
273            self.coef_ = self.obj.coef_
274
275        if hasattr(self.obj, "intercept_"):
276            self.intercept_ = self.obj.intercept_
277
278        return self

Fit custom model to training data (X, y).

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

sample_weight: array-like, shape = [n_samples]
    Sample weights.

**kwargs: additional parameters to be passed to
            self.cook_training_set or self.obj.fit

Returns:

self: object
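
A brief, hedged sketch of the `sample_weight` argument documented above; the weights are sub-sampled internally with the rows kept for training (`self.index_row_`), so one weight per training row is expected.

import numpy as np
import nnetsauce as ns
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

clf = ns.CustomClassifier(obj=LogisticRegression(max_iter=1000), seed=0)
# one weight per training row (here, linearly increasing weights)
weights = np.linspace(0.5, 1.5, num=len(y_train))
clf.fit(X_train, y_train, sample_weight=weights)
print(clf.score(X_test, y_test))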
def predict(self, X, **kwargs):
338    def predict(self, X, **kwargs):
339        """Predict test data X.
340
341        Parameters:
342
343            X: {array-like}, shape = [n_samples, n_features]
344                Training vectors, where n_samples is the number
345                of samples and n_features is the number of features.
346
347            **kwargs: additional parameters to be passed to
348                    self.cook_test_set
349
350        Returns:
351
352            model predictions: {array-like}
353        """
354
355        if len(X.shape) == 1:
356            n_features = X.shape[0]
357            new_X = mo.rbind(
358                X.reshape(1, n_features),
359                np.ones(n_features).reshape(1, n_features),
360            )
361
362            return (self.obj.predict(self.cook_test_set(new_X, **kwargs), **kwargs))[0]
363
364        return self.obj.predict(self.cook_test_set(X, **kwargs), **kwargs)

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
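
As the source above shows, a single observation can also be passed as a 1-D array; it is internally stacked with a row of ones before being sent to `cook_test_set`, and only the first prediction is returned. A short sketch:

import nnetsauce as ns
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris

X, y = load_iris(return_X_y=True)
clf = ns.CustomClassifier(obj=LogisticRegression(max_iter=1000), seed=1).fit(X, y)

print(clf.predict(X).shape)   # 2-D input -> one label per row
print(clf.predict(X[0]))      # 1-D input -> a single label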
def predict_proba(self, X, **kwargs):
366    def predict_proba(self, X, **kwargs):
367        """Predict probabilities for test data X.
368
369        Args:
370
371            X: {array-like}, shape = [n_samples, n_features]
372                Training vectors, where n_samples is the number
373                of samples and n_features is the number of features.
374
375            **kwargs: additional parameters to be passed to
376                    self.cook_test_set
377
378        Returns:
379
380            probability estimates for test data: {array-like}
381        """
382
383        if len(X.shape) == 1:
384            n_features = X.shape[0]
385            new_X = mo.rbind(
386                X.reshape(1, n_features),
387                np.ones(n_features).reshape(1, n_features),
388            )
389            return (
390                self.obj.predict_proba(self.cook_test_set(new_X, **kwargs), **kwargs)
391            )[0]
392        return self.obj.predict_proba(self.cook_test_set(X, **kwargs), **kwargs)

Predict probabilities for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

probability estimates for test data: {array-like}
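
A short sketch: the returned array has one row per sample and one column per class, which is the format expected by the probability-based scoring options ('log_loss', 'neg_log_loss') below.

import numpy as np
import nnetsauce as ns
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris

X, y = load_iris(return_X_y=True)
clf = ns.CustomClassifier(obj=LogisticRegression(max_iter=1000), seed=2).fit(X, y)

proba = clf.predict_proba(X)
print(proba.shape)                           # (n_samples, n_classes)
print(np.allclose(proba.sum(axis=1), 1.0))   # each row sums to one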
def score(self, X, y, scoring=None):
431    def score(self, X, y, scoring=None):
432        """Scoring function for classification.
433
434        Args:
435
436            X: {array-like}, shape = [n_samples, n_features]
437                Training vectors, where n_samples is the number
438                of samples and n_features is the number of features.
439
440            y: array-like, shape = [n_samples]
441                Target values.
442
443            scoring: str
444                scoring method (default is accuracy)
445
446        Returns:
447
448            score: float
449        """
450
451        if scoring is None:
452            scoring = "accuracy"
453
454        if scoring == "accuracy":
455            return skm2.accuracy_score(y, self.predict(X))
456
457        if scoring == "f1":
458            return skm2.f1_score(y, self.predict(X))
459
460        if scoring == "precision":
461            return skm2.precision_score(y, self.predict(X))
462
463        if scoring == "recall":
464            return skm2.recall_score(y, self.predict(X))
465
466        if scoring == "roc_auc":
467            return skm2.roc_auc_score(y, self.predict(X))
468
469        if scoring == "log_loss":
470            return skm2.log_loss(y, self.predict_proba(X))
471
472        if scoring == "balanced_accuracy":
473            return skm2.balanced_accuracy_score(y, self.predict(X))
474
475        if scoring == "average_precision":
476            return skm2.average_precision_score(y, self.predict(X))
477
478        if scoring == "neg_brier_score":
479            return -skm2.brier_score_loss(y, self.predict_proba(X))
480
481        if scoring == "neg_log_loss":
482            return -skm2.log_loss(y, self.predict_proba(X))

Scoring function for classification.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

scoring: str
    scoring method (default is accuracy)

Returns:

score: float
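
A hedged sketch of the scoring options listed above. Note that 'f1', 'precision', 'recall' and 'roc_auc' are computed with scikit-learn's binary defaults, so a two-class dataset is used here.

import nnetsauce as ns
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=3)

clf = ns.CustomClassifier(obj=LogisticRegression(max_iter=2000), seed=3)
clf.fit(X_train, y_train)

for metric in ("accuracy", "f1", "roc_auc", "neg_log_loss"):
    print(metric, clf.score(X_test, y_test, scoring=metric))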
class CustomRegressor(nnetsauce.custom.custom.Custom, sklearn.base.RegressorMixin):
 18class CustomRegressor(Custom, RegressorMixin):
 19    """Custom Regression model
 20
 21    This class is used to 'augment' any regression model with transformed features.
 22
 23    Parameters:
 24
 25        obj: object
 26            any object containing a method fit (obj.fit()) and a method predict
 27            (obj.predict())
 28
 29        n_hidden_features: int
 30            number of nodes in the hidden layer
 31
 32        activation_name: str
 33            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 34
 35        a: float
 36            hyperparameter for 'prelu' or 'elu' activation function
 37
 38        nodes_sim: str
 39            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 40            'uniform'
 41
 42        bias: boolean
 43            indicates if the hidden layer contains a bias term (True) or not
 44            (False)
 45
 46        dropout: float
 47            regularization parameter; (random) percentage of nodes dropped out
 48            of the training
 49
 50        direct_link: boolean
 51            indicates if the original predictors are included (True) in model's
 52            fitting or not (False)
 53
 54        n_clusters: int
 55            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 56                no clustering)
 57
 58        cluster_encode: bool
 59            defines how the variable containing clusters is treated (default is one-hot)
 60            if `False`, then labels are used, without one-hot encoding
 61
 62        type_clust: str
 63            type of clustering method: currently k-means ('kmeans') or Gaussian
 64            Mixture Model ('gmm')
 65
 66        type_scaling: a tuple of 3 strings
 67            scaling methods for inputs, hidden layer, and clustering respectively
 68            (and when relevant).
 69            Currently available: standardization ('std') or MinMax scaling ('minmax')
 70
 71        type_pi: str.
 72            type of prediction interval; currently `None` (split or local
 73            conformal without simulation), "kde" or "bootstrap" (simulated split
 74            conformal).
 75
 76        replications: int.
 77            number of replications (if needed) for predictive simulation.
 78            Used only in `self.predict`, for `self.kernel` in ('gaussian',
 79            'tophat') and `self.type_pi = 'kde'`. Default is `None`.
 80
 81        kernel: str.
 82            the kernel to use for kernel density estimation (used for predictive
 83            simulation in `self.predict`, with `method='splitconformal'` and
 84            `type_pi = 'kde'`). Currently, either 'gaussian' or 'tophat'.
 85
 86        type_split: str.
 87            Type of splitting for conformal prediction. None (default), or
 88            "random" (random split of data) or "sequential" (sequential split of data)
 89
 90        col_sample: float
 91            percentage of covariates randomly chosen for training
 92
 93        row_sample: float
 94            percentage of rows chosen for training, by stratified bootstrapping
 95
 96        level: float
 97            confidence level for prediction intervals
 98
 99        pi_method: str
100            method for prediction intervals: 'splitconformal' or 'localconformal'
101
102        seed: int
103            reproducibility seed for nodes_sim=='uniform'
104
105        type_fit: str
106            'regression'
107
108        backend: str
109            "cpu" or "gpu" or "tpu"
110
111    Examples:
112
113    See [https://thierrymoudiki.github.io/blog/2024/03/18/python/conformal-and-bayesian-regression](https://thierrymoudiki.github.io/blog/2024/03/18/python/conformal-and-bayesian-regression)
114
115    """
116
117    # construct the object -----
118
119    def __init__(
120        self,
121        obj,
122        n_hidden_features=5,
123        activation_name="relu",
124        a=0.01,
125        nodes_sim="sobol",
126        bias=True,
127        dropout=0,
128        direct_link=True,
129        n_clusters=2,
130        cluster_encode=True,
131        type_clust="kmeans",
132        type_scaling=("std", "std", "std"),
133        type_pi=None,
134        replications=None,
135        kernel=None,
136        type_split=None,
137        col_sample=1,
138        row_sample=1,
139        level=None,
140        pi_method=None,
141        seed=123,
142        backend="cpu",
143    ):
144        super().__init__(
145            obj=obj,
146            n_hidden_features=n_hidden_features,
147            activation_name=activation_name,
148            a=a,
149            nodes_sim=nodes_sim,
150            bias=bias,
151            dropout=dropout,
152            direct_link=direct_link,
153            n_clusters=n_clusters,
154            cluster_encode=cluster_encode,
155            type_clust=type_clust,
156            type_scaling=type_scaling,
157            col_sample=col_sample,
158            row_sample=row_sample,
159            seed=seed,
160            backend=backend,
161        )
162
163        self.type_fit = "regression"
164        self.type_pi = type_pi
165        self.replications = replications
166        self.kernel = kernel
167        self.type_split = type_split
168        self.level = level
169        self.pi_method = pi_method
170        self.coef_ = None
171        self.intercept_ = None
172        self.X_ = None
173        self.y_ = None
174        self.aic_ = None 
175        self.aicc_ = None
176        self.bic_ = None
177
178    def fit(self, X, y, sample_weight=None, **kwargs):
179        """Fit custom model to training data (X, y).
180
181        Parameters:
182
183            X: {array-like}, shape = [n_samples, n_features]
184                Training vectors, where n_samples is the number
185                of samples and n_features is the number of features.
186
187            y: array-like, shape = [n_samples]
188                Target values.
189
190            sample_weight: array-like, shape = [n_samples]
191                Sample weights.
192
193            **kwargs: additional parameters to be passed to
194                self.cook_training_set or self.obj.fit
195
196        Returns:
197
198            self: object
199
200        """
201
202        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
203
204        if self.level is not None:
205            self.obj = PredictionInterval(
206                obj=self.obj, method=self.pi_method, level=self.level
207            )
208
209        # if sample_weights, else: (must use self.row_index)
210        if sample_weight is not None:
211            self.obj.fit(
212                scaled_Z,
213                centered_y,
214                sample_weight=sample_weight[self.index_row_].ravel(),
215                **kwargs
216            )
217
218            return self
219
220        self.obj.fit(scaled_Z, centered_y, **kwargs)
221
222        self.X_ = X
223
224        self.y_ = y
225
226        # Compute SSE
227        centered_y_pred = self.obj.predict(scaled_Z)
228        self.sse_ = np.sum((centered_y - centered_y_pred) ** 2)
229        
230        # Get number of parameters
231        n_params = self.n_hidden_features + X.shape[1]  # hidden features + original features
232        if self.n_clusters > 0:
233            n_params += self.n_clusters  # add clusters if used
234            
235        # Compute information criteria
236        n_samples = X.shape[0]
237        temp = n_samples * np.log(self.sse_/n_samples)
238        self.aic_ = temp + 2 * n_params
239        self.bic_ = temp + np.log(n_samples) * n_params
240
241        if hasattr(self.obj, "coef_"):
242            self.coef_ = self.obj.coef_
243
244        if hasattr(self.obj, "intercept_"):
245            self.intercept_ = self.obj.intercept_
246
247        return self
248
249    def partial_fit(self, X, y, **kwargs):
250        """Partial fit custom model to training data (X, y).
251
252        Parameters:
253
254            X: {array-like}, shape = [n_samples, n_features]
255                Subset of training vectors, where n_samples is the number
256                of samples and n_features is the number of features.
257
258            y: array-like, shape = [n_samples]
259                Subset of target values.
260
261            **kwargs: additional parameters to be passed to
262                self.cook_training_set or self.obj.fit
263
264        Returns:
265
266            self: object
267
268        """
269
270        if len(X.shape) == 1:
271            if isinstance(X, pd.DataFrame):
272                X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns)
273            else:
274                X = X.reshape(1, -1)
275            y = np.array([y])
276
277        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
278
279        self.obj.partial_fit(scaled_Z, centered_y, **kwargs)
280
281        self.X_ = X
282
283        self.y_ = y
284
285        return self
286
287    def predict(self, X, level=95, method='splitconformal', **kwargs):
288        """Predict test data X.
289
290        Parameters:
291
292            X: {array-like}, shape = [n_samples, n_features]
293                Training vectors, where n_samples is the number
294                of samples and n_features is the number of features.
295
296            level: int
297                Level of confidence (default = 95)
298
299            method: str
300                'splitconformal', 'localconformal'
301                prediction (if you specify `return_pi = True`)
302
303            **kwargs: additional parameters
304                    `return_pi = True` for conformal prediction,
305                    with `method` in ('splitconformal', 'localconformal')
306                    or `return_std = True` for `self.obj` in
307                    (`sklearn.linear_model.BayesianRidge`,
308                    `sklearn.linear_model.ARDRegression`,
309                    `sklearn.gaussian_process.GaussianProcessRegressor`)
310
311        Returns:
312
313            model predictions:
314                an array if no uncertainty quantification is requested,
315                  or a tuple when prediction intervals or simulations are
316                  requested: (mean, standard deviation, lower and upper
317                  prediction interval) if `return_std = True`, or conformal
318                  prediction intervals if `return_pi = True`
319
320        """
321
322        if "return_std" in kwargs:
323
324            alpha = 100 - level
325            pi_multiplier = norm.ppf(1 - alpha / 200)
326
327            if len(X.shape) == 1:
328
329                n_features = X.shape[0]
330                new_X = mo.rbind(
331                    X.reshape(1, n_features),
332                    np.ones(n_features).reshape(1, n_features),
333                )
334
335                mean_, std_ = self.obj.predict(
336                    self.cook_test_set(new_X, **kwargs), return_std=True
337                )
338                mean_, std_ = mean_[0], std_[0]  # keep the single input row's prediction
339                preds = self.y_mean_ + mean_
340                lower = self.y_mean_ + (mean_ - pi_multiplier * std_)
341                upper = self.y_mean_ + (mean_ + pi_multiplier * std_)
342
343                DescribeResults = namedtuple(
344                    "DescribeResults", ["mean", "std", "lower", "upper"]
345                )
346
347                return DescribeResults(preds, std_, lower, upper)
348
349            # len(X.shape) > 1
350            mean_, std_ = self.obj.predict(
351                self.cook_test_set(X, **kwargs), return_std=True
352            )
353
354            preds = self.y_mean_ + mean_
355            lower = self.y_mean_ + (mean_ - pi_multiplier * std_)
356            upper = self.y_mean_ + (mean_ + pi_multiplier * std_)
357
358            DescribeResults = namedtuple(
359                "DescribeResults", ["mean", "std", "lower", "upper"]
360            )
361
362            return DescribeResults(preds, std_, lower, upper)
363
364        if "return_pi" in kwargs:
365            assert method in (
366                "splitconformal",
367                "localconformal",
368            ), "method must be in ('splitconformal', 'localconformal')"
369            self.pi = PredictionInterval(
370                obj=self,
371                method=method,
372                level=level,
373                type_pi=self.type_pi,
374                replications=self.replications,
375                kernel=self.kernel,
376            )
377
378            if len(self.X_.shape) == 1:
379                if isinstance(X, pd.DataFrame):
380                    self.X_ = pd.DataFrame(
381                        self.X_.values.reshape(1, -1), columns=self.X_.columns
382                    )
383                else:
384                    self.X_ = self.X_.reshape(1, -1)
385                self.y_ = np.array([self.y_])
386
387            self.pi.fit(self.X_, self.y_)
388            # self.X_ = None # consumes memory to keep, dangerous to delete (side effect)
389            # self.y_ = None # consumes memory to keep, dangerous to delete (side effect)
390            preds = self.pi.predict(X, return_pi=True)
391            return preds
392
393        # "return_std" not in kwargs
394        if len(X.shape) == 1:
395
396            n_features = X.shape[0]
397            new_X = mo.rbind(
398                X.reshape(1, n_features),
399                np.ones(n_features).reshape(1, n_features),
400            )
401
402            return (
403                self.y_mean_
404                + self.obj.predict(self.cook_test_set(new_X, **kwargs), **kwargs)
405            )[0]
406
407        # len(X.shape) > 1
408        return self.y_mean_ + self.obj.predict(
409            self.cook_test_set(X, **kwargs), **kwargs
410        )
411
412    def score(self, X, y, scoring=None):
413        """Compute the score of the model.
414
415        Parameters:
416
417            X: {array-like}, shape = [n_samples, n_features]
418                Training vectors, where n_samples is the number
419                of samples and n_features is the number of features.
420
421            y: array-like, shape = [n_samples]
422                Target values.
423
424            scoring: str
425                scoring method
426
427        Returns:
428
429            score: float
430
431        """
432
433        if scoring is None:
434            return np.sqrt(np.mean((self.predict(X) - y) ** 2))
435
436        return skm2.get_scorer(scoring)(self, X, y)

Custom Regression model

This class is used to 'augment' any regression model with transformed features.

Parameters:

obj: object
    any object containing a method fit (obj.fit()) and a method predict
    (obj.predict())

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

type_pi: str.
    type of prediction interval; currently `None` (split or local
    conformal without simulation), "kde" or "bootstrap" (simulated split
    conformal).

replications: int.
    number of replications (if needed) for predictive simulation.
    Used only in `self.predict`, for `self.kernel` in ('gaussian',
    'tophat') and `self.type_pi = 'kde'`. Default is `None`.

kernel: str.
    the kernel to use for kernel density estimation (used for predictive
    simulation in `self.predict`, with `method='splitconformal'` and
    `type_pi = 'kde'`). Currently, either 'gaussian' or 'tophat'.

type_split: str.
    Type of splitting for conformal prediction. None (default), or
    "random" (random split of data) or "sequential" (sequential split of data)

col_sample: float
    percentage of covariates randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

level: float
    confidence level for prediction intervals

pi_method: str
    method for prediction intervals: 'splitconformal' or 'localconformal'

seed: int
    reproducibility seed for nodes_sim=='uniform'

type_fit: str
    'regression'

backend: str
    "cpu" or "gpu" or "tpu"

Examples:

See https://thierrymoudiki.github.io/blog/2024/03/18/python/conformal-and-bayesian-regression
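
Since the documented example above is only a link, here is a minimal, hedged sketch of the basic workflow, using the constructor arguments listed above with a scikit-learn base learner:

import nnetsauce as ns
from sklearn.linear_model import Ridge
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=123)

reg = ns.CustomRegressor(obj=Ridge(), n_hidden_features=10,
                         direct_link=True, nodes_sim="sobol",
                         n_clusters=2, seed=123)
reg.fit(X_train, y_train)

print(reg.predict(X_test)[:5])
print(reg.score(X_test, y_test))   # RMSE by default (see score below)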

def fit(self, X, y, sample_weight=None, **kwargs):
178    def fit(self, X, y, sample_weight=None, **kwargs):
179        """Fit custom model to training data (X, y).
180
181        Parameters:
182
183            X: {array-like}, shape = [n_samples, n_features]
184                Training vectors, where n_samples is the number
185                of samples and n_features is the number of features.
186
187            y: array-like, shape = [n_samples]
188                Target values.
189
190            sample_weight: array-like, shape = [n_samples]
191                Sample weights.
192
193            **kwargs: additional parameters to be passed to
194                self.cook_training_set or self.obj.fit
195
196        Returns:
197
198            self: object
199
200        """
201
202        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
203
204        if self.level is not None:
205            self.obj = PredictionInterval(
206                obj=self.obj, method=self.pi_method, level=self.level
207            )
208
209        # if sample_weights, else: (must use self.row_index)
210        if sample_weight is not None:
211            self.obj.fit(
212                scaled_Z,
213                centered_y,
214                sample_weight=sample_weight[self.index_row_].ravel(),
215                **kwargs
216            )
217
218            return self
219
220        self.obj.fit(scaled_Z, centered_y, **kwargs)
221
222        self.X_ = X
223
224        self.y_ = y
225
226        # Compute SSE
227        centered_y_pred = self.obj.predict(scaled_Z)
228        self.sse_ = np.sum((centered_y - centered_y_pred) ** 2)
229        
230        # Get number of parameters
231        n_params = self.n_hidden_features + X.shape[1]  # hidden features + original features
232        if self.n_clusters > 0:
233            n_params += self.n_clusters  # add clusters if used
234            
235        # Compute information criteria
236        n_samples = X.shape[0]
237        temp = n_samples * np.log(self.sse_/n_samples)
238        self.aic_ = temp + 2 * n_params
239        self.bic_ = temp + np.log(n_samples) * n_params
240
241        if hasattr(self.obj, "coef_"):
242            self.coef_ = self.obj.coef_
243
244        if hasattr(self.obj, "intercept_"):
245            self.intercept_ = self.obj.intercept_
246
247        return self

Fit custom model to training data (X, y).

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

sample_weight: array-like, shape = [n_samples]
    Sample weights.

**kwargs: additional parameters to be passed to
    self.cook_training_set or self.obj.fit

Returns:

self: object
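
In addition to fitting the base learner on the transformed features, the method above stores a few in-sample goodness-of-fit attributes (sse_, aic_, bic_, computed from the residuals as shown in the source). A short sketch:

import nnetsauce as ns
from sklearn.linear_model import Ridge
from sklearn.datasets import load_diabetes

X, y = load_diabetes(return_X_y=True)
reg = ns.CustomRegressor(obj=Ridge(), n_hidden_features=5, seed=42).fit(X, y)

print(reg.sse_)            # in-sample sum of squared errors
print(reg.aic_, reg.bic_)  # information criteria derived from sse_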
def predict(self, X, level=95, method='splitconformal', **kwargs):
287    def predict(self, X, level=95, method='splitconformal', **kwargs):
288        """Predict test data X.
289
290        Parameters:
291
292            X: {array-like}, shape = [n_samples, n_features]
293                Training vectors, where n_samples is the number
294                of samples and n_features is the number of features.
295
296            level: int
297                Level of confidence (default = 95)
298
299            method: str
300                'splitconformal', 'localconformal'
301                prediction (if you specify `return_pi = True`)
302
303            **kwargs: additional parameters
304                    `return_pi = True` for conformal prediction,
305                    with `method` in ('splitconformal', 'localconformal')
306                    or `return_std = True` for `self.obj` in
307                    (`sklearn.linear_model.BayesianRidge`,
308                    `sklearn.linear_model.ARDRegression`,
309                    `sklearn.gaussian_process.GaussianProcessRegressor`)
310
311        Returns:
312
313            model predictions:
314                an array if no uncertainty quantification is requested,
315                  or a tuple when prediction intervals or simulations are
316                  requested: (mean, standard deviation, lower and upper
317                  prediction interval) if `return_std = True`, or conformal
318                  prediction intervals if `return_pi = True`
319
320        """
321
322        if "return_std" in kwargs:
323
324            alpha = 100 - level
325            pi_multiplier = norm.ppf(1 - alpha / 200)
326
327            if len(X.shape) == 1:
328
329                n_features = X.shape[0]
330                new_X = mo.rbind(
331                    X.reshape(1, n_features),
332                    np.ones(n_features).reshape(1, n_features),
333                )
334
335                mean_, std_ = self.obj.predict(
336                    self.cook_test_set(new_X, **kwargs), return_std=True
337                )
338                mean_, std_ = mean_[0], std_[0]  # keep the single input row's prediction
339                preds = self.y_mean_ + mean_
340                lower = self.y_mean_ + (mean_ - pi_multiplier * std_)
341                upper = self.y_mean_ + (mean_ + pi_multiplier * std_)
342
343                DescribeResults = namedtuple(
344                    "DescribeResults", ["mean", "std", "lower", "upper"]
345                )
346
347                return DescribeResults(preds, std_, lower, upper)
348
349            # len(X.shape) > 1
350            mean_, std_ = self.obj.predict(
351                self.cook_test_set(X, **kwargs), return_std=True
352            )
353
354            preds = self.y_mean_ + mean_
355            lower = self.y_mean_ + (mean_ - pi_multiplier * std_)
356            upper = self.y_mean_ + (mean_ + pi_multiplier * std_)
357
358            DescribeResults = namedtuple(
359                "DescribeResults", ["mean", "std", "lower", "upper"]
360            )
361
362            return DescribeResults(preds, std_, lower, upper)
363
364        if "return_pi" in kwargs:
365            assert method in (
366                "splitconformal",
367                "localconformal",
368            ), "method must be in ('splitconformal', 'localconformal')"
369            self.pi = PredictionInterval(
370                obj=self,
371                method=method,
372                level=level,
373                type_pi=self.type_pi,
374                replications=self.replications,
375                kernel=self.kernel,
376            )
377
378            if len(self.X_.shape) == 1:
379                if isinstance(X, pd.DataFrame):
380                    self.X_ = pd.DataFrame(
381                        self.X_.values.reshape(1, -1), columns=self.X_.columns
382                    )
383                else:
384                    self.X_ = self.X_.reshape(1, -1)
385                self.y_ = np.array([self.y_])
386
387            self.pi.fit(self.X_, self.y_)
388            # self.X_ = None # consumes memory to keep, dangerous to delete (side effect)
389            # self.y_ = None # consumes memory to keep, dangerous to delete (side effect)
390            preds = self.pi.predict(X, return_pi=True)
391            return preds
392
393        # "return_std" not in kwargs
394        if len(X.shape) == 1:
395
396            n_features = X.shape[0]
397            new_X = mo.rbind(
398                X.reshape(1, n_features),
399                np.ones(n_features).reshape(1, n_features),
400            )
401
402            return (
403                self.y_mean_
404                + self.obj.predict(self.cook_test_set(new_X, **kwargs), **kwargs)
405            )[0]
406
407        # len(X.shape) > 1
408        return self.y_mean_ + self.obj.predict(
409            self.cook_test_set(X, **kwargs), **kwargs
410        )

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

level: int
    Level of confidence (default = 95)

method: str
    'splitconformal', 'localconformal'
    prediction (if you specify `return_pi = True`)

**kwargs: additional parameters
        `return_pi = True` for conformal prediction,
        with `method` in ('splitconformal', 'localconformal')
        or `return_std = True` for `self.obj` in
        (`sklearn.linear_model.BayesianRidge`,
        `sklearn.linear_model.ARDRegression`,
        `sklearn.gaussian_process.GaussianProcessRegressor`)

Returns:

model predictions:
    an array if no uncertainty quantification is requested,
      or a tuple when prediction intervals or simulations are
      requested: (mean, standard deviation, lower and upper
      prediction interval) if `return_std = True`, or conformal
      prediction intervals if `return_pi = True`
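
A hedged sketch of the two uncertainty-quantification paths described above. Assumptions: BayesianRidge is one of the base learners supporting `return_std = True`, as stated in the docstring, and `return_pi = True` with the default settings performs plain split-conformal prediction through PredictionInterval.

import nnetsauce as ns
from sklearn.linear_model import BayesianRidge, Ridge
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Gaussian-style intervals from a Bayesian base learner
reg_std = ns.CustomRegressor(obj=BayesianRidge(), seed=0).fit(X_train, y_train)
res = reg_std.predict(X_test, return_std=True)   # DescribeResults(mean, std, lower, upper)
print(res.mean[:3], res.lower[:3], res.upper[:3])

# split-conformal intervals for any base learner
reg_pi = ns.CustomRegressor(obj=Ridge(), seed=0).fit(X_train, y_train)
pi = reg_pi.predict(X_test, return_pi=True, level=95, method="splitconformal")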
def score(self, X, y, scoring=None):
412    def score(self, X, y, scoring=None):
413        """Compute the score of the model.
414
415        Parameters:
416
417            X: {array-like}, shape = [n_samples, n_features]
418                Training vectors, where n_samples is the number
419                of samples and n_features is the number of features.
420
421            y: array-like, shape = [n_samples]
422                Target values.
423
424            scoring: str
425                scoring method
426
427        Returns:
428
429            score: float
430
431        """
432
433        if scoring is None:
434            return np.sqrt(np.mean((self.predict(X) - y) ** 2))
435
436        return skm2.get_scorer(scoring)(self, X, y)

Compute the score of the model.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

scoring: str
    scoring method

Returns:

score: float
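
When `scoring` is None the method returns the root mean squared error of the predictions; any other value is resolved through scikit-learn's `get_scorer`. A short sketch:

import nnetsauce as ns
from sklearn.linear_model import Ridge
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

reg = ns.CustomRegressor(obj=Ridge(), seed=1).fit(X_train, y_train)
print(reg.score(X_test, y_test))                  # RMSE
print(reg.score(X_test, y_test, scoring="r2"))    # any sklearn scorer by name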
class CustomBackPropRegressor(nnetsauce.custom.custom.Custom, sklearn.base.RegressorMixin):
 19class CustomBackPropRegressor(Custom, RegressorMixin):
 20    """
 21    Finite difference trainer for nnetsauce models.
 22
 23    Parameters
 24    ----------
 25
 26    base_model : str
 27        The name of the base model (e.g., 'RidgeCV').
 28    
 29    type_grad : {'finitediff', 'autodiff'}, optional
 30        Type of gradient computation to use (default='finitediff').
 31
 32    lr : float, optional
 33        Learning rate for optimization (default=1e-4).
 34
 35    optimizer : {'gd', 'sgd', 'adam', 'cd'}, optional
 36        Optimization algorithm: gradient descent ('gd'), stochastic gradient descent ('sgd'),
 37        Adam ('adam'), or coordinate descent ('cd'). Default is 'gd'.
 38
 39    eps : float, optional
 40        Scaling factor for adaptive finite difference step size (default=1e-3).
 41
 42    batch_size : int, optional
 43        Batch size for 'sgd' optimizer (default=32).
 44
 45    alpha : float, optional
 46        Elastic net penalty strength (default=0.0).
 47
 48    l1_ratio : float, optional
 49        Elastic net mixing parameter (0 = Ridge, 1 = Lasso, default=0.0).
 50
 51    type_loss : {'mse', 'quantile'}, optional
 52        Type of loss function to use (default='mse').
 53
 54    q : float, optional
 55        Quantile for quantile loss (default=0.5).
 56
 57    **kwargs
 58        Additional parameters to pass to the scikit-learn model.
 59
 60    """
 61
 62    def __init__(self, base_model, 
 63        type_grad='finitediff',
 64        lr=1e-4, optimizer='gd', 
 65        eps=1e-3, batch_size=32, 
 66        alpha=0.0, l1_ratio=0.0, 
 67        type_loss="mse", q=0.5,
 68        backend='cpu',
 69        **kwargs):
 70        super().__init__(base_model, True, **kwargs)
 71        self.base_model = base_model
 72        self.custom_kwargs = kwargs
 73        self.backend = backend
 74        self.model = ns.CustomRegressor(self.base_model,
 75                                        backend=self.backend,
 76                                        **self.custom_kwargs)
 77        assert isinstance(self.model, ns.CustomRegressor),\
 78         "'model' must be of class ns.CustomRegressor"
 79        self.type_grad = type_grad
 80        self.lr = lr
 81        self.optimizer = optimizer
 82        self.eps = eps
 83        self.loss_history_ = []
 84        self.opt_state = None
 85        self.batch_size = batch_size  # for SGD
 86        self.loss_history_ = []
 87        self._cd_index = 0  # For coordinate descent
 88        self.alpha = alpha
 89        self.l1_ratio = l1_ratio
 90        self.type_loss = type_loss
 91        self.q = q
 92
 93    def _loss(self, X, y, **kwargs):
 94        """
 95        Compute the loss (with elastic net penalty) for the current model.
 96
 97        Parameters
 98        ----------
 99
100        X : array-like of shape (n_samples, n_features)
101            Input data.
102
103        y : array-like of shape (n_samples,)
104            Target values.
105
106        **kwargs
107            Additional keyword arguments for loss calculation.
108
109        Returns
110        -------
111        float
112            The computed loss value.
113        """
114        y_pred = self.model.predict(X)
115        if self.type_loss == "mse": 
116            loss = np.mean((y - y_pred) ** 2)            
117        elif self.type_loss == "quantile":
118            loss = mean_pinball_loss(y, y_pred, alpha=self.q, **kwargs)
119        W = self.model.W_
120        l1 = np.sum(np.abs(W))
121        l2 = np.sum(W ** 2)
122        return loss + self.alpha * (self.l1_ratio * l1 + 0.5 * (1 - self.l1_ratio) * l2)
123
124    def _compute_grad(self, X, y):
125        """
126        Compute the gradient of the loss with respect to W_ using finite differences.
127
128        Parameters
129        ----------
130
131        X : array-like of shape (n_samples, n_features)
132            Input data.
133
134        y : array-like of shape (n_samples,)
135            Target values.
136
137        Returns
138        -------
139
140        ndarray
141            Gradient array with the same shape as W_.
142        """
143        if self.type_grad == 'autodiff':
144            raise NotImplementedError("Automatic differentiation is not implemented yet.")
145            # Use JAX for automatic differentiation
146            W = deepcopy(self.model.W_)
147            W_flat = W.flatten()
148            n_params = W_flat.size
149
150            def loss_fn(W_flat):
151                W_reshaped = W_flat.reshape(W.shape)
152                self.model.W_ = W_reshaped
153                return self._loss(X, y)
154
155            grad_fn = jax.grad(loss_fn)
156            grad_flat = grad_fn(W_flat)
157            grad = grad_flat.reshape(W.shape)
158
159            # Add elastic net gradient
160            l1_grad = self.alpha * self.l1_ratio * np.sign(W)
161            l2_grad = self.alpha * (1 - self.l1_ratio) * W
162            grad += l1_grad + l2_grad
163
164            self.model.W_ = W
165            return grad
166        
167        # Finite difference gradient computation
168        W = deepcopy(self.model.W_)
169        shape = W.shape
170        W_flat = W.flatten()
171        n_params = W_flat.size
172
173        # Adaptive finite difference step
174        h_vec = self.eps * np.maximum(1.0, np.abs(W_flat))
175        eye = np.eye(n_params)
176
177        loss_plus = np.zeros(n_params)
178        loss_minus = np.zeros(n_params)
179
180        for i in range(n_params):
181            h_i = h_vec[i]
182            Wp = W_flat.copy(); Wp[i] += h_i
183            Wm = W_flat.copy(); Wm[i] -= h_i
184
185            self.model.W_ = Wp.reshape(shape)
186            loss_plus[i] = self._loss(X, y)
187
188            self.model.W_ = Wm.reshape(shape)
189            loss_minus[i] = self._loss(X, y)
190
191        grad = ((loss_plus - loss_minus) / (2 * h_vec)).reshape(shape)
192
193        # Add elastic net gradient
194        l1_grad = self.alpha * self.l1_ratio * np.sign(W)
195        l2_grad = self.alpha * (1 - self.l1_ratio) * W
196        grad += l1_grad + l2_grad
197
198        self.model.W_ = W  # restore original
199        return grad
200
201    def fit(self, X, y, epochs=10, verbose=True, show_progress=True, sample_weight=None, **kwargs):
202        """
203        Fit the model using finite difference optimization.
204
205        Parameters
206        ----------
207
208        X : array-like of shape (n_samples, n_features)
209            Training data.
210
211        y : array-like of shape (n_samples,)
212            Target values.
213
214        epochs : int, optional
215            Number of optimization steps (default=10).
216
217        verbose : bool, optional
218            Whether to print progress messages (default=True).
219
220        show_progress : bool, optional
221            Whether to show tqdm progress bar (default=True).
222
223        sample_weight : array-like, optional
224            Sample weights.
225
226        **kwargs
227            Additional keyword arguments.
228
229        Returns
230        -------
231
232        self : object
233            Returns self.
234        """        
235
236        self.model.fit(X, y)
237
238        iterator = tqdm(range(epochs)) if show_progress else range(epochs)
239
240        for epoch in iterator:
241            grad = self._compute_grad(X, y)
242
243            if self.optimizer == 'gd':
244                self.model.W_ -= self.lr * grad
245                self.model.W_ = np.clip(self.model.W_, 0, 1)
246                #print("self.model.W_", self.model.W_)
247
248            elif self.optimizer == 'sgd':
249                # Sample a mini-batch for stochastic gradient
250                n_samples = X.shape[0]
251                idxs = np.random.choice(n_samples, self.batch_size, replace=False)
252                if isinstance(X, pd.DataFrame):
253                    X_batch = X.iloc[idxs,:]
254                else: 
255                    X_batch = X[idxs,:]
256                y_batch = y[idxs]
257                grad = self._compute_grad(X_batch, y_batch)
258
259                self.model.W_ -= self.lr * grad
260                self.model.W_ = np.clip(self.model.W_, 0, 1)
261
262            elif self.optimizer == 'adam':
263                if self.opt_state is None:
264                    self.opt_state = {'m': np.zeros_like(grad), 'v': np.zeros_like(grad), 't': 0}
265                beta1, beta2, eps = 0.9, 0.999, 1e-8
266                self.opt_state['t'] += 1
267                self.opt_state['m'] = beta1 * self.opt_state['m'] + (1 - beta1) * grad
268                self.opt_state['v'] = beta2 * self.opt_state['v'] + (1 - beta2) * (grad ** 2)
269                m_hat = self.opt_state['m'] / (1 - beta1 ** self.opt_state['t'])
270                v_hat = self.opt_state['v'] / (1 - beta2 ** self.opt_state['t'])
271
272                self.model.W_ -= self.lr * m_hat / (np.sqrt(v_hat) + eps)
273                self.model.W_ = np.clip(self.model.W_, 0, 1)
274                #print("self.model.W_", self.model.W_)
275
276            elif self.optimizer == 'cd':  # coordinate descent
277
278                W_shape = self.model.W_.shape
279                W_flat_size = self.model.W_.size
280                W_flat = self.model.W_.flatten()
281                grad_flat = grad.flatten()
282
283                # Update only one coordinate per epoch (cyclic)
284                idx = self._cd_index % W_flat_size
285                W_flat[idx] -= self.lr * grad_flat[idx]
286                # Clip the updated value
287                W_flat[idx] = np.clip(W_flat[idx], 0, 1)
288
289                # Restore W_
290                self.model.W_ = W_flat.reshape(W_shape)
291
292                self._cd_index += 1
293
294            else:
295                raise ValueError(f"Unsupported optimizer: {self.optimizer}")
296
297            loss = self._loss(X, y)
298            self.loss_history_.append(loss)
299
300            if verbose:
301                print(f"Epoch {epoch+1}: Loss = {loss:.6f}")                
302
303        # if sample_weights, else: (must use self.row_index)
304        if sample_weight is not None:
305            self.model.fit(
306                X,
307                y,
308                sample_weight=sample_weight[self.index_row_].ravel(),
309                **kwargs
310            )
311
312            return self
313
314        return self
315
316
317    def predict(self, X, level=95, method='splitconformal', **kwargs):
318        """
319        Predict using the trained model.
320
321        Parameters
322        ----------
323
324        X : array-like of shape (n_samples, n_features)
325            Input data.
326
327        level : int, optional
328            Level of confidence for prediction intervals (default=95).
329
330        method : {'splitconformal', 'localconformal'}, optional
331            Method for conformal prediction (default='splitconformal').
332
333        **kwargs
334            Additional keyword arguments. Use `return_pi=True` for prediction intervals,
335            or `return_std=True` for standard deviation estimates.
336
337        Returns
338        -------
339        
340        array or tuple
341            Model predictions, or a tuple with prediction intervals or standard deviations if requested.
342        """
343        if "return_std" in kwargs:
344
345            alpha = 100 - level
346            pi_multiplier = norm.ppf(1 - alpha / 200)
347
348            if len(X.shape) == 1:
349
350                n_features = X.shape[0]
351                new_X = mo.rbind(
352                    X.reshape(1, n_features),
353                    np.ones(n_features).reshape(1, n_features),
354                )
355
356                mean_, std_ = self.model.predict(
357                    new_X, return_std=True
358                )[0]
359
360                preds =  mean_
361                lower =  (mean_ - pi_multiplier * std_)
362                upper =  (mean_ + pi_multiplier * std_)
363
364                DescribeResults = namedtuple(
365                    "DescribeResults", ["mean", "std", "lower", "upper"]
366                )
367
368                return DescribeResults(preds, std_, lower, upper)
369
370            # len(X.shape) > 1
371            mean_, std_ = self.model.predict(
372                X, return_std=True
373            )
374
375            preds =  mean_
376            lower =  (mean_ - pi_multiplier * std_)
377            upper =  (mean_ + pi_multiplier * std_)
378
379            DescribeResults = namedtuple(
380                "DescribeResults", ["mean", "std", "lower", "upper"]
381            )
382
383            return DescribeResults(preds, std_, lower, upper)
384
385        if "return_pi" in kwargs:
386            assert method in (
387                "splitconformal",
388                "localconformal",
389            ), "method must be in ('splitconformal', 'localconformal')"
390            self.pi = ns.PredictionInterval(
391                obj=self,
392                method=method,
393                level=level,
394                type_pi=self.type_pi,
395                replications=self.replications,
396                kernel=self.kernel,
397            )
398
399            if len(self.X_.shape) == 1:
400                if isinstance(X, pd.DataFrame):
401                    self.X_ = pd.DataFrame(
402                        self.X_.values.reshape(1, -1), columns=self.X_.columns
403                    )
404                else:
405                    self.X_ = self.X_.reshape(1, -1)
406                self.y_ = np.array([self.y_])
407
408            self.pi.fit(self.X_, self.y_)
409            # self.X_ = None # consumes memory to keep, dangerous to delete (side effect)
410            # self.y_ = None # consumes memory to keep, dangerous to delete (side effect)
411            preds = self.pi.predict(X, return_pi=True)
412            return preds
413
414        # "return_std" not in kwargs
415        if len(X.shape) == 1:
416
417            n_features = X.shape[0]
418            new_X = mo.rbind(
419                X.reshape(1, n_features),
420                np.ones(n_features).reshape(1, n_features),
421            )
422
423            return (
424                0
425                + self.model.predict(new_X, **kwargs)
426            )[0]
427
428        # len(X.shape) > 1
429        return  self.model.predict(
430            X, **kwargs
431        )

Finite difference trainer for nnetsauce models.

Parameters

base_model : str
    The name of the base model (e.g., 'RidgeCV').

type_grad : {'finitediff', 'autodiff'}, optional
    Type of gradient computation to use (default='finitediff').

lr : float, optional
    Learning rate for optimization (default=1e-4).

optimizer : {'gd', 'sgd', 'adam', 'cd'}, optional
    Optimization algorithm: gradient descent ('gd'), stochastic gradient
    descent ('sgd'), Adam ('adam'), or coordinate descent ('cd').
    Default is 'gd'.

eps : float, optional
    Scaling factor for adaptive finite difference step size (default=1e-3).

batch_size : int, optional
    Batch size for 'sgd' optimizer (default=32).

alpha : float, optional
    Elastic net penalty strength (default=0.0).

l1_ratio : float, optional
    Elastic net mixing parameter (0 = Ridge, 1 = Lasso, default=0.0).

type_loss : {'mse', 'quantile'}, optional
    Type of loss function to use (default='mse').

q : float, optional
    Quantile for quantile loss (default=0.5).

**kwargs
    Additional parameters to pass to the scikit-learn model.
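
A minimal, hedged sketch of the training loop described above. The parameter list says `base_model` is the name of the base model, but `__init__` passes it straight to `ns.CustomRegressor` as the wrapped estimator, so an estimator instance (RidgeCV here) is assumed; `epochs`, `verbose` and `show_progress` are the `fit` arguments documented in the section below.

import nnetsauce as ns
from sklearn.linear_model import RidgeCV
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=5)

# assumption: an estimator instance is passed, not a string
reg = ns.CustomBackPropRegressor(RidgeCV(), optimizer="adam", lr=1e-3)
reg.fit(X_train, y_train, epochs=5, verbose=False, show_progress=False)

print(reg.loss_history_)        # one penalized loss value per epoch
print(reg.predict(X_test)[:5])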

def fit( self, X, y, epochs=10, verbose=True, show_progress=True, sample_weight=None, **kwargs):
201    def fit(self, X, y, epochs=10, verbose=True, show_progress=True, sample_weight=None, **kwargs):
202        """
203        Fit the model using finite difference optimization.
204
205        Parameters
206        ----------
207
208        X : array-like of shape (n_samples, n_features)
209            Training data.
210
211        y : array-like of shape (n_samples,)
212            Target values.
213
214        epochs : int, optional
215            Number of optimization steps (default=10).
216
217        verbose : bool, optional
218            Whether to print progress messages (default=True).
219
220        show_progress : bool, optional
221            Whether to show tqdm progress bar (default=True).
222
223        sample_weight : array-like, optional
224            Sample weights.
225
226        **kwargs
227            Additional keyword arguments.
228
229        Returns
230        -------
231
232        self : object
233            Returns self.
234        """        
235
236        self.model.fit(X, y)
237
238        iterator = tqdm(range(epochs)) if show_progress else range(epochs)
239
240        for epoch in iterator:
241            grad = self._compute_grad(X, y)
242
243            if self.optimizer == 'gd':
244                self.model.W_ -= self.lr * grad
245                self.model.W_ = np.clip(self.model.W_, 0, 1)
246                #print("self.model.W_", self.model.W_)
247
248            elif self.optimizer == 'sgd':
249                # Sample a mini-batch for stochastic gradient
250                n_samples = X.shape[0]
251                idxs = np.random.choice(n_samples, self.batch_size, replace=False)
252                if isinstance(X, pd.DataFrame):
253                    X_batch = X.iloc[idxs,:]
254                else: 
255                    X_batch = X[idxs,:]
256                y_batch = y[idxs]
257                grad = self._compute_grad(X_batch, y_batch)
258
259                self.model.W_ -= self.lr * grad
260                self.model.W_ = np.clip(self.model.W_, 0, 1)
261
262            elif self.optimizer == 'adam':
263                if self.opt_state is None:
264                    self.opt_state = {'m': np.zeros_like(grad), 'v': np.zeros_like(grad), 't': 0}
265                beta1, beta2, eps = 0.9, 0.999, 1e-8
266                self.opt_state['t'] += 1
267                self.opt_state['m'] = beta1 * self.opt_state['m'] + (1 - beta1) * grad
268                self.opt_state['v'] = beta2 * self.opt_state['v'] + (1 - beta2) * (grad ** 2)
269                m_hat = self.opt_state['m'] / (1 - beta1 ** self.opt_state['t'])
270                v_hat = self.opt_state['v'] / (1 - beta2 ** self.opt_state['t'])
271
272                self.model.W_ -= self.lr * m_hat / (np.sqrt(v_hat) + eps)
273                self.model.W_ = np.clip(self.model.W_, 0, 1)
274                #print("self.model.W_", self.model.W_)
275
276            elif self.optimizer == 'cd':  # coordinate descent
277
278                W_shape = self.model.W_.shape
279                W_flat_size = self.model.W_.size
280                W_flat = self.model.W_.flatten()
281                grad_flat = grad.flatten()
282
283                # Update only one coordinate per epoch (cyclic)
284                idx = self._cd_index % W_flat_size
285                W_flat[idx] -= self.lr * grad_flat[idx]
286                # Clip the updated value
287                W_flat[idx] = np.clip(W_flat[idx], 0, 1)
288
289                # Restore W_
290                self.model.W_ = W_flat.reshape(W_shape)
291
292                self._cd_index += 1
293
294            else:
295                raise ValueError(f"Unsupported optimizer: {self.optimizer}")
296
297            loss = self._loss(X, y)
298            self.loss_history_.append(loss)
299
300            if verbose:
301                print(f"Epoch {epoch+1}: Loss = {loss:.6f}")                
302
303        # if sample_weights, else: (must use self.row_index)
304        if sample_weight is not None:
305            self.model.fit(
306                X,
307                y,
308                sample_weight=sample_weight[self.index_row_].ravel(),
309                **kwargs
310            )
311
312            return self
313
314        return self

Fit the model using finite difference optimization.

Parameters

X : array-like of shape (n_samples, n_features)
    Training data.

y : array-like of shape (n_samples,)
    Target values.

epochs : int, optional
    Number of optimization steps (default=10).

verbose : bool, optional
    Whether to print progress messages (default=True).

show_progress : bool, optional
    Whether to show a tqdm progress bar (default=True).

sample_weight : array-like, optional
    Sample weights.

**kwargs
    Additional keyword arguments.

Returns

self : object
    Returns self.
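
For intuition about `type_loss`, `alpha`, `l1_ratio`, and `q`, here is an illustrative sketch (not the library's internal code) of the elastic-net-penalized loss such a trainer typically minimizes over the trainable hidden-layer weights `W_`:

```python
import numpy as np

def penalized_loss(y, y_pred, W, type_loss="mse", q=0.5, alpha=0.0, l1_ratio=0.0):
    # data-fitting term
    if type_loss == "mse":
        data_term = np.mean((y - y_pred) ** 2)
    else:  # 'quantile' (pinball) loss at level q
        resid = y - y_pred
        data_term = np.mean(np.maximum(q * resid, (q - 1) * resid))
    # elastic net penalty on the trainable weights W:
    # l1_ratio = 0 -> pure Ridge, l1_ratio = 1 -> pure Lasso
    penalty = alpha * (l1_ratio * np.abs(W).sum()
                       + 0.5 * (1.0 - l1_ratio) * np.square(W).sum())
    return data_term + penalty
```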

def predict(self, X, level=95, method='splitconformal', **kwargs):
317    def predict(self, X, level=95, method='splitconformal', **kwargs):
318        """
319        Predict using the trained model.
320
321        Parameters
322        ----------
323
324        X : array-like of shape (n_samples, n_features)
325            Input data.
326
327        level : int, optional
328            Level of confidence for prediction intervals (default=95).
329
330        method : {'splitconformal', 'localconformal'}, optional
331            Method for conformal prediction (default='splitconformal').
332
333        **kwargs
334            Additional keyword arguments. Use `return_pi=True` for prediction intervals,
335            or `return_std=True` for standard deviation estimates.
336
337        Returns
338        -------
339        
340        array or tuple
341            Model predictions, or a tuple with prediction intervals or standard deviations if requested.
342        """
343        if "return_std" in kwargs:
344
345            alpha = 100 - level
346            pi_multiplier = norm.ppf(1 - alpha / 200)
347
348            if len(X.shape) == 1:
349
350                n_features = X.shape[0]
351                new_X = mo.rbind(
352                    X.reshape(1, n_features),
353                    np.ones(n_features).reshape(1, n_features),
354                )
355
356                mean_all, std_all = self.model.predict(new_X, return_std=True)
357                # new_X stacks X on a dummy row of ones; keep only the first row
358                mean_, std_ = mean_all[0], std_all[0]
359
360                preds =  mean_
361                lower =  (mean_ - pi_multiplier * std_)
362                upper =  (mean_ + pi_multiplier * std_)
363
364                DescribeResults = namedtuple(
365                    "DescribeResults", ["mean", "std", "lower", "upper"]
366                )
367
368                return DescribeResults(preds, std_, lower, upper)
369
370            # len(X.shape) > 1
371            mean_, std_ = self.model.predict(
372                X, return_std=True
373            )
374
375            preds =  mean_
376            lower =  (mean_ - pi_multiplier * std_)
377            upper =  (mean_ + pi_multiplier * std_)
378
379            DescribeResults = namedtuple(
380                "DescribeResults", ["mean", "std", "lower", "upper"]
381            )
382
383            return DescribeResults(preds, std_, lower, upper)
384
385        if "return_pi" in kwargs:
386            assert method in (
387                "splitconformal",
388                "localconformal",
389            ), "method must be in ('splitconformal', 'localconformal')"
390            self.pi = ns.PredictionInterval(
391                obj=self,
392                method=method,
393                level=level,
394                type_pi=self.type_pi,
395                replications=self.replications,
396                kernel=self.kernel,
397            )
398
399            if len(self.X_.shape) == 1:
400                if isinstance(X, pd.DataFrame):
401                    self.X_ = pd.DataFrame(
402                        self.X_.values.reshape(1, -1), columns=self.X_.columns
403                    )
404                else:
405                    self.X_ = self.X_.reshape(1, -1)
406                self.y_ = np.array([self.y_])
407
408            self.pi.fit(self.X_, self.y_)
409            # self.X_ = None # consumes memory to keep, dangerous to delete (side effect)
410            # self.y_ = None # consumes memory to keep, dangerous to delete (side effect)
411            preds = self.pi.predict(X, return_pi=True)
412            return preds
413
414        # "return_std" not in kwargs
415        if len(X.shape) == 1:
416
417            n_features = X.shape[0]
418            new_X = mo.rbind(
419                X.reshape(1, n_features),
420                np.ones(n_features).reshape(1, n_features),
421            )
422
423            return (
424                0
425                + self.model.predict(new_X, **kwargs)
426            )[0]
427
428        # len(X.shape) > 1
429        return  self.model.predict(
430            X, **kwargs
431        )

Predict using the trained model.

Parameters

X : array-like of shape (n_samples, n_features)
    Input data.

level : int, optional
    Level of confidence for prediction intervals (default=95).

method : {'splitconformal', 'localconformal'}, optional
    Method for conformal prediction (default='splitconformal').

**kwargs
    Additional keyword arguments. Use `return_pi=True` for prediction intervals,
    or `return_std=True` for standard deviation estimates.

Returns

array or tuple
    Model predictions, or a tuple with prediction intervals or standard
    deviations if requested.
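
A hedged sketch of these options in use, continuing the hypothetical `trainer` and data split from the earlier example; `return_std=True` requires a base model that exposes `return_std` (e.g. `sklearn.linear_model.BayesianRidge`) and yields the `DescribeResults` namedtuple, while `return_pi=True` goes through split conformal prediction:

```python
# point predictions
preds = trainer.predict(X_test)

# mean/std-based intervals: DescribeResults namedtuple with
# fields mean, std, lower, upper
res = trainer.predict(X_test, level=95, return_std=True)
print(res.mean[:3], res.lower[:3], res.upper[:3])

# split conformal prediction intervals
pi = trainer.predict(X_test, level=95, method="splitconformal", return_pi=True)
```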

class DeepClassifier(nnetsauce.CustomClassifier, sklearn.base.ClassifierMixin):
 35class DeepClassifier(CustomClassifier, ClassifierMixin):
 36    """
 37    Deep Classifier
 38
 39    Parameters:
 40
 41        obj: an object
 42            A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification
 43
 44        n_layers: int (default=3)
 45            Number of layers. `n_layers = 1` is a simple `CustomClassifier`
 46
 47        verbose : int, optional (default=0)
 48            Monitor progress when fitting.
 49
 50        All the other parameters are nnetsauce `CustomClassifier`'s
 51
 52    Examples:
 53
 54        ```python
 55        import nnetsauce as ns
 56        from sklearn.datasets import load_breast_cancer
 57        from sklearn.model_selection import train_test_split
 58        from sklearn.linear_model import LogisticRegressionCV
 59        data = load_breast_cancer()
 60        X = data.data
 61        y= data.target
 62        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
 63        obj = LogisticRegressionCV()
 64        clf = ns.DeepClassifier(obj)
 65        clf.fit(X_train, y_train)
 66        print(clf.score(X_test, y_test))
 67        ```
 68    """
 69    _estimator_type = "classifier"
 70
 71    def __init__(
 72        self,
 73        obj,
 74        # Defining depth
 75        n_layers=3,
 76        verbose=0,
 77        # CustomClassifier attributes
 78        n_hidden_features=5,
 79        activation_name="relu",
 80        a=0.01,
 81        nodes_sim="sobol",
 82        bias=True,
 83        dropout=0,
 84        direct_link=True,
 85        n_clusters=2,
 86        cluster_encode=True,
 87        type_clust="kmeans",
 88        type_scaling=("std", "std", "std"),
 89        col_sample=1,
 90        row_sample=1,
 91        cv_calibration=2,
 92        calibration_method="sigmoid",
 93        seed=123,
 94        backend="cpu",
 95    ):
 96        super().__init__(
 97            obj=obj,
 98            n_hidden_features=n_hidden_features,
 99            activation_name=activation_name,
100            a=a,
101            nodes_sim=nodes_sim,
102            bias=bias,
103            dropout=dropout,
104            direct_link=direct_link,
105            n_clusters=n_clusters,
106            cluster_encode=cluster_encode,
107            type_clust=type_clust,
108            type_scaling=type_scaling,
109            col_sample=col_sample,
110            row_sample=row_sample,
111            seed=seed,
112            backend=backend,
113        )
114        self.coef_ = None
115        self.intercept_ = None
116        self.type_fit = "classification"
117        self.cv_calibration = cv_calibration
118        self.calibration_method = calibration_method
119        
120        # Only wrap in CalibratedClassifierCV if not already wrapped
121        # if not isinstance(obj, CalibratedClassifierCV):
122        #     self.obj = CalibratedClassifierCV(
123        #         self.obj, 
124        #         cv=self.cv_calibration,
125        #         method=self.calibration_method
126        #     )
127        # else:
128        self.coef_ = None
129        self.intercept_ = None
130        self.type_fit = "classification"
131        self.cv_calibration = cv_calibration
132        self.calibration_method = calibration_method
133        self.obj = obj
134
135        assert n_layers >= 1, "must have n_layers >= 1"
136        self.stacked_obj = obj
137        self.verbose = verbose
138        self.n_layers = n_layers
139        self.classes_ = None
140        self.n_classes_ = None
141
142    def fit(self, X, y, **kwargs):
143        """Fit Classification algorithms to X and y.
144        Parameters
145        ----------
146        X : array-like,
147            Training vectors, where rows is the number of samples
148            and columns is the number of features.
149        y : array-like,
150            Training vectors, where rows is the number of samples
151            and columns is the number of features.
152        **kwargs: dict
153            Additional parameters to be passed to the fit method
154            of the base learner. For example, `sample_weight`.
155
156        Returns
157        -------
158        A fitted object
159        """
160
161        self.classes_ = np.unique(y)
162        self.n_classes_ = len(
163            self.classes_
164        )  # for compatibility with scikit-learn
165
166        if isinstance(X, np.ndarray):
167            X = pd.DataFrame(X)
168
169        # init layer
170        self.stacked_obj = CustomClassifier(
171            obj=self.stacked_obj,
172            n_hidden_features=self.n_hidden_features,
173            activation_name=self.activation_name,
174            a=self.a,
175            nodes_sim=self.nodes_sim,
176            bias=self.bias,
177            dropout=self.dropout,
178            direct_link=self.direct_link,
179            n_clusters=self.n_clusters,
180            cluster_encode=self.cluster_encode,
181            type_clust=self.type_clust,
182            type_scaling=self.type_scaling,
183            col_sample=self.col_sample,
184            row_sample=self.row_sample,
185            cv_calibration=None,
186            calibration_method=None,
187            seed=self.seed,
188            backend=self.backend,
189        )
190
191        if self.verbose > 0:
192            iterator = tqdm(range(self.n_layers - 1))
193        else:
194            iterator = range(self.n_layers - 1)
195
196        for _ in iterator:
197            self.stacked_obj = deepcopy(
198                CustomClassifier(
199                    obj=self.stacked_obj,
200                    n_hidden_features=self.n_hidden_features,
201                    activation_name=self.activation_name,
202                    a=self.a,
203                    nodes_sim=self.nodes_sim,
204                    bias=self.bias,
205                    dropout=self.dropout,
206                    direct_link=self.direct_link,
207                    n_clusters=self.n_clusters,
208                    cluster_encode=self.cluster_encode,
209                    type_clust=self.type_clust,
210                    type_scaling=self.type_scaling,
211                    col_sample=self.col_sample,
212                    row_sample=self.row_sample,
213                    cv_calibration=None,
214                    calibration_method=None,
215                    seed=self.seed,
216                    backend=self.backend,
217                )
218            )
219            self.stacked_obj.fit(X, y, **kwargs)
220        
221        return self
222
223    def partial_fit(self, X, y, **kwargs):
224        """Fit Classification algorithms to X and y.
225        Parameters
226        ----------
227        X : array-like,
228            Training vectors, where rows is the number of samples
229            and columns is the number of features.
230        y : array-like,
231            Training vectors, where rows is the number of samples
232            and columns is the number of features.
233        **kwargs: dict
234            Additional parameters to be passed to the fit method
235            of the base learner. For example, `sample_weight`.
236        Returns
237        -------
238        A fitted object
239        """
240        assert hasattr(self, "stacked_obj"), "model must be fitted first"
241        current_obj = self.stacked_obj
242        for _ in range(self.n_layers):
243            try:
244                input_X = current_obj.obj.cook_test_set(X)
245                current_obj.obj.partial_fit(input_X, y, **kwargs)
246                try:
247                    current_obj = current_obj.obj
248                except AttributeError:
249                    pass
250            except ValueError:
251                pass
252        return self
253
254    def predict(self, X):
255        return self.stacked_obj.predict(X)
256
257    def predict_proba(self, X):
258        return self.stacked_obj.predict_proba(X)
259
260    def score(self, X, y, scoring=None):
261        return self.stacked_obj.score(X, y, scoring)
262
263    def cross_val_optim(
264        self,
265        X_train,
266        y_train,
267        X_test=None,
268        y_test=None,
269        scoring="accuracy",
270        surrogate_obj=None,
271        cv=5,
272        n_jobs=None,
273        n_init=10,
274        n_iter=190,
275        abs_tol=1e-3,
276        verbose=2,
277        seed=123,
278        **kwargs,
279    ):
280        """Cross-validation function and hyperparameters' search
281
282        Parameters:
283
284            X_train: array-like,
285                Training vectors, where rows is the number of samples
286                and columns is the number of features.
287
288            y_train: array-like,
289                Training vectors, where rows is the number of samples
290                and columns is the number of features.
291
292            X_test: array-like,
293                Testing vectors, where rows is the number of samples
294                and columns is the number of features.
295
296            y_test: array-like,
297                Testing vectors, where rows is the number of samples
298                and columns is the number of features.
299
300            scoring: str
301                scoring metric; see https://scikit-learn.org/stable/modules/model_evaluation.html#the-scoring-parameter-defining-model-evaluation-rules
302
303            surrogate_obj: an object;
304                An ML model for estimating the uncertainty around the objective function
305
306            cv: int;
307                number of cross-validation folds
308
309            n_jobs: int;
310                number of jobs for parallel execution
311
312            n_init: an integer;
313                number of points in the initial setting, when `x_init` and `y_init` are not provided
314
315            n_iter: an integer;
316                number of iterations of the minimization algorithm
317
318            abs_tol: a float;
319                tolerance for convergence of the optimizer (early stopping based on acquisition function)
320
321            verbose: int
322                controls verbosity
323
324            seed: int
325                reproducibility seed
326
327            **kwargs: dict
328                additional parameters to be passed to the estimator
329
330        Examples:
331
332            ```python
333            ```
334        """
335
336        num_to_activation_name = {1: "relu", 2: "sigmoid", 3: "tanh"}
337        num_to_nodes_sim = {1: "sobol", 2: "uniform", 3: "hammersley"}
338        num_to_type_clust = {1: "kmeans", 2: "gmm"}
339
340        def deepclassifier_cv(
341            X_train,
342            y_train,
343            # Defining depth
344            n_layers=3,
345            # CustomClassifier attributes
346            n_hidden_features=5,
347            activation_name="relu",
348            nodes_sim="sobol",
349            dropout=0,
350            n_clusters=2,
351            type_clust="kmeans",
352            cv=5,
353            n_jobs=None,
354            scoring="accuracy",
355            seed=123,
356        ):
357            self.set_params(
358                **{
359                    "n_layers": n_layers,
360                    # CustomClassifier attributes
361                    "n_hidden_features": n_hidden_features,
362                    "activation_name": activation_name,
363                    "nodes_sim": nodes_sim,
364                    "dropout": dropout,
365                    "n_clusters": n_clusters,
366                    "type_clust": type_clust,
367                    **kwargs,
368                }
369            )
370            return -cross_val_score(
371                estimator=self,
372                X=X_train,
373                y=y_train,
374                scoring=scoring,
375                cv=cv,
376                n_jobs=n_jobs,
377                verbose=0,
378            ).mean()
379
380        # objective function for hyperparams tuning
381        def crossval_objective(xx):
382            return deepclassifier_cv(
383                X_train=X_train,
384                y_train=y_train,
385                # Defining depth
386                n_layers=int(np.ceil(xx[0])),
387                # CustomClassifier attributes
388                n_hidden_features=int(np.ceil(xx[1])),
389                activation_name=num_to_activation_name[np.ceil(xx[2])],
390                nodes_sim=num_to_nodes_sim[int(np.ceil(xx[3]))],
391                dropout=xx[4],
392                n_clusters=int(np.ceil(xx[5])),
393                type_clust=num_to_type_clust[int(np.ceil(xx[6]))],
394                cv=cv,
395                n_jobs=n_jobs,
396                scoring=scoring,
397                seed=seed,
398            )
399
400        if surrogate_obj is None:
401            gp_opt = gp.GPOpt(
402                objective_func=crossval_objective,
403                lower_bound=np.array([0, 3, 0, 0, 0.0, 0, 0]),
404                upper_bound=np.array([5, 100, 3, 3, 0.4, 5, 2]),
405                params_names=[
406                    "n_layers",
407                    # CustomClassifier attributes
408                    "n_hidden_features",
409                    "activation_name",
410                    "nodes_sim",
411                    "dropout",
412                    "n_clusters",
413                    "type_clust",
414                ],
415                method="bayesian",
416                n_init=n_init,
417                n_iter=n_iter,
418                seed=seed,
419            )
420        else:
421            gp_opt = gp.GPOpt(
422                objective_func=crossval_objective,
423                lower_bound=np.array([0, 3, 0, 0, 0.0, 0, 0]),
424                upper_bound=np.array([5, 100, 3, 3, 0.4, 5, 2]),
425                params_names=[
426                    "n_layers",
427                    # CustomClassifier attributes
428                    "n_hidden_features",
429                    "activation_name",
430                    "nodes_sim",
431                    "dropout",
432                    "n_clusters",
433                    "type_clust",
434                ],
435                acquisition="ucb",
436                method="splitconformal",
437                surrogate_obj=ns.PredictionInterval(
438                    obj=surrogate_obj, method="splitconformal"
439                ),
440                n_init=n_init,
441                n_iter=n_iter,
442                seed=seed,
443            )
444
445        res = gp_opt.optimize(verbose=verbose, abs_tol=abs_tol)
446        res.best_params["n_layers"] = int(np.ceil(res.best_params["n_layers"]))
447        res.best_params["n_hidden_features"] = int(
448            np.ceil(res.best_params["n_hidden_features"])
449        )
450        res.best_params["activation_name"] = num_to_activation_name[
451            np.ceil(res.best_params["activation_name"])
452        ]
453        res.best_params["nodes_sim"] = num_to_nodes_sim[
454            int(np.ceil(res.best_params["nodes_sim"]))
455        ]
456        res.best_params["dropout"] = res.best_params["dropout"]
457        res.best_params["n_clusters"] = int(np.ceil(res.best_params["n_clusters"]))
458        res.best_params["type_clust"] = num_to_type_clust[
459            int(np.ceil(res.best_params["type_clust"]))
460        ]
461
462        # out-of-sample error
463        if X_test is not None and y_test is not None:
464            self.set_params(**res.best_params, verbose=0, seed=seed)
465            preds = self.fit(X_train, y_train).predict(X_test)
466            # check error on y_test
467            oos_err = getattr(metrics, scoring + "_score")(y_true=y_test, y_pred=preds)
468            result = namedtuple("result", res._fields + ("test_" + scoring,))
469            return result(*res, oos_err)
470        else:
471            return res
472
473    def lazy_cross_val_optim(
474        self,
475        X_train,
476        y_train,
477        X_test=None,
478        y_test=None,
479        scoring="accuracy",
480        surrogate_objs=None,
481        customize=False,
482        cv=5,
483        n_jobs=None,
484        n_init=10,
485        n_iter=190,
486        abs_tol=1e-3,
487        verbose=1,
488        seed=123,
489    ):
490        """Automated Cross-validation function and hyperparameters' search using multiple surrogates
491
492        Parameters:
493
494            X_train: array-like,
495                Training vectors, where rows is the number of samples
496                and columns is the number of features.
497
498            y_train: array-like,
499                Training vectors, where rows is the number of samples
500                and columns is the number of features.
501
502            X_test: array-like,
503                Testing vectors, where rows is the number of samples
504                and columns is the number of features.
505
506            y_test: array-like,
507                Testing vectors, where rows is the number of samples
508                and columns is the number of features.
509
510            scoring: str
511                scoring metric; see https://scikit-learn.org/stable/modules/model_evaluation.html#the-scoring-parameter-defining-model-evaluation-rules
512
513            surrogate_objs: object names as a list of strings;
514                ML models for estimating the uncertainty around the objective function
515
516            customize: boolean
517                if True, the surrogate is transformed into a quasi-randomized network (default is False)
518
519            cv: int;
520                number of cross-validation folds
521
522            n_jobs: int;
523                number of jobs for parallel execution
524
525            n_init: an integer;
526                number of points in the initial setting, when `x_init` and `y_init` are not provided
527
528            n_iter: an integer;
529                number of iterations of the minimization algorithm
530
531            abs_tol: a float;
532                tolerance for convergence of the optimizer (early stopping based on acquisition function)
533
534            verbose: int
535                controls verbosity
536
537            seed: int
538                reproducibility seed
539
540        Examples:
541
542            ```python
543            ```
544        """
545
546        removed_regressors = [
547            "TheilSenRegressor",
548            "ARDRegression",
549            "CCA",
550            "GaussianProcessRegressor",
551            "GradientBoostingRegressor",
552            "HistGradientBoostingRegressor",
553            "IsotonicRegression",
554            "MultiOutputRegressor",
555            "MultiTaskElasticNet",
556            "MultiTaskElasticNetCV",
557            "MultiTaskLasso",
558            "MultiTaskLassoCV",
559            "OrthogonalMatchingPursuit",
560            "OrthogonalMatchingPursuitCV",
561            "PLSCanonical",
562            "PLSRegression",
563            "RadiusNeighborsRegressor",
564            "RegressorChain",
565            "StackingRegressor",
566            "VotingRegressor",
567        ]
568
569        results = []
570
571        for est in all_estimators():
572
573            if surrogate_objs is None:
574
575                if issubclass(est[1], RegressorMixin) and (
576                    est[0] not in removed_regressors
577                ):
578                    try:
579                        if customize == True:
580                            surr_obj = ns.CustomClassifier(obj=est[1]())
581                        else:
582                            surr_obj = est[1]()
583                        res = self.cross_val_optim(
584                            X_train=X_train,
585                            y_train=y_train,
586                            X_test=X_test,
587                            y_test=y_test,
588                            surrogate_obj=surr_obj,
589                            cv=cv,
590                            n_jobs=n_jobs,
591                            scoring=scoring,
592                            n_init=n_init,
593                            n_iter=n_iter,
594                            abs_tol=abs_tol,
595                            verbose=verbose,
596                            seed=seed,
597                        )
598                        if customize == True:
599                            results.append((f"CustomClassifier({est[0]})", res))
600                        else:
601                            results.append((est[0], res))
602                    except:
603                        pass
604
605            else:
606
607                if (
608                    issubclass(est[1], RegressorMixin)
609                    and (est[0] not in removed_regressors)
610                    and est[0] in surrogate_objs
611                ):
612                    try:
613                        if customize == True:
614                            surr_obj = ns.CustomClassifier(obj=est[1]())
615                        else:
616                            surr_obj = est[1]()
617                        res = self.cross_val_optim(
618                            X_train=X_train,
619                            y_train=y_train,
620                            X_test=X_test,
621                            y_test=y_test,
622                            surrogate_obj=surr_obj,
623                            cv=cv,
624                            n_jobs=n_jobs,
625                            scoring=scoring,
626                            n_init=n_init,
627                            n_iter=n_iter,
628                            abs_tol=abs_tol,
629                            verbose=verbose,
630                            seed=seed,
631                        )
632                        if customize == True:
633                            results.append((f"CustomClassifier({est[0]})", res))
634                        else:
635                            results.append((est[0], res))
636                    except:
637                        pass
638
639        return results
640
641    @property
642    def _estimator_type(self):
643        return "classifier"        

Deep Classifier

Parameters:

obj: an object
    A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification

n_layers: int (default=3)
    Number of layers. `n_layers = 1` is a simple `CustomClassifier`

verbose : int, optional (default=0)
    Monitor progress when fitting.

All the other parameters are nnetsauce `CustomClassifier`'s

Examples:

import nnetsauce as ns
    from sklearn.datasets import load_breast_cancer
    from sklearn.model_selection import train_test_split
    from sklearn.linear_model import LogisticRegressionCV
    data = load_breast_cancer()
    X = data.data
    y= data.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
    obj = LogisticRegressionCV()
    clf = ns.DeepClassifier(obj)
    clf.fit(X_train, y_train)
    print(clf.score(X_test, y_test))
    

def fit(self, X, y, **kwargs):
142    def fit(self, X, y, **kwargs):
143        """Fit Classification algorithms to X and y.
144        Parameters
145        ----------
146        X : array-like,
147            Training vectors, where rows is the number of samples
148            and columns is the number of features.
149        y : array-like,
150            Training vectors, where rows is the number of samples
151            and columns is the number of features.
152        **kwargs: dict
153            Additional parameters to be passed to the fit method
154            of the base learner. For example, `sample_weight`.
155
156        Returns
157        -------
158        A fitted object
159        """
160
161        self.classes_ = np.unique(y)
162        self.n_classes_ = len(
163            self.classes_
164        )  # for compatibility with scikit-learn
165
166        if isinstance(X, np.ndarray):
167            X = pd.DataFrame(X)
168
169        # init layer
170        self.stacked_obj = CustomClassifier(
171            obj=self.stacked_obj,
172            n_hidden_features=self.n_hidden_features,
173            activation_name=self.activation_name,
174            a=self.a,
175            nodes_sim=self.nodes_sim,
176            bias=self.bias,
177            dropout=self.dropout,
178            direct_link=self.direct_link,
179            n_clusters=self.n_clusters,
180            cluster_encode=self.cluster_encode,
181            type_clust=self.type_clust,
182            type_scaling=self.type_scaling,
183            col_sample=self.col_sample,
184            row_sample=self.row_sample,
185            cv_calibration=None,
186            calibration_method=None,
187            seed=self.seed,
188            backend=self.backend,
189        )
190
191        if self.verbose > 0:
192            iterator = tqdm(range(self.n_layers - 1))
193        else:
194            iterator = range(self.n_layers - 1)
195
196        for _ in iterator:
197            self.stacked_obj = deepcopy(
198                CustomClassifier(
199                    obj=self.stacked_obj,
200                    n_hidden_features=self.n_hidden_features,
201                    activation_name=self.activation_name,
202                    a=self.a,
203                    nodes_sim=self.nodes_sim,
204                    bias=self.bias,
205                    dropout=self.dropout,
206                    direct_link=self.direct_link,
207                    n_clusters=self.n_clusters,
208                    cluster_encode=self.cluster_encode,
209                    type_clust=self.type_clust,
210                    type_scaling=self.type_scaling,
211                    col_sample=self.col_sample,
212                    row_sample=self.row_sample,
213                    cv_calibration=None,
214                    calibration_method=None,
215                    seed=self.seed,
216                    backend=self.backend,
217                )
218            )
219            self.stacked_obj.fit(X, y, **kwargs)
220        
221        return self

Fit Classification algorithms to X and y.

Parameters

X : array-like,
    Training vectors, where rows is the number of samples
    and columns is the number of features.

y : array-like,
    Target values.

**kwargs: dict
    Additional parameters to be passed to the fit method
    of the base learner. For example, `sample_weight`.

Returns

A fitted object

def predict(self, X):
254    def predict(self, X):
255        return self.stacked_obj.predict(X)

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
def predict_proba(self, X):
257    def predict_proba(self, X):
258        return self.stacked_obj.predict_proba(X)

Predict probabilities for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

probability estimates for test data: {array-like}
def score(self, X, y, scoring=None):
260    def score(self, X, y, scoring=None):
261        return self.stacked_obj.score(X, y, scoring)

Scoring function for classification.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

scoring: str
    scoring method (default is accuracy)

Returns:

score: float
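
The Examples blocks of `cross_val_optim` and `lazy_cross_val_optim` above are empty; here is a minimal hedged sketch of Bayesian hyperparameter search for a `DeepClassifier` on the breast cancer data (a smaller `n_iter` than the default is used here purely to keep runtime short):

```python
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegressionCV
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123
)

clf = ns.DeepClassifier(LogisticRegressionCV())

# cross-validated Bayesian optimization of (n_layers, n_hidden_features,
# activation_name, nodes_sim, dropout, n_clusters, type_clust)
res = clf.cross_val_optim(
    X_train, y_train,
    X_test=X_test, y_test=y_test,
    scoring="accuracy", cv=5,
    n_init=10, n_iter=50,
    verbose=2, seed=123,
)
print(res.best_params)
```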
class DeepRegressor(nnetsauce.CustomRegressor, sklearn.base.RegressorMixin):
 13class DeepRegressor(CustomRegressor, RegressorMixin):
 14    """
 15    Deep Regressor
 16
 17    Parameters:
 18
 19        obj: an object
 20            A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification
 21
 22        verbose : int, optional (default=0)
 23            Monitor progress when fitting.
 24
 25        n_layers: int (default=2)
 26            Number of layers. `n_layers = 1` is a simple `CustomRegressor`
 27
 28        All the other parameters are nnetsauce `CustomRegressor`'s
 29
 30    Examples:
 31
 32        ```python
 33        import nnetsauce as ns
 34        from sklearn.datasets import load_diabetes
 35        from sklearn.model_selection import train_test_split
 36        from sklearn.linear_model import RidgeCV
 37        data = load_diabetes()
 38        X = data.data
 39        y= data.target
 40        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
 41        obj = RidgeCV()
 42        clf = ns.DeepRegressor(obj)
 43        clf.fit(X_train, y_train)
 44        print(clf.score(X_test, y_test))
 45        ```
 46
 47    """
 48
 49    def __init__(
 50        self,
 51        obj,
 52        # Defining depth
 53        n_layers=2,
 54        verbose=0,
 55        # CustomRegressor attributes
 56        n_hidden_features=5,
 57        activation_name="relu",
 58        a=0.01,
 59        nodes_sim="sobol",
 60        bias=True,
 61        dropout=0,
 62        direct_link=True,
 63        n_clusters=2,
 64        cluster_encode=True,
 65        type_clust="kmeans",
 66        type_scaling=("std", "std", "std"),
 67        col_sample=1,
 68        row_sample=1,
 69        level=None,
 70        pi_method="splitconformal",
 71        seed=123,
 72        backend="cpu",
 73    ):
 74        super().__init__(
 75            obj=obj,
 76            n_hidden_features=n_hidden_features,
 77            activation_name=activation_name,
 78            a=a,
 79            nodes_sim=nodes_sim,
 80            bias=bias,
 81            dropout=dropout,
 82            direct_link=direct_link,
 83            n_clusters=n_clusters,
 84            cluster_encode=cluster_encode,
 85            type_clust=type_clust,
 86            type_scaling=type_scaling,
 87            col_sample=col_sample,
 88            row_sample=row_sample,
 89            level=level,
 90            pi_method=pi_method,
 91            seed=seed,
 92            backend=backend,
 93        )
 94
 95        assert n_layers >= 1, "must have n_layers >= 1"
 96
 97        self.stacked_obj = deepcopy(obj)
 98        self.verbose = verbose
 99        self.n_layers = n_layers
100        self.level = level
101        self.pi_method = pi_method
102        self.coef_ = None
103
104    def fit(self, X, y, **kwargs):
105        """Fit Regression algorithms to X and y.
106        Parameters
107        ----------
108        X : array-like,
109            Training vectors, where rows is the number of samples
110            and columns is the number of features.
111        y : array-like,
112            Training vectors, where rows is the number of samples
113            and columns is the number of features.
114        **kwargs: dict
115            Additional parameters to be passed to the fit method
116            of the base learner. For example, `sample_weight`.
117        Returns
118        -------
119        A fitted object
120        """
121
122        if isinstance(X, np.ndarray):
123            X = pd.DataFrame(X)
124
125        # init layer
126        self.stacked_obj = CustomRegressor(
127            obj=self.stacked_obj,
128            n_hidden_features=self.n_hidden_features,
129            activation_name=self.activation_name,
130            a=self.a,
131            nodes_sim=self.nodes_sim,
132            bias=self.bias,
133            dropout=self.dropout,
134            direct_link=self.direct_link,
135            n_clusters=self.n_clusters,
136            cluster_encode=self.cluster_encode,
137            type_clust=self.type_clust,
138            type_scaling=self.type_scaling,
139            col_sample=self.col_sample,
140            row_sample=self.row_sample,
141            seed=self.seed,
142            backend=self.backend,
143        )
144
145        if self.verbose > 0:
146            iterator = tqdm(range(self.n_layers - 1))
147        else:
148            iterator = range(self.n_layers - 1)
149
150        for _ in iterator:
151            self.stacked_obj = deepcopy(
152                CustomRegressor(
153                    obj=self.stacked_obj,
154                    n_hidden_features=self.n_hidden_features,
155                    activation_name=self.activation_name,
156                    a=self.a,
157                    nodes_sim=self.nodes_sim,
158                    bias=self.bias,
159                    dropout=self.dropout,
160                    direct_link=self.direct_link,
161                    n_clusters=self.n_clusters,
162                    cluster_encode=self.cluster_encode,
163                    type_clust=self.type_clust,
164                    type_scaling=self.type_scaling,
165                    col_sample=self.col_sample,
166                    row_sample=self.row_sample,
167                    seed=self.seed,
168                    backend=self.backend,
169                )
170            )
171
172        self.stacked_obj.fit(X, y, **kwargs)
173
174        if self.level is not None:
175            self.stacked_obj = PredictionInterval(
176                obj=self.stacked_obj, method=self.pi_method, level=self.level
177            )
178
179        if hasattr(self.stacked_obj, "clustering_obj_"):
180            self.clustering_obj_ = self.stacked_obj.clustering_obj_
181
182        if hasattr(self.stacked_obj, "coef_"):
183            self.coef_ = self.stacked_obj.coef_
184
185        if hasattr(self.stacked_obj, "scaler_"):
186            self.scaler_ = self.stacked_obj.scaler_
187
188        if hasattr(self.stacked_obj, "nn_scaler_"):
189            self.nn_scaler_ = self.stacked_obj.nn_scaler_
190
191        if hasattr(self.stacked_obj, "clustering_scaler_"):
192            self.clustering_scaler_ = self.stacked_obj.clustering_scaler_
193
194        return self
195
196    def partial_fit(self, X, y, **kwargs):
197        """Fit Regression algorithms to X and y.
198        Parameters
199        ----------
200        X : array-like,
201            Training vectors, where rows is the number of samples
202            and columns is the number of features.
203        y : array-like,
204            Training vectors, where rows is the number of samples
205            and columns is the number of features.
206        **kwargs: dict
207            Additional parameters to be passed to the fit method
208            of the base learner. For example, `sample_weight`.
209        Returns
210        -------
211        A fitted object
212        """
213        assert hasattr(self, "stacked_obj"), "model must be fitted first"
214        current_obj = self.stacked_obj
215        for _ in range(self.n_layers):
216            try:
217                input_X = current_obj.obj.cook_test_set(X)
218                current_obj.obj.partial_fit(input_X, y, **kwargs)
219                try:
220                    current_obj = current_obj.obj
221                except AttributeError:
222                    pass
223            except ValueError as e:
224                print(e)
225                pass
226        return self
227
228    def predict(self, X, **kwargs):
229        if self.level is not None:
230            return self.stacked_obj.predict(X, return_pi=True)
231        return self.stacked_obj.predict(X, **kwargs)
232
233    def score(self, X, y, scoring=None):
234        return self.stacked_obj.score(X, y, scoring)

Deep Regressor

Parameters:

obj: an object
    A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification

verbose : int, optional (default=0)
    Monitor progress when fitting.

n_layers: int (default=2)
    Number of layers. `n_layers = 1` is a simple `CustomRegressor`

All the other parameters are nnetsauce `CustomRegressor`'s

Examples:

import nnetsauce as ns
    from sklearn.datasets import load_diabetes
    from sklearn.model_selection import train_test_split
    from sklearn.linear_model import RidgeCV
    data = load_diabetes()
    X = data.data
    y= data.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
    obj = RidgeCV()
    clf = ns.DeepRegressor(obj)
    clf.fit(X_train, y_train)
    print(clf.score(X_test, y_test))
    

def fit(self, X, y, **kwargs):
104    def fit(self, X, y, **kwargs):
105        """Fit Regression algorithms to X and y.
106        Parameters
107        ----------
108        X : array-like,
109            Training vectors, where rows is the number of samples
110            and columns is the number of features.
111        y : array-like,
112            Training vectors, where rows is the number of samples
113            and columns is the number of features.
114        **kwargs: dict
115            Additional parameters to be passed to the fit method
116            of the base learner. For example, `sample_weight`.
117        Returns
118        -------
119        A fitted object
120        """
121
122        if isinstance(X, np.ndarray):
123            X = pd.DataFrame(X)
124
125        # init layer
126        self.stacked_obj = CustomRegressor(
127            obj=self.stacked_obj,
128            n_hidden_features=self.n_hidden_features,
129            activation_name=self.activation_name,
130            a=self.a,
131            nodes_sim=self.nodes_sim,
132            bias=self.bias,
133            dropout=self.dropout,
134            direct_link=self.direct_link,
135            n_clusters=self.n_clusters,
136            cluster_encode=self.cluster_encode,
137            type_clust=self.type_clust,
138            type_scaling=self.type_scaling,
139            col_sample=self.col_sample,
140            row_sample=self.row_sample,
141            seed=self.seed,
142            backend=self.backend,
143        )
144
145        if self.verbose > 0:
146            iterator = tqdm(range(self.n_layers - 1))
147        else:
148            iterator = range(self.n_layers - 1)
149
150        for _ in iterator:
151            self.stacked_obj = deepcopy(
152                CustomRegressor(
153                    obj=self.stacked_obj,
154                    n_hidden_features=self.n_hidden_features,
155                    activation_name=self.activation_name,
156                    a=self.a,
157                    nodes_sim=self.nodes_sim,
158                    bias=self.bias,
159                    dropout=self.dropout,
160                    direct_link=self.direct_link,
161                    n_clusters=self.n_clusters,
162                    cluster_encode=self.cluster_encode,
163                    type_clust=self.type_clust,
164                    type_scaling=self.type_scaling,
165                    col_sample=self.col_sample,
166                    row_sample=self.row_sample,
167                    seed=self.seed,
168                    backend=self.backend,
169                )
170            )
171
172        self.stacked_obj.fit(X, y, **kwargs)
173
174        if self.level is not None:
175            self.stacked_obj = PredictionInterval(
176                obj=self.stacked_obj, method=self.pi_method, level=self.level
177            )
178
179        if hasattr(self.stacked_obj, "clustering_obj_"):
180            self.clustering_obj_ = self.stacked_obj.clustering_obj_
181
182        if hasattr(self.stacked_obj, "coef_"):
183            self.coef_ = self.stacked_obj.coef_
184
185        if hasattr(self.stacked_obj, "scaler_"):
186            self.scaler_ = self.stacked_obj.scaler_
187
188        if hasattr(self.stacked_obj, "nn_scaler_"):
189            self.nn_scaler_ = self.stacked_obj.nn_scaler_
190
191        if hasattr(self.stacked_obj, "clustering_scaler_"):
192            self.clustering_scaler_ = self.stacked_obj.clustering_scaler_
193
194        return self

Fit Regression algorithms to X and y.

Parameters

X : array-like,
    Training vectors, where rows is the number of samples
    and columns is the number of features.

y : array-like,
    Target values.

**kwargs: dict
    Additional parameters to be passed to the fit method
    of the base learner. For example, `sample_weight`.

Returns

A fitted object

def predict(self, X, **kwargs):
228    def predict(self, X, **kwargs):
229        if self.level is not None:
230            return self.stacked_obj.predict(X, return_pi=True)
231        return self.stacked_obj.predict(X, **kwargs)

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

level: int
    Level of confidence (default = 95)

method: str
    'splitconformal' or 'localconformal'; conformal prediction
    method used when `return_pi = True`

**kwargs: additional parameters
        `return_pi = True` for conformal prediction,
        with `method` in ('splitconformal', 'localconformal'),
        or `return_std = True` for `self.obj` in
        (`sklearn.linear_model.BayesianRidge`,
        `sklearn.linear_model.ARDRegression`,
        `sklearn.gaussian_process.GaussianProcessRegressor`)

Returns:

model predictions:
    an array if uncertainty quantification is not requested,
    or a tuple otherwise: (mean, standard deviation, lower and
    upper prediction interval) if `return_std = True`, or
    prediction intervals and simulations if `return_pi = True`
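
A short hedged sketch of requesting prediction intervals from a `DeepRegressor`, reusing the docstring's diabetes example; setting `level` at construction wraps the stacked model in a `PredictionInterval` at fit time, so `predict` then returns conformal intervals:

```python
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.linear_model import RidgeCV
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123
)

# level=95 with split conformal prediction intervals
reg = ns.DeepRegressor(RidgeCV(), n_layers=2, level=95, pi_method="splitconformal")
reg.fit(X_train, y_train)
print(reg.predict(X_test))
```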
def score(self, X, y, scoring=None):
233    def score(self, X, y, scoring=None):
234        return self.stacked_obj.score(X, y, scoring)

Compute the score of the model.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

scoring: str
    scoring method

Returns:

score: float
class DeepMTS(nnetsauce.MTS):
 11class DeepMTS(MTS):
 12    """Univariate and multivariate time series (DeepMTS) forecasting with Quasi-Randomized networks (Work in progress)
 13
 14    Parameters:
 15
 16        obj: object.
 17            any object containing a method fit (obj.fit()) and a method predict
 18            (obj.predict()).
 19
 20        n_layers: int.
 21            number of layers in the neural network.
 22
 23        n_hidden_features: int.
 24            number of nodes in the hidden layer.
 25
 26        activation_name: str.
 27            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'.
 28
 29        a: float.
 30            hyperparameter for 'prelu' or 'elu' activation function.
 31
 32        nodes_sim: str.
 33            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 34            'uniform'.
 35
 36        bias: boolean.
 37            indicates if the hidden layer contains a bias term (True) or not
 38            (False).
 39
 40        dropout: float.
 41            regularization parameter; (random) percentage of nodes dropped out
 42            of the training.
 43
 44        direct_link: boolean.
 45            indicates if the original predictors are included (True) in model's fitting or not (False).
 46
 47        n_clusters: int.
 48            number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering).
 49
 50        cluster_encode: bool.
 51            defines how the variable containing clusters is treated (default is one-hot)
 52            if `False`, then labels are used, without one-hot encoding.
 53
 54        type_clust: str.
 55            type of clustering method: currently k-means ('kmeans') or Gaussian
 56            Mixture Model ('gmm').
 57
 58        type_scaling: a tuple of 3 strings.
 59            scaling methods for inputs, hidden layer, and clustering respectively
 60            (and when relevant).
 61            Currently available: standardization ('std') or MinMax scaling ('minmax').
 62
 63        lags: int.
 64            number of lags used for each time series.
 65
 66        type_pi: str.
 67            type of prediction interval; currently:
 68            - "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case
 69            - "kde": based on Kernel Density Estimation of in-sample residuals
 70            - "bootstrap": based on independent bootstrap of in-sample residuals
 71            - "block-bootstrap": based on basic block bootstrap of in-sample residuals
 72            - "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals
 73            - "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals
 74            - "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals
 75            - "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals
 76            - "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals
 77            - "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals
 78
 79        block_size: int.
 80            size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap").
 81            Default is round(3.15*(n_residuals^(1/3)))
 82
 83        replications: int.
 84            number of replications (if needed, for predictive simulation). Default is 'None'.
 85
 86        kernel: str.
 87            the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'.
 88
 89        agg: str.
 90            either "mean" or "median" for simulation of bootstrap aggregating
 91
 92        seed: int.
 93            reproducibility seed for nodes_sim=='uniform' or predictive simulation.
 94
 95        backend: str.
 96            "cpu" or "gpu" or "tpu".
 97
 98        verbose: int.
 99            0: not printing; 1: printing
100
101        show_progress: bool.
102            True: progress bar when fitting each series; False: no progress bar when fitting each series
103
104    Attributes:
105
106        fit_objs_: dict
107            objects adjusted to each individual time series
108
109        y_: {array-like}
110            DeepMTS responses (most recent observations first)
111
112        X_: {array-like}
113            DeepMTS lags
114
115        xreg_: {array-like}
116            external regressors
117
118        y_means_: dict
119            a dictionary of each series mean values
120
121        preds_: {array-like}
122            successive model predictions
123
124        preds_std_: {array-like}
125            standard deviation around the predictions
126
127        return_std_: boolean
128            return uncertainty or not (set in predict)
129
130        df_: data frame
131            the input data frame, in case a data.frame is provided to `fit`
132
133    Examples:
134
135    Example 1:
136
137        ```python
138        import nnetsauce as ns
139        import numpy as np
140        from sklearn import linear_model
141        np.random.seed(123)
142
143        M = np.random.rand(10, 3)
144        M[:,0] = 10*M[:,0]
145        M[:,2] = 25*M[:,2]
146        print(M)
147
148        # Adjust Bayesian Ridge
149        regr4 = linear_model.BayesianRidge()
150        obj_DeepMTS = ns.DeepMTS(regr4, lags = 1, n_hidden_features=5)
151        obj_DeepMTS.fit(M)
152        print(obj_DeepMTS.predict())
153
154        # with credible intervals
155        print(obj_DeepMTS.predict(return_std=True, level=80))
156
157        print(obj_DeepMTS.predict(return_std=True, level=95))
158        ```
159
160    Example 2:
161
162        ```python
163        import nnetsauce as ns
164        import numpy as np
165        from sklearn import linear_model
166
167        dataset = {
168        'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
169        'series1' : [34, 30, 35.6, 33.3, 38.1],
170        'series2' : [4, 5.5, 5.6, 6.3, 5.1],
171        'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
172        df = pd.DataFrame(dataset).set_index('date')
173        print(df)
174
175        # Adjust Bayesian Ridge
176        regr5 = linear_model.BayesianRidge()
177        obj_DeepMTS = ns.DeepMTS(regr5, lags = 1, n_hidden_features=5)
178        obj_DeepMTS.fit(df)
179        print(obj_DeepMTS.predict())
180
181        # with credible intervals
182        print(obj_DeepMTS.predict(return_std=True, level=80))
183
184        print(obj_DeepMTS.predict(return_std=True, level=95))
185        ```
186
187    """
188
189    # construct the object -----
190
191    def __init__(
192        self,
193        obj,
194        n_layers=3,
195        n_hidden_features=5,
196        activation_name="relu",
197        a=0.01,
198        nodes_sim="sobol",
199        bias=True,
200        dropout=0,
201        direct_link=True,
202        n_clusters=2,
203        cluster_encode=True,
204        type_clust="kmeans",
205        type_scaling=("std", "std", "std"),
206        lags=1,
207        type_pi="kde",
208        block_size=None,
209        replications=None,
210        kernel=None,
211        agg="mean",
212        seed=123,
213        backend="cpu",
214        verbose=0,
215        show_progress=True,
216    ):
217        assert int(lags) == lags, "parameter 'lags' should be an integer"
218        assert n_layers >= 1, "must have n_layers >= 1"
219        self.n_layers = int(n_layers)
220
221        if self.n_layers > 1:
222
223            for _ in range(self.n_layers - 1):
224                obj = CustomRegressor(
225                    obj=deepcopy(obj),
226                    n_hidden_features=n_hidden_features,
227                    activation_name=activation_name,
228                    a=a,
229                    nodes_sim=nodes_sim,
230                    bias=bias,
231                    dropout=dropout,
232                    direct_link=direct_link,
233                    n_clusters=n_clusters,
234                    cluster_encode=cluster_encode,
235                    type_clust=type_clust,
236                    type_scaling=type_scaling,
237                    seed=seed,
238                    backend=backend,
239                )
240
241        self.obj = deepcopy(obj)
242        super().__init__(
243            obj=self.obj,
244            n_hidden_features=n_hidden_features,
245            activation_name=activation_name,
246            a=a,
247            nodes_sim=nodes_sim,
248            bias=bias,
249            dropout=dropout,
250            direct_link=direct_link,
251            n_clusters=n_clusters,
252            cluster_encode=cluster_encode,
253            type_clust=type_clust,
254            type_scaling=type_scaling,
255            lags=lags,
256            type_pi=type_pi,
257            block_size=block_size,
258            replications=replications,
259            kernel=kernel,
260            agg=agg,
261            seed=seed,
262            backend=backend,
263            verbose=verbose,
264            show_progress=show_progress,
265        )

Univariate and multivariate time series (DeepMTS) forecasting with Quasi-Randomized networks (Work in progress)

Parameters:

obj: object.
    any object containing a method fit (obj.fit()) and a method predict
    (obj.predict()).

n_layers: int.
    number of layers in the neural network.

n_hidden_features: int.
    number of nodes in the hidden layer.

activation_name: str.
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'.

a: float.
    hyperparameter for 'prelu' or 'elu' activation function.

nodes_sim: str.
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'.

bias: boolean.
    indicates if the hidden layer contains a bias term (True) or not
    (False).

dropout: float.
    regularization parameter; (random) percentage of nodes dropped out
    of the training.

direct_link: boolean.
    indicates if the original predictors are included (True) in model's fitting or not (False).

n_clusters: int.
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering).

cluster_encode: bool.
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding.

type_clust: str.
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm').

type_scaling: a tuple of 3 strings.
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax').

lags: int.
    number of lags used for each time series.

type_pi: str.
    type of prediction interval; currently:
    - "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case
    - "kde": based on Kernel Density Estimation of in-sample residuals
    - "bootstrap": based on independent bootstrap of in-sample residuals
    - "block-bootstrap": based on basic block bootstrap of in-sample residuals
    - "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals
    - "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals
    - "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals
    - "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals
    - "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals
    - "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals

block_size: int.
    size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap").
    Default is round(3.15*(n_residuals^(1/3)))

replications: int.
    number of replications (if needed, for predictive simulation). Default is 'None'.

kernel: str.
    the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'.

agg: str.
    either "mean" or "median" for simulation of bootstrap aggregating

seed: int.
    reproducibility seed for nodes_sim=='uniform' or predictive simulation.

backend: str.
    "cpu" or "gpu" or "tpu".

verbose: int.
    0: not printing; 1: printing

show_progress: bool.
    True: progress bar when fitting each series; False: no progress bar when fitting each series

Attributes:

fit_objs_: dict
    objects adjusted to each individual time series

y_: {array-like}
    DeepMTS responses (most recent observations first)

X_: {array-like}
    DeepMTS lags

xreg_: {array-like}
    external regressors

y_means_: dict
    a dictionary of each series mean values

preds_: {array-like}
    successive model predictions

preds_std_: {array-like}
    standard deviation around the predictions

return_std_: boolean
    return uncertainty or not (set in predict)

df_: data frame
    the input data frame, in case a data.frame is provided to `fit`

Examples:

Example 1:

import nnetsauce as ns
import numpy as np
from sklearn import linear_model

np.random.seed(123)

M = np.random.rand(10, 3)
M[:,0] = 10*M[:,0]
M[:,2] = 25*M[:,2]
print(M)

# Adjust Bayesian Ridge
regr4 = linear_model.BayesianRidge()
obj_DeepMTS = ns.DeepMTS(regr4, lags = 1, n_hidden_features=5)
obj_DeepMTS.fit(M)
print(obj_DeepMTS.predict())

# with credible intervals
print(obj_DeepMTS.predict(return_std=True, level=80))
print(obj_DeepMTS.predict(return_std=True, level=95))

Example 2:

import nnetsauce as ns
import numpy as np
import pandas as pd
from sklearn import linear_model

dataset = {
    'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
    'series1' : [34, 30, 35.6, 33.3, 38.1],
    'series2' : [4, 5.5, 5.6, 6.3, 5.1],
    'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
df = pd.DataFrame(dataset).set_index('date')
print(df)

# Adjust Bayesian Ridge
regr5 = linear_model.BayesianRidge()
obj_DeepMTS = ns.DeepMTS(regr5, lags = 1, n_hidden_features=5)
obj_DeepMTS.fit(df)
print(obj_DeepMTS.predict())

# with credible intervals
print(obj_DeepMTS.predict(return_std=True, level=80))
print(obj_DeepMTS.predict(return_std=True, level=95))
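
To illustrate the `type_pi`, `replications` and `agg` parameters described above, here is a minimal sketch reusing the toy data of Example 2 (the exact return format for simulation-based intervals depends on the nnetsauce version):

```python
import nnetsauce as ns
import pandas as pd
from sklearn import linear_model

dataset = {
    'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
    'series1' : [34, 30, 35.6, 33.3, 38.1],
    'series2' : [4, 5.5, 5.6, 6.3, 5.1],
    'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
df = pd.DataFrame(dataset).set_index('date')

# independent bootstrap of in-sample residuals, 100 simulation paths,
# aggregated with the median (see 'type_pi', 'replications' and 'agg' above)
obj = ns.DeepMTS(linear_model.BayesianRidge(),
                 lags=1,
                 n_hidden_features=5,
                 type_pi="bootstrap",
                 replications=100,
                 agg="median",
                 seed=123)
obj.fit(df)
print(obj.predict(level=95))
```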

class Downloader:
 6class Downloader:
 7    """Download datasets from data sources (R-universe for now)"""
 8
 9    def __init__(self):
10        self.pkgname = None
11        self.dataset = None
12        self.source = None
13        self.url = None
14        self.request = None
15
16    def download(
17        self,
18        pkgname="MASS",
19        dataset="Boston",
20        source="https://cran.r-universe.dev/",
21        **kwargs
22    ):
23        """Download datasets from data sources (R-universe for now)
24
25        Examples:
26
27        ```python
28        import nnetsauce as ns
29
30        downloader = ns.Downloader()
31        df = downloader.download(pkgname="MASS", dataset="Boston")
32        ```
33
34        """
35        self.pkgname = pkgname
36        self.dataset = dataset
37        self.source = source
38        self.url = source + pkgname + "/data/" + dataset + "/json"
39        self.request = requests.get(self.url)
40        return pd.DataFrame(self.request.json(), **kwargs)

Download datasets from data sources (R-universe for now)

def download( self, pkgname='MASS', dataset='Boston', source='https://cran.r-universe.dev/', **kwargs):
16    def download(
17        self,
18        pkgname="MASS",
19        dataset="Boston",
20        source="https://cran.r-universe.dev/",
21        **kwargs
22    ):
23        """Download datasets from data sources (R-universe for now)
24
25        Examples:
26
27        ```python
28        import nnetsauce as ns
29
30        downloader = ns.Downloader()
31        df = downloader.download(pkgname="MASS", dataset="Boston")
32        ```
33
34        """
35        self.pkgname = pkgname
36        self.dataset = dataset
37        self.source = source
38        self.url = source + pkgname + "/data/" + dataset + "/json"
39        self.request = requests.get(self.url)
40        return pd.DataFrame(self.request.json(), **kwargs)

Download datasets from data sources (R-universe for now)

Examples:

import nnetsauce as ns

downloader = ns.Downloader()
df = downloader.download(pkgname="MASS", dataset="Boston")
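
With the defaults above, the requested URL is `https://cran.r-universe.dev/MASS/data/Boston/json` (the method concatenates `source + pkgname + "/data/" + dataset + "/json"`), and any extra keyword arguments are forwarded to `pandas.DataFrame`. A hedged sketch (assuming the dataset is published on R-universe and an internet connection is available):

```python
import nnetsauce as ns

downloader = ns.Downloader()
# defaults resolve to https://cran.r-universe.dev/MASS/data/Boston/json;
# 'dtype' is simply passed through to pandas.DataFrame
boston = downloader.download(pkgname="MASS", dataset="Boston", dtype=float)
print(boston.shape)
print(boston.head())
```
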
class ElasticNet2Regressor(nnetsauce.ridge2.ridge2.Ridge2, sklearn.base.RegressorMixin):
 23class ElasticNet2Regressor(Ridge2, RegressorMixin):
 24    """Enhanced Elastic Net with dual regularization paths, JAX support, and coordinate descent.
 25    
 26    Features:
 27    - Separate L1/L2 ratios for direct (lambda1/l1_ratio1) and hidden (lambda2/l1_ratio2) paths
 28    - JAX acceleration for GPU/TPU when backend != 'cpu'
 29    - Choice of optimization methods (L-BFGS-B or coordinate descent)
 30
 31    Parameters:
 32        n_hidden_features: int
 33            Number of nodes in the hidden layer
 34        activation_name: str
 35            Activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 36        a: float
 37            Hyperparameter for 'prelu' or 'elu' activation
 38        nodes_sim: str
 39            Node simulation type: 'sobol', 'hammersley', 'halton', 'uniform'
 40        bias: bool
 41            Whether to include bias term in hidden layer
 42        dropout: float
 43            Dropout rate (regularization)
 44        n_clusters: int
 45            Number of clusters (0 for no clustering)
 46        cluster_encode: bool
 47            Whether to one-hot encode clusters
 48        type_clust: str
 49            Clustering method: 'kmeans' or 'gmm'
 50        type_scaling: tuple
 51            Scaling methods for (inputs, hidden layer, clusters)
 52        lambda1: float
 53            Regularization strength for direct connections
 54        lambda2: float
 55            Regularization strength for hidden layer
 56        l1_ratio1: float
 57            L1 ratio (0-1) for direct connections
 58        l1_ratio2: float
 59            L1 ratio (0-1) for hidden layer
 60        max_iter: int
 61            Maximum optimization iterations
 62        tol: float
 63            Optimization tolerance
 64        solver: str
 65            Optimization method: 'lbfgs' or 'cd' (coordinate descent)
 66        seed: int
 67            Random seed
 68        backend: str
 69            'cpu', 'gpu', or 'tpu'
 70    """
 71
 72    def __init__(
 73        self,
 74        n_hidden_features=5,
 75        activation_name="relu",
 76        a=0.01,
 77        nodes_sim="sobol",
 78        bias=True,
 79        dropout=0,
 80        n_clusters=2,
 81        cluster_encode=True,
 82        type_clust="kmeans",
 83        type_scaling=("std", "std", "std"),
 84        lambda1=0.1,
 85        lambda2=0.1,
 86        l1_ratio1=0.5,
 87        l1_ratio2=0.5,
 88        max_iter=1000,
 89        tol=1e-4,
 90        solver="lbfgs",
 91        seed=123,
 92        backend="cpu",
 93    ):
 94        super().__init__(
 95            n_hidden_features=n_hidden_features,
 96            activation_name=activation_name,
 97            a=a,
 98            nodes_sim=nodes_sim,
 99            bias=bias,
100            dropout=dropout,
101            n_clusters=n_clusters,
102            cluster_encode=cluster_encode,
103            type_clust=type_clust,
104            type_scaling=type_scaling,
105            lambda1=lambda1,
106            lambda2=lambda2,
107            seed=seed,
108            backend=backend,
109        )
110        
111        self.l1_ratio1 = l1_ratio1
112        self.l1_ratio2 = l1_ratio2
113        self.max_iter = max_iter
114        self.tol = tol
115        self.solver = solver
116        self.type_fit = "regression"
117        
118        # Initialize JAX-related attributes
119        self._jax_initialized = False
120        self._init_jax_functions()
121
122    def _init_jax_functions(self):
123        """Initialize JAX functions if backend is not CPU and JAX is available"""
124        if self.backend != "cpu" and JAX_AVAILABLE and not self._jax_initialized:
125            # JIT compile key functions
126            self._jax_elastic_net_penalty = jit(self._jax_penalty)
127            self._jax_objective = jit(self._jax_obj)
128            self._jax_grad = jit(grad(self._jax_obj))
129            self._jax_initialized = True
130
131    def _jax_penalty(self, beta, n_direct):
132        """JAX version of elastic net penalty"""
133        beta_direct = beta[:n_direct]
134        beta_hidden = beta[n_direct:]
135        
136        l1_1 = self.lambda1 * self.l1_ratio1 * jnp.sum(jnp.abs(beta_direct))
137        l2_1 = 0.5 * self.lambda1 * (1-self.l1_ratio1) * jnp.sum(beta_direct**2)
138        l1_2 = self.lambda2 * self.l1_ratio2 * jnp.sum(jnp.abs(beta_hidden))
139        l2_2 = 0.5 * self.lambda2 * (1-self.l1_ratio2) * jnp.sum(beta_hidden**2)
140        
141        return l1_1 + l2_1 + l1_2 + l2_2
142
143    def _jax_obj(self, beta, X, y, n_direct):
144        """JAX version of objective function"""
145        residuals = y - jnp.dot(X, beta)
146        mse = jnp.mean(residuals**2)
147        penalty = self._jax_penalty(beta, n_direct)
148        return 0.5 * mse + penalty
149
150    def _numpy_penalty(self, beta, n_direct):
151        """NumPy version of elastic net penalty"""
152        beta_direct = beta[:n_direct]
153        beta_hidden = beta[n_direct:]
154        
155        l1_1 = self.lambda1 * self.l1_ratio1 * np.sum(np.abs(beta_direct))
156        l2_1 = 0.5 * self.lambda1 * (1-self.l1_ratio1) * np.sum(beta_direct**2)
157        l1_2 = self.lambda2 * self.l1_ratio2 * np.sum(np.abs(beta_hidden))
158        l2_2 = 0.5 * self.lambda2 * (1-self.l1_ratio2) * np.sum(beta_hidden**2)
159        
160        return l1_1 + l2_1 + l1_2 + l2_2
161
162    def _numpy_obj(self, beta, X, y, n_direct):
163        """NumPy version of objective function"""
164        residuals = y - np.dot(X, beta)
165        mse = np.mean(residuals**2)
166        penalty = self._numpy_penalty(beta, n_direct)
167        return 0.5 * mse + penalty
168
169    def _soft_threshold(self, x, threshold):
170        """Soft thresholding operator for coordinate descent"""
171        return np.sign(x) * np.maximum(np.abs(x) - threshold, 0)
172
173    def _coordinate_descent(self, X, y, n_direct):
174        """Coordinate descent optimization"""
175        n_samples, n_features = X.shape
176        beta = np.zeros(n_features)
177        XtX = X.T @ X
178        Xty = X.T @ y
179        diag_XtX = np.diag(XtX)
180        
181        for _ in range(self.max_iter):
182            beta_old = beta.copy()
183            
184            for j in range(n_features):
185                # Compute partial residual
186                X_j = X[:, j]
187                r = y - X @ beta + X_j * beta[j]
188                
189                # Compute unregularized update
190                update = X_j @ r / (diag_XtX[j] + 1e-10)
191                
192                # Apply appropriate regularization
193                if j < n_direct:  # Direct connection
194                    lambda_ = self.lambda1
195                    l1_ratio = self.l1_ratio1
196                else:  # Hidden layer connection
197                    lambda_ = self.lambda2
198                    l1_ratio = self.l1_ratio2
199                
200                # Apply soft thresholding for L1 and shrinkage for L2
201                beta[j] = self._soft_threshold(update, lambda_ * l1_ratio)
202                beta[j] /= (1 + lambda_ * (1 - l1_ratio))
203            
204            # Check convergence
205            if np.max(np.abs(beta - beta_old)) < self.tol:
206                break
207                
208        return beta
209
210    def fit(self, X, y, **kwargs):
211        """Fit model with selected optimization method"""
212        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
213        n_X, p_X = X.shape
214        n_Z, p_Z = scaled_Z.shape
215
216        if self.n_clusters > 0:
217            n_direct = p_X + (self.n_clusters if self.cluster_encode else 1)
218        else:
219            n_direct = p_X
220
221        X_ = scaled_Z[:, 0:n_direct]
222        Phi_X_ = scaled_Z[:, n_direct:p_Z]
223        all_features = np.hstack([X_, Phi_X_])
224        
225        # Convert to JAX arrays if using GPU/TPU
226        if self.backend != "cpu" and JAX_AVAILABLE:
227            all_features = jnp.array(all_features)
228            centered_y = jnp.array(centered_y)
229            beta_init = jnp.zeros(all_features.shape[1])
230            
231            if self.solver == "lbfgs":
232                res = minimize(
233                    fun=self._jax_obj,
234                    x0=beta_init,
235                    args=(all_features, centered_y, n_direct),
236                    method='L-BFGS-B',
237                    jac=self._jax_grad,
238                    options={'maxiter': self.max_iter, 'gtol': self.tol}
239                )
240                self.beta_ = np.array(res.x)
241            else:
242                # Fall back to NumPy for coordinate descent
243                self.beta_ = self._coordinate_descent(
244                    np.array(all_features), 
245                    np.array(centered_y), 
246                    n_direct
247                )
248        else:
249            # NumPy backend
250            beta_init = np.zeros(all_features.shape[1])
251            
252            if self.solver == "cd":
253                self.beta_ = self._coordinate_descent(
254                    all_features, 
255                    centered_y, 
256                    n_direct
257                )
258            else:
259                res = minimize(
260                    fun=self._numpy_obj,
261                    x0=beta_init,
262                    args=(all_features, centered_y, n_direct),
263                    method='L-BFGS-B',
264                    options={'maxiter': self.max_iter, 'gtol': self.tol}
265                )
266                self.beta_ = res.x
267        
268        self.y_mean_ = np.mean(y)
269        return self
270
271    def predict(self, X, **kwargs):
272        """Predict using fitted model"""
273        if len(X.shape) == 1:
274            n_features = X.shape[0]
275            new_X = mo.rbind(
276                x=X.reshape(1, n_features),
277                y=np.ones(n_features).reshape(1, n_features),
278                backend=self.backend,
279            )
280            return (
281                self.y_mean_
282                + mo.safe_sparse_dot(
283                    a=self.cook_test_set(new_X, **kwargs),
284                    b=self.beta_,
285                    backend=self.backend,
286                )
287            )[0]
288
289        return self.y_mean_ + mo.safe_sparse_dot(
290            a=self.cook_test_set(X, **kwargs),
291            b=self.beta_,
292            backend=self.backend,
293        )

Enhanced Elastic Net with dual regularization paths, JAX support, and coordinate descent.

Features:

  • Separate L1/L2 ratios for direct (lambda1/l1_ratio1) and hidden (lambda2/l1_ratio2) paths
  • JAX acceleration for GPU/TPU when backend != 'cpu'
  • Choice of optimization methods (L-BFGS-B or coordinate descent)
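
Concretely, `fit` minimizes a penalized least-squares objective in which the coefficient vector is split into a "direct" block (original features plus cluster indicators) and a "hidden" block. A plain-NumPy restatement of `_numpy_obj` / `_numpy_penalty` from the listing above (a sketch, not part of the public API):

```python
import numpy as np

def dual_elastic_net_objective(beta, X, y, n_direct,
                               lambda1, l1_ratio1, lambda2, l1_ratio2):
    """0.5 * MSE plus elastic net penalties applied separately to the
    direct (first n_direct) and hidden (remaining) coefficients."""
    beta_direct, beta_hidden = beta[:n_direct], beta[n_direct:]
    mse = np.mean((y - X @ beta) ** 2)
    penalty = (lambda1 * l1_ratio1 * np.sum(np.abs(beta_direct))
               + 0.5 * lambda1 * (1 - l1_ratio1) * np.sum(beta_direct ** 2)
               + lambda2 * l1_ratio2 * np.sum(np.abs(beta_hidden))
               + 0.5 * lambda2 * (1 - l1_ratio2) * np.sum(beta_hidden ** 2))
    return 0.5 * mse + penalty
```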

Parameters:

n_hidden_features: int
    Number of nodes in the hidden layer

activation_name: str
    Activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    Hyperparameter for 'prelu' or 'elu' activation

nodes_sim: str
    Node simulation type: 'sobol', 'hammersley', 'halton', 'uniform'

bias: bool
    Whether to include bias term in hidden layer

dropout: float
    Dropout rate (regularization)

n_clusters: int
    Number of clusters (0 for no clustering)

cluster_encode: bool
    Whether to one-hot encode clusters

type_clust: str
    Clustering method: 'kmeans' or 'gmm'

type_scaling: tuple
    Scaling methods for (inputs, hidden layer, clusters)

lambda1: float
    Regularization strength for direct connections

lambda2: float
    Regularization strength for hidden layer

l1_ratio1: float
    L1 ratio (0-1) for direct connections

l1_ratio2: float
    L1 ratio (0-1) for hidden layer

max_iter: int
    Maximum optimization iterations

tol: float
    Optimization tolerance

solver: str
    Optimization method: 'lbfgs' or 'cd' (coordinate descent)

seed: int
    Random seed

backend: str
    'cpu', 'gpu', or 'tpu'

def fit(self, X, y, **kwargs):
210    def fit(self, X, y, **kwargs):
211        """Fit model with selected optimization method"""
212        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
213        n_X, p_X = X.shape
214        n_Z, p_Z = scaled_Z.shape
215
216        if self.n_clusters > 0:
217            n_direct = p_X + (self.n_clusters if self.cluster_encode else 1)
218        else:
219            n_direct = p_X
220
221        X_ = scaled_Z[:, 0:n_direct]
222        Phi_X_ = scaled_Z[:, n_direct:p_Z]
223        all_features = np.hstack([X_, Phi_X_])
224        
225        # Convert to JAX arrays if using GPU/TPU
226        if self.backend != "cpu" and JAX_AVAILABLE:
227            all_features = jnp.array(all_features)
228            centered_y = jnp.array(centered_y)
229            beta_init = jnp.zeros(all_features.shape[1])
230            
231            if self.solver == "lbfgs":
232                res = minimize(
233                    fun=self._jax_obj,
234                    x0=beta_init,
235                    args=(all_features, centered_y, n_direct),
236                    method='L-BFGS-B',
237                    jac=self._jax_grad,
238                    options={'maxiter': self.max_iter, 'gtol': self.tol}
239                )
240                self.beta_ = np.array(res.x)
241            else:
242                # Fall back to NumPy for coordinate descent
243                self.beta_ = self._coordinate_descent(
244                    np.array(all_features), 
245                    np.array(centered_y), 
246                    n_direct
247                )
248        else:
249            # NumPy backend
250            beta_init = np.zeros(all_features.shape[1])
251            
252            if self.solver == "cd":
253                self.beta_ = self._coordinate_descent(
254                    all_features, 
255                    centered_y, 
256                    n_direct
257                )
258            else:
259                res = minimize(
260                    fun=self._numpy_obj,
261                    x0=beta_init,
262                    args=(all_features, centered_y, n_direct),
263                    method='L-BFGS-B',
264                    options={'maxiter': self.max_iter, 'gtol': self.tol}
265                )
266                self.beta_ = res.x
267        
268        self.y_mean_ = np.mean(y)
269        return self

Fit model with selected optimization method

def predict(self, X, **kwargs):
271    def predict(self, X, **kwargs):
272        """Predict using fitted model"""
273        if len(X.shape) == 1:
274            n_features = X.shape[0]
275            new_X = mo.rbind(
276                x=X.reshape(1, n_features),
277                y=np.ones(n_features).reshape(1, n_features),
278                backend=self.backend,
279            )
280            return (
281                self.y_mean_
282                + mo.safe_sparse_dot(
283                    a=self.cook_test_set(new_X, **kwargs),
284                    b=self.beta_,
285                    backend=self.backend,
286                )
287            )[0]
288
289        return self.y_mean_ + mo.safe_sparse_dot(
290            a=self.cook_test_set(X, **kwargs),
291            b=self.beta_,
292            backend=self.backend,
293        )

Predict using fitted model
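
A minimal end-to-end sketch on synthetic data (assuming the class is exposed as `ns.ElasticNet2Regressor`, as the package `__init__` suggests; numbers will vary with the random data):

```python
import nnetsauce as ns
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

X, y = make_regression(n_samples=200, n_features=10, noise=0.5, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# default L-BFGS-B solver on the penalized objective
reg = ns.ElasticNet2Regressor(n_hidden_features=10,
                              lambda1=0.1, l1_ratio1=0.5,
                              lambda2=0.1, l1_ratio2=0.5)
reg.fit(X_train, y_train)
print(reg.predict(X_test)[:5])

# coordinate descent (soft thresholding) instead of L-BFGS-B
reg_cd = ns.ElasticNet2Regressor(solver="cd")
reg_cd.fit(X_train, y_train)
print(reg_cd.predict(X_test)[:5])
```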

class GLMClassifier(nnetsauce.glm.glm.GLM, sklearn.base.ClassifierMixin):
 16class GLMClassifier(GLM, ClassifierMixin):
 17    """Generalized 'linear' models using quasi-randomized networks (classification)
 18
 19    Parameters:
 20
 21        n_hidden_features: int
 22            number of nodes in the hidden layer
 23
 24        lambda1: float
 25            regularization parameter for GLM coefficients on original features
 26
 27        alpha1: float
 28            controls the compromise between the l1 and l2 norms of GLM coefficients on original features
 29
 30        lambda2: float
 31            regularization parameter for GLM coefficients on nonlinear features
 32
 33        alpha2: float
 34            controls the compromise between the l1 and l2 norms of GLM coefficients on nonlinear features
 35
 36        activation_name: str
 37            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 38
 39        a: float
 40            hyperparameter for 'prelu' or 'elu' activation function
 41
 42        nodes_sim: str
 43            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 44            'uniform'
 45
 46        bias: boolean
 47            indicates if the hidden layer contains a bias term (True) or not
 48            (False)
 49
 50        dropout: float
 51            regularization parameter; (random) percentage of nodes dropped out
 52            of the training
 53
 54        direct_link: boolean
 55            indicates if the original predictors are included (True) in model's
 56            fitting or not (False)
 57
 58        n_clusters: int
 59            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 60                no clustering)
 61
 62        cluster_encode: bool
 63            defines how the variable containing clusters is treated (default is one-hot)
 64            if `False`, then labels are used, without one-hot encoding
 65
 66        type_clust: str
 67            type of clustering method: currently k-means ('kmeans') or Gaussian
 68            Mixture Model ('gmm')
 69
 70        type_scaling: a tuple of 3 strings
 71            scaling methods for inputs, hidden layer, and clustering respectively
 72            (and when relevant).
 73            Currently available: standardization ('std') or MinMax scaling ('minmax')
 74
 75        optimizer: object
 76            optimizer, from class nnetsauce.Optimizer
 77
 78        seed: int
 79            reproducibility seed for nodes_sim=='uniform'
 80
 81    Attributes:
 82
 83        beta_: vector
 84            regression coefficients
 85
 86    Examples:
 87
 88    See [https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_classification.py)
 89
 90    """
 91
 92    # construct the object -----
 93    _estimator_type = "classifier"
 94
 95    def __init__(
 96        self,
 97        n_hidden_features=5,
 98        lambda1=0.01,
 99        alpha1=0.5,
100        lambda2=0.01,
101        alpha2=0.5,
102        family="expit",
103        activation_name="relu",
104        a=0.01,
105        nodes_sim="sobol",
106        bias=True,
107        dropout=0,
108        direct_link=True,
109        n_clusters=2,
110        cluster_encode=True,
111        type_clust="kmeans",
112        type_scaling=("std", "std", "std"),
113        optimizer=Optimizer(),
114        seed=123,
115    ):
116        super().__init__(
117            n_hidden_features=n_hidden_features,
118            lambda1=lambda1,
119            alpha1=alpha1,
120            lambda2=lambda2,
121            alpha2=alpha2,
122            activation_name=activation_name,
123            a=a,
124            nodes_sim=nodes_sim,
125            bias=bias,
126            dropout=dropout,
127            direct_link=direct_link,
128            n_clusters=n_clusters,
129            cluster_encode=cluster_encode,
130            type_clust=type_clust,
131            type_scaling=type_scaling,
132            optimizer=optimizer,
133            seed=seed,
134        )
135
136        self.family = family
137
138    def logit_loss(self, Y, row_index, XB):
139        self.n_classes = Y.shape[1]  # len(np.unique(y))
140        # Y = mo.one_hot_encode2(y, self.n_classes)
141        # Y = self.optimizer.one_hot_encode(y, self.n_classes)
142
143        # max_double = 709.0 # only if softmax
144        # XB[XB > max_double] = max_double
145        XB[XB > 709.0] = 709.0
146
147        if row_index is None:
148            return -np.mean(np.sum(Y * XB, axis=1) - logsumexp(XB))
149
150        return -np.mean(np.sum(Y[row_index, :] * XB, axis=1) - logsumexp(XB))
151
152    def expit_erf_loss(self, Y, row_index, XB):
153        # self.n_classes = len(np.unique(y))
154        # Y = mo.one_hot_encode2(y, self.n_classes)
155        # Y = self.optimizer.one_hot_encode(y, self.n_classes)
156        self.n_classes = Y.shape[1]
157
158        if row_index is None:
159            return -np.mean(np.sum(Y * XB, axis=1) - logsumexp(XB))
160
161        return -np.mean(np.sum(Y[row_index, :] * XB, axis=1) - logsumexp(XB))
162
163    def loss_func(
164        self, beta, group_index, X, Y, y, row_index=None, type_loss="logit", **kwargs
165    ):
166        res = {
167            "logit": self.logit_loss,
168            "expit": self.expit_erf_loss,
169            "erf": self.expit_erf_loss,
170        }
171
172        if row_index is None:
173            row_index = range(len(y))
174            XB = self.compute_XB(
175                X,
176                beta=np.reshape(beta, (X.shape[1], self.n_classes), order="F"),
177            )
178
179            return res[type_loss](Y, row_index, XB) + self.compute_penalty(
180                group_index=group_index, beta=beta
181            )
182
183        XB = self.compute_XB(
184            X,
185            beta=np.reshape(beta, (X.shape[1], self.n_classes), order="F"),
186            row_index=row_index,
187        )
188
189        return res[type_loss](Y, row_index, XB) + self.compute_penalty(
190            group_index=group_index, beta=beta
191        )
192
193    def fit(self, X, y, **kwargs):
194        """Fit GLM model to training data (X, y).
195
196        Args:
197
198            X: {array-like}, shape = [n_samples, n_features]
199                Training vectors, where n_samples is the number
200                of samples and n_features is the number of features.
201
202            y: array-like, shape = [n_samples]
203                Target values.
204
205            **kwargs: additional parameters to be passed to
206                    self.cook_training_set or self.obj.fit
207
208        Returns:
209
210            self: object
211
212        """
213
214        assert mx.is_factor(
215            y
216        ), "y must contain only integers"  # change is_factor and subsampling everywhere
217
218        self.classes_ = np.unique(y)  # for compatibility with sklearn
219        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
220
221        self.beta_ = None
222
223        n, p = X.shape
224
225        self.group_index = n * X.shape[1]
226
227        self.n_classes = len(np.unique(y))
228
229        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
230
231        # Y = mo.one_hot_encode2(output_y, self.n_classes)
232        Y = self.optimizer.one_hot_encode(output_y, self.n_classes)
233
234        # initialization
235        beta_ = np.linalg.lstsq(scaled_Z, Y, rcond=None)[0]
236
237        # optimization
238        # fit(self, loss_func, response, x0, **kwargs):
239        # loss_func(self, beta, group_index, X, y,
240        #          row_index=None, type_loss="gaussian",
241        #          **kwargs)
242        self.optimizer.fit(
243            self.loss_func,
244            response=y,
245            x0=beta_.flatten(order="F"),
246            group_index=self.group_index,
247            X=scaled_Z,
248            Y=Y,
249            y=y,
250            type_loss=self.family,
251        )
252
253        self.beta_ = self.optimizer.results[0]
254        self.classes_ = np.unique(y)
255
256        return self
257
258    def predict(self, X, **kwargs):
259        """Predict test data X.
260
261        Args:
262
263            X: {array-like}, shape = [n_samples, n_features]
264                Training vectors, where n_samples is the number
265                of samples and n_features is the number of features.
266
267            **kwargs: additional parameters to be passed to
268                    self.cook_test_set
269
270        Returns:
271
272            model predictions: {array-like}
273
274        """
275
276        return np.argmax(self.predict_proba(X, **kwargs), axis=1)
277
278    def predict_proba(self, X, **kwargs):
279        """Predict probabilities for test data X.
280
281        Args:
282
283            X: {array-like}, shape = [n_samples, n_features]
284                Training vectors, where n_samples is the number
285                of samples and n_features is the number of features.
286
287            **kwargs: additional parameters to be passed to
288                    self.cook_test_set
289
290        Returns:
291
292            probability estimates for test data: {array-like}
293
294        """
295        if len(X.shape) == 1:
296            n_features = X.shape[0]
297            new_X = mo.rbind(
298                X.reshape(1, n_features),
299                np.ones(n_features).reshape(1, n_features),
300            )
301
302            Z = self.cook_test_set(new_X, **kwargs)
303
304        else:
305            Z = self.cook_test_set(X, **kwargs)
306
307        ZB = mo.safe_sparse_dot(
308            Z,
309            self.beta_.reshape(
310                self.n_classes,
311                X.shape[1] + self.n_hidden_features + self.n_clusters,
312            ).T,
313        )
314
315        if self.family == "logit":
316            exp_ZB = np.exp(ZB)
317
318            return exp_ZB / exp_ZB.sum(axis=1)[:, None]
319
320        if self.family == "expit":
321            exp_ZB = expit(ZB)
322
323            return exp_ZB / exp_ZB.sum(axis=1)[:, None]
324
325        if self.family == "erf":
326            exp_ZB = 0.5 * (1 + erf(ZB))
327
328            return exp_ZB / exp_ZB.sum(axis=1)[:, None]
329
330    def score(self, X, y, scoring=None):
331        """Scoring function for classification.
332
333        Args:
334
335            X: {array-like}, shape = [n_samples, n_features]
336                Training vectors, where n_samples is the number
337                of samples and n_features is the number of features.
338
339            y: array-like, shape = [n_samples]
340                Target values.
341
342            scoring: str
343                scoring method (default is accuracy)
344
345        Returns:
346
347            score: float
348        """
349
350        if scoring is None:
351            scoring = "accuracy"
352
353        if scoring == "accuracy":
354            return skm2.accuracy_score(y, self.predict(X))
355
356        if scoring == "f1":
357            return skm2.f1_score(y, self.predict(X))
358
359        if scoring == "precision":
360            return skm2.precision_score(y, self.predict(X))
361
362        if scoring == "recall":
363            return skm2.recall_score(y, self.predict(X))
364
365        if scoring == "roc_auc":
366            return skm2.roc_auc_score(y, self.predict(X))
367
368        if scoring == "log_loss":
369            return skm2.log_loss(y, self.predict_proba(X))
370
371        if scoring == "balanced_accuracy":
372            return skm2.balanced_accuracy_score(y, self.predict(X))
373
374        if scoring == "average_precision":
375            return skm2.average_precision_score(y, self.predict(X))
376
377        if scoring == "neg_brier_score":
378            return -skm2.brier_score_loss(y, self.predict_proba(X))
379
380        if scoring == "neg_log_loss":
381            return -skm2.log_loss(y, self.predict_proba(X))
382
383    @property
384    def _estimator_type(self):
385        return "classifier"            

Generalized 'linear' models using quasi-randomized networks (classification)

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

lambda1: float
    regularization parameter for GLM coefficients on original features

alpha1: float
    controls the compromise between the l1 and l2 norms of GLM coefficients on original features

lambda2: float
    regularization parameter for GLM coefficients on nonlinear features

alpha2: float
    controls the compromise between the l1 and l2 norms of GLM coefficients on nonlinear features

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

optimizer: object
    optimizer, from class nnetsauce.Optimizer

seed: int
    reproducibility seed for nodes_sim=='uniform'

Attributes:

beta_: vector
    regression coefficients

Examples:

See https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_classification.py
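
In addition to the script linked above, a minimal usage sketch (iris data, default `Optimizer`; settings may need tuning for real problems):

```python
import nnetsauce as ns
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)  # integer class labels, as required by fit
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123)

clf = ns.GLMClassifier(n_hidden_features=5, family="expit", seed=123)
clf.fit(X_train, y_train)
print(clf.predict(X_test)[:10])
print(clf.score(X_test, y_test))  # accuracy by default
```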

def fit(self, X, y, **kwargs):
193    def fit(self, X, y, **kwargs):
194        """Fit GLM model to training data (X, y).
195
196        Args:
197
198            X: {array-like}, shape = [n_samples, n_features]
199                Training vectors, where n_samples is the number
200                of samples and n_features is the number of features.
201
202            y: array-like, shape = [n_samples]
203                Target values.
204
205            **kwargs: additional parameters to be passed to
206                    self.cook_training_set or self.obj.fit
207
208        Returns:
209
210            self: object
211
212        """
213
214        assert mx.is_factor(
215            y
216        ), "y must contain only integers"  # change is_factor and subsampling everywhere
217
218        self.classes_ = np.unique(y)  # for compatibility with sklearn
219        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
220
221        self.beta_ = None
222
223        n, p = X.shape
224
225        self.group_index = n * X.shape[1]
226
227        self.n_classes = len(np.unique(y))
228
229        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
230
231        # Y = mo.one_hot_encode2(output_y, self.n_classes)
232        Y = self.optimizer.one_hot_encode(output_y, self.n_classes)
233
234        # initialization
235        beta_ = np.linalg.lstsq(scaled_Z, Y, rcond=None)[0]
236
237        # optimization
238        # fit(self, loss_func, response, x0, **kwargs):
239        # loss_func(self, beta, group_index, X, y,
240        #          row_index=None, type_loss="gaussian",
241        #          **kwargs)
242        self.optimizer.fit(
243            self.loss_func,
244            response=y,
245            x0=beta_.flatten(order="F"),
246            group_index=self.group_index,
247            X=scaled_Z,
248            Y=Y,
249            y=y,
250            type_loss=self.family,
251        )
252
253        self.beta_ = self.optimizer.results[0]
254        self.classes_ = np.unique(y)
255
256        return self

Fit GLM model to training data (X, y).

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
def predict(self, X, **kwargs):
258    def predict(self, X, **kwargs):
259        """Predict test data X.
260
261        Args:
262
263            X: {array-like}, shape = [n_samples, n_features]
264                Training vectors, where n_samples is the number
265                of samples and n_features is the number of features.
266
267            **kwargs: additional parameters to be passed to
268                    self.cook_test_set
269
270        Returns:
271
272            model predictions: {array-like}
273
274        """
275
276        return np.argmax(self.predict_proba(X, **kwargs), axis=1)

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
def predict_proba(self, X, **kwargs):
278    def predict_proba(self, X, **kwargs):
279        """Predict probabilities for test data X.
280
281        Args:
282
283            X: {array-like}, shape = [n_samples, n_features]
284                Training vectors, where n_samples is the number
285                of samples and n_features is the number of features.
286
287            **kwargs: additional parameters to be passed to
288                    self.cook_test_set
289
290        Returns:
291
292            probability estimates for test data: {array-like}
293
294        """
295        if len(X.shape) == 1:
296            n_features = X.shape[0]
297            new_X = mo.rbind(
298                X.reshape(1, n_features),
299                np.ones(n_features).reshape(1, n_features),
300            )
301
302            Z = self.cook_test_set(new_X, **kwargs)
303
304        else:
305            Z = self.cook_test_set(X, **kwargs)
306
307        ZB = mo.safe_sparse_dot(
308            Z,
309            self.beta_.reshape(
310                self.n_classes,
311                X.shape[1] + self.n_hidden_features + self.n_clusters,
312            ).T,
313        )
314
315        if self.family == "logit":
316            exp_ZB = np.exp(ZB)
317
318            return exp_ZB / exp_ZB.sum(axis=1)[:, None]
319
320        if self.family == "expit":
321            exp_ZB = expit(ZB)
322
323            return exp_ZB / exp_ZB.sum(axis=1)[:, None]
324
325        if self.family == "erf":
326            exp_ZB = 0.5 * (1 + erf(ZB))
327
328            return exp_ZB / exp_ZB.sum(axis=1)[:, None]

Predict probabilities for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

probability estimates for test data: {array-like}
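
A plain NumPy/SciPy restatement of the three branches above (a sketch; `ZB` stands for the linear predictor `Z @ beta` computed in the method):

```python
import numpy as np
from scipy.special import expit, erf

def glm_class_probs(ZB, family="expit"):
    """Row-normalized class scores, as in GLMClassifier.predict_proba."""
    if family == "logit":
        scores = np.exp(ZB)
    elif family == "expit":
        scores = expit(ZB)            # sigmoid
    elif family == "erf":
        scores = 0.5 * (1 + erf(ZB))  # Gaussian-CDF-like transform
    else:
        raise ValueError("unknown family")
    return scores / scores.sum(axis=1)[:, None]
```
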
def score(self, X, y, scoring=None):
330    def score(self, X, y, scoring=None):
331        """Scoring function for classification.
332
333        Args:
334
335            X: {array-like}, shape = [n_samples, n_features]
336                Training vectors, where n_samples is the number
337                of samples and n_features is the number of features.
338
339            y: array-like, shape = [n_samples]
340                Target values.
341
342            scoring: str
343                scoring method (default is accuracy)
344
345        Returns:
346
347            score: float
348        """
349
350        if scoring is None:
351            scoring = "accuracy"
352
353        if scoring == "accuracy":
354            return skm2.accuracy_score(y, self.predict(X))
355
356        if scoring == "f1":
357            return skm2.f1_score(y, self.predict(X))
358
359        if scoring == "precision":
360            return skm2.precision_score(y, self.predict(X))
361
362        if scoring == "recall":
363            return skm2.recall_score(y, self.predict(X))
364
365        if scoring == "roc_auc":
366            return skm2.roc_auc_score(y, self.predict(X))
367
368        if scoring == "log_loss":
369            return skm2.log_loss(y, self.predict_proba(X))
370
371        if scoring == "balanced_accuracy":
372            return skm2.balanced_accuracy_score(y, self.predict(X))
373
374        if scoring == "average_precision":
375            return skm2.average_precision_score(y, self.predict(X))
376
377        if scoring == "neg_brier_score":
378            return -skm2.brier_score_loss(y, self.predict_proba(X))
379
380        if scoring == "neg_log_loss":
381            return -skm2.log_loss(y, self.predict_proba(X))

Scoring function for classification.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

scoring: str
    scoring method (default is accuracy)

Returns:

score: float
class GLMRegressor(nnetsauce.glm.glm.GLM, sklearn.base.RegressorMixin):
 14class GLMRegressor(GLM, RegressorMixin):
 15    """Generalized 'linear' models using quasi-randomized networks (regression)
 16
 17    Parameters:
 18
 19        n_hidden_features: int
 20            number of nodes in the hidden layer
 21
 22        lambda1: float
 23            regularization parameter for GLM coefficients on original features
 24
 25        alpha1: float
 26            controls the compromise between the l1 and l2 norms of GLM coefficients on original features
 27
 28        lambda2: float
 29            regularization parameter for GLM coefficients on nonlinear features
 30
 31        alpha2: float
 32            controls the compromise between the l1 and l2 norms of GLM coefficients on nonlinear features
 33
 34        family: str
 35            "gaussian", "laplace" or "poisson" (for now)
 36
 37        activation_name: str
 38            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 39
 40        a: float
 41            hyperparameter for 'prelu' or 'elu' activation function
 42
 43        nodes_sim: str
 44            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 45            'uniform'
 46
 47        bias: boolean
 48            indicates if the hidden layer contains a bias term (True) or not
 49            (False)
 50
 51        dropout: float
 52            regularization parameter; (random) percentage of nodes dropped out
 53            of the training
 54
 55        direct_link: boolean
 56            indicates if the original predictors are included (True) in model's
 57            fitting or not (False)
 58
 59        n_clusters: int
 60            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 61                no clustering)
 62
 63        cluster_encode: bool
 64            defines how the variable containing clusters is treated (default is one-hot)
 65            if `False`, then labels are used, without one-hot encoding
 66
 67        type_clust: str
 68            type of clustering method: currently k-means ('kmeans') or Gaussian
 69            Mixture Model ('gmm')
 70
 71        type_scaling: a tuple of 3 strings
 72            scaling methods for inputs, hidden layer, and clustering respectively
 73            (and when relevant).
 74            Currently available: standardization ('std') or MinMax scaling ('minmax')
 75
 76        optimizer: object
 77            optimizer, from class nnetsauce.Optimizer
 78
 79        seed: int
 80            reproducibility seed for nodes_sim=='uniform'
 81
 82    Attributes:
 83
 84        beta_: vector
 85            regression coefficients
 86
 87    Examples:
 88
 89    See [https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py)
 90
 91    """
 92
 93    # construct the object -----
 94
 95    def __init__(
 96        self,
 97        n_hidden_features=5,
 98        lambda1=0.01,
 99        alpha1=0.5,
100        lambda2=0.01,
101        alpha2=0.5,
102        family="gaussian",
103        activation_name="relu",
104        a=0.01,
105        nodes_sim="sobol",
106        bias=True,
107        dropout=0,
108        direct_link=True,
109        n_clusters=2,
110        cluster_encode=True,
111        type_clust="kmeans",
112        type_scaling=("std", "std", "std"),
113        optimizer=Optimizer(),
114        seed=123,
115    ):
116        super().__init__(
117            n_hidden_features=n_hidden_features,
118            lambda1=lambda1,
119            alpha1=alpha1,
120            lambda2=lambda2,
121            alpha2=alpha2,
122            activation_name=activation_name,
123            a=a,
124            nodes_sim=nodes_sim,
125            bias=bias,
126            dropout=dropout,
127            direct_link=direct_link,
128            n_clusters=n_clusters,
129            cluster_encode=cluster_encode,
130            type_clust=type_clust,
131            type_scaling=type_scaling,
132            optimizer=optimizer,
133            seed=seed,
134        )
135
136        self.family = family
137
138    def gaussian_loss(self, y, row_index, XB):
139        return 0.5 * np.mean(np.square(y[row_index] - XB))
140
141    def laplace_loss(self, y, row_index, XB):
142        return 0.5 * np.mean(np.abs(y[row_index] - XB))
143
144    def poisson_loss(self, y, row_index, XB):
145        return -np.mean(y[row_index] * XB - np.exp(XB))
146
147    def loss_func(
148        self, beta, group_index, X, y, row_index=None, type_loss="gaussian", **kwargs
149    ):
150        res = {
151            "gaussian": self.gaussian_loss,
152            "laplace": self.laplace_loss,
153            "poisson": self.poisson_loss,
154        }
155
156        if row_index is None:
157            row_index = range(len(y))
158            XB = self.compute_XB(X, beta=beta)
159
160            return res[type_loss](y, row_index, XB) + self.compute_penalty(
161                group_index=group_index, beta=beta
162            )
163
164        XB = self.compute_XB(X, beta=beta, row_index=row_index)
165
166        return res[type_loss](y, row_index, XB) + self.compute_penalty(
167            group_index=group_index, beta=beta
168        )
169
170    def fit(self, X, y, **kwargs):
171        """Fit GLM model to training data (X, y).
172
173        Args:
174
175            X: {array-like}, shape = [n_samples, n_features]
176                Training vectors, where n_samples is the number
177                of samples and n_features is the number of features.
178
179            y: array-like, shape = [n_samples]
180                Target values.
181
182            **kwargs: additional parameters to be passed to
183                    self.cook_training_set or self.obj.fit
184
185        Returns:
186
187            self: object
188
189        """
190
191        self.beta_ = None
192
193        self.n_iter = 0
194
195        n, self.group_index = X.shape
196
197        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
198
199        n_Z = scaled_Z.shape[0]
200
201        # initialization
202        beta_ = np.linalg.lstsq(scaled_Z, centered_y, rcond=None)[0]
203
204        # optimization
205        # fit(self, loss_func, response, x0, **kwargs):
206        # loss_func(self, beta, group_index, X, y,
207        #          row_index=None, type_loss="gaussian",
208        #          **kwargs)
209        self.optimizer.fit(
210            self.loss_func,
211            response=centered_y,
212            x0=beta_,
213            group_index=self.group_index,
214            X=scaled_Z,
215            y=centered_y,
216            type_loss=self.family,
217            **kwargs
218        )
219
220        self.beta_ = self.optimizer.results[0]
221
222        return self
223
224    def predict(self, X, **kwargs):
225        """Predict test data X.
226
227        Args:
228
229            X: {array-like}, shape = [n_samples, n_features]
230                Training vectors, where n_samples is the number
231                of samples and n_features is the number of features.
232
233            **kwargs: additional parameters to be passed to
234                    self.cook_test_set
235
236        Returns:
237
238            model predictions: {array-like}
239
240        """
241
242        if len(X.shape) == 1:
243            n_features = X.shape[0]
244            new_X = mo.rbind(
245                X.reshape(1, n_features),
246                np.ones(n_features).reshape(1, n_features),
247            )
248
249            return (
250                self.y_mean_ + np.dot(self.cook_test_set(new_X, **kwargs), self.beta_)
251            )[0]
252
253        return self.y_mean_ + np.dot(self.cook_test_set(X, **kwargs), self.beta_)
254
255    def score(self, X, y, scoring=None):
256        """Compute the score of the model.
257
258        Parameters:
259
260            X: {array-like}, shape = [n_samples, n_features]
261                Training vectors, where n_samples is the number
262                of samples and n_features is the number of features.
263
264            y: array-like, shape = [n_samples]
265                Target values.
266
267            scoring: str
268                scoring method
269
270        Returns:
271
272            score: float
273
274        """
275
276        if scoring is None:
277            return np.sqrt(np.mean((self.predict(X) - y) ** 2))
278
279        return skm2.get_scorer(scoring)(self, X, y)

Generalized 'linear' models using quasi-randomized networks (regression)

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

lambda1: float
    regularization parameter for GLM coefficients on original features

alpha1: float
    controls the compromise between the l1 and l2 norms of GLM coefficients on original features

lambda2: float
    regularization parameter for GLM coefficients on nonlinear features

alpha2: float
    controls the compromise between the l1 and l2 norms of GLM coefficients on nonlinear features (see the penalty sketch after this list)

family: str
    "gaussian", "laplace" or "poisson" (for now)

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

optimizer: object
    optimizer, from class nnetsauce.utils.Optimizer

seed: int
    reproducibility seed for nodes_sim=='uniform'
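
A note on the regularization parameters above (an explanatory addition, hedged): assuming the usual elastic-net convention, the penalty added to the data-fit term in `loss_func` has roughly the form

$$
\mathrm{penalty}(\beta) \;=\;
\lambda_1\left[\alpha_1\,\lVert\beta^{(1)}\rVert_1 + \tfrac{1-\alpha_1}{2}\,\lVert\beta^{(1)}\rVert_2^2\right]
\;+\;
\lambda_2\left[\alpha_2\,\lVert\beta^{(2)}\rVert_1 + \tfrac{1-\alpha_2}{2}\,\lVert\beta^{(2)}\rVert_2^2\right],
$$

where $\beta^{(1)}$ are the coefficients on the original features and $\beta^{(2)}$ those on the hidden (nonlinear) features; the exact scaling constants are defined in `compute_penalty`, and the data-fit term is selected by `family`.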

Attributes:

beta_: vector
    regression coefficients

Examples:

See https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py
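
In addition to the linked example file, a minimal hedged sketch (not taken from that file; default hyperparameters, including the default optimizer, are assumed):

```python
# Hedged sketch: minimal GLMRegressor usage on scikit-learn's diabetes data.
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123
)

reg = ns.GLMRegressor(n_hidden_features=5, family="gaussian")
reg.fit(X_train, y_train)           # least-squares initialization, then the optimizer runs
print(reg.predict(X_test)[:5])      # first few test predictions
```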

def fit(self, X, y, **kwargs):
170    def fit(self, X, y, **kwargs):
171        """Fit GLM model to training data (X, y).
172
173        Args:
174
175            X: {array-like}, shape = [n_samples, n_features]
176                Training vectors, where n_samples is the number
177                of samples and n_features is the number of features.
178
179            y: array-like, shape = [n_samples]
180                Target values.
181
182            **kwargs: additional parameters to be passed to
183                    self.cook_training_set or self.obj.fit
184
185        Returns:
186
187            self: object
188
189        """
190
191        self.beta_ = None
192
193        self.n_iter = 0
194
195        n, self.group_index = X.shape
196
197        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
198
199        n_Z = scaled_Z.shape[0]
200
201        # initialization
202        beta_ = np.linalg.lstsq(scaled_Z, centered_y, rcond=None)[0]
203
204        # optimization
205        # fit(self, loss_func, response, x0, **kwargs):
206        # loss_func(self, beta, group_index, X, y,
207        #          row_index=None, type_loss="gaussian",
208        #          **kwargs)
209        self.optimizer.fit(
210            self.loss_func,
211            response=centered_y,
212            x0=beta_,
213            group_index=self.group_index,
214            X=scaled_Z,
215            y=centered_y,
216            type_loss=self.family,
217            **kwargs
218        )
219
220        self.beta_ = self.optimizer.results[0]
221
222        return self

Fit GLM model to training data (X, y).

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
def predict(self, X, **kwargs):
224    def predict(self, X, **kwargs):
225        """Predict test data X.
226
227        Args:
228
229            X: {array-like}, shape = [n_samples, n_features]
230                Training vectors, where n_samples is the number
231                of samples and n_features is the number of features.
232
233            **kwargs: additional parameters to be passed to
234                    self.cook_test_set
235
236        Returns:
237
238            model predictions: {array-like}
239
240        """
241
242        if len(X.shape) == 1:
243            n_features = X.shape[0]
244            new_X = mo.rbind(
245                X.reshape(1, n_features),
246                np.ones(n_features).reshape(1, n_features),
247            )
248
249            return (
250                self.y_mean_ + np.dot(self.cook_test_set(new_X, **kwargs), self.beta_)
251            )[0]
252
253        return self.y_mean_ + np.dot(self.cook_test_set(X, **kwargs), self.beta_)

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
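
A short hedged note on input shapes, following the 1-D branch in the source above (`reg` and `X_test` refer to the GLMRegressor sketch given earlier):

```python
# Hedged sketch: predict() accepts a single 1-D observation as well as a 2-D batch.
single_pred = reg.predict(X_test[0])    # 1-D input -> one prediction
batch_preds = reg.predict(X_test[:10])  # 2-D input -> array of predictions
```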
def score(self, X, y, scoring=None):
255    def score(self, X, y, scoring=None):
256        """Compute the score of the model.
257
258        Parameters:
259
260            X: {array-like}, shape = [n_samples, n_features]
261                Training vectors, where n_samples is the number
262                of samples and n_features is the number of features.
263
264            y: array-like, shape = [n_samples]
265                Target values.
266
267            scoring: str
268                scoring method
269
270        Returns:
271
272            score: float
273
274        """
275
276        if scoring is None:
277            return np.sqrt(np.mean((self.predict(X) - y) ** 2))
278
279        return skm2.get_scorer(scoring)(self, X, y)

Compute the score of the model.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

scoring: str
    scoring method

Returns:

score: float
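
A hedged sketch of the two scoring modes shown in the source above (`reg`, `X_test`, `y_test` from the earlier sketch):

```python
# Hedged sketch: RMSE by default, or any scikit-learn scorer name.
rmse = reg.score(X_test, y_test)              # scoring=None -> root mean squared error
r2 = reg.score(X_test, y_test, scoring="r2")  # deferred to scikit-learn's get_scorer
print(rmse, r2)
```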
class KernelRidge(sklearn.base.BaseEstimator, sklearn.base.RegressorMixin):
 18class KernelRidge(BaseEstimator, RegressorMixin):
 19    """
 20    Kernel Ridge Regression with optional GPU support, Matérn kernels, and automatic input standardization.
 21
 22    Parameters:
 23    - alpha: float
 24        Regularization parameter.
 25    - kernel: str
 26        Kernel type ("linear", "rbf", or "matern").
 27    - gamma: float
 28        Kernel coefficient for "rbf". Ignored for other kernels.
 29    - nu: float
 30        Smoothness parameter for the Matérn kernel. Default is 1.5.
 31    - length_scale: float
 32        Length scale parameter for the Matérn kernel. Default is 1.0.
 33    - backend: str
 34        "cpu" or "gpu" (uses JAX if "gpu").
 35    """
 36
 37    def __init__(
 38        self,
 39        alpha=1.0,
 40        kernel="rbf",
 41        gamma=None,
 42        nu=1.5,
 43        length_scale=1.0,
 44        backend="cpu",
 45    ):
 46        self.alpha = alpha
 47        self.alpha_ = alpha
 48        self.kernel = kernel
 49        self.gamma = gamma
 50        self.nu = nu
 51        self.length_scale = length_scale
 52        self.backend = backend
 53        self.scaler = StandardScaler()
 54
 55        if backend == "gpu" and not JAX_AVAILABLE:
 56            raise ImportError(
 57                "JAX is not installed. Please install JAX to use the GPU backend."
 58            )
 59
 60    def _linear_kernel(self, X, Y):
 61        return jnp.dot(X, Y.T) if self.backend == "gpu" else np.dot(X, Y.T)
 62
 63    def _rbf_kernel(self, X, Y):
 64        if self.gamma is None:
 65            self.gamma = 1.0 / X.shape[1]
 66        if self.backend == "gpu":
 67            sq_dists = (
 68                jnp.sum(X**2, axis=1)[:, None]
 69                + jnp.sum(Y**2, axis=1)
 70                - 2 * jnp.dot(X, Y.T)
 71            )
 72            return jnp.exp(-self.gamma * sq_dists)
 73        else:
 74            sq_dists = (
 75                np.sum(X**2, axis=1)[:, None]
 76                + np.sum(Y**2, axis=1)
 77                - 2 * np.dot(X, Y.T)
 78            )
 79            return np.exp(-self.gamma * sq_dists)
 80
 81    def _matern_kernel(self, X, Y):
 82        """
 83        Compute the Matérn kernel using JAX for GPU or NumPy for CPU.
 84
 85        Parameters:
 86        - X: array-like, shape (n_samples_X, n_features)
 87        - Y: array-like, shape (n_samples_Y, n_features)
 88
 89        Returns:
 90        - Kernel matrix, shape (n_samples_X, n_samples_Y)
 91        """
 92        if self.backend == "gpu":
 93            # Compute pairwise distances
 94            dists = jnp.sqrt(jnp.sum((X[:, None, :] - Y[None, :, :]) ** 2, axis=2))
 95            scaled_dists = jnp.sqrt(2 * self.nu) * dists / self.length_scale
 96
 97            # Matérn kernel formula
 98            coeff = (2 ** (1 - self.nu)) / jnp.exp(gammaln(self.nu))
 99            matern_kernel = coeff * (scaled_dists**self.nu) * kv(self.nu, scaled_dists)
100            matern_kernel = jnp.where(
101                dists == 0, 1.0, matern_kernel
102            )  # Handle the case where distance is 0
103            return matern_kernel
104        else:
105            # Use NumPy for CPU
106            from scipy.special import (
107                gammaln,
108                kv,
109            )  # Ensure scipy.special is used for CPU
110
111            dists = np.sqrt(np.sum((X[:, None, :] - Y[None, :, :]) ** 2, axis=2))
112            scaled_dists = np.sqrt(2 * self.nu) * dists / self.length_scale
113
114            # Matérn kernel formula
115            coeff = (2 ** (1 - self.nu)) / np.exp(gammaln(self.nu))
116            matern_kernel = coeff * (scaled_dists**self.nu) * kv(self.nu, scaled_dists)
117            matern_kernel = np.where(
118                dists == 0, 1.0, matern_kernel
119            )  # Handle the case where distance is 0
120            return matern_kernel
121
122    def _get_kernel(self, X, Y):
123        if self.kernel == "linear":
124            return self._linear_kernel(X, Y)
125        elif self.kernel == "rbf":
126            return self._rbf_kernel(X, Y)
127        elif self.kernel == "matern":
128            return self._matern_kernel(X, Y)
129        else:
130            raise ValueError(f"Unsupported kernel: {self.kernel}")
131
132    def fit(self, X, y):
133        """
134        Fit the Kernel Ridge Regression model.
135
136        Parameters:
137        - X: array-like, shape (n_samples, n_features)
138            Training data.
139        - y: array-like, shape (n_samples,)
140            Target values.
141        """
142        # Standardize the inputs
143        X = self.scaler.fit_transform(X)
144        self.X_fit_ = X
145
146        # Center the response
147        self.y_mean_ = np.mean(y)
148        y_centered = y - self.y_mean_
149
150        n_samples = X.shape[0]
151
152        # Compute the kernel matrix
153        K = self._get_kernel(X, X)
154        self.K_ = K
155        self.y_fit_ = y_centered
156
157        if isinstance(self.alpha, (list, np.ndarray)):
158            # If alpha is a list or array, compute LOOE for each alpha
159            self.alphas_ = self.alpha  # Store the list of alphas
160            self.dual_coefs_ = []  # Store dual coefficients for each alpha
161            self.looe_ = []  # Store LOOE for each alpha
162
163            for alpha in self.alpha:
164                G = K + alpha * np.eye(n_samples)
165                G_inv = np.linalg.inv(G)
166                diag_G_inv = np.diag(G_inv)
167                dual_coef = np.linalg.solve(G, y_centered)
168                looe = np.sum((dual_coef / diag_G_inv) ** 2)  # Compute LOOE
169                self.dual_coefs_.append(dual_coef)
170                self.looe_.append(looe)
171
172            # Select the best alpha based on the smallest LOOE
173            best_index = np.argmin(self.looe_)
174            self.alpha_ = self.alpha[best_index]
175            self.dual_coef_ = self.dual_coefs_[best_index]
176        else:
177            # If alpha is a single value, proceed as usual
178            if self.backend == "gpu":
179                self.dual_coef_ = jnp.linalg.solve(
180                    K + self.alpha * jnp.eye(n_samples), y_centered
181                )
182            else:
183                self.dual_coef_ = np.linalg.solve(
184                    K + self.alpha * np.eye(n_samples), y_centered
185                )
186
187        return self
188
189    def predict(self, X, probs=False):
190        """
191        Predict using the Kernel Ridge Regression model.
192
193        Parameters:
194        - X: array-like, shape (n_samples, n_features)
195            Test data.
196
197        Returns:
198        - Predicted values, shape (n_samples,).
199        """
200        # Standardize the inputs
201        X = self.scaler.transform(X)
202        K = self._get_kernel(X, self.X_fit_)
203        if self.backend == "gpu":
204            preds = jnp.dot(K, self.dual_coef_) + self.y_mean_
205            if probs:
206                # Compute similarity to self.X_fit_
207                similarities = jnp.dot(
208                    preds, self.X_fit_.T
209                )  # Shape: (n_samples, n_fit_)
210                # Apply softmax to get probabilities
211                return jaxsoftmax(similarities, axis=1)
212            return preds
213        else:
214            preds = np.dot(K, self.dual_coef_) + self.y_mean_
215            if probs:
216                # Compute similarity to self.X_fit_
217                similarities = np.dot(
218                    preds, self.X_fit_.T
219                )  # Shape: (n_samples, n_fit_)
220                # Apply softmax to get probabilities
221                return softmax(similarities, axis=1)
222            return preds
223
224    def partial_fit(self, X, y):
225        """
226        Incrementally fit the Kernel Ridge Regression model with new data using a recursive approach.
227
228        Parameters:
229        - X: array-like, shape (n_samples, n_features)
230            New training data.
231        - y: array-like, shape (n_samples,)
232            New target values.
233
234        Returns:
235        - self: object
236            The updated model.
237        """
238        # Standardize the inputs
239        X = (
240            self.scaler.fit_transform(X)
241            if not hasattr(self, "X_fit_")
242            else self.scaler.transform(X)
243        )
244
245        if not hasattr(self, "X_fit_"):
246            # Initialize with the first batch of data
247            self.X_fit_ = X
248
249            # Center the response
250            self.y_mean_ = np.mean(y)
251            y_centered = y - self.y_mean_
252            self.y_fit_ = y_centered
253
254            n_samples = X.shape[0]
255
256            # Compute the kernel matrix for the initial data
257            self.K_ = self._get_kernel(X, X)
258
259            # Initialize dual coefficients for each alpha
260            if isinstance(self.alpha, (list, np.ndarray)):
261                self.dual_coefs_ = [np.zeros(n_samples) for _ in self.alpha]
262            else:
263                self.dual_coef_ = np.zeros(n_samples)
264        else:
265            # Incrementally update with new data
266            y_centered = y - self.y_mean_  # Center the new batch of responses
267            for x_new, y_new in zip(X, y_centered):
268                x_new = x_new.reshape(1, -1)  # Ensure x_new is 2D
269                k_new = self._get_kernel(self.X_fit_, x_new).flatten()
270
271                # Compute the kernel value for the new data point
272                k_self = self._get_kernel(x_new, x_new).item()
273
274                if isinstance(self.alpha, (list, np.ndarray)):
275                    # Update dual coefficients for each alpha
276                    for idx, alpha in enumerate(self.alpha):
277                        gamma_new = 1 / (k_self + alpha)
278                        residual = y_new - np.dot(self.dual_coefs_[idx], k_new)
279                        self.dual_coefs_[idx] = np.append(
280                            self.dual_coefs_[idx], gamma_new * residual
281                        )
282                else:
283                    # Update dual coefficients for a single alpha
284                    gamma_new = 1 / (k_self + self.alpha)
285                    residual = y_new - np.dot(self.dual_coef_, k_new)
286                    self.dual_coef_ = np.append(self.dual_coef_, gamma_new * residual)
287
288                # Update the kernel matrix
289                self.K_ = np.block(
290                    [[self.K_, k_new[:, None]], [k_new[None, :], np.array([[k_self]])]]
291                )
292
293                # Update the stored data
294                self.X_fit_ = np.vstack([self.X_fit_, x_new])
295                self.y_fit_ = np.append(self.y_fit_, y_new)
296
297        # Select the best alpha based on LOOE after the batch
298        if isinstance(self.alpha, (list, np.ndarray)):
299            self.looe_ = []
300            for idx, alpha in enumerate(self.alpha):
301                G = self.K_ + alpha * np.eye(self.K_.shape[0])
302                G_inv = np.linalg.inv(G)
303                diag_G_inv = np.diag(G_inv)
304                looe = np.sum((self.dual_coefs_[idx] / diag_G_inv) ** 2)
305                self.looe_.append(looe)
306
307            # Select the best alpha
308            best_index = np.argmin(self.looe_)
309            self.alpha_ = self.alpha[best_index]
310            self.dual_coef_ = self.dual_coefs_[best_index]
311
312        return self

Kernel Ridge Regression with optional GPU support, Matérn kernels, and automatic input standardization.

Parameters:

  • alpha: float Regularization parameter.
  • kernel: str Kernel type ("linear", "rbf", or "matern").
  • gamma: float Kernel coefficient for "rbf". Ignored for other kernels.
  • nu: float Smoothness parameter for the Matérn kernel. Default is 1.5.
  • length_scale: float Length scale parameter for the Matérn kernel. Default is 1.0.
  • backend: str "cpu" or "gpu" (uses JAX if "gpu").
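
A hedged usage sketch (assuming the class is importable from the package top level as `ns.KernelRidge`); passing a list of `alpha` values triggers the leave-one-out selection sketched after `fit()` below:

```python
# Hedged sketch: KernelRidge on a toy regression task with an alpha grid.
import nnetsauce as ns
import numpy as np
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

X, y = make_regression(n_samples=200, n_features=5, noise=0.5, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

krr = ns.KernelRidge(alpha=[0.01, 0.1, 1.0, 10.0], kernel="rbf", backend="cpu")
krr.fit(X_train, y_train)                  # picks the alpha with the smallest LOO error
print("selected alpha:", krr.alpha_)
preds = krr.predict(X_test)
print("test RMSE:", np.sqrt(np.mean((preds - y_test) ** 2)))
```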
def fit(self, X, y):
132    def fit(self, X, y):
133        """
134        Fit the Kernel Ridge Regression model.
135
136        Parameters:
137        - X: array-like, shape (n_samples, n_features)
138            Training data.
139        - y: array-like, shape (n_samples,)
140            Target values.
141        """
142        # Standardize the inputs
143        X = self.scaler.fit_transform(X)
144        self.X_fit_ = X
145
146        # Center the response
147        self.y_mean_ = np.mean(y)
148        y_centered = y - self.y_mean_
149
150        n_samples = X.shape[0]
151
152        # Compute the kernel matrix
153        K = self._get_kernel(X, X)
154        self.K_ = K
155        self.y_fit_ = y_centered
156
157        if isinstance(self.alpha, (list, np.ndarray)):
158            # If alpha is a list or array, compute LOOE for each alpha
159            self.alphas_ = self.alpha  # Store the list of alphas
160            self.dual_coefs_ = []  # Store dual coefficients for each alpha
161            self.looe_ = []  # Store LOOE for each alpha
162
163            for alpha in self.alpha:
164                G = K + alpha * np.eye(n_samples)
165                G_inv = np.linalg.inv(G)
166                diag_G_inv = np.diag(G_inv)
167                dual_coef = np.linalg.solve(G, y_centered)
168                looe = np.sum((dual_coef / diag_G_inv) ** 2)  # Compute LOOE
169                self.dual_coefs_.append(dual_coef)
170                self.looe_.append(looe)
171
172            # Select the best alpha based on the smallest LOOE
173            best_index = np.argmin(self.looe_)
174            self.alpha_ = self.alpha[best_index]
175            self.dual_coef_ = self.dual_coefs_[best_index]
176        else:
177            # If alpha is a single value, proceed as usual
178            if self.backend == "gpu":
179                self.dual_coef_ = jnp.linalg.solve(
180                    K + self.alpha * jnp.eye(n_samples), y_centered
181                )
182            else:
183                self.dual_coef_ = np.linalg.solve(
184                    K + self.alpha * np.eye(n_samples), y_centered
185                )
186
187        return self

Fit the Kernel Ridge Regression model.

Parameters:

  • X: array-like, shape (n_samples, n_features) Training data.
  • y: array-like, shape (n_samples,) Target values.
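
A note on the `alpha` grid handling in `fit()` above (an explanatory addition, not part of the docstring): the `looe` computation is the standard closed-form leave-one-out error for ridge-type duals. With the centered response $y_c$,

$$
G = K + \alpha I, \qquad c = G^{-1} y_c, \qquad
e_i^{\mathrm{LOO}} = \frac{c_i}{(G^{-1})_{ii}}, \qquad
\mathrm{LOOE}(\alpha) = \sum_{i=1}^{n} \left( \frac{c_i}{(G^{-1})_{ii}} \right)^{2},
$$

and the candidate with the smallest $\mathrm{LOOE}(\alpha)$ is stored in `alpha_`, with its dual coefficients in `dual_coef_`.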
def predict(self, X, probs=False):
189    def predict(self, X, probs=False):
190        """
191        Predict using the Kernel Ridge Regression model.
192
193        Parameters:
194        - X: array-like, shape (n_samples, n_features)
195            Test data.
196
197        Returns:
198        - Predicted values, shape (n_samples,).
199        """
200        # Standardize the inputs
201        X = self.scaler.transform(X)
202        K = self._get_kernel(X, self.X_fit_)
203        if self.backend == "gpu":
204            preds = jnp.dot(K, self.dual_coef_) + self.y_mean_
205            if probs:
206                # Compute similarity to self.X_fit_
207                similarities = jnp.dot(
208                    preds, self.X_fit_.T
209                )  # Shape: (n_samples, n_fit_)
210                # Apply softmax to get probabilities
211                return jaxsoftmax(similarities, axis=1)
212            return preds
213        else:
214            preds = np.dot(K, self.dual_coef_) + self.y_mean_
215            if probs:
216                # Compute similarity to self.X_fit_
217                similarities = np.dot(
218                    preds, self.X_fit_.T
219                )  # Shape: (n_samples, n_fit_)
220                # Apply softmax to get probabilities
221                return softmax(similarities, axis=1)
222            return preds

Predict using the Kernel Ridge Regression model.

Parameters:

  • X: array-like, shape (n_samples, n_features) Test data.

Returns:

  • Predicted values, shape (n_samples,).
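
The class also exposes `partial_fit` (source shown above); a hedged sketch of incremental use with a single `alpha`. Note that, as written, the first call only stores the data and zero-initializes the dual coefficients; later observations are folded in one at a time:

```python
# Hedged sketch: incremental updates with partial_fit on synthetic data.
import nnetsauce as ns
import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 3))
y = X @ np.array([1.0, -2.0, 0.5]) + rng.normal(scale=0.1, size=100)

krr = ns.KernelRidge(alpha=1.0, kernel="rbf")
krr.partial_fit(X[:50], y[:50])   # initial batch: stores data, zero dual coefficients
krr.partial_fit(X[50:], y[50:])   # recursive updates, one observation at a time
print(krr.predict(X[-5:]))
```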
class LazyClassifier(nnetsauce.LazyDeepClassifier):
757class LazyClassifier(LazyDeepClassifier):
758    """
759        Fitting -- almost -- all the classification algorithms with
760        nnetsauce's CustomClassifier and returning their scores (no layers).
761
762    Parameters:
763
764        verbose: int, optional (default=0)
765            Any positive number for verbosity.
766
767        ignore_warnings: bool, optional (default=True)
768            When set to True, warnings related to algorithms that are not able to run are ignored.
769
770        custom_metric: function, optional (default=None)
771            When function is provided, models are evaluated based on the custom evaluation metric provided.
772
773        predictions: bool, optional (default=False)
774            When set to True, the predictions of all the models are returned as a dataframe.
775
776        sort_by: string, optional (default='Accuracy')
777            Sort models by a metric. Available options are 'Accuracy', 'Balanced Accuracy', 'ROC AUC', 'F1 Score'
778            or a custom metric identified by its name and provided by custom_metric.
779
780        random_state: int, optional (default=42)
781            Reproducibility seed.
782
783        estimators: list, optional (default='all')
784            list of estimator names or just 'all' (default='all')
785
786        preprocess: bool
787            preprocessing is done when set to True
788
789        n_jobs: int, when possible, run in parallel
790            For now, only used by individual models that support it.
791
792        All the other parameters are the same as CustomClassifier's.
793
794    Attributes:
795
796        models_: dict-object
797            Returns a dictionary with each model pipeline as value
798            with key as name of models.
799
800        best_model_: object
801            Returns the best model pipeline based on the sort_by metric.
802
803    Examples:
804
805        import nnetsauce as ns
806        import numpy as np
807        from sklearn import datasets
808        from sklearn.utils import shuffle
809
810        dataset = datasets.load_iris()
811        X = dataset.data
812        y = dataset.target
813        X, y = shuffle(X, y, random_state=123)
814        X = X.astype(np.float32)
815        y = y.astype(np.float32)
816        X_train, X_test = X[:100], X[100:]
817        y_train, y_test = y[:100], y[100:]
818
819        clf = ns.LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
820        models, predictions = clf.fit(X_train, X_test, y_train, y_test)
821        model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test)
822        print(models)
823
824    """
825
826    def __init__(
827        self,
828        verbose=0,
829        ignore_warnings=True,
830        custom_metric=None,
831        predictions=False,
832        sort_by="Accuracy",
833        random_state=42,
834        estimators="all",
835        preprocess=False,
836        n_jobs=None,
837        # CustomClassifier attributes
838        obj=None,
839        n_hidden_features=5,
840        activation_name="relu",
841        a=0.01,
842        nodes_sim="sobol",
843        bias=True,
844        dropout=0,
845        direct_link=True,
846        n_clusters=2,
847        cluster_encode=True,
848        type_clust="kmeans",
849        type_scaling=("std", "std", "std"),
850        col_sample=1,
851        row_sample=1,
852        seed=123,
853        backend="cpu",
854    ):
855        super().__init__(
856            verbose=verbose,
857            ignore_warnings=ignore_warnings,
858            custom_metric=custom_metric,
859            predictions=predictions,
860            sort_by=sort_by,
861            random_state=random_state,
862            estimators=estimators,
863            preprocess=preprocess,
864            n_jobs=n_jobs,
865            n_layers=1,
866            obj=obj,
867            n_hidden_features=n_hidden_features,
868            activation_name=activation_name,
869            a=a,
870            nodes_sim=nodes_sim,
871            bias=bias,
872            dropout=dropout,
873            direct_link=direct_link,
874            n_clusters=n_clusters,
875            cluster_encode=cluster_encode,
876            type_clust=type_clust,
877            type_scaling=type_scaling,
878            col_sample=col_sample,
879            row_sample=row_sample,
880            seed=seed,
881            backend=backend,
882        )

Fitting -- almost -- all the classification algorithms with nnetsauce's CustomClassifier and returning their scores (no layers).

Parameters:

verbose: int, optional (default=0)
    Any positive number for verbosity.

ignore_warnings: bool, optional (default=True)
    When set to True, warnings related to algorithms that are not able to run are ignored.

custom_metric: function, optional (default=None)
    When function is provided, models are evaluated based on the custom evaluation metric provided.

predictions: bool, optional (default=False)
    When set to True, the predictions of all the models are returned as a dataframe.

sort_by: string, optional (default='Accuracy')
    Sort models by a metric. Available options are 'Accuracy', 'Balanced Accuracy', 'ROC AUC', 'F1 Score'
    or a custom metric identified by its name and provided by custom_metric.

random_state: int, optional (default=42)
    Reproducibility seed.

estimators: list, optional (default='all')
    list of estimator names or just 'all' (default='all')

preprocess: bool
    preprocessing is done when set to True

n_jobs: int, when possible, run in parallel
    For now, only used by individual models that support it.

All the other parameters are the same as CustomClassifier's.

Attributes:

models_: dict-object
    Returns a dictionary with each model pipeline as value
    with key as name of models.

best_model_: object
    Returns the best model pipeline based on the sort_by metric.

Examples:

import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle

dataset = datasets.load_iris()
X = dataset.data
y = dataset.target
X, y = shuffle(X, y, random_state=123)
X = X.astype(np.float32)
y = y.astype(np.float32)
X_train, X_test = X[:100], X[100:]
y_train, y_test = y[:100], y[100:]

clf = ns.LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)
model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test)
print(models)
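
A hedged variation on the example above: restricting the run to a named subset of scikit-learn estimators (names are matched against `sklearn.utils.all_estimators()`, as in the `LazyDeepClassifier` source further below); the iris split from the example is reused:

```python
# Hedged sketch: only fit the named scikit-learn classifiers (wrapped in CustomClassifier).
clf_subset = ns.LazyClassifier(
    verbose=0,
    ignore_warnings=True,
    estimators=["LogisticRegression", "RandomForestClassifier", "ExtraTreesClassifier"],
)
models, predictions = clf_subset.fit(X_train, X_test, y_train, y_test)
print(models)
```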
class LazyRegressor(nnetsauce.LazyDeepRegressor):
646class LazyRegressor(LazyDeepRegressor):
647    """
648        Fitting -- almost -- all the regression algorithms with
649        nnetsauce's CustomRegressor and returning their scores.
650
651    Parameters:
652
653        verbose: int, optional (default=0)
654            Any positive number for verbosity.
655
656        ignore_warnings: bool, optional (default=True)
657            When set to True, warnings related to algorithms that are not able to run are ignored.
658
659        custom_metric: function, optional (default=None)
660            When function is provided, models are evaluated based on the custom evaluation metric provided.
661
662        predictions: bool, optional (default=False)
663            When set to True, the predictions of all the models are returned as a dataframe.
664
665        sort_by: string, optional (default='RMSE')
666            Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken' and 'Custom Metric'
667            (a custom metric identified by its name and provided by custom_metric).
668
669        random_state: int, optional (default=42)
670            Reproducibility seed.
671
672        estimators: list, optional (default='all')
673            list of estimator names or just 'all' (default='all')
674
675        preprocess: bool
676            preprocessing is done when set to True
677
678        n_jobs: int, when possible, run in parallel
679            For now, only used by individual models that support it.
680
681        All the other parameters are the same as CustomRegressor's.
682
683    Attributes:
684
685        models_: dict-object
686            Returns a dictionary with each model pipeline as value
687            with key as name of models.
688
689        best_model_: object
690            Returns the best model pipeline based on the sort_by metric.
691
692    Examples:
693
694        import nnetsauce as ns
695        import numpy as np
696        from sklearn import datasets
697        from sklearn.utils import shuffle
698
699        diabetes = datasets.load_diabetes()
700        X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
701        X = X.astype(np.float32)
702
703        offset = int(X.shape[0] * 0.9)
704        X_train, y_train = X[:offset], y[:offset]
705        X_test, y_test = X[offset:], y[offset:]
706
707        reg = ns.LazyRegressor(verbose=0, ignore_warnings=False,
708                            custom_metric=None)
709        models, predictions = reg.fit(X_train, X_test, y_train, y_test)
710        print(models)
711
712    """
713
714    def __init__(
715        self,
716        verbose=0,
717        ignore_warnings=True,
718        custom_metric=None,
719        predictions=False,
720        sort_by="RMSE",
721        random_state=42,
722        estimators="all",
723        preprocess=False,
724        n_jobs=None,
725        # CustomRegressor attributes
726        obj=None,
727        n_hidden_features=5,
728        activation_name="relu",
729        a=0.01,
730        nodes_sim="sobol",
731        bias=True,
732        dropout=0,
733        direct_link=True,
734        n_clusters=2,
735        cluster_encode=True,
736        type_clust="kmeans",
737        type_scaling=("std", "std", "std"),
738        col_sample=1,
739        row_sample=1,
740        seed=123,
741        backend="cpu",
742    ):
743        super().__init__(
744            verbose=verbose,
745            ignore_warnings=ignore_warnings,
746            custom_metric=custom_metric,
747            predictions=predictions,
748            sort_by=sort_by,
749            random_state=random_state,
750            estimators=estimators,
751            preprocess=preprocess,
752            n_jobs=n_jobs,
753            n_layers=1,
754            obj=obj,
755            n_hidden_features=n_hidden_features,
756            activation_name=activation_name,
757            a=a,
758            nodes_sim=nodes_sim,
759            bias=bias,
760            dropout=dropout,
761            direct_link=direct_link,
762            n_clusters=n_clusters,
763            cluster_encode=cluster_encode,
764            type_clust=type_clust,
765            type_scaling=type_scaling,
766            col_sample=col_sample,
767            row_sample=row_sample,
768            seed=seed,
769            backend=backend,
770        )

Fitting -- almost -- all the regression algorithms with nnetsauce's CustomRegressor and returning their scores.

Parameters:

verbose: int, optional (default=0)
    Any positive number for verbosity.

ignore_warnings: bool, optional (default=True)
    When set to True, warnings related to algorithms that are not able to run are ignored.

custom_metric: function, optional (default=None)
    When function is provided, models are evaluated based on the custom evaluation metric provided.

predictions: bool, optional (default=False)
    When set to True, the predictions of all the models are returned as a dataframe.

sort_by: string, optional (default='RMSE')
    Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken' and 'Custom Metric'
    (a custom metric identified by its name and provided by custom_metric).

random_state: int, optional (default=42)
    Reproducibility seed.

estimators: list, optional (default='all')
    list of estimator names or just 'all' (default='all')

preprocess: bool
    preprocessing is done when set to True

n_jobs: int, when possible, run in parallel
    For now, only used by individual models that support it.

All the other parameters are the same as CustomRegressor's.

Attributes:

models_: dict-object
    Returns a dictionary with each model pipeline as value
    with key as name of models.

best_model_: object
    Returns the best model pipeline based on the sort_by metric.

Examples:

import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle

diabetes = datasets.load_diabetes()
X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
X = X.astype(np.float32)

offset = int(X.shape[0] * 0.9)
X_train, y_train = X[:offset], y[:offset]
X_test, y_test = X[offset:], y[offset:]

reg = ns.LazyRegressor(verbose=0, ignore_warnings=False,
                    custom_metric=None)
models, predictions = reg.fit(X_train, X_test, y_train, y_test)
print(models)
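
A hedged variation on the example above: ranking results by a user-supplied metric via `custom_metric` and `sort_by='Custom Metric'` (the diabetes split from the example is reused):

```python
# Hedged sketch: sort the leaderboard by a custom regression metric.
from sklearn.metrics import median_absolute_error

reg_custom = ns.LazyRegressor(
    verbose=0,
    ignore_warnings=True,
    custom_metric=median_absolute_error,
    sort_by="Custom Metric",
)
models, predictions = reg_custom.fit(X_train, X_test, y_train, y_test)
print(models)
```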
class LazyDeepClassifier(nnetsauce.custom.custom.Custom, sklearn.base.ClassifierMixin):
 94class LazyDeepClassifier(Custom, ClassifierMixin):
 95    """
 96
 97    Fitting -- almost -- all the classification algorithms with layers of
 98    nnetsauce's CustomClassifier and returning their scores.
 99
100    Parameters:
101
102        verbose: int, optional (default=0)
103            Any positive number for verbosity.
104
105        ignore_warnings: bool, optional (default=True)
106            When set to True, warnings related to algorithms that are not
107            able to run are ignored.
108
109        custom_metric: function, optional (default=None)
110            When function is provided, models are evaluated based on the custom
111              evaluation metric provided.
112
113        predictions: bool, optional (default=False)
114            When set to True, the predictions of all the models are
115            returned as a data frame.
116
117        sort_by: string, optional (default='Accuracy')
118            Sort models by a metric. Available options are 'Accuracy',
119            'Balanced Accuracy', 'ROC AUC', 'F1 Score' or a custom metric
120            identified by its name and provided by custom_metric.
121
122        random_state: int, optional (default=42)
123            Reproducibility seed.
124
125        estimators: list, optional (default='all')
126            list of estimator names or just 'all' for > 90 classifiers
127            (default='all')
128
129        preprocess: bool, preprocessing is done when set to True
130
131        n_jobs: int, when possible, run in parallel
132            For now, only used by individual models that support it.
133
134        n_layers: int, optional (default=3)
135            Number of layers of CustomClassifiers to be used.
136
137        All the other parameters are the same as CustomClassifier's.
138
139    Attributes:
140
141        models_: dict-object
142            Returns a dictionary with each model pipeline as value
143            with key as name of models.
144
145        best_model_: object
146            Returns the best model pipeline.
147
148    Examples:
149
150        ```python
151        import nnetsauce as ns
152        from sklearn.datasets import load_breast_cancer
153        from sklearn.model_selection import train_test_split
154        data = load_breast_cancer()
155        X = data.data
156        y= data.target
157        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2,
158            random_state=123)
159        clf = ns.LazyDeepClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
160        models, predictions = clf.fit(X_train, X_test, y_train, y_test)
161        model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test)
162        print(models)
163        ```
164
165    """
166
167    def __init__(
168        self,
169        verbose=0,
170        ignore_warnings=True,
171        custom_metric=None,
172        predictions=False,
173        sort_by="Accuracy",
174        random_state=42,
175        estimators="all",
176        preprocess=False,
177        n_jobs=None,
178        # Defining depth
179        n_layers=3,
180        # CustomClassifier attributes
181        obj=None,
182        n_hidden_features=5,
183        activation_name="relu",
184        a=0.01,
185        nodes_sim="sobol",
186        bias=True,
187        dropout=0,
188        direct_link=True,
189        n_clusters=2,
190        cluster_encode=True,
191        type_clust="kmeans",
192        type_scaling=("std", "std", "std"),
193        col_sample=1,
194        row_sample=1,
195        seed=123,
196        backend="cpu",
197    ):
198        self.verbose = verbose
199        self.ignore_warnings = ignore_warnings
200        self.custom_metric = custom_metric
201        self.predictions = predictions
202        self.sort_by = sort_by
203        self.models_ = {}
204        self.best_model_ = None
205        self.random_state = random_state
206        self.estimators = estimators
207        self.preprocess = preprocess
208        self.n_layers = n_layers - 1
209        self.n_jobs = n_jobs
210        super().__init__(
211            obj=obj,
212            n_hidden_features=n_hidden_features,
213            activation_name=activation_name,
214            a=a,
215            nodes_sim=nodes_sim,
216            bias=bias,
217            dropout=dropout,
218            direct_link=direct_link,
219            n_clusters=n_clusters,
220            cluster_encode=cluster_encode,
221            type_clust=type_clust,
222            type_scaling=type_scaling,
223            col_sample=col_sample,
224            row_sample=row_sample,
225            seed=seed,
226            backend=backend,
227        )
228
229    def fit(self, X_train, X_test, y_train, y_test):
230        """Fit classifiers to X_train and y_train, predict and score on X_test,
231        y_test.
232
233        Parameters:
234
235            X_train: array-like,
236                Training vectors, where rows is the number of samples
237                and columns is the number of features.
238
239            X_test: array-like,
240                Testing vectors, where rows is the number of samples
241                and columns is the number of features.
242
243            y_train: array-like,
244                Training target values, where rows is the number
245                of samples.
246
247            y_test: array-like,
248                Testing target values, where rows is the number
249                of samples.
250
251        Returns:
252
253            scores: Pandas DataFrame
254                Returns metrics of all the models in a Pandas DataFrame.
255
256            predictions: Pandas DataFrame
257                Returns predictions of all the models in a Pandas DataFrame.
258        """
259        Accuracy = []
260        B_Accuracy = []
261        ROC_AUC = []
262        F1 = []
263        names = []
264        TIME = []
265        predictions = {}
266
267        if self.custom_metric is not None:
268            CUSTOM_METRIC = []
269
270        if isinstance(X_train, np.ndarray):
271            X_train = pd.DataFrame(X_train)
272            X_test = pd.DataFrame(X_test)
273
274        numeric_features = X_train.select_dtypes(include=[np.number]).columns
275        categorical_features = X_train.select_dtypes(include=["object"]).columns
276
277        categorical_low, categorical_high = get_card_split(
278            X_train, categorical_features
279        )
280
281        if self.preprocess is True:
282            preprocessor = ColumnTransformer(
283                transformers=[
284                    ("numeric", numeric_transformer, numeric_features),
285                    (
286                        "categorical_low",
287                        categorical_transformer_low,
288                        categorical_low,
289                    ),
290                    (
291                        "categorical_high",
292                        categorical_transformer_high,
293                        categorical_high,
294                    ),
295                ]
296            )
297
298        # baseline models
299        try:
300            baseline_names = ["RandomForestClassifier", "XGBClassifier"]
301            baseline_models = [RandomForestClassifier(), xgb.XGBClassifier()]
302        except Exception as exception:
303            baseline_names = ["RandomForestClassifier"]
304            baseline_models = [RandomForestClassifier()]
305
306        for name, model in zip(baseline_names, baseline_models):
307            start = time.time()
308            try:
309                model.fit(X_train, y_train)
310                self.models_[name] = model
311                y_pred = model.predict(X_test)
312                accuracy = accuracy_score(y_test, y_pred, normalize=True)
313                b_accuracy = balanced_accuracy_score(y_test, y_pred)
314                f1 = f1_score(y_test, y_pred, average="weighted")
315                try:
316                    roc_auc = roc_auc_score(y_test, y_pred)
317                except Exception as exception:
318                    roc_auc = None
319                    if self.ignore_warnings is False:
320                        print("ROC AUC couldn't be calculated for " + name)
321                        print(exception)
322                names.append(name)
323                Accuracy.append(accuracy)
324                B_Accuracy.append(b_accuracy)
325                ROC_AUC.append(roc_auc)
326                F1.append(f1)
327                TIME.append(time.time() - start)
328                if self.custom_metric is not None:
329                    custom_metric = self.custom_metric(y_test, y_pred)
330                    CUSTOM_METRIC.append(custom_metric)
331                if self.verbose > 0:
332                    if self.custom_metric is not None:
333                        print(
334                            {
335                                "Model": name,
336                                "Accuracy": accuracy,
337                                "Balanced Accuracy": b_accuracy,
338                                "ROC AUC": roc_auc,
339                                "F1 Score": f1,
340                                self.custom_metric.__name__: custom_metric,
341                                "Time taken": time.time() - start,
342                            }
343                        )
344                    else:
345                        print(
346                            {
347                                "Model": name,
348                                "Accuracy": accuracy,
349                                "Balanced Accuracy": b_accuracy,
350                                "ROC AUC": roc_auc,
351                                "F1 Score": f1,
352                                "Time taken": time.time() - start,
353                            }
354                        )
355                if self.predictions:
356                    predictions[name] = y_pred
357            except Exception as exception:
358                 if self.ignore_warnings is False:
359                     print(name + " model failed to execute")
360                     print(exception)
361
362        if self.estimators == "all":
363            self.classifiers = [
364                item
365                for sublist in [
366                    DEEPCLASSIFIERS,
367                    DEEPMULTITASKCLASSIFIERS,
368                    DEEPSIMPLEMULTITASKCLASSIFIERS,
369                ]
370                for item in sublist
371            ]
372        else:
373            self.classifiers = (
374                [
375                    ("DeepCustomClassifier(" + est[0] + ")", est[1])
376                    for est in all_estimators()
377                    if (
378                        issubclass(est[1], ClassifierMixin)
379                        and (est[0] in self.estimators)
380                    )
381                ]
382                + [
383                    (
384                        "DeepMultitaskClassifier(" + est[0] + ")",
385                        partial(MultitaskClassifier, obj=est[1]()),
386                    )
387                    for est in all_estimators()
388                    if (
389                        issubclass(est[1], RegressorMixin)
390                        and (est[0] in self.estimators)
391                    )
392                ]
393                + [
394                    (
395                        "DeepSimpleMultitaskClassifier(" + est[0] + ")",
396                        partial(SimpleMultitaskClassifier, obj=est[1]()),
397                    )
398                    for est in all_estimators()
399                    if (
400                        issubclass(est[1], RegressorMixin)
401                        and (est[0] in self.estimators)
402                    )
403                ]
404            )
405
406        if self.preprocess is True:
407
408            for name, model in tqdm(self.classifiers):  # do parallel exec
409
410                other_args = {}  # use this trick for `random_state` too --> refactor
411                try:
412                    if (
413                        "n_jobs" in model().get_params().keys()
414                        and name.find("LogisticRegression") == -1
415                    ):
416                        other_args["n_jobs"] = self.n_jobs
417                except Exception:
418                    pass
419
420                start = time.time()
421
422                try:
423                    if "random_state" in model().get_params().keys():
424                        layer_clf = CustomClassifier(
425                            obj=model(random_state=self.random_state),
426                            n_hidden_features=self.n_hidden_features,
427                            activation_name=self.activation_name,
428                            a=self.a,
429                            nodes_sim=self.nodes_sim,
430                            bias=self.bias,
431                            dropout=self.dropout,
432                            direct_link=self.direct_link,
433                            n_clusters=self.n_clusters,
434                            cluster_encode=self.cluster_encode,
435                            type_clust=self.type_clust,
436                            type_scaling=self.type_scaling,
437                            col_sample=self.col_sample,
438                            row_sample=self.row_sample,
439                            seed=self.seed,
440                            backend=self.backend,
441                            cv_calibration=None,
442                        )
443
444                    else:
445                        layer_clf = CustomClassifier(
446                            obj=model(),
447                            n_hidden_features=self.n_hidden_features,
448                            activation_name=self.activation_name,
449                            a=self.a,
450                            nodes_sim=self.nodes_sim,
451                            bias=self.bias,
452                            dropout=self.dropout,
453                            direct_link=self.direct_link,
454                            n_clusters=self.n_clusters,
455                            cluster_encode=self.cluster_encode,
456                            type_clust=self.type_clust,
457                            type_scaling=self.type_scaling,
458                            col_sample=self.col_sample,
459                            row_sample=self.row_sample,
460                            seed=self.seed,
461                            backend=self.backend,
462                            cv_calibration=None,
463                        )
464
465                    layer_clf.fit(X_train, y_train)
466
467                    for _ in range(self.n_layers):
468                        layer_clf = deepcopy(
469                            CustomClassifier(
470                                obj=layer_clf,
471                                n_hidden_features=self.n_hidden_features,
472                                activation_name=self.activation_name,
473                                a=self.a,
474                                nodes_sim=self.nodes_sim,
475                                bias=self.bias,
476                                dropout=self.dropout,
477                                direct_link=self.direct_link,
478                                n_clusters=self.n_clusters,
479                                cluster_encode=self.cluster_encode,
480                                type_clust=self.type_clust,
481                                type_scaling=self.type_scaling,
482                                col_sample=self.col_sample,
483                                row_sample=self.row_sample,
484                                seed=self.seed,
485                                backend=self.backend,
486                                cv_calibration=None,
487                            )
488                        )
489
490                    pipe = Pipeline(
491                        [
492                            ("preprocessor", preprocessor),
493                            ("classifier", layer_clf),
494                        ]
495                    )
496
497                    pipe.fit(X_train, y_train)
498                    self.models_[name] = pipe
499                    y_pred = pipe.predict(X_test)
500                    accuracy = accuracy_score(y_test, y_pred, normalize=True)
501                    b_accuracy = balanced_accuracy_score(y_test, y_pred)
502                    f1 = f1_score(y_test, y_pred, average="weighted")
503                    try:
504                        roc_auc = roc_auc_score(y_test, y_pred)
505                    except Exception as exception:
506                        roc_auc = None
507                        if self.ignore_warnings is False:
508                            print("ROC AUC couldn't be calculated for " + name)
509                            print(exception)
510                    names.append(name)
511                    Accuracy.append(accuracy)
512                    B_Accuracy.append(b_accuracy)
513                    ROC_AUC.append(roc_auc)
514                    F1.append(f1)
515                    TIME.append(time.time() - start)
516                    if self.custom_metric is not None:
517                        custom_metric = self.custom_metric(y_test, y_pred)
518                        CUSTOM_METRIC.append(custom_metric)
519                    if self.verbose > 0:
520                        if self.custom_metric is not None:
521                            print(
522                                {
523                                    "Model": name,
524                                    "Accuracy": accuracy,
525                                    "Balanced Accuracy": b_accuracy,
526                                    "ROC AUC": roc_auc,
527                                    "F1 Score": f1,
528                                    self.custom_metric.__name__: custom_metric,
529                                    "Time taken": time.time() - start,
530                                }
531                            )
532                        else:
533                            print(
534                                {
535                                    "Model": name,
536                                    "Accuracy": accuracy,
537                                    "Balanced Accuracy": b_accuracy,
538                                    "ROC AUC": roc_auc,
539                                    "F1 Score": f1,
540                                    "Time taken": time.time() - start,
541                                }
542                            )
543                    if self.predictions:
544                        predictions[name] = y_pred
545                except Exception as exception:
546                     if self.ignore_warnings is False:
547                         print(name + " model failed to execute")
548                         print(exception)
549
550        else:  # no preprocessing
551
552            for name, model in tqdm(self.classifiers):  # do parallel exec
553                start = time.time()
554                try:
555                    if "random_state" in model().get_params().keys():
556                        layer_clf = CustomClassifier(
557                            obj=model(random_state=self.random_state),
558                            n_hidden_features=self.n_hidden_features,
559                            activation_name=self.activation_name,
560                            a=self.a,
561                            nodes_sim=self.nodes_sim,
562                            bias=self.bias,
563                            dropout=self.dropout,
564                            direct_link=self.direct_link,
565                            n_clusters=self.n_clusters,
566                            cluster_encode=self.cluster_encode,
567                            type_clust=self.type_clust,
568                            type_scaling=self.type_scaling,
569                            col_sample=self.col_sample,
570                            row_sample=self.row_sample,
571                            seed=self.seed,
572                            backend=self.backend,
573                            cv_calibration=None,
574                        )
575
576                    else:
577                        layer_clf = CustomClassifier(
578                            obj=model(),
579                            n_hidden_features=self.n_hidden_features,
580                            activation_name=self.activation_name,
581                            a=self.a,
582                            nodes_sim=self.nodes_sim,
583                            bias=self.bias,
584                            dropout=self.dropout,
585                            direct_link=self.direct_link,
586                            n_clusters=self.n_clusters,
587                            cluster_encode=self.cluster_encode,
588                            type_clust=self.type_clust,
589                            type_scaling=self.type_scaling,
590                            col_sample=self.col_sample,
591                            row_sample=self.row_sample,
592                            seed=self.seed,
593                            backend=self.backend,
594                            cv_calibration=None,
595                        )
596
597                    layer_clf.fit(X_train, y_train)
598
599                    for _ in range(self.n_layers):
600                        layer_clf = deepcopy(
601                            CustomClassifier(
602                                obj=layer_clf,
603                                n_hidden_features=self.n_hidden_features,
604                                activation_name=self.activation_name,
605                                a=self.a,
606                                nodes_sim=self.nodes_sim,
607                                bias=self.bias,
608                                dropout=self.dropout,
609                                direct_link=self.direct_link,
610                                n_clusters=self.n_clusters,
611                                cluster_encode=self.cluster_encode,
612                                type_clust=self.type_clust,
613                                type_scaling=self.type_scaling,
614                                col_sample=self.col_sample,
615                                row_sample=self.row_sample,
616                                seed=self.seed,
617                                backend=self.backend,
618                                cv_calibration=None,
619                            )
620                        )
621
622                        # layer_clf.fit(X_train, y_train)
623
624                    layer_clf.fit(X_train, y_train)
625
626                    self.models_[name] = layer_clf
627                    y_pred = layer_clf.predict(X_test)
628                    accuracy = accuracy_score(y_test, y_pred, normalize=True)
629                    b_accuracy = balanced_accuracy_score(y_test, y_pred)
630                    f1 = f1_score(y_test, y_pred, average="weighted")
631                    try:
632                        roc_auc = roc_auc_score(y_test, y_pred)
633                    except Exception as exception:
634                        roc_auc = None
635                        if self.ignore_warnings is False:
636                            print("ROC AUC couldn't be calculated for " + name)
637                            print(exception)
638                    names.append(name)
639                    Accuracy.append(accuracy)
640                    B_Accuracy.append(b_accuracy)
641                    ROC_AUC.append(roc_auc)
642                    F1.append(f1)
643                    TIME.append(time.time() - start)
644                    if self.custom_metric is not None:
645                        custom_metric = self.custom_metric(y_test, y_pred)
646                        CUSTOM_METRIC.append(custom_metric)
647                    if self.verbose > 0:
648                        if self.custom_metric is not None:
649                            print(
650                                {
651                                    "Model": name,
652                                    "Accuracy": accuracy,
653                                    "Balanced Accuracy": b_accuracy,
654                                    "ROC AUC": roc_auc,
655                                    "F1 Score": f1,
656                                    self.custom_metric.__name__: custom_metric,
657                                    "Time taken": time.time() - start,
658                                }
659                            )
660                        else:
661                            print(
662                                {
663                                    "Model": name,
664                                    "Accuracy": accuracy,
665                                    "Balanced Accuracy": b_accuracy,
666                                    "ROC AUC": roc_auc,
667                                    "F1 Score": f1,
668                                    "Time taken": time.time() - start,
669                                }
670                            )
671                    if self.predictions:
672                        predictions[name] = y_pred
673                except Exception as exception:
674                     if self.ignore_warnings is False:
675                         print(name + " model failed to execute")
676                         print(exception)
677
678        if self.custom_metric is None:
679            scores = pd.DataFrame(
680                {
681                    "Model": names,
682                    "Accuracy": Accuracy,
683                    "Balanced Accuracy": B_Accuracy,
684                    "ROC AUC": ROC_AUC,
685                    "F1 Score": F1,
686                    "Time Taken": TIME,
687                }
688            )
689        else:
690            scores = pd.DataFrame(
691                {
692                    "Model": names,
693                    "Accuracy": Accuracy,
694                    "Balanced Accuracy": B_Accuracy,
695                    "ROC AUC": ROC_AUC,
696                    "F1 Score": F1,
697                    "Custom metric": CUSTOM_METRIC,
698                    "Time Taken": TIME,
699                }
700            )
701        scores = scores.sort_values(by=self.sort_by, ascending=False).set_index("Model")
702
703        self.best_model_ = self.models_[scores.index[0]]
704
705        if self.predictions is True:
706
707            return scores, predictions
708
709        return scores
710
711    def get_best_model(self):
712        """
713        This function returns the best model pipeline based on the sort_by metric.
714
715        Returns:
716
717            best_model: object,
718                Returns the best model pipeline based on the sort_by metric.
719
720        """
721        return self.best_model_
722
723    def provide_models(self, X_train, X_test, y_train, y_test):
724        """Returns all the model objects trained. If fit hasn't been called yet,
725        then it's called to return the models.
726
727        Parameters:
728
729        X_train: array-like,
730            Training vectors, where rows is the number of samples
731            and columns is the number of features.
732
733        X_test: array-like,
734            Testing vectors, where rows is the number of samples
735            and columns is the number of features.
736
737        y_train: array-like,
738            Training vectors, where rows is the number of samples
739            and columns is the number of features.
740
741        y_test: array-like,
742            Testing vectors, where rows is the number of samples
743            and columns is the number of features.
744
745        Returns:
746
747            models: dict-object,
748                Returns a dictionary with each model's pipeline as value
749                and key = name of the model.
750        """
751        if len(self.models_.keys()) == 0:
752            self.fit(X_train, X_test, y_train, y_test)
753
754        return self.models_

Fitting -- almost -- all the classification algorithms with layers of nnetsauce's CustomClassifier and returning their scores.

Parameters:

verbose: int, optional (default=0)
    Any positive number for verbosity.

ignore_warnings: bool, optional (default=True)
    When set to True, warnings related to algorithms that are not
    able to run are ignored.

custom_metric: function, optional (default=None)
    When a function is provided, models are evaluated based on the custom
    evaluation metric provided.

predictions: bool, optional (default=False)
    When set to True, the predictions of all the models are
    returned as a data frame.

sort_by: string, optional (default='Accuracy')
    Sort models by a metric. Available options are 'Accuracy',
    'Balanced Accuracy', 'ROC AUC', 'F1 Score' or a custom metric
    identified by its name and provided by custom_metric.

random_state: int, optional (default=42)
    Reproducibility seed.

estimators: list, optional (default='all')
    List of estimator names, or just 'all' for more than 90 classifiers.

preprocess: bool
    Preprocessing is done when set to True.

n_jobs: int
    When possible, run in parallel. For now, only used by individual
    models that support it.

n_layers: int, optional (default=3)
    Number of layers of CustomClassifiers to be used.

All the other parameters are the same as CustomClassifier's.

Attributes:

models_: dict-object
    Returns a dictionary with each model pipeline as value
    with key as name of models.

best_model_: object
    Returns the best model pipeline.

Examples:

import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
data = load_breast_cancer()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2,
    random_state=123)
clf = ns.LazyDeepClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)
model_dictionary = clf.provide_models(X_train, X_test, y_train, y_test)
print(models)
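
A minimal sketch (not part of the original example, and assuming the constructor keywords documented above): custom_metric must be a callable with signature (y_true, y_pred), and fit only returns the per-model predictions when predictions=True.

# Hedged sketch: plugging in a custom metric and retrieving predictions.
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import matthews_corrcoef
from sklearn.model_selection import train_test_split

data = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, test_size=0.2, random_state=123
)

clf = ns.LazyDeepClassifier(
    verbose=0,
    ignore_warnings=True,
    custom_metric=matthews_corrcoef,  # called internally as custom_metric(y_test, y_pred)
    predictions=True,                 # fit then returns (scores, predictions)
)
scores, predictions = clf.fit(X_train, X_test, y_train, y_test)
print(scores.head())         # metrics table, indexed by model name
print(clf.get_best_model())  # best pipeline according to sort_by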
    

def fit(self, X_train, X_test, y_train, y_test):
229    def fit(self, X_train, X_test, y_train, y_test):
230        """Fit classifiers to X_train and y_train, predict and score on X_test,
231        y_test.
232
233        Parameters:
234
235            X_train: array-like,
236                Training vectors, where rows is the number of samples
237                and columns is the number of features.
238
239            X_test: array-like,
240                Testing vectors, where rows is the number of samples
241                and columns is the number of features.
242
243            y_train: array-like,
244                Training vectors, where rows is the number of samples
245                and columns is the number of features.
246
247            y_test: array-like,
248                Testing vectors, where rows is the number of samples
249                and columns is the number of features.
250
251        Returns:
252
253            scores: Pandas DataFrame
254                Returns metrics of all the models in a Pandas DataFrame.
255
256            predictions: Pandas DataFrame
257                Returns predictions of all the models in a Pandas DataFrame.
258        """
259        Accuracy = []
260        B_Accuracy = []
261        ROC_AUC = []
262        F1 = []
263        names = []
264        TIME = []
265        predictions = {}
266
267        if self.custom_metric is not None:
268            CUSTOM_METRIC = []
269
270        if isinstance(X_train, np.ndarray):
271            X_train = pd.DataFrame(X_train)
272            X_test = pd.DataFrame(X_test)
273
274        numeric_features = X_train.select_dtypes(include=[np.number]).columns
275        categorical_features = X_train.select_dtypes(include=["object"]).columns
276
277        categorical_low, categorical_high = get_card_split(
278            X_train, categorical_features
279        )
280
281        if self.preprocess is True:
282            preprocessor = ColumnTransformer(
283                transformers=[
284                    ("numeric", numeric_transformer, numeric_features),
285                    (
286                        "categorical_low",
287                        categorical_transformer_low,
288                        categorical_low,
289                    ),
290                    (
291                        "categorical_high",
292                        categorical_transformer_high,
293                        categorical_high,
294                    ),
295                ]
296            )
297
298        # baseline models
299        try:
300            baseline_names = ["RandomForestClassifier", "XGBClassifier"]
301            baseline_models = [RandomForestClassifier(), xgb.XGBClassifier()]
302        except Exception as exception:
303            baseline_names = ["RandomForestClassifier"]
304            baseline_models = [RandomForestClassifier()]
305
306        for name, model in zip(baseline_names, baseline_models):
307            start = time.time()
308            try:
309                model.fit(X_train, y_train)
310                self.models_[name] = model
311                y_pred = model.predict(X_test)
312                accuracy = accuracy_score(y_test, y_pred, normalize=True)
313                b_accuracy = balanced_accuracy_score(y_test, y_pred)
314                f1 = f1_score(y_test, y_pred, average="weighted")
315                try:
316                    roc_auc = roc_auc_score(y_test, y_pred)
317                except Exception as exception:
318                    roc_auc = None
319                    if self.ignore_warnings is False:
320                        print("ROC AUC couldn't be calculated for " + name)
321                        print(exception)
322                names.append(name)
323                Accuracy.append(accuracy)
324                B_Accuracy.append(b_accuracy)
325                ROC_AUC.append(roc_auc)
326                F1.append(f1)
327                TIME.append(time.time() - start)
328                if self.custom_metric is not None:
329                    custom_metric = self.custom_metric(y_test, y_pred)
330                    CUSTOM_METRIC.append(custom_metric)
331                if self.verbose > 0:
332                    if self.custom_metric is not None:
333                        print(
334                            {
335                                "Model": name,
336                                "Accuracy": accuracy,
337                                "Balanced Accuracy": b_accuracy,
338                                "ROC AUC": roc_auc,
339                                "F1 Score": f1,
340                                self.custom_metric.__name__: custom_metric,
341                                "Time taken": time.time() - start,
342                            }
343                        )
344                    else:
345                        print(
346                            {
347                                "Model": name,
348                                "Accuracy": accuracy,
349                                "Balanced Accuracy": b_accuracy,
350                                "ROC AUC": roc_auc,
351                                "F1 Score": f1,
352                                "Time taken": time.time() - start,
353                            }
354                        )
355                if self.predictions:
356                    predictions[name] = y_pred
357            except Exception as exception:
358                 if self.ignore_warnings is False:
359                     print(name + " model failed to execute")
360                     print(exception)
361
362        if self.estimators == "all":
363            self.classifiers = [
364                item
365                for sublist in [
366                    DEEPCLASSIFIERS,
367                    DEEPMULTITASKCLASSIFIERS,
368                    DEEPSIMPLEMULTITASKCLASSIFIERS,
369                ]
370                for item in sublist
371            ]
372        else:
373            self.classifiers = (
374                [
375                    ("DeepCustomClassifier(" + est[0] + ")", est[1])
376                    for est in all_estimators()
377                    if (
378                        issubclass(est[1], ClassifierMixin)
379                        and (est[0] in self.estimators)
380                    )
381                ]
382                + [
383                    (
384                        "DeepMultitaskClassifier(" + est[0] + ")",
385                        partial(MultitaskClassifier, obj=est[1]()),
386                    )
387                    for est in all_estimators()
388                    if (
389                        issubclass(est[1], RegressorMixin)
390                        and (est[0] in self.estimators)
391                    )
392                ]
393                + [
394                    (
395                        "DeepSimpleMultitaskClassifier(" + est[0] + ")",
396                        partial(SimpleMultitaskClassifier, obj=est[1]()),
397                    )
398                    for est in all_estimators()
399                    if (
400                        issubclass(est[1], RegressorMixin)
401                        and (est[0] in self.estimators)
402                    )
403                ]
404            )
405
406        if self.preprocess is True:
407
408            for name, model in tqdm(self.classifiers):  # do parallel exec
409
410                other_args = {}  # use this trick for `random_state` too --> refactor
411                try:
412                    if (
413                        "n_jobs" in model().get_params().keys()
414                        and name.find("LogisticRegression") == -1
415                    ):
416                        other_args["n_jobs"] = self.n_jobs
417                except Exception:
418                    pass
419
420                start = time.time()
421
422                try:
423                    if "random_state" in model().get_params().keys():
424                        layer_clf = CustomClassifier(
425                            obj=model(random_state=self.random_state),
426                            n_hidden_features=self.n_hidden_features,
427                            activation_name=self.activation_name,
428                            a=self.a,
429                            nodes_sim=self.nodes_sim,
430                            bias=self.bias,
431                            dropout=self.dropout,
432                            direct_link=self.direct_link,
433                            n_clusters=self.n_clusters,
434                            cluster_encode=self.cluster_encode,
435                            type_clust=self.type_clust,
436                            type_scaling=self.type_scaling,
437                            col_sample=self.col_sample,
438                            row_sample=self.row_sample,
439                            seed=self.seed,
440                            backend=self.backend,
441                            cv_calibration=None,
442                        )
443
444                    else:
445                        layer_clf = CustomClassifier(
446                            obj=model(),
447                            n_hidden_features=self.n_hidden_features,
448                            activation_name=self.activation_name,
449                            a=self.a,
450                            nodes_sim=self.nodes_sim,
451                            bias=self.bias,
452                            dropout=self.dropout,
453                            direct_link=self.direct_link,
454                            n_clusters=self.n_clusters,
455                            cluster_encode=self.cluster_encode,
456                            type_clust=self.type_clust,
457                            type_scaling=self.type_scaling,
458                            col_sample=self.col_sample,
459                            row_sample=self.row_sample,
460                            seed=self.seed,
461                            backend=self.backend,
462                            cv_calibration=None,
463                        )
464
465                    layer_clf.fit(X_train, y_train)
466
467                    for _ in range(self.n_layers):
468                        layer_clf = deepcopy(
469                            CustomClassifier(
470                                obj=layer_clf,
471                                n_hidden_features=self.n_hidden_features,
472                                activation_name=self.activation_name,
473                                a=self.a,
474                                nodes_sim=self.nodes_sim,
475                                bias=self.bias,
476                                dropout=self.dropout,
477                                direct_link=self.direct_link,
478                                n_clusters=self.n_clusters,
479                                cluster_encode=self.cluster_encode,
480                                type_clust=self.type_clust,
481                                type_scaling=self.type_scaling,
482                                col_sample=self.col_sample,
483                                row_sample=self.row_sample,
484                                seed=self.seed,
485                                backend=self.backend,
486                                cv_calibration=None,
487                            )
488                        )
489
490                    pipe = Pipeline(
491                        [
492                            ("preprocessor", preprocessor),
493                            ("classifier", layer_clf),
494                        ]
495                    )
496
497                    pipe.fit(X_train, y_train)
498                    self.models_[name] = pipe
499                    y_pred = pipe.predict(X_test)
500                    accuracy = accuracy_score(y_test, y_pred, normalize=True)
501                    b_accuracy = balanced_accuracy_score(y_test, y_pred)
502                    f1 = f1_score(y_test, y_pred, average="weighted")
503                    try:
504                        roc_auc = roc_auc_score(y_test, y_pred)
505                    except Exception as exception:
506                        roc_auc = None
507                        if self.ignore_warnings is False:
508                            print("ROC AUC couldn't be calculated for " + name)
509                            print(exception)
510                    names.append(name)
511                    Accuracy.append(accuracy)
512                    B_Accuracy.append(b_accuracy)
513                    ROC_AUC.append(roc_auc)
514                    F1.append(f1)
515                    TIME.append(time.time() - start)
516                    if self.custom_metric is not None:
517                        custom_metric = self.custom_metric(y_test, y_pred)
518                        CUSTOM_METRIC.append(custom_metric)
519                    if self.verbose > 0:
520                        if self.custom_metric is not None:
521                            print(
522                                {
523                                    "Model": name,
524                                    "Accuracy": accuracy,
525                                    "Balanced Accuracy": b_accuracy,
526                                    "ROC AUC": roc_auc,
527                                    "F1 Score": f1,
528                                    self.custom_metric.__name__: custom_metric,
529                                    "Time taken": time.time() - start,
530                                }
531                            )
532                        else:
533                            print(
534                                {
535                                    "Model": name,
536                                    "Accuracy": accuracy,
537                                    "Balanced Accuracy": b_accuracy,
538                                    "ROC AUC": roc_auc,
539                                    "F1 Score": f1,
540                                    "Time taken": time.time() - start,
541                                }
542                            )
543                    if self.predictions:
544                        predictions[name] = y_pred
545                except Exception as exception:
546                     if self.ignore_warnings is False:
547                         print(name + " model failed to execute")
548                         print(exception)
549
550        else:  # no preprocessing
551
552            for name, model in tqdm(self.classifiers):  # do parallel exec
553                start = time.time()
554                try:
555                    if "random_state" in model().get_params().keys():
556                        layer_clf = CustomClassifier(
557                            obj=model(random_state=self.random_state),
558                            n_hidden_features=self.n_hidden_features,
559                            activation_name=self.activation_name,
560                            a=self.a,
561                            nodes_sim=self.nodes_sim,
562                            bias=self.bias,
563                            dropout=self.dropout,
564                            direct_link=self.direct_link,
565                            n_clusters=self.n_clusters,
566                            cluster_encode=self.cluster_encode,
567                            type_clust=self.type_clust,
568                            type_scaling=self.type_scaling,
569                            col_sample=self.col_sample,
570                            row_sample=self.row_sample,
571                            seed=self.seed,
572                            backend=self.backend,
573                            cv_calibration=None,
574                        )
575
576                    else:
577                        layer_clf = CustomClassifier(
578                            obj=model(),
579                            n_hidden_features=self.n_hidden_features,
580                            activation_name=self.activation_name,
581                            a=self.a,
582                            nodes_sim=self.nodes_sim,
583                            bias=self.bias,
584                            dropout=self.dropout,
585                            direct_link=self.direct_link,
586                            n_clusters=self.n_clusters,
587                            cluster_encode=self.cluster_encode,
588                            type_clust=self.type_clust,
589                            type_scaling=self.type_scaling,
590                            col_sample=self.col_sample,
591                            row_sample=self.row_sample,
592                            seed=self.seed,
593                            backend=self.backend,
594                            cv_calibration=None,
595                        )
596
597                    layer_clf.fit(X_train, y_train)
598
599                    for _ in range(self.n_layers):
600                        layer_clf = deepcopy(
601                            CustomClassifier(
602                                obj=layer_clf,
603                                n_hidden_features=self.n_hidden_features,
604                                activation_name=self.activation_name,
605                                a=self.a,
606                                nodes_sim=self.nodes_sim,
607                                bias=self.bias,
608                                dropout=self.dropout,
609                                direct_link=self.direct_link,
610                                n_clusters=self.n_clusters,
611                                cluster_encode=self.cluster_encode,
612                                type_clust=self.type_clust,
613                                type_scaling=self.type_scaling,
614                                col_sample=self.col_sample,
615                                row_sample=self.row_sample,
616                                seed=self.seed,
617                                backend=self.backend,
618                                cv_calibration=None,
619                            )
620                        )
621
622                        # layer_clf.fit(X_train, y_train)
623
624                    layer_clf.fit(X_train, y_train)
625
626                    self.models_[name] = layer_clf
627                    y_pred = layer_clf.predict(X_test)
628                    accuracy = accuracy_score(y_test, y_pred, normalize=True)
629                    b_accuracy = balanced_accuracy_score(y_test, y_pred)
630                    f1 = f1_score(y_test, y_pred, average="weighted")
631                    try:
632                        roc_auc = roc_auc_score(y_test, y_pred)
633                    except Exception as exception:
634                        roc_auc = None
635                        if self.ignore_warnings is False:
636                            print("ROC AUC couldn't be calculated for " + name)
637                            print(exception)
638                    names.append(name)
639                    Accuracy.append(accuracy)
640                    B_Accuracy.append(b_accuracy)
641                    ROC_AUC.append(roc_auc)
642                    F1.append(f1)
643                    TIME.append(time.time() - start)
644                    if self.custom_metric is not None:
645                        custom_metric = self.custom_metric(y_test, y_pred)
646                        CUSTOM_METRIC.append(custom_metric)
647                    if self.verbose > 0:
648                        if self.custom_metric is not None:
649                            print(
650                                {
651                                    "Model": name,
652                                    "Accuracy": accuracy,
653                                    "Balanced Accuracy": b_accuracy,
654                                    "ROC AUC": roc_auc,
655                                    "F1 Score": f1,
656                                    self.custom_metric.__name__: custom_metric,
657                                    "Time taken": time.time() - start,
658                                }
659                            )
660                        else:
661                            print(
662                                {
663                                    "Model": name,
664                                    "Accuracy": accuracy,
665                                    "Balanced Accuracy": b_accuracy,
666                                    "ROC AUC": roc_auc,
667                                    "F1 Score": f1,
668                                    "Time taken": time.time() - start,
669                                }
670                            )
671                    if self.predictions:
672                        predictions[name] = y_pred
673                except Exception as exception:
674                     if self.ignore_warnings is False:
675                         print(name + " model failed to execute")
676                         print(exception)
677
678        if self.custom_metric is None:
679            scores = pd.DataFrame(
680                {
681                    "Model": names,
682                    "Accuracy": Accuracy,
683                    "Balanced Accuracy": B_Accuracy,
684                    "ROC AUC": ROC_AUC,
685                    "F1 Score": F1,
686                    "Time Taken": TIME,
687                }
688            )
689        else:
690            scores = pd.DataFrame(
691                {
692                    "Model": names,
693                    "Accuracy": Accuracy,
694                    "Balanced Accuracy": B_Accuracy,
695                    "ROC AUC": ROC_AUC,
696                    "F1 Score": F1,
697                    "Custom metric": CUSTOM_METRIC,
698                    "Time Taken": TIME,
699                }
700            )
701        scores = scores.sort_values(by=self.sort_by, ascending=False).set_index("Model")
702
703        self.best_model_ = self.models_[scores.index[0]]
704
705        if self.predictions is True:
706
707            return scores, predictions
708
709        return scores

Fit classifiers to X_train and y_train, predict and score on X_test, y_test.

Parameters:

X_train: array-like,
    Training vectors, where rows are samples and columns are features.

X_test: array-like,
    Testing vectors, where rows are samples and columns are features.

y_train: array-like,
    Training target values.

y_test: array-like,
    Testing target values.

Returns:

scores: Pandas DataFrame
    Returns metrics of all the models in a Pandas DataFrame.

predictions: Pandas DataFrame
    Returns predictions of all the models in a Pandas DataFrame.
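
For illustration only (a hedged sketch, not library output), the scores object is a pandas DataFrame indexed by model name, so it can be inspected with the usual pandas tools:

# Hedged sketch: inspecting the scores DataFrame returned by fit
# (with the default predictions=False, fit returns the scores alone).
scores = clf.fit(X_train, X_test, y_train, y_test)
print(scores.columns.tolist())  # e.g. ['Accuracy', 'Balanced Accuracy', 'ROC AUC', 'F1 Score', 'Time Taken']
print(scores.index[0])          # name of the best model according to sort_by
top_accuracy = scores.iloc[0]["Accuracy"]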
def provide_models(self, X_train, X_test, y_train, y_test):
723    def provide_models(self, X_train, X_test, y_train, y_test):
724        """Returns all the model objects trained. If fit hasn't been called yet,
725        then it's called to return the models.
726
727        Parameters:
728
729        X_train: array-like,
730            Training vectors, where rows is the number of samples
731            and columns is the number of features.
732
733        X_test: array-like,
734            Testing vectors, where rows is the number of samples
735            and columns is the number of features.
736
737        y_train: array-like,
738            Training vectors, where rows is the number of samples
739            and columns is the number of features.
740
741        y_test: array-like,
742            Testing vectors, where rows is the number of samples
743            and columns is the number of features.
744
745        Returns:
746
747            models: dict-object,
748                Returns a dictionary with each model's pipeline as value
749                and key = name of the model.
750        """
751        if len(self.models_.keys()) == 0:
752            self.fit(X_train, X_test, y_train, y_test)
753
754        return self.models_

Returns all the model objects trained. If fit hasn't been called yet, then it's called to return the models.

Parameters:

X_train: array-like, Training vectors, where rows are samples and columns are features.

X_test: array-like, Testing vectors, where rows are samples and columns are features.

y_train: array-like, Training target values.

y_test: array-like, Testing target values.

Returns:

models: dict-object,
    Returns a dictionary with each model's pipeline as value
    and key = name of the model.
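
As a hedged usage sketch (the key lookup below assumes the naming pattern used in fit, e.g. "DeepCustomClassifier(<estimator>)", and the scores table from a previous fit), a fitted pipeline can be pulled out of the dictionary and reused directly:

# Hedged sketch: reusing a fitted pipeline returned by provide_models.
model_dictionary = clf.provide_models(X_train, X_test, y_train, y_test)
print(list(model_dictionary.keys())[:5])   # first few model names
best_name = scores.index[0]                # best model name from the scores table
best_pipeline = model_dictionary[best_name]
y_new = best_pipeline.predict(X_test)      # predict with the already-fitted pipeline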
class LazyDeepRegressor(nnetsauce.custom.custom.Custom, sklearn.base.RegressorMixin):
 90class LazyDeepRegressor(Custom, RegressorMixin):
 91    """
 92        Fitting -- almost -- all the regression algorithms with layers of
 93        nnetsauce's CustomRegressor and returning their scores.
 94
 95    Parameters:
 96
 97        verbose: int, optional (default=0)
 98            Any positive number for verbosity.
 99
100        ignore_warnings: bool, optional (default=True)
101            When set to True, the warning related to algorigms that are not able to run are ignored.
102
103        custom_metric: function, optional (default=None)
104            When function is provided, models are evaluated based on the custom evaluation metric provided.
105
106        predictions: bool, optional (default=False)
107            When set to True, the predictions of all the models models are returned as dataframe.
108
109        sort_by: string, optional (default='RMSE')
110            Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken' and 'Custom Metric'.
111            or a custom metric identified by its name and provided by custom_metric.
112
113        random_state: int, optional (default=42)
114            Reproducibiility seed.
115
116        estimators: list, optional (default='all')
117            list of Estimators names or just 'all' (default='all')
118
119        preprocess: bool
120            preprocessing is done when set to True
121
122        n_jobs : int, when possible, run in parallel
123            For now, only used by individual models that support it.
124
125        n_layers: int, optional (default=3)
126            Number of layers of CustomRegressors to be used.
127
128        All the other parameters are the same as CustomRegressor's.
129
130    Attributes:
131
132        models_: dict-object
133            Returns a dictionary with each model pipeline as value
134            with key as name of models.
135
136        best_model_: object
137            Returns the best model pipeline based on the sort_by metric.
138
139    Examples:
140
141        import nnetsauce as ns
142        import numpy as np
143        from sklearn import datasets
144        from sklearn.utils import shuffle
145
146        diabetes = datasets.load_diabetes()
147        X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
148        X = X.astype(np.float32)
149
150        offset = int(X.shape[0] * 0.9)
151        X_train, y_train = X[:offset], y[:offset]
152        X_test, y_test = X[offset:], y[offset:]
153
154        reg = ns.LazyDeepRegressor(verbose=0, ignore_warnings=False, custom_metric=None)
155        models, predictions = reg.fit(X_train, X_test, y_train, y_test)
156        print(models)
157
158    """
159
160    def __init__(
161        self,
162        verbose=0,
163        ignore_warnings=True,
164        custom_metric=None,
165        predictions=False,
166        sort_by="RMSE",
167        random_state=42,
168        estimators="all",
169        preprocess=False,
170        n_jobs=None,
171        # Defining depth
172        n_layers=3,
173        # CustomRegressor attributes
174        obj=None,
175        n_hidden_features=5,
176        activation_name="relu",
177        a=0.01,
178        nodes_sim="sobol",
179        bias=True,
180        dropout=0,
181        direct_link=True,
182        n_clusters=2,
183        cluster_encode=True,
184        type_clust="kmeans",
185        type_scaling=("std", "std", "std"),
186        col_sample=1,
187        row_sample=1,
188        seed=123,
189        backend="cpu",
190    ):
191        self.verbose = verbose
192        self.ignore_warnings = ignore_warnings
193        self.custom_metric = custom_metric
194        self.predictions = predictions
195        self.sort_by = sort_by
196        self.models_ = {}
197        self.best_model_ = None
198        self.random_state = random_state
199        self.estimators = estimators
200        self.preprocess = preprocess
201        self.n_layers = n_layers - 1
202        self.n_jobs = n_jobs
203        super().__init__(
204            obj=obj,
205            n_hidden_features=n_hidden_features,
206            activation_name=activation_name,
207            a=a,
208            nodes_sim=nodes_sim,
209            bias=bias,
210            dropout=dropout,
211            direct_link=direct_link,
212            n_clusters=n_clusters,
213            cluster_encode=cluster_encode,
214            type_clust=type_clust,
215            type_scaling=type_scaling,
216            col_sample=col_sample,
217            row_sample=row_sample,
218            seed=seed,
219            backend=backend,
220        )
221
222    def fit(self, X_train, X_test, y_train, y_test):
223        """Fit Regression algorithms to X_train and y_train, predict and score on X_test, y_test.
224
225        Parameters:
226
227            X_train : array-like,
228                Training vectors, where rows is the number of samples
229                and columns is the number of features.
230
231            X_test : array-like,
232                Testing vectors, where rows is the number of samples
233                and columns is the number of features.
234
235            y_train : array-like,
236                Training vectors, where rows is the number of samples
237                and columns is the number of features.
238
239            y_test : array-like,
240                Testing vectors, where rows is the number of samples
241                and columns is the number of features.
242
243        Returns:
244        -------
245        scores:  Pandas DataFrame
246            Returns metrics of all the models in a Pandas DataFrame.
247
248        predictions : Pandas DataFrame
249            Returns predictions of all the models in a Pandas DataFrame.
250
251        """
252        R2 = []
253        ADJR2 = []
254        RMSE = []
255        # WIN = []
256        names = []
257        TIME = []
258        predictions = {}
259
260        if self.custom_metric:
261            CUSTOM_METRIC = []
262
263        if isinstance(X_train, np.ndarray):
264            X_train = pd.DataFrame(X_train)
265            X_test = pd.DataFrame(X_test)
266
267        numeric_features = X_train.select_dtypes(include=[np.number]).columns
268        categorical_features = X_train.select_dtypes(include=["object"]).columns
269
270        categorical_low, categorical_high = get_card_split(
271            X_train, categorical_features
272        )
273
274        if self.preprocess is True:
275            preprocessor = ColumnTransformer(
276                transformers=[
277                    ("numeric", numeric_transformer, numeric_features),
278                    (
279                        "categorical_low",
280                        categorical_transformer_low,
281                        categorical_low,
282                    ),
283                    (
284                        "categorical_high",
285                        categorical_transformer_high,
286                        categorical_high,
287                    ),
288                ]
289            )
290
291        # base models
292        try:
293            baseline_names = ["RandomForestRegressor", "XGBRegressor"]
294            baseline_models = [RandomForestRegressor(), xgb.XGBRegressor()]
295        except Exception as exception:
296            baseline_names = ["RandomForestRegressor"]
297            baseline_models = [RandomForestRegressor()]
298
299        for name, model in zip(baseline_names, baseline_models):
300            start = time.time()
301            try:
302                model.fit(X_train, y_train)
303                self.models_[name] = model
304                y_pred = model.predict(X_test)
305                r_squared = r2_score(y_test, y_pred)
306                adj_rsquared = adjusted_rsquared(
307                    r_squared, X_test.shape[0], X_test.shape[1]
308                )
309                rmse = np.sqrt(np.mean((y_test - y_pred) ** 2))
310
311                names.append(name)
312                R2.append(r_squared)
313                ADJR2.append(adj_rsquared)
314                RMSE.append(rmse)
315                TIME.append(time.time() - start)
316
317                if self.custom_metric:
318                    custom_metric = self.custom_metric(y_test, y_pred)
319                    CUSTOM_METRIC.append(custom_metric)
320
321                if self.verbose > 0:
322                    scores_verbose = {
323                        "Model": name,
324                        "R-Squared": r_squared,
325                        "Adjusted R-Squared": adj_rsquared,
326                        "RMSE": rmse,
327                        "Time taken": time.time() - start,
328                    }
329
330                    if self.custom_metric:
331                        scores_verbose[self.custom_metric.__name__] = custom_metric
332
333                    print(scores_verbose)
334                if self.predictions:
335                    predictions[name] = y_pred
336            except Exception as exception:
337                if self.ignore_warnings is False:
338                    print(name + " model failed to execute")
339                    print(exception)
340
341        if self.estimators == "all":
342            self.regressors = DEEPREGRESSORS
343        else:
344            self.regressors = [
345                ("DeepCustomRegressor(" + est[0] + ")", est[1])
346                for est in all_estimators()
347                if (issubclass(est[1], RegressorMixin) and (est[0] in self.estimators))
348            ]
349
350        if self.preprocess is True:
351
352            for name, model in tqdm(self.regressors):  # do parallel exec
353                start = time.time()
354                try:
355                    if "random_state" in model().get_params().keys():
356                        layer_regr = CustomRegressor(
357                            obj=model(random_state=self.random_state),
358                            n_hidden_features=self.n_hidden_features,
359                            activation_name=self.activation_name,
360                            a=self.a,
361                            nodes_sim=self.nodes_sim,
362                            bias=self.bias,
363                            dropout=self.dropout,
364                            direct_link=self.direct_link,
365                            n_clusters=self.n_clusters,
366                            cluster_encode=self.cluster_encode,
367                            type_clust=self.type_clust,
368                            type_scaling=self.type_scaling,
369                            col_sample=self.col_sample,
370                            row_sample=self.row_sample,
371                            seed=self.seed,
372                            backend=self.backend,
373                        )
374                    else:
375                        layer_regr = CustomRegressor(
376                            obj=model(),
377                            n_hidden_features=self.n_hidden_features,
378                            activation_name=self.activation_name,
379                            a=self.a,
380                            nodes_sim=self.nodes_sim,
381                            bias=self.bias,
382                            dropout=self.dropout,
383                            direct_link=self.direct_link,
384                            n_clusters=self.n_clusters,
385                            cluster_encode=self.cluster_encode,
386                            type_clust=self.type_clust,
387                            type_scaling=self.type_scaling,
388                            col_sample=self.col_sample,
389                            row_sample=self.row_sample,
390                            seed=self.seed,
391                            backend=self.backend,
392                        )
393
394                    for _ in range(self.n_layers):
395                        layer_regr = deepcopy(
396                            CustomRegressor(
397                                obj=layer_regr,
398                                n_hidden_features=self.n_hidden_features,
399                                activation_name=self.activation_name,
400                                a=self.a,
401                                nodes_sim=self.nodes_sim,
402                                bias=self.bias,
403                                dropout=self.dropout,
404                                direct_link=self.direct_link,
405                                n_clusters=self.n_clusters,
406                                cluster_encode=self.cluster_encode,
407                                type_clust=self.type_clust,
408                                type_scaling=self.type_scaling,
409                                col_sample=self.col_sample,
410                                row_sample=self.row_sample,
411                                seed=self.seed,
412                                backend=self.backend,
413                            )
414                        )
415
416                    layer_regr.fit(X_train, y_train)
417
418                    pipe = Pipeline(
419                        steps=[
420                            ("preprocessor", preprocessor),
421                            ("regressor", layer_regr),
422                        ]
423                    )
424
425                    pipe.fit(X_train, y_train)
426
427                    self.models_[name] = pipe
428                    y_pred = pipe.predict(X_test)
429                    r_squared = r2_score(y_test, y_pred)
430                    adj_rsquared = adjusted_rsquared(
431                        r_squared, X_test.shape[0], X_test.shape[1]
432                    )
433                    rmse = np.sqrt(np.mean((y_test - y_pred) ** 2))
434
435                    names.append(name)
436                    R2.append(r_squared)
437                    ADJR2.append(adj_rsquared)
438                    RMSE.append(rmse)
439                    TIME.append(time.time() - start)
440
441                    if self.custom_metric:
442                        custom_metric = self.custom_metric(y_test, y_pred)
443                        CUSTOM_METRIC.append(custom_metric)
444
445                    if self.verbose > 0:
446                        scores_verbose = {
447                            "Model": name,
448                            "R-Squared": r_squared,
449                            "Adjusted R-Squared": adj_rsquared,
450                            "RMSE": rmse,
451                            "Time taken": time.time() - start,
452                        }
453
454                        if self.custom_metric:
455                            scores_verbose[self.custom_metric.__name__] = custom_metric
456
457                        print(scores_verbose)
458                    if self.predictions:
459                        predictions[name] = y_pred
460                except Exception as exception:
461                    if self.ignore_warnings is False:
462                        print(name + " model failed to execute")
463                        print(exception)
464
465        else:  # no preprocessing
466
467            for name, model in tqdm(self.regressors):  # do parallel exec
468                start = time.time()
469                try:
470                    if "random_state" in model().get_params().keys():
471                        layer_regr = CustomRegressor(
472                            obj=model(random_state=self.random_state),
473                            n_hidden_features=self.n_hidden_features,
474                            activation_name=self.activation_name,
475                            a=self.a,
476                            nodes_sim=self.nodes_sim,
477                            bias=self.bias,
478                            dropout=self.dropout,
479                            direct_link=self.direct_link,
480                            n_clusters=self.n_clusters,
481                            cluster_encode=self.cluster_encode,
482                            type_clust=self.type_clust,
483                            type_scaling=self.type_scaling,
484                            col_sample=self.col_sample,
485                            row_sample=self.row_sample,
486                            seed=self.seed,
487                            backend=self.backend,
488                        )
489                    else:
490                        layer_regr = CustomRegressor(
491                            obj=model(),
492                            n_hidden_features=self.n_hidden_features,
493                            activation_name=self.activation_name,
494                            a=self.a,
495                            nodes_sim=self.nodes_sim,
496                            bias=self.bias,
497                            dropout=self.dropout,
498                            direct_link=self.direct_link,
499                            n_clusters=self.n_clusters,
500                            cluster_encode=self.cluster_encode,
501                            type_clust=self.type_clust,
502                            type_scaling=self.type_scaling,
503                            col_sample=self.col_sample,
504                            row_sample=self.row_sample,
505                            seed=self.seed,
506                            backend=self.backend,
507                        )
508
509                    layer_regr.fit(X_train, y_train)
510
511                    for _ in range(self.n_layers):
512                        layer_regr = deepcopy(
513                            CustomRegressor(
514                                obj=layer_regr,
515                                n_hidden_features=self.n_hidden_features,
516                                activation_name=self.activation_name,
517                                a=self.a,
518                                nodes_sim=self.nodes_sim,
519                                bias=self.bias,
520                                dropout=self.dropout,
521                                direct_link=self.direct_link,
522                                n_clusters=self.n_clusters,
523                                cluster_encode=self.cluster_encode,
524                                type_clust=self.type_clust,
525                                type_scaling=self.type_scaling,
526                                col_sample=self.col_sample,
527                                row_sample=self.row_sample,
528                                seed=self.seed,
529                                backend=self.backend,
530                            )
531                        )
532
533                        # layer_regr.fit(X_train, y_train)
534
535                    layer_regr.fit(X_train, y_train)
536
537                    self.models_[name] = layer_regr
538                    y_pred = layer_regr.predict(X_test)
539
540                    r_squared = r2_score(y_test, y_pred)
541                    adj_rsquared = adjusted_rsquared(
542                        r_squared, X_test.shape[0], X_test.shape[1]
543                    )
544                    rmse = np.sqrt(np.mean((y_test - y_pred) ** 2))
545
546                    names.append(name)
547                    R2.append(r_squared)
548                    ADJR2.append(adj_rsquared)
549                    RMSE.append(rmse)
550                    TIME.append(time.time() - start)
551
552                    if self.custom_metric:
553                        custom_metric = self.custom_metric(y_test, y_pred)
554                        CUSTOM_METRIC.append(custom_metric)
555
556                    if self.verbose > 0:
557                        scores_verbose = {
558                            "Model": name,
559                            "R-Squared": r_squared,
560                            "Adjusted R-Squared": adj_rsquared,
561                            "RMSE": rmse,
562                            "Time taken": time.time() - start,
563                        }
564
565                        if self.custom_metric:
566                            scores_verbose[self.custom_metric.__name__] = custom_metric
567
568                        print(scores_verbose)
569                    if self.predictions:
570                        predictions[name] = y_pred
571                except Exception as exception:
572                    if self.ignore_warnings is False:
573                        print(name + " model failed to execute")
574                        print(exception)
575
576        scores = {
577            "Model": names,
578            "Adjusted R-Squared": ADJR2,
579            "R-Squared": R2,
580            "RMSE": RMSE,
581            "Time Taken": TIME,
582        }
583
584        if self.custom_metric:
585            scores["Custom metric"] = CUSTOM_METRIC
586
587        scores = pd.DataFrame(scores)
588        scores = scores.sort_values(by=self.sort_by, ascending=True).set_index("Model")
589
590        self.best_model_ = self.models_[scores.index[0]]
591
592        if self.predictions is True:
593
594            return scores, predictions
595
596        return scores
597
598    def get_best_model(self):
599        """
600        This function returns the best model pipeline based on the sort_by metric.
601
602        Returns:
603
604            best_model: object,
605                Returns the best model pipeline based on the sort_by metric.
606
607        """
608        return self.best_model_
609
610    def provide_models(self, X_train, X_test, y_train, y_test):
611        """
612        This function returns all the model objects trained in fit function.
613        If fit is not called already, then we call fit and then return the models.
614
615        Parameters:
616
617            X_train : array-like,
618                Training vectors, where rows is the number of samples
619                and columns is the number of features.
620
621            X_test : array-like,
622                Testing vectors, where rows is the number of samples
623                and columns is the number of features.
624
625            y_train : array-like,
626                Training vectors, where rows is the number of samples
627                and columns is the number of features.
628
629            y_test : array-like,
630                Testing vectors, where rows is the number of samples
631                and columns is the number of features.
632
633        Returns:
634
635            models: dict-object,
636                Returns a dictionary with each model pipeline as value
637                with key as name of models.
638
639        """
640        if len(self.models_.keys()) == 0:
641            self.fit(X_train, X_test, y_train, y_test)
642
643        return self.models_

Fitting -- almost -- all the regression algorithms with layers of nnetsauce's CustomRegressor and returning their scores.

Parameters:

verbose: int, optional (default=0)
    Any positive number for verbosity.

ignore_warnings: bool, optional (default=True)
    When set to True, warnings related to algorithms that are not able to run are ignored.

custom_metric: function, optional (default=None)
    When a function is provided, models are evaluated based on that custom evaluation metric.

predictions: bool, optional (default=False)
    When set to True, the predictions of all the models are returned as a dataframe.

sort_by: string, optional (default='RMSE')
    Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken'
    and 'Custom Metric', i.e. a custom metric identified by its name and provided by custom_metric.

random_state: int, optional (default=42)
    Reproducibility seed.

estimators: list, optional (default='all')
    List of estimator names, or just 'all'.

preprocess: bool
    When set to True, preprocessing is applied to the features.

n_jobs: int, optional
    Number of jobs to run in parallel, when possible. For now, only used by the individual models that support it.

n_layers: int, optional (default=3)
    Number of layers of CustomRegressors to be used.

All the other parameters are the same as CustomRegressor's.

Attributes:

models_: dict-object
    Dictionary with model names as keys and fitted model pipelines as values.

best_model_: object
    Returns the best model pipeline based on the sort_by metric.

Examples:

import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle

diabetes = datasets.load_diabetes()
X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
X = X.astype(np.float32)

offset = int(X.shape[0] * 0.9)
X_train, y_train = X[:offset], y[:offset]
X_test, y_test = X[offset:], y[offset:]

reg = ns.LazyDeepRegressor(verbose=0, ignore_warnings=False, custom_metric=None, predictions=True)
models, predictions = reg.fit(X_train, X_test, y_train, y_test)
print(models)
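
As an illustrative sketch (not part of the original example), the best model and the full dictionary of fitted models can then be retrieved through get_best_model and provide_models, documented below; the available keys depend on which estimators actually ran:

best = reg.get_best_model()  # fitted pipeline ranked first by the sort_by metric
print(best.predict(X_test)[:5])  # predictions of the best model on the test set
all_models = reg.provide_models(X_train, X_test, y_train, y_test)
print(list(all_models.keys()))  # names of all fitted models
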
def fit(self, X_train, X_test, y_train, y_test):
222    def fit(self, X_train, X_test, y_train, y_test):
223        """Fit Regression algorithms to X_train and y_train, predict and score on X_test, y_test.
224
225        Parameters:
226
227            X_train : array-like,
228                Training vectors, where rows is the number of samples
229                and columns is the number of features.
230
231            X_test : array-like,
232                Testing vectors, where rows is the number of samples
233                and columns is the number of features.
234
235            y_train : array-like,
236                Training vectors, where rows is the number of samples
237                and columns is the number of features.
238
239            y_test : array-like,
240                Testing vectors, where rows is the number of samples
241                and columns is the number of features.
242
243        Returns:
244        -------
245        scores:  Pandas DataFrame
246            Returns metrics of all the models in a Pandas DataFrame.
247
248        predictions : Pandas DataFrame
249            Returns predictions of all the models in a Pandas DataFrame.
250
251        """
252        R2 = []
253        ADJR2 = []
254        RMSE = []
255        # WIN = []
256        names = []
257        TIME = []
258        predictions = {}
259
260        if self.custom_metric:
261            CUSTOM_METRIC = []
262
263        if isinstance(X_train, np.ndarray):
264            X_train = pd.DataFrame(X_train)
265            X_test = pd.DataFrame(X_test)
266
267        numeric_features = X_train.select_dtypes(include=[np.number]).columns
268        categorical_features = X_train.select_dtypes(include=["object"]).columns
269
270        categorical_low, categorical_high = get_card_split(
271            X_train, categorical_features
272        )
273
274        if self.preprocess is True:
275            preprocessor = ColumnTransformer(
276                transformers=[
277                    ("numeric", numeric_transformer, numeric_features),
278                    (
279                        "categorical_low",
280                        categorical_transformer_low,
281                        categorical_low,
282                    ),
283                    (
284                        "categorical_high",
285                        categorical_transformer_high,
286                        categorical_high,
287                    ),
288                ]
289            )
290
291        # base models
292        try:
293            baseline_names = ["RandomForestRegressor", "XGBRegressor"]
294            baseline_models = [RandomForestRegressor(), xgb.XGBRegressor()]
295        except Exception as exception:
296            baseline_names = ["RandomForestRegressor"]
297            baseline_models = [RandomForestRegressor()]
298
299        for name, model in zip(baseline_names, baseline_models):
300            start = time.time()
301            try:
302                model.fit(X_train, y_train)
303                self.models_[name] = model
304                y_pred = model.predict(X_test)
305                r_squared = r2_score(y_test, y_pred)
306                adj_rsquared = adjusted_rsquared(
307                    r_squared, X_test.shape[0], X_test.shape[1]
308                )
309                rmse = np.sqrt(np.mean((y_test - y_pred) ** 2))
310
311                names.append(name)
312                R2.append(r_squared)
313                ADJR2.append(adj_rsquared)
314                RMSE.append(rmse)
315                TIME.append(time.time() - start)
316
317                if self.custom_metric:
318                    custom_metric = self.custom_metric(y_test, y_pred)
319                    CUSTOM_METRIC.append(custom_metric)
320
321                if self.verbose > 0:
322                    scores_verbose = {
323                        "Model": name,
324                        "R-Squared": r_squared,
325                        "Adjusted R-Squared": adj_rsquared,
326                        "RMSE": rmse,
327                        "Time taken": time.time() - start,
328                    }
329
330                    if self.custom_metric:
331                        scores_verbose[self.custom_metric.__name__] = custom_metric
332
333                    print(scores_verbose)
334                if self.predictions:
335                    predictions[name] = y_pred
336            except Exception as exception:
337                if self.ignore_warnings is False:
338                    print(name + " model failed to execute")
339                    print(exception)
340
341        if self.estimators == "all":
342            self.regressors = DEEPREGRESSORS
343        else:
344            self.regressors = [
345                ("DeepCustomRegressor(" + est[0] + ")", est[1])
346                for est in all_estimators()
347                if (issubclass(est[1], RegressorMixin) and (est[0] in self.estimators))
348            ]
349
350        if self.preprocess is True:
351
352            for name, model in tqdm(self.regressors):  # do parallel exec
353                start = time.time()
354                try:
355                    if "random_state" in model().get_params().keys():
356                        layer_regr = CustomRegressor(
357                            obj=model(random_state=self.random_state),
358                            n_hidden_features=self.n_hidden_features,
359                            activation_name=self.activation_name,
360                            a=self.a,
361                            nodes_sim=self.nodes_sim,
362                            bias=self.bias,
363                            dropout=self.dropout,
364                            direct_link=self.direct_link,
365                            n_clusters=self.n_clusters,
366                            cluster_encode=self.cluster_encode,
367                            type_clust=self.type_clust,
368                            type_scaling=self.type_scaling,
369                            col_sample=self.col_sample,
370                            row_sample=self.row_sample,
371                            seed=self.seed,
372                            backend=self.backend,
373                        )
374                    else:
375                        layer_regr = CustomRegressor(
376                            obj=model(),
377                            n_hidden_features=self.n_hidden_features,
378                            activation_name=self.activation_name,
379                            a=self.a,
380                            nodes_sim=self.nodes_sim,
381                            bias=self.bias,
382                            dropout=self.dropout,
383                            direct_link=self.direct_link,
384                            n_clusters=self.n_clusters,
385                            cluster_encode=self.cluster_encode,
386                            type_clust=self.type_clust,
387                            type_scaling=self.type_scaling,
388                            col_sample=self.col_sample,
389                            row_sample=self.row_sample,
390                            seed=self.seed,
391                            backend=self.backend,
392                        )
393
394                    for _ in range(self.n_layers):
395                        layer_regr = deepcopy(
396                            CustomRegressor(
397                                obj=layer_regr,
398                                n_hidden_features=self.n_hidden_features,
399                                activation_name=self.activation_name,
400                                a=self.a,
401                                nodes_sim=self.nodes_sim,
402                                bias=self.bias,
403                                dropout=self.dropout,
404                                direct_link=self.direct_link,
405                                n_clusters=self.n_clusters,
406                                cluster_encode=self.cluster_encode,
407                                type_clust=self.type_clust,
408                                type_scaling=self.type_scaling,
409                                col_sample=self.col_sample,
410                                row_sample=self.row_sample,
411                                seed=self.seed,
412                                backend=self.backend,
413                            )
414                        )
415
416                    layer_regr.fit(X_train, y_train)
417
418                    pipe = Pipeline(
419                        steps=[
420                            ("preprocessor", preprocessor),
421                            ("regressor", layer_regr),
422                        ]
423                    )
424
425                    pipe.fit(X_train, y_train)
426
427                    self.models_[name] = pipe
428                    y_pred = pipe.predict(X_test)
429                    r_squared = r2_score(y_test, y_pred)
430                    adj_rsquared = adjusted_rsquared(
431                        r_squared, X_test.shape[0], X_test.shape[1]
432                    )
433                    rmse = np.sqrt(np.mean((y_test - y_pred) ** 2))
434
435                    names.append(name)
436                    R2.append(r_squared)
437                    ADJR2.append(adj_rsquared)
438                    RMSE.append(rmse)
439                    TIME.append(time.time() - start)
440
441                    if self.custom_metric:
442                        custom_metric = self.custom_metric(y_test, y_pred)
443                        CUSTOM_METRIC.append(custom_metric)
444
445                    if self.verbose > 0:
446                        scores_verbose = {
447                            "Model": name,
448                            "R-Squared": r_squared,
449                            "Adjusted R-Squared": adj_rsquared,
450                            "RMSE": rmse,
451                            "Time taken": time.time() - start,
452                        }
453
454                        if self.custom_metric:
455                            scores_verbose[self.custom_metric.__name__] = custom_metric
456
457                        print(scores_verbose)
458                    if self.predictions:
459                        predictions[name] = y_pred
460                except Exception as exception:
461                    if self.ignore_warnings is False:
462                        print(name + " model failed to execute")
463                        print(exception)
464
465        else:  # no preprocessing
466
467            for name, model in tqdm(self.regressors):  # do parallel exec
468                start = time.time()
469                try:
470                    if "random_state" in model().get_params().keys():
471                        layer_regr = CustomRegressor(
472                            obj=model(random_state=self.random_state),
473                            n_hidden_features=self.n_hidden_features,
474                            activation_name=self.activation_name,
475                            a=self.a,
476                            nodes_sim=self.nodes_sim,
477                            bias=self.bias,
478                            dropout=self.dropout,
479                            direct_link=self.direct_link,
480                            n_clusters=self.n_clusters,
481                            cluster_encode=self.cluster_encode,
482                            type_clust=self.type_clust,
483                            type_scaling=self.type_scaling,
484                            col_sample=self.col_sample,
485                            row_sample=self.row_sample,
486                            seed=self.seed,
487                            backend=self.backend,
488                        )
489                    else:
490                        layer_regr = CustomRegressor(
491                            obj=model(),
492                            n_hidden_features=self.n_hidden_features,
493                            activation_name=self.activation_name,
494                            a=self.a,
495                            nodes_sim=self.nodes_sim,
496                            bias=self.bias,
497                            dropout=self.dropout,
498                            direct_link=self.direct_link,
499                            n_clusters=self.n_clusters,
500                            cluster_encode=self.cluster_encode,
501                            type_clust=self.type_clust,
502                            type_scaling=self.type_scaling,
503                            col_sample=self.col_sample,
504                            row_sample=self.row_sample,
505                            seed=self.seed,
506                            backend=self.backend,
507                        )
508
509                    layer_regr.fit(X_train, y_train)
510
511                    for _ in range(self.n_layers):
512                        layer_regr = deepcopy(
513                            CustomRegressor(
514                                obj=layer_regr,
515                                n_hidden_features=self.n_hidden_features,
516                                activation_name=self.activation_name,
517                                a=self.a,
518                                nodes_sim=self.nodes_sim,
519                                bias=self.bias,
520                                dropout=self.dropout,
521                                direct_link=self.direct_link,
522                                n_clusters=self.n_clusters,
523                                cluster_encode=self.cluster_encode,
524                                type_clust=self.type_clust,
525                                type_scaling=self.type_scaling,
526                                col_sample=self.col_sample,
527                                row_sample=self.row_sample,
528                                seed=self.seed,
529                                backend=self.backend,
530                            )
531                        )
532
533                        # layer_regr.fit(X_train, y_train)
534
535                    layer_regr.fit(X_train, y_train)
536
537                    self.models_[name] = layer_regr
538                    y_pred = layer_regr.predict(X_test)
539
540                    r_squared = r2_score(y_test, y_pred)
541                    adj_rsquared = adjusted_rsquared(
542                        r_squared, X_test.shape[0], X_test.shape[1]
543                    )
544                    rmse = np.sqrt(np.mean((y_test - y_pred) ** 2))
545
546                    names.append(name)
547                    R2.append(r_squared)
548                    ADJR2.append(adj_rsquared)
549                    RMSE.append(rmse)
550                    TIME.append(time.time() - start)
551
552                    if self.custom_metric:
553                        custom_metric = self.custom_metric(y_test, y_pred)
554                        CUSTOM_METRIC.append(custom_metric)
555
556                    if self.verbose > 0:
557                        scores_verbose = {
558                            "Model": name,
559                            "R-Squared": r_squared,
560                            "Adjusted R-Squared": adj_rsquared,
561                            "RMSE": rmse,
562                            "Time taken": time.time() - start,
563                        }
564
565                        if self.custom_metric:
566                            scores_verbose[self.custom_metric.__name__] = custom_metric
567
568                        print(scores_verbose)
569                    if self.predictions:
570                        predictions[name] = y_pred
571                except Exception as exception:
572                    if self.ignore_warnings is False:
573                        print(name + " model failed to execute")
574                        print(exception)
575
576        scores = {
577            "Model": names,
578            "Adjusted R-Squared": ADJR2,
579            "R-Squared": R2,
580            "RMSE": RMSE,
581            "Time Taken": TIME,
582        }
583
584        if self.custom_metric:
585            scores["Custom metric"] = CUSTOM_METRIC
586
587        scores = pd.DataFrame(scores)
588        scores = scores.sort_values(by=self.sort_by, ascending=True).set_index("Model")
589
590        self.best_model_ = self.models_[scores.index[0]]
591
592        if self.predictions is True:
593
594            return scores, predictions
595
596        return scores

Fit Regression algorithms to X_train and y_train, predict and score on X_test, y_test.

Parameters:

X_train : array-like,
    Training vectors, where rows is the number of samples
    and columns is the number of features.

X_test : array-like,
    Testing vectors, where rows is the number of samples
    and columns is the number of features.

y_train : array-like,
    Training target values, with one value per row of X_train.

y_test : array-like,
    Testing target values, with one value per row of X_test.

Returns:

scores: Pandas DataFrame
    Metrics of all the models.

predictions : Pandas DataFrame
    Predictions of all the models (only returned when predictions=True).
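
A short usage sketch (assuming the diabetes train/test split from the class example above; predictions=True is set here so that both objects are returned, and "RandomForestRegressor" is one of the baseline models fitted internally):

reg = ns.LazyDeepRegressor(verbose=0, ignore_warnings=True, predictions=True)
scores, predictions = reg.fit(X_train, X_test, y_train, y_test)
print(scores.head())  # models ranked by the sort_by metric ('RMSE' by default)
print(predictions["RandomForestRegressor"][:5])  # test-set predictions, keyed by model name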

def provide_models(self, X_train, X_test, y_train, y_test):
610    def provide_models(self, X_train, X_test, y_train, y_test):
611        """
612        This function returns all the model objects trained in fit function.
613        If fit is not called already, then we call fit and then return the models.
614
615        Parameters:
616
617            X_train : array-like,
618                Training vectors, where rows is the number of samples
619                and columns is the number of features.
620
621            X_test : array-like,
622                Testing vectors, where rows is the number of samples
623                and columns is the number of features.
624
625            y_train : array-like,
626                Training vectors, where rows is the number of samples
627                and columns is the number of features.
628
629            y_test : array-like,
630                Testing vectors, where rows is the number of samples
631                and columns is the number of features.
632
633        Returns:
634
635            models: dict-object,
636                Returns a dictionary with each model pipeline as value
637                with key as name of models.
638
639        """
640        if len(self.models_.keys()) == 0:
641            self.fit(X_train, X_test, y_train, y_test)
642
643        return self.models_

This function returns all the model objects trained in the fit function. If fit has not been called already, fit is called first and then the models are returned.

Parameters:

X_train : array-like,
    Training vectors, where rows is the number of samples
    and columns is the number of features.

X_test : array-like,
    Testing vectors, where rows is the number of samples
    and columns is the number of features.

y_train : array-like,
    Training target values, with one value per row of X_train.

y_test : array-like,
    Testing target values, with one value per row of X_test.

Returns:

models: dict-object,
    Dictionary with model names as keys and fitted model pipelines as values.
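
A minimal sketch (reusing the reg instance and train/test split from the examples above):

models = reg.provide_models(X_train, X_test, y_train, y_test)
for name, model in models.items():
    print(name, type(model).__name__)  # e.g. baseline regressors, CustomRegressor stacks or Pipelines
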
class LazyMTS(nnetsauce.LazyDeepMTS):
 988class LazyMTS(LazyDeepMTS):
 989    """
 990    Fitting -- almost -- all the regression algorithms to multivariate time series
 991    and returning their scores (no layers).
 992
 993    Parameters:
 994
 995        verbose: int, optional (default=0)
 996            Any positive number for verbosity.
 997
 998        ignore_warnings: bool, optional (default=True)
 999            When set to True, the warning related to algorithms that are not
1000            able to run are ignored.
1001
1002        custom_metric: function, optional (default=None)
1003            When function is provided, models are evaluated based on the custom
1004              evaluation metric provided.
1005
1006        predictions: bool, optional (default=False)
1007            When set to True, the predictions of all the models are returned as a dataframe.
1008
1009        sort_by: string, optional (default='RMSE')
1010            Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE',
1011            'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and
1012            provided by custom_metric.
1013
1014        random_state: int, optional (default=42)
1015            Reproducibility seed.
1016
1017        estimators: list, optional (default='all')
1018            list of Estimators (regression algorithms) names or just 'all' (default='all')
1019
1020        preprocess: bool, preprocessing is done when set to True
1021
1022        h: int, optional (default=None)
1023            Number of steps ahead to predict (when used, must be > 0 and < X_test.shape[0]).
1024
1025        All the other parameters are the same as MTS's.
1026
1027    Attributes:
1028
1029        models_: dict-object
1030            Returns a dictionary with each model pipeline as value
1031            with key as name of models.
1032
1033        best_model_: object
1034            Returns the best model pipeline based on the sort_by metric.
1035
1036    Examples:
1037
1038        See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict
1039
1040    """
1041
1042    def __init__(
1043        self,
1044        verbose=0,
1045        ignore_warnings=True,
1046        custom_metric=None,
1047        predictions=False,
1048        sort_by=None,  # leave it as is
1049        random_state=42,
1050        estimators="all",
1051        preprocess=False,
1052        h=None,
1053        # MTS attributes
1054        obj=None,
1055        n_hidden_features=5,
1056        activation_name="relu",
1057        a=0.01,
1058        nodes_sim="sobol",
1059        bias=True,
1060        dropout=0,
1061        direct_link=True,
1062        n_clusters=2,
1063        cluster_encode=True,
1064        type_clust="kmeans",
1065        type_scaling=("std", "std", "std"),
1066        lags=15,
1067        type_pi="scp2-kde",
1068        block_size=None,
1069        replications=None,
1070        kernel=None,
1071        agg="mean",
1072        seed=123,
1073        backend="cpu",
1074        show_progress=False,
1075    ):
1076        super().__init__(
1077            verbose=verbose,
1078            ignore_warnings=ignore_warnings,
1079            custom_metric=custom_metric,
1080            predictions=predictions,
1081            sort_by=sort_by,
1082            random_state=random_state,
1083            estimators=estimators,
1084            preprocess=preprocess,
1085            n_layers=1,
1086            h=h,
1087            obj=obj,
1088            n_hidden_features=n_hidden_features,
1089            activation_name=activation_name,
1090            a=a,
1091            nodes_sim=nodes_sim,
1092            bias=bias,
1093            dropout=dropout,
1094            direct_link=direct_link,
1095            n_clusters=n_clusters,
1096            cluster_encode=cluster_encode,
1097            type_clust=type_clust,
1098            type_scaling=type_scaling,
1099            lags=lags,
1100            type_pi=type_pi,
1101            block_size=block_size,
1102            replications=replications,
1103            kernel=kernel,
1104            agg=agg,
1105            seed=seed,
1106            backend=backend,
1107            show_progress=show_progress,
1108        )

Fitting -- almost -- all the regression algorithms to multivariate time series and returning their scores (no layers).

Parameters:

verbose: int, optional (default=0)
    Any positive number for verbosity.

ignore_warnings: bool, optional (default=True)
    When set to True, the warning related to algorigms that are not
    able to run are ignored.

custom_metric: function, optional (default=None)
    When a function is provided, models are evaluated based on that custom
    evaluation metric.

predictions: bool, optional (default=False)
    When set to True, the predictions of all the models are returned as a dataframe.

sort_by: string, optional (default='RMSE')
    Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE',
    'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and
    provided by custom_metric.

random_state: int, optional (default=42)
    Reproducibility seed.

estimators: list, optional (default='all')
    List of estimator (regression algorithm) names, or just 'all'.

preprocess: bool
    When set to True, preprocessing is applied to the series.

h: int, optional (default=None)
    Number of steps ahead to predict (when used, must be > 0 and < X_test.shape[0]).

All the other parameters are the same as MTS's.

Attributes:

models_: dict-object
    Dictionary with model names as keys and fitted model pipelines as values.

best_model_: object
    Returns the best model pipeline based on the sort_by metric.

Examples:

See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict
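
A minimal sketch on synthetic data (the series, split sizes, lags and replications below are arbitrary illustrative choices, and fit is assumed to return only the scores frame when predictions is left at False, mirroring LazyDeepRegressor.fit above):

import nnetsauce as ns
import numpy as np
import pandas as pd

np.random.seed(123)
X = pd.DataFrame(
    np.random.randn(120, 3).cumsum(axis=0),
    columns=["series1", "series2", "series3"],
)
X_train, X_test = X.iloc[:100, :], X.iloc[100:, :]

regr_mts = ns.LazyMTS(verbose=0, ignore_warnings=True, lags=5, replications=10)
scores = regr_mts.fit(X_train, X_test)
print(scores)
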
class LazyDeepMTS(nnetsauce.MTS):
104class LazyDeepMTS(MTS):
105    """
106
107    Fitting -- almost -- all the regression algorithms with layers of
108    nnetsauce's CustomRegressor to multivariate time series
109    and returning their scores.
110
111    Parameters:
112
113        verbose: int, optional (default=0)
114            Any positive number for verbosity.
115
116        ignore_warnings: bool, optional (default=True)
117            When set to True, the warning related to algorithms that are not
118            able to run are ignored.
119
120        custom_metric: function, optional (default=None)
121            When function is provided, models are evaluated based on the custom
122              evaluation metric provided.
123
124        predictions: bool, optional (default=False)
125            When set to True, the predictions of all the models are returned as a dataframe.
126
127        sort_by: string, optional (default='RMSE')
128            Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE',
129            'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and
130            provided by custom_metric.
131
132        random_state: int, optional (default=42)
133            Reproducibility seed.
134
135        estimators: list, optional (default='all')
136            list of Estimators (regression algorithms) names or just 'all' (default='all')
137
138        preprocess: bool, preprocessing is done when set to True
139
140        n_layers: int, optional (default=1)
141            Number of layers in the network. When set to 1, the model is equivalent to a MTS.
142
143        h: int, optional (default=None)
144            Number of steps ahead to predict (when used, must be > 0 and < X_test.shape[0]).
145
146        All the other parameters are the same as MTS's.
147
148    Attributes:
149
150        models_: dict-object
151            Returns a dictionary with each model pipeline as value
152            with key as name of models.
153
154        best_model_: object
155            Returns the best model pipeline based on the sort_by metric.
156
157    Examples:
158
159        See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict
160
161    """
162
163    def __init__(
164        self,
165        verbose=0,
166        ignore_warnings=True,
167        custom_metric=None,
168        predictions=False,
169        sort_by=None,  # leave it as is
170        random_state=42,
171        estimators="all",
172        preprocess=False,
173        n_layers=1,
174        h=None,
175        # MTS attributes
176        obj=None,
177        n_hidden_features=5,
178        activation_name="relu",
179        a=0.01,
180        nodes_sim="sobol",
181        bias=True,
182        dropout=0,
183        direct_link=True,
184        n_clusters=2,
185        cluster_encode=True,
186        type_clust="kmeans",
187        type_scaling=("std", "std", "std"),
188        lags=15,
189        type_pi="scp2-kde",
190        block_size=None,
191        replications=None,
192        kernel=None,
193        agg="mean",
194        seed=123,
195        backend="cpu",
196        show_progress=False,
197    ):
198        self.verbose = verbose
199        self.ignore_warnings = ignore_warnings
200        self.custom_metric = custom_metric
201        self.predictions = predictions
202        self.sort_by = sort_by
203        self.models_ = {}
204        self.best_model_ = None
205        self.random_state = random_state
206        self.estimators = estimators
207        self.preprocess = preprocess
208        self.n_layers = n_layers
209        self.h = h
210        super().__init__(
211            obj=obj,
212            n_hidden_features=n_hidden_features,
213            activation_name=activation_name,
214            a=a,
215            nodes_sim=nodes_sim,
216            bias=bias,
217            dropout=dropout,
218            direct_link=direct_link,
219            n_clusters=n_clusters,
220            cluster_encode=cluster_encode,
221            type_clust=type_clust,
222            type_scaling=type_scaling,
223            seed=seed,
224            backend=backend,
225            lags=lags,
226            type_pi=type_pi,
227            block_size=block_size,
228            replications=replications,
229            kernel=kernel,
230            agg=agg,
231            verbose=verbose,
232            show_progress=show_progress,
233        )
234        if self.replications is not None or self.type_pi == "gaussian":
235            if self.sort_by is None:
236                self.sort_by = "WINKLERSCORE"
237        else:
238            if self.sort_by is None:
239                self.sort_by = "RMSE"
240
241    def fit(self, X_train, X_test, xreg=None, per_series=False, **kwargs):
242        """Fit Regression algorithms to X_train, predict and score on X_test.
243
244        Parameters:
245
246            X_train: array-like or data frame,
247                Training vectors, where rows is the number of samples
248                and columns is the number of features.
249
250            X_test: array-like or data frame,
251                Testing vectors, where rows is the number of samples
252                and columns is the number of features.
253
254            xreg: array-like, optional (default=None)
255                Additional (external) regressors to be passed to self.obj
256                xreg must be in 'increasing' order (most recent observations last)
257
258            per_series: bool, optional (default=False)
259                When set to True, the metrics are computed series by series.
260
261            **kwargs: dict, optional (default=None)
262                Additional parameters to be passed to `fit` method of `obj`.
263
264        Returns:
265
266            scores: Pandas DataFrame
267                Returns metrics of all the models in a Pandas DataFrame.
268
269            predictions: Pandas DataFrame
270                Returns predictions of all the models in a Pandas DataFrame.
271
272        """
273        R2 = []
274        ADJR2 = []
275        ME = []
276        MPL = []
277        RMSE = []
278        MAE = []
279        MPE = []
280        MAPE = []
281        WINKLERSCORE = []
282        COVERAGE = []
283
284        # WIN = []
285        names = []
286        TIME = []
287        predictions = {}
288
289        if self.custom_metric is not None:
290            CUSTOM_METRIC = []
291
292        if self.h is None:
293            assert X_test is not None, "If h is None, X_test must be provided."
294
295        if isinstance(X_train, np.ndarray):
296            X_train = pd.DataFrame(X_train)
297            X_test = pd.DataFrame(X_test)
298
299        self.series_names = X_train.columns.tolist()
300
301        X_train = convert_df_to_numeric(X_train)
302        X_test = convert_df_to_numeric(X_test)
303
304        numeric_features = X_train.select_dtypes(include=[np.number]).columns
305        categorical_features = X_train.select_dtypes(include=["object"]).columns
306
307        categorical_low, categorical_high = get_card_split(
308            X_train, categorical_features
309        )
310
311        if self.preprocess:
312            preprocessor = ColumnTransformer(
313                transformers=[
314                    ("numeric", numeric_transformer, numeric_features),
315                    (
316                        "categorical_low",
317                        categorical_transformer_low,
318                        categorical_low,
319                    ),
320                    (
321                        "categorical_high",
322                        categorical_transformer_high,
323                        categorical_high,
324                    ),
325                ]
326            )
327
328        # baselines (Classical MTS) ----
329        for i, name in enumerate(["ARIMA", "ETS", "Theta", "VAR", "VECM"]):
330            try:
331                start = time.time()
332                regr = ClassicalMTS(model=name)
333                regr.fit(X_train, **kwargs)
334                self.models_[name] = regr
335                if self.h is None:
336                    X_pred = regr.predict(h=X_test.shape[0], **kwargs)
337                else:
338                    assert self.h > 0, "h must be > 0"
339                    X_pred = regr.predict(h=self.h, **kwargs)
340                    try:
341                        X_test = X_test[0 : self.h, :]
342                    except Exception as e:
343                        X_test = X_test.iloc[0 : self.h, :]
344
345                rmse = mean_errors(
346                    actual=X_test,
347                    pred=X_pred,
348                    scoring="root_mean_squared_error",
349                    per_series=per_series,
350                )
351                mae = mean_errors(
352                    actual=X_test,
353                    pred=X_pred,
354                    scoring="mean_absolute_error",
355                    per_series=per_series,
356                )
357                mpl = mean_errors(
358                    actual=X_test,
359                    pred=X_pred,
360                    scoring="mean_pinball_loss",
361                    per_series=per_series,
362                )
363            except Exception:
364
365                continue
366
367            names.append(name)
368            RMSE.append(rmse)
369            MAE.append(mae)
370            MPL.append(mpl)
371
372            if self.custom_metric is not None:
373                try:
374                    if self.h is None:
375                        custom_metric = self.custom_metric(X_test, X_pred)
376                    else:
377                        custom_metric = self.custom_metric(X_test, X_pred)
378                    CUSTOM_METRIC.append(custom_metric)
379                except Exception as e:
380                    custom_metric = np.iinfo(np.float32).max
381                    CUSTOM_METRIC.append(np.iinfo(np.float32).max)
382
383            if (self.replications is not None) or (self.type_pi == "gaussian"):
384                if per_series == False:
385                    winklerscore = winkler_score(obj=X_pred, actual=X_test, level=95)
386                    coveragecalc = coverage(X_pred, X_test, level=95)
387                else:
388                    winklerscore = winkler_score(
389                        obj=X_pred, actual=X_test, level=95, per_series=True
390                    )
391                    coveragecalc = coverage(X_pred, X_test, level=95, per_series=True)
392                WINKLERSCORE.append(winklerscore)
393                COVERAGE.append(coveragecalc)
394            TIME.append(time.time() - start)
395
396        if self.estimators == "all":
397            if self.n_layers <= 1:
398                self.regressors = REGRESSORSMTS
399            else:
400                self.regressors = DEEPREGRESSORSMTS
401        else:
402            if self.n_layers <= 1:
403                self.regressors = [
404                    ("MTS(" + est[0] + ")", est[1])
405                    for est in all_estimators()
406                    if (
407                        issubclass(est[1], RegressorMixin)
408                        and (est[0] in self.estimators)
409                    )
410                ]
411            else:  # self.n_layers > 1
412                self.regressors = [
413                    ("DeepMTS(" + est[0] + ")", est[1])
414                    for est in all_estimators()
415                    if (
416                        issubclass(est[1], RegressorMixin)
417                        and (est[0] in self.estimators)
418                    )
419                ]
420
421        if self.preprocess is True:
422            for name, model in tqdm(self.regressors):  # do parallel exec
423                start = time.time()
424                try:
425                    if "random_state" in model().get_params().keys():
426                        pipe = Pipeline(
427                            steps=[
428                                ("preprocessor", preprocessor),
429                                (
430                                    "regressor",
431                                    DeepMTS(
432                                        obj=model(
433                                            random_state=self.random_state,
434                                            **kwargs,
435                                        ),
436                                        n_layers=self.n_layers,
437                                        n_hidden_features=self.n_hidden_features,
438                                        activation_name=self.activation_name,
439                                        a=self.a,
440                                        nodes_sim=self.nodes_sim,
441                                        bias=self.bias,
442                                        dropout=self.dropout,
443                                        direct_link=self.direct_link,
444                                        n_clusters=self.n_clusters,
445                                        cluster_encode=self.cluster_encode,
446                                        type_clust=self.type_clust,
447                                        type_scaling=self.type_scaling,
448                                        lags=self.lags,
449                                        type_pi=self.type_pi,
450                                        block_size=self.block_size,
451                                        replications=self.replications,
452                                        kernel=self.kernel,
453                                        agg=self.agg,
454                                        seed=self.seed,
455                                        backend=self.backend,
456                                        show_progress=self.show_progress,
457                                    ),
458                                ),
459                            ]
460                        )
461                    else:  # "random_state" in model().get_params().keys()
462                        pipe = Pipeline(
463                            steps=[
464                                ("preprocessor", preprocessor),
465                                (
466                                    "regressor",
467                                    DeepMTS(
468                                        obj=model(**kwargs),
469                                        n_layers=self.n_layers,
470                                        n_hidden_features=self.n_hidden_features,
471                                        activation_name=self.activation_name,
472                                        a=self.a,
473                                        nodes_sim=self.nodes_sim,
474                                        bias=self.bias,
475                                        dropout=self.dropout,
476                                        direct_link=self.direct_link,
477                                        n_clusters=self.n_clusters,
478                                        cluster_encode=self.cluster_encode,
479                                        type_clust=self.type_clust,
480                                        type_scaling=self.type_scaling,
481                                        lags=self.lags,
482                                        type_pi=self.type_pi,
483                                        block_size=self.block_size,
484                                        replications=self.replications,
485                                        kernel=self.kernel,
486                                        agg=self.agg,
487                                        seed=self.seed,
488                                        backend=self.backend,
489                                        show_progress=self.show_progress,
490                                    ),
491                                ),
492                            ]
493                        )
494
495                    pipe.fit(X_train, **kwargs)
496                    # pipe.fit(X_train, xreg=xreg)
497
498                    self.models_[name] = pipe
499
500                    if self.h is None:
501                        X_pred = pipe["regressor"].predict(h=X_test.shape[0], **kwargs)
502                    else:
503                        assert self.h > 0, "h must be > 0"
504                        X_pred = pipe["regressor"].predict(h=self.h, **kwargs)
505
506                    if (self.replications is not None) or (self.type_pi == "gaussian"):
507                        rmse = mean_errors(
508                            actual=X_test,
509                            pred=X_pred,
510                            scoring="root_mean_squared_error",
511                            per_series=per_series,
512                        )
513                        mae = mean_errors(
514                            actual=X_test,
515                            pred=X_pred,
516                            scoring="mean_absolute_error",
517                            per_series=per_series,
518                        )
519                        mpl = mean_errors(
520                            actual=X_test,
521                            pred=X_pred,
522                            scoring="mean_pinball_loss",
523                            per_series=per_series,
524                        )
525                        winklerscore = winkler_score(
526                            obj=X_pred,
527                            actual=X_test,
528                            level=95,
529                            per_series=per_series,
530                        )
531                        coveragecalc = coverage(
532                            X_pred, X_test, level=95, per_series=per_series
533                        )
534                    else:
535                        rmse = mean_errors(
536                            actual=X_test,
537                            pred=X_pred,
538                            scoring="root_mean_squared_error",
539                            per_series=per_series,
540                        )
541                        mae = mean_errors(
542                            actual=X_test,
543                            pred=X_pred,
544                            scoring="mean_absolute_error",
545                            per_series=per_series,
546                        )
547                        mpl = mean_errors(
548                            actual=X_test,
549                            pred=X_pred,
550                            scoring="mean_pinball_loss",
551                            per_series=per_series,
552                        )
553
554                    names.append(name)
555                    RMSE.append(rmse)
556                    MAE.append(mae)
557                    MPL.append(mpl)
558
559                    if (self.replications is not None) or (self.type_pi == "gaussian"):
560                        WINKLERSCORE.append(winklerscore)
561                        COVERAGE.append(coveragecalc)
562                    TIME.append(time.time() - start)
563
564                    if self.custom_metric is not None:
565                        try:
566                            custom_metric = self.custom_metric(X_test, X_pred)
567                            CUSTOM_METRIC.append(custom_metric)
568                        except Exception as e:
569                            custom_metric = np.iinfo(np.float32).max
570                            CUSTOM_METRIC.append(custom_metric)
571
572                    if self.verbose > 0:
573                        if (self.replications is not None) or (
574                            self.type_pi == "gaussian"
575                        ):
576                            scores_verbose = {
577                                "Model": name,
578                                "RMSE": rmse,
579                                "MAE": mae,
580                                "MPL": mpl,
581                                "WINKLERSCORE": winklerscore,
582                                "COVERAGE": coveragecalc,
583                                "Time taken": time.time() - start,
584                            }
585                        else:
586                            scores_verbose = {
587                                "Model": name,
588                                "RMSE": rmse,
589                                "MAE": mae,
590                                "MPL": mpl,
591                                "Time taken": time.time() - start,
592                            }
593
594                        if self.custom_metric is not None:
595                            scores_verbose["Custom metric"] = custom_metric
596                        print(scores_verbose)
597                    if self.predictions:
598                        predictions[name] = X_pred
599                except Exception as exception:
600                    if self.ignore_warnings is False:
601                        print(name + " model failed to execute")
602                        print(exception)
603
604        else:  # no preprocessing
605
606            for name, model in tqdm(self.regressors):  # do parallel exec
607                start = time.time()
608                try:
609                    if "random_state" in model().get_params().keys():
610                        pipe = DeepMTS(
611                            obj=model(random_state=self.random_state, **kwargs),
612                            n_layers=self.n_layers,
613                            n_hidden_features=self.n_hidden_features,
614                            activation_name=self.activation_name,
615                            a=self.a,
616                            nodes_sim=self.nodes_sim,
617                            bias=self.bias,
618                            dropout=self.dropout,
619                            direct_link=self.direct_link,
620                            n_clusters=self.n_clusters,
621                            cluster_encode=self.cluster_encode,
622                            type_clust=self.type_clust,
623                            type_scaling=self.type_scaling,
624                            lags=self.lags,
625                            type_pi=self.type_pi,
626                            block_size=self.block_size,
627                            replications=self.replications,
628                            kernel=self.kernel,
629                            agg=self.agg,
630                            seed=self.seed,
631                            backend=self.backend,
632                            show_progress=self.show_progress,
633                        )
634                    else:
635                        pipe = DeepMTS(
636                            obj=model(**kwargs),
637                            n_layers=self.n_layers,
638                            n_hidden_features=self.n_hidden_features,
639                            activation_name=self.activation_name,
640                            a=self.a,
641                            nodes_sim=self.nodes_sim,
642                            bias=self.bias,
643                            dropout=self.dropout,
644                            direct_link=self.direct_link,
645                            n_clusters=self.n_clusters,
646                            cluster_encode=self.cluster_encode,
647                            type_clust=self.type_clust,
648                            type_scaling=self.type_scaling,
649                            lags=self.lags,
650                            type_pi=self.type_pi,
651                            block_size=self.block_size,
652                            replications=self.replications,
653                            kernel=self.kernel,
654                            agg=self.agg,
655                            seed=self.seed,
656                            backend=self.backend,
657                            show_progress=self.show_progress,
658                        )
659
660                    pipe.fit(X_train, xreg, **kwargs)
661                    # pipe.fit(X_train, xreg=xreg) # DO xreg like in `ahead`
662
663                    self.models_[name] = pipe
664
665                    if self.preprocess is True:
666                        if self.h is None:
667                            X_pred = pipe["regressor"].predict(
668                                h=X_test.shape[0], **kwargs
669                            )
670                        else:
671                            assert (
672                                self.h > 0 and self.h <= X_test.shape[0]
673                            ), "h must be > 0 and < X_test.shape[0]"
674                            X_pred = pipe["regressor"].predict(h=self.h, **kwargs)
675
676                    else:
677
678                        if self.h is None:
679                            X_pred = pipe.predict(
680                                h=X_test.shape[0],
681                                **kwargs,
682                                # X_pred = pipe.predict(h=X_test.shape[0], new_xreg=new_xreg) ## DO xreg like in `ahead`
683                            )
684                        else:
685                            assert (
686                                self.h > 0 and self.h <= X_test.shape[0]
687                            ), "h must be > 0 and < X_test.shape[0]"
688                            X_pred = pipe.predict(h=self.h, **kwargs)
689
690                    if self.h is None:
691                        if (self.replications is not None) or (
692                            self.type_pi == "gaussian"
693                        ):
694                            rmse = mean_errors(
695                                actual=X_test,
696                                pred=X_pred.mean,
697                                scoring="root_mean_squared_error",
698                                per_series=per_series,
699                            )
700                            mae = mean_errors(
701                                actual=X_test,
702                                pred=X_pred.mean,
703                                scoring="mean_absolute_error",
704                                per_series=per_series,
705                            )
706                            mpl = mean_errors(
707                                actual=X_test,
708                                pred=X_pred.mean,
709                                scoring="mean_pinball_loss",
710                                per_series=per_series,
711                            )
712                            winklerscore = winkler_score(
713                                obj=X_pred,
714                                actual=X_test,
715                                level=95,
716                                per_series=per_series,
717                            )
718                            coveragecalc = coverage(
719                                X_pred, X_test, level=95, per_series=per_series
720                            )
721                        else:  # no prediction interval
722                            rmse = mean_errors(
723                                actual=X_test,
724                                pred=X_pred,
725                                scoring="root_mean_squared_error",
726                                per_series=per_series,
727                            )
728                            mae = mean_errors(
729                                actual=X_test,
730                                pred=X_pred,
731                                scoring="mean_absolute_error",
732                                per_series=per_series,
733                            )
734                            mpl = mean_errors(
735                                actual=X_test,
736                                pred=X_pred,
737                                scoring="mean_pinball_loss",
738                                per_series=per_series,
739                            )
740                    else:  # self.h is not None
741                        if (self.replications is not None) or (
742                            self.type_pi == "gaussian"
743                        ):
744
745                            if isinstance(X_test, pd.DataFrame):
746                                X_test_h = X_test.iloc[0 : self.h, :]
747                                rmse = mean_errors(
748                                    actual=X_test_h,
749                                    pred=X_pred,
750                                    scoring="root_mean_squared_error",
751                                    per_series=per_series,
752                                )
753                                mae = mean_errors(
754                                    actual=X_test_h,
755                                    pred=X_pred,
756                                    scoring="mean_absolute_error",
757                                    per_series=per_series,
758                                )
759                                mpl = mean_errors(
760                                    actual=X_test_h,
761                                    pred=X_pred,
762                                    scoring="mean_pinball_loss",
763                                    per_series=per_series,
764                                )
765                                winklerscore = winkler_score(
766                                    obj=X_pred,
767                                    actual=X_test_h,
768                                    level=95,
769                                    per_series=per_series,
770                                )
771                                coveragecalc = coverage(
772                                    X_pred,
773                                    X_test_h,
774                                    level=95,
775                                    per_series=per_series,
776                                )
777                            else:
778                                X_test_h = X_test[0 : self.h, :]
779                                rmse = mean_errors(
780                                    actual=X_test_h,
781                                    pred=X_pred,
782                                    scoring="root_mean_squared_error",
783                                    per_series=per_series,
784                                )
785                                mae = mean_errors(
786                                    actual=X_test_h,
787                                    pred=X_pred,
788                                    scoring="mean_absolute_error",
789                                    per_series=per_series,
790                                )
791                                mpl = mean_errors(
792                                    actual=X_test_h,
793                                    pred=X_pred,
794                                    scoring="mean_pinball_loss",
795                                    per_series=per_series,
796                                )
797                                winklerscore = winkler_score(
798                                    obj=X_pred,
799                                    actual=X_test_h,
800                                    level=95,
801                                    per_series=per_series,
802                                )
803                                coveragecalc = coverage(
804                                    X_pred,
805                                    X_test_h,
806                                    level=95,
807                                    per_series=per_series,
808                                )
809                        else:  # no prediction interval
810
811                            if isinstance(X_test, pd.DataFrame):
812                                X_test_h = X_test.iloc[0 : self.h, :]
813                                rmse = mean_errors(
814                                    actual=X_test_h,
815                                    pred=X_pred,
816                                    scoring="root_mean_squared_error",
817                                    per_series=per_series,
818                                )
819                                mae = mean_errors(
820                                    actual=X_test_h,
821                                    pred=X_pred,
822                                    scoring="mean_absolute_error",
823                                    per_series=per_series,
824                                )
825                                mpl = mean_errors(
826                                    actual=X_test_h,
827                                    pred=X_pred,
828                                    scoring="mean_pinball_loss",
829                                    per_series=per_series,
830                                )
831                            else:
832                                X_test_h = X_test[0 : self.h, :]
833                                rmse = mean_errors(
834                                    actual=X_test_h,
835                                    pred=X_pred,
836                                    scoring="root_mean_squared_error",
837                                    per_series=per_series,
838                                )
839                                mae = mean_errors(
840                                    actual=X_test_h,
841                                    pred=X_pred,
842                                    scoring="mean_absolute_error",
843                                    per_series=per_series,
844                                )
845                                mpl = mean_errors(actual=X_test_h, pred=X_pred, scoring="mean_pinball_loss", per_series=per_series)  # also track the pinball loss in this branch
846                    names.append(name)
847                    RMSE.append(rmse)
848                    MAE.append(mae)
849                    MPL.append(mpl)
850                    if (self.replications is not None) or (self.type_pi == "gaussian"):
851                        WINKLERSCORE.append(winklerscore)
852                        COVERAGE.append(coveragecalc)
853                    TIME.append(time.time() - start)
854
855                    if self.custom_metric is not None:
856                        try:
857                            if self.h is None:
858                                custom_metric = self.custom_metric(X_test, X_pred)
859                            else:
860                                custom_metric = self.custom_metric(X_test_h, X_pred)
861                            CUSTOM_METRIC.append(custom_metric)
862                        except Exception as e:
863                            custom_metric = np.finfo(np.float32).max  # np.finfo: float sentinel (np.iinfo is for integer dtypes)
864                            CUSTOM_METRIC.append(custom_metric)
865
866                    if self.verbose > 0:
867                        if (self.replications is not None) or (
868                            self.type_pi == "gaussian"
869                        ):
870                            scores_verbose = {
871                                "Model": name,
872                                "RMSE": rmse,
873                                "MAE": mae,
874                                "MPL": mpl,
875                                "WINKLERSCORE": winklerscore,
876                                "COVERAGE": coveragecalc,
877                                "Time taken": time.time() - start,
878                            }
879                        else:
880                            scores_verbose = {
881                                "Model": name,
882                                "RMSE": rmse,
883                                "MAE": mae,
884                                "MPL": mpl,
885                                "Time taken": time.time() - start,
886                            }
887
888                        if self.custom_metric is not None:
889                            scores_verbose["Custom metric"] = custom_metric
890
891                    if self.predictions:
892                        predictions[name] = X_pred
893
894                except Exception as exception:
895                    if self.ignore_warnings is False:
896                        print(name + " model failed to execute")
897                        print(exception)
898
899        if (self.replications is not None) or (self.type_pi == "gaussian"):
900            scores = {
901                "Model": names,
902                "RMSE": RMSE,
903                "MAE": MAE,
904                "MPL": MPL,
905                "WINKLERSCORE": WINKLERSCORE,
906                "COVERAGE": COVERAGE,
907                "Time Taken": TIME,
908            }
909        else:
910            scores = {
911                "Model": names,
912                "RMSE": RMSE,
913                "MAE": MAE,
914                "MPL": MPL,
915                "Time Taken": TIME,
916            }
917
918        if self.custom_metric is not None:
919            scores["Custom metric"] = CUSTOM_METRIC
920
921        if per_series:
922            scores = dict_to_dataframe_series(scores, self.series_names)
923        else:
924            scores = pd.DataFrame(scores)
925
926        try:  # case per_series, can't be sorted
927            scores = scores.sort_values(by=self.sort_by, ascending=True).set_index(
928                "Model"
929            )
930
931            self.best_model_ = self.models_[scores.index[0]]
932        except Exception as e:
933            pass
934
935        if self.predictions is True:
936
937            return scores, predictions
938
939        return scores
940
941    def get_best_model(self):
942        """
943        This function returns the best model pipeline based on the sort_by metric.
944
945        Returns:
946
947            best_model: object,
948                Returns the best model pipeline based on the sort_by metric.
949
950        """
951        return self.best_model_
952
953    def provide_models(self, X_train, X_test):
954        """
955        This function returns all the model objects trained in fit function.
956        If fit is not called already, then we call fit and then return the models.
957
958        Parameters:
959
960            X_train : array-like,
961                Training vectors, where rows is the number of samples
962                and columns is the number of features.
963
964            X_test : array-like,
965                Testing vectors, where rows is the number of samples
966                and columns is the number of features.
967
968        Returns:
969
970            models: dict-object,
971                Returns a dictionary with each model pipeline as value
972                with key as name of models.
973
974        """
975        if self.h is None:
976            if len(self.models_.keys()) == 0:
977                self.fit(X_train, X_test)
978        else:
979            if len(self.models_.keys()) == 0:
980                if isinstance(X_test, pd.DataFrame):
981                    self.fit(X_train, X_test.iloc[0 : self.h, :])
982                else:
983                    self.fit(X_train, X_test[0 : self.h, :])
984
985        return self.models_

Fitting -- almost -- all the regression algorithms with layers of nnetsauce's CustomRegressor to multivariate time series and returning their scores.

Parameters:

verbose: int, optional (default=0)
    Any positive number for verbosity.

ignore_warnings: bool, optional (default=True)
    When set to True, warnings related to algorithms that fail to run are ignored.

custom_metric: function, optional (default=None)
    When a function is provided, models are also evaluated with this custom
    evaluation metric.

predictions: bool, optional (default=False)
    When set to True, the predictions of all the models are returned as a data frame.

sort_by: string, optional (default='RMSE')
    Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE',
    'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and
    provided by custom_metric.

random_state: int, optional (default=42)
    Reproducibility seed.

estimators: list, optional (default='all')
    List of estimator (regression algorithm) names, or just 'all'.

preprocess: bool
    When set to True, the data are preprocessed (numeric scaling and categorical
    encoding) before fitting.

n_layers: int, optional (default=1)
    Number of layers in the network. When set to 1, the model is equivalent to an MTS.

h: int, optional (default=None)
    Number of steps ahead to predict (when used, must be > 0 and <= X_test.shape[0]).

All the other parameters are the same as MTS's.

Attributes:

models_: dict-object
    A dictionary with model names as keys and fitted model pipelines as values.

best_model_: object
    The best model pipeline according to the sort_by metric.

Examples:

See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict
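
A minimal usage sketch (the synthetic data, the chosen estimator names and the shown
constructor arguments are illustrative assumptions; any scikit-learn regressor name
accepted by `estimators` works the same way):

    import numpy as np
    import pandas as pd
    import nnetsauce as ns

    # toy multivariate time series: 120 time steps, 2 series (illustrative only)
    rng = np.random.default_rng(123)
    df = pd.DataFrame(rng.normal(size=(120, 2)).cumsum(axis=0),
                      columns=["series1", "series2"])
    X_train, X_test = df.iloc[:100, :], df.iloc[100:, :]

    # score a couple of scikit-learn regressors wrapped in MTS/DeepMTS layers
    regr_mts = ns.LazyDeepMTS(verbose=0, ignore_warnings=True,
                              estimators=["Ridge", "ElasticNet"])
    scores = regr_mts.fit(X_train, X_test)
    print(scores)  # one row of metrics (RMSE, MAE, MPL, ...) per model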
def fit(self, X_train, X_test, xreg=None, per_series=False, **kwargs):
241    def fit(self, X_train, X_test, xreg=None, per_series=False, **kwargs):
242        """Fit Regression algorithms to X_train, predict and score on X_test.
243
244        Parameters:
245
246            X_train: array-like or data frame,
247                Training vectors, where rows is the number of samples
248                and columns is the number of features.
249
250            X_test: array-like or data frame,
251                Testing vectors, where rows is the number of samples
252                and columns is the number of features.
253
254            xreg: array-like, optional (default=None)
255                Additional (external) regressors to be passed to self.obj
256                xreg must be in 'increasing' order (most recent observations last)
257
258            per_series: bool, optional (default=False)
259                When set to True, the metrics are computed series by series.
260
261            **kwargs: dict, optional (default=None)
262                Additional parameters to be passed to `fit` method of `obj`.
263
264        Returns:
265
266            scores: Pandas DataFrame
267                Returns metrics of all the models in a Pandas DataFrame.
268
269            predictions: Pandas DataFrame
270                Returns predictions of all the models in a Pandas DataFrame.
271
272        """
273        R2 = []
274        ADJR2 = []
275        ME = []
276        MPL = []
277        RMSE = []
278        MAE = []
279        MPE = []
280        MAPE = []
281        WINKLERSCORE = []
282        COVERAGE = []
283
284        # WIN = []
285        names = []
286        TIME = []
287        predictions = {}
288
289        if self.custom_metric is not None:
290            CUSTOM_METRIC = []
291
292        if self.h is None:
293            assert X_test is not None, "If h is None, X_test must be provided."
294
295        if isinstance(X_train, np.ndarray):
296            X_train = pd.DataFrame(X_train)
297            X_test = pd.DataFrame(X_test)
298
299        self.series_names = X_train.columns.tolist()
300
301        X_train = convert_df_to_numeric(X_train)
302        X_test = convert_df_to_numeric(X_test)
303
304        numeric_features = X_train.select_dtypes(include=[np.number]).columns
305        categorical_features = X_train.select_dtypes(include=["object"]).columns
306
307        categorical_low, categorical_high = get_card_split(
308            X_train, categorical_features
309        )
310
311        if self.preprocess:
312            preprocessor = ColumnTransformer(
313                transformers=[
314                    ("numeric", numeric_transformer, numeric_features),
315                    (
316                        "categorical_low",
317                        categorical_transformer_low,
318                        categorical_low,
319                    ),
320                    (
321                        "categorical_high",
322                        categorical_transformer_high,
323                        categorical_high,
324                    ),
325                ]
326            )
327
328        # baselines (Classical MTS) ----
329        for i, name in enumerate(["ARIMA", "ETS", "Theta", "VAR", "VECM"]):
330            try:
331                start = time.time()
332                regr = ClassicalMTS(model=name)
333                regr.fit(X_train, **kwargs)
334                self.models_[name] = regr
335                if self.h is None:
336                    X_pred = regr.predict(h=X_test.shape[0], **kwargs)
337                else:
338                    assert self.h > 0, "h must be > 0"
339                    X_pred = regr.predict(h=self.h, **kwargs)
340                    try:
341                        X_test = X_test[0 : self.h, :]
342                    except Exception as e:
343                        X_test = X_test.iloc[0 : self.h, :]
344
345                rmse = mean_errors(
346                    actual=X_test,
347                    pred=X_pred,
348                    scoring="root_mean_squared_error",
349                    per_series=per_series,
350                )
351                mae = mean_errors(
352                    actual=X_test,
353                    pred=X_pred,
354                    scoring="mean_absolute_error",
355                    per_series=per_series,
356                )
357                mpl = mean_errors(
358                    actual=X_test,
359                    pred=X_pred,
360                    scoring="mean_pinball_loss",
361                    per_series=per_series,
362                )
363            except Exception:
364
365                continue
366
367            names.append(name)
368            RMSE.append(rmse)
369            MAE.append(mae)
370            MPL.append(mpl)
371
372            if self.custom_metric is not None:
373                try:
374                    if self.h is None:
375                        custom_metric = self.custom_metric(X_test, X_pred)
376                    else:
377                        custom_metric = self.custom_metric(X_test, X_pred)  # X_test was already truncated to h rows above
378                    CUSTOM_METRIC.append(custom_metric)
379                except Exception as e:
380                    custom_metric = np.finfo(np.float32).max  # np.finfo: float sentinel (np.iinfo is for integer dtypes)
381                    CUSTOM_METRIC.append(custom_metric)
382
383            if (self.replications is not None) or (self.type_pi == "gaussian"):
384                if per_series == False:
385                    winklerscore = winkler_score(obj=X_pred, actual=X_test, level=95)
386                    coveragecalc = coverage(X_pred, X_test, level=95)
387                else:
388                    winklerscore = winkler_score(
389                        obj=X_pred, actual=X_test, level=95, per_series=True
390                    )
391                    coveragecalc = coverage(X_pred, X_test, level=95, per_series=True)
392                WINKLERSCORE.append(winklerscore)
393                COVERAGE.append(coveragecalc)
394            TIME.append(time.time() - start)
395
396        if self.estimators == "all":
397            if self.n_layers <= 1:
398                self.regressors = REGRESSORSMTS
399            else:
400                self.regressors = DEEPREGRESSORSMTS
401        else:
402            if self.n_layers <= 1:
403                self.regressors = [
404                    ("MTS(" + est[0] + ")", est[1])
405                    for est in all_estimators()
406                    if (
407                        issubclass(est[1], RegressorMixin)
408                        and (est[0] in self.estimators)
409                    )
410                ]
411            else:  # self.n_layers > 1
412                self.regressors = [
413                    ("DeepMTS(" + est[0] + ")", est[1])
414                    for est in all_estimators()
415                    if (
416                        issubclass(est[1], RegressorMixin)
417                        and (est[0] in self.estimators)
418                    )
419                ]
420
421        if self.preprocess is True:
422            for name, model in tqdm(self.regressors):  # do parallel exec
423                start = time.time()
424                try:
425                    if "random_state" in model().get_params().keys():
426                        pipe = Pipeline(
427                            steps=[
428                                ("preprocessor", preprocessor),
429                                (
430                                    "regressor",
431                                    DeepMTS(
432                                        obj=model(
433                                            random_state=self.random_state,
434                                            **kwargs,
435                                        ),
436                                        n_layers=self.n_layers,
437                                        n_hidden_features=self.n_hidden_features,
438                                        activation_name=self.activation_name,
439                                        a=self.a,
440                                        nodes_sim=self.nodes_sim,
441                                        bias=self.bias,
442                                        dropout=self.dropout,
443                                        direct_link=self.direct_link,
444                                        n_clusters=self.n_clusters,
445                                        cluster_encode=self.cluster_encode,
446                                        type_clust=self.type_clust,
447                                        type_scaling=self.type_scaling,
448                                        lags=self.lags,
449                                        type_pi=self.type_pi,
450                                        block_size=self.block_size,
451                                        replications=self.replications,
452                                        kernel=self.kernel,
453                                        agg=self.agg,
454                                        seed=self.seed,
455                                        backend=self.backend,
456                                        show_progress=self.show_progress,
457                                    ),
458                                ),
459                            ]
460                        )
461                    else:  # "random_state" in model().get_params().keys()
462                        pipe = Pipeline(
463                            steps=[
464                                ("preprocessor", preprocessor),
465                                (
466                                    "regressor",
467                                    DeepMTS(
468                                        obj=model(**kwargs),
469                                        n_layers=self.n_layers,
470                                        n_hidden_features=self.n_hidden_features,
471                                        activation_name=self.activation_name,
472                                        a=self.a,
473                                        nodes_sim=self.nodes_sim,
474                                        bias=self.bias,
475                                        dropout=self.dropout,
476                                        direct_link=self.direct_link,
477                                        n_clusters=self.n_clusters,
478                                        cluster_encode=self.cluster_encode,
479                                        type_clust=self.type_clust,
480                                        type_scaling=self.type_scaling,
481                                        lags=self.lags,
482                                        type_pi=self.type_pi,
483                                        block_size=self.block_size,
484                                        replications=self.replications,
485                                        kernel=self.kernel,
486                                        agg=self.agg,
487                                        seed=self.seed,
488                                        backend=self.backend,
489                                        show_progress=self.show_progress,
490                                    ),
491                                ),
492                            ]
493                        )
494
495                    pipe.fit(X_train, **kwargs)
496                    # pipe.fit(X_train, xreg=xreg)
497
498                    self.models_[name] = pipe
499
500                    if self.h is None:
501                        X_pred = pipe["regressor"].predict(h=X_test.shape[0], **kwargs)  # h is None: forecast over the whole test period
502                    else:
503                        assert self.h > 0, "h must be > 0"
504                        X_pred = pipe["regressor"].predict(h=self.h, **kwargs)
505
506                    if (self.replications is not None) or (self.type_pi == "gaussian"):
507                        rmse = mean_errors(
508                            actual=X_test,
509                            pred=X_pred,
510                            scoring="root_mean_squared_error",
511                            per_series=per_series,
512                        )
513                        mae = mean_errors(
514                            actual=X_test,
515                            pred=X_pred,
516                            scoring="mean_absolute_error",
517                            per_series=per_series,
518                        )
519                        mpl = mean_errors(
520                            actual=X_test,
521                            pred=X_pred,
522                            scoring="mean_pinball_loss",
523                            per_series=per_series,
524                        )
525                        winklerscore = winkler_score(
526                            obj=X_pred,
527                            actual=X_test,
528                            level=95,
529                            per_series=per_series,
530                        )
531                        coveragecalc = coverage(
532                            X_pred, X_test, level=95, per_series=per_series
533                        )
534                    else:
535                        rmse = mean_errors(
536                            actual=X_test,
537                            pred=X_pred,
538                            scoring="root_mean_squared_error",
539                            per_series=per_series,
540                        )
541                        mae = mean_errors(
542                            actual=X_test,
543                            pred=X_pred,
544                            scoring="mean_absolute_error",
545                            per_series=per_series,
546                        )
547                        mpl = mean_errors(
548                            actual=X_test,
549                            pred=X_pred,
550                            scoring="mean_pinball_loss",
551                            per_series=per_series,
552                        )
553
554                    names.append(name)
555                    RMSE.append(rmse)
556                    MAE.append(mae)
557                    MPL.append(mpl)
558
559                    if (self.replications is not None) or (self.type_pi == "gaussian"):
560                        WINKLERSCORE.append(winklerscore)
561                        COVERAGE.append(coveragecalc)
562                    TIME.append(time.time() - start)
563
564                    if self.custom_metric is not None:
565                        try:
566                            custom_metric = self.custom_metric(X_test, X_pred)
567                            CUSTOM_METRIC.append(custom_metric)
568                        except Exception as e:
569                            custom_metric = np.iinfo(np.float32).max
570                            CUSTOM_METRIC.append(custom_metric)
571
572                    if self.verbose > 0:
573                        if (self.replications is not None) or (
574                            self.type_pi == "gaussian"
575                        ):
576                            scores_verbose = {
577                                "Model": name,
578                                "RMSE": rmse,
579                                "MAE": mae,
580                                "MPL": mpl,
581                                "WINKLERSCORE": winklerscore,
582                                "COVERAGE": coveragecalc,
583                                "Time taken": time.time() - start,
584                            }
585                        else:
586                            scores_verbose = {
587                                "Model": name,
588                                "RMSE": rmse,
589                                "MAE": mae,
590                                "MPL": mpl,
591                                "Time taken": time.time() - start,
592                            }
593
594                        if self.custom_metric is not None:
595                            scores_verbose["Custom metric"] = custom_metric
596
597                    if self.predictions:
598                        predictions[name] = X_pred
599                except Exception as exception:
600                    if self.ignore_warnings is False:
601                        print(name + " model failed to execute")
602                        print(exception)
603
604        else:  # no preprocessing
605
606            for name, model in tqdm(self.regressors):  # do parallel exec
607                start = time.time()
608                try:
609                    if "random_state" in model().get_params().keys():
610                        pipe = DeepMTS(
611                            obj=model(random_state=self.random_state, **kwargs),
612                            n_layers=self.n_layers,
613                            n_hidden_features=self.n_hidden_features,
614                            activation_name=self.activation_name,
615                            a=self.a,
616                            nodes_sim=self.nodes_sim,
617                            bias=self.bias,
618                            dropout=self.dropout,
619                            direct_link=self.direct_link,
620                            n_clusters=self.n_clusters,
621                            cluster_encode=self.cluster_encode,
622                            type_clust=self.type_clust,
623                            type_scaling=self.type_scaling,
624                            lags=self.lags,
625                            type_pi=self.type_pi,
626                            block_size=self.block_size,
627                            replications=self.replications,
628                            kernel=self.kernel,
629                            agg=self.agg,
630                            seed=self.seed,
631                            backend=self.backend,
632                            show_progress=self.show_progress,
633                        )
634                    else:
635                        pipe = DeepMTS(
636                            obj=model(**kwargs),
637                            n_layers=self.n_layers,
638                            n_hidden_features=self.n_hidden_features,
639                            activation_name=self.activation_name,
640                            a=self.a,
641                            nodes_sim=self.nodes_sim,
642                            bias=self.bias,
643                            dropout=self.dropout,
644                            direct_link=self.direct_link,
645                            n_clusters=self.n_clusters,
646                            cluster_encode=self.cluster_encode,
647                            type_clust=self.type_clust,
648                            type_scaling=self.type_scaling,
649                            lags=self.lags,
650                            type_pi=self.type_pi,
651                            block_size=self.block_size,
652                            replications=self.replications,
653                            kernel=self.kernel,
654                            agg=self.agg,
655                            seed=self.seed,
656                            backend=self.backend,
657                            show_progress=self.show_progress,
658                        )
659
660                    pipe.fit(X_train, xreg, **kwargs)
661                    # pipe.fit(X_train, xreg=xreg) # DO xreg like in `ahead`
662
663                    self.models_[name] = pipe
664
665                    if self.preprocess is True:
666                        if self.h is None:
667                            X_pred = pipe["regressor"].predict(
668                                h=X_test.shape[0], **kwargs
669                            )
670                        else:
671                            assert (
672                                self.h > 0 and self.h <= X_test.shape[0]
673                            ), "h must be > 0 and <= X_test.shape[0]"
674                            X_pred = pipe["regressor"].predict(h=self.h, **kwargs)
675
676                    else:
677
678                        if self.h is None:
679                            X_pred = pipe.predict(
680                                h=X_test.shape[0],
681                                **kwargs,
682                                # X_pred = pipe.predict(h=X_test.shape[0], new_xreg=new_xreg) ## DO xreg like in `ahead`
683                            )
684                        else:
685                            assert (
686                                self.h > 0 and self.h <= X_test.shape[0]
687                            ), "h must be > 0 and <= X_test.shape[0]"
688                            X_pred = pipe.predict(h=self.h, **kwargs)
689
690                    if self.h is None:
691                        if (self.replications is not None) or (
692                            self.type_pi == "gaussian"
693                        ):
694                            rmse = mean_errors(
695                                actual=X_test,
696                                pred=X_pred.mean,
697                                scoring="root_mean_squared_error",
698                                per_series=per_series,
699                            )
700                            mae = mean_errors(
701                                actual=X_test,
702                                pred=X_pred.mean,
703                                scoring="mean_absolute_error",
704                                per_series=per_series,
705                            )
706                            mpl = mean_errors(
707                                actual=X_test,
708                                pred=X_pred.mean,
709                                scoring="mean_pinball_loss",
710                                per_series=per_series,
711                            )
712                            winklerscore = winkler_score(
713                                obj=X_pred,
714                                actual=X_test,
715                                level=95,
716                                per_series=per_series,
717                            )
718                            coveragecalc = coverage(
719                                X_pred, X_test, level=95, per_series=per_series
720                            )
721                        else:  # no prediction interval
722                            rmse = mean_errors(
723                                actual=X_test,
724                                pred=X_pred,
725                                scoring="root_mean_squared_error",
726                                per_series=per_series,
727                            )
728                            mae = mean_errors(
729                                actual=X_test,
730                                pred=X_pred,
731                                scoring="mean_absolute_error",
732                                per_series=per_series,
733                            )
734                            mpl = mean_errors(
735                                actual=X_test,
736                                pred=X_pred,
737                                scoring="mean_pinball_loss",
738                                per_series=per_series,
739                            )
740                    else:  # self.h is not None
741                        if (self.replications is not None) or (
742                            self.type_pi == "gaussian"
743                        ):
744
745                            if isinstance(X_test, pd.DataFrame):
746                                X_test_h = X_test.iloc[0 : self.h, :]
747                                rmse = mean_errors(
748                                    actual=X_test_h,
749                                    pred=X_pred,
750                                    scoring="root_mean_squared_error",
751                                    per_series=per_series,
752                                )
753                                mae = mean_errors(
754                                    actual=X_test_h,
755                                    pred=X_pred,
756                                    scoring="mean_absolute_error",
757                                    per_series=per_series,
758                                )
759                                mpl = mean_errors(
760                                    actual=X_test_h,
761                                    pred=X_pred,
762                                    scoring="mean_pinball_loss",
763                                    per_series=per_series,
764                                )
765                                winklerscore = winkler_score(
766                                    obj=X_pred,
767                                    actual=X_test_h,
768                                    level=95,
769                                    per_series=per_series,
770                                )
771                                coveragecalc = coverage(
772                                    X_pred,
773                                    X_test_h,
774                                    level=95,
775                                    per_series=per_series,
776                                )
777                            else:
778                                X_test_h = X_test[0 : self.h, :]
779                                rmse = mean_errors(
780                                    actual=X_test_h,
781                                    pred=X_pred,
782                                    scoring="root_mean_squared_error",
783                                    per_series=per_series,
784                                )
785                                mae = mean_errors(
786                                    actual=X_test_h,
787                                    pred=X_pred,
788                                    scoring="mean_absolute_error",
789                                    per_series=per_series,
790                                )
791                                mpl = mean_errors(
792                                    actual=X_test_h,
793                                    pred=X_pred,
794                                    scoring="mean_pinball_loss",
795                                    per_series=per_series,
796                                )
797                                winklerscore = winkler_score(
798                                    obj=X_pred,
799                                    actual=X_test_h,
800                                    level=95,
801                                    per_series=per_series,
802                                )
803                                coveragecalc = coverage(
804                                    X_pred,
805                                    X_test_h,
806                                    level=95,
807                                    per_series=per_series,
808                                )
809                        else:  # no prediction interval
810
811                            if isinstance(X_test, pd.DataFrame):
812                                X_test_h = X_test.iloc[0 : self.h, :]
813                                rmse = mean_errors(
814                                    actual=X_test_h,
815                                    pred=X_pred,
816                                    scoring="root_mean_squared_error",
817                                    per_series=per_series,
818                                )
819                                mae = mean_errors(
820                                    actual=X_test_h,
821                                    pred=X_pred,
822                                    scoring="mean_absolute_error",
823                                    per_series=per_series,
824                                )
825                                mpl = mean_errors(
826                                    actual=X_test_h,
827                                    pred=X_pred,
828                                    scoring="mean_pinball_loss",
829                                    per_series=per_series,
830                                )
831                            else:
832                                X_test_h = X_test[0 : self.h, :]
833                                rmse = mean_errors(
834                                    actual=X_test_h,
835                                    pred=X_pred,
836                                    scoring="root_mean_squared_error",
837                                    per_series=per_series,
838                                )
839                                mae = mean_errors(
840                                    actual=X_test_h,
841                                    pred=X_pred,
842                                    scoring="mean_absolute_error",
843                                    per_series=per_series,
844                                )
845                                mpl = mean_errors(actual=X_test_h, pred=X_pred, scoring="mean_pinball_loss", per_series=per_series)  # also track the pinball loss in this branch
846                    names.append(name)
847                    RMSE.append(rmse)
848                    MAE.append(mae)
849                    MPL.append(mpl)
850                    if (self.replications is not None) or (self.type_pi == "gaussian"):
851                        WINKLERSCORE.append(winklerscore)
852                        COVERAGE.append(coveragecalc)
853                    TIME.append(time.time() - start)
854
855                    if self.custom_metric is not None:
856                        try:
857                            if self.h is None:
858                                custom_metric = self.custom_metric(X_test, X_pred)
859                            else:
860                                custom_metric = self.custom_metric(X_test_h, X_pred)
861                            CUSTOM_METRIC.append(custom_metric)
862                        except Exception as e:
863                            custom_metric = np.finfo(np.float32).max  # np.finfo: float sentinel (np.iinfo is for integer dtypes)
864                            CUSTOM_METRIC.append(custom_metric)
865
866                    if self.verbose > 0:
867                        if (self.replications is not None) or (
868                            self.type_pi == "gaussian"
869                        ):
870                            scores_verbose = {
871                                "Model": name,
872                                "RMSE": rmse,
873                                "MAE": mae,
874                                "MPL": mpl,
875                                "WINKLERSCORE": winklerscore,
876                                "COVERAGE": coveragecalc,
877                                "Time taken": time.time() - start,
878                            }
879                        else:
880                            scores_verbose = {
881                                "Model": name,
882                                "RMSE": rmse,
883                                "MAE": mae,
884                                "MPL": mpl,
885                                "Time taken": time.time() - start,
886                            }
887
888                        if self.custom_metric is not None:
889                            scores_verbose["Custom metric"] = custom_metric
890
891                    if self.predictions:
892                        predictions[name] = X_pred
893
894                except Exception as exception:
895                    if self.ignore_warnings is False:
896                        print(name + " model failed to execute")
897                        print(exception)
898
899        if (self.replications is not None) or (self.type_pi == "gaussian"):
900            scores = {
901                "Model": names,
902                "RMSE": RMSE,
903                "MAE": MAE,
904                "MPL": MPL,
905                "WINKLERSCORE": WINKLERSCORE,
906                "COVERAGE": COVERAGE,
907                "Time Taken": TIME,
908            }
909        else:
910            scores = {
911                "Model": names,
912                "RMSE": RMSE,
913                "MAE": MAE,
914                "MPL": MPL,
915                "Time Taken": TIME,
916            }
917
918        if self.custom_metric is not None:
919            scores["Custom metric"] = CUSTOM_METRIC
920
921        if per_series:
922            scores = dict_to_dataframe_series(scores, self.series_names)
923        else:
924            scores = pd.DataFrame(scores)
925
926        try:  # case per_series, can't be sorted
927            scores = scores.sort_values(by=self.sort_by, ascending=True).set_index(
928                "Model"
929            )
930
931            self.best_model_ = self.models_[scores.index[0]]
932        except Exception as e:
933            pass
934
935        if self.predictions is True:
936
937            return scores, predictions
938
939        return scores

Fit Regression algorithms to X_train, predict and score on X_test.

Parameters:

X_train: array-like or data frame,
    Training vectors, where the rows are samples and the columns are features.

X_test: array-like or data frame,
    Testing vectors, where the rows are samples and the columns are features.

xreg: array-like, optional (default=None)
    Additional (external) regressors to be passed to self.obj
    xreg must be in 'increasing' order (most recent observations last)

per_series: bool, optional (default=False)
    When set to True, the metrics are computed series by series.

**kwargs: dict, optional (default=None)
    Additional parameters to be passed to `fit` method of `obj`.

Returns:

scores: Pandas DataFrame
    Metrics of all the models, as a Pandas DataFrame.

predictions: Pandas DataFrame
    Predictions of all the models, as a Pandas DataFrame (returned only when the
    lazy object was created with predictions=True).
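
For instance, when the lazy object is created with predictions=True, fit returns both the
metrics table and the per-model forecasts. A short sketch, reusing X_train / X_test from
the example above (the "MTS(Ridge)" key follows the naming scheme in the listing and is an
assumption for the default n_layers=1):

    regr_mts = ns.LazyDeepMTS(verbose=0, ignore_warnings=True,
                              estimators=["Ridge"], predictions=True)
    scores, preds = regr_mts.fit(X_train, X_test)
    print(scores)               # metrics per model, sorted by the sort_by metric
    print(preds["MTS(Ridge)"])  # forecasts keyed by model name (assumed key format)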
def provide_models(self, X_train, X_test):
953    def provide_models(self, X_train, X_test):
954        """
955        This function returns all the model objects trained in fit function.
956        If fit is not called already, then we call fit and then return the models.
957
958        Parameters:
959
960            X_train : array-like,
961                Training vectors, where rows is the number of samples
962                and columns is the number of features.
963
964            X_test : array-like,
965                Testing vectors, where rows is the number of samples
966                and columns is the number of features.
967
968        Returns:
969
970            models: dict-object,
971                Returns a dictionary with each model pipeline as value
972                with key as name of models.
973
974        """
975        if self.h is None:
976            if len(self.models_.keys()) == 0:
977                self.fit(X_train, X_test)
978        else:
979            if len(self.models_.keys()) == 0:
980                if isinstance(X_test, pd.DataFrame):
981                    self.fit(X_train, X_test.iloc[0 : self.h, :])
982                else:
983                    self.fit(X_train, X_test[0 : self.h, :])
984
985        return self.models_

This function returns all the model objects trained by the fit function. If fit has not been called yet, it is called first and the models are then returned.

Parameters:

X_train : array-like,
    Training vectors, where the rows are samples and the columns are features.

X_test : array-like,
    Testing vectors, where the rows are samples and the columns are features.

Returns:

models: dict-object,
    A dictionary with model names as keys and fitted model pipelines as values.
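
A short sketch tying the two accessors together (reusing the fitted regr_mts from the
examples above):

    models = regr_mts.provide_models(X_train, X_test)  # dict: model name -> fitted pipeline
    best = regr_mts.get_best_model()                   # pipeline ranked first by sort_by
    print(list(models.keys()))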
class MLARCH(nnetsauce.MTS):
 18class MLARCH(MTS):
 19    """Machine Learning with ARCH effects for time series forecasting
 20
 21    Parameters:
 22            
 23        model_mean: object of class nnetsauce.MTS
 24            Model for mean prediction (required)
 25
 26        model_sigma: object of class nnetsauce.MTS
 27            Model for residuals volatility prediction (required, must be conformalized)
 28
 29        model_residuals: object of class nnetsauce.MTS
 30            Model for standardized residuals prediction (required, must be conformalized)
 31    
 32    Examples: 
 33
 34        See examples/mlarch.py
 35                        
 36    """
 37    def __init__(
 38        self,
 39        model_mean,
 40        model_sigma, 
 41        model_residuals
 42    ):
 43        assert isinstance(model_mean, MTS), "model_mean must be an object of class nnetsauce.MTS"
 44        assert isinstance(model_sigma, MTS), "model_sigma must be an object of class nnetsauce.MTS"
 45        assert isinstance(model_residuals, MTS), "model_residuals must be an object of class nnetsauce.MTS"
 46        assert model_sigma.type_pi.startswith("scp") and model_sigma.replications is not None, \
 47        "for now, the models must be conformalized, i.e type_pi must start with 'scp' and replications must be an integer"
 48        assert model_residuals.type_pi.startswith("scp") and model_residuals.replications is not None, \
 49        "for now, the models must be conformalized, i.e type_pi must start with 'scp' and replications must be an integer"        
 50
 51        self.model_mean = model_mean
 52        self.model_sigma = model_sigma
 53        self.model_residuals = model_residuals
 54
 55        self.mean_residuals_ = None
 56        self.mean_residuals_wilcoxon_test_ = None
 57        self.mean_residuals_kpss_test_ = None
 58        self.standardized_residuals_ = None
 59
 60
 61    def fit(self, y):
 62        """Fit the MLARCH model to the time series data.
 63
 64        Parameters
 65        ----------
 66        y : array-like of shape (n_samples,)
 67            The target time series to be fitted.
 68
 69        Returns
 70        -------
 71        self : object
 72            Returns self.
 73
 74        Notes
 75        -----
 76        This method:
 77
 78        1. Fits the mean model to the time series
 79        2. Performs statistical tests on the residuals (Wilcoxon and KPSS)
 80        3. Fits the volatility model to the squared residuals
 81        4. Computes standardized residuals
 82        5. Fits the residuals model to the standardized residuals
 83        """
 84        n = len(y)
 85        self.model_mean.fit(y.reshape(-1, 1)) 
 86        # Wilcoxon signed-rank test on residuals (mean = 0)
 87        self.mean_residuals_wilcoxon_test_ = stats.wilcoxon(self.model_mean.residuals_)
 88        # KPSS test for stationarity on residuals
 89        self.mean_residuals_kpss_test_ = kpss(self.model_mean.residuals_, regression='c')
 90        self.model_sigma.fit(np.log(self.model_mean.residuals_.reshape(-1, 1)**2)) 
 91        # n//2 here because the model is conformalized
 92        fitted_sigma = self.model_sigma.residuals_ + np.log(self.model_mean.residuals_**2)[(n//2):,:]
 93        # standardized residuals
 94        self.standardized_residuals_ = self.model_mean.residuals_[(n//2):,:]/np.sqrt(np.exp(fitted_sigma))
 95        self.model_residuals.fit(self.standardized_residuals_.reshape(-1, 1))
 96
 97        # Calculate AIC
 98        # Get predictions from all models
 99        mean_pred = self.model_mean.predict(h=0).values.ravel()
100        sigma_pred = self.model_sigma.predict(h=0).values.ravel()
101        z_pred = self.model_residuals.predict(h=0).values.ravel()
102        
103        # Calculate combined predictions
104        combined_pred = mean_pred + z_pred * np.sqrt(np.exp(sigma_pred))
105        
106        # Calculate SSE using the last half of the data (matching standardized_residuals_)
107        y_actual = y[(n//2):].ravel()
108        self.sse_ = np.sum((y_actual - combined_pred) ** 2)
109        
110        # Calculate number of parameters (sum of parameters from all three models)
111        n_params = (self.model_mean.n_hidden_features + 1 +  # mean model
112                   self.model_sigma.n_hidden_features + 1 +  # sigma model
113                   self.model_residuals.n_hidden_features + 1)  # residuals model
114        
115        # Calculate AIC
116        n_samples = len(y_actual)
117        self.aic_ = n_samples * np.log(self.sse_/n_samples) + 2 * n_params
118
119        return self
120
121
122    def predict(self, h=5, level=95):
123        """Predict (probabilistic) future values of the time series.
124
125        Parameters
126        ----------
127        h : int, default=5
128            The forecast horizon.
129        level : int, default=95
130            The confidence level for prediction intervals.
131
132        Returns
133        -------
134        DescribeResult : namedtuple
135            A named tuple containing:
136
137            - mean : array-like of shape (h,)
138                The mean forecast.
139            - sims : array-like of shape (h, n_replications)
140                The simulated forecasts.
141            - lower : array-like of shape (h,)
142                The lower bound of the prediction interval.
143            - upper : array-like of shape (h,)
144                The upper bound of the prediction interval.
145
146        Notes
147        -----
148        This method:
149        1. Generates mean forecasts using the mean model
150        2. Generates standardized residual forecasts using the residuals model
151        3. Generates volatility forecasts using the sigma model
152        4. Combines these forecasts to generate the final predictions
153        5. Computes prediction intervals at the specified confidence level
154        """
155        DescribeResult = namedtuple(
156                "DescribeResult", ("mean", "sims", "lower", "upper")
157            )
158        mean_forecast = self.model_mean.predict(h=h).values.ravel()
159        preds_z = self.model_residuals.predict(h=h)
160        preds_sigma = self.model_sigma.predict(h=h)
161        sims_z = preds_z.sims
162        sims_sigma = preds_sigma.sims 
163
164        f = []
165        for i in range(len(sims_z)): 
166            f.append(mean_forecast + sims_z[i].values.ravel()*np.sqrt(np.exp(sims_sigma[i].values.ravel())))
167
168        f = np.asarray(f).T
169        mean_f = np.mean(f, axis=1)
170        alpha = 1 - level/100
171        lower_bound = np.quantile(f, alpha/2, axis=1)
172        upper_bound = np.quantile(f, 1-alpha/2, axis=1)
173
174        return DescribeResult(mean_f, f, 
175                              lower_bound, upper_bound)

Machine Learning with ARCH effects for time series forecasting

Parameters:

model_mean: object of class nnetsauce.MTS
    Model for mean prediction (required)

model_sigma: object of class nnetsauce.MTS
    Model for residuals volatility prediction (required; must be conformalized, i.e.
    type_pi starting with 'scp' and an integer number of replications)

model_residuals: object of class nnetsauce.MTS
    Model for standardized residuals prediction (required; must be conformalized, i.e.
    type_pi starting with 'scp' and an integer number of replications)

Examples:

See examples/mlarch.py
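
A minimal construction sketch (Ridge as base learner and 'scp2-kde' as conformal type_pi
are assumptions; any type_pi starting with 'scp', together with an integer number of
replications, satisfies the constructor's checks):

    import numpy as np
    import nnetsauce as ns
    from sklearn.linear_model import Ridge

    y = np.diff(np.log(np.cumsum(np.abs(np.random.randn(300))) + 1.0))  # illustrative 1-D series

    model_mean = ns.MTS(Ridge(), lags=2)
    model_sigma = ns.MTS(Ridge(), lags=2, type_pi="scp2-kde", replications=100)
    model_z = ns.MTS(Ridge(), lags=2, type_pi="scp2-kde", replications=100)

    obj = ns.MLARCH(model_mean=model_mean, model_sigma=model_sigma, model_residuals=model_z)
    obj.fit(y)
    res = obj.predict(h=10, level=95)
    print(res.mean, res.lower, res.upper)  # point forecast and 95% prediction interval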
def fit(self, y):
 61    def fit(self, y):
 62        """Fit the MLARCH model to the time series data.
 63
 64        Parameters
 65        ----------
 66        y : array-like of shape (n_samples,)
 67            The target time series to be fitted.
 68
 69        Returns
 70        -------
 71        self : object
 72            Returns self.
 73
 74        Notes
 75        -----
 76        This method:
 77
 78        1. Fits the mean model to the time series
 79        2. Performs statistical tests on the residuals (Wilcoxon and KPSS)
 80        3. Fits the volatility model to the squared residuals
 81        4. Computes standardized residuals
 82        5. Fits the residuals model to the standardized residuals
 83        """
 84        n = len(y)
 85        self.model_mean.fit(y.reshape(-1, 1)) 
 86        # Wilcoxon signed-rank test on residuals (mean = 0)
 87        self.mean_residuals_wilcoxon_test_ = stats.wilcoxon(self.model_mean.residuals_)
 88        # KPSS test for stationarity on residuals
 89        self.mean_residuals_kpss_test_ = kpss(self.model_mean.residuals_, regression='c')
 90        self.model_sigma.fit(np.log(self.model_mean.residuals_.reshape(-1, 1)**2)) 
 91        # n//2 here because the model is conformalized
 92        fitted_sigma = self.model_sigma.residuals_ + np.log(self.model_mean.residuals_**2)[(n//2):,:]
 93        # standardized residuals
 94        self.standardized_residuals_ = self.model_mean.residuals_[(n//2):,:]/np.sqrt(np.exp(fitted_sigma))
 95        self.model_residuals.fit(self.standardized_residuals_.reshape(-1, 1))
 96
 97        # Calculate AIC
 98        # Get predictions from all models
 99        mean_pred = self.model_mean.predict(h=0).values.ravel()
100        sigma_pred = self.model_sigma.predict(h=0).values.ravel()
101        z_pred = self.model_residuals.predict(h=0).values.ravel()
102        
103        # Calculate combined predictions
104        combined_pred = mean_pred + z_pred * np.sqrt(np.exp(sigma_pred))
105        
106        # Calculate SSE using the last half of the data (matching standardized_residuals_)
107        y_actual = y[(n//2):].ravel()
108        self.sse_ = np.sum((y_actual - combined_pred) ** 2)
109        
110        # Calculate number of parameters (sum of parameters from all three models)
111        n_params = (self.model_mean.n_hidden_features + 1 +  # mean model
112                   self.model_sigma.n_hidden_features + 1 +  # sigma model
113                   self.model_residuals.n_hidden_features + 1)  # residuals model
114        
115        # Calculate AIC
116        n_samples = len(y_actual)
117        self.aic_ = n_samples * np.log(self.sse_/n_samples) + 2 * n_params
118
119        return self

Fit the MLARCH model to the time series data.

Parameters

y : array-like of shape (n_samples,)
    The target time series to be fitted.

Returns

self : object
    Returns self.

Notes

This method:

  1. Fits the mean model to the time series
  2. Performs statistical tests on the residuals (Wilcoxon and KPSS)
  3. Fits the volatility model to the squared residuals
  4. Computes standardized residuals
  5. Fits the residuals model to the standardized residuals
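
Continuing the sketch above (the `model` instance is hypothetical), fitting on a univariate NumPy array exposes the residual diagnostics and the AIC computed in the source listing:

```python
import numpy as np

np.random.seed(123)
y = np.cumsum(np.random.randn(200))  # a random-walk-like univariate series

model.fit(y)

print(model.mean_residuals_wilcoxon_test_)  # Wilcoxon test on the mean model's residuals
print(model.mean_residuals_kpss_test_)      # KPSS stationarity test on those residuals
print(model.aic_)                           # AIC from the combined in-sample predictions
```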
def predict(self, h=5, level=95):
122    def predict(self, h=5, level=95):
123        """Predict (probabilistic) future values of the time series.
124
125        Parameters
126        ----------
127        h : int, default=5
128            The forecast horizon.
129        level : int, default=95
130            The confidence level for prediction intervals.
131
132        Returns
133        -------
134        DescribeResult : namedtuple
135            A named tuple containing:
136
137            - mean : array-like of shape (h,)
138                The mean forecast.
139            - sims : array-like of shape (h, n_replications)
140                The simulated forecasts.
141            - lower : array-like of shape (h,)
142                The lower bound of the prediction interval.
143            - upper : array-like of shape (h,)
144                The upper bound of the prediction interval.
145
146        Notes
147        -----
148        This method:
149        1. Generates mean forecasts using the mean model
150        2. Generates standardized residual forecasts using the residuals model
151        3. Generates volatility forecasts using the sigma model
152        4. Combines these forecasts to generate the final predictions
153        5. Computes prediction intervals at the specified confidence level
154        """
155        DescribeResult = namedtuple(
156                "DescribeResult", ("mean", "sims", "lower", "upper")
157            )
158        mean_forecast = self.model_mean.predict(h=h).values.ravel()
159        preds_z = self.model_residuals.predict(h=h)
160        preds_sigma = self.model_sigma.predict(h=h)
161        sims_z = preds_z.sims
162        sims_sigma = preds_sigma.sims 
163
164        f = []
165        for i in range(len(sims_z)): 
166            f.append(mean_forecast + sims_z[i].values.ravel()*np.sqrt(np.exp(sims_sigma[i].values.ravel())))
167
168        f = np.asarray(f).T
169        mean_f = np.mean(f, axis=1)
170        alpha = 1 - level/100
171        lower_bound = np.quantile(f, alpha/2, axis=1)
172        upper_bound = np.quantile(f, 1-alpha/2, axis=1)
173
174        return DescribeResult(mean_f, f, 
175                              lower_bound, upper_bound)

Predict (probabilistic) future values of the time series.

Parameters

h : int, default=5
    The forecast horizon.

level : int, default=95
    The confidence level for prediction intervals.

Returns

DescribeResult : namedtuple
    A named tuple containing:

- mean : array-like of shape (h,)
    The mean forecast.
- sims : array-like of shape (h, n_replications)
    The simulated forecasts.
- lower : array-like of shape (h,)
    The lower bound of the prediction interval.
- upper : array-like of shape (h,)
    The upper bound of the prediction interval.

Notes

This method:

  1. Generates mean forecasts using the mean model
  2. Generates standardized residual forecasts using the residuals model
  3. Generates volatility forecasts using the sigma model
  4. Combines these forecasts to generate the final predictions
  5. Computes prediction intervals at the specified confidence level
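
Continuing the same sketch, `predict` returns the namedtuple described above; each simulated path combines the three models as in step 4 of the notes, i.e. mean forecast plus standardized residual times the volatility `sqrt(exp(log sigma^2))`.

```python
res = model.predict(h=10, level=95)

print(res.mean)        # (h,) mean of the simulated forecasts
print(res.lower)       # (h,) lower bound of the 95% prediction interval
print(res.upper)       # (h,) upper bound of the 95% prediction interval
print(res.sims.shape)  # (h, n_replications) simulated forecast paths
```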
class MedianVotingRegressor(sklearn.ensemble._voting.VotingRegressor):
 6class MedianVotingRegressor(VotingRegressor):
 7    def predict(self, X):
 8        """
 9        Predict using the median of the base regressors' predictions.
10
11        Parameters:
12        X (array-like): Feature matrix for predictions.
13
14        Returns:
15        y_pred (array): Median of predictions from the base regressors.
16        """
17        predictions = np.asarray(
18            [regressor.predict(X) for regressor in self.estimators_]
19        )
20        return np.median(predictions, axis=0)

Prediction voting regressor for unfitted estimators.

A voting regressor is an ensemble meta-estimator that fits several base regressors, each on the whole dataset. Then it averages the individual predictions to form a final prediction.

Read more in the scikit-learn User Guide (Voting Regressor section).

New in version 0.21.

Parameters

estimators : list of (str, estimator) tuples
    Invoking the fit method on the VotingRegressor will fit clones of those
    original estimators that will be stored in the class attribute
    self.estimators_. An estimator can be set to 'drop' using set_params().

    *Changed in version 0.21:* 'drop' is accepted. Using None was deprecated in
    0.22 and support was removed in 0.24.

weights : array-like of shape (n_regressors,), default=None
    Sequence of weights (float or int) to weight the occurrences of predicted
    values before averaging. Uses uniform weights if None.

n_jobs : int, default=None
    The number of jobs to run in parallel for fit. None means 1 unless in a
    joblib.parallel_backend context. -1 means using all processors. See the
    scikit-learn Glossary entry for n_jobs for more details.

verbose : bool, default=False
    If True, the time elapsed while fitting will be printed as it is completed.

    *New in version 0.23.*

Attributes

estimators_ : list of regressors
    The collection of fitted sub-estimators as defined in estimators that are
    not 'drop'.

named_estimators_ : sklearn.utils.Bunch
    Attribute to access any fitted sub-estimators by name.

    *New in version 0.20.*

n_features_in_ : int
    Number of features seen during fit. Only defined if the underlying
    regressor exposes such an attribute when fit.

    *New in version 0.24.*

feature_names_in_ : ndarray of shape (n_features_in_,)
    Names of features seen during fit. Only defined if the underlying
    estimators expose such an attribute when fit.

    *New in version 1.0.*

See Also

VotingClassifier : Soft Voting/Majority Rule classifier.

Examples

>>> import numpy as np
>>> from sklearn.linear_model import LinearRegression
>>> from sklearn.ensemble import RandomForestRegressor
>>> from sklearn.ensemble import VotingRegressor
>>> from sklearn.neighbors import KNeighborsRegressor
>>> r1 = LinearRegression()
>>> r2 = RandomForestRegressor(n_estimators=10, random_state=1)
>>> r3 = KNeighborsRegressor()
>>> X = np.array([[1, 1], [2, 4], [3, 9], [4, 16], [5, 25], [6, 36]])
>>> y = np.array([2, 6, 12, 20, 30, 42])
>>> er = VotingRegressor([('lr', r1), ('rf', r2), ('r3', r3)])
>>> print(er.fit(X, y).predict(X))
[ 6.8...  8.4... 12.5... 17.8... 26...  34...]

In the following example, we drop the 'lr' estimator with VotingRegressor.set_params() and fit the remaining two estimators:

>>> er = er.set_params(lr='drop')
>>> er = er.fit(X, y)
>>> len(er.estimators_)
2
def predict(self, X):
 7    def predict(self, X):
 8        """
 9        Predict using the median of the base regressors' predictions.
10
11        Parameters:
12        X (array-like): Feature matrix for predictions.
13
14        Returns:
15        y_pred (array): Median of predictions from the base regressors.
16        """
17        predictions = np.asarray(
18            [regressor.predict(X) for regressor in self.estimators_]
19        )
20        return np.median(predictions, axis=0)

Predict using the median of the base regressors' predictions.

Parameters:
    X (array-like): Feature matrix for predictions.

Returns:
    y_pred (array): Median of predictions from the base regressors.
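
A short usage sketch, reusing the toy data from the VotingRegressor example above; the only behavioral difference from the parent class is that predict aggregates the base regressors with an element-wise median instead of a (weighted) mean.

```python
import numpy as np
import nnetsauce as ns
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor

X = np.array([[1, 1], [2, 4], [3, 9], [4, 16], [5, 25], [6, 36]])
y = np.array([2, 6, 12, 20, 30, 42])

er = ns.MedianVotingRegressor(
    estimators=[
        ("lr", LinearRegression()),
        ("rf", RandomForestRegressor(n_estimators=10, random_state=1)),
        ("knn", KNeighborsRegressor()),
    ]
)
er.fit(X, y)          # fit() is inherited from sklearn's VotingRegressor
print(er.predict(X))  # element-wise median of the three base predictions
```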

class MTS(nnetsauce.Base):
  28class MTS(Base):
  29    """Univariate and multivariate time series (MTS) forecasting with Quasi-Randomized networks
  30
  31    Parameters:
  32
  33        obj: object.
  34            any object containing a method fit (obj.fit()) and a method predict
  35            (obj.predict()).
  36
  37        n_hidden_features: int.
  38            number of nodes in the hidden layer.
  39
  40        activation_name: str.
  41            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'.
  42
  43        a: float.
  44            hyperparameter for 'prelu' or 'elu' activation function.
  45
  46        nodes_sim: str.
  47            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
  48            'uniform'.
  49
  50        bias: boolean.
  51            indicates if the hidden layer contains a bias term (True) or not
  52            (False).
  53
  54        dropout: float.
  55            regularization parameter; (random) percentage of nodes dropped out
  56            of the training.
  57
  58        direct_link: boolean.
  59            indicates if the original predictors are included (True) in model's fitting or not (False).
  60
  61        n_clusters: int.
  62            number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering).
  63
  64        cluster_encode: bool.
  65            defines how the variable containing clusters is treated (default is one-hot)
  66            if `False`, then labels are used, without one-hot encoding.
  67
  68        type_clust: str.
  69            type of clustering method: currently k-means ('kmeans') or Gaussian
  70            Mixture Model ('gmm').
  71
  72        type_scaling: a tuple of 3 strings.
  73            scaling methods for inputs, hidden layer, and clustering respectively
  74            (and when relevant).
  75            Currently available: standardization ('std') or MinMax scaling ('minmax').
  76
  77        lags: int.
  78            number of lags used for each time series.
  79            If string, lags must be one of 'AIC', 'AICc', or 'BIC'.
  80
  81        type_pi: str.
  82            type of prediction interval; currently:
  83            - "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case
  84            - "kde": based on Kernel Density Estimation of in-sample residuals
  85            - "bootstrap": based on independent bootstrap of in-sample residuals
  86            - "block-bootstrap": based on basic block bootstrap of in-sample residuals
  87            - "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals
  88            - "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals
  89            - "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals
  90            - "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals
  91            - "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals
  92            - "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals
  93            - based on copulas of in-sample residuals: 'vine-tll', 'vine-bb1', 'vine-bb6', 'vine-bb7', 'vine-bb8', 'vine-clayton',
  94            'vine-frank', 'vine-gaussian', 'vine-gumbel', 'vine-indep', 'vine-joe', 'vine-student'
  95            - 'scp-vine-tll', 'scp-vine-bb1', 'scp-vine-bb6', 'scp-vine-bb7', 'scp-vine-bb8', 'scp-vine-clayton',
  96            'scp-vine-frank', 'scp-vine-gaussian', 'scp-vine-gumbel', 'scp-vine-indep', 'scp-vine-joe', 'scp-vine-student'
  97            - 'scp2-vine-tll', 'scp2-vine-bb1', 'scp2-vine-bb6', 'scp2-vine-bb7', 'scp2-vine-bb8', 'scp2-vine-clayton',
  98            'scp2-vine-frank', 'scp2-vine-gaussian', 'scp2-vine-gumbel', 'scp2-vine-indep', 'scp2-vine-joe', 'scp2-vine-student'
  99
 100        block_size: int.
 101            size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap").
 102            Default is round(3.15*(n_residuals^1/3))
 103
 104        replications: int.
 105            number of replications (if needed, for predictive simulation). Default is 'None'.
 106
 107        kernel: str.
 108            the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'.
 109
 110        agg: str.
 111            either "mean" or "median" for simulation of bootstrap aggregating
 112
 113        seed: int.
 114            reproducibility seed for nodes_sim=='uniform' or predictive simulation.
 115
 116        backend: str.
 117            "cpu" or "gpu" or "tpu".
 118
 119        verbose: int.
 120            0: not printing; 1: printing
 121
 122        show_progress: bool.
 123            True: progress bar when fitting each series; False: no progress bar when fitting each series
 124
 125    Attributes:
 126
 127        fit_objs_: dict
 128            objects adjusted to each individual time series
 129
 130        y_: {array-like}
 131            MTS responses (most recent observations first)
 132
 133        X_: {array-like}
 134            MTS lags
 135
 136        xreg_: {array-like}
 137            external regressors
 138
 139        y_means_: dict
 140            a dictionary of each series mean values
 141
 142        preds_: {array-like}
 143            successive model predictions
 144
 145        preds_std_: {array-like}
 146            standard deviation around the predictions for Bayesian base learners (`obj`)
 147
 148        gaussian_preds_std_: {array-like}
 149            standard deviation around the predictions for `type_pi='gaussian'`
 150
 151        return_std_: boolean
 152            return uncertainty or not (set in predict)
 153
 154        df_: data frame
 155            the input data frame, in case a data.frame is provided to `fit`
 156
 157        n_obs_: int
 158            number of time series observations (number of rows for multivariate)
 159
 160        level_: int
 161            level of confidence for prediction intervals (default is 95)
 162
 163        residuals_: {array-like}
 164            in-sample residuals (for `type_pi` not conformal prediction) or calibrated residuals
 165            (for `type_pi` in conformal prediction)
 166
 167        residuals_sims_: tuple of {array-like}
 168            simulations of in-sample residuals (for `type_pi` not conformal prediction) or
 169            calibrated residuals (for `type_pi` in conformal prediction)
 170
 171        kde_: A scikit-learn object, see https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KernelDensity.html
 172
 173        residuals_std_dev_: residuals standard deviation
 174
 175    Examples:
 176
 177    Example 1:
 178
 179    ```python
 180    import nnetsauce as ns
 181    import numpy as np
 182    from sklearn import linear_model
 183    np.random.seed(123)
 184
 185    M = np.random.rand(10, 3)
 186    M[:,0] = 10*M[:,0]
 187    M[:,2] = 25*M[:,2]
 188    print(M)
 189
 190    # Adjust Bayesian Ridge
 191    regr4 = linear_model.BayesianRidge()
 192    obj_MTS = ns.MTS(regr4, lags = 1, n_hidden_features=5)
 193    obj_MTS.fit(M)
 194    print(obj_MTS.predict())
 195
 196    # with credible intervals
 197    print(obj_MTS.predict(return_std=True, level=80))
 198
 199    print(obj_MTS.predict(return_std=True, level=95))
 200    ```
 201
 202    Example 2:
 203
 204    ```python
 205    import nnetsauce as ns
 206    import numpy as np
 207    from sklearn import linear_model
 208
 209    dataset = {
 210    'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
 211    'series1' : [34, 30, 35.6, 33.3, 38.1],
 212    'series2' : [4, 5.5, 5.6, 6.3, 5.1],
 213    'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
 214    df = pd.DataFrame(dataset).set_index('date')
 215    print(df)
 216
 217    # Adjust Bayesian Ridge
 218    regr5 = linear_model.BayesianRidge()
 219    obj_MTS = ns.MTS(regr5, lags = 1, n_hidden_features=5)
 220    obj_MTS.fit(df)
 221    print(obj_MTS.predict())
 222
 223    # with credible intervals
 224    print(obj_MTS.predict(return_std=True, level=80))
 225
 226    print(obj_MTS.predict(return_std=True, level=95))
 227    ```
 228    """
 229
 230    # construct the object -----
 231
 232    def __init__(
 233        self,
 234        obj,
 235        n_hidden_features=5,
 236        activation_name="relu",
 237        a=0.01,
 238        nodes_sim="sobol",
 239        bias=True,
 240        dropout=0,
 241        direct_link=True,
 242        n_clusters=2,
 243        cluster_encode=True,
 244        type_clust="kmeans",
 245        type_scaling=("std", "std", "std"),
 246        lags=1,
 247        type_pi="kde",
 248        block_size=None,
 249        replications=None,
 250        kernel="gaussian",
 251        agg="mean",
 252        seed=123,
 253        backend="cpu",
 254        verbose=0,
 255        show_progress=True,
 256    ):
 257
 258        super().__init__(
 259            n_hidden_features=n_hidden_features,
 260            activation_name=activation_name,
 261            a=a,
 262            nodes_sim=nodes_sim,
 263            bias=bias,
 264            dropout=dropout,
 265            direct_link=direct_link,
 266            n_clusters=n_clusters,
 267            cluster_encode=cluster_encode,
 268            type_clust=type_clust,
 269            type_scaling=type_scaling,
 270            seed=seed,
 271            backend=backend,
 272        )
 273
 274        # Add validation for lags parameter
 275        if isinstance(lags, str):
 276            assert lags in (
 277                "AIC",
 278                "AICc",
 279                "BIC",
 280            ), "if string, lags must be one of 'AIC', 'AICc', or 'BIC'"
 281        else:
 282            assert int(lags) == lags, "if numeric, lags parameter should be an integer"
 283
 284        self.obj = obj
 285        self.n_series = None
 286        self.lags = lags
 287        self.type_pi = type_pi
 288        self.block_size = block_size
 289        self.replications = replications
 290        self.kernel = kernel
 291        self.agg = agg
 292        self.verbose = verbose
 293        self.show_progress = show_progress
 294        self.series_names = None
 295        self.input_dates = None
 296        self.fit_objs_ = {}
 297        self.y_ = None  # MTS responses (most recent observations first)
 298        self.X_ = None  # MTS lags
 299        self.xreg_ = None
 300        self.y_means_ = {}
 301        self.mean_ = None
 302        self.median_ = None
 303        self.upper_ = None
 304        self.lower_ = None
 305        self.output_dates_ = None
 306        self.preds_std_ = []
 307        self.gaussian_preds_std_ = None
 308        self.alpha_ = None
 309        self.return_std_ = None
 310        self.df_ = None
 311        self.residuals_ = []
 312        self.abs_calib_residuals_ = None
 313        self.calib_residuals_quantile_ = None
 314        self.residuals_sims_ = None
 315        self.kde_ = None
 316        self.sims_ = None
 317        self.residuals_std_dev_ = None
 318        self.n_obs_ = None
 319        self.level_ = None
 320        self.init_n_series_ = None
 321
 322    def fit(self, X, xreg=None, **kwargs):
 323        """Fit MTS model to training data X, with optional regressors xreg
 324
 325        Parameters:
 326
 327        X: {array-like}, shape = [n_samples, n_features]
 328            Training time series, where n_samples is the number
 329            of samples and n_features is the number of features;
 330            X must be in increasing order (most recent observations last)
 331
 332        xreg: {array-like}, shape = [n_samples, n_features_xreg]
 333            Additional (external) regressors to be passed to self.obj
 334            xreg must be in 'increasing' order (most recent observations last)
 335
 336        **kwargs: for now, additional parameters to be passed for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)
 337
 338        Returns:
 339
 340        self: object
 341        """
 342
 343        self.init_n_series_ = X.shape[1]
 344
 345        # Automatic lag selection if requested
 346        if isinstance(self.lags, str):
 347            max_lags = min(25, X.shape[0] // 4)
 348            best_ic = float("inf")
 349            best_lags = 1
 350
 351            if self.verbose:
 352                print(f"\nSelecting optimal number of lags using {self.lags}...")
 353                iterator = tqdm(range(1, max_lags + 1))
 354            else:
 355                iterator = range(1, max_lags + 1)
 356
 357            for lag in iterator:
 358                # Convert DataFrame to numpy array before reversing
 359                if isinstance(X, pd.DataFrame):
 360                    X_values = X.values[::-1]
 361                else:
 362                    X_values = X[::-1]
 363
 364                # Try current lag value
 365                if self.init_n_series_ > 1:
 366                    mts_input = ts.create_train_inputs(X_values, lag)
 367                else:
 368                    mts_input = ts.create_train_inputs(X_values.reshape(-1, 1), lag)
 369
 370                # Cook training set and fit model
 371                dummy_y, scaled_Z = self.cook_training_set(
 372                    y=np.ones(mts_input[0].shape[0]), X=mts_input[1]
 373                )
 374                residuals_ = []
 375
 376                for i in range(self.init_n_series_):
 377                    y_mean = np.mean(mts_input[0][:, i])
 378                    centered_y_i = mts_input[0][:, i] - y_mean
 379                    self.obj.fit(X=scaled_Z, y=centered_y_i)
 380                    residuals_.append(
 381                        (centered_y_i - self.obj.predict(scaled_Z)).tolist()
 382                    )
 383
 384                self.residuals_ = np.asarray(residuals_).T
 385                ic = self._compute_information_criterion(
 386                    curr_lags=lag, criterion=self.lags
 387                )
 388
 389                if self.verbose:
 390                    print(f"Trying lags={lag}, {self.lags}={ic:.2f}")
 391
 392                if ic < best_ic:
 393                    best_ic = ic
 394                    best_lags = lag
 395
 396            if self.verbose:
 397                print(f"\nSelected {best_lags} lags with {self.lags}={best_ic:.2f}")
 398
 399            self.lags = best_lags
 400
 401        self.input_dates = None
 402        self.df_ = None
 403
 404        if isinstance(X, pd.DataFrame) is False:
 405            # input data set is a numpy array
 406            if xreg is None:
 407                X = pd.DataFrame(X)
 408                self.series_names = ["series" + str(i) for i in range(X.shape[1])]
 409            else:
 410                # xreg is not None
 411                X = mo.cbind(X, xreg)
 412                self.xreg_ = xreg
 413
 414        else:  # input data set is a DataFrame with column names
 415
 416            X_index = None
 417            if X.index is not None:
 418                X_index = X.index
 419            if xreg is None:
 420                X = copy.deepcopy(mo.convert_df_to_numeric(X))
 421            else:
 422                X = copy.deepcopy(mo.cbind(mo.convert_df_to_numeric(X), xreg))
 423                self.xreg_ = xreg
 424            if X_index is not None:
 425                X.index = X_index
 426            self.series_names = X.columns.tolist()
 427
 428        if isinstance(X, pd.DataFrame):
 429            if self.df_ is None:
 430                self.df_ = X
 431                X = X.values
 432            else:
 433                input_dates_prev = pd.DatetimeIndex(self.df_.index.values)
 434                frequency = pd.infer_freq(input_dates_prev)
 435                self.df_ = pd.concat([self.df_, X], axis=0)
 436                self.input_dates = pd.date_range(
 437                    start=input_dates_prev[0],
 438                    periods=len(input_dates_prev) + X.shape[0],
 439                    freq=frequency,
 440                ).values.tolist()
 441                self.df_.index = self.input_dates
 442                X = self.df_.values
 443            self.df_.columns = self.series_names
 444        else:
 445            if self.df_ is None:
 446                self.df_ = pd.DataFrame(X, columns=self.series_names)
 447            else:
 448                self.df_ = pd.concat(
 449                    [self.df_, pd.DataFrame(X, columns=self.series_names)],
 450                    axis=0,
 451                )
 452
 453        self.input_dates = ts.compute_input_dates(self.df_)
 454
 455        try:
 456            # multivariate time series
 457            n, p = X.shape
 458        except:
 459            # univariate time series
 460            n = X.shape[0]
 461            p = 1
 462        self.n_obs_ = n
 463
 464        rep_1_n = np.repeat(1, n)
 465
 466        self.y_ = None
 467        self.X_ = None
 468        self.n_series = p
 469        self.fit_objs_.clear()
 470        self.y_means_.clear()
 471        residuals_ = []
 472        self.residuals_ = None
 473        self.residuals_sims_ = None
 474        self.kde_ = None
 475        self.sims_ = None
 476        self.scaled_Z_ = None
 477        self.centered_y_is_ = []
 478
 479        if self.init_n_series_ > 1:
 480            # multivariate time series
 481            mts_input = ts.create_train_inputs(X[::-1], self.lags)
 482        else:
 483            # univariate time series
 484            mts_input = ts.create_train_inputs(X.reshape(-1, 1)[::-1], self.lags)
 485
 486        self.y_ = mts_input[0]
 487
 488        self.X_ = mts_input[1]
 489
 490        dummy_y, scaled_Z = self.cook_training_set(y=rep_1_n, X=self.X_)
 491
 492        self.scaled_Z_ = scaled_Z
 493
 494        # loop on all the time series and adjust self.obj.fit
 495        if self.verbose > 0:
 496            print(
 497                f"\n Adjusting {type(self.obj).__name__} to multivariate time series... \n"
 498            )
 499
 500        if self.show_progress is True:
 501            iterator = tqdm(range(self.init_n_series_))
 502        else:
 503            iterator = range(self.init_n_series_)
 504
 505        if self.type_pi in (
 506            "gaussian",
 507            "kde",
 508            "bootstrap",
 509            "block-bootstrap",
 510        ) or self.type_pi.startswith("vine"):
 511            for i in iterator:
 512                y_mean = np.mean(self.y_[:, i])
 513                self.y_means_[i] = y_mean
 514                centered_y_i = self.y_[:, i] - y_mean
 515                self.centered_y_is_.append(centered_y_i)
 516                self.obj.fit(X=scaled_Z, y=centered_y_i)
 517                self.fit_objs_[i] = deepcopy(self.obj)
 518                residuals_.append(
 519                    (centered_y_i - self.fit_objs_[i].predict(scaled_Z)).tolist()
 520                )
 521
 522        if self.type_pi.startswith("scp"):
 523            # split conformal prediction
 524            for i in iterator:
 525                n_y = self.y_.shape[0]
 526                n_y_half = n_y // 2
 527                first_half_idx = range(0, n_y_half)
 528                second_half_idx = range(n_y_half, n_y)
 529                y_mean_temp = np.mean(self.y_[first_half_idx, i])
 530                centered_y_i_temp = self.y_[first_half_idx, i] - y_mean_temp
 531                self.obj.fit(X=scaled_Z[first_half_idx, :], y=centered_y_i_temp)
 532                # calibrated residuals actually
 533                residuals_.append(
 534                    (
 535                        self.y_[second_half_idx, i]
 536                        - (y_mean_temp + self.obj.predict(scaled_Z[second_half_idx, :]))
 537                    ).tolist()
 538                )
 539                # fit on the second half
 540                y_mean = np.mean(self.y_[second_half_idx, i])
 541                self.y_means_[i] = y_mean
 542                centered_y_i = self.y_[second_half_idx, i] - y_mean
 543                self.obj.fit(X=scaled_Z[second_half_idx, :], y=centered_y_i)
 544                self.fit_objs_[i] = deepcopy(self.obj)
 545
 546        self.residuals_ = np.asarray(residuals_).T
 547
 548        if self.type_pi == "gaussian":
 549            self.gaussian_preds_std_ = np.std(self.residuals_, axis=0)
 550
 551        if self.type_pi.startswith("scp2"):
 552            # Calculate mean and standard deviation for each column
 553            data_mean = np.mean(self.residuals_, axis=0)
 554            self.residuals_std_dev_ = np.std(self.residuals_, axis=0)
 555            # Center and scale the array using broadcasting
 556            self.residuals_ = (
 557                self.residuals_ - data_mean[np.newaxis, :]
 558            ) / self.residuals_std_dev_[np.newaxis, :]
 559
 560        if self.replications != None and "kde" in self.type_pi:
 561            if self.verbose > 0:
 562                print(f"\n Simulate residuals using {self.kernel} kernel... \n")
 563            assert self.kernel in (
 564                "gaussian",
 565                "tophat",
 566            ), "currently, 'kernel' must be either 'gaussian' or 'tophat'"
 567            kernel_bandwidths = {"bandwidth": np.logspace(-6, 6, 150)}
 568            grid = GridSearchCV(
 569                KernelDensity(kernel=self.kernel, **kwargs),
 570                param_grid=kernel_bandwidths,
 571            )
 572            grid.fit(self.residuals_)
 573
 574            if self.verbose > 0:
 575                print(
 576                    f"\n Best parameters for {self.kernel} kernel: {grid.best_params_} \n"
 577                )
 578
 579            self.kde_ = grid.best_estimator_
 580
 581        return self
 582
 583    def partial_fit(self, X, xreg=None, **kwargs):
 584        """Update the model with new observations X, with optional regressors xreg
 585
 586        Parameters:
 587
 588        X: {array-like}, shape = [n_samples, n_features]
 589            Training time series, where n_samples is the number
 590            of samples and n_features is the number of features;
 591            X must be in increasing order (most recent observations last)
 592
 593        xreg: {array-like}, shape = [n_samples, n_features_xreg]
 594            Additional (external) regressors to be passed to self.obj
 595            xreg must be in 'increasing' order (most recent observations last)
 596
 597        **kwargs: for now, additional parameters to be passed for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)
 598
 599        Returns:
 600
 601        self: object
 602        """
 603
 604        assert self.df_ is not None, "fit() must be called before partial_fit()"
 605
 606        if (isinstance(X, pd.DataFrame) is False) and isinstance(X, pd.Series) is False:
 607            if len(X.shape) == 1:
 608                X = X.reshape(1, -1)
 609
 610            return self.fit(X, xreg, **kwargs)
 611
 612        else:
 613            if len(X.shape) == 1:
 614                X = pd.DataFrame(X.values.reshape(1, -1), columns=self.df_.columns)
 615
 616            return self.fit(X, xreg, **kwargs)
 617
 618    def predict(self, h=5, level=95, **kwargs):
 619        """Forecast all the time series, h steps ahead"""
 620
 621        self.output_dates_, frequency = ts.compute_output_dates(self.df_, h)
 622
 623        self.level_ = level
 624
 625        self.return_std_ = False  # do not remove (/!\)
 626
 627        self.mean_ = None  # do not remove (/!\)
 628
 629        self.mean_ = deepcopy(self.y_)  # do not remove (/!\)
 630
 631        self.lower_ = None  # do not remove (/!\)
 632
 633        self.upper_ = None  # do not remove (/!\)
 634
 635        self.sims_ = None  # do not remove (/!\)
 636
 637        y_means_ = np.asarray([self.y_means_[i] for i in range(self.init_n_series_)])
 638
 639        n_features = self.init_n_series_ * self.lags
 640
 641        self.alpha_ = 100 - level
 642
 643        pi_multiplier = norm.ppf(1 - self.alpha_ / 200)
 644
 645        if "return_std" in kwargs:  # bayesian forecasting
 646            self.return_std_ = True
 647            self.preds_std_ = []
 648            DescribeResult = namedtuple(
 649                "DescribeResult", ("mean", "lower", "upper")
 650            )  # to be updated
 651
 652        if "return_pi" in kwargs:  # split conformal, without simulation
 653            mean_pi_ = []
 654            lower_pi_ = []
 655            upper_pi_ = []
 656            median_pi_ = []
 657            DescribeResult = namedtuple(
 658                "DescribeResult", ("mean", "lower", "upper")
 659            )  # to be updated
 660
 661        if self.kde_ != None and "kde" in self.type_pi:  # kde
 662            target_cols = self.df_.columns[
 663                : self.init_n_series_
 664            ]  # Get target column names
 665            if self.verbose == 1:
 666                self.residuals_sims_ = tuple(
 667                    self.kde_.sample(
 668                        n_samples=h, random_state=self.seed + 100 * i
 669                    )  # Keep full sample
 670                    for i in tqdm(range(self.replications))
 671                )
 672            elif self.verbose == 0:
 673                self.residuals_sims_ = tuple(
 674                    self.kde_.sample(
 675                        n_samples=h, random_state=self.seed + 100 * i
 676                    )  # Keep full sample
 677                    for i in range(self.replications)
 678                )
 679
 680            # Convert to DataFrames after sampling
 681            self.residuals_sims_ = tuple(
 682                pd.DataFrame(
 683                    sim,  # Keep all columns
 684                    columns=target_cols,  # Use original target column names
 685                    index=self.output_dates_,
 686                )
 687                for sim in self.residuals_sims_
 688            )
 689
 690        if self.type_pi in ("bootstrap", "scp-bootstrap", "scp2-bootstrap"):
 691            assert self.replications is not None and isinstance(
 692                self.replications, int
 693            ), "'replications' must be provided and be an integer"
 694            if self.verbose == 1:
 695                self.residuals_sims_ = tuple(
 696                    ts.bootstrap(
 697                        self.residuals_,
 698                        h=h,
 699                        block_size=None,
 700                        seed=self.seed + 100 * i,
 701                    )
 702                    for i in tqdm(range(self.replications))
 703                )
 704            elif self.verbose == 0:
 705                self.residuals_sims_ = tuple(
 706                    ts.bootstrap(
 707                        self.residuals_,
 708                        h=h,
 709                        block_size=None,
 710                        seed=self.seed + 100 * i,
 711                    )
 712                    for i in range(self.replications)
 713                )
 714
 715        if self.type_pi in (
 716            "block-bootstrap",
 717            "scp-block-bootstrap",
 718            "scp2-block-bootstrap",
 719        ):
 720            if self.block_size is None:
 721                self.block_size = int(
 722                    np.ceil(3.15 * (self.residuals_.shape[0] ** (1 / 3)))
 723                )
 724
 725            assert self.replications is not None and isinstance(
 726                self.replications, int
 727            ), "'replications' must be provided and be an integer"
 728            if self.verbose == 1:
 729                self.residuals_sims_ = tuple(
 730                    ts.bootstrap(
 731                        self.residuals_,
 732                        h=h,
 733                        block_size=self.block_size,
 734                        seed=self.seed + 100 * i,
 735                    )
 736                    for i in tqdm(range(self.replications))
 737                )
 738            elif self.verbose == 0:
 739                self.residuals_sims_ = tuple(
 740                    ts.bootstrap(
 741                        self.residuals_,
 742                        h=h,
 743                        block_size=self.block_size,
 744                        seed=self.seed + 100 * i,
 745                    )
 746                    for i in range(self.replications)
 747                )
 748
 749        if "vine" in self.type_pi:
 750            if self.verbose == 1:
 751                self.residuals_sims_ = tuple(
 752                    vinecopula_sample(
 753                        x=self.residuals_,
 754                        n_samples=h,
 755                        method=self.type_pi,
 756                        random_state=self.seed + 100 * i,
 757                    )
 758                    for i in tqdm(range(self.replications))
 759                )
 760            elif self.verbose == 0:
 761                self.residuals_sims_ = tuple(
 762                    vinecopula_sample(
 763                        x=self.residuals_,
 764                        n_samples=h,
 765                        method=self.type_pi,
 766                        random_state=self.seed + 100 * i,
 767                    )
 768                    for i in range(self.replications)
 769                )
 770
 771        mean_ = deepcopy(self.mean_)
 772
 773        for i in range(h):
 774
 775            new_obs = ts.reformat_response(mean_, self.lags)
 776            new_X = new_obs.reshape(1, -1)
 777            cooked_new_X = self.cook_test_set(new_X, **kwargs)
 778
 779            if "return_std" in kwargs:
 780                self.preds_std_.append(
 781                    [
 782                        np.asarray(
 783                            self.fit_objs_[i].predict(cooked_new_X, return_std=True)[1]
 784                        ).item()
 785                        for i in range(self.n_series)
 786                    ]
 787                )
 788
 789            if "return_pi" in kwargs:
 790                for i in range(self.n_series):
 791                    preds_pi = self.fit_objs_[i].predict(cooked_new_X, **kwargs)
 792                    mean_pi_.append(preds_pi.mean[0])
 793                    lower_pi_.append(preds_pi.lower[0])
 794                    upper_pi_.append(preds_pi.upper[0])
 795
 796            predicted_cooked_new_X = np.asarray(
 797                [
 798                    np.asarray(self.fit_objs_[i].predict(cooked_new_X)).item()
 799                    for i in range(self.init_n_series_)
 800                ]
 801            )
 802
 803            preds = np.asarray(y_means_ + predicted_cooked_new_X)
 804
 805            # Create full row with both predictions and external regressors
 806            if self.xreg_ is not None and "xreg" in kwargs:
 807                next_xreg = kwargs["xreg"].iloc[i : i + 1].values.flatten()
 808                full_row = np.concatenate([preds, next_xreg])
 809            else:
 810                full_row = preds
 811
 812            # Create a new row with same number of columns as mean_
 813            new_row = np.zeros((1, mean_.shape[1]))
 814            new_row[0, : full_row.shape[0]] = full_row
 815
 816            # Maintain the full dimensionality by using vstack instead of rbind
 817            mean_ = np.vstack([new_row, mean_[:-1]])
 818
 819        # Final output should only include the target columns
 820        self.mean_ = pd.DataFrame(
 821            mean_[0:h, : self.init_n_series_][::-1],
 822            columns=self.df_.columns[: self.init_n_series_],
 823            index=self.output_dates_,
 824        )
 825
 826        # function's return ----------------------------------------------------------------------
 827        if (
 828            (("return_std" not in kwargs) and ("return_pi" not in kwargs))
 829            and (self.type_pi not in ("gaussian", "scp"))
 830        ) or ("vine" in self.type_pi):
 831
 832            if self.replications is None:
 833                return self.mean_.iloc[:, : self.init_n_series_]
 834
 835            # if "return_std" not in kwargs and self.replications is not None
 836            meanf = []
 837            medianf = []
 838            lower = []
 839            upper = []
 840
 841            if "scp2" in self.type_pi:
 842
 843                if self.verbose == 1:
 844                    self.sims_ = tuple(
 845                        (
 846                            self.mean_
 847                            + self.residuals_sims_[i]
 848                            * self.residuals_std_dev_[np.newaxis, :]
 849                            for i in tqdm(range(self.replications))
 850                        )
 851                    )
 852                elif self.verbose == 0:
 853                    self.sims_ = tuple(
 854                        (
 855                            self.mean_
 856                            + self.residuals_sims_[i]
 857                            * self.residuals_std_dev_[np.newaxis, :]
 858                            for i in range(self.replications)
 859                        )
 860                    )
 861            else:
 862
 863                if self.verbose == 1:
 864                    self.sims_ = tuple(
 865                        (
 866                            self.mean_ + self.residuals_sims_[i]
 867                            for i in tqdm(range(self.replications))
 868                        )
 869                    )
 870                elif self.verbose == 0:
 871                    self.sims_ = tuple(
 872                        (
 873                            self.mean_ + self.residuals_sims_[i]
 874                            for i in range(self.replications)
 875                        )
 876                    )
 877
 878            DescribeResult = namedtuple(
 879                "DescribeResult", ("mean", "sims", "lower", "upper")
 880            )
 881            for ix in range(self.init_n_series_):
 882                sims_ix = getsims(self.sims_, ix)
 883                if self.agg == "mean":
 884                    meanf.append(np.mean(sims_ix, axis=1))
 885                else:
 886                    medianf.append(np.median(sims_ix, axis=1))
 887                lower.append(np.quantile(sims_ix, q=self.alpha_ / 200, axis=1))
 888                upper.append(np.quantile(sims_ix, q=1 - self.alpha_ / 200, axis=1))
 889            self.mean_ = pd.DataFrame(
 890                np.asarray(meanf).T,
 891                columns=self.series_names[: self.init_n_series_],  # self.df_.columns,
 892                index=self.output_dates_,
 893            )
 894
 895            self.lower_ = pd.DataFrame(
 896                np.asarray(lower).T,
 897                columns=self.series_names[: self.init_n_series_],  # self.df_.columns,
 898                index=self.output_dates_,
 899            )
 900
 901            self.upper_ = pd.DataFrame(
 902                np.asarray(upper).T,
 903                columns=self.series_names[: self.init_n_series_],  # self.df_.columns,
 904                index=self.output_dates_,
 905            )
 906
 907            try:
 908                self.median_ = pd.DataFrame(
 909                    np.asarray(medianf).T,
 910                    columns=self.series_names[
 911                        : self.init_n_series_
 912                    ],  # self.df_.columns,
 913                    index=self.output_dates_,
 914                )
 915            except Exception as e:
 916                pass
 917
 918            return DescribeResult(self.mean_, self.sims_, self.lower_, self.upper_)
 919
 920        if (
 921            (("return_std" in kwargs) or ("return_pi" in kwargs))
 922            and (self.type_pi not in ("gaussian", "scp"))
 923        ) or "vine" in self.type_pi:
 924            DescribeResult = namedtuple("DescribeResult", ("mean", "lower", "upper"))
 925
 926            self.mean_ = pd.DataFrame(
 927                np.asarray(self.mean_),
 928                columns=self.series_names,  # self.df_.columns,
 929                index=self.output_dates_,
 930            )
 931
 932            if "return_std" in kwargs:
 933
 934                self.preds_std_ = np.asarray(self.preds_std_)
 935
 936                self.lower_ = pd.DataFrame(
 937                    self.mean_.values - pi_multiplier * self.preds_std_,
 938                    columns=self.series_names,  # self.df_.columns,
 939                    index=self.output_dates_,
 940                )
 941
 942                self.upper_ = pd.DataFrame(
 943                    self.mean_.values + pi_multiplier * self.preds_std_,
 944                    columns=self.series_names,  # self.df_.columns,
 945                    index=self.output_dates_,
 946                )
 947
 948            if "return_pi" in kwargs:
 949
 950                self.lower_ = pd.DataFrame(
 951                    np.asarray(lower_pi_).reshape(h, self.n_series)
 952                    + y_means_[np.newaxis, :],
 953                    columns=self.series_names,  # self.df_.columns,
 954                    index=self.output_dates_,
 955                )
 956
 957                self.upper_ = pd.DataFrame(
 958                    np.asarray(upper_pi_).reshape(h, self.n_series)
 959                    + y_means_[np.newaxis, :],
 960                    columns=self.series_names,  # self.df_.columns,
 961                    index=self.output_dates_,
 962                )
 963
 964            res = DescribeResult(self.mean_, self.lower_, self.upper_)
 965
 966            if self.xreg_ is not None:
 967                if len(self.xreg_.shape) > 1:
 968                    res2 = mx.tuple_map(
 969                        res,
 970                        lambda x: mo.delete_last_columns(
 971                            x, num_columns=self.xreg_.shape[1]
 972                        ),
 973                    )
 974                else:
 975                    res2 = mx.tuple_map(
 976                        res, lambda x: mo.delete_last_columns(x, num_columns=1)
 977                    )
 978                return DescribeResult(res2[0], res2[1], res2[2])
 979
 980            return res
 981
 982        if self.type_pi == "gaussian":
 983
 984            DescribeResult = namedtuple("DescribeResult", ("mean", "lower", "upper"))
 985
 986            self.mean_ = pd.DataFrame(
 987                np.asarray(self.mean_),
 988                columns=self.series_names,  # self.df_.columns,
 989                index=self.output_dates_,
 990            )
 991
 992            self.lower_ = pd.DataFrame(
 993                self.mean_.values - pi_multiplier * self.gaussian_preds_std_,
 994                columns=self.series_names,  # self.df_.columns,
 995                index=self.output_dates_,
 996            )
 997
 998            self.upper_ = pd.DataFrame(
 999                self.mean_.values + pi_multiplier * self.gaussian_preds_std_,
1000                columns=self.series_names,  # self.df_.columns,
1001                index=self.output_dates_,
1002            )
1003
1004            res = DescribeResult(self.mean_, self.lower_, self.upper_)
1005
1006            if self.xreg_ is not None:
1007                if len(self.xreg_.shape) > 1:
1008                    res2 = mx.tuple_map(
1009                        res,
1010                        lambda x: mo.delete_last_columns(
1011                            x, num_columns=self.xreg_.shape[1]
1012                        ),
1013                    )
1014                else:
1015                    res2 = mx.tuple_map(
1016                        res, lambda x: mo.delete_last_columns(x, num_columns=1)
1017                    )
1018                return DescribeResult(res2[0], res2[1], res2[2])
1019
1020            return res
1021
1022        # After prediction loop, ensure sims only contain target columns
1023        if self.sims_ is not None:
1024            if self.verbose == 1:
1025                self.sims_ = tuple(
1026                    sim[:h,]  # Only keep target columns and h rows
1027                    for sim in tqdm(self.sims_)
1028                )
1029            elif self.verbose == 0:
1030                self.sims_ = tuple(
1031                    sim[:h,]  # Only keep target columns and h rows
1032                    for sim in self.sims_
1033                )
1034
1035            # Convert numpy arrays to DataFrames with proper columns
1036            self.sims_ = tuple(
1037                pd.DataFrame(
1038                    sim,
1039                    columns=self.df_.columns[: self.init_n_series_],
1040                    index=self.output_dates_,
1041                )
1042                for sim in self.sims_
1043            )
1044
1045        if self.type_pi in ("kde", "bootstrap", "block-bootstrap", "vine-copula"):
1046            if self.xreg_ is not None:
1047                # Use getsimsxreg when external regressors are present
1048                target_cols = self.df_.columns[: self.init_n_series_]
1049                self.sims_ = getsimsxreg(self.sims_, self.output_dates_, target_cols)
1050            else:
1051                # Use original getsims for backward compatibility
1052                self.sims_ = getsims(self.sims_)
1053
1054    def score(self, X, training_index, testing_index, scoring=None, **kwargs):
1055        """Train on training_index, score on testing_index."""
1056
1057        assert (
1058            bool(set(training_index).intersection(set(testing_index))) == False
1059        ), "Non-overlapping 'training_index' and 'testing_index' required"
1060
1061        # Dimensions
1062        try:
1063            # multivariate time series
1064            n, p = X.shape
1065        except:
1066            # univariate time series
1067            n = X.shape[0]
1068            p = 1
1069
1070        # Training and testing sets
1071        if p > 1:
1072            X_train = X[training_index, :]
1073            X_test = X[testing_index, :]
1074        else:
1075            X_train = X[training_index]
1076            X_test = X[testing_index]
1077
1078        # Horizon
1079        h = len(testing_index)
1080        assert (
1081            len(training_index) + h
1082        ) <= n, "Please check lengths of training and testing windows"
1083
1084        # Fit and predict
1085        self.fit(X_train, **kwargs)
1086        preds = self.predict(h=h, **kwargs)
1087
1088        if scoring is None:
1089            scoring = "neg_root_mean_squared_error"
1090
1091        # check inputs
1092        assert scoring in (
1093            "explained_variance",
1094            "neg_mean_absolute_error",
1095            "neg_mean_squared_error",
1096            "neg_root_mean_squared_error",
1097            "neg_mean_squared_log_error",
1098            "neg_median_absolute_error",
1099            "r2",
1100        ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \
1101                               'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \
1102                               'neg_median_absolute_error', 'r2')"
1103
1104        scoring_options = {
1105            "explained_variance": skm2.explained_variance_score,
1106            "neg_mean_absolute_error": skm2.mean_absolute_error,
1107            "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2),
1108            "neg_root_mean_squared_error": lambda x, y: np.sqrt(np.mean((x - y) ** 2)),
1109            "neg_mean_squared_log_error": skm2.mean_squared_log_error,
1110            "neg_median_absolute_error": skm2.median_absolute_error,
1111            "r2": skm2.r2_score,
1112        }
1113
1114        return scoring_options[scoring](X_test, preds)
1115
1116    def plot(self, series=None, type_axis="dates", type_plot="pi"):
1117        """Plot time series forecast
1118
1119        Parameters:
1120
1121        series: {integer} or {string}
1122            series index or name
1123
1124        """
1125
1126        assert all(
1127            [
1128                self.mean_ is not None,
1129                self.lower_ is not None,
1130                self.upper_ is not None,
1131                self.output_dates_ is not None,
1132            ]
1133        ), "model forecasting must be obtained first (with predict)"
1134
1135        if series is None:
1136            # assert (
1137            #    self.init_n_series_ == 1
1138            # ), "please specify series index or name (n_series > 1)"
1139            series = 0
1140
1141        if isinstance(series, str):
1142            assert (
1143                series in self.series_names
1144            ), f"series {series} doesn't exist in the input dataset"
1145            series_idx = self.df_.columns.get_loc(series)
1146        else:
1147            assert isinstance(series, int) and (
1148                0 <= series < self.n_series
1149            ), f"check series index (< {self.n_series})"
1150            series_idx = series
1151
1152        y_all = list(self.df_.iloc[:, series_idx]) + list(
1153            self.mean_.iloc[:, series_idx]
1154        )
1155        y_test = list(self.mean_.iloc[:, series_idx])
1156        n_points_all = len(y_all)
1157        n_points_train = self.df_.shape[0]
1158
1159        if type_axis == "numeric":
1160            x_all = [i for i in range(n_points_all)]
1161            x_test = [i for i in range(n_points_train, n_points_all)]
1162
1163        if type_axis == "dates":  # use dates
1164            x_all = np.concatenate(
1165                (self.input_dates.values, self.output_dates_.values), axis=None
1166            )
1167            x_test = self.output_dates_.values
1168
1169        if type_plot == "pi":
1170            fig, ax = plt.subplots()
1171            ax.plot(x_all, y_all, "-")
1172            ax.plot(x_test, y_test, "-", color="orange")
1173            ax.fill_between(
1174                x_test,
1175                self.lower_.iloc[:, series_idx],
1176                self.upper_.iloc[:, series_idx],
1177                alpha=0.2,
1178                color="orange",
1179            )
1180            if self.replications is None:
1181                if self.n_series > 1:
1182                    plt.title(
1183                        f"prediction intervals for {series}",
1184                        loc="left",
1185                        fontsize=12,
1186                        fontweight=0,
1187                        color="black",
1188                    )
1189                else:
1190                    plt.title(
1191                        f"prediction intervals for input time series",
1192                        loc="left",
1193                        fontsize=12,
1194                        fontweight=0,
1195                        color="black",
1196                    )
1197                plt.show()
1198            else:  # self.replications is not None
1199                if self.n_series > 1:
1200                    plt.title(
1201                        f"prediction intervals for {self.replications} simulations of {series}",
1202                        loc="left",
1203                        fontsize=12,
1204                        fontweight=0,
1205                        color="black",
1206                    )
1207                else:
1208                    plt.title(
1209                        f"prediction intervals for {self.replications} simulations of input time series",
1210                        loc="left",
1211                        fontsize=12,
1212                        fontweight=0,
1213                        color="black",
1214                    )
1215                plt.show()
1216
1217        if type_plot == "spaghetti":
1218            palette = plt.get_cmap("Set1")
1219            sims_ix = getsims(self.sims_, series_idx)
1220            plt.plot(x_all, y_all, "-")
1221            for col_ix in range(
1222                sims_ix.shape[1]
1223            ):  # avoid this when there are thousands of simulations
1224                plt.plot(
1225                    x_test,
1226                    sims_ix[:, col_ix],
1227                    "-",
1228                    color=palette(col_ix),
1229                    linewidth=1,
1230                    alpha=0.9,
1231                )
1232            plt.plot(x_all, y_all, "-", color="black")
1233            plt.plot(x_test, y_test, "-", color="blue")
1234            # Add titles
1235            if self.n_series > 1:
1236                plt.title(
1237                    f"{self.replications} simulations of {series}",
1238                    loc="left",
1239                    fontsize=12,
1240                    fontweight=0,
1241                    color="black",
1242                )
1243            else:
1244                plt.title(
1245                    f"{self.replications} simulations of input time series",
1246                    loc="left",
1247                    fontsize=12,
1248                    fontweight=0,
1249                    color="black",
1250                )
1251            plt.xlabel("Time")
1252            plt.ylabel("Values")
1253            # Show the graph
1254            plt.show()
1255
1256    def cross_val_score(
1257        self,
1258        X,
1259        scoring="root_mean_squared_error",
1260        n_jobs=None,
1261        verbose=0,
1262        xreg=None,
1263        initial_window=5,
1264        horizon=3,
1265        fixed_window=False,
1266        show_progress=True,
1267        level=95,
1268        **kwargs,
1269    ):
1270        """Evaluate a score by time series cross-validation.
1271
1272        Parameters:
1273
1274            X: {array-like, sparse matrix} of shape (n_samples, n_features)
1275                The data to fit.
1276
1277            scoring: str or a function
1278                A str in ('root_mean_squared_error', 'mean_squared_error', 'mean_error',
1279                'mean_absolute_error', 'mean_percentage_error',
1280                'mean_absolute_percentage_error', 'winkler_score', 'coverage')
1281                Or a function with the same signature as 'coverage' and 'winkler_score' in `utils.timeseries`
1282
1283            n_jobs: int, default=None
1284                Number of jobs to run in parallel.
1285
1286            verbose: int, default=0
1287                The verbosity level.
1288
1289            xreg: array-like, optional (default=None)
1290                Additional (external) regressors to be passed to `fit`
1291                xreg must be in 'increasing' order (most recent observations last)
1292
1293            initial_window: int
1294                initial number of consecutive values in each training set sample
1295
1296            horizon: int
1297                number of consecutive values in test set sample
1298
1299            fixed_window: boolean
1300                if False, all training samples start at index 0, and the training
1301                window's size is increasing.
1302                if True, the training window's size is fixed, and the window is
1303                rolling forward
1304
1305            show_progress: boolean
1306                if True, a progress bar is printed
1307
1308            **kwargs: dict
1309                additional parameters to be passed to `fit` and `predict`
1310
1311        Returns:
1312
1313            A tuple: raw errors (one entry per split) and their descriptive statistics
1314
1315        """
1316        tscv = TimeSeriesSplit()
1317
1318        tscv_obj = tscv.split(
1319            X,
1320            initial_window=initial_window,
1321            horizon=horizon,
1322            fixed_window=fixed_window,
1323        )
1324
1325        if isinstance(scoring, str):
1326
1327            assert scoring in (
1328                "root_mean_squared_error",
1329                "mean_squared_error",
1330                "mean_error",
1331                "mean_absolute_error",
1332                "mean_percentage_error",
1333                "mean_absolute_percentage_error",
1334                "winkler_score",
1335                "coverage",
1336            ), "must have scoring in ('root_mean_squared_error', 'mean_squared_error', 'mean_error', 'mean_absolute_error', 'mean_percentage_error', 'mean_absolute_percentage_error', 'winkler_score', 'coverage')"
1337
1338            def err_func(X_test, X_pred, scoring):
1339                if (self.replications is not None) or (
1340                    self.type_pi == "gaussian"
1341                ):  # probabilistic
1342                    if scoring == "winkler_score":
1343                        return winkler_score(X_pred, X_test, level=level)
1344                    elif scoring == "coverage":
1345                        return coverage(X_pred, X_test, level=level)
1346                    else:
1347                        return mean_errors(
1348                            pred=X_pred.mean, actual=X_test, scoring=scoring
1349                        )
1350                else:  # not probabilistic
1351                    return mean_errors(pred=X_pred, actual=X_test, scoring=scoring)
1352
1353        else:  # isinstance(scoring, str) = False
1354
1355            err_func = scoring
1356
1357        errors = []
1358
1359        train_indices = []
1360
1361        test_indices = []
1362
1363        for train_index, test_index in tscv_obj:
1364            train_indices.append(train_index)
1365            test_indices.append(test_index)
1366
1367        if show_progress is True:
1368            iterator = tqdm(zip(train_indices, test_indices), total=len(train_indices))
1369        else:
1370            iterator = zip(train_indices, test_indices)
1371
1372        for train_index, test_index in iterator:
1373
1374            if verbose == 1:
1375                print(f"TRAIN: {train_index}")
1376                print(f"TEST: {test_index}")
1377
1378            if isinstance(X, pd.DataFrame):
1379                self.fit(X.iloc[train_index, :], xreg=xreg, **kwargs)
1380                X_test = X.iloc[test_index, :]
1381            else:
1382                self.fit(X[train_index, :], xreg=xreg, **kwargs)
1383                X_test = X[test_index, :]
1384            X_pred = self.predict(h=int(len(test_index)), level=level, **kwargs)
1385
1386            errors.append(err_func(X_test, X_pred, scoring))
1387
1388        res = np.asarray(errors)
1389
1390        return res, describe(res)
1391
1392    def _compute_information_criterion(self, curr_lags, criterion="AIC"):
1393        """Compute information criterion using existing residuals
1394
1395        Parameters
1396        ----------
1397        curr_lags : int
1398            Current number of lags being evaluated
1399        criterion : str
1400            One of 'AIC', 'AICc', or 'BIC'
1401
1402        Returns
1403        -------
1404        float
1405            Information criterion value or inf if parameters exceed observations
1406        """
1407        # Get dimensions
1408        n_obs = self.residuals_.shape[0]
1409        n_features = int(self.init_n_series_ * curr_lags)
1410        n_hidden = int(self.n_hidden_features)
1411
1412        # Calculate number of parameters
1413        term1 = int(n_features * n_hidden)
1414        term2 = int(n_hidden * self.init_n_series_)
1415        n_params = term1 + term2
1416
1417        # Check if we have enough observations for the number of parameters
1418        if n_obs <= n_params + 1:
1419            return float("inf")  # Return infinity if too many parameters
1420
1421        # Compute RSS using existing residuals
1422        rss = np.sum(self.residuals_**2)
1423
1424        # Compute criterion
1425        if criterion == "AIC":
1426            ic = n_obs * np.log(rss / n_obs) + 2 * n_params
1427        elif criterion == "AICc":
1428            ic = n_obs * np.log(rss / n_obs) + 2 * n_params * (
1429                n_obs / (n_obs - n_params - 1)
1430            )
1431        else:  # BIC
1432            ic = n_obs * np.log(rss / n_obs) + n_params * np.log(n_obs)
1433
1434        return ic
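
A minimal, hypothetical sketch of time series cross-validation with the `cross_val_score` method defined above; the data, base learner and window sizes below are illustrative only, not part of the library's documentation.

```python
import nnetsauce as ns
import numpy as np
from sklearn.linear_model import Ridge

np.random.seed(123)
X = np.random.rand(50, 3)  # 50 observations of 3 illustrative series

obj_MTS = ns.MTS(Ridge(), lags=2, n_hidden_features=5)

# errors: one entry per rolling-origin split; stats: their descriptive statistics
errors, stats = obj_MTS.cross_val_score(
    X,
    scoring="root_mean_squared_error",
    initial_window=20,   # length of the first training window
    horizon=5,           # forecast 5 steps ahead on each split
    fixed_window=False,  # expanding training window
    show_progress=False,
)
print(errors)
print(stats)
```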

Univariate and multivariate time series (MTS) forecasting with Quasi-Randomized networks

Parameters:

obj: object.
    any object containing a method fit (obj.fit()) and a method predict
    (obj.predict()).

n_hidden_features: int.
    number of nodes in the hidden layer.

activation_name: str.
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'.

a: float.
    hyperparameter for 'prelu' or 'elu' activation function.

nodes_sim: str.
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'.

bias: boolean.
    indicates if the hidden layer contains a bias term (True) or not
    (False).

dropout: float.
    regularization parameter; (random) percentage of nodes dropped out
    of the training.

direct_link: boolean.
    indicates if the original predictors are included (True) in model's fitting or not (False).

n_clusters: int.
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering).

cluster_encode: bool.
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding.

type_clust: str.
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm').

type_scaling: a tuple of 3 strings.
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax').

lags: int or str.
    number of lags used for each time series.
    If a string ('AIC', 'AICc', or 'BIC'), the number of lags is selected
    automatically by minimizing that information criterion.

type_pi: str.
    type of prediction interval; currently:
    - "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case
    - "kde": based on Kernel Density Estimation of in-sample residuals
    - "bootstrap": based on independent bootstrap of in-sample residuals
    - "block-bootstrap": based on basic block bootstrap of in-sample residuals
    - "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals
    - "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals
    - "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals
    - "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals
    - "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals
    - "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals
    - based on copulas of in-sample residuals: 'vine-tll', 'vine-bb1', 'vine-bb6', 'vine-bb7', 'vine-bb8', 'vine-clayton',
    'vine-frank', 'vine-gaussian', 'vine-gumbel', 'vine-indep', 'vine-joe', 'vine-student'
    - 'scp-vine-tll', 'scp-vine-bb1', 'scp-vine-bb6', 'scp-vine-bb7', 'scp-vine-bb8', 'scp-vine-clayton',
    'scp-vine-frank', 'scp-vine-gaussian', 'scp-vine-gumbel', 'scp-vine-indep', 'scp-vine-joe', 'scp-vine-student'
    - 'scp2-vine-tll', 'scp2-vine-bb1', 'scp2-vine-bb6', 'scp2-vine-bb7', 'scp2-vine-bb8', 'scp2-vine-clayton',
    'scp2-vine-frank', 'scp2-vine-gaussian', 'scp2-vine-gumbel', 'scp2-vine-indep', 'scp2-vine-joe', 'scp2-vine-student'

block_size: int.
    size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap").
    Default is ceiling(3.15 * n_residuals^(1/3))

replications: int.
    number of replications (if needed, for predictive simulation). Default is 'None'.

kernel: str.
    the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'.

agg: str.
    either "mean" or "median" for simulation of bootstrap aggregating

seed: int.
    reproducibility seed for nodes_sim=='uniform' or predictive simulation.

backend: str.
    "cpu" or "gpu" or "tpu".

verbose: int.
    0: not printing; 1: printing

show_progress: bool.
    True: progress bar when fitting each series; False: no progress bar when fitting each series

Attributes:

fit_objs_: dict
    objects adjusted to each individual time series

y_: {array-like}
    MTS responses (most recent observations first)

X_: {array-like}
    MTS lags

xreg_: {array-like}
    external regressors

y_means_: dict
    a dictionary of each series mean values

preds_: {array-like}
    successive model predictions

preds_std_: {array-like}
    standard deviation around the predictions for Bayesian base learners (`obj`)

gaussian_preds_std_: {array-like}
    standard deviation around the predictions for `type_pi='gaussian'`

return_std_: boolean
    return uncertainty or not (set in predict)

df_: data frame
    the input data frame, in case a data.frame is provided to `fit`

n_obs_: int
    number of time series observations (number of rows for multivariate)

level_: int
    level of confidence for prediction intervals (default is 95)

residuals_: {array-like}
    in-sample residuals (for `type_pi` not conformal prediction) or calibrated residuals
    (for `type_pi` in conformal prediction)

residuals_sims_: tuple of {array-like}
    simulations of in-sample residuals (for `type_pi` not conformal prediction) or
    calibrated residuals (for `type_pi` in conformal prediction)

kde_: A scikit-learn object, see https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KernelDensity.html

residuals_std_dev_: residuals standard deviation

Examples:

Example 1:

import nnetsauce as ns
import numpy as np
from sklearn import linear_model
np.random.seed(123)

M = np.random.rand(10, 3)
M[:,0] = 10*M[:,0]
M[:,2] = 25*M[:,2]
print(M)

# Adjust Bayesian Ridge
regr4 = linear_model.BayesianRidge()
obj_MTS = ns.MTS(regr4, lags = 1, n_hidden_features=5)
obj_MTS.fit(M)
print(obj_MTS.predict())

# with credible intervals
print(obj_MTS.predict(return_std=True, level=80))

print(obj_MTS.predict(return_std=True, level=95))

Example 2:

import nnetsauce as ns
import numpy as np
import pandas as pd
from sklearn import linear_model

dataset = {
'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
'series1' : [34, 30, 35.6, 33.3, 38.1],
'series2' : [4, 5.5, 5.6, 6.3, 5.1],
'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
df = pd.DataFrame(dataset).set_index('date')
print(df)

# Adjust Bayesian Ridge
regr5 = linear_model.BayesianRidge()
obj_MTS = ns.MTS(regr5, lags = 1, n_hidden_features=5)
obj_MTS.fit(df)
print(obj_MTS.predict())

# with credible intervals
print(obj_MTS.predict(return_std=True, level=80))

print(obj_MTS.predict(return_std=True, level=95))
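
Example 3 (an additional, purely illustrative sketch, not from the original docstring): automatic lag selection combined with conformalized prediction intervals and the `plot` method. The data, base learner and hyperparameters are made up.

```python
import nnetsauce as ns
import numpy as np
from sklearn.linear_model import Ridge

np.random.seed(123)
M = np.random.rand(100, 2)

obj_MTS = ns.MTS(
    Ridge(),
    lags="AIC",          # number of lags selected by minimizing AIC
    n_hidden_features=5,
    type_pi="scp2-kde",  # split conformal prediction + KDE of standardized residuals
    replications=100,    # number of predictive simulations
)
obj_MTS.fit(M)

# namedtuple with fields mean, sims, lower, upper
res = obj_MTS.predict(h=10, level=95)
print(res.mean)
print(res.lower)
print(res.upper)

# prediction-interval plot for the first series
obj_MTS.plot(series=0, type_plot="pi")
```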
def fit(self, X, xreg=None, **kwargs):
322    def fit(self, X, xreg=None, **kwargs):
323        """Fit MTS model to training data X, with optional regressors xreg
324
325        Parameters:
326
327        X: {array-like}, shape = [n_samples, n_features]
328            Training time series, where n_samples is the number
329            of samples and n_features is the number of features;
330            X must be in increasing order (most recent observations last)
331
332        xreg: {array-like}, shape = [n_samples, n_features_xreg]
333            Additional (external) regressors to be passed to self.obj
334            xreg must be in 'increasing' order (most recent observations last)
335
336        **kwargs: for now, additional parameters to be passed to the kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)
337
338        Returns:
339
340        self: object
341        """
342
343        self.init_n_series_ = X.shape[1]
344
345        # Automatic lag selection if requested
346        if isinstance(self.lags, str):
347            max_lags = min(25, X.shape[0] // 4)
348            best_ic = float("inf")
349            best_lags = 1
350
351            if self.verbose:
352                print(f"\nSelecting optimal number of lags using {self.lags}...")
353                iterator = tqdm(range(1, max_lags + 1))
354            else:
355                iterator = range(1, max_lags + 1)
356
357            for lag in iterator:
358                # Convert DataFrame to numpy array before reversing
359                if isinstance(X, pd.DataFrame):
360                    X_values = X.values[::-1]
361                else:
362                    X_values = X[::-1]
363
364                # Try current lag value
365                if self.init_n_series_ > 1:
366                    mts_input = ts.create_train_inputs(X_values, lag)
367                else:
368                    mts_input = ts.create_train_inputs(X_values.reshape(-1, 1), lag)
369
370                # Cook training set and fit model
371                dummy_y, scaled_Z = self.cook_training_set(
372                    y=np.ones(mts_input[0].shape[0]), X=mts_input[1]
373                )
374                residuals_ = []
375
376                for i in range(self.init_n_series_):
377                    y_mean = np.mean(mts_input[0][:, i])
378                    centered_y_i = mts_input[0][:, i] - y_mean
379                    self.obj.fit(X=scaled_Z, y=centered_y_i)
380                    residuals_.append(
381                        (centered_y_i - self.obj.predict(scaled_Z)).tolist()
382                    )
383
384                self.residuals_ = np.asarray(residuals_).T
385                ic = self._compute_information_criterion(
386                    curr_lags=lag, criterion=self.lags
387                )
388
389                if self.verbose:
390                    print(f"Trying lags={lag}, {self.lags}={ic:.2f}")
391
392                if ic < best_ic:
393                    best_ic = ic
394                    best_lags = lag
395
396            if self.verbose:
397                print(f"\nSelected {best_lags} lags with {self.lags}={best_ic:.2f}")
398
399            self.lags = best_lags
400
401        self.input_dates = None
402        self.df_ = None
403
404        if isinstance(X, pd.DataFrame) is False:
405            # input data set is a numpy array
406            if xreg is None:
407                X = pd.DataFrame(X)
408                self.series_names = ["series" + str(i) for i in range(X.shape[1])]
409            else:
410                # xreg is not None
411                X = mo.cbind(X, xreg)
412                self.xreg_ = xreg
413
414        else:  # input data set is a DataFrame with column names
415
416            X_index = None
417            if X.index is not None:
418                X_index = X.index
419            if xreg is None:
420                X = copy.deepcopy(mo.convert_df_to_numeric(X))
421            else:
422                X = copy.deepcopy(mo.cbind(mo.convert_df_to_numeric(X), xreg))
423                self.xreg_ = xreg
424            if X_index is not None:
425                X.index = X_index
426            self.series_names = X.columns.tolist()
427
428        if isinstance(X, pd.DataFrame):
429            if self.df_ is None:
430                self.df_ = X
431                X = X.values
432            else:
433                input_dates_prev = pd.DatetimeIndex(self.df_.index.values)
434                frequency = pd.infer_freq(input_dates_prev)
435                self.df_ = pd.concat([self.df_, X], axis=0)
436                self.input_dates = pd.date_range(
437                    start=input_dates_prev[0],
438                    periods=len(input_dates_prev) + X.shape[0],
439                    freq=frequency,
440                ).values.tolist()
441                self.df_.index = self.input_dates
442                X = self.df_.values
443            self.df_.columns = self.series_names
444        else:
445            if self.df_ is None:
446                self.df_ = pd.DataFrame(X, columns=self.series_names)
447            else:
448                self.df_ = pd.concat(
449                    [self.df_, pd.DataFrame(X, columns=self.series_names)],
450                    axis=0,
451                )
452
453        self.input_dates = ts.compute_input_dates(self.df_)
454
455        try:
456            # multivariate time series
457            n, p = X.shape
458        except:
459            # univariate time series
460            n = X.shape[0]
461            p = 1
462        self.n_obs_ = n
463
464        rep_1_n = np.repeat(1, n)
465
466        self.y_ = None
467        self.X_ = None
468        self.n_series = p
469        self.fit_objs_.clear()
470        self.y_means_.clear()
471        residuals_ = []
472        self.residuals_ = None
473        self.residuals_sims_ = None
474        self.kde_ = None
475        self.sims_ = None
476        self.scaled_Z_ = None
477        self.centered_y_is_ = []
478
479        if self.init_n_series_ > 1:
480            # multivariate time series
481            mts_input = ts.create_train_inputs(X[::-1], self.lags)
482        else:
483            # univariate time series
484            mts_input = ts.create_train_inputs(X.reshape(-1, 1)[::-1], self.lags)
485
486        self.y_ = mts_input[0]
487
488        self.X_ = mts_input[1]
489
490        dummy_y, scaled_Z = self.cook_training_set(y=rep_1_n, X=self.X_)
491
492        self.scaled_Z_ = scaled_Z
493
494        # loop on all the time series and adjust self.obj.fit
495        if self.verbose > 0:
496            print(
497                f"\n Adjusting {type(self.obj).__name__} to multivariate time series... \n"
498            )
499
500        if self.show_progress is True:
501            iterator = tqdm(range(self.init_n_series_))
502        else:
503            iterator = range(self.init_n_series_)
504
505        if self.type_pi in (
506            "gaussian",
507            "kde",
508            "bootstrap",
509            "block-bootstrap",
510        ) or self.type_pi.startswith("vine"):
511            for i in iterator:
512                y_mean = np.mean(self.y_[:, i])
513                self.y_means_[i] = y_mean
514                centered_y_i = self.y_[:, i] - y_mean
515                self.centered_y_is_.append(centered_y_i)
516                self.obj.fit(X=scaled_Z, y=centered_y_i)
517                self.fit_objs_[i] = deepcopy(self.obj)
518                residuals_.append(
519                    (centered_y_i - self.fit_objs_[i].predict(scaled_Z)).tolist()
520                )
521
522        if self.type_pi.startswith("scp"):
523            # split conformal prediction
524            for i in iterator:
525                n_y = self.y_.shape[0]
526                n_y_half = n_y // 2
527                first_half_idx = range(0, n_y_half)
528                second_half_idx = range(n_y_half, n_y)
529                y_mean_temp = np.mean(self.y_[first_half_idx, i])
530                centered_y_i_temp = self.y_[first_half_idx, i] - y_mean_temp
531                self.obj.fit(X=scaled_Z[first_half_idx, :], y=centered_y_i_temp)
532                # calibrated residuals actually
533                residuals_.append(
534                    (
535                        self.y_[second_half_idx, i]
536                        - (y_mean_temp + self.obj.predict(scaled_Z[second_half_idx, :]))
537                    ).tolist()
538                )
539                # fit on the second half
540                y_mean = np.mean(self.y_[second_half_idx, i])
541                self.y_means_[i] = y_mean
542                centered_y_i = self.y_[second_half_idx, i] - y_mean
543                self.obj.fit(X=scaled_Z[second_half_idx, :], y=centered_y_i)
544                self.fit_objs_[i] = deepcopy(self.obj)
545
546        self.residuals_ = np.asarray(residuals_).T
547
548        if self.type_pi == "gaussian":
549            self.gaussian_preds_std_ = np.std(self.residuals_, axis=0)
550
551        if self.type_pi.startswith("scp2"):
552            # Calculate mean and standard deviation for each column
553            data_mean = np.mean(self.residuals_, axis=0)
554            self.residuals_std_dev_ = np.std(self.residuals_, axis=0)
555            # Center and scale the array using broadcasting
556            self.residuals_ = (
557                self.residuals_ - data_mean[np.newaxis, :]
558            ) / self.residuals_std_dev_[np.newaxis, :]
559
560        if self.replications != None and "kde" in self.type_pi:
561            if self.verbose > 0:
562                print(f"\n Simulate residuals using {self.kernel} kernel... \n")
563            assert self.kernel in (
564                "gaussian",
565                "tophat",
566            ), "currently, 'kernel' must be either 'gaussian' or 'tophat'"
567            kernel_bandwidths = {"bandwidth": np.logspace(-6, 6, 150)}
568            grid = GridSearchCV(
569                KernelDensity(kernel=self.kernel, **kwargs),
570                param_grid=kernel_bandwidths,
571            )
572            grid.fit(self.residuals_)
573
574            if self.verbose > 0:
575                print(
576                    f"\n Best parameters for {self.kernel} kernel: {grid.best_params_} \n"
577                )
578
579            self.kde_ = grid.best_estimator_
580
581        return self

Fit MTS model to training data X, with optional regressors xreg

Parameters:

X: {array-like}, shape = [n_samples, n_features] Training time series, where n_samples is the number of samples and n_features is the number of features; X must be in increasing order (most recent observations last)

xreg: {array-like}, shape = [n_samples, n_features_xreg] Additional (external) regressors to be passed to self.obj xreg must be in 'increasing' order (most recent observations last)

**kwargs: for now, additional parameters to be passed to the kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)

Returns:

self: object
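
A minimal sketch of the `xreg` argument (illustrative data and column names, not from the original docstring): both the training series and the external regressors are passed in increasing date order, most recent observations last.

```python
import nnetsauce as ns
import numpy as np
import pandas as pd
from sklearn.linear_model import Ridge

np.random.seed(123)
dates = pd.date_range("2020-01-31", periods=30, freq="M")
df = pd.DataFrame(
    {"series1": np.random.rand(30), "series2": np.random.rand(30)}, index=dates
)
# hypothetical external regressor, aligned with the training index
xreg = pd.DataFrame({"regressor1": np.random.rand(30)}, index=dates)

obj_MTS = ns.MTS(Ridge(), lags=2, n_hidden_features=5)
obj_MTS.fit(df, xreg=xreg)   # most recent observations last, in both df and xreg
print(obj_MTS.predict(h=5))  # forecasts are returned for the target series only
```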

def predict(self, h=5, level=95, **kwargs):
 618    def predict(self, h=5, level=95, **kwargs):
 619        """Forecast all the time series, h steps ahead"""
 620
 621        self.output_dates_, frequency = ts.compute_output_dates(self.df_, h)
 622
 623        self.level_ = level
 624
 625        self.return_std_ = False  # do not remove (/!\)
 626
 627        self.mean_ = None  # do not remove (/!\)
 628
 629        self.mean_ = deepcopy(self.y_)  # do not remove (/!\)
 630
 631        self.lower_ = None  # do not remove (/!\)
 632
 633        self.upper_ = None  # do not remove (/!\)
 634
 635        self.sims_ = None  # do not remove (/!\)
 636
 637        y_means_ = np.asarray([self.y_means_[i] for i in range(self.init_n_series_)])
 638
 639        n_features = self.init_n_series_ * self.lags
 640
 641        self.alpha_ = 100 - level
 642
 643        pi_multiplier = norm.ppf(1 - self.alpha_ / 200)
 644
 645        if "return_std" in kwargs:  # bayesian forecasting
 646            self.return_std_ = True
 647            self.preds_std_ = []
 648            DescribeResult = namedtuple(
 649                "DescribeResult", ("mean", "lower", "upper")
 650            )  # to be updated
 651
 652        if "return_pi" in kwargs:  # split conformal, without simulation
 653            mean_pi_ = []
 654            lower_pi_ = []
 655            upper_pi_ = []
 656            median_pi_ = []
 657            DescribeResult = namedtuple(
 658                "DescribeResult", ("mean", "lower", "upper")
 659            )  # to be updated
 660
 661        if self.kde_ != None and "kde" in self.type_pi:  # kde
 662            target_cols = self.df_.columns[
 663                : self.init_n_series_
 664            ]  # Get target column names
 665            if self.verbose == 1:
 666                self.residuals_sims_ = tuple(
 667                    self.kde_.sample(
 668                        n_samples=h, random_state=self.seed + 100 * i
 669                    )  # Keep full sample
 670                    for i in tqdm(range(self.replications))
 671                )
 672            elif self.verbose == 0:
 673                self.residuals_sims_ = tuple(
 674                    self.kde_.sample(
 675                        n_samples=h, random_state=self.seed + 100 * i
 676                    )  # Keep full sample
 677                    for i in range(self.replications)
 678                )
 679
 680            # Convert to DataFrames after sampling
 681            self.residuals_sims_ = tuple(
 682                pd.DataFrame(
 683                    sim,  # Keep all columns
 684                    columns=target_cols,  # Use original target column names
 685                    index=self.output_dates_,
 686                )
 687                for sim in self.residuals_sims_
 688            )
 689
 690        if self.type_pi in ("bootstrap", "scp-bootstrap", "scp2-bootstrap"):
 691            assert self.replications is not None and isinstance(
 692                self.replications, int
 693            ), "'replications' must be provided and be an integer"
 694            if self.verbose == 1:
 695                self.residuals_sims_ = tuple(
 696                    ts.bootstrap(
 697                        self.residuals_,
 698                        h=h,
 699                        block_size=None,
 700                        seed=self.seed + 100 * i,
 701                    )
 702                    for i in tqdm(range(self.replications))
 703                )
 704            elif self.verbose == 0:
 705                self.residuals_sims_ = tuple(
 706                    ts.bootstrap(
 707                        self.residuals_,
 708                        h=h,
 709                        block_size=None,
 710                        seed=self.seed + 100 * i,
 711                    )
 712                    for i in range(self.replications)
 713                )
 714
 715        if self.type_pi in (
 716            "block-bootstrap",
 717            "scp-block-bootstrap",
 718            "scp2-block-bootstrap",
 719        ):
 720            if self.block_size is None:
 721                self.block_size = int(
 722                    np.ceil(3.15 * (self.residuals_.shape[0] ** (1 / 3)))
 723                )
 724
 725            assert self.replications is not None and isinstance(
 726                self.replications, int
 727            ), "'replications' must be provided and be an integer"
 728            if self.verbose == 1:
 729                self.residuals_sims_ = tuple(
 730                    ts.bootstrap(
 731                        self.residuals_,
 732                        h=h,
 733                        block_size=self.block_size,
 734                        seed=self.seed + 100 * i,
 735                    )
 736                    for i in tqdm(range(self.replications))
 737                )
 738            elif self.verbose == 0:
 739                self.residuals_sims_ = tuple(
 740                    ts.bootstrap(
 741                        self.residuals_,
 742                        h=h,
 743                        block_size=self.block_size,
 744                        seed=self.seed + 100 * i,
 745                    )
 746                    for i in range(self.replications)
 747                )
 748
 749        if "vine" in self.type_pi:
 750            if self.verbose == 1:
 751                self.residuals_sims_ = tuple(
 752                    vinecopula_sample(
 753                        x=self.residuals_,
 754                        n_samples=h,
 755                        method=self.type_pi,
 756                        random_state=self.seed + 100 * i,
 757                    )
 758                    for i in tqdm(range(self.replications))
 759                )
 760            elif self.verbose == 0:
 761                self.residuals_sims_ = tuple(
 762                    vinecopula_sample(
 763                        x=self.residuals_,
 764                        n_samples=h,
 765                        method=self.type_pi,
 766                        random_state=self.seed + 100 * i,
 767                    )
 768                    for i in range(self.replications)
 769                )
 770
 771        mean_ = deepcopy(self.mean_)
 772
 773        for i in range(h):
 774
 775            new_obs = ts.reformat_response(mean_, self.lags)
 776            new_X = new_obs.reshape(1, -1)
 777            cooked_new_X = self.cook_test_set(new_X, **kwargs)
 778
 779            if "return_std" in kwargs:
 780                self.preds_std_.append(
 781                    [
 782                        np.asarray(
 783                            self.fit_objs_[i].predict(cooked_new_X, return_std=True)[1]
 784                        ).item()
 785                        for i in range(self.n_series)
 786                    ]
 787                )
 788
 789            if "return_pi" in kwargs:
 790                for i in range(self.n_series):
 791                    preds_pi = self.fit_objs_[i].predict(cooked_new_X, **kwargs)
 792                    mean_pi_.append(preds_pi.mean[0])
 793                    lower_pi_.append(preds_pi.lower[0])
 794                    upper_pi_.append(preds_pi.upper[0])
 795
 796            predicted_cooked_new_X = np.asarray(
 797                [
 798                    np.asarray(self.fit_objs_[i].predict(cooked_new_X)).item()
 799                    for i in range(self.init_n_series_)
 800                ]
 801            )
 802
 803            preds = np.asarray(y_means_ + predicted_cooked_new_X)
 804
 805            # Create full row with both predictions and external regressors
 806            if self.xreg_ is not None and "xreg" in kwargs:
 807                next_xreg = kwargs["xreg"].iloc[i : i + 1].values.flatten()
 808                full_row = np.concatenate([preds, next_xreg])
 809            else:
 810                full_row = preds
 811
 812            # Create a new row with same number of columns as mean_
 813            new_row = np.zeros((1, mean_.shape[1]))
 814            new_row[0, : full_row.shape[0]] = full_row
 815
 816            # Maintain the full dimensionality by using vstack instead of rbind
 817            mean_ = np.vstack([new_row, mean_[:-1]])
 818
 819        # Final output should only include the target columns
 820        self.mean_ = pd.DataFrame(
 821            mean_[0:h, : self.init_n_series_][::-1],
 822            columns=self.df_.columns[: self.init_n_series_],
 823            index=self.output_dates_,
 824        )
 825
 826        # function's return ----------------------------------------------------------------------
 827        if (
 828            (("return_std" not in kwargs) and ("return_pi" not in kwargs))
 829            and (self.type_pi not in ("gaussian", "scp"))
 830        ) or ("vine" in self.type_pi):
 831
 832            if self.replications is None:
 833                return self.mean_.iloc[:, : self.init_n_series_]
 834
 835            # if "return_std" not in kwargs and self.replications is not None
 836            meanf = []
 837            medianf = []
 838            lower = []
 839            upper = []
 840
 841            if "scp2" in self.type_pi:
 842
 843                if self.verbose == 1:
 844                    self.sims_ = tuple(
 845                        (
 846                            self.mean_
 847                            + self.residuals_sims_[i]
 848                            * self.residuals_std_dev_[np.newaxis, :]
 849                            for i in tqdm(range(self.replications))
 850                        )
 851                    )
 852                elif self.verbose == 0:
 853                    self.sims_ = tuple(
 854                        (
 855                            self.mean_
 856                            + self.residuals_sims_[i]
 857                            * self.residuals_std_dev_[np.newaxis, :]
 858                            for i in range(self.replications)
 859                        )
 860                    )
 861            else:
 862
 863                if self.verbose == 1:
 864                    self.sims_ = tuple(
 865                        (
 866                            self.mean_ + self.residuals_sims_[i]
 867                            for i in tqdm(range(self.replications))
 868                        )
 869                    )
 870                elif self.verbose == 0:
 871                    self.sims_ = tuple(
 872                        (
 873                            self.mean_ + self.residuals_sims_[i]
 874                            for i in range(self.replications)
 875                        )
 876                    )
 877
 878            DescribeResult = namedtuple(
 879                "DescribeResult", ("mean", "sims", "lower", "upper")
 880            )
 881            for ix in range(self.init_n_series_):
 882                sims_ix = getsims(self.sims_, ix)
 883                if self.agg == "mean":
 884                    meanf.append(np.mean(sims_ix, axis=1))
 885                else:
 886                    medianf.append(np.median(sims_ix, axis=1))
 887                lower.append(np.quantile(sims_ix, q=self.alpha_ / 200, axis=1))
 888                upper.append(np.quantile(sims_ix, q=1 - self.alpha_ / 200, axis=1))
 889            self.mean_ = pd.DataFrame(
 890                np.asarray(meanf).T,
 891                columns=self.series_names[: self.init_n_series_],  # self.df_.columns,
 892                index=self.output_dates_,
 893            )
 894
 895            self.lower_ = pd.DataFrame(
 896                np.asarray(lower).T,
 897                columns=self.series_names[: self.init_n_series_],  # self.df_.columns,
 898                index=self.output_dates_,
 899            )
 900
 901            self.upper_ = pd.DataFrame(
 902                np.asarray(upper).T,
 903                columns=self.series_names[: self.init_n_series_],  # self.df_.columns,
 904                index=self.output_dates_,
 905            )
 906
 907            try:
 908                self.median_ = pd.DataFrame(
 909                    np.asarray(medianf).T,
 910                    columns=self.series_names[
 911                        : self.init_n_series_
 912                    ],  # self.df_.columns,
 913                    index=self.output_dates_,
 914                )
 915            except Exception as e:
 916                pass
 917
 918            return DescribeResult(self.mean_, self.sims_, self.lower_, self.upper_)
 919
 920        if (
 921            (("return_std" in kwargs) or ("return_pi" in kwargs))
 922            and (self.type_pi not in ("gaussian", "scp"))
 923        ) or "vine" in self.type_pi:
 924            DescribeResult = namedtuple("DescribeResult", ("mean", "lower", "upper"))
 925
 926            self.mean_ = pd.DataFrame(
 927                np.asarray(self.mean_),
 928                columns=self.series_names,  # self.df_.columns,
 929                index=self.output_dates_,
 930            )
 931
 932            if "return_std" in kwargs:
 933
 934                self.preds_std_ = np.asarray(self.preds_std_)
 935
 936                self.lower_ = pd.DataFrame(
 937                    self.mean_.values - pi_multiplier * self.preds_std_,
 938                    columns=self.series_names,  # self.df_.columns,
 939                    index=self.output_dates_,
 940                )
 941
 942                self.upper_ = pd.DataFrame(
 943                    self.mean_.values + pi_multiplier * self.preds_std_,
 944                    columns=self.series_names,  # self.df_.columns,
 945                    index=self.output_dates_,
 946                )
 947
 948            if "return_pi" in kwargs:
 949
 950                self.lower_ = pd.DataFrame(
 951                    np.asarray(lower_pi_).reshape(h, self.n_series)
 952                    + y_means_[np.newaxis, :],
 953                    columns=self.series_names,  # self.df_.columns,
 954                    index=self.output_dates_,
 955                )
 956
 957                self.upper_ = pd.DataFrame(
 958                    np.asarray(upper_pi_).reshape(h, self.n_series)
 959                    + y_means_[np.newaxis, :],
 960                    columns=self.series_names,  # self.df_.columns,
 961                    index=self.output_dates_,
 962                )
 963
 964            res = DescribeResult(self.mean_, self.lower_, self.upper_)
 965
 966            if self.xreg_ is not None:
 967                if len(self.xreg_.shape) > 1:
 968                    res2 = mx.tuple_map(
 969                        res,
 970                        lambda x: mo.delete_last_columns(
 971                            x, num_columns=self.xreg_.shape[1]
 972                        ),
 973                    )
 974                else:
 975                    res2 = mx.tuple_map(
 976                        res, lambda x: mo.delete_last_columns(x, num_columns=1)
 977                    )
 978                return DescribeResult(res2[0], res2[1], res2[2])
 979
 980            return res
 981
 982        if self.type_pi == "gaussian":
 983
 984            DescribeResult = namedtuple("DescribeResult", ("mean", "lower", "upper"))
 985
 986            self.mean_ = pd.DataFrame(
 987                np.asarray(self.mean_),
 988                columns=self.series_names,  # self.df_.columns,
 989                index=self.output_dates_,
 990            )
 991
 992            self.lower_ = pd.DataFrame(
 993                self.mean_.values - pi_multiplier * self.gaussian_preds_std_,
 994                columns=self.series_names,  # self.df_.columns,
 995                index=self.output_dates_,
 996            )
 997
 998            self.upper_ = pd.DataFrame(
 999                self.mean_.values + pi_multiplier * self.gaussian_preds_std_,
1000                columns=self.series_names,  # self.df_.columns,
1001                index=self.output_dates_,
1002            )
1003
1004            res = DescribeResult(self.mean_, self.lower_, self.upper_)
1005
1006            if self.xreg_ is not None:
1007                if len(self.xreg_.shape) > 1:
1008                    res2 = mx.tuple_map(
1009                        res,
1010                        lambda x: mo.delete_last_columns(
1011                            x, num_columns=self.xreg_.shape[1]
1012                        ),
1013                    )
1014                else:
1015                    res2 = mx.tuple_map(
1016                        res, lambda x: mo.delete_last_columns(x, num_columns=1)
1017                    )
1018                return DescribeResult(res2[0], res2[1], res2[2])
1019
1020            return res
1021
1022        # After prediction loop, ensure sims only contain target columns
1023        if self.sims_ is not None:
1024            if self.verbose == 1:
1025                self.sims_ = tuple(
1026                    sim[:h,]  # Only keep the first h rows
1027                    for sim in tqdm(self.sims_)
1028                )
1029            elif self.verbose == 0:
1030                self.sims_ = tuple(
1031                    sim[:h,]  # Only keep the first h rows
1032                    for sim in self.sims_
1033                )
1034
1035            # Convert numpy arrays to DataFrames with proper columns
1036            self.sims_ = tuple(
1037                pd.DataFrame(
1038                    sim,
1039                    columns=self.df_.columns[: self.init_n_series_],
1040                    index=self.output_dates_,
1041                )
1042                for sim in self.sims_
1043            )
1044
1045        if self.type_pi in ("kde", "bootstrap", "block-bootstrap", "vine-copula"):
1046            if self.xreg_ is not None:
1047                # Use getsimsxreg when external regressors are present
1048                target_cols = self.df_.columns[: self.init_n_series_]
1049                self.sims_ = getsimsxreg(self.sims_, self.output_dates_, target_cols)
1050            else:
1051                # Use original getsims for backward compatibility
1052                self.sims_ = getsims(self.sims_)

Forecast all the time series, h steps ahead
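
A minimal, illustrative sketch of the probabilistic output (here with `type_pi="gaussian"`; data and hyperparameters are made up): `predict` returns a namedtuple with `mean`, `lower` and `upper` fields.

```python
import nnetsauce as ns
import numpy as np
from sklearn.linear_model import Ridge

np.random.seed(123)
M = np.random.rand(60, 3)

obj_MTS = ns.MTS(Ridge(), lags=3, n_hidden_features=5, type_pi="gaussian")
obj_MTS.fit(M)

res = obj_MTS.predict(h=7, level=90)  # 90% prediction intervals
print(res.mean)   # point forecasts
print(res.lower)  # lower bounds of the intervals
print(res.upper)  # upper bounds of the intervals
```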

def score(self, X, training_index, testing_index, scoring=None, **kwargs):
1054    def score(self, X, training_index, testing_index, scoring=None, **kwargs):
1055        """Train on training_index, score on testing_index."""
1056
1057        assert (
1058            bool(set(training_index).intersection(set(testing_index))) == False
1059        ), "Non-overlapping 'training_index' and 'testing_index' required"
1060
1061        # Dimensions
1062        try:
1063            # multivariate time series
1064            n, p = X.shape
1065        except:
1066            # univariate time series
1067            n = X.shape[0]
1068            p = 1
1069
1070        # Training and testing sets
1071        if p > 1:
1072            X_train = X[training_index, :]
1073            X_test = X[testing_index, :]
1074        else:
1075            X_train = X[training_index]
1076            X_test = X[testing_index]
1077
1078        # Horizon
1079        h = len(testing_index)
1080        assert (
1081            len(training_index) + h
1082        ) <= n, "Please check lengths of training and testing windows"
1083
1084        # Fit and predict
1085        self.fit(X_train, **kwargs)
1086        preds = self.predict(h=h, **kwargs)
1087
1088        if scoring is None:
1089            scoring = "neg_root_mean_squared_error"
1090
1091        # check inputs
1092        assert scoring in (
1093            "explained_variance",
1094            "neg_mean_absolute_error",
1095            "neg_mean_squared_error",
1096            "neg_root_mean_squared_error",
1097            "neg_mean_squared_log_error",
1098            "neg_median_absolute_error",
1099            "r2",
1100        ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \
1101                               'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \
1102                               'neg_median_absolute_error', 'r2')"
1103
1104        scoring_options = {
1105            "explained_variance": skm2.explained_variance_score,
1106            "neg_mean_absolute_error": skm2.mean_absolute_error,
1107            "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2),
1108            "neg_root_mean_squared_error": lambda x, y: np.sqrt(np.mean((x - y) ** 2)),
1109            "neg_mean_squared_log_error": skm2.mean_squared_log_error,
1110            "neg_median_absolute_error": skm2.median_absolute_error,
1111            "r2": skm2.r2_score,
1112        }
1113
1114        return scoring_options[scoring](X_test, preds)

Train on training_index, score on testing_index.
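
For illustration (made-up data and base learner), the sketch below fits on a training window and scores a forecast over a disjoint, contiguous testing window:

```python
import nnetsauce as ns
import numpy as np
from sklearn.linear_model import Ridge

np.random.seed(123)
X = np.random.rand(40, 2)

obj_MTS = ns.MTS(Ridge(), lags=2, n_hidden_features=5)

# train on the first 30 observations, score a 10-step-ahead forecast on the last 10
print(obj_MTS.score(
    X,
    training_index=np.arange(30),
    testing_index=np.arange(30, 40),
    scoring="neg_root_mean_squared_error",
))
```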

class MultitaskClassifier(nnetsauce.Base, sklearn.base.ClassifierMixin):
 16class MultitaskClassifier(Base, ClassifierMixin):
 17    """Multitask Classification model based on regression models, with shared covariates
 18
 19    Parameters:
 20
 21        obj: object
 22            any object (must be a regression model) containing a method fit (obj.fit())
 23            and a method predict (obj.predict())
 24
 25        n_hidden_features: int
 26            number of nodes in the hidden layer
 27
 28        activation_name: str
 29            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 30
 31        a: float
 32            hyperparameter for 'prelu' or 'elu' activation function
 33
 34        nodes_sim: str
 35            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 36            'uniform'
 37
 38        bias: boolean
 39            indicates if the hidden layer contains a bias term (True) or not
 40            (False)
 41
 42        dropout: float
 43            regularization parameter; (random) percentage of nodes dropped out
 44            of the training
 45
 46        direct_link: boolean
 47            indicates if the original predictors are included (True) in model's
 48            fitting or not (False)
 49
 50        n_clusters: int
 51            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 52                no clustering)
 53
 54        cluster_encode: bool
 55            defines how the variable containing clusters is treated (default is one-hot)
 56            if `False`, then labels are used, without one-hot encoding
 57
 58        type_clust: str
 59            type of clustering method: currently k-means ('kmeans') or Gaussian
 60            Mixture Model ('gmm')
 61
 62        type_scaling: a tuple of 3 strings
 63            scaling methods for inputs, hidden layer, and clustering respectively
 64            (and when relevant).
 65            Currently available: standardization ('std') or MinMax scaling ('minmax')
 66
 67        col_sample: float
 68            percentage of covariates randomly chosen for training
 69
 70        row_sample: float
 71            percentage of rows chosen for training, by stratified bootstrapping
 72
 73        seed: int
 74            reproducibility seed for nodes_sim=='uniform'
 75
 76        backend: str
 77            "cpu" or "gpu" or "tpu"
 78
 79    Attributes:
 80
 81        fit_objs_: dict
 82            objects adjusted to each class of the response
 83
 84        n_classes_: int
 85            number of classes for the classifier
 86
 87    Examples:
 88
 89    See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/mtask_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/mtask_classification.py)
 90
 91    ```python
 92    import nnetsauce as ns
 93    import numpy as np
 94    from sklearn.datasets import load_breast_cancer
 95    from sklearn.linear_model import LinearRegression
 96    from sklearn.model_selection import train_test_split
 97    from sklearn import metrics
 98    from time import time
 99
100    breast_cancer = load_breast_cancer()
101    Z = breast_cancer.data
102    t = breast_cancer.target
103
104    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
105                                                        random_state=123+2*10)
106
107    # Linear Regression is used
108    regr = LinearRegression()
109    fit_obj = ns.MultitaskClassifier(regr, n_hidden_features=5,
110                                n_clusters=2, type_clust="gmm")
111
112    start = time()
113    fit_obj.fit(X_train, y_train)
114    print(f"Elapsed {time() - start}")
115
116    print(fit_obj.score(X_test, y_test))
117    print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
118
119    start = time()
120    preds = fit_obj.predict(X_test)
121    print(f"Elapsed {time() - start}")
122    print(metrics.classification_report(preds, y_test))
123    ```
124
125    """
126
127    # construct the object -----
128    _estimator_type = "classifier"
129
130    def __init__(
131        self,
132        obj,
133        n_hidden_features=5,
134        activation_name="relu",
135        a=0.01,
136        nodes_sim="sobol",
137        bias=True,
138        dropout=0,
139        direct_link=True,
140        n_clusters=2,
141        cluster_encode=True,
142        type_clust="kmeans",
143        type_scaling=("std", "std", "std"),
144        col_sample=1,
145        row_sample=1,
146        seed=123,
147        backend="cpu",
148    ):
149        super().__init__(
150            n_hidden_features=n_hidden_features,
151            activation_name=activation_name,
152            a=a,
153            nodes_sim=nodes_sim,
154            bias=bias,
155            dropout=dropout,
156            direct_link=direct_link,
157            n_clusters=n_clusters,
158            cluster_encode=cluster_encode,
159            type_clust=type_clust,
160            type_scaling=type_scaling,
161            col_sample=col_sample,
162            row_sample=row_sample,
163            seed=seed,
164            backend=backend,
165        )
166
167        self.type_fit = "classification"
168        self.obj = obj
169        self.fit_objs_ = {}
170
171    def fit(self, X, y, sample_weight=None, **kwargs):
172        """Fit MultitaskClassifier to training data (X, y).
173
174        Args:
175
176            X: {array-like}, shape = [n_samples, n_features]
177                Training vectors, where n_samples is the number
178                of samples and n_features is the number of features.
179
180            y: array-like, shape = [n_samples]
181                Target values.
182
183            **kwargs: additional parameters to be passed to
184                    self.cook_training_set or self.obj.fit
185
186        Returns:
187
188            self: object
189
190        """
191
192        assert mx.is_factor(y), "y must contain only integers"
193
194        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
195
196        self.classes_ = np.unique(y)  # for compatibility with sklearn
197        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
198
199        # multitask response
200        Y = mo.one_hot_encode2(output_y, self.n_classes_)
201
202        # if sample_weight is None:
203        for i in range(self.n_classes_):
204            self.fit_objs_[i] = deepcopy(self.obj.fit(scaled_Z, Y[:, i], **kwargs))
205
206        self.classes_ = np.unique(y)
207        return self
208
209    def predict(self, X, **kwargs):
210        """Predict test data X.
211
212        Args:
213
214            X: {array-like}, shape = [n_samples, n_features]
215                Training vectors, where n_samples is the number
216                of samples and n_features is the number of features.
217
218            **kwargs: additional parameters to be passed to
219                    self.cook_test_set
220
221        Returns:
222
223            model predictions: {array-like}
224
225        """
226        return np.argmax(self.predict_proba(X, **kwargs), axis=1)
227
228    def predict_proba(self, X, **kwargs):
229        """Predict probabilities for test data X.
230
231        Args:
232
233            X: {array-like}, shape = [n_samples, n_features]
234                Training vectors, where n_samples is the number
235                of samples and n_features is the number of features.
236
237            **kwargs: additional parameters to be passed to
238                    self.cook_test_set
239
240        Returns:
241
242            probability estimates for test data: {array-like}
243
244        """
245
246        shape_X = X.shape
247
248        probs = np.zeros((shape_X[0], self.n_classes_))
249
250        if len(shape_X) == 1:
251            n_features = shape_X[0]
252
253            new_X = mo.rbind(
254                X.reshape(1, n_features),
255                np.ones(n_features).reshape(1, n_features),
256            )
257
258            Z = self.cook_test_set(new_X, **kwargs)
259
260            # loop on all the classes
261            for i in range(self.n_classes_):
262                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0]
263
264        else:
265            Z = self.cook_test_set(X, **kwargs)
266
267            # loop on all the classes
268            for i in range(self.n_classes_):
269                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)
270
271        expit_raw_probs = expit(probs)
272
273        return expit_raw_probs / expit_raw_probs.sum(axis=1)[:, None]
274
275    def decision_function(self, X, **kwargs):
276        """Compute the decision function of X.
277
278        Parameters:
279            X: {array-like}, shape = [n_samples, n_features]
280                Samples to compute decision function for.
281
282            **kwargs: additional parameters to be passed to
283                    self.cook_test_set
284
285        Returns:
286            array-like of shape (n_samples,) or (n_samples, n_classes)
287            Decision function of the input samples. The order of outputs is the same
288            as that of the classes passed to fit.
289        """
290        if not hasattr(self.obj, "decision_function"):
291            # If base classifier doesn't have decision_function, use predict_proba
292            proba = self.predict_proba(X, **kwargs)
293            if proba.shape[1] == 2:
294                return proba[:, 1]  # For binary classification
295            return proba  # For multiclass
296
297        if len(X.shape) == 1:
298            n_features = X.shape[0]
299            new_X = mo.rbind(
300                X.reshape(1, n_features),
301                np.ones(n_features).reshape(1, n_features),
302            )
303
304            return (
305                self.obj.decision_function(
306                    self.cook_test_set(new_X, **kwargs), **kwargs
307                )
308            )[0]
309
310        return self.obj.decision_function(self.cook_test_set(X, **kwargs), **kwargs)
311
312    @property
313    def _estimator_type(self):
314        return "classifier"            

Multitask Classification model based on regression models, with shared covariates

Parameters:

obj: object
    any object (must be a regression model) containing a method fit (obj.fit())
    and a method predict (obj.predict())

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

col_sample: float
    percentage of covariates randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

seed: int
    reproducibility seed for nodes_sim=='uniform'

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

fit_objs_: dict
    regression objects fitted to each individual class

n_classes_: int
    number of classes for the classifier

Examples:

See also https://github.com/Techtonique/nnetsauce/blob/master/examples/mtask_classification.py

```python
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time

breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target

X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
                                                    random_state=123+2*10)

# Linear Regression is used
regr = LinearRegression()
fit_obj = ns.MultitaskClassifier(regr, n_hidden_features=5,
                            n_clusters=2, type_clust="gmm")

start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")

print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))

start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
```
def fit(self, X, y, sample_weight=None, **kwargs):
171    def fit(self, X, y, sample_weight=None, **kwargs):
172        """Fit MultitaskClassifier to training data (X, y).
173
174        Args:
175
176            X: {array-like}, shape = [n_samples, n_features]
177                Training vectors, where n_samples is the number
178                of samples and n_features is the number of features.
179
180            y: array-like, shape = [n_samples]
181                Target values.
182
183            **kwargs: additional parameters to be passed to
184                    self.cook_training_set or self.obj.fit
185
186        Returns:
187
188            self: object
189
190        """
191
192        assert mx.is_factor(y), "y must contain only integers"
193
194        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
195
196        self.classes_ = np.unique(y)  # for compatibility with sklearn
197        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
198
199        # multitask response
200        Y = mo.one_hot_encode2(output_y, self.n_classes_)
201
202        # if sample_weight is None:
203        for i in range(self.n_classes_):
204            self.fit_objs_[i] = deepcopy(self.obj.fit(scaled_Z, Y[:, i], **kwargs))
205
206        self.classes_ = np.unique(y)
207        return self

Fit MultitaskClassifier to training data (X, y).

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
def predict(self, X, **kwargs):
209    def predict(self, X, **kwargs):
210        """Predict test data X.
211
212        Args:
213
214            X: {array-like}, shape = [n_samples, n_features]
215                Training vectors, where n_samples is the number
216                of samples and n_features is the number of features.
217
218            **kwargs: additional parameters to be passed to
219                    self.cook_test_set
220
221        Returns:
222
223            model predictions: {array-like}
224
225        """
226        return np.argmax(self.predict_proba(X, **kwargs), axis=1)

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
def predict_proba(self, X, **kwargs):
228    def predict_proba(self, X, **kwargs):
229        """Predict probabilities for test data X.
230
231        Args:
232
233            X: {array-like}, shape = [n_samples, n_features]
234                Training vectors, where n_samples is the number
235                of samples and n_features is the number of features.
236
237            **kwargs: additional parameters to be passed to
238                    self.cook_test_set
239
240        Returns:
241
242            probability estimates for test data: {array-like}
243
244        """
245
246        shape_X = X.shape
247
248        probs = np.zeros((shape_X[0], self.n_classes_))
249
250        if len(shape_X) == 1:
251            n_features = shape_X[0]
252
253            new_X = mo.rbind(
254                X.reshape(1, n_features),
255                np.ones(n_features).reshape(1, n_features),
256            )
257
258            Z = self.cook_test_set(new_X, **kwargs)
259
260            # loop on all the classes
261            for i in range(self.n_classes_):
262                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0]
263
264        else:
265            Z = self.cook_test_set(X, **kwargs)
266
267            # loop on all the classes
268            for i in range(self.n_classes_):
269                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)
270
271        expit_raw_probs = expit(probs)
272
273        return expit_raw_probs / expit_raw_probs.sum(axis=1)[:, None]

Predict probabilities for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

probability estimates for test data: {array-like}
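
For intuition, these probabilities are obtained by squashing each per-class regression output through the logistic function (`expit`) and then normalizing row-wise so each row sums to one. A minimal sketch, with made-up raw outputs (not produced by the library):

```python
import numpy as np
from scipy.special import expit

# hypothetical raw per-class regression outputs: 2 samples, 3 classes
raw = np.array([[0.2, -1.0, 1.5],
                [0.8,  0.1, -0.3]])

squashed = expit(raw)                              # map each output into (0, 1)
probs = squashed / squashed.sum(axis=1)[:, None]   # normalize rows to sum to 1
print(probs.sum(axis=1))                           # [1. 1.]
```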
class NeuralNetRegressor(sklearn.base.BaseEstimator, sklearn.base.RegressorMixin):
 98class NeuralNetRegressor(BaseEstimator, RegressorMixin):
 99    """
100    (Pretrained) Neural Network Regressor.
101
102    Parameters:
103
104        hidden_layer_sizes : tuple, default=(100,)
105            The number of neurons in each hidden layer.
106        max_iter : int, default=100
107            The maximum number of iterations to train the model.
108        learning_rate : float, default=0.01
109            The learning rate for the optimizer.
110        l1_ratio : float, default=0.5
111            The ratio of L1 regularization.
112        alpha : float, default=1e-6
113            The regularization parameter.
114        activation_name : str, default="relu"
115            The activation function to use.
116        dropout : float, default=0.0
117            The dropout rate.
118        random_state : int, default=None
119            The random state for the random number generator.
120        weights : list, default=None
121            The weights to initialize the model with.
122
123    Attributes:
124
125        weights : list
126            The weights of the model.
127        params : list
128            The parameters of the model.
129        scaler_ : sklearn.preprocessing.StandardScaler
130            The scaler used to standardize the input features.
131        y_mean_ : float
132            The mean of the target variable.
133
134    Methods:
135
136        fit(X, y)
137            Fit the model to the data.
138        predict(X)
139            Predict the target variable.
140        get_weights()
141            Get the weights of the model.
142        set_weights(weights)
143            Set the weights of the model.
144    """
145
146    def __init__(
147        self,
148        hidden_layer_sizes=None,
149        max_iter=100,
150        learning_rate=0.01,
151        l1_ratio=0.5,
152        alpha=1e-6,
153        activation_name="relu",
154        dropout=0,
155        weights=None,
156        random_state=None,
157    ):
158        if weights is None and hidden_layer_sizes is None:
159            hidden_layer_sizes = (100,)  # default value if neither is provided
160        self.hidden_layer_sizes = hidden_layer_sizes
161        self.max_iter = max_iter
162        self.learning_rate = learning_rate
163        self.l1_ratio = l1_ratio
164        self.alpha = alpha
165        self.activation_name = activation_name
166        self.dropout = dropout
167        self.weights = weights
168        self.random_state = random_state
169        self.params = None
170        self.scaler_ = StandardScaler()
171        self.y_mean_ = None
172
173    def _validate_weights(self, input_dim):
174        """Validate that weights dimensions are coherent."""
175        if not self.weights:
176            return False
177
178        try:
179            # Check each layer's weights and biases
180            prev_dim = input_dim
181            for W, b in self.weights:
182                # Check weight matrix dimensions
183                if W.shape[0] != prev_dim:
184                    raise ValueError(
185                        f"Weight matrix input dimension {W.shape[0]} does not match, previous layer output dimension {prev_dim}"
186                    )
187                # Check bias dimension matches weight matrix output
188                if W.shape[1] != b.shape[0]:
189                    raise ValueError(
190                        f"Bias dimension {b.shape[0]} does not match weight matrix, output dimension {W.shape[1]}"
191                    )
192                prev_dim = W.shape[1]
193
194            # Check final output dimension is 1 for regression
195            if prev_dim != 1:
196                raise ValueError(
197                    f"Final layer output dimension {prev_dim} must be 1 for regression"
198                )
199
200            return True
201        except (AttributeError, IndexError):
202            raise ValueError(
203                "Weights format is invalid. Expected list of (weight, bias) tuples"
204            )
205
206    def fit(self, X, y):
207        # Standardize the input features
208        X = self.scaler_.fit_transform(X)
209        # Ensure y is 2D for consistency
210        y = y.reshape(-1, 1)
211        self.y_mean_ = jnp.mean(y)
212        y = y - self.y_mean_
213        # Validate or initialize weights
214        if self.weights is not None:
215            if self._validate_weights(X.shape[1]):
216                self.params = self.weights
217        else:
218            if self.hidden_layer_sizes is None:
219                raise ValueError(
220                    "Either weights or hidden_layer_sizes must be provided"
221                )
222            self.params = initialize_params(
223                X.shape[1], self.hidden_layer_sizes, self.random_state
224            )
225        loss_fn = partial(loss, l1_ratio=self.l1_ratio, alpha=self.alpha)
226        grad_loss = jit(grad(loss_fn))  # compiled gradient evaluation function
227        perex_grads = jit(
228            vmap(grad_loss, in_axes=(None, 0, 0))
229        )  # fast per-example grads
230        # Training loop
231        for _ in range(self.max_iter):
232            grads = perex_grads(self.params, X, y)
233            # Average gradients across examples
234            grads = jax.tree_map(lambda g: jnp.mean(g, axis=0), grads)
235            # Update parameters
236            self.params = [
237                (W - self.learning_rate * dW, b - self.learning_rate * db)
238                for (W, b), (dW, db) in zip(self.params, grads)
239            ]
240        # Store final weights
241        self.weights = self.params
242        return self
243
244    def get_weights(self):
245        """Return the current weights of the model."""
246        if self.weights is None:
247            raise ValueError("No weights available. Model has not been fitted yet.")
248        return self.weights
249
250    def set_weights(self, weights):
251        """Set the weights of the model manually."""
252        self.weights = weights
253        self.params = weights
254
255    def predict(self, X):
256        X = self.scaler_.transform(X)
257        if self.params is None:
258            raise ValueError("Model has not been fitted yet.")
259        predictions = predict_internal(
260            self.params,
261            X,
262            activation_func=self.activation_name,
263            dropout=self.dropout,
264            seed=self.random_state,
265        )
266        return predictions.reshape(-1) + self.y_mean_

(Pretrained) Neural Network Regressor.

Parameters:

hidden_layer_sizes : tuple, default=(100,)
    The number of neurons in each hidden layer.
max_iter : int, default=100
    The maximum number of iterations to train the model.
learning_rate : float, default=0.01
    The learning rate for the optimizer.
l1_ratio : float, default=0.5
    The ratio of L1 regularization.
alpha : float, default=1e-6
    The regularization parameter.
activation_name : str, default="relu"
    The activation function to use.
dropout : float, default=0.0
    The dropout rate.
random_state : int, default=None
    The random state for the random number generator.
weights : list, default=None
    The weights to initialize the model with.

Attributes:

weights : list
    The weights of the model.
params : list
    The parameters of the model.
scaler_ : sklearn.preprocessing.StandardScaler
    The scaler used to standardize the input features.
y_mean_ : float
    The mean of the target variable.

Methods:

fit(X, y)
    Fit the model to the data.
predict(X)
    Predict the target variable.
get_weights()
    Get the weights of the model.
set_weights(weights)
    Set the weights of the model.
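
A minimal usage sketch (assuming a scikit-learn regression dataset; the parameter values below are arbitrary, not recommendations):

```python
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=123)

# train a small network, then reuse its weights to warm-start another instance
regr = ns.NeuralNetRegressor(hidden_layer_sizes=(50, 25), max_iter=200,
                             learning_rate=0.01)
regr.fit(X_train, y_train)
print(mean_squared_error(y_test, regr.predict(X_test)))

warm = ns.NeuralNetRegressor(weights=regr.get_weights())
warm.fit(X_train, y_train)  # continues training from the provided weights
```

Weights are expected as a list of `(W, b)` tuples whose dimensions chain from the input dimension down to a final output dimension of 1 (as checked by `_validate_weights`).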
def fit(self, X, y):
206    def fit(self, X, y):
207        # Standardize the input features
208        X = self.scaler_.fit_transform(X)
209        # Ensure y is 2D for consistency
210        y = y.reshape(-1, 1)
211        self.y_mean_ = jnp.mean(y)
212        y = y - self.y_mean_
213        # Validate or initialize weights
214        if self.weights is not None:
215            if self._validate_weights(X.shape[1]):
216                self.params = self.weights
217        else:
218            if self.hidden_layer_sizes is None:
219                raise ValueError(
220                    "Either weights or hidden_layer_sizes must be provided"
221                )
222            self.params = initialize_params(
223                X.shape[1], self.hidden_layer_sizes, self.random_state
224            )
225        loss_fn = partial(loss, l1_ratio=self.l1_ratio, alpha=self.alpha)
226        grad_loss = jit(grad(loss_fn))  # compiled gradient evaluation function
227        perex_grads = jit(
228            vmap(grad_loss, in_axes=(None, 0, 0))
229        )  # fast per-example grads
230        # Training loop
231        for _ in range(self.max_iter):
232            grads = perex_grads(self.params, X, y)
233            # Average gradients across examples
234            grads = jax.tree_map(lambda g: jnp.mean(g, axis=0), grads)
235            # Update parameters
236            self.params = [
237                (W - self.learning_rate * dW, b - self.learning_rate * db)
238                for (W, b), (dW, db) in zip(self.params, grads)
239            ]
240        # Store final weights
241        self.weights = self.params
242        return self
def predict(self, X):
255    def predict(self, X):
256        X = self.scaler_.transform(X)
257        if self.params is None:
258            raise ValueError("Model has not been fitted yet.")
259        predictions = predict_internal(
260            self.params,
261            X,
262            activation_func=self.activation_name,
263            dropout=self.dropout,
264            seed=self.random_state,
265        )
266        return predictions.reshape(-1) + self.y_mean_
class NeuralNetClassifier(sklearn.base.BaseEstimator, sklearn.base.ClassifierMixin):
 10class NeuralNetClassifier(BaseEstimator, ClassifierMixin):
 11    """
 12    (Pretrained) Neural Network Classifier.
 13
 14    Parameters:
 15
 16        hidden_layer_sizes : tuple, default=(100,)
 17            The number of neurons in each hidden layer.
 18        max_iter : int, default=100
 19            The maximum number of iterations to train the model.
 20        learning_rate : float, default=0.01
 21            The learning rate for the optimizer.
 22        l1_ratio : float, default=0.5
 23            The ratio of L1 regularization.
 24        alpha : float, default=1e-6
 25            The regularization parameter.
 26        activation_name : str, default="relu"
 27            The activation function to use.
 28        dropout : float, default=0.0
 29            The dropout rate.
 30        random_state : int, default=None
 31            The random state for the random number generator.
 32        weights : list, default=None
 33            The weights to initialize the model with.
 34
 35    Attributes:
 36
 37        weights : list
 38            The weights of the model.
 39        params : list
 40            The parameters of the model.
 41        scaler_ : sklearn.preprocessing.StandardScaler
 42            The scaler used to standardize the input features.
 43        y_mean_ : float
 44            The mean of the target variable.
 45
 46    Methods:
 47
 48        fit(X, y)
 49            Fit the model to the data.
 50        predict(X)
 51            Predict the target variable.
 52        predict_proba(X)
 53            Predict the probability of the target variable.
 54        get_weights()
 55            Get the weights of the model.
 56        set_weights(weights)
 57            Set the weights of the model.
 58    """
 59    _estimator_type = "classifier"
 60
 61    def __init__(
 62        self,
 63        hidden_layer_sizes=(100,),
 64        max_iter=100,
 65        learning_rate=0.01,
 66        weights=None,
 67        l1_ratio=0.5,
 68        alpha=1e-6,
 69        activation_name="relu",
 70        dropout=0.0,
 71        random_state=None,
 72    ):
 73        self.hidden_layer_sizes = hidden_layer_sizes
 74        self.max_iter = max_iter
 75        self.learning_rate = learning_rate
 76        self.weights = weights
 77        self.l1_ratio = l1_ratio
 78        self.alpha = alpha
 79        self.activation_name = activation_name
 80        self.dropout = dropout
 81        self.random_state = random_state
 82        self.regr = None
 83
 84    def fit(self, X, y):
 85        """Fit the model to the data.
 86
 87        Parameters:
 88
 89            X: {array-like}, shape = [n_samples, n_features]
 90                Training vectors, where n_samples is the number of samples and
 91                n_features is the number of features.
 92            y: array-like, shape = [n_samples]
 93                Target values.
 94        """
 95        regressor = NeuralNetRegressor(
 96            hidden_layer_sizes=self.hidden_layer_sizes,
 97            max_iter=self.max_iter,
 98            learning_rate=self.learning_rate,
 99            weights=self.weights,
100            l1_ratio=self.l1_ratio,
101            alpha=self.alpha,
102            activation_name=self.activation_name,
103            dropout=self.dropout,
104            random_state=self.random_state,
105        )
106        self.regr = SimpleMultitaskClassifier(regressor)
107        self.regr.fit(X, y)
108        self.classes_ = np.unique(y)
109        self.n_classes_ = len(self.classes_)
110        self.n_tasks_ = 1
111        self.n_features_in_ = X.shape[1]
112        self.n_outputs_ = 1
113        self.n_samples_fit_ = X.shape[0]
114        self.n_samples_test_ = X.shape[0]
115        self.n_features_out_ = 1
116        self.n_outputs_ = 1
117        self.n_features_in_ = X.shape[1]
118        self.n_features_out_ = 1
119        self.n_outputs_ = 1
120        return self
121
122    def predict_proba(self, X):
123        """Predict the probability of the target variable.
124
125        Parameters:
126
127            X: {array-like}, shape = [n_samples, n_features]
128                Training vectors, where n_samples is the number of samples and
129                n_features is the number of features.
130        """
131        return self.regr.predict_proba(X)
132
133    def predict(self, X):
134        """Predict the target variable.
135
136        Parameters:
137
138            X: {array-like}, shape = [n_samples, n_features]
139                Training vectors, where n_samples is the number of samples and
140                n_features is the number of features.
141        """
142        return self.regr.predict(X)
143
144    @property
145    def _estimator_type(self):
146        return "classifier"            

(Pretrained) Neural Network Classifier.

Parameters:

hidden_layer_sizes : tuple, default=(100,)
    The number of neurons in each hidden layer.
max_iter : int, default=100
    The maximum number of iterations to train the model.
learning_rate : float, default=0.01
    The learning rate for the optimizer.
l1_ratio : float, default=0.5
    The ratio of L1 regularization.
alpha : float, default=1e-6
    The regularization parameter.
activation_name : str, default="relu"
    The activation function to use.
dropout : float, default=0.0
    The dropout rate.
random_state : int, default=None
    The random state for the random number generator.
weights : list, default=None
    The weights to initialize the model with.

Attributes:

weights : list
    The weights of the model.
params : list
    The parameters of the model.
scaler_ : sklearn.preprocessing.StandardScaler
    The scaler used to standardize the input features.
y_mean_ : float
    The mean of the target variable.

Methods:

fit(X, y)
    Fit the model to the data.
predict(X)
    Predict the target variable.
predict_proba(X)
    Predict the probability of the target variable.
get_weights()
    Get the weights of the model.
set_weights(weights)
    Set the weights of the model.
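
A minimal usage sketch (the dataset and parameter values are illustrative only):

```python
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn import metrics

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=123)

clf = ns.NeuralNetClassifier(hidden_layer_sizes=(100,), max_iter=100)
clf.fit(X_train, y_train)
print(metrics.accuracy_score(y_test, clf.predict(X_test)))
print(clf.predict_proba(X_test)[:5])  # one column per class, rows sum to 1
```

Internally, `fit` wraps a `NeuralNetRegressor` in a `SimpleMultitaskClassifier`, so the probability estimates follow the same squash-and-normalize scheme used by the multitask classifiers.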
def fit(self, X, y):
 84    def fit(self, X, y):
 85        """Fit the model to the data.
 86
 87        Parameters:
 88
 89            X: {array-like}, shape = [n_samples, n_features]
 90                Training vectors, where n_samples is the number of samples and
 91                n_features is the number of features.
 92            y: array-like, shape = [n_samples]
 93                Target values.
 94        """
 95        regressor = NeuralNetRegressor(
 96            hidden_layer_sizes=self.hidden_layer_sizes,
 97            max_iter=self.max_iter,
 98            learning_rate=self.learning_rate,
 99            weights=self.weights,
100            l1_ratio=self.l1_ratio,
101            alpha=self.alpha,
102            activation_name=self.activation_name,
103            dropout=self.dropout,
104            random_state=self.random_state,
105        )
106        self.regr = SimpleMultitaskClassifier(regressor)
107        self.regr.fit(X, y)
108        self.classes_ = np.unique(y)
109        self.n_classes_ = len(self.classes_)
110        self.n_tasks_ = 1
111        self.n_features_in_ = X.shape[1]
112        self.n_outputs_ = 1
113        self.n_samples_fit_ = X.shape[0]
114        self.n_samples_test_ = X.shape[0]
115        self.n_features_out_ = 1
116        self.n_outputs_ = 1
117        self.n_features_in_ = X.shape[1]
118        self.n_features_out_ = 1
119        self.n_outputs_ = 1
120        return self

Fit the model to the data.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number of samples and
    n_features is the number of features.
y: array-like, shape = [n_samples]
    Target values.
def predict_proba(self, X):
122    def predict_proba(self, X):
123        """Predict the probability of the target variable.
124
125        Parameters:
126
127            X: {array-like}, shape = [n_samples, n_features]
128                Training vectors, where n_samples is the number of samples and
129                n_features is the number of features.
130        """
131        return self.regr.predict_proba(X)

Predict the probability of the target variable.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number of samples and
    n_features is the number of features.
def predict(self, X):
133    def predict(self, X):
134        """Predict the target variable.
135
136        Parameters:
137
138            X: {array-like}, shape = [n_samples, n_features]
139                Training vectors, where n_samples is the number of samples and
140                n_features is the number of features.
141        """
142        return self.regr.predict(X)

Predict the target variable.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number of samples and
    n_features is the number of features.
class PredictionInterval(sklearn.base.BaseEstimator, sklearn.base.RegressorMixin):
 19class PredictionInterval(BaseEstimator, RegressorMixin):
 20    """Class PredictionInterval: Obtain prediction intervals.
 21
 22    Attributes:
 23
 24        obj: an object;
 25            fitted object containing methods `fit` and `predict`
 26
 27        method: a string;
 28            method for constructing the prediction intervals.
 29            Currently "splitconformal" (default) and "localconformal"
 30
 31        level: a float;
 32            Confidence level for prediction intervals. Default is 95,
 33            equivalent to a miscoverage error of 5 (%)
 34
 35        replications: an integer;
 36            Number of replications for simulated conformal (default is `None`),
 37            for type_pi = "bootstrap" or "kde"
 38
 39        type_pi: a string;
 40            type of prediction interval: currently `None`
 41            (split conformal without simulation), "kde" or "bootstrap"
 42
 43        type_split: a string;
 44            "random" (random split of data) or "sequential" (sequential split of data)
 45
 46        seed: an integer;
 47            Reproducibility of fit (there's a random split between fitting and calibration data)
 48    """
 49
 50    def __init__(
 51        self,
 52        obj,
 53        method="splitconformal",
 54        level=95,
 55        type_pi=None,
 56        type_split="random",
 57        replications=None,
 58        kernel=None,
 59        agg="mean",
 60        seed=123,
 61    ):
 62
 63        self.obj = obj
 64        self.method = method
 65        self.level = level
 66        self.type_pi = type_pi
 67        self.type_split = type_split
 68        self.replications = replications
 69        self.kernel = kernel
 70        self.agg = agg
 71        self.seed = seed
 72        self.alpha_ = 1 - self.level / 100
 73        self.quantile_ = None
 74        self.icp_ = None
 75        self.calibrated_residuals_ = None
 76        self.scaled_calibrated_residuals_ = None
 77        self.calibrated_residuals_scaler_ = None
 78        self.kde_ = None
 79        self.aic_ = None 
 80        self.aicc_ = None
 81        self.bic_ = None
 82        self.sse_ = None
 83
 84    def fit(self, X, y, sample_weight=None, **kwargs):
 85        """Fit the `method` to training data (X, y).
 86
 87        Args:
 88
 89            X: array-like, shape = [n_samples, n_features];
 90                Training set vectors, where n_samples is the number
 91                of samples and n_features is the number of features.
 92
 93            y: array-like, shape = [n_samples, ]; Target values.
 94
 95            sample_weight: array-like, shape = [n_samples]
 96                Sample weights.
 97
 98        """
 99
100        if self.type_split == "random":
101
102            X_train, X_calibration, y_train, y_calibration = train_test_split(
103                X, y, test_size=0.5, random_state=self.seed
104            )
105
106        elif self.type_split == "sequential":
107
108            n_x = X.shape[0]
109            n_x_half = n_x // 2
110            first_half_idx = range(0, n_x_half)
111            second_half_idx = range(n_x_half, n_x)
112            X_train = X[first_half_idx, :]
113            X_calibration = X[second_half_idx, :]
114            y_train = y[first_half_idx]
115            y_calibration = y[second_half_idx]        
116
117        if self.method == "splitconformal":
118
119            self.obj.fit(X_train, y_train)
120            preds_calibration = self.obj.predict(X_calibration)
121            self.calibrated_residuals_ = y_calibration - preds_calibration
122            absolute_residuals = np.abs(self.calibrated_residuals_)
123            self.calibrated_residuals_scaler_ = StandardScaler(
124                with_mean=True, with_std=True
125            )
126            self.scaled_calibrated_residuals_ = (
127                self.calibrated_residuals_scaler_.fit_transform(
128                    self.calibrated_residuals_.reshape(-1, 1)
129                ).ravel()
130            )
131            try:
132                # numpy version >= 1.22
133                self.quantile_ = np.quantile(
134                    a=absolute_residuals, q=self.level / 100, method="higher"
135                )
136            except Exception:
137                # numpy version < 1.22
138                self.quantile_ = np.quantile(
139                    a=absolute_residuals,
140                    q=self.level / 100,
141                    interpolation="higher",
142                )            
143
144        if self.method == "localconformal":
145
146            mad_estimator = ExtraTreesRegressor()
147            normalizer = RegressorNormalizer(self.obj, mad_estimator, AbsErrorErrFunc())
148            nc = RegressorNc(self.obj, AbsErrorErrFunc(), normalizer)
149            self.icp_ = IcpRegressor(nc)
150            self.icp_.fit(X_train, y_train)
151            self.icp_.calibrate(X_calibration, y_calibration)
152        
153        # Calculate AIC
154        # Get predictions
155        preds = self.obj.predict(X_calibration)
156        
157        # Calculate SSE
158        self.sse_ = np.sum((y_calibration - preds) ** 2)
159        
160        # Get number of parameters from the base model
161        n_params = getattr(self.obj, 'n_hidden_features', 0) + X_calibration.shape[1]
162        
163        # Calculate AIC
164        n_samples = len(y_calibration)
165        temp = n_samples * np.log(self.sse_/n_samples)
166        self.aic_ = temp + 2 * n_params
167        self.bic_ = temp + np.log(n_samples) * n_params
168
169        return self
170
171    def predict(self, X, return_pi=False):
172        """Obtain predictions and prediction intervals
173
174        Args:
175
176            X: array-like, shape = [n_samples, n_features];
177                Testing set vectors, where n_samples is the number
178                of samples and n_features is the number of features.
179
180            return_pi: boolean
181                Whether the prediction interval is returned or not.
182                Default is False, for compatibility with other _estimators_.
183                If True, a tuple containing the predictions + lower and upper
184                bounds is returned.
185
186        """
187
188        if self.method == "splitconformal":
189            pred = self.obj.predict(X)
190
191        if self.method == "localconformal":
192            pred = self.icp_.predict(X)
193
194        if self.method == "splitconformal":
195
196            if (
197                self.replications is None and self.type_pi is None
198            ):  # type_pi is not used here, no bootstrap or kde
199
200                if return_pi:
201
202                    DescribeResult = namedtuple(
203                        "DescribeResult", ("mean", "lower", "upper")
204                    )
205                    return DescribeResult(
206                        pred, pred - self.quantile_, pred + self.quantile_
207                    )
208
209                else:
210
211                    return pred
212
213            else:  # self.method == "splitconformal" and if self.replications is not None, type_pi must be used
214
215                if self.type_pi is None:
216                    self.type_pi = "kde"
217                    raise Warning("type_pi must be set, setting to 'kde'")
218
219                if self.replications is None:
220                    self.replications = 100
221                    raise Warning("replications must be set, setting to 100")
222
223                assert self.type_pi in (
224                    "bootstrap",
225                    "kde",
226                ), "`self.type_pi` must be in ('bootstrap', 'kde')"
227
228                if self.type_pi == "bootstrap":
229                    np.random.seed(self.seed)
230                    self.residuals_sims_ = np.asarray(
231                        [
232                            np.random.choice(
233                                a=self.scaled_calibrated_residuals_,
234                                size=X.shape[0],
235                            )
236                            for _ in range(self.replications)
237                        ]
238                    ).T
239                    self.sims_ = np.asarray(
240                        [
241                            pred
242                            + self.calibrated_residuals_scaler_.scale_[0]
243                            * self.residuals_sims_[:, i].ravel()
244                            for i in range(self.replications)
245                        ]
246                    ).T
247                elif self.type_pi == "kde":
248                    self.kde_ = gaussian_kde(dataset=self.scaled_calibrated_residuals_)
249                    self.sims_ = np.asarray(
250                        [
251                            pred
252                            + self.calibrated_residuals_scaler_.scale_[0]
253                            * self.kde_.resample(
254                                size=X.shape[0], seed=self.seed + i
255                            ).ravel()
256                            for i in range(self.replications)
257                        ]
258                    ).T
259
260                self.mean_ = np.mean(self.sims_, axis=1)
261                self.lower_ = np.quantile(self.sims_, q=self.alpha_ / 200, axis=1)
262                self.upper_ = np.quantile(self.sims_, q=1 - self.alpha_ / 200, axis=1)
263
264                DescribeResult = namedtuple(
265                    "DescribeResult", ("mean", "sims", "lower", "upper")
266                )
267
268                return DescribeResult(self.mean_, self.sims_, self.lower_, self.upper_)
269
270        if self.method == "localconformal":
271
272            if self.replications is None:
273
274                if return_pi:
275
276                    predictions_bounds = self.icp_.predict(
277                        X, significance=1 - self.level
278                    )
279                    DescribeResult = namedtuple(
280                        "DescribeResult", ("mean", "lower", "upper")
281                    )
282                    return DescribeResult(
283                        pred, predictions_bounds[:, 0], predictions_bounds[:, 1]
284                    )
285
286                else:
287
288                    return pred
289
290            else:  # (self.method == "localconformal") and if self.replications is not None
291
292                raise NotImplementedError(
293                    "When self.method == 'localconformal', there are no simulations"
294                )

Class PredictionInterval: Obtain prediction intervals.

Attributes:

obj: an object;
    fitted object containing methods `fit` and `predict`

method: a string;
    method for constructing the prediction intervals.
    Currently "splitconformal" (default) and "localconformal"

level: a float;
    Confidence level for prediction intervals. Default is 95,
    equivalent to a miscoverage error of 5 (%)

replications: an integer;
    Number of replications for simulated conformal (default is `None`),
    for type_pi = "bootstrap" or "kde"

type_pi: a string;
    type of prediction interval: currently `None`
    (split conformal without simulation), "kde" or "bootstrap"

type_split: a string;
    "random" (random split of data) or "sequential" (sequential split of data)

seed: an integer;
    Reproducibility of fit (there's a random split between fitting and calibration data)
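
A minimal split-conformal usage sketch (base learner and dataset are arbitrary choices for illustration):

```python
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=123)

pi = ns.PredictionInterval(obj=Ridge(), method="splitconformal",
                           level=95, seed=123)
pi.fit(X_train, y_train)
mean, lower, upper = pi.predict(X_test, return_pi=True)

# empirical coverage of the nominal 95% interval on the held-out set
print(((y_test >= lower) & (y_test <= upper)).mean())
```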
def fit(self, X, y, sample_weight=None, **kwargs):
 84    def fit(self, X, y, sample_weight=None, **kwargs):
 85        """Fit the `method` to training data (X, y).
 86
 87        Args:
 88
 89            X: array-like, shape = [n_samples, n_features];
 90                Training set vectors, where n_samples is the number
 91                of samples and n_features is the number of features.
 92
 93            y: array-like, shape = [n_samples, ]; Target values.
 94
 95            sample_weight: array-like, shape = [n_samples]
 96                Sample weights.
 97
 98        """
 99
100        if self.type_split == "random":
101
102            X_train, X_calibration, y_train, y_calibration = train_test_split(
103                X, y, test_size=0.5, random_state=self.seed
104            )
105
106        elif self.type_split == "sequential":
107
108            n_x = X.shape[0]
109            n_x_half = n_x // 2
110            first_half_idx = range(0, n_x_half)
111            second_half_idx = range(n_x_half, n_x)
112            X_train = X[first_half_idx, :]
113            X_calibration = X[second_half_idx, :]
114            y_train = y[first_half_idx]
115            y_calibration = y[second_half_idx]        
116
117        if self.method == "splitconformal":
118
119            self.obj.fit(X_train, y_train)
120            preds_calibration = self.obj.predict(X_calibration)
121            self.calibrated_residuals_ = y_calibration - preds_calibration
122            absolute_residuals = np.abs(self.calibrated_residuals_)
123            self.calibrated_residuals_scaler_ = StandardScaler(
124                with_mean=True, with_std=True
125            )
126            self.scaled_calibrated_residuals_ = (
127                self.calibrated_residuals_scaler_.fit_transform(
128                    self.calibrated_residuals_.reshape(-1, 1)
129                ).ravel()
130            )
131            try:
132                # numpy version >= 1.22
133                self.quantile_ = np.quantile(
134                    a=absolute_residuals, q=self.level / 100, method="higher"
135                )
136            except Exception:
137                # numpy version < 1.22
138                self.quantile_ = np.quantile(
139                    a=absolute_residuals,
140                    q=self.level / 100,
141                    interpolation="higher",
142                )            
143
144        if self.method == "localconformal":
145
146            mad_estimator = ExtraTreesRegressor()
147            normalizer = RegressorNormalizer(self.obj, mad_estimator, AbsErrorErrFunc())
148            nc = RegressorNc(self.obj, AbsErrorErrFunc(), normalizer)
149            self.icp_ = IcpRegressor(nc)
150            self.icp_.fit(X_train, y_train)
151            self.icp_.calibrate(X_calibration, y_calibration)
152        
153        # Calculate AIC
154        # Get predictions
155        preds = self.obj.predict(X_calibration)
156        
157        # Calculate SSE
158        self.sse_ = np.sum((y_calibration - preds) ** 2)
159        
160        # Get number of parameters from the base model
161        n_params = getattr(self.obj, 'n_hidden_features', 0) + X_calibration.shape[1]
162        
163        # Calculate AIC
164        n_samples = len(y_calibration)
165        temp = n_samples * np.log(self.sse_/n_samples)
166        self.aic_ = temp + 2 * n_params
167        self.bic_ = temp + np.log(n_samples) * n_params
168
169        return self

Fit the `method` to training data (X, y).

Args:

X: array-like, shape = [n_samples, n_features];
    Training set vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples, ]; Target values.

sample_weight: array-like, shape = [n_samples]
    Sample weights.
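
Besides the conformal calibration itself, `fit` also stores simple goodness-of-fit summaries (`sse_`, `aic_`, `bic_`) computed from the calibration residuals. A sketch of that computation, mirroring the code above with made-up numbers:

```python
import numpy as np

# hypothetical calibration-set quantities
n_samples, n_features = 200, 10
sse = 1250.0                # sum of squared residuals on the calibration set
n_hidden_features = 0       # read from the base model when it exposes this attribute
n_params = n_hidden_features + n_features

temp = n_samples * np.log(sse / n_samples)
aic = temp + 2 * n_params                   # stored as aic_
bic = temp + np.log(n_samples) * n_params   # stored as bic_
print(aic, bic)
```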
def predict(self, X, return_pi=False):
171    def predict(self, X, return_pi=False):
172        """Obtain predictions and prediction intervals
173
174        Args:
175
176            X: array-like, shape = [n_samples, n_features];
177                Testing set vectors, where n_samples is the number
178                of samples and n_features is the number of features.
179
180            return_pi: boolean
181                Whether the prediction interval is returned or not.
182                Default is False, for compatibility with other _estimators_.
183                If True, a tuple containing the predictions + lower and upper
184                bounds is returned.
185
186        """
187
188        if self.method == "splitconformal":
189            pred = self.obj.predict(X)
190
191        if self.method == "localconformal":
192            pred = self.icp_.predict(X)
193
194        if self.method == "splitconformal":
195
196            if (
197                self.replications is None and self.type_pi is None
198            ):  # type_pi is not used here, no bootstrap or kde
199
200                if return_pi:
201
202                    DescribeResult = namedtuple(
203                        "DescribeResult", ("mean", "lower", "upper")
204                    )
205                    return DescribeResult(
206                        pred, pred - self.quantile_, pred + self.quantile_
207                    )
208
209                else:
210
211                    return pred
212
213            else:  # self.method == "splitconformal" and if self.replications is not None, type_pi must be used
214
215                if self.type_pi is None:
216                    self.type_pi = "kde"
217                    raise Warning("type_pi must be set, setting to 'kde'")
218
219                if self.replications is None:
220                    self.replications = 100
221                    raise Warning("replications must be set, setting to 100")
222
223                assert self.type_pi in (
224                    "bootstrap",
225                    "kde",
226                ), "`self.type_pi` must be in ('bootstrap', 'kde')"
227
228                if self.type_pi == "bootstrap":
229                    np.random.seed(self.seed)
230                    self.residuals_sims_ = np.asarray(
231                        [
232                            np.random.choice(
233                                a=self.scaled_calibrated_residuals_,
234                                size=X.shape[0],
235                            )
236                            for _ in range(self.replications)
237                        ]
238                    ).T
239                    self.sims_ = np.asarray(
240                        [
241                            pred
242                            + self.calibrated_residuals_scaler_.scale_[0]
243                            * self.residuals_sims_[:, i].ravel()
244                            for i in range(self.replications)
245                        ]
246                    ).T
247                elif self.type_pi == "kde":
248                    self.kde_ = gaussian_kde(dataset=self.scaled_calibrated_residuals_)
249                    self.sims_ = np.asarray(
250                        [
251                            pred
252                            + self.calibrated_residuals_scaler_.scale_[0]
253                            * self.kde_.resample(
254                                size=X.shape[0], seed=self.seed + i
255                            ).ravel()
256                            for i in range(self.replications)
257                        ]
258                    ).T
259
260                self.mean_ = np.mean(self.sims_, axis=1)
261                self.lower_ = np.quantile(self.sims_, q=self.alpha_ / 200, axis=1)
262                self.upper_ = np.quantile(self.sims_, q=1 - self.alpha_ / 200, axis=1)
263
264                DescribeResult = namedtuple(
265                    "DescribeResult", ("mean", "sims", "lower", "upper")
266                )
267
268                return DescribeResult(self.mean_, self.sims_, self.lower_, self.upper_)
269
270        if self.method == "localconformal":
271
272            if self.replications is None:
273
274                if return_pi:
275
276                    predictions_bounds = self.icp_.predict(
277                        X, significance=1 - self.level
278                    )
279                    DescribeResult = namedtuple(
280                        "DescribeResult", ("mean", "lower", "upper")
281                    )
282                    return DescribeResult(
283                        pred, predictions_bounds[:, 0], predictions_bounds[:, 1]
284                    )
285
286                else:
287
288                    return pred
289
290            else:  # (self.method == "localconformal") and if self.replications is not None
291
292                raise NotImplementedError(
293                    "When self.method == 'localconformal', there are no simulations"
294                )

Obtain predictions and prediction intervals

Args:

X: array-like, shape = [n_samples, n_features];
    Testing set vectors, where n_samples is the number
    of samples and n_features is the number of features.

return_pi: boolean
    Whether the prediction interval is returned or not.
    Default is False, for compatibility with other _estimators_.
    If True, a tuple containing the predictions + lower and upper
    bounds is returned.
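
When `replications` is set, a simulated conformal variant is used instead: scaled calibration residuals are resampled (type_pi="bootstrap") or drawn from a kernel density estimate (type_pi="kde"), added to the point predictions, and the result carries the simulated paths alongside the interval bounds. A minimal sketch, with arbitrary settings:

```python
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=123)

pi = ns.PredictionInterval(obj=Ridge(), method="splitconformal",
                           type_pi="kde", replications=250,
                           level=95, seed=123)
pi.fit(X_train, y_train)
res = pi.predict(X_test)

print(res.mean.shape)   # (n_test,)
print(res.sims.shape)   # (n_test, 250) simulated predictions
print(res.lower[:3], res.upper[:3])
```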
class SimpleMultitaskClassifier(nnetsauce.Base, sklearn.base.ClassifierMixin):
 18class SimpleMultitaskClassifier(Base, ClassifierMixin):
 19    """Multitask Classification model based on regression models, with shared covariates
 20
 21    Parameters:
 22
 23        obj: object
 24            any object (must be a regression model) containing a method fit (obj.fit())
 25            and a method predict (obj.predict())
 26
 27        seed: int
 28            reproducibility seed
 29
 30    Attributes:
 31
 32        fit_objs_: dict
 33            objects adjusted to each individual time series
 34
 35        n_classes_: int
 36            number of classes for the classifier
 37
 38    Examples:
 39
 40    ```python
 41    import nnetsauce as ns
 42    import numpy as np
 43    from sklearn.datasets import load_breast_cancer
 44    from sklearn.linear_model import LinearRegression
 45    from sklearn.model_selection import train_test_split
 46    from sklearn import metrics
 47    from time import time
 48
 49    breast_cancer = load_breast_cancer()
 50    Z = breast_cancer.data
 51    t = breast_cancer.target
 52
 53    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
 54                                                        random_state=123+2*10)
 55
 56    # Linear Regression is used
 57    regr = LinearRegression()
 58    fit_obj = ns.SimpleMultitaskClassifier(regr)
 59
 60    start = time()
 61    fit_obj.fit(X_train, y_train)
 62    print(f"Elapsed {time() - start}")
 63
 64    print(fit_obj.score(X_test, y_test))
 65    print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
 66
 67    start = time()
 68    preds = fit_obj.predict(X_test)
 69    print(f"Elapsed {time() - start}")
 70    print(metrics.classification_report(preds, y_test))
 71    ```
 72
 73    """
 74
 75    # construct the object -----
 76    _estimator_type = "classifier"
 77
 78    def __init__(
 79        self,
 80        obj,
 81    ):
 82        self.type_fit = "classification"
 83        self.obj = obj
 84        self.fit_objs_ = {}
 85        self.X_scaler_ = StandardScaler()
 86        self.scaled_X_ = None
 87
 88    def fit(self, X, y, sample_weight=None, **kwargs):
 89        """Fit SimpleMultitaskClassifier to training data (X, y).
 90
 91        Args:
 92
 93            X: {array-like}, shape = [n_samples, n_features]
 94                Training vectors, where n_samples is the number
 95                of samples and n_features is the number of features.
 96
 97            y: array-like, shape = [n_samples]
 98                Target values.
 99
100            **kwargs: additional parameters to be passed to
101                    self.cook_training_set or self.obj.fit
102
103        Returns:
104
105            self: object
106
107        """
108
109        assert mx.is_factor(y), "y must contain only integers"
110
111        self.classes_ = np.unique(y)  # for compatibility with sklearn
112        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
113
114        self.scaled_X_ = self.X_scaler_.fit_transform(X)
115
116        # multitask response
117        Y = mo.one_hot_encode2(y, self.n_classes_)
118
119        try: 
120            for i in range(self.n_classes_):
121                self.fit_objs_[i] = deepcopy(
122                    self.obj.fit(self.scaled_X_, Y[:, i], sample_weight=sample_weight, **kwargs)
123                )
124        except Exception as e:
125            for i in range(self.n_classes_):
126                self.fit_objs_[i] = deepcopy(
127                    self.obj.fit(self.scaled_X_, Y[:, i], **kwargs)
128                )
129        return self
130
131    def predict(self, X, **kwargs):
132        """Predict test data X.
133
134        Args:
135
136            X: {array-like}, shape = [n_samples, n_features]
137                Training vectors, where n_samples is the number
138                of samples and n_features is the number of features.
139
140            **kwargs: additional parameters
141
142        Returns:
143
144            model predictions: {array-like}
145
146        """
147        return np.argmax(self.predict_proba(X, **kwargs), axis=1)
148
149    def predict_proba(self, X, **kwargs):
150        """Predict probabilities for test data X.
151
152        Args:
153
154            X: {array-like}, shape = [n_samples, n_features]
155                Training vectors, where n_samples is the number
156                of samples and n_features is the number of features.
157
158            **kwargs: additional parameters
159
160        Returns:
161
162            probability estimates for test data: {array-like}
163
164        """
165
166        shape_X = X.shape
167
168        probs = np.zeros((shape_X[0], self.n_classes_))
169
170        if len(shape_X) == 1: # one example
171
172            n_features = shape_X[0]
173
174            new_X = mo.rbind(
175                X.reshape(1, n_features),
176                np.ones(n_features).reshape(1, n_features),
177            )
178
179            Z = self.X_scaler_.transform(new_X, **kwargs)
180
181            # Fallback to standard model
182            for i in range(self.n_classes_):
183                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0]
184
185        else: # multiple rows
186
187            Z = self.X_scaler_.transform(X, **kwargs)
188
189            # Fallback to standard model
190            for i in range(self.n_classes_):
191                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)
192
193        expit_raw_probs = expit(probs)
194        
195        # Add small epsilon to avoid division by zero
196        row_sums = expit_raw_probs.sum(axis=1)[:, None]
197        row_sums[row_sums < 1e-10] = 1e-10
198        
199        return expit_raw_probs / row_sums
200
201    def decision_function(self, X, **kwargs):
202        """Compute the decision function of X.
203
204        Parameters:
205            X: {array-like}, shape = [n_samples, n_features]
206                Samples to compute decision function for.
207
208            **kwargs: additional parameters to be passed to
209                    self.cook_test_set
210
211        Returns:
212            array-like of shape (n_samples,) or (n_samples, n_classes)
213            Decision function of the input samples. The order of outputs is the same
214            as that of the classes passed to fit.
215        """
216        if not hasattr(self.obj, "decision_function"):
217            # If base classifier doesn't have decision_function, use predict_proba
218            proba = self.predict_proba(X, **kwargs)
219            if proba.shape[1] == 2:
220                return proba[:, 1]  # For binary classification
221            return proba  # For multiclass
222
223        if len(X.shape) == 1:
224            n_features = X.shape[0]
225            new_X = mo.rbind(
226                X.reshape(1, n_features),
227                np.ones(n_features).reshape(1, n_features),
228            )
229
230            return (
231                self.obj.decision_function(
232                    self.cook_test_set(new_X, **kwargs), **kwargs
233                )
234            )[0]
235
236        return self.obj.decision_function(self.cook_test_set(X, **kwargs), **kwargs)
237
238    @property
239    def _estimator_type(self):
240        return "classifier"            

Multitask Classification model based on regression models, with shared covariates

Parameters:

obj: object
    any object (must be a regression model) containing a method fit (obj.fit())
    and a method predict (obj.predict())

seed: int
    reproducibility seed

Attributes:

fit_objs_: dict
    regression objects fitted to each individual class

n_classes_: int
    number of classes for the classifier

Examples:

```python
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time

breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target

X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
                                                    random_state=123+2*10)

# Linear Regression is used
regr = LinearRegression()
fit_obj = ns.SimpleMultitaskClassifier(regr)

start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")

print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))

start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
```
def fit(self, X, y, sample_weight=None, **kwargs):
 88    def fit(self, X, y, sample_weight=None, **kwargs):
 89        """Fit SimpleMultitaskClassifier to training data (X, y).
 90
 91        Args:
 92
 93            X: {array-like}, shape = [n_samples, n_features]
 94                Training vectors, where n_samples is the number
 95                of samples and n_features is the number of features.
 96
 97            y: array-like, shape = [n_samples]
 98                Target values.
 99
100            **kwargs: additional parameters to be passed to
101                    self.cook_training_set or self.obj.fit
102
103        Returns:
104
105            self: object
106
107        """
108
109        assert mx.is_factor(y), "y must contain only integers"
110
111        self.classes_ = np.unique(y)  # for compatibility with sklearn
112        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
113
114        self.scaled_X_ = self.X_scaler_.fit_transform(X)
115
116        # multitask response
117        Y = mo.one_hot_encode2(y, self.n_classes_)
118
119        try: 
120            for i in range(self.n_classes_):
121                self.fit_objs_[i] = deepcopy(
122                    self.obj.fit(self.scaled_X_, Y[:, i], sample_weight=sample_weight, **kwargs)
123                )
124        except Exception:  # base learner does not accept sample_weight; refit without it
125            for i in range(self.n_classes_):
126                self.fit_objs_[i] = deepcopy(
127                    self.obj.fit(self.scaled_X_, Y[:, i], **kwargs)
128                )
129        return self

Fit SimpleMultitaskClassifier to training data (X, y).

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
def predict(self, X, **kwargs):
131    def predict(self, X, **kwargs):
132        """Predict test data X.
133
134        Args:
135
136            X: {array-like}, shape = [n_samples, n_features]
137                Training vectors, where n_samples is the number
138                of samples and n_features is the number of features.
139
140            **kwargs: additional parameters
141
142        Returns:
143
144            model predictions: {array-like}
145
146        """
147        return np.argmax(self.predict_proba(X, **kwargs), axis=1)

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters

Returns:

model predictions: {array-like}
def predict_proba(self, X, **kwargs):
149    def predict_proba(self, X, **kwargs):
150        """Predict probabilities for test data X.
151
152        Args:
153
154            X: {array-like}, shape = [n_samples, n_features]
155                Training vectors, where n_samples is the number
156                of samples and n_features is the number of features.
157
158            **kwargs: additional parameters
159
160        Returns:
161
162            probability estimates for test data: {array-like}
163
164        """
165
166        shape_X = X.shape
167
168        probs = np.zeros((shape_X[0], self.n_classes_))
169
170        if len(shape_X) == 1: # one example
171
172            n_features = shape_X[0]
173
174            new_X = mo.rbind(
175                X.reshape(1, n_features),
176                np.ones(n_features).reshape(1, n_features),
177            )
178
179            Z = self.X_scaler_.transform(new_X, **kwargs)
180
181            # one prediction per class from the fitted base regressors
182            for i in range(self.n_classes_):
183                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0]
184
185        else: # multiple rows
186
187            Z = self.X_scaler_.transform(X, **kwargs)
188
189            # one prediction per class from the fitted base regressors
190            for i in range(self.n_classes_):
191                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)
192
193        expit_raw_probs = expit(probs)
194        
195        # floor tiny row sums to avoid division by zero
196        row_sums = expit_raw_probs.sum(axis=1)[:, None]
197        row_sums[row_sums < 1e-10] = 1e-10
198        
199        return expit_raw_probs / row_sums

Predict probabilities for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters

Returns:

probability estimates for test data: {array-like}
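
The probabilities above are obtained by passing each per-class regression output through a sigmoid and normalizing each row, as in the source shown earlier. A minimal NumPy sketch of that post-processing (the raw outputs below are made up for illustration):

```python
import numpy as np
from scipy.special import expit

# made-up raw per-class regression outputs: 3 samples, 2 classes
raw = np.array([[ 0.2, -0.5],
                [ 1.3,  0.4],
                [-0.1,  0.1]])

probs = expit(raw)                     # squash each output into (0, 1)
row_sums = probs.sum(axis=1)[:, None]  # then normalize each row
row_sums[row_sums < 1e-10] = 1e-10     # floor tiny sums to avoid division by zero
probs = probs / row_sums
print(probs.sum(axis=1))               # -> [1. 1. 1.]
```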
class Optimizer:
  9class Optimizer:
 10    """Optimizer class
 11
 12    Attributes:
 13
 14        type_optim: str
 15            type of optimizer, (currently) either 'sgd' (stochastic minibatch gradient descent)
 16            or 'scd' (stochastic minibatch coordinate descent)
 17
 18        num_iters: int
 19            number of iterations of the optimizer
 20
 21        learning_rate: float
 22            step size
 23
 24        batch_prop: float
 25            proportion of the initial data used at each optimization step
 26
 27        learning_method: str
 28            "poly" - learning rate decreasing as a polynomial function
 29            of # of iterations (default)
 30            "exp" - learning rate decreasing as an exponential function
 31            of # of iterations
 32            "momentum" - gradient descent using momentum
 33
 34        randomization: str
 35            type of randomization applied at each step
 36            "strat" - stratified subsampling (default)
 37            "shuffle" - random subsampling
 38
 39        mass: float
 40            mass on velocity, for `method` == "momentum"
 41
 42        decay: float
 43            coefficient of decrease of the learning rate for
 44            `method` == "poly" and `method` == "exp"
 45
 46        tolerance: float
 47            early stopping parameter (convergence of loss function)
 48
 49        verbose: int
 50            controls verbosity of gradient descent
 51            0 - nothing is printed
 52            1 - a progress bar is printed
 53            2 - successive loss function values are printed
 54
 55    """
 56
 57    # construct the object -----
 58
 59    def __init__(
 60        self,
 61        type_optim="sgd",
 62        num_iters=100,
 63        learning_rate=0.01,
 64        batch_prop=1.0,
 65        learning_method="momentum",
 66        randomization="strat",
 67        mass=0.9,
 68        decay=0.1,
 69        tolerance=1e-3,
 70        verbose=1,
 71    ):
 72        self.type_optim = type_optim
 73        self.num_iters = num_iters
 74        self.learning_rate = learning_rate
 75        self.batch_prop = batch_prop
 76        self.learning_method = learning_method
 77        self.randomization = randomization
 78        self.mass = mass
 79        self.decay = decay
 80        self.tolerance = tolerance
 81        self.verbose = verbose
 82        self.opt = None
 83
 84    def fit(self, loss_func, response, x0, **kwargs):
 85        """Run the optimizer on a loss function.
 86
 87        Args:
 88
 89            loss_func: loss function
 90
 91            response: array-like, shape = [n_samples]
 92            target variable (used for subsampling)
 93
 94            x0: array-like, shape = [n_features]
 95                initial value provided to the optimizer
 96
 97            **kwargs: additional parameters to be passed to
 98                    loss function
 99
100        Returns:
101
102            self: object
103
104        """
105
106        if self.type_optim == "scd":
107            self.results = scd(
108                loss_func,
109                response=response,
110                x=x0,
111                num_iters=self.num_iters,
112                batch_prop=self.batch_prop,
113                learning_rate=self.learning_rate,
114                learning_method=self.learning_method,
115                mass=self.mass,
116                decay=self.decay,
117                randomization=self.randomization,
118                tolerance=self.tolerance,
119                verbose=self.verbose,
120                **kwargs
121            )
122
123        if self.type_optim == "sgd":
124            self.results = sgd(
125                loss_func,
126                response=response,
127                x=x0,
128                num_iters=self.num_iters,
129                batch_prop=self.batch_prop,
130                learning_rate=self.learning_rate,
131                learning_method=self.learning_method,
132                mass=self.mass,
133                decay=self.decay,
134                randomization=self.randomization,
135                tolerance=self.tolerance,
136                verbose=self.verbose,
137                **kwargs
138            )
139
140        return self
141
142    def one_hot_encode(self, y, n_classes):
143        return one_hot_encode(y, n_classes)

Optimizer class

Attributes:

type_optim: str
    type of optimizer, (currently) either 'sgd' (stochastic minibatch gradient descent)
    or 'scd' (stochastic minibatch coordinate descent)

num_iters: int
    number of iterations of the optimizer

learning_rate: float
    step size

batch_prop: float
    proportion of the initial data used at each optimization step

learning_method: str
    "poly" - learning rate decreasing as a polynomial function
    of # of iterations (default)
    "exp" - learning rate decreasing as an exponential function
    of # of iterations
    "momentum" - gradient descent using momentum

randomization: str
    type of randomization applied at each step
    "strat" - stratified subsampling (default)
    "shuffle" - random subsampling

mass: float
    mass on velocity, for `method` == "momentum"

decay: float
    coefficient of decrease of the learning rate for
    `method` == "poly" and `method` == "exp"

tolerance: float
    early stopping parameter (convergence of loss function)

verbose: int
    controls verbosity of gradient descent
    0 - nothing is printed
    1 - a progress bar is printed
    2 - successive loss function values are printed
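
A minimal construction sketch (the hyperparameter values below are illustrative, not recommendations):

```python
import nnetsauce as ns

# stochastic minibatch gradient descent with momentum,
# using stratified subsampling of 80% of the rows at each step
opt = ns.Optimizer(
    type_optim="sgd",
    num_iters=200,
    learning_rate=0.01,
    batch_prop=0.8,
    learning_method="momentum",
    randomization="strat",
    mass=0.9,
    tolerance=1e-3,
    verbose=0,
)
# opt.fit(loss_func, response, x0) then runs the chosen routine
# and stores its output in opt.results
```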
def fit(self, loss_func, response, x0, **kwargs):
 84    def fit(self, loss_func, response, x0, **kwargs):
 85        """Run the optimizer on a loss function.
 86
 87        Args:
 88
 89            loss_func: loss function
 90
 91            response: array-like, shape = [n_samples]
 92            target variable (used for subsampling)
 93
 94            x0: array-like, shape = [n_features]
 95                initial value provided to the optimizer
 96
 97            **kwargs: additional parameters to be passed to
 98                    loss function
 99
100        Returns:
101
102            self: object
103
104        """
105
106        if self.type_optim == "scd":
107            self.results = scd(
108                loss_func,
109                response=response,
110                x=x0,
111                num_iters=self.num_iters,
112                batch_prop=self.batch_prop,
113                learning_rate=self.learning_rate,
114                learning_method=self.learning_method,
115                mass=self.mass,
116                decay=self.decay,
117                randomization=self.randomization,
118                tolerance=self.tolerance,
119                verbose=self.verbose,
120                **kwargs
121            )
122
123        if self.type_optim == "sgd":
124            self.results = sgd(
125                loss_func,
126                response=response,
127                x=x0,
128                num_iters=self.num_iters,
129                batch_prop=self.batch_prop,
130                learning_rate=self.learning_rate,
131                learning_method=self.learning_method,
132                mass=self.mass,
133                decay=self.decay,
134                randomization=self.randomization,
135                tolerance=self.tolerance,
136                verbose=self.verbose,
137                **kwargs
138            )
139
140        return self

Run the optimizer on a loss function.

Args:

loss_func: loss function

response: array-like, shape = [n_samples]
target variable (used for subsampling)

x0: array-like, shape = [n_features]
    initial value provided to the optimizer

**kwargs: additional parameters to be passed to
        loss function

Returns:

self: object
class QuantileRegressor(sklearn.base.BaseEstimator, sklearn.base.RegressorMixin):
 37class QuantileRegressor(BaseEstimator, RegressorMixin):
 38    """
 39    Quantile Regressor.
 40
 41    Parameters:
 42
 43        obj: base model (regression model)
 44            The base regressor from which to build a
 45            quantile regressor.
 46
 47        level: int, default=95
 48            The level of the quantiles to compute.
 49
 50        scoring: str, default="predictions"
 51            The scoring to use for the optimization and constructing
 52            prediction intervals (predictions, residuals, conformal,
 53              studentized, conformal-studentized).
 54
 55    Attributes:
 56
 57        obj_ : base model (regression model)
 58            The base regressor from which to build a
 59            quantile regressor.
 60
 61        offset_multipliers_ : list
 62            The multipliers for the offset.
 63
 64        scoring_residuals_ : list
 65            The residuals for the scoring.
 66
 67        student_multiplier_ : float
 68            The studentized scale factor (standard error of the response).
 69
 70    """
 71
 72    def __init__(self, obj, level=95, scoring="predictions"):
 73        assert scoring in (
 74            "predictions",
 75            "residuals",
 76            "conformal",
 77            "studentized",
 78            "conformal-studentized",
 79        ), "scoring must be one of 'predictions', 'residuals', 'conformal', 'studentized', 'conformal-studentized'"
 80        self.obj = obj
 81        low_risk_level = (1 - level / 100) / 2
 82        self.quantiles = [low_risk_level, 0.5, 1 - low_risk_level]
 83        self.scoring = scoring
 84        self.offset_multipliers_ = None
 85        self.obj_ = None
 86        self.scoring_residuals_ = None
 87        self.student_multiplier_ = None
 88
 89    def _compute_quantile_loss(self, residuals, quantile):
 90        """
 91        Compute the quantile loss for a given set of residuals and quantile.
 92        """
 93        if not 0 < quantile < 1:
 94            raise ValueError("Quantile should be between 0 and 1.")
 95        loss = quantile * (residuals >= 0) + (quantile - 1) * (residuals < 0)
 96        return np.mean(residuals * loss)
 97
 98    def _optimize_multiplier(
 99        self,
100        y,
101        base_predictions,
102        prev_predictions,
103        scoring_residuals = None,
104        quantile = 0.5,
105    ):
106        """
107        Optimize the multiplier for a given quantile.
108        """
109        if not 0 < quantile < 1:
110            raise ValueError("Quantile should be between 0 and 1.")
111        
112        n = len(y)
113
114        def objective(log_multiplier):
115            """
116            Objective function for optimization.
117            """
118            # Convert to positive multiplier using exp
119            multiplier = np.exp(log_multiplier[0])
120            if self.scoring == "predictions":
121                assert base_predictions is not None, "base_predictions must be not None"
122                # Calculate predictions
123                if prev_predictions is None:
124                    # For first quantile, subtract from conditional expectation
125                    predictions = base_predictions - multiplier * np.abs(
126                        base_predictions
127                    )
128                else:
129                    # For other quantiles, add to previous quantile
130                    offset = multiplier * np.abs(base_predictions)
131                    predictions = prev_predictions + offset
132            elif self.scoring in ("residuals", "conformal"):
133                assert (
134                    scoring_residuals is not None
135                ), "scoring_residuals must be not None"
136                # print("scoring_residuals", scoring_residuals)
137                # Calculate predictions
138                if prev_predictions is None:
139                    # For first quantile, subtract from conditional expectation
140                    predictions = base_predictions - multiplier * np.std(
141                        scoring_residuals
142                    )/np.sqrt(len(scoring_residuals))
143                    # print("predictions", predictions)
144                else:
145                    # For other quantiles, add to previous quantile
146                    offset = multiplier * np.std(scoring_residuals)/np.sqrt(len(scoring_residuals))
147                    predictions = prev_predictions + offset
148            elif self.scoring in ("studentized", "conformal-studentized"):
149                assert (
150                    scoring_residuals is not None
151                ), "scoring_residuals must be not None"
152                # Calculate predictions
153                if prev_predictions is None:
154                    # For first quantile, subtract from conditional expectation
155                    predictions = (
156                        base_predictions - multiplier * self.student_multiplier_
157                    )
158                    # print("predictions", predictions)
159                else:
160                    # For other quantiles, add to previous quantile
161                    offset = multiplier * self.student_multiplier_
162                    predictions = prev_predictions + offset
163            else:
164                raise ValueError("Invalid argument 'scoring'")
165
166            residuals = y - predictions
167            return self._compute_quantile_loss(residuals, quantile)
168
169        # Optimize in log space for numerical stability
170        # bounds = [(-10, 10)]  # log space bounds
171        bounds = [(-100, 100)]  # log space bounds
172        result = differential_evolution(
173            objective,
174            bounds,
175            # popsize=15,
176            # maxiter=100,
177            # tol=1e-4,
178            popsize=25,
179            maxiter=200,
180            tol=1e-6,
181            disp=False,
182        )
183
184        return np.exp(result.x[0])
185
186
187    def fit(self, X, y):
188        """Fit the model to the data.
189
190        Parameters:
191
192            X: {array-like}, shape = [n_samples, n_features]
193                Training vectors, where n_samples is the number of samples and
194                n_features is the number of features.
195            y: array-like, shape = [n_samples]
196                Target values.
197        """
198        self.obj_ = clone(self.obj)
199
200        if self.scoring in ("predictions", "residuals"):
201
202            self.obj_.fit(X, y)
203            base_predictions = self.obj_.predict(X)
204            scoring_residuals = y - base_predictions
205            self.scoring_residuals_ = scoring_residuals
206
207        elif self.scoring == "conformal":
208
209            X_train, X_calib, y_train, y_calib = train_test_split(
210                X, y, test_size=0.5, random_state=42
211            )
212            self.obj_.fit(X_train, y_train)
213            scoring_residuals = y_calib - self.obj_.predict(
214                X_calib
215            )  # These are calibration predictions
216            self.scoring_residuals_ = scoring_residuals
217            # Update base_predictions to use training predictions for optimization
218            self.obj_.fit(X_calib, y_calib)
219            base_predictions = self.obj_.predict(X_calib)
220
221        elif self.scoring in ("studentized", "conformal-studentized"):
222
223            # Calculate student multiplier
224            if self.scoring == "conformal-studentized":
225                X_train, X_calib, y_train, y_calib = train_test_split(
226                    X, y, test_size=0.5, random_state=42
227                )
228                self.obj_.fit(X_train, y_train)
229                scoring_residuals = y_calib - self.obj_.predict(X_calib)
230                # Calculate studentized multiplier using calibration data
231                self.student_multiplier_ = np.std(y_calib, ddof=1) / np.sqrt(
232                    len(y_calib) - 1
233                )
234                self.obj_.fit(X_calib, y_calib)
235                base_predictions = self.obj_.predict(X_calib)
236            else:  # regular studentized
237                self.obj_.fit(X, y)
238                base_predictions = self.obj_.predict(X)
239                scoring_residuals = y - base_predictions
240                self.student_multiplier_ = np.std(y, ddof=1) / np.sqrt(len(y) - 1)
241
242        # Initialize storage for multipliers
243        self.offset_multipliers_ = []
244        # Keep track of current predictions for each quantile
245        current_predictions = None
246
247        # Fit each quantile sequentially
248        for i, quantile in enumerate(self.quantiles):
249
250            if self.scoring == "predictions":
251
252                multiplier = self._optimize_multiplier(
253                    y=y,
254                    base_predictions=base_predictions,
255                    prev_predictions=current_predictions,
256                    quantile=quantile,
257                )
258
259                self.offset_multipliers_.append(multiplier)
260
261                # Update current predictions
262                if current_predictions is None:
263                    # First quantile (lowest)
264                    current_predictions = base_predictions - multiplier * np.abs(
265                        base_predictions
266                    )
267                else:
268                    # Subsequent quantiles
269                    offset = multiplier * np.abs(base_predictions)
270                    current_predictions = current_predictions + offset
271
272            elif self.scoring == "residuals":
273
274                multiplier = self._optimize_multiplier(
275                    y=y,
276                    base_predictions=base_predictions,
277                    scoring_residuals=scoring_residuals,
278                    prev_predictions=current_predictions,
279                    quantile=quantile,
280                )
281
282                self.offset_multipliers_.append(multiplier)
283
284                # Update current predictions
285                if current_predictions is None:
286                    # First quantile (lowest)
287                    current_predictions = base_predictions - multiplier * np.std(
288                        scoring_residuals
289                    )/np.sqrt(len(scoring_residuals))
290                else:
291                    # Subsequent quantiles
292                    offset = multiplier * np.std(scoring_residuals)/np.sqrt(len(scoring_residuals))
293                    current_predictions = current_predictions + offset
294
295            elif self.scoring == "conformal":
296
297                multiplier = self._optimize_multiplier(
298                    y=y_calib,
299                    base_predictions=base_predictions,
300                    scoring_residuals=scoring_residuals,
301                    prev_predictions=current_predictions,
302                    quantile=quantile,
303                )
304
305                self.offset_multipliers_.append(multiplier)
306
307                # Update current predictions
308                if current_predictions is None:
309                    # First quantile (lowest)
310                    current_predictions = base_predictions - multiplier * np.std(
311                        scoring_residuals
312                    )/np.sqrt(len(scoring_residuals))
313                else:
314                    # Subsequent quantiles
315                    offset = multiplier * np.std(scoring_residuals)/np.sqrt(len(scoring_residuals))
316                    current_predictions = current_predictions + offset
317
318            elif self.scoring in ("studentized", "conformal-studentized"):
319
320                multiplier = self._optimize_multiplier(
321                    y=y_calib if self.scoring == "conformal-studentized" else y,
322                    base_predictions=base_predictions,
323                    scoring_residuals=scoring_residuals,
324                    prev_predictions=current_predictions,
325                    quantile=quantile,
326                )
327
328                self.offset_multipliers_.append(multiplier)
329
330                # Update current predictions
331                if current_predictions is None:
332                    current_predictions = (
333                        base_predictions - multiplier * self.student_multiplier_
334                    )
335                else:
336                    offset = multiplier * self.student_multiplier_
337                    current_predictions = current_predictions + offset
338
339        return self
340
341
342    def predict(self, X, return_pi=False):
343        """Predict the target variable.
344
345        Parameters:
346
347            X: {array-like}, shape = [n_samples, n_features]
348                Training vectors, where n_samples is the number of samples and
349                n_features is the number of features.
350
351            return_pi: bool, default=False
352                Whether to return the prediction intervals.
353        """
354        if self.obj_ is None or self.offset_multipliers_ is None:
355            raise ValueError("Model not fitted yet.")
356
357        base_predictions = self.obj_.predict(X)
358        all_predictions = []
359
360        if self.scoring == "predictions":
361
362            # Generate first quantile
363            current_predictions = base_predictions - self.offset_multipliers_[
364                0
365            ] * np.abs(base_predictions)
366            all_predictions.append(current_predictions)
367
368            # Generate remaining quantiles
369            for multiplier in self.offset_multipliers_[1:]:
370                offset = multiplier * np.abs(base_predictions)
371                current_predictions = current_predictions + offset
372                all_predictions.append(current_predictions)
373
374        elif self.scoring in ("residuals", "conformal"):
375
376            # Generate first quantile
377            current_predictions = base_predictions - self.offset_multipliers_[
378                0
379            ] * np.std(self.scoring_residuals_)/np.sqrt(len(self.scoring_residuals_))
380            all_predictions.append(current_predictions)
381
382            # Generate remaining quantiles
383            for multiplier in self.offset_multipliers_[1:]:
384                offset = multiplier * np.std(self.scoring_residuals_)/np.sqrt(len(self.scoring_residuals_))
385                current_predictions = current_predictions + offset
386                all_predictions.append(current_predictions)
387
388        elif self.scoring in ("studentized", "conformal-studentized"):
389            # Generate first quantile
390            current_predictions = (
391                base_predictions
392                - self.offset_multipliers_[0] * self.student_multiplier_
393            )
394            all_predictions.append(current_predictions)
395
396            # Generate remaining quantiles
397            for multiplier in self.offset_multipliers_[1:]:
398                offset = multiplier * self.student_multiplier_
399                current_predictions = current_predictions + offset
400                all_predictions.append(current_predictions)
401
402        if not return_pi:
403            return np.asarray(all_predictions[1])
404
405        DescribeResult = namedtuple(
406            "DescribeResult", ["mean", "lower", "upper", "median"]
407        )
408        # return a populated namedtuple instance
409        return DescribeResult(
410            mean=base_predictions,
411            lower=np.asarray(all_predictions[0]),
412            median=np.asarray(all_predictions[1]),
413            upper=np.asarray(all_predictions[2]),
414        )

Quantile Regressor.

Parameters:

obj: base model (regression model)
    The base regressor from which to build a
    quantile regressor.

level: int, default=95
    The level of the quantiles to compute.

scoring: str, default="predictions"
    The scoring to use for the optimization and constructing
    prediction intervals (predictions, residuals, conformal,
      studentized, conformal-studentized).

Attributes:

obj_ : base model (regression model)
    The base regressor from which to build a
    quantile regressor.

offset_multipliers_ : list
    The multipliers for the offset.

scoring_residuals_ : list
    The residuals for the scoring.

student_multiplier_ : float
    The studentized scale factor (standard error of the response).
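
A minimal usage sketch (the dataset and base regressor are illustrative; any scikit-learn compatible regressor can be passed as `obj`):

```python
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=123)

qr = ns.QuantileRegressor(obj=Ridge(), level=95, scoring="residuals")
qr.fit(X_train, y_train)

print(qr.predict(X_test)[:5])          # median predictions

res = qr.predict(X_test, return_pi=True)
print(res.lower[:5])                   # lower quantile
print(res.upper[:5])                   # upper quantile
print(res.mean[:5])                    # base model predictions
```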
def fit(self, X, y):
187    def fit(self, X, y):
188        """Fit the model to the data.
189
190        Parameters:
191
192            X: {array-like}, shape = [n_samples, n_features]
193                Training vectors, where n_samples is the number of samples and
194                n_features is the number of features.
195            y: array-like, shape = [n_samples]
196                Target values.
197        """
198        self.obj_ = clone(self.obj)
199
200        if self.scoring in ("predictions", "residuals"):
201
202            self.obj_.fit(X, y)
203            base_predictions = self.obj_.predict(X)
204            scoring_residuals = y - base_predictions
205            self.scoring_residuals_ = scoring_residuals
206
207        elif self.scoring == "conformal":
208
209            X_train, X_calib, y_train, y_calib = train_test_split(
210                X, y, test_size=0.5, random_state=42
211            )
212            self.obj_.fit(X_train, y_train)
213            scoring_residuals = y_calib - self.obj_.predict(
214                X_calib
215            )  # These are calibration predictions
216            self.scoring_residuals_ = scoring_residuals
217            # Update base_predictions to use training predictions for optimization
218            self.obj_.fit(X_calib, y_calib)
219            base_predictions = self.obj_.predict(X_calib)
220
221        elif self.scoring in ("studentized", "conformal-studentized"):
222
223            # Calculate student multiplier
224            if self.scoring == "conformal-studentized":
225                X_train, X_calib, y_train, y_calib = train_test_split(
226                    X, y, test_size=0.5, random_state=42
227                )
228                self.obj_.fit(X_train, y_train)
229                scoring_residuals = y_calib - self.obj_.predict(X_calib)
230                # Calculate studentized multiplier using calibration data
231                self.student_multiplier_ = np.std(y_calib, ddof=1) / np.sqrt(
232                    len(y_calib) - 1
233                )
234                self.obj_.fit(X_calib, y_calib)
235                base_predictions = self.obj_.predict(X_calib)
236            else:  # regular studentized
237                self.obj_.fit(X, y)
238                base_predictions = self.obj_.predict(X)
239                scoring_residuals = y - base_predictions
240                self.student_multiplier_ = np.std(y, ddof=1) / np.sqrt(len(y) - 1)
241
242        # Initialize storage for multipliers
243        self.offset_multipliers_ = []
244        # Keep track of current predictions for each quantile
245        current_predictions = None
246
247        # Fit each quantile sequentially
248        for i, quantile in enumerate(self.quantiles):
249
250            if self.scoring == "predictions":
251
252                multiplier = self._optimize_multiplier(
253                    y=y,
254                    base_predictions=base_predictions,
255                    prev_predictions=current_predictions,
256                    quantile=quantile,
257                )
258
259                self.offset_multipliers_.append(multiplier)
260
261                # Update current predictions
262                if current_predictions is None:
263                    # First quantile (lowest)
264                    current_predictions = base_predictions - multiplier * np.abs(
265                        base_predictions
266                    )
267                else:
268                    # Subsequent quantiles
269                    offset = multiplier * np.abs(base_predictions)
270                    current_predictions = current_predictions + offset
271
272            elif self.scoring == "residuals":
273
274                multiplier = self._optimize_multiplier(
275                    y=y,
276                    base_predictions=base_predictions,
277                    scoring_residuals=scoring_residuals,
278                    prev_predictions=current_predictions,
279                    quantile=quantile,
280                )
281
282                self.offset_multipliers_.append(multiplier)
283
284                # Update current predictions
285                if current_predictions is None:
286                    # First quantile (lowest)
287                    current_predictions = base_predictions - multiplier * np.std(
288                        scoring_residuals
289                    )/np.sqrt(len(scoring_residuals))
290                else:
291                    # Subsequent quantiles
292                    offset = multiplier * np.std(scoring_residuals)/np.sqrt(len(scoring_residuals))
293                    current_predictions = current_predictions + offset
294
295            elif self.scoring == "conformal":
296
297                multiplier = self._optimize_multiplier(
298                    y=y_calib,
299                    base_predictions=base_predictions,
300                    scoring_residuals=scoring_residuals,
301                    prev_predictions=current_predictions,
302                    quantile=quantile,
303                )
304
305                self.offset_multipliers_.append(multiplier)
306
307                # Update current predictions
308                if current_predictions is None:
309                    # First quantile (lowest)
310                    current_predictions = base_predictions - multiplier * np.std(
311                        scoring_residuals
312                    )/np.sqrt(len(scoring_residuals))
313                else:
314                    # Subsequent quantiles
315                    offset = multiplier * np.std(scoring_residuals)/np.sqrt(len(scoring_residuals))
316                    current_predictions = current_predictions + offset
317
318            elif self.scoring in ("studentized", "conformal-studentized"):
319
320                multiplier = self._optimize_multiplier(
321                    y=y_calib if self.scoring == "conformal-studentized" else y,
322                    base_predictions=base_predictions,
323                    scoring_residuals=scoring_residuals,
324                    prev_predictions=current_predictions,
325                    quantile=quantile,
326                )
327
328                self.offset_multipliers_.append(multiplier)
329
330                # Update current predictions
331                if current_predictions is None:
332                    current_predictions = (
333                        base_predictions - multiplier * self.student_multiplier_
334                    )
335                else:
336                    offset = multiplier * self.student_multiplier_
337                    current_predictions = current_predictions + offset
338
339        return self

Fit the model to the data.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number of samples and
    n_features is the number of features.
y: array-like, shape = [n_samples]
    Target values.
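
To see what the fitted offset multipliers do, here is a small sketch of the scoring="predictions" case with made-up multiplier values: the first quantile is obtained by subtracting an offset from the base predictions, and each subsequent quantile adds a non-negative offset to the previous one, so the lower, median and upper predictions are ordered by construction.

```python
import numpy as np

base = np.array([10.0, -4.0, 7.5])       # base model predictions
m_low, m_med, m_up = 0.30, 0.25, 0.28    # made-up fitted multipliers

lower = base - m_low * np.abs(base)      # first (lowest) quantile
median = lower + m_med * np.abs(base)    # each next quantile adds an offset
upper = median + m_up * np.abs(base)

assert np.all(lower <= median) and np.all(median <= upper)
```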
def predict(self, X, return_pi=False):
342    def predict(self, X, return_pi=False):
343        """Predict the target variable.
344
345        Parameters:
346
347            X: {array-like}, shape = [n_samples, n_features]
348                Training vectors, where n_samples is the number of samples and
349                n_features is the number of features.
350
351            return_pi: bool, default=False
352                Whether to return the prediction intervals.
353        """
354        if self.obj_ is None or self.offset_multipliers_ is None:
355            raise ValueError("Model not fitted yet.")
356
357        base_predictions = self.obj_.predict(X)
358        all_predictions = []
359
360        if self.scoring == "predictions":
361
362            # Generate first quantile
363            current_predictions = base_predictions - self.offset_multipliers_[
364                0
365            ] * np.abs(base_predictions)
366            all_predictions.append(current_predictions)
367
368            # Generate remaining quantiles
369            for multiplier in self.offset_multipliers_[1:]:
370                offset = multiplier * np.abs(base_predictions)
371                current_predictions = current_predictions + offset
372                all_predictions.append(current_predictions)
373
374        elif self.scoring in ("residuals", "conformal"):
375
376            # Generate first quantile
377            current_predictions = base_predictions - self.offset_multipliers_[
378                0
379            ] * np.std(self.scoring_residuals_)/np.sqrt(len(self.scoring_residuals_))
380            all_predictions.append(current_predictions)
381
382            # Generate remaining quantiles
383            for multiplier in self.offset_multipliers_[1:]:
384                offset = multiplier * np.std(self.scoring_residuals_)/np.sqrt(len(self.scoring_residuals_))
385                current_predictions = current_predictions + offset
386                all_predictions.append(current_predictions)
387
388        elif self.scoring in ("studentized", "conformal-studentized"):
389            # Generate first quantile
390            current_predictions = (
391                base_predictions
392                - self.offset_multipliers_[0] * self.student_multiplier_
393            )
394            all_predictions.append(current_predictions)
395
396            # Generate remaining quantiles
397            for multiplier in self.offset_multipliers_[1:]:
398                offset = multiplier * self.student_multiplier_
399                current_predictions = current_predictions + offset
400                all_predictions.append(current_predictions)
401
402        if not return_pi:
403            return np.asarray(all_predictions[1])
404
405        DescribeResult = namedtuple(
406            "DescribeResult", ["mean", "lower", "upper", "median"]
407        )
408        # return a populated namedtuple instance
409        return DescribeResult(
410            mean=base_predictions,
411            lower=np.asarray(all_predictions[0]),
412            median=np.asarray(all_predictions[1]),
413            upper=np.asarray(all_predictions[2]),
414        )

Predict the target variable.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number of samples and
    n_features is the number of features.

return_pi: bool, default=False
    Whether to return the prediction intervals.
class QuantileClassifier(sklearn.base.BaseEstimator, sklearn.base.ClassifierMixin):
41class QuantileClassifier(BaseEstimator, ClassifierMixin):
42    """
43    Quantile Classifier.
44
45    Parameters:
46
47        obj: base model (regression model)
48            The base regressor from which to build a
49            quantile classifier (it is wrapped in a QuantileRegressor).
50
51        level: int, default=95
52            The level of the quantiles to compute.
53
54        scoring: str, default="predictions"
55            The scoring to use for the optimization and constructing
56            prediction intervals (predictions, residuals, conformal,
57              studentized, conformal-studentized).
58
59    Attributes:
60
61        obj_ : base model (classification model)
62            The base classifier from which to build a
63            quantile classifier.
64
65        offset_multipliers_ : list
66            The multipliers for the offset.
67
68        scoring_residuals_ : list
69            The residuals for the scoring.
70
71        student_multiplier_ : float
 72            The studentized scale factor (standard error of the response).
73
74
75    """
76
77    def __init__(self, obj, level=95, scoring="predictions"):
78        assert scoring in (
79            "predictions",
80            "residuals",
81            "conformal",
82            "studentized",
83            "conformal-studentized",
 84        ), "scoring must be one of 'predictions', 'residuals', 'conformal', 'studentized', 'conformal-studentized'"
85        self.obj = obj
 86        quantileregressor = QuantileRegressor(self.obj, level=level, scoring=scoring)
87        quantileregressor.predict = partial(quantileregressor.predict, return_pi=False)
88        self.obj_ = SimpleMultitaskClassifier(quantileregressor)
89
90    def fit(self, X, y, **kwargs):
 91        self.obj_.fit(X, y, **kwargs)
 92        return self
93    def predict(self, X, **kwargs):
94        return self.obj_.predict(X, **kwargs)
95
96    def predict_proba(self, X, **kwargs):
97        return self.obj_.predict_proba(X, **kwargs)

Quantile Classifier.

Parameters:

obj: base model (regression model)
    The base regressor from which to build a
    quantile classifier (it is wrapped in a QuantileRegressor).

level: int, default=95
    The level of the quantiles to compute.

scoring: str, default="predictions"
    The scoring to use for the optimization and constructing
    prediction intervals (predictions, residuals, conformal,
      studentized, conformal-studentized).

Attributes:

obj_ : base model (classification model)
    The base classifier from which to build a
    quantile classifier.

offset_multipliers_ : list
    The multipliers for the offset.

scoring_residuals_ : list
    The residuals for the scoring.

student_multiplier_ : float
    The studentized scale factor (standard error of the response).
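
A minimal usage sketch: since the base model is wrapped in a QuantileRegressor and then in a SimpleMultitaskClassifier (see the constructor above), a regression model such as LinearRegression is assumed for `obj`:

```python
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=123)

clf = ns.QuantileClassifier(obj=LinearRegression())
clf.fit(X_train, y_train)
print(clf.predict(X_test)[:10])
print(clf.predict_proba(X_test)[:3])
```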
def fit(self, X, y, **kwargs):
90    def fit(self, X, y, **kwargs):
 91        self.obj_.fit(X, y, **kwargs)
 92        return self
def predict(self, X, **kwargs):
93    def predict(self, X, **kwargs):
94        return self.obj_.predict(X, **kwargs)
def predict_proba(self, X, **kwargs):
96    def predict_proba(self, X, **kwargs):
97        return self.obj_.predict_proba(X, **kwargs)
class RandomBagRegressor(nnetsauce.randombag.bag.RandomBag, sklearn.base.RegressorMixin):
 18class RandomBagRegressor(RandomBag, RegressorMixin):
 19    """Randomized 'Bagging' Regression model
 20
 21    Parameters:
 22
 23        obj: object
 24            any object containing a method fit (obj.fit()) and a method predict
 25            (obj.predict())
 26
 27        n_estimators: int
 28            number of base learners in the ensemble (bagging iterations)
 29
 30        n_hidden_features: int
 31            number of nodes in the hidden layer
 32
 33        activation_name: str
 34            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 35
 36        a: float
 37            hyperparameter for 'prelu' or 'elu' activation function
 38
 39        nodes_sim: str
 40            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 41            'uniform'
 42
 43        bias: boolean
 44            indicates if the hidden layer contains a bias term (True) or not
 45            (False)
 46
 47        dropout: float
 48            regularization parameter; (random) percentage of nodes dropped out
 49            of the training
 50
 51        direct_link: boolean
 52            indicates if the original predictors are included (True) in model''s
 53            fitting or not (False)
 54
 55        n_clusters: int
 56            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 57                no clustering)
 58
 59        cluster_encode: bool
 60            defines how the variable containing clusters is treated (default is one-hot)
 61            if `False`, then labels are used, without one-hot encoding
 62
 63        type_clust: str
 64            type of clustering method: currently k-means ('kmeans') or Gaussian
 65            Mixture Model ('gmm')
 66
 67        type_scaling: a tuple of 3 strings
 68            scaling methods for inputs, hidden layer, and clustering respectively
 69            (and when relevant).
 70            Currently available: standardization ('std') or MinMax scaling ('minmax')
 71
 72        col_sample: float
 73            percentage of covariates randomly chosen for training
 74
 75        row_sample: float
 76            percentage of rows chosen for training, by stratified bootstrapping
 77
 78        seed: int
 79            reproducibility seed for nodes_sim=='uniform'
 80
 81        backend: str
 82            "cpu" or "gpu" or "tpu"
 83
 84    Attributes:
 85
 86        voter_: dict
 87            dictionary containing all the fitted base-learners
 88
 89
 90    Examples:
 91
 92    ```python
 93    import numpy as np
 94    import nnetsauce as ns
 95    from sklearn.datasets import fetch_california_housing
 96    from sklearn.tree import DecisionTreeRegressor
 97    from sklearn.model_selection import train_test_split
 98
 99    X, y = fetch_california_housing(return_X_y=True, as_frame=False)
100
101    # split data into training set and test set
102    X_train, X_test, y_train, y_test = train_test_split(X, y,
103                                                        test_size=0.2, random_state=13)
104
105    # Requires further tuning
106    obj = DecisionTreeRegressor(max_depth=3, random_state=123)
107    obj2 = ns.RandomBagRegressor(obj=obj, direct_link=False,
108                                n_estimators=50,
109                                col_sample=0.9, row_sample=0.9,
110                                dropout=0, n_clusters=0, verbose=1)
111
112    obj2.fit(X_train, y_train)
113
114    print(np.sqrt(obj2.score(X_test, y_test))) # RMSE
115
116    ```
117
118    """
119
120    # construct the object -----
121
122    def __init__(
123        self,
124        obj,
125        n_estimators=10,
126        n_hidden_features=1,
127        activation_name="relu",
128        a=0.01,
129        nodes_sim="sobol",
130        bias=True,
131        dropout=0,
132        direct_link=False,
133        n_clusters=2,
134        cluster_encode=True,
135        type_clust="kmeans",
136        type_scaling=("std", "std", "std"),
137        col_sample=1,
138        row_sample=1,
139        n_jobs=None,
140        seed=123,
141        verbose=1,
142        backend="cpu",
143    ):
144        super().__init__(
145            obj=obj,
146            n_estimators=n_estimators,
147            n_hidden_features=n_hidden_features,
148            activation_name=activation_name,
149            a=a,
150            nodes_sim=nodes_sim,
151            bias=bias,
152            dropout=dropout,
153            direct_link=direct_link,
154            n_clusters=n_clusters,
155            cluster_encode=cluster_encode,
156            type_clust=type_clust,
157            type_scaling=type_scaling,
158            col_sample=col_sample,
159            row_sample=row_sample,
160            seed=seed,
161            backend=backend,
162        )
163
164        self.type_fit = "regression"
165        self.verbose = verbose
166        self.n_jobs = n_jobs
167        self.voter_ = {}
168
169    def fit(self, X, y, **kwargs):
170        """Fit Random 'Bagging' model to training data (X, y).
171
172        Args:
173
174            X: {array-like}, shape = [n_samples, n_features]
175                Training vectors, where n_samples is the number
176                of samples and n_features is the number of features.
177
178            y: array-like, shape = [n_samples]
179                Target values.
180
181            **kwargs: additional parameters to be passed to
182                    self.cook_training_set or self.obj.fit
183
184        Returns:
185
186            self: object
187
188        """
189
190        base_learner = CustomRegressor(
191            self.obj,
192            n_hidden_features=self.n_hidden_features,
193            activation_name=self.activation_name,
194            a=self.a,
195            nodes_sim=self.nodes_sim,
196            bias=self.bias,
197            dropout=self.dropout,
198            direct_link=self.direct_link,
199            n_clusters=self.n_clusters,
200            type_clust=self.type_clust,
201            type_scaling=self.type_scaling,
202            col_sample=self.col_sample,
203            row_sample=self.row_sample,
204            seed=self.seed,
205        )
206
207        # 1 - Sequential training -----
208
209        if self.n_jobs is None:
210            self.voter_ = rbagloop_regression(
211                base_learner, X, y, self.n_estimators, self.verbose, self.seed
212            )
213
214            self.n_estimators = len(self.voter_)
215
216            return self
217
218        # 2 - Parallel training -----
219        # buggy
220        # if self.n_jobs is not None:
221        def fit_estimators(m):
222            base_learner__ = deepcopy(base_learner)
223            base_learner__.set_params(seed=self.seed + m * 1000)
224            base_learner__.fit(X, y, **kwargs)
225            return base_learner__
226
227        if self.verbose == 1:
228            voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
229                delayed(fit_estimators)(m) for m in tqdm(range(self.n_estimators))
230            )
231        else:
232            voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
233                delayed(fit_estimators)(m) for m in range(self.n_estimators)
234            )
235
236        self.voter_ = {i: elt for i, elt in enumerate(voters_list)}
237
238        self.n_estimators = len(self.voter_)
239
240        return self
241
242    def predict(self, X, weights=None, **kwargs):
243        """Predict for test data X.
244
245        Args:
246
247            X: {array-like}, shape = [n_samples, n_features]
248                Training vectors, where n_samples is the number
249                of samples and n_features is the number of features.
250
251            **kwargs: additional parameters to be passed to
252                    self.cook_test_set
253
254        Returns:
255
256            estimates for test data: {array-like}
257
258        """
259
260        def calculate_preds(voter, weights=None):
261            ensemble_preds = 0
262
263            n_iter = len(voter)
264
265            assert n_iter > 0, "no estimator found in `RandomBag` ensemble"
266
267            if weights is None:
268                for idx, elt in voter.items():
269                    ensemble_preds += elt.predict(X)
270
271                return ensemble_preds / n_iter
272
273            # if weights is not None:
274            for idx, elt in voter.items():
275                ensemble_preds += weights[idx] * elt.predict(X)
276
277            return ensemble_preds
278
279        # end calculate_preds ----
280
281        if weights is None:
282            return calculate_preds(self.voter_)
283
284        # if weights is not None:
285        self.weights = weights
286
287        return calculate_preds(self.voter_, weights)

Randomized 'Bagging' Regression model

Parameters:

obj: object
    any object containing a method fit (obj.fit()) and a method predict
    (obj.predict())

n_estimators: int
    number of base learners in the ensemble (bagging iterations)

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

col_sample: float
    percentage of covariates randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

seed: int
    reproducibility seed for nodes_sim=='uniform'

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

voter_: dict
    dictionary containing all the fitted base-learners

Examples:

import numpy as np
import nnetsauce as ns
from sklearn.datasets import fetch_california_housing
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split

X, y = fetch_california_housing(return_X_y=True, as_frame=False)

# split data into training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.2, random_state=13)

# Requires further tuning
obj = DecisionTreeRegressor(max_depth=3, random_state=123)
obj2 = ns.RandomBagRegressor(obj=obj, direct_link=False,
                            n_estimators=50,
                            col_sample=0.9, row_sample=0.9,
                            dropout=0, n_clusters=0, verbose=1)

obj2.fit(X_train, y_train)

print(np.sqrt(obj2.score(X_test, y_test))) # RMSE
def fit(self, X, y, **kwargs):
169    def fit(self, X, y, **kwargs):
170        """Fit Random 'Bagging' model to training data (X, y).
171
172        Args:
173
174            X: {array-like}, shape = [n_samples, n_features]
175                Training vectors, where n_samples is the number
176                of samples and n_features is the number of features.
177
178            y: array-like, shape = [n_samples]
179                Target values.
180
181            **kwargs: additional parameters to be passed to
182                    self.cook_training_set or self.obj.fit
183
184        Returns:
185
186            self: object
187
188        """
189
190        base_learner = CustomRegressor(
191            self.obj,
192            n_hidden_features=self.n_hidden_features,
193            activation_name=self.activation_name,
194            a=self.a,
195            nodes_sim=self.nodes_sim,
196            bias=self.bias,
197            dropout=self.dropout,
198            direct_link=self.direct_link,
199            n_clusters=self.n_clusters,
200            type_clust=self.type_clust,
201            type_scaling=self.type_scaling,
202            col_sample=self.col_sample,
203            row_sample=self.row_sample,
204            seed=self.seed,
205        )
206
207        # 1 - Sequential training -----
208
209        if self.n_jobs is None:
210            self.voter_ = rbagloop_regression(
211                base_learner, X, y, self.n_estimators, self.verbose, self.seed
212            )
213
214            self.n_estimators = len(self.voter_)
215
216            return self
217
218        # 2 - Parallel training -----
219        # buggy
220        # if self.n_jobs is not None:
221        def fit_estimators(m):
222            base_learner__ = deepcopy(base_learner)
223            base_learner__.set_params(seed=self.seed + m * 1000)
224            base_learner__.fit(X, y, **kwargs)
225            return base_learner__
226
227        if self.verbose == 1:
228            voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
229                delayed(fit_estimators)(m) for m in tqdm(range(self.n_estimators))
230            )
231        else:
232            voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
233                delayed(fit_estimators)(m) for m in range(self.n_estimators)
234            )
235
236        self.voter_ = {i: elt for i, elt in enumerate(voters_list)}
237
238        self.n_estimators = len(self.voter_)
239
240        return self

Fit Random 'Bagging' model to training data (X, y).

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
def predict(self, X, weights=None, **kwargs):
242    def predict(self, X, weights=None, **kwargs):
243        """Predict for test data X.
244
245        Args:
246
247            X: {array-like}, shape = [n_samples, n_features]
248                Training vectors, where n_samples is the number
249                of samples and n_features is the number of features.
250
251            **kwargs: additional parameters to be passed to
252                    self.cook_test_set
253
254        Returns:
255
256            estimates for test data: {array-like}
257
258        """
259
260        def calculate_preds(voter, weights=None):
261            ensemble_preds = 0
262
263            n_iter = len(voter)
264
265            assert n_iter > 0, "no estimator found in `RandomBag` ensemble"
266
267            if weights is None:
268                for idx, elt in voter.items():
269                    ensemble_preds += elt.predict(X)
270
271                return ensemble_preds / n_iter
272
273            # if weights is not None:
274            for idx, elt in voter.items():
275                ensemble_preds += weights[idx] * elt.predict(X)
276
277            return ensemble_preds
278
279        # end calculate_preds ----
280
281        if weights is None:
282            return calculate_preds(self.voter_)
283
284        # if weights is not None:
285        self.weights = weights
286
287        return calculate_preds(self.voter_, weights)

Predict for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

weights: {array-like} or None
    optional per-estimator weights; when provided, each base learner's
    predictions are multiplied by the corresponding weight and summed
    (no renormalization)

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

estimates for test data: {array-like}
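
As a complement, here is a minimal sketch of how the optional `weights` argument can be used, assuming the `obj2` ensemble from the regression example above has already been fitted; the uniform weights below are purely illustrative:

```python
import numpy as np

# uniform weights reproduce the default simple average; note that the
# weighted predictions are summed as-is (no renormalization), so weights
# should sum to 1 if a weighted average is intended
n = obj2.n_estimators
uniform_weights = np.repeat(1.0 / n, n)

preds_default = obj2.predict(X_test)                        # simple average
preds_weighted = obj2.predict(X_test, weights=uniform_weights)

assert np.allclose(preds_default, preds_weighted)
```
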
class RandomBagClassifier(nnetsauce.randombag.bag.RandomBag, sklearn.base.ClassifierMixin):
 18class RandomBagClassifier(RandomBag, ClassifierMixin):
 19    """Randomized 'Bagging' Classification model
 20
 21    Parameters:
 22
 23        obj: object
 24            any object containing a method fit (obj.fit()) and a method predict
 25            (obj.predict())
 26
 27        n_estimators: int
 28            number of boosting iterations
 29
 30        n_hidden_features: int
 31            number of nodes in the hidden layer
 32
 33        activation_name: str
 34            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 35
 36        a: float
 37            hyperparameter for 'prelu' or 'elu' activation function
 38
 39        nodes_sim: str
 40            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 41            'uniform'
 42
 43        bias: boolean
 44            indicates if the hidden layer contains a bias term (True) or not
 45            (False)
 46
 47        dropout: float
 48            regularization parameter; (random) percentage of nodes dropped out
 49            of the training
 50
 51        direct_link: boolean
 52            indicates if the original predictors are included (True) in model's
 53            fitting or not (False)
 54
 55        n_clusters: int
 56            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 57                no clustering)
 58
 59        cluster_encode: bool
 60            defines how the variable containing clusters is treated (default is one-hot)
 61            if `False`, then labels are used, without one-hot encoding
 62
 63        type_clust: str
 64            type of clustering method: currently k-means ('kmeans') or Gaussian
 65            Mixture Model ('gmm')
 66
 67        type_scaling: a tuple of 3 strings
 68            scaling methods for inputs, hidden layer, and clustering respectively
 69            (and when relevant).
 70            Currently available: standardization ('std') or MinMax scaling ('minmax')
 71
 72        col_sample: float
 73            percentage of covariates randomly chosen for training
 74
 75        row_sample: float
 76            percentage of rows chosen for training, by stratified bootstrapping
 77
 78        seed: int
 79            reproducibility seed for nodes_sim=='uniform'
 80
 81        backend: str
 82            "cpu" or "gpu" or "tpu"
 83
 84    Attributes:
 85
 86        voter_: dict
 87            dictionary containing all the fitted base-learners
 88
 89
 90    Examples:
 91
 92    See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/randombag_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/randombag_classification.py)
 93
 94    ```python
 95    import nnetsauce as ns
 96    from sklearn.datasets import load_breast_cancer
 97    from sklearn.tree import DecisionTreeClassifier
 98    from sklearn.model_selection import train_test_split
 99    from sklearn import metrics
100    from time import time
101
102
103    breast_cancer = load_breast_cancer()
104    Z = breast_cancer.data
105    t = breast_cancer.target
106    np.random.seed(123)
107    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)
108
109    # decision tree
110    clf = DecisionTreeClassifier(max_depth=2, random_state=123)
111    fit_obj = ns.RandomBagClassifier(clf, n_hidden_features=2,
112                                    direct_link=True,
113                                    n_estimators=100,
114                                    col_sample=0.9, row_sample=0.9,
115                                    dropout=0.3, n_clusters=0, verbose=1)
116
117    start = time()
118    fit_obj.fit(X_train, y_train)
119    print(f"Elapsed {time() - start}")
120
121    print(fit_obj.score(X_test, y_test))
122    print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
123
124    start = time()
125    preds = fit_obj.predict(X_test)
126    print(f"Elapsed {time() - start}")
127    print(metrics.classification_report(preds, y_test))
128    ```
129
130    """
131
132    # construct the object -----
133    _estimator_type = "classifier"
134
135    def __init__(
136        self,
137        obj,
138        n_estimators=10,
139        n_hidden_features=1,
140        activation_name="relu",
141        a=0.01,
142        nodes_sim="sobol",
143        bias=True,
144        dropout=0,
145        direct_link=False,
146        n_clusters=2,
147        cluster_encode=True,
148        type_clust="kmeans",
149        type_scaling=("std", "std", "std"),
150        col_sample=1,
151        row_sample=1,
152        n_jobs=None,
153        seed=123,
154        verbose=1,
155        backend="cpu",
156    ):
157        super().__init__(
158            obj=obj,
159            n_estimators=n_estimators,
160            n_hidden_features=n_hidden_features,
161            activation_name=activation_name,
162            a=a,
163            nodes_sim=nodes_sim,
164            bias=bias,
165            dropout=dropout,
166            direct_link=direct_link,
167            n_clusters=n_clusters,
168            cluster_encode=cluster_encode,
169            type_clust=type_clust,
170            type_scaling=type_scaling,
171            col_sample=col_sample,
172            row_sample=row_sample,
173            seed=seed,
174            backend=backend,
175        )
176
177        self.type_fit = "classification"
178        self.verbose = verbose
179        self.n_jobs = n_jobs
180        self.voter_ = {}
181
182    def fit(self, X, y, **kwargs):
183        """Fit Random 'Bagging' model to training data (X, y).
184
185        Args:
186
187            X: {array-like}, shape = [n_samples, n_features]
188                Training vectors, where n_samples is the number
189                of samples and n_features is the number of features.
190
191            y: array-like, shape = [n_samples]
192                Target values.
193
194            **kwargs: additional parameters to be passed to
195                    self.cook_training_set or self.obj.fit
196
197        Returns:
198
199            self: object
200
201        """
202
203        assert mx.is_factor(y), "y must contain only integers"
204
205        self.n_classes_ = len(np.unique(y))  # for compatibility with sklearn
206
207        # training
208        self.n_classes = len(np.unique(y))
209
210        base_learner = CustomClassifier(
211            self.obj,
212            n_hidden_features=self.n_hidden_features,
213            activation_name=self.activation_name,
214            a=self.a,
215            nodes_sim=self.nodes_sim,
216            bias=self.bias,
217            dropout=self.dropout,
218            direct_link=self.direct_link,
219            n_clusters=self.n_clusters,
220            type_clust=self.type_clust,
221            type_scaling=self.type_scaling,
222            col_sample=self.col_sample,
223            row_sample=self.row_sample,
224            seed=self.seed,
225            cv_calibration=None
226        )
227
228        # 1 - Sequential training -----
229
230        if self.n_jobs is None:
231            self.voter_ = rbagloop_classification(
232                base_learner, X, y, self.n_estimators, self.verbose, self.seed
233            )
234
235            self.n_estimators = len(self.voter_)
236
237            return self
238
239        # 2 - Parallel training -----
240        # buggy
241        # if self.n_jobs is not None:
242        def fit_estimators(m):
243            base_learner__ = deepcopy(base_learner)
244            base_learner__.set_params(seed=self.seed + m * 1000)
245            base_learner__.fit(X, y, **kwargs)
246            return base_learner__
247
248        if self.verbose == 1:
249            voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
250                delayed(fit_estimators)(m) for m in tqdm(range(self.n_estimators))
251            )
252        else:
253            voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
254                delayed(fit_estimators)(m) for m in range(self.n_estimators)
255            )
256
257        self.voter_ = {idx: elt for idx, elt in enumerate(voters_list)}
258
259        self.n_estimators = len(self.voter_)
260        self.classes_ = np.unique(y)
261        return self
262
263    def predict(self, X, weights=None, **kwargs):
264        """Predict test data X.
265
266        Args:
267
268            X: {array-like}, shape = [n_samples, n_features]
269                Training vectors, where n_samples is the number
270                of samples and n_features is the number of features.
271
272            **kwargs: additional parameters to be passed to
273                    self.cook_test_set
274
275        Returns:
276
277            model predictions: {array-like}
278
279        """
280        return self.predict_proba(X, weights, **kwargs).argmax(axis=1)
281
282    def predict_proba(self, X, weights=None, **kwargs):
283        """Predict probabilities for test data X.
284
285        Args:
286
287            X: {array-like}, shape = [n_samples, n_features]
288                Training vectors, where n_samples is the number
289                of samples and n_features is the number of features.
290
291            **kwargs: additional parameters to be passed to
292                    self.cook_test_set
293
294        Returns:
295
296            probability estimates for test data: {array-like}
297
298        """
299
300        def calculate_probas(voter, weights=None, verbose=None):
301            ensemble_proba = 0
302
303            n_iter = len(voter)
304
305            assert n_iter > 0, "no estimator found in `RandomBag` ensemble"
306
307            if weights is None:
308                for idx, elt in voter.items():
309                    try:
310                        ensemble_proba += elt.predict_proba(X)
311
312                        # if verbose == 1:
313                        #    pbar.update(idx)
314
315                    except:
316                        continue
317
318                # if verbose == 1:
319                #    pbar.update(n_iter)
320
321                return ensemble_proba / n_iter
322
323            # if weights is not None:
324            for idx, elt in voter.items():
325                ensemble_proba += weights[idx] * elt.predict_proba(X)
326
327                # if verbose == 1:
328                #    pbar.update(idx)
329
330            # if verbose == 1:
331            #    pbar.update(n_iter)
332
333            return ensemble_proba
334
335        # end calculate_probas ----
336
337        if self.n_jobs is None:
338            # if self.verbose == 1:
339            #    pbar = Progbar(self.n_estimators)
340
341            if weights is None:
342                return calculate_probas(self.voter_, verbose=self.verbose)
343
344            # if weights is not None:
345            self.weights = weights
346
347            return calculate_probas(self.voter_, weights, verbose=self.verbose)
348
349        # if self.n_jobs is not None:
350        def predict_estimator(m):
351            try:
352                return self.voter_[m].predict_proba(X)
353            except:
354                pass
355
356        if self.verbose == 1:
357            preds = Parallel(n_jobs=self.n_jobs, prefer="threads")(
358                delayed(predict_estimator)(m) for m in tqdm(range(self.n_estimators))
359            )
360
361        else:
362            preds = Parallel(n_jobs=self.n_jobs, prefer="threads")(
363                delayed(predict_estimator)(m) for m in range(self.n_estimators)
364            )
365
366        ensemble_proba = 0
367
368        if weights is None:
369            for i in range(self.n_estimators):
370                ensemble_proba += preds[i]
371
372            return ensemble_proba / self.n_estimators
373
374        for i in range(self.n_estimators):
375            ensemble_proba += weights[i] * preds[i]
376
377        return ensemble_proba
378
379
380    @property
381    def _estimator_type(self):
382        return "classifier"            

Randomized 'Bagging' Classification model

Parameters:

obj: object
    any object containing a method fit (obj.fit()) and a method predict
    (obj.predict())

n_estimators: int
    number of bagging iterations (i.e. number of base learners in the ensemble)

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

col_sample: float
    percentage of covariates randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

seed: int
    reproducibility seed for nodes_sim=='uniform'

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

voter_: dict
    dictionary containing all the fitted base-learners

Examples:

See also https://github.com/Techtonique/nnetsauce/blob/master/examples/randombag_classification.py

import numpy as np
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time


breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)

# decision tree
clf = DecisionTreeClassifier(max_depth=2, random_state=123)
fit_obj = ns.RandomBagClassifier(clf, n_hidden_features=2,
                                direct_link=True,
                                n_estimators=100,
                                col_sample=0.9, row_sample=0.9,
                                dropout=0.3, n_clusters=0, verbose=1)

start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")

print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))

start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
def fit(self, X, y, **kwargs):
182    def fit(self, X, y, **kwargs):
183        """Fit Random 'Bagging' model to training data (X, y).
184
185        Args:
186
187            X: {array-like}, shape = [n_samples, n_features]
188                Training vectors, where n_samples is the number
189                of samples and n_features is the number of features.
190
191            y: array-like, shape = [n_samples]
192                Target values.
193
194            **kwargs: additional parameters to be passed to
195                    self.cook_training_set or self.obj.fit
196
197        Returns:
198
199            self: object
200
201        """
202
203        assert mx.is_factor(y), "y must contain only integers"
204
205        self.n_classes_ = len(np.unique(y))  # for compatibility with sklearn
206
207        # training
208        self.n_classes = len(np.unique(y))
209
210        base_learner = CustomClassifier(
211            self.obj,
212            n_hidden_features=self.n_hidden_features,
213            activation_name=self.activation_name,
214            a=self.a,
215            nodes_sim=self.nodes_sim,
216            bias=self.bias,
217            dropout=self.dropout,
218            direct_link=self.direct_link,
219            n_clusters=self.n_clusters,
220            type_clust=self.type_clust,
221            type_scaling=self.type_scaling,
222            col_sample=self.col_sample,
223            row_sample=self.row_sample,
224            seed=self.seed,
225            cv_calibration=None
226        )
227
228        # 1 - Sequential training -----
229
230        if self.n_jobs is None:
231            self.voter_ = rbagloop_classification(
232                base_learner, X, y, self.n_estimators, self.verbose, self.seed
233            )
234
235            self.n_estimators = len(self.voter_)
236
237            return self
238
239        # 2 - Parallel training -----
240        # buggy
241        # if self.n_jobs is not None:
242        def fit_estimators(m):
243            base_learner__ = deepcopy(base_learner)
244            base_learner__.set_params(seed=self.seed + m * 1000)
245            base_learner__.fit(X, y, **kwargs)
246            return base_learner__
247
248        if self.verbose == 1:
249            voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
250                delayed(fit_estimators)(m) for m in tqdm(range(self.n_estimators))
251            )
252        else:
253            voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
254                delayed(fit_estimators)(m) for m in range(self.n_estimators)
255            )
256
257        self.voter_ = {idx: elt for idx, elt in enumerate(voters_list)}
258
259        self.n_estimators = len(self.voter_)
260        self.classes_ = np.unique(y)
261        return self

Fit Random 'Bagging' model to training data (X, y).

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
def predict(self, X, weights=None, **kwargs):
263    def predict(self, X, weights=None, **kwargs):
264        """Predict test data X.
265
266        Args:
267
268            X: {array-like}, shape = [n_samples, n_features]
269                Training vectors, where n_samples is the number
270                of samples and n_features is the number of features.
271
272            **kwargs: additional parameters to be passed to
273                    self.cook_test_set
274
275        Returns:
276
277            model predictions: {array-like}
278
279        """
280        return self.predict_proba(X, weights, **kwargs).argmax(axis=1)

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
def predict_proba(self, X, weights=None, **kwargs):
282    def predict_proba(self, X, weights=None, **kwargs):
283        """Predict probabilities for test data X.
284
285        Args:
286
287            X: {array-like}, shape = [n_samples, n_features]
288                Training vectors, where n_samples is the number
289                of samples and n_features is the number of features.
290
291            **kwargs: additional parameters to be passed to
292                    self.cook_test_set
293
294        Returns:
295
296            probability estimates for test data: {array-like}
297
298        """
299
300        def calculate_probas(voter, weights=None, verbose=None):
301            ensemble_proba = 0
302
303            n_iter = len(voter)
304
305            assert n_iter > 0, "no estimator found in `RandomBag` ensemble"
306
307            if weights is None:
308                for idx, elt in voter.items():
309                    try:
310                        ensemble_proba += elt.predict_proba(X)
311
312                        # if verbose == 1:
313                        #    pbar.update(idx)
314
315                    except:
316                        continue
317
318                # if verbose == 1:
319                #    pbar.update(n_iter)
320
321                return ensemble_proba / n_iter
322
323            # if weights is not None:
324            for idx, elt in voter.items():
325                ensemble_proba += weights[idx] * elt.predict_proba(X)
326
327                # if verbose == 1:
328                #    pbar.update(idx)
329
330            # if verbose == 1:
331            #    pbar.update(n_iter)
332
333            return ensemble_proba
334
335        # end calculate_probas ----
336
337        if self.n_jobs is None:
338            # if self.verbose == 1:
339            #    pbar = Progbar(self.n_estimators)
340
341            if weights is None:
342                return calculate_probas(self.voter_, verbose=self.verbose)
343
344            # if weights is not None:
345            self.weights = weights
346
347            return calculate_probas(self.voter_, weights, verbose=self.verbose)
348
349        # if self.n_jobs is not None:
350        def predict_estimator(m):
351            try:
352                return self.voter_[m].predict_proba(X)
353            except:
354                pass
355
356        if self.verbose == 1:
357            preds = Parallel(n_jobs=self.n_jobs, prefer="threads")(
358                delayed(predict_estimator)(m) for m in tqdm(range(self.n_estimators))
359            )
360
361        else:
362            preds = Parallel(n_jobs=self.n_jobs, prefer="threads")(
363                delayed(predict_estimator)(m) for m in range(self.n_estimators)
364            )
365
366        ensemble_proba = 0
367
368        if weights is None:
369            for i in range(self.n_estimators):
370                ensemble_proba += preds[i]
371
372            return ensemble_proba / self.n_estimators
373
374        for i in range(self.n_estimators):
375            ensemble_proba += weights[i] * preds[i]
376
377        return ensemble_proba

Predict probabilities for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

weights: {array-like} or None
    optional per-estimator weights; when provided, each base learner's
    class probabilities are multiplied by the corresponding weight and
    summed (no renormalization)

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

probability estimates for test data: {array-like}
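
For illustration, a minimal sketch of `predict_proba` with and without explicit per-estimator weights, assuming the `fit_obj` ensemble from the classification example above has already been fitted; the uniform weights are purely illustrative:

```python
import numpy as np

# class probabilities averaged over the ensemble (default behaviour)
probas = fit_obj.predict_proba(X_test)

# explicit per-estimator weights; as in the source above, the weighted
# probabilities are summed without renormalization, so weights summing
# to 1 keep each row of the result a valid probability distribution
w = np.repeat(1.0 / fit_obj.n_estimators, fit_obj.n_estimators)
probas_weighted = fit_obj.predict_proba(X_test, weights=w)

preds = probas.argmax(axis=1)  # this is what predict() does internally
```
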
class RegressorUpdater(sklearn.base.BaseEstimator, sklearn.base.RegressorMixin):
 16class RegressorUpdater(BaseEstimator, RegressorMixin):
 17    """
 18    Update a regression model with new observations
 19
 20    Parameters
 21    ----------
 22    regr: object
 23        A regression model with a coef_ attribute
 24    alpha: float
 25        Updating factor's exponent
 26
 27    Attributes
 28    ----------
 29    n_obs_: int
 30        Number of observations
 31    coef_: np.ndarray
 32        Coefficients of the model
 33    updating_factor_: float
 34        Updating factor
 35
 36    """
 37
 38    def __init__(self, regr, alpha=0.5):
 39        self.regr = regr
 40        self.alpha = alpha
 41        self.n_obs_ = None
 42        self.coef_ = None
 43        self.updating_factor_ = None
 44        try:
 45            self.coef_ = self.regr.coef_
 46            if isinstance(self.regr, Base):
 47                self.n_obs_ = self.regr.scaler_.n_samples_seen_
 48        except AttributeError:
 49            pass
 50
 51    def fit(self, X, y, **kwargs):
 52
 53        if isinstance(self.regr, CustomRegressor):  # nnetsauce model not deep ---
 54            if check_is_fitted(self.regr) == False:
 55                self.regr.fit(X, y, **kwargs)
 56                self.n_obs_ = X.shape[0]
 57                if hasattr(self.regr, "coef_"):
 58                    self.coef_ = self.regr.coef_
 59                return self
 60            self.n_obs_ = self.regr.scaler_.n_samples_seen_
 61            if hasattr(self.regr, "coef_"):
 62                self.coef_ = self.regr.coef_
 63            return self
 64
 65        if (
 66            hasattr(self.regr, "coef_") == False
 67        ):  # sklearn model or CustomRegressor model ---
 68            self.regr.fit(X, y)
 69            self.n_obs_ = X.shape[0]
 70            self.regr.fit(X, y)
 71            if hasattr(self.regr, "stacked_obj"):
 72                self.coef_ = self.regr.stacked_obj.coef_
 73            else:
 74                self.coef_ = self.regr.coef_
 75            return self
 76        self.n_obs_ = X.shape[0]
 77        if hasattr(self.regr, "coef_"):
 78            self.coef_ = self.regr.coef_
 79        return self
 80
 81    def predict(self, X):
 82        # assert hasattr(self.regr, "coef_"), "model must have coef_ attribute"
 83        return self.regr.predict(X)
 84
 85    def partial_fit(self, X, y):
 86
 87        assert hasattr(
 88            self.regr, "coef_"
 89        ), "model must be fitted first (i.e have 'coef_' attribute)"
 90        assert (
 91            self.n_obs_ is not None
 92        ), "model must be fitted first (i.e have 'n_obs_' attribute)"
 93
 94        if len(X.shape) == 1:
 95            X = X.reshape(1, -1)
 96
 97        assert X.shape[0] == 1, "X must have one row"
 98
 99        self.updating_factor_ = self.n_obs_ ** (-self.alpha)
100
101        if isinstance(self.regr, Base):  # nnetsauce model ---
102
103            newX = deepcopy(X)
104
105            if isinstance(
106                self.regr, CustomRegressor
107            ):  # other nnetsauce model (CustomRegressor) ---
108                newX = self.regr.cook_test_set(X=X)
109                if isinstance(X, pd.DataFrame):
110                    newx = newX.values.ravel()
111                else:
112                    newx = newX.ravel()
113
114        else:  # an sklearn model ---
115
116            if isinstance(X, pd.DataFrame):
117                newx = X.values.ravel()
118            else:
119                newx = X.ravel()
120
121        new_coef = self.regr.coef_ + self.updating_factor_ * np.dot(
122            newx, y - np.dot(newx, self.regr.coef_)
123        )
124        self.regr.coef_ = _update_mean(self.regr.coef_, self.n_obs_, new_coef)
125        self.coef_ = deepcopy(self.regr.coef_)
126        self.n_obs_ += 1
127        return self

Update a regression model with new observations

Parameters

regr: object
    A regression model with a coef_ attribute

alpha: float
    Updating factor's exponent

Attributes

n_obs_: int
    Number of observations

coef_: np.ndarray
    Coefficients of the model

updating_factor_: float
    Updating factor
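
For illustration, a minimal sketch of online updating with RegressorUpdater, assuming a scikit-learn linear model that exposes a `coef_` attribute; the dataset and the `alpha` value are arbitrary choices for the example:

```python
import numpy as np
import nnetsauce as ns
from sklearn.linear_model import Ridge
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

X, y = fetch_california_housing(return_X_y=True, as_frame=False)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=13)

# wrap a linear model exposing coef_; alpha controls how fast the
# updating factor n_obs_ ** (-alpha) decays with the number of observations
updater = ns.RegressorUpdater(Ridge(), alpha=0.5)
updater.fit(X_train, y_train)

# feed new observations one row at a time
for i in range(5):
    updater.partial_fit(X_test[i, :], y_test[i])

print(updater.coef_)
print(updater.n_obs_)
```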

def fit(self, X, y, **kwargs):
51    def fit(self, X, y, **kwargs):
52
53        if isinstance(self.regr, CustomRegressor):  # nnetsauce model not deep ---
54            if check_is_fitted(self.regr) == False:
55                self.regr.fit(X, y, **kwargs)
56                self.n_obs_ = X.shape[0]
57                if hasattr(self.regr, "coef_"):
58                    self.coef_ = self.regr.coef_
59                return self
60            self.n_obs_ = self.regr.scaler_.n_samples_seen_
61            if hasattr(self.regr, "coef_"):
62                self.coef_ = self.regr.coef_
63            return self
64
65        if (
66            hasattr(self.regr, "coef_") == False
67        ):  # sklearn model or CustomRegressor model ---
68            self.regr.fit(X, y)
69            self.n_obs_ = X.shape[0]
70            self.regr.fit(X, y)
71            if hasattr(self.regr, "stacked_obj"):
72                self.coef_ = self.regr.stacked_obj.coef_
73            else:
74                self.coef_ = self.regr.coef_
75            return self
76        self.n_obs_ = X.shape[0]
77        if hasattr(self.regr, "coef_"):
78            self.coef_ = self.regr.coef_
79        return self
def predict(self, X):
81    def predict(self, X):
82        # assert hasattr(self.regr, "coef_"), "model must have coef_ attribute"
83        return self.regr.predict(X)
class ClassifierUpdater(sklearn.base.BaseEstimator, sklearn.base.ClassifierMixin):
 16class ClassifierUpdater(BaseEstimator, ClassifierMixin):
 17    """
 18    Update a regression model with new observations
 19
 20    Parameters
 21    ----------
 22    clf: object
 23        A regression model with a coef_ attribute
 24    alpha: float
 25        Updating factor's exponent
 26
 27    Attributes
 28    ----------
 29    n_obs_: int
 30        Number of observations
 31    coef_: np.ndarray
 32        Coefficients of the model
 33    updating_factor_: float
 34        Updating factor
 35
 36    """
 37    _estimator_type = "classifier"
 38
 39    def __init__(self, clf, alpha=0.5):
 40        self.clf = clf
 41        self.alpha = alpha
 42        self.n_obs_ = None
 43        self.coef_ = None
 44        self.updating_factor_ = None
 45        try:
 46            self.coef_ = self.clf.coef_
 47            if isinstance(self.clf, Base):
 48                self.n_obs_ = self.clf.scaler_.n_samples_seen_
 49        except AttributeError:
 50            pass
 51
 52    def fit(self, X, y, **kwargs):
 53
 54        raise NotImplementedError("fit method is not implemented for ClassifierUpdater")
 55
 56        if isinstance(self.clf, CustomClassifier):  # nnetsauce model not deep ---
 57            if check_is_fitted(self.clf) == False:
 58                self.clf.fit(X, y, **kwargs)
 59                self.n_obs_ = X.shape[0]
 60                if hasattr(self.clf, "coef_"):
 61                    self.coef_ = self.clf.coef_
 62                return self
 63            self.n_obs_ = self.clf.scaler_.n_samples_seen_
 64            if hasattr(self.clf, "coef_"):
 65                self.coef_ = self.clf.coef_
 66            return self
 67
 68        if (
 69            hasattr(self.clf, "coef_") == False
 70        ):  # sklearn model or CustomClassifier model ---
 71            self.clf.fit(X, y)
 72            self.n_obs_ = X.shape[0]
 73            self.clf.fit(X, y)
 74            if hasattr(self.clf, "stacked_obj"):
 75                self.coef_ = self.clf.stacked_obj.coef_
 76            else:
 77                self.coef_ = self.clf.coef_
 78            return self
 79        self.n_obs_ = X.shape[0]
 80        if hasattr(self.clf, "coef_"):
 81            self.coef_ = self.clf.coef_
 82        return self
 83
 84    def predict(self, X):
 85
 86        raise NotImplementedError(
 87            "predict method is not implemented for ClassifierUpdater"
 88        )
 89        # assert hasattr(self.clf, "coef_"), "model must have coef_ attribute"
 90        return self.clf.predict(X)
 91
 92    def partial_fit(self, X, y):
 93
 94        raise NotImplementedError(
 95            "partial_fit method is not implemented for ClassifierUpdater"
 96        )
 97
 98        assert hasattr(
 99            self.clf, "coef_"
100        ), "model must be fitted first (i.e have 'coef_' attribute)"
101        assert (
102            self.n_obs_ is not None
103        ), "model must be fitted first (i.e have 'n_obs_' attribute)"
104
105        if len(X.shape) == 1:
106            X = X.reshape(1, -1)
107
108        assert X.shape[0] == 1, "X must have one row"
109
110        self.updating_factor_ = self.n_obs_ ** (-self.alpha)
111
112        if isinstance(self.clf, Base):  # nnetsauce model ---
113
114            newX = deepcopy(X)
115
116            if isinstance(
117                self.clf, CustomClassifier
118            ):  # other nnetsauce model (CustomClassifier) ---
119                newX = self.clf.cook_test_set(X=X)
120                if isinstance(X, pd.DataFrame):
121                    newx = newX.values.ravel()
122                else:
123                    newx = newX.ravel()
124
125        else:  # an sklearn model ---
126
127            if isinstance(X, pd.DataFrame):
128                newx = X.values.ravel()
129            else:
130                newx = X.ravel()
131
132        new_coef = self.clf.coef_ + self.updating_factor_ * np.dot(
133            newx, y - np.dot(newx, self.clf.coef_)
134        )
135        self.clf.coef_ = _update_mean(self.clf.coef_, self.n_obs_, new_coef)
136        self.coef_ = deepcopy(self.clf.coef_)
137        self.n_obs_ += 1
138        return self

Update a classification model with new observations. Note that the fit, predict, and partial_fit methods of this class currently raise NotImplementedError.

Parameters

clf: object
    A classification model with a coef_ attribute

alpha: float
    Updating factor's exponent

Attributes

n_obs_: int
    Number of observations

coef_: np.ndarray
    Coefficients of the model

updating_factor_: float
    Updating factor

def fit(self, X, y, **kwargs):
52    def fit(self, X, y, **kwargs):
53
54        raise NotImplementedError("fit method is not implemented for ClassifierUpdater")
55
56        if isinstance(self.clf, CustomClassifier):  # nnetsauce model not deep ---
57            if check_is_fitted(self.clf) == False:
58                self.clf.fit(X, y, **kwargs)
59                self.n_obs_ = X.shape[0]
60                if hasattr(self.clf, "coef_"):
61                    self.coef_ = self.clf.coef_
62                return self
63            self.n_obs_ = self.clf.scaler_.n_samples_seen_
64            if hasattr(self.clf, "coef_"):
65                self.coef_ = self.clf.coef_
66            return self
67
68        if (
69            hasattr(self.clf, "coef_") == False
70        ):  # sklearn model or CustomClassifier model ---
71            self.clf.fit(X, y)
72            self.n_obs_ = X.shape[0]
73            self.clf.fit(X, y)
74            if hasattr(self.clf, "stacked_obj"):
75                self.coef_ = self.clf.stacked_obj.coef_
76            else:
77                self.coef_ = self.clf.coef_
78            return self
79        self.n_obs_ = X.shape[0]
80        if hasattr(self.clf, "coef_"):
81            self.coef_ = self.clf.coef_
82        return self
def predict(self, X):
84    def predict(self, X):
85
86        raise NotImplementedError(
87            "predict method is not implemented for ClassifierUpdater"
88        )
89        # assert hasattr(self.clf, "coef_"), "model must have coef_ attribute"
90        return self.clf.predict(X)
class RidgeRegressor(sklearn.base.BaseEstimator, sklearn.base.RegressorMixin):
 24class RidgeRegressor(BaseEstimator, RegressorMixin):
 25    """Ridge.
 26
 27    Attributes:
 28
 29        reg_lambda: float
 30            regularization parameter.
 31
 32        backend: str
 33            type of backend; must be in ('cpu', 'gpu', 'tpu')
 34
 35    """
 36    def __init__(self, reg_lambda=0.1, backend="cpu"):
 37        assert backend in (
 38            "cpu",
 39            "gpu",
 40            "tpu",
 41        ), "`backend` must be in ('cpu', 'gpu', 'tpu')"
 42
 43        sys_platform = platform.system()
 44
 45        if (sys_platform == "Windows") and (backend in ("gpu", "tpu")):
 46            warnings.warn(
 47                "No GPU/TPU computing on Windows yet, backend set to 'cpu'"
 48            )
 49            backend = "cpu"
 50
 51        self.reg_lambda = reg_lambda
 52        self.backend = backend
 53        self.coef_ = None 
 54
 55    def fit(self, X, y, **kwargs):
 56        """Fit matrixops (classifier) to training data (X, y)
 57
 58        Args:
 59
 60            X: {array-like}, shape = [n_samples, n_features]
 61                Training vectors, where n_samples is the number
 62                of samples and n_features is the number of features.
 63
 64            y: array-like, shape = [n_samples]
 65                Target values.
 66
 67            **kwargs: additional parameters to be passed to self.cook_training_set.
 68
 69        Returns:
 70
 71            self: object.
 72
 73        """
 74        self.ym, centered_y = mo.center_response(y)
 75        self.xm = X.mean(axis=0)
 76        self.xsd = X.std(axis=0)
 77        self.xsd[self.xsd == 0] = 1  # avoid division by zero
 78        X_ = (X - self.xm[None, :]) / self.xsd[None, :]
 79
 80        if self.backend == "cpu":
 81            if len(centered_y.shape) <= 1:
 82                eye_term = np.sqrt(self.reg_lambda) * np.eye(X.shape[1])
 83                X_ = np.row_stack((X_, eye_term))
 84                y_ = np.concatenate((centered_y, np.zeros(X.shape[1])))
 85                beta_info = get_beta(X_, y_)
 86                self.coef_ = beta_info[0]
 87            else:
 88                try:
 89                    eye_term = np.sqrt(self.reg_lambda) * np.eye(X.shape[1])
 90                    X_ = np.row_stack((X_, eye_term))
 91                    y_ = np.row_stack(
 92                        (
 93                            centered_y,
 94                            np.zeros((eye_term.shape[0], centered_y.shape[1])),
 95                        )
 96                    )
 97                    beta_info = get_beta(X_, y_)
 98                    self.coef_ = beta_info[0]
 99                except Exception:
100                    x = inv(
101                        mo.crossprod(X_) + self.reg_lambda * np.eye(X_.shape[1])
102                    )
103                    hat_matrix = mo.tcrossprod(x, X_)
104                    self.coef_ = mo.safe_sparse_dot(hat_matrix, centered_y)
105            return self
106
107        x = jinv(mo.crossprod(X_, backend=self.backend)
108            + self.reg_lambda * jnp.eye(X_.shape[1]))
109
110        hat_matrix = mo.tcrossprod(x, X_, backend=self.backend)
111        self.coef_ = mo.safe_sparse_dot(hat_matrix, centered_y, 
112            backend=self.backend)
113        return self
114
115    def predict(self, X, **kwargs):
116        """Predict test data X.
117
118        Args:
119
120            X: {array-like}, shape = [n_samples, n_features]
121                Training vectors, where n_samples is the number
122                of samples and n_features is the number of features.
123
124            **kwargs: additional parameters to be passed to `predict_proba`
125
126        Returns:
127
128            model predictions: {array-like}
129
130        """
131        X_ = (X - self.xm[None, :]) / self.xsd[None, :]
132
133        if self.backend == "cpu":
134            if isinstance(self.ym, float):
135                return self.ym + mo.safe_sparse_dot(X_, self.coef_)
136            return self.ym[None, :] + mo.safe_sparse_dot(X_, self.coef_)
137
138        # if self.backend in ("gpu", "tpu"):
139        if isinstance(self.ym, float):
140            return self.ym + mo.safe_sparse_dot(
141                X_, self.coef_, backend=self.backend
142            )
143        return self.ym[None, :] + mo.safe_sparse_dot(
144            X_, self.coef_, backend=self.backend
145        )

Ridge regression with a single regularization parameter.

Attributes:

reg_lambda: float
    regularization parameter.

backend: str
    type of backend; must be in ('cpu', 'gpu', 'tpu')
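
For illustration, a minimal usage sketch (not taken from the library's own examples) on a standard scikit-learn dataset; the `reg_lambda` value is the default and purely illustrative:

```python
import numpy as np
import nnetsauce as ns
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

X, y = fetch_california_housing(return_X_y=True, as_frame=False)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=13)

# plain ridge regression; reg_lambda is the single regularization parameter
reg = ns.RidgeRegressor(reg_lambda=0.1)
reg.fit(X_train, y_train)
preds = reg.predict(X_test)

print(np.sqrt(np.mean((preds - y_test) ** 2)))  # RMSE
```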
def fit(self, X, y, **kwargs):
 55    def fit(self, X, y, **kwargs):
 56        """Fit matrixops (classifier) to training data (X, y)
 57
 58        Args:
 59
 60            X: {array-like}, shape = [n_samples, n_features]
 61                Training vectors, where n_samples is the number
 62                of samples and n_features is the number of features.
 63
 64            y: array-like, shape = [n_samples]
 65                Target values.
 66
 67            **kwargs: additional parameters to be passed to self.cook_training_set.
 68
 69        Returns:
 70
 71            self: object.
 72
 73        """
 74        self.ym, centered_y = mo.center_response(y)
 75        self.xm = X.mean(axis=0)
 76        self.xsd = X.std(axis=0)
 77        self.xsd[self.xsd == 0] = 1  # avoid division by zero
 78        X_ = (X - self.xm[None, :]) / self.xsd[None, :]
 79
 80        if self.backend == "cpu":
 81            if len(centered_y.shape) <= 1:
 82                eye_term = np.sqrt(self.reg_lambda) * np.eye(X.shape[1])
 83                X_ = np.row_stack((X_, eye_term))
 84                y_ = np.concatenate((centered_y, np.zeros(X.shape[1])))
 85                beta_info = get_beta(X_, y_)
 86                self.coef_ = beta_info[0]
 87            else:
 88                try:
 89                    eye_term = np.sqrt(self.reg_lambda) * np.eye(X.shape[1])
 90                    X_ = np.row_stack((X_, eye_term))
 91                    y_ = np.row_stack(
 92                        (
 93                            centered_y,
 94                            np.zeros((eye_term.shape[0], centered_y.shape[1])),
 95                        )
 96                    )
 97                    beta_info = get_beta(X_, y_)
 98                    self.coef_ = beta_info[0]
 99                except Exception:
100                    x = inv(
101                        mo.crossprod(X_) + self.reg_lambda * np.eye(X_.shape[1])
102                    )
103                    hat_matrix = mo.tcrossprod(x, X_)
104                    self.coef_ = mo.safe_sparse_dot(hat_matrix, centered_y)
105            return self
106
107        x = jinv(mo.crossprod(X_, backend=self.backend)
108            + self.reg_lambda * jnp.eye(X_.shape[1]))
109
110        hat_matrix = mo.tcrossprod(x, X_, backend=self.backend)
111        self.coef_ = mo.safe_sparse_dot(hat_matrix, centered_y, 
112            backend=self.backend)
113        return self

Fit the Ridge regressor to training data (X, y)

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to self.cook_training_set.

Returns:

self: object.
def predict(self, X, **kwargs):
115    def predict(self, X, **kwargs):
116        """Predict test data X.
117
118        Args:
119
120            X: {array-like}, shape = [n_samples, n_features]
121                Training vectors, where n_samples is the number
122                of samples and n_features is the number of features.
123
124            **kwargs: additional parameters to be passed to `predict_proba`
125
126        Returns:
127
128            model predictions: {array-like}
129
130        """
131        X_ = (X - self.xm[None, :]) / self.xsd[None, :]
132
133        if self.backend == "cpu":
134            if isinstance(self.ym, float):
135                return self.ym + mo.safe_sparse_dot(X_, self.coef_)
136            return self.ym[None, :] + mo.safe_sparse_dot(X_, self.coef_)
137
138        # if self.backend in ("gpu", "tpu"):
139        if isinstance(self.ym, float):
140            return self.ym + mo.safe_sparse_dot(
141                X_, self.coef_, backend=self.backend
142            )
143        return self.ym[None, :] + mo.safe_sparse_dot(
144            X_, self.coef_, backend=self.backend
145        )

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters (currently unused by this method)

Returns:

model predictions: {array-like}
class Ridge2Regressor(nnetsauce.ridge2.ridge2.Ridge2, sklearn.base.RegressorMixin):
 23class Ridge2Regressor(Ridge2, RegressorMixin):
 24    """Ridge regression with 2 regularization parameters derived from class Ridge
 25
 26    Parameters:
 27
 28        n_hidden_features: int
 29            number of nodes in the hidden layer
 30
 31        activation_name: str
 32            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 33
 34        a: float
 35            hyperparameter for 'prelu' or 'elu' activation function
 36
 37        nodes_sim: str
 38            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 39            'uniform'
 40
 41        bias: boolean
 42            indicates if the hidden layer contains a bias term (True) or not
 43            (False)
 44
 45        dropout: float
 46            regularization parameter; (random) percentage of nodes dropped out
 47            of the training
 48
 49        n_clusters: int
 50            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 51                no clustering)
 52
 53        cluster_encode: bool
 54            defines how the variable containing clusters is treated (default is one-hot)
 55            if `False`, then labels are used, without one-hot encoding
 56
 57        type_clust: str
 58            type of clustering method: currently k-means ('kmeans') or Gaussian
 59            Mixture Model ('gmm')
 60
 61        type_scaling: a tuple of 3 strings
 62            scaling methods for inputs, hidden layer, and clustering respectively
 63            (and when relevant).
 64            Currently available: standardization ('std') or MinMax scaling ('minmax')
 65
 66        lambda1: float
 67            regularization parameter on direct link
 68
 69        lambda2: float
 70            regularization parameter on hidden layer
 71
 72        seed: int
 73            reproducibility seed for nodes_sim=='uniform'
 74
 75        backend: str
 76            'cpu' or 'gpu' or 'tpu'
 77
 78    Attributes:
 79
 80        beta_: {array-like}
 81            regression coefficients
 82
 83        y_mean_: float
 84            average response
 85
 86    """
 87
 88    # construct the object -----
 89
 90    def __init__(
 91        self,
 92        n_hidden_features=5,
 93        activation_name="relu",
 94        a=0.01,
 95        nodes_sim="sobol",
 96        bias=True,
 97        dropout=0,
 98        n_clusters=2,
 99        cluster_encode=True,
100        type_clust="kmeans",
101        type_scaling=("std", "std", "std"),
102        lambda1=0.1,
103        lambda2=0.1,
104        seed=123,
105        backend="cpu",
106    ):
107        super().__init__(
108            n_hidden_features=n_hidden_features,
109            activation_name=activation_name,
110            a=a,
111            nodes_sim=nodes_sim,
112            bias=bias,
113            dropout=dropout,
114            n_clusters=n_clusters,
115            cluster_encode=cluster_encode,
116            type_clust=type_clust,
117            type_scaling=type_scaling,
118            lambda1=lambda1,
119            lambda2=lambda2,
120            seed=seed,
121            backend=backend,
122        )
123
124        self.type_fit = "regression"
125
126    def fit(self, X, y, **kwargs):
127        """Fit Ridge model to training data (X, y).
128
129        Args:
130
131            X: {array-like}, shape = [n_samples, n_features]
132                Training vectors, where n_samples is the number
133                of samples and n_features is the number of features.
134
135            y: array-like, shape = [n_samples]
136                Target values.
137
138            **kwargs: additional parameters to be passed to
139                    self.cook_training_set or self.obj.fit
140
141        Returns:
142
143            self: object
144
145        """
146
147        sys_platform = platform.system()
148
149        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
150
151        n_X, p_X = X.shape
152        n_Z, p_Z = scaled_Z.shape
153
154        if self.n_clusters > 0:
155            if self.encode_clusters == True:
156                n_features = p_X + self.n_clusters
157            else:
158                n_features = p_X + 1
159        else:
160            n_features = p_X
161
162        X_ = scaled_Z[:, 0:n_features]
163        Phi_X_ = scaled_Z[:, n_features:p_Z]
164
165        B = mo.crossprod(x=X_, backend=self.backend) + self.lambda1 * np.diag(
166            np.repeat(1, n_features)
167        )
168        C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend)
169        D = mo.crossprod(x=Phi_X_, backend=self.backend) + self.lambda2 * np.diag(
170            np.repeat(1, Phi_X_.shape[1])
171        )
172
173        if sys_platform in ("Linux", "Darwin"):
174            B_inv = pinv(B) if self.backend == "cpu" else jpinv(B)
175        else:
176            B_inv = pinv(B)
177
178        W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend)
179        S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend)
180
181        if sys_platform in ("Linux", "Darwin"):
182            S_inv = pinv(S_mat) if self.backend == "cpu" else jpinv(S_mat)
183        else:
184            S_inv = pinv(S_mat)
185
186        Y = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend)
187        inv = mo.rbind(
188            mo.cbind(
189                x=B_inv + mo.crossprod(x=W, y=Y, backend=self.backend),
190                y=-np.transpose(Y),
191                backend=self.backend,
192            ),
193            mo.cbind(x=-Y, y=S_inv, backend=self.backend),
194            backend=self.backend,
195        )
196
197        self.beta_ = mo.safe_sparse_dot(
198            a=inv,
199            b=mo.crossprod(x=scaled_Z, y=centered_y, backend=self.backend),
200            backend=self.backend,
201        )
202
203        return self
204
205    def predict(self, X, **kwargs):
206        """Predict test data X.
207
208        Args:
209
210            X: {array-like}, shape = [n_samples, n_features]
211                Training vectors, where n_samples is the number
212                of samples and n_features is the number of features.
213
214            **kwargs: additional parameters to be passed to
215                    self.cook_test_set
216
217        Returns:
218
219            model predictions: {array-like}
220
221        """
222
223        if len(X.shape) == 1:
224            n_features = X.shape[0]
225            new_X = mo.rbind(
226                x=X.reshape(1, n_features),
227                y=np.ones(n_features).reshape(1, n_features),
228                backend=self.backend,
229            )
230
231            return (
232                self.y_mean_
233                + mo.safe_sparse_dot(
234                    a=self.cook_test_set(new_X, **kwargs),
235                    b=self.beta_,
236                    backend=self.backend,
237                )
238            )[0]
239
240        return self.y_mean_ + mo.safe_sparse_dot(
241            a=self.cook_test_set(X, **kwargs),
242            b=self.beta_,
243            backend=self.backend,
244        )

Ridge regression with 2 regularization parameters derived from class Ridge

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

lambda1: float
    regularization parameter on direct link

lambda2: float
    regularization parameter on hidden layer

seed: int
    reproducibility seed for nodes_sim=='uniform'

backend: str
    'cpu' or 'gpu' or 'tpu'

Attributes:

beta_: {array-like}
    regression coefficients

y_mean_: float
    average response
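
For illustration, a minimal usage sketch (not from the library's own examples) on the diabetes dataset; the hyperparameter values shown are the defaults and purely illustrative:

```python
import numpy as np
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=13)

# lambda1 penalizes the direct link, lambda2 the hidden layer
reg = ns.Ridge2Regressor(n_hidden_features=5, lambda1=0.1, lambda2=0.1,
                         n_clusters=2)
reg.fit(X_train, y_train)
preds = reg.predict(X_test)

print(np.sqrt(np.mean((preds - y_test) ** 2)))  # RMSE
```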
def fit(self, X, y, **kwargs):
126    def fit(self, X, y, **kwargs):
127        """Fit Ridge model to training data (X, y).
128
129        Args:
130
131            X: {array-like}, shape = [n_samples, n_features]
132                Training vectors, where n_samples is the number
133                of samples and n_features is the number of features.
134
135            y: array-like, shape = [n_samples]
136                Target values.
137
138            **kwargs: additional parameters to be passed to
139                    self.cook_training_set or self.obj.fit
140
141        Returns:
142
143            self: object
144
145        """
146
147        sys_platform = platform.system()
148
149        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
150
151        n_X, p_X = X.shape
152        n_Z, p_Z = scaled_Z.shape
153
154        if self.n_clusters > 0:
155            if self.encode_clusters == True:
156                n_features = p_X + self.n_clusters
157            else:
158                n_features = p_X + 1
159        else:
160            n_features = p_X
161
162        X_ = scaled_Z[:, 0:n_features]
163        Phi_X_ = scaled_Z[:, n_features:p_Z]
164
165        B = mo.crossprod(x=X_, backend=self.backend) + self.lambda1 * np.diag(
166            np.repeat(1, n_features)
167        )
168        C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend)
169        D = mo.crossprod(x=Phi_X_, backend=self.backend) + self.lambda2 * np.diag(
170            np.repeat(1, Phi_X_.shape[1])
171        )
172
173        if sys_platform in ("Linux", "Darwin"):
174            B_inv = pinv(B) if self.backend == "cpu" else jpinv(B)
175        else:
176            B_inv = pinv(B)
177
178        W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend)
179        S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend)
180
181        if sys_platform in ("Linux", "Darwin"):
182            S_inv = pinv(S_mat) if self.backend == "cpu" else jpinv(S_mat)
183        else:
184            S_inv = pinv(S_mat)
185
186        Y = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend)
187        inv = mo.rbind(
188            mo.cbind(
189                x=B_inv + mo.crossprod(x=W, y=Y, backend=self.backend),
190                y=-np.transpose(Y),
191                backend=self.backend,
192            ),
193            mo.cbind(x=-Y, y=S_inv, backend=self.backend),
194            backend=self.backend,
195        )
196
197        self.beta_ = mo.safe_sparse_dot(
198            a=inv,
199            b=mo.crossprod(x=scaled_Z, y=centered_y, backend=self.backend),
200            backend=self.backend,
201        )
202
203        return self

Fit Ridge model to training data (X, y).

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
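
For reference, the closed form computed by `fit` above, written with the quantities that appear in the code; here X denotes the direct-link block (`X_`), Phi the hidden-layer block (`Phi_X_`), Z = [X, Phi] the scaled training matrix, and y_c the centered response:

```latex
\hat{\beta} \;=\;
\begin{pmatrix} B & C^\top \\ C & D \end{pmatrix}^{-1} Z^\top y_c,
\qquad
B = X^\top X + \lambda_1 I,\;\;
C = \Phi^\top X,\;\;
D = \Phi^\top \Phi + \lambda_2 I .
```

The block inverse is assembled from the Schur complement S = D - C B^{-1} C^T (the code's `S_mat`, with `W = C B^{-1}`):

```latex
\begin{pmatrix} B & C^\top \\ C & D \end{pmatrix}^{-1}
=
\begin{pmatrix}
B^{-1} + B^{-1} C^\top S^{-1} C B^{-1} & -\,B^{-1} C^\top S^{-1} \\
-\,S^{-1} C B^{-1} & S^{-1}
\end{pmatrix},
```

and `predict` adds `y_mean_` back to the linear scores.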
def predict(self, X, **kwargs):
205    def predict(self, X, **kwargs):
206        """Predict test data X.
207
208        Args:
209
210            X: {array-like}, shape = [n_samples, n_features]
211                Training vectors, where n_samples is the number
212                of samples and n_features is the number of features.
213
214            **kwargs: additional parameters to be passed to
215                    self.cook_test_set
216
217        Returns:
218
219            model predictions: {array-like}
220
221        """
222
223        if len(X.shape) == 1:
224            n_features = X.shape[0]
225            new_X = mo.rbind(
226                x=X.reshape(1, n_features),
227                y=np.ones(n_features).reshape(1, n_features),
228                backend=self.backend,
229            )
230
231            return (
232                self.y_mean_
233                + mo.safe_sparse_dot(
234                    a=self.cook_test_set(new_X, **kwargs),
235                    b=self.beta_,
236                    backend=self.backend,
237                )
238            )[0]
239
240        return self.y_mean_ + mo.safe_sparse_dot(
241            a=self.cook_test_set(X, **kwargs),
242            b=self.beta_,
243            backend=self.backend,
244        )

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
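
The 1-D branch in `predict` pads a single observation with a dummy row of ones so that `cook_test_set` receives a 2-D array, then keeps only the first prediction. A sketch of the two call patterns, assuming `reg` is the fitted regressor from the example above:

```python
y_batch  = reg.predict(X_test)      # 2-D input: one prediction per row
y_single = reg.predict(X_test[0])   # 1-D input: a single scalar prediction
```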
class Ridge2Classifier(nnetsauce.ridge2.ridge2.Ridge2, sklearn.base.ClassifierMixin):
 18class Ridge2Classifier(Ridge2, ClassifierMixin):
 19    """Multinomial logit classification with 2 regularization parameters
 20
 21    Parameters:
 22
 23        n_hidden_features: int
 24            number of nodes in the hidden layer
 25
 26        activation_name: str
 27            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 28
 29        a: float
 30            hyperparameter for 'prelu' or 'elu' activation function
 31
 32        nodes_sim: str
 33            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 34            'uniform'
 35
 36        bias: boolean
 37            indicates if the hidden layer contains a bias term (True) or not
 38            (False)
 39
 40        dropout: float
 41            regularization parameter; (random) percentage of nodes dropped out
 42            of the training
 43
 44        direct_link: boolean
 45            indicates if the original predictors are included (True) in model's
 46            fitting or not (False)
 47
 48        n_clusters: int
 49            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 50                no clustering)
 51
 52        cluster_encode: bool
 53            defines how the variable containing clusters is treated (default is one-hot)
 54            if `False`, then labels are used, without one-hot encoding
 55
 56        type_clust: str
 57            type of clustering method: currently k-means ('kmeans') or Gaussian
 58            Mixture Model ('gmm')
 59
 60        type_scaling: a tuple of 3 strings
 61            scaling methods for inputs, hidden layer, and clustering respectively
 62            (and when relevant).
 63            Currently available: standardization ('std') or MinMax scaling ('minmax')
 64
 65        lambda1: float
 66            regularization parameter on direct link
 67
 68        lambda2: float
 69            regularization parameter on hidden layer
 70
 71        solver: str
 72            optimization function "L-BFGS-B",  "Newton-CG",
 73            "trust-ncg", "L-BFGS-B-lstsq", "Newton-CG-lstsq",
 74            "trust-ncg-lstsq" (see scipy.optimize.minimize)
 75            When using "L-BFGS-B-lstsq", "Newton-CG-lstsq", or "trust-ncg-lstsq",
 76            the initial value for the optimization is set to the least squares solution
 77
 78        seed: int
 79            reproducibility seed for nodes_sim=='uniform'
 80
 81        backend: str
 82            "cpu" or "gpu" or "tpu"
 83
 84    Attributes:
 85
 86        beta_: {array-like}
 87            regression coefficients
 88
 89        classes_: {array-like}
 90            unique classes in the target variable
 91
 92        minloglik_: float
 93            minimum value of the negative log-likelihood
 94
 95    Examples:
 96
 97    See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/ridge_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/ridge_classification.py)
 98
 99    ```python
100    import nnetsauce as ns
101    import numpy as np
102    from sklearn.datasets import load_breast_cancer
103    from sklearn.model_selection import train_test_split
104    from time import time
105
106
107    breast_cancer = load_breast_cancer()
108    X = breast_cancer.data
109    y = breast_cancer.target
110
111    # split data into training set and test set
112    np.random.seed(123)
113    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
114
115    # create the model with nnetsauce
116    fit_obj = ns.Ridge2Classifier(lambda1 = 6.90185578e+04,
117                                lambda2 = 3.17392781e+02,
118                                n_hidden_features=95,
119                                n_clusters=2,
120                                dropout = 3.62817383e-01,
121                                type_clust = "gmm")
122
123    # fit the model on training set
124    start = time()
125    fit_obj.fit(X_train, y_train)
126    print(f"Elapsed {time() - start}")
127
128    # get the accuracy on test set
129    start = time()
130    print(fit_obj.score(X_test, y_test))
131    print(f"Elapsed {time() - start}")
132
133    # get area under the curve on test set (auc)
134    print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
135    ```
136
137
138    """
139    _estimator_type = "classifier"
140
141    # construct the object -----
142
143    def __init__(
144        self,
145        n_hidden_features=5,
146        activation_name="relu",
147        a=0.01,
148        nodes_sim="sobol",
149        bias=True,
150        dropout=0,
151        direct_link=True,
152        n_clusters=2,
153        cluster_encode=True,
154        type_clust="kmeans",
155        type_scaling=("std", "std", "std"),
156        lambda1=0.1,
157        lambda2=0.1,
158        solver="L-BFGS-B",
159        seed=123,
160        backend="cpu",
161    ):
162        super().__init__(
163            n_hidden_features=n_hidden_features,
164            activation_name=activation_name,
165            a=a,
166            nodes_sim=nodes_sim,
167            bias=bias,
168            dropout=dropout,
169            direct_link=direct_link,
170            n_clusters=n_clusters,
171            cluster_encode=cluster_encode,
172            type_clust=type_clust,
173            type_scaling=type_scaling,
174            lambda1=lambda1,
175            lambda2=lambda2,
176            seed=seed,
177            backend=backend,
178        )
179
180        self.type_fit = "classification"
181        self.solver = solver
182        self.beta_ = None
183        self.classes_ = None
184        self.minloglik_ = None
185
186    def loglik(self, X, Y, **kwargs):
187        """Log-likelihood for training data (X, Y).
188
189        Args:
190
191            X: {array-like}, shape = [n_samples, n_features]
192                Training vectors, where n_samples is the number
193                of samples and n_features is the number of features.
194
195            Y: array-like, shape = [n_samples]
196                One-hot encode target values.
197
198            **kwargs: additional parameters to be passed to
199                    self.cook_training_set or self.obj.fit
200
201        Returns:
202
203        """
204
205        def loglik_grad_hess(Y, X, B, XB, hessian=True, **kwargs):
206            # nobs, n_classes
207            n, K = Y.shape
208
209            # total number of covariates
210            p = X.shape[1]
211
212            # initial number of covariates
213            init_p = p - self.n_hidden_features
214
215            max_double = 709.0
216            XB[XB > max_double] = max_double
217            exp_XB = np.exp(XB)
218            probs = exp_XB / exp_XB.sum(axis=1)[:, None]
219
220            # gradient -----
221            # (Y - p) -> (n, K)
222            # X -> (n, p)
223            # (K, n) %*% (n, p) -> (K, p)
224            if hessian is False:
225                grad = (
226                    -mo.safe_sparse_dot(a=(Y - probs).T, b=X, backend=self.backend) / n
227                )
228                grad += self.lambda1 * B[0:init_p, :].sum(axis=0)[:, None]
229                grad += self.lambda2 * B[init_p:p, :].sum(axis=0)[:, None]
230
231                return grad.flatten()
232
233            # hessian -----
234            if hessian is True:
235                Kp = K * p
236                hess = np.zeros((Kp, Kp), float)
237                for k1 in range(K):
238                    x_index = range(k1 * p, (k1 + 1) * p)
239                    for k2 in range(k1, K):
240                        y_index = range(k2 * p, (k2 + 1) * p)
241                        H_sub = (
242                            -mo.safe_sparse_dot(
243                                a=X.T,
244                                b=(probs[:, k1] * probs[:, k2])[:, None] * X,
245                                backend=self.backend,
246                            )
247                            / n
248                        )  # do not store
249                        hess[np.ix_(x_index, y_index)] = hess[
250                            np.ix_(y_index, x_index)
251                        ] = H_sub
252
253                return hess + (self.lambda1 + self.lambda2) * np.identity(Kp)
254
255        # total number of covariates
256        p = X.shape[1]
257
258        # initial number of covariates
259        init_p = p - self.n_hidden_features
260
261        # log-likelihood (1st return)
262        def loglik_func(x):
263            # (p, K)
264            B = x.reshape(Y.shape[1], p).T
265
266            # (n, K)
267            XB = mo.safe_sparse_dot(X, B, backend=self.backend)
268
269            res = -(np.sum(Y * XB, axis=1) - logsumexp(XB)).mean()
270
271            res += (
272                0.5
273                * self.lambda1
274                * mo.squared_norm(B[0:init_p, :], backend=self.backend)
275            )
276            res += (
277                0.5
278                * self.lambda2
279                * mo.squared_norm(B[init_p:p, :], backend=self.backend)
280            )
281
282            return res
283
284        # gradient of log-likelihood
285        def grad_func(x):
286            # (p, K)
287            B = x.reshape(Y.shape[1], p).T
288
289            return loglik_grad_hess(
290                Y=Y,
291                X=X,
292                B=B,
293                XB=mo.safe_sparse_dot(X, B, backend=self.backend),
294                hessian=False,
295                **kwargs
296            )
297
298        # hessian of log-likelihood
299        def hessian_func(x):
300            # (p, K)
301            B = x.reshape(Y.shape[1], p).T
302
303            return loglik_grad_hess(
304                Y=Y,
305                X=X,
306                B=B,
307                XB=mo.safe_sparse_dot(X, B, backend=self.backend),
308                hessian=True,
309                **kwargs
310            )
311
312        return loglik_func, grad_func, hessian_func
313
314    # newton-cg
315    # L-BFGS-B
316    def fit(self, X, y, **kwargs):
317        """Fit Ridge model to training data (X, y).
318
319        for beta: regression coeffs (beta11, ..., beta1p, ..., betaK1, ..., betaKp)
320        for K classes and p covariates.
321
322        Args:
323
324            X: {array-like}, shape = [n_samples, n_features]
325                Training vectors, where n_samples is the number
326                of samples and n_features is the number of features.
327
328            y: array-like, shape = [n_samples]
329                Target values.
330
331            **kwargs: additional parameters to be passed to
332                    self.cook_training_set or self.obj.fit
333
334        Returns:
335
336            self: object
337
338        """
339
340        assert mx.is_factor(y), "y must contain only integers"
341
342        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
343
344        self.n_classes = len(np.unique(y))
345        self.classes_ = np.unique(y)  # for compatibility with sklearn
346        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
347
348        Y = mo.one_hot_encode2(output_y, self.n_classes)
349
350        # optimize for beta, minimize self.loglik (maximize loglik) -----
351        loglik_func, grad_func, hessian_func = self.loglik(X=scaled_Z, Y=Y)
352
353        if self.solver == "L-BFGS-B":
354            opt = minimize(
355                fun=loglik_func,
356                x0=np.zeros(scaled_Z.shape[1] * self.n_classes),
357                jac=grad_func,
358                method=self.solver,
359            )
360            self.beta_ = opt.x
361            self.minloglik_ = opt.fun
362
363        if self.solver in ("Newton-CG", "trust-ncg"):
364            opt = minimize(
365                fun=loglik_func,
366                x0=np.zeros(scaled_Z.shape[1] * self.n_classes),
367                jac=grad_func,
368                hess=hessian_func,
369                method=self.solver,
370            )
371            self.beta_ = opt.x
372            self.minloglik_ = opt.fun
373
374        if self.solver == "L-BFGS-B-lstsq":
375            opt = minimize(
376                fun=loglik_func,
377                x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten(order="F"),
378                jac=grad_func,
379                method="L-BFGS-B",
380            )
381            self.beta_ = opt.x
382            self.minloglik_ = opt.fun
383
384        if self.solver in "Newton-CG-lstsq":
385            opt = minimize(
386                fun=loglik_func,
387                x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten(order="F"),
388                jac=grad_func,
389                hess=hessian_func,
390                method="Newton-CG",
391            )
392            self.beta_ = opt.x
393            self.minloglik_ = opt.fun
394
395        if self.solver in "trust-ncg-lstsq":
396            opt = minimize(
397                fun=loglik_func,
398                x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten(order="F"),
399                jac=grad_func,
400                hess=hessian_func,
401                method="trust-ncg",
402            )
403            self.beta_ = opt.x
404            self.minloglik_ = opt.fun
405
406        self.classes_ = np.unique(y)
407
408        return self
409
410    def predict(self, X, **kwargs):
411        """Predict test data X.
412
413        Args:
414
415            X: {array-like}, shape = [n_samples, n_features]
416                Training vectors, where n_samples is the number
417                of samples and n_features is the number of features.
418
419            **kwargs: additional parameters to be passed to
420                    self.cook_test_set
421
422        Returns:
423
424            model predictions: {array-like}
425        """
426
427        return np.argmax(self.predict_proba(X, **kwargs), axis=1)
428
429    def predict_proba(self, X, **kwargs):
430        """Predict probabilities for test data X.
431
432        Args:
433
434            X: {array-like}, shape = [n_samples, n_features]
435                Training vectors, where n_samples is the number
436                of samples and n_features is the number of features.
437
438            **kwargs: additional parameters to be passed to
439                    self.cook_test_set
440
441        Returns:
442
443            probability estimates for test data: {array-like}
444
445        """
446        if len(X.shape) == 1:
447            n_features = X.shape[0]
448            new_X = mo.rbind(
449                X.reshape(1, n_features),
450                np.ones(n_features).reshape(1, n_features),
451            )
452
453            Z = self.cook_test_set(new_X, **kwargs)
454
455        else:
456            Z = self.cook_test_set(X, **kwargs)
457
458        ZB = mo.safe_sparse_dot(
459            a=Z,
460            b=self.beta_.reshape(
461                self.n_classes,
462                X.shape[1] + self.n_hidden_features + self.n_clusters,
463            ).T,
464            backend=self.backend,
465        )
466
467        exp_ZB = np.exp(ZB)
468
469        return exp_ZB / exp_ZB.sum(axis=1)[:, None]
470
471    @property
472    def _estimator_type(self):
473        return "classifier"            

Multinomial logit classification with 2 regularization parameters

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

lambda1: float
    regularization parameter on direct link

lambda2: float
    regularization parameter on hidden layer

solver: str
    optimization function "L-BFGS-B",  "Newton-CG",
    "trust-ncg", "L-BFGS-B-lstsq", "Newton-CG-lstsq",
    "trust-ncg-lstsq" (see scipy.optimize.minimize)
    When using "L-BFGS-B-lstsq", "Newton-CG-lstsq", or "trust-ncg-lstsq",
    the initial value for the optimization is set to the least squares solution

seed: int
    reproducibility seed for nodes_sim=='uniform'

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

beta_: {array-like}
    regression coefficients

classes_: {array-like}
    unique classes in the target variable

minloglik_: float
    minimum value of the negative log-likelihood

Examples:

See also https://github.com/Techtonique/nnetsauce/blob/master/examples/ridge_classification.py

import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from time import time


breast_cancer = load_breast_cancer()
X = breast_cancer.data
y = breast_cancer.target

# split data into training set and test set
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# create the model with nnetsauce
fit_obj = ns.Ridge2Classifier(lambda1 = 6.90185578e+04,
                            lambda2 = 3.17392781e+02,
                            n_hidden_features=95,
                            n_clusters=2,
                            dropout = 3.62817383e-01,
                            type_clust = "gmm")

# fit the model on training set
start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")

# get the accuracy on test set
start = time()
print(fit_obj.score(X_test, y_test))
print(f"Elapsed {time() - start}")

# get area under the curve on test set (auc)
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
def fit(self, X, y, **kwargs):
316    def fit(self, X, y, **kwargs):
317        """Fit Ridge model to training data (X, y).
318
319        for beta: regression coeffs (beta11, ..., beta1p, ..., betaK1, ..., betaKp)
320        for K classes and p covariates.
321
322        Args:
323
324            X: {array-like}, shape = [n_samples, n_features]
325                Training vectors, where n_samples is the number
326                of samples and n_features is the number of features.
327
328            y: array-like, shape = [n_samples]
329                Target values.
330
331            **kwargs: additional parameters to be passed to
332                    self.cook_training_set or self.obj.fit
333
334        Returns:
335
336            self: object
337
338        """
339
340        assert mx.is_factor(y), "y must contain only integers"
341
342        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
343
344        self.n_classes = len(np.unique(y))
345        self.classes_ = np.unique(y)  # for compatibility with sklearn
346        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
347
348        Y = mo.one_hot_encode2(output_y, self.n_classes)
349
350        # optimize for beta, minimize self.loglik (maximize loglik) -----
351        loglik_func, grad_func, hessian_func = self.loglik(X=scaled_Z, Y=Y)
352
353        if self.solver == "L-BFGS-B":
354            opt = minimize(
355                fun=loglik_func,
356                x0=np.zeros(scaled_Z.shape[1] * self.n_classes),
357                jac=grad_func,
358                method=self.solver,
359            )
360            self.beta_ = opt.x
361            self.minloglik_ = opt.fun
362
363        if self.solver in ("Newton-CG", "trust-ncg"):
364            opt = minimize(
365                fun=loglik_func,
366                x0=np.zeros(scaled_Z.shape[1] * self.n_classes),
367                jac=grad_func,
368                hess=hessian_func,
369                method=self.solver,
370            )
371            self.beta_ = opt.x
372            self.minloglik_ = opt.fun
373
374        if self.solver == "L-BFGS-B-lstsq":
375            opt = minimize(
376                fun=loglik_func,
377                x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten(order="F"),
378                jac=grad_func,
379                method="L-BFGS-B",
380            )
381            self.beta_ = opt.x
382            self.minloglik_ = opt.fun
383
384        if self.solver in "Newton-CG-lstsq":
385            opt = minimize(
386                fun=loglik_func,
387                x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten(order="F"),
388                jac=grad_func,
389                hess=hessian_func,
390                method="Newton-CG",
391            )
392            self.beta_ = opt.x
393            self.minloglik_ = opt.fun
394
395        if self.solver in "trust-ncg-lstsq":
396            opt = minimize(
397                fun=loglik_func,
398                x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten(order="F"),
399                jac=grad_func,
400                hess=hessian_func,
401                method="trust-ncg",
402            )
403            self.beta_ = opt.x
404            self.minloglik_ = opt.fun
405
406        self.classes_ = np.unique(y)
407
408        return self

Fit Ridge model to training data (X, y).

The regression coefficients beta are laid out as (beta11, ..., beta1p, ..., betaK1, ..., betaKp), for K classes and p covariates.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
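
The solver is fixed at construction time; the `*-lstsq` variants warm-start the optimizer at the least-squares solution instead of zeros. A minimal sketch (solver choice and penalty values are illustrative):

```python
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

clf = ns.Ridge2Classifier(solver="L-BFGS-B-lstsq", lambda1=0.1, lambda2=0.1)
clf.fit(X_train, y_train)
print(clf.minloglik_)  # minimum of the penalized negative log-likelihood
```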
def predict(self, X, **kwargs):
410    def predict(self, X, **kwargs):
411        """Predict test data X.
412
413        Args:
414
415            X: {array-like}, shape = [n_samples, n_features]
416                Training vectors, where n_samples is the number
417                of samples and n_features is the number of features.
418
419            **kwargs: additional parameters to be passed to
420                    self.cook_test_set
421
422        Returns:
423
424            model predictions: {array-like}
425        """
426
427        return np.argmax(self.predict_proba(X, **kwargs), axis=1)

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
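
`predict` is simply the argmax of `predict_proba` over the class axis; continuing from the sketch above:

```python
proba  = clf.predict_proba(X_test)  # shape (n_samples, n_classes), rows sum to 1
labels = clf.predict(X_test)        # index of the most probable class for each row
```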
def predict_proba(self, X, **kwargs):
429    def predict_proba(self, X, **kwargs):
430        """Predict probabilities for test data X.
431
432        Args:
433
434            X: {array-like}, shape = [n_samples, n_features]
435                Training vectors, where n_samples is the number
436                of samples and n_features is the number of features.
437
438            **kwargs: additional parameters to be passed to
439                    self.cook_test_set
440
441        Returns:
442
443            probability estimates for test data: {array-like}
444
445        """
446        if len(X.shape) == 1:
447            n_features = X.shape[0]
448            new_X = mo.rbind(
449                X.reshape(1, n_features),
450                np.ones(n_features).reshape(1, n_features),
451            )
452
453            Z = self.cook_test_set(new_X, **kwargs)
454
455        else:
456            Z = self.cook_test_set(X, **kwargs)
457
458        ZB = mo.safe_sparse_dot(
459            a=Z,
460            b=self.beta_.reshape(
461                self.n_classes,
462                X.shape[1] + self.n_hidden_features + self.n_clusters,
463            ).T,
464            backend=self.backend,
465        )
466
467        exp_ZB = np.exp(ZB)
468
469        return exp_ZB / exp_ZB.sum(axis=1)[:, None]

Predict probabilities for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

probability estimates for test data: {array-like}
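
The returned probabilities are a softmax of the linear scores, matching `exp_ZB / exp_ZB.sum(axis=1)[:, None]` in the code above:

```latex
p_{ik} \;=\; \frac{\exp\big((Z\beta)_{ik}\big)}{\sum_{j=1}^{K} \exp\big((Z\beta)_{ij}\big)},
\qquad i = 1,\dots,n,\;\; k = 1,\dots,K .
```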
class Ridge2MultitaskClassifier(nnetsauce.ridge2.ridge2.Ridge2, sklearn.base.ClassifierMixin):
 23class Ridge2MultitaskClassifier(Ridge2, ClassifierMixin):
 24    """Multitask Ridge classification with 2 regularization parameters
 25
 26    Parameters:
 27
 28        n_hidden_features: int
 29            number of nodes in the hidden layer
 30
 31        activation_name: str
 32            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 33
 34        a: float
 35            hyperparameter for 'prelu' or 'elu' activation function
 36
 37        nodes_sim: str
 38            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 39            'uniform'
 40
 41        bias: boolean
 42            indicates if the hidden layer contains a bias term (True) or not
 43            (False)
 44
 45        dropout: float
 46            regularization parameter; (random) percentage of nodes dropped out
 47            of the training
 48
 49        n_clusters: int
 50            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 51                no clustering)
 52
 53        cluster_encode: bool
 54            defines how the variable containing clusters is treated (default is one-hot)
 55            if `False`, then labels are used, without one-hot encoding
 56
 57        type_clust: str
 58            type of clustering method: currently k-means ('kmeans') or Gaussian
 59            Mixture Model ('gmm')
 60
 61        type_scaling: a tuple of 3 strings
 62            scaling methods for inputs, hidden layer, and clustering respectively
 63            (and when relevant).
 64            Currently available: standardization ('std') or MinMax scaling ('minmax')
 65
 66        lambda1: float
 67            regularization parameter on direct link
 68
 69        lambda2: float
 70            regularization parameter on hidden layer
 71
 72        seed: int
 73            reproducibility seed for nodes_sim=='uniform'
 74
 75        backend: str
 76            "cpu" or "gpu" or "tpu"
 77
 78    Attributes:
 79
 80        beta_: {array-like}
 81            regression coefficients
 82
 83    Examples:
 84
 85    See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/ridgemtask_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/ridgemtask_classification.py)
 86
 87    ```python
 88    import nnetsauce as ns
 89    import numpy as np
 90    from sklearn.datasets import load_breast_cancer
 91    from sklearn.model_selection import train_test_split
 92    from sklearn import metrics
 93    from time import time
 94
 95    breast_cancer = load_breast_cancer()
 96    Z = breast_cancer.data
 97    t = breast_cancer.target
 98    np.random.seed(123)
 99    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)
100
101    fit_obj = ns.Ridge2MultitaskClassifier(n_hidden_features=int(9.83730469e+01),
102                                    dropout=4.31054687e-01,
103                                    n_clusters=int(1.71484375e+00),
104                                    lambda1=1.24023438e+01, lambda2=7.30263672e+03)
105
106    start = time()
107    fit_obj.fit(X_train, y_train)
108    print(f"Elapsed {time() - start}")
109
110    print(fit_obj.score(X_test, y_test))
111    print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
112
113    start = time()
114    preds = fit_obj.predict(X_test)
115    print(f"Elapsed {time() - start}")
116    print(metrics.classification_report(preds, y_test))
117    ```
118
119    """
120
121    # construct the object -----
122    _estimator_type = "classifier"
123
124    def __init__(
125        self,
126        n_hidden_features=5,
127        activation_name="relu",
128        a=0.01,
129        nodes_sim="sobol",
130        bias=True,
131        dropout=0,
132        n_clusters=2,
133        cluster_encode=True,
134        type_clust="kmeans",
135        type_scaling=("std", "std", "std"),
136        lambda1=0.1,
137        lambda2=0.1,
138        seed=123,
139        backend="cpu",
140    ):
141        super().__init__(
142            n_hidden_features=n_hidden_features,
143            activation_name=activation_name,
144            a=a,
145            nodes_sim=nodes_sim,
146            bias=bias,
147            dropout=dropout,
148            n_clusters=n_clusters,
149            cluster_encode=cluster_encode,
150            type_clust=type_clust,
151            type_scaling=type_scaling,
152            lambda1=lambda1,
153            lambda2=lambda2,
154            seed=seed,
155            backend=backend,
156        )
157
158        self.type_fit = "classification"
159
160    def fit(self, X, y, **kwargs):
161        """Fit Ridge model to training data (X, y).
162
163        Args:
164
165            X: {array-like}, shape = [n_samples, n_features]
166                Training vectors, where n_samples is the number
167                of samples and n_features is the number of features.
168
169            y: array-like, shape = [n_samples]
170                Target values.
171
172            **kwargs: additional parameters to be passed to
173                    self.cook_training_set or self.obj.fit
174
175        Returns:
176
177            self: object
178
179        """
180
181        sys_platform = platform.system()
182
183        assert mx.is_factor(y), "y must contain only integers"
184
185        self.classes_ = np.unique(y)  # for compatibility with sklearn
186        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
187
188        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
189
190        n_X, p_X = X.shape
191        n_Z, p_Z = scaled_Z.shape
192
193        self.n_classes = len(np.unique(y))
194
195        # multitask response
196        Y = mo.one_hot_encode2(output_y, self.n_classes)
197
198        if self.n_clusters > 0:
199            if self.encode_clusters == True:
200                n_features = p_X + self.n_clusters
201            else:
202                n_features = p_X + 1
203        else:
204            n_features = p_X
205
206        X_ = scaled_Z[:, 0:n_features]
207        Phi_X_ = scaled_Z[:, n_features:p_Z]
208
209        B = mo.crossprod(x=X_, backend=self.backend) + self.lambda1 * np.diag(
210            np.repeat(1, X_.shape[1])
211        )
212        C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend)
213        D = mo.crossprod(x=Phi_X_, backend=self.backend) + self.lambda2 * np.diag(
214            np.repeat(1, Phi_X_.shape[1])
215        )
216
217        if sys_platform in ("Linux", "Darwin"):
218            B_inv = pinv(B) if self.backend == "cpu" else jpinv(B)
219        else:
220            B_inv = pinv(B)
221
222        W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend)
223        S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend)
224
225        if sys_platform in ("Linux", "Darwin"):
226            S_inv = pinv(S_mat) if self.backend == "cpu" else jpinv(S_mat)
227        else:
228            S_inv = pinv(S_mat)
229
230        Y2 = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend)
231        inv = mo.rbind(
232            mo.cbind(
233                x=B_inv + mo.crossprod(x=W, y=Y2, backend=self.backend),
234                y=-np.transpose(Y2),
235                backend=self.backend,
236            ),
237            mo.cbind(x=-Y2, y=S_inv, backend=self.backend),
238            backend=self.backend,
239        )
240
241        self.beta_ = mo.safe_sparse_dot(
242            a=inv,
243            b=mo.crossprod(x=scaled_Z, y=Y, backend=self.backend),
244            backend=self.backend,
245        )
246        self.classes_ = np.unique(y)
247        return self
248
249    def predict(self, X, **kwargs):
250        """Predict test data X.
251
252        Args:
253
254            X: {array-like}, shape = [n_samples, n_features]
255                Training vectors, where n_samples is the number
256                of samples and n_features is the number of features.
257
258            **kwargs: additional parameters to be passed to
259                    self.cook_test_set
260
261        Returns:
262
263            model predictions: {array-like}
264
265        """
266
267        return np.argmax(self.predict_proba(X, **kwargs), axis=1)
268
269    def predict_proba(self, X, **kwargs):
270        """Predict probabilities for test data X.
271
272        Args:
273
274            X: {array-like}, shape = [n_samples, n_features]
275                Training vectors, where n_samples is the number
276                of samples and n_features is the number of features.
277
278            **kwargs: additional parameters to be passed to
279                    self.cook_test_set
280
281        Returns:
282
283            probability estimates for test data: {array-like}
284
285        """
286
287        if len(X.shape) == 1:
288            n_features = X.shape[0]
289            new_X = mo.rbind(
290                x=X.reshape(1, n_features),
291                y=np.ones(n_features).reshape(1, n_features),
292                backend=self.backend,
293            )
294
295            Z = self.cook_test_set(new_X, **kwargs)
296
297        else:
298            Z = self.cook_test_set(X, **kwargs)
299
300        ZB = mo.safe_sparse_dot(a=Z, b=self.beta_, backend=self.backend)
301
302        exp_ZB = np.exp(ZB)
303
304        return exp_ZB / exp_ZB.sum(axis=1)[:, None]
305
306    def score(self, X, y, scoring=None):
307        """Scoring function for classification.
308
309        Args:
310
311            X: {array-like}, shape = [n_samples, n_features]
312                Training vectors, where n_samples is the number
313                of samples and n_features is the number of features.
314
315            y: array-like, shape = [n_samples]
316                Target values.
317
318            scoring: str
319                scoring method (default is accuracy)
320
321        Returns:
322
323            score: float
324        """
325
326        if scoring is None:
327            scoring = "accuracy"
328
329        if scoring == "accuracy":
330            return skm2.accuracy_score(y, self.predict(X))
331
332        if scoring == "f1":
333            return skm2.f1_score(y, self.predict(X))
334
335        if scoring == "precision":
336            return skm2.precision_score(y, self.predict(X))
337
338        if scoring == "recall":
339            return skm2.recall_score(y, self.predict(X))
340
341        if scoring == "roc_auc":
342            return skm2.roc_auc_score(y, self.predict(X))
343
344        if scoring == "log_loss":
345            return skm2.log_loss(y, self.predict_proba(X))
346
347        if scoring == "balanced_accuracy":
348            return skm2.balanced_accuracy_score(y, self.predict(X))
349
350        if scoring == "average_precision":
351            return skm2.average_precision_score(y, self.predict(X))
352
353        if scoring == "neg_brier_score":
354            return -skm2.brier_score_loss(y, self.predict_proba(X))
355
356        if scoring == "neg_log_loss":
357            return -skm2.log_loss(y, self.predict_proba(X))
358
359    @property
360    def _estimator_type(self):
361        return "classifier"            

Multitask Ridge classification with 2 regularization parameters

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

lambda1: float
    regularization parameter on direct link

lambda2: float
    regularization parameter on hidden layer

seed: int
    reproducibility seed for nodes_sim=='uniform'

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

beta_: {array-like}
    regression coefficients

Examples:

See also https://github.com/Techtonique/nnetsauce/blob/master/examples/ridgemtask_classification.py

import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time

breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)

fit_obj = ns.Ridge2MultitaskClassifier(n_hidden_features=int(9.83730469e+01),
                                dropout=4.31054687e-01,
                                n_clusters=int(1.71484375e+00),
                                lambda1=1.24023438e+01, lambda2=7.30263672e+03)

start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")

print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))

start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
def fit(self, X, y, **kwargs):
160    def fit(self, X, y, **kwargs):
161        """Fit Ridge model to training data (X, y).
162
163        Args:
164
165            X: {array-like}, shape = [n_samples, n_features]
166                Training vectors, where n_samples is the number
167                of samples and n_features is the number of features.
168
169            y: array-like, shape = [n_samples]
170                Target values.
171
172            **kwargs: additional parameters to be passed to
173                    self.cook_training_set or self.obj.fit
174
175        Returns:
176
177            self: object
178
179        """
180
181        sys_platform = platform.system()
182
183        assert mx.is_factor(y), "y must contain only integers"
184
185        self.classes_ = np.unique(y)  # for compatibility with sklearn
186        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
187
188        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
189
190        n_X, p_X = X.shape
191        n_Z, p_Z = scaled_Z.shape
192
193        self.n_classes = len(np.unique(y))
194
195        # multitask response
196        Y = mo.one_hot_encode2(output_y, self.n_classes)
197
198        if self.n_clusters > 0:
199            if self.encode_clusters == True:
200                n_features = p_X + self.n_clusters
201            else:
202                n_features = p_X + 1
203        else:
204            n_features = p_X
205
206        X_ = scaled_Z[:, 0:n_features]
207        Phi_X_ = scaled_Z[:, n_features:p_Z]
208
209        B = mo.crossprod(x=X_, backend=self.backend) + self.lambda1 * np.diag(
210            np.repeat(1, X_.shape[1])
211        )
212        C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend)
213        D = mo.crossprod(x=Phi_X_, backend=self.backend) + self.lambda2 * np.diag(
214            np.repeat(1, Phi_X_.shape[1])
215        )
216
217        if sys_platform in ("Linux", "Darwin"):
218            B_inv = pinv(B) if self.backend == "cpu" else jpinv(B)
219        else:
220            B_inv = pinv(B)
221
222        W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend)
223        S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend)
224
225        if sys_platform in ("Linux", "Darwin"):
226            S_inv = pinv(S_mat) if self.backend == "cpu" else jpinv(S_mat)
227        else:
228            S_inv = pinv(S_mat)
229
230        Y2 = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend)
231        inv = mo.rbind(
232            mo.cbind(
233                x=B_inv + mo.crossprod(x=W, y=Y2, backend=self.backend),
234                y=-np.transpose(Y2),
235                backend=self.backend,
236            ),
237            mo.cbind(x=-Y2, y=S_inv, backend=self.backend),
238            backend=self.backend,
239        )
240
241        self.beta_ = mo.safe_sparse_dot(
242            a=inv,
243            b=mo.crossprod(x=scaled_Z, y=Y, backend=self.backend),
244            backend=self.backend,
245        )
246        self.classes_ = np.unique(y)
247        return self

Fit Ridge model to training data (X, y).

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
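
Internally, `fit` builds the multitask response by one-hot encoding `y` (one regression column per class) and then solves the same two-penalty block system as the regressor. A small illustration of the encoding step, using `np.eye` as a stand-in for `mo.one_hot_encode2`, which plays the same role here:

```python
import numpy as np

y = np.array([0, 2, 1, 0])
Y = np.eye(len(np.unique(y)))[y]  # one-hot multitask response, one column per class
print(Y)
# [[1. 0. 0.]
#  [0. 0. 1.]
#  [0. 1. 0.]
#  [1. 0. 0.]]
```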
def predict(self, X, **kwargs):
249    def predict(self, X, **kwargs):
250        """Predict test data X.
251
252        Args:
253
254            X: {array-like}, shape = [n_samples, n_features]
255                Training vectors, where n_samples is the number
256                of samples and n_features is the number of features.
257
258            **kwargs: additional parameters to be passed to
259                    self.cook_test_set
260
261        Returns:
262
263            model predictions: {array-like}
264
265        """
266
267        return np.argmax(self.predict_proba(X, **kwargs), axis=1)

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
def predict_proba(self, X, **kwargs):
269    def predict_proba(self, X, **kwargs):
270        """Predict probabilities for test data X.
271
272        Args:
273
274            X: {array-like}, shape = [n_samples, n_features]
275                Training vectors, where n_samples is the number
276                of samples and n_features is the number of features.
277
278            **kwargs: additional parameters to be passed to
279                    self.cook_test_set
280
281        Returns:
282
283            probability estimates for test data: {array-like}
284
285        """
286
287        if len(X.shape) == 1:
288            n_features = X.shape[0]
289            new_X = mo.rbind(
290                x=X.reshape(1, n_features),
291                y=np.ones(n_features).reshape(1, n_features),
292                backend=self.backend,
293            )
294
295            Z = self.cook_test_set(new_X, **kwargs)
296
297        else:
298            Z = self.cook_test_set(X, **kwargs)
299
300        ZB = mo.safe_sparse_dot(a=Z, b=self.beta_, backend=self.backend)
301
302        exp_ZB = np.exp(ZB)
303
304        return exp_ZB / exp_ZB.sum(axis=1)[:, None]

Predict probabilities for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

probability estimates for test data: {array-like}
def score(self, X, y, scoring=None):
306    def score(self, X, y, scoring=None):
307        """Scoring function for classification.
308
309        Args:
310
311            X: {array-like}, shape = [n_samples, n_features]
312                Training vectors, where n_samples is the number
313                of samples and n_features is the number of features.
314
315            y: array-like, shape = [n_samples]
316                Target values.
317
318            scoring: str
319                scoring method (default is accuracy)
320
321        Returns:
322
323            score: float
324        """
325
326        if scoring is None:
327            scoring = "accuracy"
328
329        if scoring == "accuracy":
330            return skm2.accuracy_score(y, self.predict(X))
331
332        if scoring == "f1":
333            return skm2.f1_score(y, self.predict(X))
334
335        if scoring == "precision":
336            return skm2.precision_score(y, self.predict(X))
337
338        if scoring == "recall":
339            return skm2.recall_score(y, self.predict(X))
340
341        if scoring == "roc_auc":
342            return skm2.roc_auc_score(y, self.predict(X))
343
344        if scoring == "log_loss":
345            return skm2.log_loss(y, self.predict_proba(X))
346
347        if scoring == "balanced_accuracy":
348            return skm2.balanced_accuracy_score(y, self.predict(X))
349
350        if scoring == "average_precision":
351            return skm2.average_precision_score(y, self.predict(X))
352
353        if scoring == "neg_brier_score":
354            return -skm2.brier_score_loss(y, self.predict_proba(X))
355
356        if scoring == "neg_log_loss":
357            return -skm2.log_loss(y, self.predict_proba(X))

Scoring function for classification.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

scoring: str
    scoring method (default is accuracy)

Returns:

score: float
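
Metrics other than the default accuracy are selected by name; continuing the class example above (`fit_obj` fitted on the breast cancer split):

```python
print(fit_obj.score(X_test, y_test))                          # accuracy (default)
print(fit_obj.score(X_test, y_test, scoring="f1"))            # F1 score
print(fit_obj.score(X_test, y_test, scoring="neg_log_loss"))  # uses predict_proba
```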
class SubSampler:
 6class SubSampler:
 7    """Subsampling class.
 8
 9    Attributes:
10
11       y: array-like, shape = [n_samples]
12           Target values.
13
14       row_sample: double
15           subsampling fraction
16
17       n_samples: int
18            subsampling by using the number of rows (supersedes row_sample)
19
20       seed: int
21           reproducibility seed
22
23       n_jobs: int
24            number of jobs to run in parallel
25
26       verbose: bool
27            print progress messages and bars
28    """
29
30    def __init__(
31        self,
32        y,
33        row_sample=0.8,
34        n_samples=None,
35        seed=123,
36        n_jobs=None,
37        verbose=False,
38    ):
39        self.y = y
40        self.n_samples = n_samples
41        if self.n_samples is None:
42            assert (
43                row_sample < 1 and row_sample >= 0
44            ), "'row_sample' must satisfy 0 <= row_sample < 1"
45            self.row_sample = row_sample
46        else:
47            assert self.n_samples < len(y), "'n_samples' must be < len(y)"
48            self.row_sample = self.n_samples / len(y)
49        self.seed = seed
50        self.indices = None
51        self.n_jobs = n_jobs
52        self.verbose = verbose
53
54    def subsample(self):
55        """Returns indices of subsampled input data.
56
57        Examples:
58
59        <ul>
60            <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240105_subsampling.ipynb">20240105_subsampling.ipynb</a> </li>
61            <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240131_subsampling_nsamples.ipynb">20240131_subsampling_nsamples.ipynb</a> </li>
62        </ul>
63
64        """
65        self.indices = dosubsample(
66            y=self.y,
67            row_sample=self.row_sample,
68            seed=self.seed,
69            n_jobs=self.n_jobs,
70            verbose=self.verbose,
71        )
72        return self.indices

Subsampling class.

Attributes:

y: array-like, shape = [n_samples]
    Target values.

row_sample: double
    subsampling fraction

n_samples: int
    subsampling by using the number of rows (supersedes row_sample)

seed: int
    reproducibility seed

n_jobs: int
    number of jobs to run in parallel

verbose: bool
    print progress messages and bars
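
A minimal usage sketch; the target values below are illustrative:

```python
import nnetsauce as ns
import numpy as np

y = np.random.RandomState(123).randint(0, 2, size=100)

sampler = ns.SubSampler(y=y, row_sample=0.5, seed=123)
idx = sampler.subsample()  # indices of the retained rows
print(len(idx), idx[:10])
```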

def subsample(self):
54    def subsample(self):
55        """Returns indices of subsampled input data.
56
57        Examples:
58
59        <ul>
60            <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240105_subsampling.ipynb">20240105_subsampling.ipynb</a> </li>
61            <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240131_subsampling_nsamples.ipynb">20240131_subsampling_nsamples.ipynb</a> </li>
62        </ul>
63
64        """
65        self.indices = dosubsample(
66            y=self.y,
67            row_sample=self.row_sample,
68            seed=self.seed,
69            n_jobs=self.n_jobs,
70            verbose=self.verbose,
71        )
72        return self.indices

Returns indices of subsampled input data.

Examples: