nnetsauce
```python
from .attention import AttentionMechanism
from .base.base import Base
from .base.baseRegressor import BaseRegressor
from .boosting.adaBoostClassifier import AdaBoostClassifier
from .custom.customClassifier import CustomClassifier
from .custom.customRegressor import CustomRegressor
from .custom.customBackpropRegressor import CustomBackPropRegressor
from .datasets import Downloader
from .deep.deepClassifier import DeepClassifier
from .deep.deepRegressor import DeepRegressor
from .deep.deepMTS import DeepMTS
from .glm.glmClassifier import GLMClassifier
from .glm.glmRegressor import GLMRegressor
from .kernel.kernel import KernelRidge
from .lazypredict.lazydeepClassifier import LazyDeepClassifier, LazyClassifier
from .lazypredict.lazydeepRegressor import LazyDeepRegressor, LazyRegressor
from .lazypredict.lazydeepClassifier import LazyDeepClassifier
from .lazypredict.lazydeepRegressor import LazyDeepRegressor
from .lazypredict.lazydeepMTS import LazyDeepMTS, LazyMTS
from .mts.mts import MTS
from .mts.mlarch import MLARCH
from .mts.classical import ClassicalMTS
from .mts.stackedmts import MTSStacker
from .multitask.multitaskClassifier import MultitaskClassifier
from .multitask.simplemultitaskClassifier import SimpleMultitaskClassifier
from .neuralnet.neuralnetregression import NeuralNetRegressor
from .neuralnet.neuralnetclassification import NeuralNetClassifier
from .optimizers.optimizer import Optimizer
from .predictioninterval import PredictionInterval
from .predictionset import PredictionSet
from .quantile.quantileregression import QuantileRegressor
from .quantile.quantileclassification import QuantileClassifier
from .randombag.randomBagClassifier import RandomBagClassifier
from .randombag.randomBagRegressor import RandomBagRegressor
from .randomfourier.randomfourier import RandomFourierEstimator
from .rff.rffridge import (
    RandomFourierFeaturesRidge,
    RandomFourierFeaturesRidgeGCV,
)
from .ridge.ridge import RidgeRegressor
from .ridge2.ridge2Classifier import Ridge2Classifier
from .ridge2.ridge2Regressor import Ridge2Regressor
from .ridge2.ridge2MultitaskClassifier import Ridge2MultitaskClassifier
from .ridge2.ridge2MTSJAX import Ridge2Forecaster
from .rvfl.bayesianrvflRegressor import BayesianRVFLRegressor
from .rvfl.bayesianrvfl2Regressor import BayesianRVFL2Regressor
from .sampling import SubSampler
from .updater import RegressorUpdater, ClassifierUpdater
from .votingregressor import MedianVotingRegressor

__all__ = [
    "AdaBoostClassifier",
    "AttentionMechanism",
    "Base",
    "BaseRegressor",
    "BayesianRVFLRegressor",
    "BayesianRVFL2Regressor",
    "ClassicalMTS",
    "CustomClassifier",
    "CustomRegressor",
    "CustomBackPropRegressor",
    "DeepClassifier",
    "DeepRegressor",
    "DeepMTS",
    "Downloader",
    "GLMClassifier",
    "GLMRegressor",
    "KernelRidge",
    "LazyClassifier",
    "LazyRegressor",
    "LazyDeepClassifier",
    "LazyDeepRegressor",
    "LazyMTS",
    "LazyDeepMTS",
    "MLARCH",
    "MedianVotingRegressor",
    "MTS",
    "MTSStacker",
    "MultitaskClassifier",
    "NeuralNetRegressor",
    "NeuralNetClassifier",
    "PredictionInterval",
    "PredictionSet",
    "SimpleMultitaskClassifier",
    "Optimizer",
    "QuantileRegressor",
    "QuantileClassifier",
    "RandomBagRegressor",
    "RandomBagClassifier",
    "RandomFourierEstimator",
    "RandomFourierFeaturesRidge",
    "RandomFourierFeaturesRidgeGCV",
    "RegressorUpdater",
    "ClassifierUpdater",
    "RidgeRegressor",
    "Ridge2Regressor",
    "Ridge2Classifier",
    "Ridge2MultitaskClassifier",
    "Ridge2Forecaster",
    "SubSampler",
]
```
````python
class AdaBoostClassifier(Boosting, ClassifierMixin):
    """AdaBoost Classification (SAMME) model class derived from class Boosting

    Parameters:

        obj: object
            any object containing a method fit (obj.fit()) and a method predict
            (obj.predict())

        n_estimators: int
            number of boosting iterations

        learning_rate: float
            learning rate of the boosting procedure

        n_hidden_features: int
            number of nodes in the hidden layer

        reg_lambda: float
            regularization parameter for weights

        reg_alpha: float
            controls the compromise between the l1 and l2 norms of the weights

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
            'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not
            (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original predictors are included (True) in model's
            fitting or not (False)

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
            no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot);
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        col_sample: float
            percentage of covariates randomly chosen for training

        row_sample: float
            percentage of rows chosen for training, by stratified bootstrapping

        seed: int
            reproducibility seed for nodes_sim=='uniform'

        verbose: int
            0 for no output, 1 for a progress bar (default is 1)

        method: str
            type of AdaBoost method, 'SAMME' (discrete) or 'SAMME.R' (real)

        backend: str
            "cpu" or "gpu" or "tpu"

    Attributes:

        alpha_: list
            AdaBoost coefficients alpha_m

        base_learners_: dict
            a dictionary containing the base learners

    Examples:

    See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/adaboost_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/adaboost_classification.py)

    ```python
    import nnetsauce as ns
    import numpy as np
    from sklearn.datasets import load_breast_cancer
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import train_test_split
    from sklearn import metrics
    from time import time

    breast_cancer = load_breast_cancer()
    Z = breast_cancer.data
    t = breast_cancer.target
    np.random.seed(123)
    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)

    # SAMME.R
    clf = LogisticRegression(solver='liblinear', multi_class='ovr',
                             random_state=123)
    fit_obj = ns.AdaBoostClassifier(clf,
                                    n_hidden_features=int(11.22338867),
                                    direct_link=True,
                                    n_estimators=250, learning_rate=0.01126343,
                                    col_sample=0.72684326, row_sample=0.86429443,
                                    dropout=0.63078613, n_clusters=2,
                                    type_clust="gmm",
                                    verbose=1, seed=123,
                                    method="SAMME.R")

    start = time()
    fit_obj.fit(X_train, y_train)
    print(f"Elapsed {time() - start}")

    start = time()
    print(fit_obj.score(X_test, y_test))
    print(f"Elapsed {time() - start}")

    preds = fit_obj.predict(X_test)

    print(metrics.classification_report(preds, y_test))
    ```

    """

    # construct the object -----
    _estimator_type = "classifier"

    def __init__(
        self,
        obj,
        n_estimators=10,
        learning_rate=0.1,
        n_hidden_features=1,
        reg_lambda=0,
        reg_alpha=0.5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=False,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        seed=123,
        verbose=1,
        method="SAMME",
        backend="cpu",
    ):
        self.type_fit = "classification"
        self.verbose = verbose
        self.method = method
        self.reg_lambda = reg_lambda
        self.reg_alpha = reg_alpha

        super().__init__(
            obj=obj,
            n_estimators=n_estimators,
            learning_rate=learning_rate,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )

        self.alpha_ = []
        self.base_learners_ = dict.fromkeys(range(n_estimators))

    def fit(self, X, y, sample_weight=None, **kwargs):
        """Fit Boosting model to training data (X, y).

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            **kwargs: additional parameters to be passed to
                self.cook_training_set or self.obj.fit

        Returns:

            self: object
        """

        assert mx.is_factor(y), "y must contain only integers"

        assert self.method in (
            "SAMME",
            "SAMME.R",
        ), "`method` must be either 'SAMME' or 'SAMME.R'"

        assert (self.reg_lambda <= 1) & (
            self.reg_lambda >= 0
        ), "must have self.reg_lambda <= 1 & self.reg_lambda >= 0"

        assert (self.reg_alpha <= 1) & (
            self.reg_alpha >= 0
        ), "must have self.reg_alpha <= 1 & self.reg_alpha >= 0"

        # training
        n, p = X.shape
        self.n_classes = len(np.unique(y))
        self.classes_ = np.unique(y)  # for compatibility with sklearn
        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn

        if sample_weight is None:
            w_m = np.repeat(1.0 / n, n)
        else:
            w_m = np.asarray(sample_weight)

        base_learner = CustomClassifier(
            self.obj,
            n_hidden_features=self.n_hidden_features,
            activation_name=self.activation_name,
            a=self.a,
            nodes_sim=self.nodes_sim,
            bias=self.bias,
            dropout=self.dropout,
            direct_link=self.direct_link,
            n_clusters=self.n_clusters,
            type_clust=self.type_clust,
            type_scaling=self.type_scaling,
            col_sample=self.col_sample,
            row_sample=self.row_sample,
            seed=self.seed,
        )

        if self.verbose == 1:
            pbar = Progbar(self.n_estimators)

        if self.method == "SAMME":
            err_m = 1e6
            err_bound = 1 - 1 / self.n_classes
            self.alpha_.append(1.0)
            x_range_n = range(n)

            for m in range(self.n_estimators):
                preds = base_learner.fit(
                    X, y, sample_weight=w_m.ravel(), **kwargs
                ).predict(X)

                self.base_learners_.update({m: deepcopy(base_learner)})

                cond = [y[i] != preds[i] for i in x_range_n]

                err_m = max(
                    sum([elt[0] * elt[1] for elt in zip(cond, w_m)]),
                    2.220446049250313e-16,
                )  # sum(w_m) == 1

                if self.reg_lambda > 0:
                    err_m += self.reg_lambda * (
                        (1 - self.reg_alpha) * 0.5 * sum([x**2 for x in w_m])
                        + self.reg_alpha * sum([abs(x) for x in w_m])
                    )

                err_m = min(err_m, err_bound)

                alpha_m = self.learning_rate * log(
                    (self.n_classes - 1) * (1 - err_m) / err_m
                )

                self.alpha_.append(alpha_m)

                w_m_temp = [exp(alpha_m * cond[i]) for i in x_range_n]

                sum_w_m = sum(w_m_temp)

                w_m = np.asarray([w_m_temp[i] / sum_w_m for i in x_range_n])

                base_learner.set_params(seed=self.seed + (m + 1) * 1000)

                if self.verbose == 1:
                    pbar.update(m)

            if self.verbose == 1:
                pbar.update(self.n_estimators)

            self.n_estimators = len(self.base_learners_)
            self.classes_ = np.unique(y)

            return self

        if self.method == "SAMME.R":
            Y = mo.one_hot_encode2(y, self.n_classes)

            if sample_weight is None:
                w_m = np.repeat(1.0 / n, n)  # (N, 1)

            else:
                w_m = np.asarray(sample_weight)

            for m in range(self.n_estimators):
                probs = base_learner.fit(
                    X, y, sample_weight=w_m.ravel(), **kwargs
                ).predict_proba(X)

                np.clip(
                    a=probs, a_min=2.220446049250313e-16, a_max=1.0, out=probs
                )

                self.base_learners_.update({m: deepcopy(base_learner)})

                w_m *= np.exp(
                    -1.0
                    * self.learning_rate
                    * (1.0 - 1.0 / self.n_classes)
                    * xlogy(Y, probs).sum(axis=1)
                )

                w_m /= np.sum(w_m)

                base_learner.set_params(seed=self.seed + (m + 1) * 1000)

                if self.verbose == 1:
                    pbar.update(m)

            if self.verbose == 1:
                pbar.update(self.n_estimators)

            self.n_estimators = len(self.base_learners_)
            self.classes_ = np.unique(y)

            return self

    def predict(self, X, **kwargs):
        """Predict test data X.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters to be passed to
                self.cook_test_set

        Returns:

            model predictions: {array-like}
        """
        return self.predict_proba(X, **kwargs).argmax(axis=1)

    def predict_proba(self, X, **kwargs):
        """Predict probabilities for test data X.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters to be passed to
                self.cook_test_set

        Returns:

            probability estimates for test data: {array-like}

        """

        n_iter = len(self.base_learners_)

        if self.method == "SAMME":
            ensemble_learner = np.zeros((X.shape[0], self.n_classes))

            # if self.verbose == 1:
            #     pbar = Progbar(n_iter)

            for idx, base_learner in self.base_learners_.items():
                preds = base_learner.predict(X, **kwargs)

                ensemble_learner += self.alpha_[idx] * mo.one_hot_encode2(
                    preds, self.n_classes
                )

                # if self.verbose == 1:
                #     pbar.update(idx)

            # if self.verbose == 1:
            #     pbar.update(n_iter)

            expit_ensemble_learner = expit(ensemble_learner)

            sum_ensemble = expit_ensemble_learner.sum(axis=1)

            return expit_ensemble_learner / sum_ensemble[:, None]

        # if self.method == "SAMME.R":
        ensemble_learner = 0

        # if self.verbose == 1:
        #     pbar = Progbar(n_iter)

        for idx, base_learner in self.base_learners_.items():
            probs = base_learner.predict_proba(X, **kwargs)

            np.clip(a=probs, a_min=2.220446049250313e-16, a_max=1.0, out=probs)

            log_preds_proba = np.log(probs)

            ensemble_learner += (
                log_preds_proba - log_preds_proba.mean(axis=1)[:, None]
            )

            # if self.verbose == 1:
            #     pbar.update(idx)

        ensemble_learner *= self.n_classes - 1

        # if self.verbose == 1:
        #     pbar.update(n_iter)

        expit_ensemble_learner = expit(ensemble_learner)

        sum_ensemble = expit_ensemble_learner.sum(axis=1)

        return expit_ensemble_learner / sum_ensemble[:, None]

    @property
    def _estimator_type(self):
        return "classifier"
````
AdaBoost Classification (SAMME) model class derived from class Boosting
Parameters:
obj: object
any object containing a method fit (obj.fit()) and a method predict
(obj.predict())
n_estimators: int
number of boosting iterations
learning_rate: float
learning rate of the boosting procedure
n_hidden_features: int
number of nodes in the hidden layer
reg_lambda: float
regularization parameter for weights
reg_alpha: float
controls the compromise between the l1 and l2 norms of the weights
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot);
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform'
verbose: int
0 for no output, 1 for a progress bar (default is 1)
method: str
type of AdaBoost method, 'SAMME' (discrete) or 'SAMME.R' (real)
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
alpha_: list
AdaBoost coefficients alpha_m
base_learners_: dict
a dictionary containing the base learners
Examples:
See also https://github.com/Techtonique/nnetsauce/blob/master/examples/adaboost_classification.py
```python
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time

breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)

# SAMME.R
clf = LogisticRegression(solver='liblinear', multi_class='ovr',
                         random_state=123)
fit_obj = ns.AdaBoostClassifier(clf,
                                n_hidden_features=int(11.22338867),
                                direct_link=True,
                                n_estimators=250, learning_rate=0.01126343,
                                col_sample=0.72684326, row_sample=0.86429443,
                                dropout=0.63078613, n_clusters=2,
                                type_clust="gmm",
                                verbose=1, seed=123,
                                method="SAMME.R")

start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")

start = time()
print(fit_obj.score(X_test, y_test))
print(f"Elapsed {time() - start}")

preds = fit_obj.predict(X_test)

print(metrics.classification_report(preds, y_test))
```
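For reference, the discrete SAMME step as implemented in `fit` above can be restated as follows, with $K$ classes, learning rate $\eta$ (`learning_rate`), $\lambda$ = `reg_lambda` and $\alpha$ = `reg_alpha`:

$$
\varepsilon_m \;=\; \sum_{i=1}^{n} w_i \,\mathbf{1}\{y_i \neq \hat{y}_i\}
\;+\; \lambda\Big(\tfrac{1-\alpha}{2}\sum_{i} w_i^2 \;+\; \alpha \sum_{i} |w_i|\Big),
\qquad
\alpha_m \;=\; \eta \,\log\!\Big((K-1)\,\frac{1-\varepsilon_m}{\varepsilon_m}\Big),
$$

where $\varepsilon_m$ is capped at $1 - 1/K$, and the observation weights are then reset and renormalized as $w_i \leftarrow \exp\big(\alpha_m \mathbf{1}\{y_i \neq \hat{y}_i\}\big) \big/ \sum_j \exp\big(\alpha_m \mathbf{1}\{y_j \neq \hat{y}_j\}\big)$, exactly as in the loop of the source listing.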
fit(X, y, sample_weight=None, **kwargs)
Fit Boosting model to training data (X, y).
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
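`fit` also accepts a `sample_weight` argument (not listed above). A minimal sketch, reusing `X_train`, `y_train` and `fit_obj` from the class example; the SAMME error computation assumes weights summing to 1 (see the `# sum(w_m) == 1` comment in the source), so normalized weights are a safe choice:

```python
import numpy as np

n = X_train.shape[0]
w = np.full(n, 1.0 / n)  # uniform weights, equivalent to the default
fit_obj.fit(X_train, y_train, sample_weight=w)
```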
predict(X, **kwargs)
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
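As the class source above shows, `predict` is simply the argmax of `predict_proba` over columns. A quick sanity check, reusing `fit_obj` and `X_test` from the class example:

```python
import numpy as np

proba = fit_obj.predict_proba(X_test)  # (n_samples, n_classes) row-normalized scores
assert np.array_equal(fit_obj.predict(X_test), proba.argmax(axis=1))
```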
predict_proba(X, **kwargs)
Predict probabilities for test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
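For the discrete SAMME variant, `predict_proba` aggregates the weighted one-hot votes of the stored base learners, squashes the scores with the logistic function, and renormalizes each row. Below is a standalone numpy sketch of that aggregation; `samme_aggregate`, `preds_per_learner` and `alphas` are hypothetical stand-ins for the fitted `base_learners_` and `alpha_`, not library API:

```python
import numpy as np
from scipy.special import expit

def samme_aggregate(preds_per_learner, alphas, n_classes):
    """Combine per-learner hard predictions the way the SAMME branch does."""
    n_samples = preds_per_learner[0].shape[0]
    scores = np.zeros((n_samples, n_classes))
    for alpha_m, preds in zip(alphas, preds_per_learner):
        one_hot = np.eye(n_classes)[preds]   # one-hot encode the hard predictions
        scores += alpha_m * one_hot          # weight each learner's vote
    probs = expit(scores)                    # squash scores into (0, 1)
    return probs / probs.sum(axis=1, keepdims=True)  # renormalize rows

# toy usage: two learners, three samples, two classes
print(samme_aggregate([np.array([0, 1, 1]), np.array([0, 0, 1])], [0.7, 0.3], 2))
```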
10class AttentionMechanism: 11 """ 12 A comprehensive class implementing various attention mechanisms 13 for both univariate time series and tabular data using JAX. 14 15 Supported attention types: 16 - Scaled Dot-Product Attention 17 - Additive (Bahdanau) Attention 18 - Multi-Head Attention 19 - Self-Attention 20 - Temporal Attention (for sequences) 21 - Feature Attention (for tabular data) 22 - Cross-Attention 23 - Context Vector Attention 24 """ 25 26 def __init__( 27 self, 28 input_dim: int, 29 hidden_dim: int = 64, 30 num_heads: int = 4, 31 dropout: float = 0.1, 32 seed: int = 42, 33 ): 34 """ 35 Args: 36 input_dim: Dimension of input features 37 hidden_dim: Hidden dimension for attention computations 38 num_heads: Number of attention heads for multi-head attention 39 dropout: Dropout rate 40 seed: Random seed for parameter initialization 41 """ 42 self.input_dim = input_dim 43 self.hidden_dim = hidden_dim 44 self.num_heads = num_heads 45 self.dropout = dropout 46 47 # Initialize random key 48 self.rng = random.PRNGKey(seed) 49 50 # Initialize parameters 51 self.params = self._initialize_parameters() 52 53 assert ( 54 hidden_dim % num_heads == 0 55 ), "hidden_dim must be divisible by num_heads" 56 self.head_dim = hidden_dim // num_heads 57 58 def _initialize_parameters(self) -> Dict: 59 """Initialize all network parameters using JAX""" 60 keys = random.split(self.rng, 20) 61 62 def init_weight(key, shape): 63 return random.normal(key, shape) * np.sqrt(2.0 / shape[0]) 64 65 def init_bias(shape): 66 return jnp.zeros(shape) 67 68 params = { 69 # Scaled Dot-Product Attention 70 "query_w": init_weight(keys[0], (self.input_dim, self.hidden_dim)), 71 "query_b": init_bias((self.hidden_dim,)), 72 "key_w": init_weight(keys[1], (self.input_dim, self.hidden_dim)), 73 "key_b": init_bias((self.hidden_dim,)), 74 "value_w": init_weight(keys[2], (self.input_dim, self.hidden_dim)), 75 "value_b": init_bias((self.hidden_dim,)), 76 # Additive Attention 77 "additive_query_w": init_weight( 78 keys[3], (self.input_dim, self.hidden_dim) 79 ), 80 "additive_query_b": init_bias((self.hidden_dim,)), 81 "additive_key_w": init_weight( 82 keys[4], (self.input_dim, self.hidden_dim) 83 ), 84 "additive_key_b": init_bias((self.hidden_dim,)), 85 "additive_v_w": init_weight(keys[5], (self.hidden_dim, 1)), 86 "additive_v_b": init_bias((1,)), 87 # Multi-Head Attention 88 "mha_query_w": init_weight( 89 keys[6], (self.input_dim, self.hidden_dim) 90 ), 91 "mha_query_b": init_bias((self.hidden_dim,)), 92 "mha_key_w": init_weight( 93 keys[7], (self.input_dim, self.hidden_dim) 94 ), 95 "mha_key_b": init_bias((self.hidden_dim,)), 96 "mha_value_w": init_weight( 97 keys[8], (self.input_dim, self.hidden_dim) 98 ), 99 "mha_value_b": init_bias((self.hidden_dim,)), 100 "mha_output_w": init_weight( 101 keys[9], (self.hidden_dim, self.hidden_dim) 102 ), 103 "mha_output_b": init_bias((self.hidden_dim,)), 104 # Feature Attention 105 "feature_w1": init_weight( 106 keys[10], (self.input_dim, self.hidden_dim) 107 ), 108 "feature_b1": init_bias((self.hidden_dim,)), 109 "feature_w2": init_weight( 110 keys[11], (self.hidden_dim, self.input_dim) 111 ), 112 "feature_b2": init_bias((self.input_dim,)), 113 # Temporal Attention 114 "temporal_query_w": init_weight( 115 keys[12], (self.input_dim, self.hidden_dim) 116 ), 117 "temporal_query_b": init_bias((self.hidden_dim,)), 118 "temporal_key_w": init_weight( 119 keys[13], (self.input_dim, self.hidden_dim) 120 ), 121 "temporal_key_b": init_bias((self.hidden_dim,)), 122 # Context Vector Attention 123 
"context_vector": random.normal(keys[14], (1, 1, self.hidden_dim)), 124 "context_query_w": init_weight( 125 keys[15], (self.hidden_dim, self.hidden_dim) 126 ), 127 "context_query_b": init_bias((self.hidden_dim,)), 128 "context_key_w": init_weight( 129 keys[16], (self.input_dim, self.hidden_dim) 130 ), 131 "context_key_b": init_bias((self.hidden_dim,)), 132 "context_value_w": init_weight( 133 keys[17], (self.input_dim, self.hidden_dim) 134 ), 135 "context_value_b": init_bias((self.hidden_dim,)), 136 } 137 138 return params 139 140 @staticmethod 141 @jit 142 def _apply_dropout( 143 x: jnp.ndarray, 144 key: jax.random.PRNGKey, 145 rate: float, 146 training: bool = True, 147 ) -> jnp.ndarray: 148 """Apply dropout""" 149 if training and rate > 0: 150 keep_prob = 1 - rate 151 mask = random.bernoulli(key, keep_prob, x.shape) 152 return jnp.where(mask, x / keep_prob, 0) 153 return x 154 155 @partial(jit, static_argnums=(0,)) 156 def scaled_dot_product_attention( 157 self, 158 query: jnp.ndarray, 159 key: jnp.ndarray, 160 value: jnp.ndarray, 161 params: Dict, 162 mask: Optional[jnp.ndarray] = None, 163 training: bool = False, 164 ) -> Tuple[jnp.ndarray, jnp.ndarray]: 165 """ 166 Scaled Dot-Product Attention 167 168 Args: 169 query: (batch_size, seq_len, input_dim) or (batch_size, input_dim) 170 key: (batch_size, seq_len, input_dim) 171 value: (batch_size, seq_len, input_dim) 172 params: Parameter dictionary 173 mask: Optional mask (batch_size, seq_len) 174 training: Whether in training mode 175 176 Returns: 177 context: Attended context vector 178 attention_weights: Attention weights 179 """ 180 # Project inputs 181 Q = jnp.dot(query, params["query_w"]) + params["query_b"] 182 K = jnp.dot(key, params["key_w"]) + params["key_b"] 183 V = jnp.dot(value, params["value_w"]) + params["value_b"] 184 185 # Compute attention scores 186 scores = jnp.matmul(Q, jnp.swapaxes(K, -2, -1)) 187 scores = scores / jnp.sqrt(self.hidden_dim) 188 189 # Apply mask if provided 190 if mask is not None: 191 scores = jnp.where(mask == 0, -1e9, scores) 192 193 # Compute attention weights 194 attention_weights = jax.nn.softmax(scores, axis=-1) 195 196 # Apply attention to values 197 context = jnp.matmul(attention_weights, V) 198 199 return context, attention_weights 200 201 @partial(jit, static_argnums=(0,)) 202 def additive_attention( 203 self, 204 query: jnp.ndarray, 205 key: jnp.ndarray, 206 value: jnp.ndarray, 207 params: Dict, 208 mask: Optional[jnp.ndarray] = None, 209 ) -> Tuple[jnp.ndarray, jnp.ndarray]: 210 """ 211 Additive (Bahdanau) Attention 212 213 Args: 214 query: (batch_size, hidden_dim) or (batch_size, 1, hidden_dim) 215 key: (batch_size, seq_len, hidden_dim) 216 value: (batch_size, seq_len, hidden_dim) 217 params: Parameter dictionary 218 mask: Optional mask 219 220 Returns: 221 context: Attended context vector 222 attention_weights: Attention weights 223 """ 224 # Ensure query has seq_len dimension 225 if query.ndim == 2: 226 query = jnp.expand_dims(query, axis=1) 227 228 # Project query and key 229 Q = ( 230 jnp.dot(query, params["additive_query_w"]) 231 + params["additive_query_b"] 232 ) 233 K = jnp.dot(key, params["additive_key_w"]) + params["additive_key_b"] 234 235 # Additive attention: score = v^T tanh(W_q Q + W_k K) 236 combined = jnp.tanh(Q + K) 237 scores = ( 238 jnp.dot(combined, params["additive_v_w"]) + params["additive_v_b"] 239 ) 240 scores = jnp.squeeze(scores, axis=-1) 241 242 # Apply mask if provided 243 if mask is not None: 244 scores = jnp.where(mask == 0, -1e9, scores) 245 246 # Compute 
attention weights 247 attention_weights = jax.nn.softmax(scores, axis=-1) 248 249 # Apply attention to values 250 context = jnp.matmul(jnp.expand_dims(attention_weights, axis=1), value) 251 context = jnp.squeeze(context, axis=1) 252 253 return context, attention_weights 254 255 @partial(jit, static_argnums=(0,)) 256 def multi_head_attention( 257 self, 258 query: jnp.ndarray, 259 key: jnp.ndarray, 260 value: jnp.ndarray, 261 params: Dict, 262 mask: Optional[jnp.ndarray] = None, 263 ) -> Tuple[jnp.ndarray, jnp.ndarray]: 264 """ 265 Multi-Head Attention 266 267 Args: 268 query: (batch_size, seq_len_q, input_dim) 269 key: (batch_size, seq_len_k, input_dim) 270 value: (batch_size, seq_len_v, input_dim) 271 params: Parameter dictionary 272 mask: Optional mask 273 274 Returns: 275 output: Multi-head attention output 276 attention_weights: Attention weights from all heads 277 """ 278 batch_size = query.shape[0] 279 280 # Project and reshape for multi-head attention 281 Q = jnp.dot(query, params["mha_query_w"]) + params["mha_query_b"] 282 K = jnp.dot(key, params["mha_key_w"]) + params["mha_key_b"] 283 V = jnp.dot(value, params["mha_value_w"]) + params["mha_value_b"] 284 285 Q = Q.reshape(batch_size, -1, self.num_heads, self.head_dim) 286 K = K.reshape(batch_size, -1, self.num_heads, self.head_dim) 287 V = V.reshape(batch_size, -1, self.num_heads, self.head_dim) 288 289 # Transpose for attention: (batch, num_heads, seq_len, head_dim) 290 Q = jnp.transpose(Q, (0, 2, 1, 3)) 291 K = jnp.transpose(K, (0, 2, 1, 3)) 292 V = jnp.transpose(V, (0, 2, 1, 3)) 293 294 # Compute attention scores 295 scores = jnp.matmul(Q, jnp.swapaxes(K, -2, -1)) / jnp.sqrt( 296 self.head_dim 297 ) 298 299 # Apply mask if provided 300 if mask is not None: 301 mask_expanded = jnp.expand_dims(jnp.expand_dims(mask, 1), 2) 302 scores = jnp.where(mask_expanded == 0, -1e9, scores) 303 304 # Attention weights 305 attention_weights = jax.nn.softmax(scores, axis=-1) 306 307 # Apply attention to values 308 context = jnp.matmul(attention_weights, V) 309 310 # Reshape back: (batch, seq_len, hidden_dim) 311 context = jnp.transpose(context, (0, 2, 1, 3)) 312 context = context.reshape(batch_size, -1, self.hidden_dim) 313 314 # Final linear projection 315 output = ( 316 jnp.dot(context, params["mha_output_w"]) + params["mha_output_b"] 317 ) 318 319 return output, attention_weights 320 321 @partial(jit, static_argnums=(0,)) 322 def self_attention( 323 self, x: jnp.ndarray, params: Dict, mask: Optional[jnp.ndarray] = None 324 ) -> Tuple[jnp.ndarray, jnp.ndarray]: 325 """Self-Attention mechanism""" 326 return self.scaled_dot_product_attention(x, x, x, params, mask) 327 328 @partial(jit, static_argnums=(0,)) 329 def temporal_attention( 330 self, x: jnp.ndarray, params: Dict, mask: Optional[jnp.ndarray] = None 331 ) -> Tuple[jnp.ndarray, jnp.ndarray]: 332 """ 333 Temporal Attention for time series data 334 335 Args: 336 x: (batch_size, seq_len, input_dim) 337 params: Parameter dictionary 338 mask: Optional mask 339 340 Returns: 341 context: Temporally attended context 342 attention_weights: Temporal attention weights 343 """ 344 # Use last time step as query 345 query = x[:, -1:, :] 346 347 Q = ( 348 jnp.dot(query, params["temporal_query_w"]) 349 + params["temporal_query_b"] 350 ) 351 K = jnp.dot(x, params["temporal_key_w"]) + params["temporal_key_b"] 352 353 # Compute attention scores 354 scores = jnp.matmul(Q, jnp.swapaxes(K, -2, -1)) / jnp.sqrt( 355 self.hidden_dim 356 ) 357 scores = jnp.squeeze(scores, axis=1) 358 359 # Apply mask if provided 
360 if mask is not None: 361 scores = jnp.where(mask == 0, -1e9, scores) 362 363 # Attention weights 364 attention_weights = jax.nn.softmax(scores, axis=-1) 365 366 # Apply attention 367 context = jnp.matmul(jnp.expand_dims(attention_weights, axis=1), x) 368 context = jnp.squeeze(context, axis=1) 369 370 return context, attention_weights 371 372 @partial(jit, static_argnums=(0,)) 373 def feature_attention_tabular( 374 self, x: jnp.ndarray, params: Dict 375 ) -> Tuple[jnp.ndarray, jnp.ndarray]: 376 """ 377 Feature Attention for tabular data 378 379 Args: 380 x: (batch_size, num_features) 381 params: Parameter dictionary 382 383 Returns: 384 output: Feature-weighted output 385 attention_weights: Feature importance weights 386 """ 387 # Compute feature attention weights 388 hidden = jnp.dot(x, params["feature_w1"]) + params["feature_b1"] 389 hidden = jnp.tanh(hidden) 390 logits = jnp.dot(hidden, params["feature_w2"]) + params["feature_b2"] 391 attention_weights = jax.nn.softmax(logits, axis=-1) 392 393 # Apply attention to features 394 output = x * attention_weights 395 396 return output, attention_weights 397 398 @partial(jit, static_argnums=(0,)) 399 def context_vector_attention( 400 self, x: jnp.ndarray, params: Dict, mask: Optional[jnp.ndarray] = None 401 ) -> Tuple[jnp.ndarray, jnp.ndarray]: 402 """ 403 Context Vector Attention 404 Uses a learnable global context vector as the query. 405 406 Args: 407 x: (batch_size, seq_len, input_dim) 408 params: Parameter dictionary 409 mask: Optional mask (batch_size, seq_len) 410 411 Returns: 412 context: Global context representation (batch_size, hidden_dim) 413 attention_weights: Attention weights (batch_size, seq_len) 414 """ 415 batch_size = x.shape[0] 416 417 # Expand context vector for batch 418 context_vec = jnp.broadcast_to( 419 params["context_vector"], (batch_size, 1, self.hidden_dim) 420 ) 421 422 # Project context vector and input 423 Q = ( 424 jnp.dot(context_vec, params["context_query_w"]) 425 + params["context_query_b"] 426 ) 427 K = jnp.dot(x, params["context_key_w"]) + params["context_key_b"] 428 V = jnp.dot(x, params["context_value_w"]) + params["context_value_b"] 429 430 # Compute attention scores 431 scores = jnp.matmul(Q, jnp.swapaxes(K, -2, -1)) / jnp.sqrt( 432 self.hidden_dim 433 ) 434 scores = jnp.squeeze(scores, axis=1) 435 436 # Apply mask if provided 437 if mask is not None: 438 scores = jnp.where(mask == 0, -1e9, scores) 439 440 # Compute attention weights 441 attention_weights = jax.nn.softmax(scores, axis=-1) 442 443 # Apply attention to values 444 context = jnp.matmul(jnp.expand_dims(attention_weights, axis=1), V) 445 context = jnp.squeeze(context, axis=1) 446 447 return context, attention_weights 448 449 @partial(jit, static_argnums=(0,)) 450 def cross_attention( 451 self, 452 query: jnp.ndarray, 453 key_value: jnp.ndarray, 454 params: Dict, 455 mask: Optional[jnp.ndarray] = None, 456 ) -> Tuple[jnp.ndarray, jnp.ndarray]: 457 """Cross-Attention between two different sequences""" 458 return self.scaled_dot_product_attention( 459 query, key_value, key_value, params, mask 460 ) 461 462 def __call__( 463 self, 464 x: jnp.ndarray, 465 attention_type: str = "scaled_dot_product", 466 query: Optional[jnp.ndarray] = None, 467 key_value: Optional[jnp.ndarray] = None, 468 mask: Optional[jnp.ndarray] = None, 469 training: bool = False, 470 ) -> Tuple[jnp.ndarray, jnp.ndarray]: 471 """ 472 Forward pass with specified attention mechanism 473 474 Args: 475 x: Input tensor 476 attention_type: Type of attention to use 477 query: 
Optional query for cross-attention 478 key_value: Optional key-value for cross-attention 479 mask: Optional mask 480 training: Whether in training mode 481 482 Returns: 483 output: Attention output 484 attention_weights: Attention weights 485 """ 486 if attention_type == "scaled_dot_product": 487 return self.scaled_dot_product_attention( 488 x, x, x, self.params, mask, training 489 ) 490 elif attention_type == "additive": 491 return self.additive_attention( 492 x[:, -1:, :], x, x, self.params, mask 493 ) 494 elif attention_type == "multi_head": 495 return self.multi_head_attention(x, x, x, self.params, mask) 496 elif attention_type == "self": 497 return self.self_attention(x, self.params, mask) 498 elif attention_type == "temporal": 499 return self.temporal_attention(x, self.params, mask) 500 elif attention_type == "feature": 501 return self.feature_attention_tabular(x, self.params) 502 elif attention_type == "cross": 503 if query is None or key_value is None: 504 raise ValueError( 505 "Cross-attention requires both query and key_value" 506 ) 507 return self.cross_attention(query, key_value, self.params, mask) 508 elif attention_type == "context_vector": 509 return self.context_vector_attention(x, self.params, mask) 510 else: 511 raise ValueError(f"Unknown attention type: {attention_type}")
A comprehensive class implementing various attention mechanisms for both univariate time series and tabular data using JAX.
Supported attention types:
- Scaled Dot-Product Attention
- Additive (Bahdanau) Attention
- Multi-Head Attention
- Self-Attention
- Temporal Attention (for sequences)
- Feature Attention (for tabular data)
- Cross-Attention
- Context Vector Attention
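A minimal usage sketch based on the constructor and `__call__` shown in the source above; array shapes are illustrative, and JAX is assumed to be available:

```python
import jax.numpy as jnp
from jax import random
from nnetsauce import AttentionMechanism

# toy batch of 8 sequences: (batch, seq_len, input_dim)
x = random.normal(random.PRNGKey(0), (8, 24, 3))

att = AttentionMechanism(input_dim=3, hidden_dim=64, num_heads=4, seed=42)

# temporal attention: the last time step attends over the whole sequence
context, weights = att(x, attention_type="temporal")
print(context.shape, weights.shape)  # (8, 3) and (8, 24) expected

# feature attention for tabular data: (batch, num_features)
x_tab = random.normal(random.PRNGKey(1), (8, 3))
out, feat_weights = att(x_tab, attention_type="feature")
```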
48class Base(BaseEstimator): 49 """Base model from which all the other classes inherit. 50 51 This class contains the most important data preprocessing/feature engineering methods. 52 53 Parameters: 54 55 n_hidden_features: int 56 number of nodes in the hidden layer 57 58 activation_name: str 59 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 60 61 a: float 62 hyperparameter for 'prelu' or 'elu' activation function 63 64 nodes_sim: str 65 type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton', 66 'uniform' 67 68 bias: boolean 69 indicates if the hidden layer contains a bias term (True) or 70 not (False) 71 72 dropout: float 73 regularization parameter; (random) percentage of nodes dropped out 74 of the training 75 76 direct_link: boolean 77 indicates if the original features are included (True) in model's 78 fitting or not (False) 79 80 n_clusters: int 81 number of clusters for type_clust='kmeans' or type_clust='gmm' 82 clustering (could be 0: no clustering) 83 84 cluster_encode: bool 85 defines how the variable containing clusters is treated (default is one-hot); 86 if `False`, then labels are used, without one-hot encoding 87 88 type_clust: str 89 type of clustering method: currently k-means ('kmeans') or Gaussian 90 Mixture Model ('gmm') 91 92 type_scaling: a tuple of 3 strings 93 scaling methods for inputs, hidden layer, and clustering respectively 94 (and when relevant). 95 Currently available: standardization ('std') or MinMax scaling ('minmax') or robust scaling ('robust') or max absolute scaling ('maxabs') 96 97 col_sample: float 98 percentage of features randomly chosen for training 99 100 row_sample: float 101 percentage of rows chosen for training, by stratified bootstrapping 102 103 seed: int 104 reproducibility seed for nodes_sim=='uniform', clustering and dropout 105 106 backend: str 107 "cpu" or "gpu" or "tpu" 108 109 """ 110 111 # construct the object ----- 112 113 def __init__( 114 self, 115 n_hidden_features=5, 116 activation_name="relu", 117 a=0.01, 118 nodes_sim="sobol", 119 bias=True, 120 dropout=0, 121 direct_link=True, 122 n_clusters=2, 123 cluster_encode=True, 124 type_clust="kmeans", 125 type_scaling=("std", "std", "std"), 126 col_sample=1, 127 row_sample=1, 128 seed=123, 129 backend="cpu", 130 ): 131 # input checks ----- 132 133 sys_platform = platform.system() 134 135 if (sys_platform == "Windows") and (backend in ("gpu", "tpu")): 136 warnings.warn( 137 "No GPU/TPU computing on Windows yet, backend set to 'cpu'" 138 ) 139 backend = "cpu" 140 141 assert activation_name in ( 142 "relu", 143 "tanh", 144 "sigmoid", 145 "prelu", 146 "elu", 147 ), "'activation_name' must be in ('relu', 'tanh', 'sigmoid','prelu', 'elu')" 148 149 assert nodes_sim in ( 150 "sobol", 151 "hammersley", 152 "uniform", 153 "halton", 154 ), "'nodes_sim' must be in ('sobol', 'hammersley', 'uniform', 'halton')" 155 156 assert type_clust in ( 157 "kmeans", 158 "gmm", 159 ), "'type_clust' must be in ('kmeans', 'gmm')" 160 161 assert (len(type_scaling) == 3) & all( 162 type_scaling[i] in ("minmax", "std", "robust", "maxabs") 163 for i in range(len(type_scaling)) 164 ), "'type_scaling' must have length 3, and available scaling methods are 'minmax' scaling, standardization ('std'), robust scaling ('robust') and max absolute ('maxabs')" 165 166 assert (col_sample >= 0) & ( 167 col_sample <= 1 168 ), "'col_sample' must be comprised between 0 and 1 (both included)" 169 170 assert backend in ( 171 "cpu", 172 "gpu", 173 "tpu", 174 ), "must have 'backend' in ('cpu', 
'gpu', 'tpu')" 175 176 self.n_hidden_features = n_hidden_features 177 self.activation_name = activation_name 178 self.a = a 179 self.nodes_sim = nodes_sim 180 self.bias = bias 181 self.seed = seed 182 self.backend = backend 183 self.dropout = dropout 184 self.direct_link = direct_link 185 self.cluster_encode = cluster_encode 186 self.type_clust = type_clust 187 self.type_scaling = type_scaling 188 self.col_sample = col_sample 189 self.row_sample = row_sample 190 self.n_clusters = n_clusters 191 if isinstance(self, RegressorMixin): 192 self.type_fit = "regression" 193 elif isinstance(self, ClassifierMixin): 194 self.type_fit = "classification" 195 self.subsampler_ = None 196 self.index_col_ = None 197 self.index_row_ = True 198 self.clustering_obj_ = None 199 self.clustering_scaler_ = None 200 self.nn_scaler_ = None 201 self.scaler_ = None 202 self.encoder_ = None 203 self.W_ = None 204 self.X_ = None 205 self.y_ = None 206 self.y_mean_ = None 207 self.beta_ = None 208 209 # activation function ----- 210 if sys_platform in ("Linux", "Darwin"): 211 activation_options = { 212 "relu": ac.relu if (self.backend == "cpu") else jnn.relu, 213 "tanh": np.tanh if (self.backend == "cpu") else jnp.tanh, 214 "sigmoid": ( 215 ac.sigmoid if (self.backend == "cpu") else jnn.sigmoid 216 ), 217 "prelu": partial(ac.prelu, a=a), 218 "elu": ( 219 partial(ac.elu, a=a) 220 if (self.backend == "cpu") 221 else partial(jnn.elu, a=a) 222 ), 223 } 224 else: # on Windows currently, no JAX 225 activation_options = { 226 "relu": ( 227 ac.relu if (self.backend == "cpu") else NotImplementedError 228 ), 229 "tanh": ( 230 np.tanh if (self.backend == "cpu") else NotImplementedError 231 ), 232 "sigmoid": ( 233 ac.sigmoid 234 if (self.backend == "cpu") 235 else NotImplementedError 236 ), 237 "prelu": partial(ac.prelu, a=a), 238 "elu": ( 239 partial(ac.elu, a=a) 240 if (self.backend == "cpu") 241 else NotImplementedError 242 ), 243 } 244 self.activation_func = activation_options[activation_name] 245 246 # "preprocessing" methods to be inherited ----- 247 248 def encode_clusters(self, X=None, predict=False, scaler=None, **kwargs): # 249 """Create new covariates with kmeans or GMM clustering 250 251 Parameters: 252 253 X: {array-like}, shape = [n_samples, n_features] 254 Training vectors, where n_samples is the number 255 of samples and n_features is the number of features. 
256 257 predict: boolean 258 is False on training set and True on test set 259 260 scaler: {object} of class StandardScaler, MinMaxScaler, RobustScaler or MaxAbsScaler 261 if scaler has already been fitted on training data (online training), it can be passed here 262 263 **kwargs: 264 additional parameters to be passed to the 265 clustering method 266 267 Returns: 268 269 Clusters' matrix, one-hot encoded: {array-like} 270 271 """ 272 273 np.random.seed(self.seed) 274 275 if X is None: 276 X = self.X_ 277 278 if isinstance(X, pd.DataFrame): 279 X = copy.deepcopy(X.values.astype(float)) 280 281 if len(X.shape) == 1: 282 X = X.reshape(1, -1) 283 284 if predict is False: # encode training set 285 # scale input data before clustering 286 self.clustering_scaler_, scaled_X = mo.scale_covariates( 287 X, choice=self.type_scaling[2], scaler=self.clustering_scaler_ 288 ) 289 290 self.clustering_obj_, X_clustered = mo.cluster_covariates( 291 scaled_X, 292 self.n_clusters, 293 self.seed, 294 type_clust=self.type_clust, 295 **kwargs 296 ) 297 298 if self.cluster_encode: 299 return mo.one_hot_encode(X_clustered, self.n_clusters).astype( 300 np.float16 301 ) 302 303 return X_clustered.astype(np.float16) 304 305 # if predict == True, encode test set 306 X_clustered = self.clustering_obj_.predict( 307 self.clustering_scaler_.transform(X) 308 ) 309 310 if self.cluster_encode == True: 311 return mo.one_hot_encode(X_clustered, self.n_clusters).astype( 312 np.float16 313 ) 314 315 return X_clustered.astype(np.float16) 316 317 def create_layer(self, scaled_X, W=None): 318 """Create hidden layer. 319 320 Parameters: 321 322 scaled_X: {array-like}, shape = [n_samples, n_features] 323 Training vectors, where n_samples is the number 324 of samples and n_features is the number of features 325 326 W: {array-like}, shape = [n_features, hidden_features] 327 if provided, constructs the hidden layer with W; otherwise computed internally 328 329 Returns: 330 331 Hidden layer matrix: {array-like} 332 333 """ 334 335 n_features = scaled_X.shape[1] 336 337 # hash_sim = { 338 # "sobol": generate_sobol, 339 # "hammersley": generate_hammersley, 340 # "uniform": generate_uniform, 341 # "halton": generate_halton 342 # } 343 344 if self.bias is False: # no bias term in the hidden layer 345 if W is None: 346 if self.nodes_sim == "sobol": 347 self.W_ = generate_sobol( 348 n_dims=n_features, 349 n_points=self.n_hidden_features, 350 seed=self.seed, 351 ) 352 elif self.nodes_sim == "hammersley": 353 self.W_ = generate_hammersley( 354 n_dims=n_features, 355 n_points=self.n_hidden_features, 356 seed=self.seed, 357 ) 358 elif self.nodes_sim == "uniform": 359 self.W_ = generate_uniform( 360 n_dims=n_features, 361 n_points=self.n_hidden_features, 362 seed=self.seed, 363 ) 364 else: 365 self.W_ = generate_halton( 366 n_dims=n_features, 367 n_points=self.n_hidden_features, 368 seed=self.seed, 369 ) 370 371 assert ( 372 scaled_X.shape[1] == self.W_.shape[0] 373 ), "check dimensions of covariates X and matrix W" 374 375 return mo.dropout( 376 x=self.activation_func( 377 mo.safe_sparse_dot( 378 a=scaled_X, b=self.W_, backend=self.backend 379 ) 380 ), 381 drop_prob=self.dropout, 382 seed=self.seed, 383 ) 384 385 # W is not none 386 assert ( 387 scaled_X.shape[1] == W.shape[0] 388 ), "check dimensions of covariates X and matrix W" 389 390 # self.W_ = W 391 return mo.dropout( 392 x=self.activation_func( 393 mo.safe_sparse_dot(a=scaled_X, b=W, backend=self.backend) 394 ), 395 drop_prob=self.dropout, 396 seed=self.seed, 397 ) 398 399 # with bias term 
in the hidden layer 400 if W is None: 401 n_features_1 = n_features + 1 402 403 if self.nodes_sim == "sobol": 404 self.W_ = generate_sobol( 405 n_dims=n_features_1, 406 n_points=self.n_hidden_features, 407 seed=self.seed, 408 ) 409 elif self.nodes_sim == "hammersley": 410 self.W_ = generate_hammersley( 411 n_dims=n_features_1, 412 n_points=self.n_hidden_features, 413 seed=self.seed, 414 ) 415 elif self.nodes_sim == "uniform": 416 self.W_ = generate_uniform( 417 n_dims=n_features_1, 418 n_points=self.n_hidden_features, 419 seed=self.seed, 420 ) 421 else: 422 self.W_ = generate_halton( 423 n_dims=n_features_1, 424 n_points=self.n_hidden_features, 425 seed=self.seed, 426 ) 427 428 # self.W_ = hash_sim[self.nodes_sim]( 429 # n_dims=n_features_1, 430 # n_points=self.n_hidden_features, 431 # seed=self.seed, 432 # ) 433 434 return mo.dropout( 435 x=self.activation_func( 436 mo.safe_sparse_dot( 437 a=mo.cbind( 438 np.ones(scaled_X.shape[0]), 439 scaled_X, 440 backend=self.backend, 441 ), 442 b=self.W_, 443 backend=self.backend, 444 ) 445 ), 446 drop_prob=self.dropout, 447 seed=self.seed, 448 ) 449 450 # W is not None 451 # self.W_ = W 452 return mo.dropout( 453 x=self.activation_func( 454 mo.safe_sparse_dot( 455 a=mo.cbind( 456 np.ones(scaled_X.shape[0]), 457 scaled_X, 458 backend=self.backend, 459 ), 460 b=W, 461 backend=self.backend, 462 ) 463 ), 464 drop_prob=self.dropout, 465 seed=self.seed, 466 ) 467 468 def _jax_create_layer( 469 self, scaled_X: jnp.ndarray, W: Optional[jnp.ndarray] = None 470 ) -> jnp.ndarray: 471 """JAX-compatible version of create_layer that exactly matches the original functionality.""" 472 key = jax.random.PRNGKey(self.seed) 473 n_features = scaled_X.shape[1] 474 475 # Generate weights if not provided 476 if W is None: 477 if self.bias: 478 n_features_1 = n_features + 1 479 shape = (n_features_1, self.n_hidden_features) 480 else: 481 shape = (n_features, self.n_hidden_features) 482 483 # JAX-compatible weight generation matching original behavior 484 if self.nodes_sim == "sobol": 485 W_np = generate_sobol( 486 n_dims=n_features_1, 487 n_points=self.n_hidden_features, 488 seed=self.seed, 489 ) 490 W = jnp.asarray(W_np) 491 elif self.nodes_sim == "hammersley": 492 W_np = generate_hammersley( 493 n_dims=n_features_1, 494 n_points=self.n_hidden_features, 495 seed=self.seed, 496 ) 497 W = jnp.asarray(W_np) 498 elif self.nodes_sim == "uniform": 499 key, subkey = jax.random.split(key) 500 W = jax.random.uniform( 501 subkey, shape=shape, minval=-1.0, maxval=1.0 502 ) 503 else: # halton 504 W_np = generate_halton( 505 n_dims=n_features_1, 506 n_points=self.n_hidden_features, 507 seed=self.seed, 508 ) 509 W = jnp.asarray(W_np) 510 511 self.W_ = np.array(W) # Store as numpy for original methods 512 513 # Prepare input with bias if needed 514 if self.bias: 515 X_with_bias = jnp.hstack( 516 [jnp.ones((scaled_X.shape[0], 1)), scaled_X] 517 ) 518 print("X_with_bias shape:", X_with_bias.shape) 519 print("W shape:", W.shape) 520 linear_output = jnp.dot(X_with_bias, W) 521 else: 522 linear_output = jnp.dot(scaled_X, W) 523 524 # Apply activation function 525 if self.activation_name == "relu": 526 activated = jax.nn.relu(linear_output) 527 elif self.activation_name == "tanh": 528 activated = jnp.tanh(linear_output) 529 elif self.activation_name == "sigmoid": 530 activated = jax.nn.sigmoid(linear_output) 531 else: # leaky relu 532 activated = jax.nn.leaky_relu(linear_output, negative_slope=self.a) 533 534 # Apply dropout 535 if self.dropout > 0: 536 key, subkey = jax.random.split(key) 537 
mask = jax.random.bernoulli( 538 subkey, p=1 - self.dropout, shape=activated.shape 539 ) 540 activated = jnp.where(mask, activated / (1 - self.dropout), 0) 541 542 return activated 543 544 def cook_training_set(self, y=None, X=None, W=None, **kwargs): 545 """Create new hidden features for training set, with hidden layer, center the response. 546 547 Parameters: 548 549 y: array-like, shape = [n_samples] 550 Target values 551 552 X: {array-like}, shape = [n_samples, n_features] 553 Training vectors, where n_samples is the number 554 of samples and n_features is the number of features 555 556 W: {array-like}, shape = [n_features, hidden_features] 557 if provided, constructs the hidden layer via W 558 559 Returns: 560 561 (centered response, direct link + hidden layer matrix): {tuple} 562 563 """ 564 565 # either X and y are stored or not 566 # assert ((y is None) & (X is None)) | ((y is not None) & (X is not None)) 567 if self.n_hidden_features > 0: # has a hidden layer 568 assert ( 569 len(self.type_scaling) >= 2 570 ), "must have len(self.type_scaling) >= 2 when self.n_hidden_features > 0" 571 572 if X is None: 573 if self.col_sample == 1: 574 input_X = self.X_ 575 else: 576 n_features = self.X_.shape[1] 577 new_n_features = int(np.ceil(n_features * self.col_sample)) 578 assert ( 579 new_n_features >= 1 580 ), "check class attribute 'col_sample' and the number of covariates provided for X" 581 np.random.seed(self.seed) 582 index_col = np.random.choice( 583 range(n_features), size=new_n_features, replace=False 584 ) 585 self.index_col_ = index_col 586 input_X = self.X_[:, self.index_col_] 587 588 else: # X is not None # keep X vs self.X_ 589 if isinstance(X, pd.DataFrame): 590 X = copy.deepcopy(X.values.astype(float)) 591 592 if self.col_sample == 1: 593 input_X = X 594 else: 595 n_features = X.shape[1] 596 new_n_features = int(np.ceil(n_features * self.col_sample)) 597 assert ( 598 new_n_features >= 1 599 ), "check class attribute 'col_sample' and the number of covariates provided for X" 600 np.random.seed(self.seed) 601 index_col = np.random.choice( 602 range(n_features), size=new_n_features, replace=False 603 ) 604 self.index_col_ = index_col 605 input_X = X[:, self.index_col_] 606 607 if self.n_clusters <= 0: 608 # data without any clustering: self.n_clusters is None ----- 609 610 if self.n_hidden_features > 0: # with hidden layer 611 self.nn_scaler_, scaled_X = mo.scale_covariates( 612 input_X, choice=self.type_scaling[1], scaler=self.nn_scaler_ 613 ) 614 Phi_X = ( 615 self.create_layer(scaled_X) 616 if W is None 617 else self.create_layer(scaled_X, W=W) 618 ) 619 Z = ( 620 mo.cbind(input_X, Phi_X, backend=self.backend) 621 if self.direct_link is True 622 else Phi_X 623 ) 624 self.scaler_, scaled_Z = mo.scale_covariates( 625 Z, choice=self.type_scaling[0], scaler=self.scaler_ 626 ) 627 else: # no hidden layer 628 Z = input_X 629 self.scaler_, scaled_Z = mo.scale_covariates( 630 Z, choice=self.type_scaling[0], scaler=self.scaler_ 631 ) 632 633 else: 634 # data with clustering: self.n_clusters is not None ----- # keep 635 636 augmented_X = mo.cbind( 637 input_X, 638 self.encode_clusters(input_X, **kwargs), 639 backend=self.backend, 640 ) 641 642 if self.n_hidden_features > 0: # with hidden layer 643 self.nn_scaler_, scaled_X = mo.scale_covariates( 644 augmented_X, 645 choice=self.type_scaling[1], 646 scaler=self.nn_scaler_, 647 ) 648 Phi_X = ( 649 self.create_layer(scaled_X) 650 if W is None 651 else self.create_layer(scaled_X, W=W) 652 ) 653 Z = ( 654 mo.cbind(augmented_X, Phi_X, 
backend=self.backend) 655 if self.direct_link is True 656 else Phi_X 657 ) 658 self.scaler_, scaled_Z = mo.scale_covariates( 659 Z, choice=self.type_scaling[0], scaler=self.scaler_ 660 ) 661 else: # no hidden layer 662 Z = augmented_X 663 self.scaler_, scaled_Z = mo.scale_covariates( 664 Z, choice=self.type_scaling[0], scaler=self.scaler_ 665 ) 666 667 # Returning model inputs ----- 668 if mx.is_factor(y) is False: # regression 669 # center y 670 if y is None: 671 self.y_mean_, centered_y = mo.center_response(self.y_) 672 else: 673 self.y_mean_, centered_y = mo.center_response(y) 674 675 # y is subsampled 676 if self.row_sample < 1: 677 n, p = Z.shape 678 679 self.subsampler_ = ( 680 SubSampler( 681 y=self.y_, row_sample=self.row_sample, seed=self.seed 682 ) 683 if y is None 684 else SubSampler( 685 y=y, row_sample=self.row_sample, seed=self.seed 686 ) 687 ) 688 689 self.index_row_ = self.subsampler_.subsample() 690 691 n_row_sample = len(self.index_row_) 692 # regression 693 return ( 694 centered_y[self.index_row_].reshape(n_row_sample), 695 self.scaler_.transform( 696 Z[self.index_row_, :].reshape(n_row_sample, p) 697 ), 698 ) 699 # y is not subsampled 700 # regression 701 return (centered_y, self.scaler_.transform(Z)) 702 703 # classification 704 # y is subsampled 705 if self.row_sample < 1: 706 n, p = Z.shape 707 708 self.subsampler_ = ( 709 SubSampler( 710 y=self.y_, row_sample=self.row_sample, seed=self.seed 711 ) 712 if y is None 713 else SubSampler(y=y, row_sample=self.row_sample, seed=self.seed) 714 ) 715 716 self.index_row_ = self.subsampler_.subsample() 717 718 n_row_sample = len(self.index_row_) 719 # classification 720 return ( 721 y[self.index_row_].reshape(n_row_sample), 722 self.scaler_.transform( 723 Z[self.index_row_, :].reshape(n_row_sample, p) 724 ), 725 ) 726 # y is not subsampled 727 # classification 728 return (y, self.scaler_.transform(Z)) 729 730 def cook_test_set(self, X, **kwargs): 731 """Transform data from test set, with hidden layer. 
732 733 Parameters: 734 735 X: {array-like}, shape = [n_samples, n_features] 736 Training vectors, where n_samples is the number 737 of samples and n_features is the number of features 738 739 **kwargs: additional parameters to be passed to self.encode_cluster 740 741 Returns: 742 743 Transformed test set : {array-like} 744 """ 745 746 if isinstance(X, pd.DataFrame): 747 X = copy.deepcopy(X.values.astype(float)) 748 749 if len(X.shape) == 1: 750 X = X.reshape(1, -1) 751 752 if ( 753 self.n_clusters == 0 754 ): # data without clustering: self.n_clusters is None ----- 755 if self.n_hidden_features > 0: 756 # if hidden layer 757 scaled_X = ( 758 self.nn_scaler_.transform(X) 759 if (self.col_sample == 1) 760 else self.nn_scaler_.transform(X[:, self.index_col_]) 761 ) 762 Phi_X = self.create_layer(scaled_X, self.W_) 763 if self.direct_link: 764 return self.scaler_.transform( 765 mo.cbind(scaled_X, Phi_X, backend=self.backend) 766 ) 767 # when self.direct_link == False 768 return self.scaler_.transform(Phi_X) 769 # if no hidden layer # self.n_hidden_features == 0 770 return self.scaler_.transform(X) 771 772 # data with clustering: self.n_clusters > 0 ----- 773 if self.col_sample == 1: 774 predicted_clusters = self.encode_clusters( 775 X=X, predict=True, **kwargs 776 ) 777 augmented_X = mo.cbind(X, predicted_clusters, backend=self.backend) 778 else: 779 predicted_clusters = self.encode_clusters( 780 X=X[:, self.index_col_], predict=True, **kwargs 781 ) 782 augmented_X = mo.cbind( 783 X[:, self.index_col_], predicted_clusters, backend=self.backend 784 ) 785 786 if self.n_hidden_features > 0: # if hidden layer 787 scaled_X = self.nn_scaler_.transform(augmented_X) 788 Phi_X = self.create_layer(scaled_X, self.W_) 789 if self.direct_link: 790 return self.scaler_.transform( 791 mo.cbind(augmented_X, Phi_X, backend=self.backend) 792 ) 793 return self.scaler_.transform(Phi_X) 794 795 # if no hidden layer 796 return self.scaler_.transform(augmented_X) 797 798 def cook_training_set_jax(self, y=None, X=None, W=None, **kwargs): 799 """JAX-compatible version of cook_training_set that maintains side effects.""" 800 # Initialize random key 801 key = jax.random.PRNGKey(self.seed) 802 803 # Convert inputs to JAX arrays 804 X = jnp.asarray(X) if X is not None else jnp.asarray(self.X_) 805 y = jnp.asarray(y) if y is not None else jnp.asarray(self.y_) 806 807 # Handle column sampling 808 if self.col_sample < 1: 809 n_features = X.shape[1] 810 new_n_features = int(jnp.ceil(n_features * self.col_sample)) 811 assert new_n_features >= 1, "Invalid col_sample" 812 813 key, subkey = jax.random.split(key) 814 index_col = jax.random.choice( 815 subkey, n_features, shape=(new_n_features,), replace=False 816 ) 817 self.index_col_ = np.array( 818 index_col 819 ) # Store as numpy for original methods 820 input_X = X[:, index_col] 821 n_features = ( 822 new_n_features # Update n_features after column sampling 823 ) 824 else: 825 input_X = X 826 n_features = X.shape[1] 827 828 augmented_X = input_X 829 830 # JAX-compatible scaling 831 def jax_scale(data, mean=None, std=None): 832 if mean is None: 833 mean = jnp.mean(data, axis=0) 834 if std is None: 835 std = jnp.std(data, axis=0) 836 return (data - mean) / (std + 1e-10), mean, std 837 838 # Hidden layer processing 839 if self.n_hidden_features > 0: 840 # Initialize weights if not provided 841 if W is None: 842 shape = (n_features, self.n_hidden_features) 843 844 # JAX-compatible weight generation 845 if self.nodes_sim == "uniform": 846 key, subkey = jax.random.split(key) 847 W = 
jax.random.uniform( 848 subkey, shape=shape, minval=-1.0, maxval=1.0 849 ) * (1 / jnp.sqrt(n_features)) 850 else: 851 # For other sequences, use numpy generation then convert to JAX 852 if self.nodes_sim == "sobol": 853 W_np = generate_sobol( 854 n_dims=shape[0], 855 n_points=shape[1], 856 seed=self.seed, 857 ) 858 elif self.nodes_sim == "hammersley": 859 W_np = generate_hammersley( 860 n_dims=shape[0], 861 n_points=shape[1], 862 seed=self.seed, 863 ) 864 elif self.nodes_sim == "halton": 865 W_np = generate_halton( 866 n_dims=shape[0], 867 n_points=shape[1], 868 seed=self.seed, 869 ) 870 else: # default to uniform 871 key, subkey = jax.random.split(key) 872 W = jax.random.uniform( 873 subkey, shape=shape, minval=-1.0, maxval=1.0 874 ) * (1 / jnp.sqrt(n_features)) 875 876 if self.nodes_sim in ["sobol", "hammersley", "halton"]: 877 W = jnp.asarray(W_np) * (1 / jnp.sqrt(n_features)) 878 879 self.W_ = np.array(W) # Store as numpy for original methods 880 881 # Scale features 882 scaled_X, self.nn_mean_, self.nn_std_ = jax_scale( 883 augmented_X, 884 getattr(self, "nn_mean_", None), 885 getattr(self, "nn_std_", None), 886 ) 887 888 # Create hidden layer with proper bias handling 889 linear_output = jnp.dot(scaled_X, W) 890 891 # Apply activation 892 if self.activation_name == "relu": 893 Phi_X = jax.nn.relu(linear_output) 894 elif self.activation_name == "tanh": 895 Phi_X = jnp.tanh(linear_output) 896 elif self.activation_name == "sigmoid": 897 Phi_X = jax.nn.sigmoid(linear_output) 898 else: # leaky relu 899 Phi_X = jax.nn.leaky_relu(linear_output, negative_slope=self.a) 900 901 # Apply dropout 902 if self.dropout > 0: 903 key, subkey = jax.random.split(key) 904 mask = jax.random.bernoulli( 905 subkey, p=1 - self.dropout, shape=Phi_X.shape 906 ) 907 Phi_X = jnp.where(mask, Phi_X / (1 - self.dropout), 0) 908 909 Z = jnp.hstack([scaled_X, Phi_X]) if self.direct_link else Phi_X 910 else: 911 Z = augmented_X 912 913 # Final scaling 914 scaled_Z, self.scale_mean_, self.scale_std_ = jax_scale( 915 Z, 916 getattr(self, "scale_mean_", None), 917 getattr(self, "scale_std_", None), 918 ) 919 920 # Center response for regression 921 if not hasattr(mx, "is_factor") or not mx.is_factor( 922 y 923 ): # regression case 924 self.y_mean_ = float( 925 jnp.mean(y) 926 ) # Convert to Python float for compatibility 927 centered_y = y - self.y_mean_ 928 else: 929 centered_y = y 930 931 # Handle row sampling 932 if self.row_sample < 1: 933 key, subkey = jax.random.split(key) 934 n_samples = Z.shape[0] 935 n_row_sample = int(jnp.ceil(n_samples * self.row_sample)) 936 index_row = jax.random.choice( 937 subkey, n_samples, shape=(n_row_sample,), replace=False 938 ) 939 self.index_row_ = np.array( 940 index_row 941 ) # Store as numpy for original methods 942 return (centered_y[index_row], scaled_Z[index_row]) 943 944 return (centered_y, scaled_Z) 945 946 def cook_test_set_jax(self, X, **kwargs): 947 """JAX-compatible test set processing with matching dimension handling.""" 948 X = jnp.asarray(X) 949 950 if len(X.shape) == 1: 951 X = X.reshape(1, -1) 952 953 # Handle column sampling 954 input_X = ( 955 X if self.col_sample == 1 else X[:, jnp.asarray(self.index_col_)] 956 ) 957 958 augmented_X = input_X 959 960 # JAX-compatible scaling 961 scaled_X = (augmented_X - self.nn_mean_) / (self.nn_std_ + 1e-10) 962 963 # Process hidden layer if needed 964 if self.n_hidden_features > 0: 965 Phi_X = self._jax_create_layer(scaled_X, jnp.asarray(self.W_)) 966 Z = jnp.hstack([scaled_X, Phi_X]) if self.direct_link else Phi_X 967 else: 
968 Z = augmented_X 969 970 # Final scaling 971 scaled_Z = (Z - self.scale_mean_) / (self.scale_std_ + 1e-10) 972 973 return scaled_Z 974 975 def _jax_create_layer(self, X, W): 976 """JAX-compatible hidden layer creation.""" 977 # print("X", X.shape) 978 # print("W", W.shape) 979 # print("self.W_", self.W_.shape) 980 linear_output = jnp.dot(X, W) 981 982 if self.activation_name == "relu": 983 return jax.nn.relu(linear_output) 984 elif self.activation_name == "tanh": 985 return jnp.tanh(linear_output) 986 elif self.activation_name == "sigmoid": 987 return jax.nn.sigmoid(linear_output) 988 else: # leaky relu 989 return jax.nn.leaky_relu(linear_output, negative_slope=self.a) 990 991 def cross_val_score( 992 self, 993 X, 994 y, 995 cv=5, 996 scoring="accuracy", 997 random_state=42, 998 n_jobs=-1, 999 epsilon=0.5, 1000 penalized=True, 1001 objective="abs", 1002 **kwargs 1003 ): 1004 """ 1005 Penalized Cross-validation score for a model. 1006 1007 Parameters: 1008 1009 X: {array-like}, shape = [n_samples, n_features] 1010 Training vectors, where n_samples is the number 1011 of samples and n_features is the number of features 1012 1013 y: array-like, shape = [n_samples] 1014 Target values 1015 1016 X_test: {array-like}, shape = [n_samples, n_features] 1017 Test vectors, where n_samples is the number 1018 of samples and n_features is the number of features 1019 1020 y_test: array-like, shape = [n_samples] 1021 Target values 1022 1023 cv: int 1024 Number of folds 1025 1026 scoring: str 1027 Scoring metric 1028 1029 random_state: int 1030 Random state 1031 1032 n_jobs: int 1033 Number of jobs to run in parallel 1034 1035 epsilon: float 1036 Penalty parameter 1037 1038 penalized: bool 1039 Whether to obtain penalized cross-validation score or not 1040 1041 objective: str 1042 'abs': Minimize the absolute difference between cross-validation score and validation score 1043 'relative': Minimize the relative difference between cross-validation score and validation score 1044 Returns: 1045 1046 A namedtuple with the following fields: 1047 - cv_score: float 1048 cross-validation score 1049 - val_score: float 1050 validation score 1051 - penalized_score: float 1052 penalized cross-validation score: cv_score / val_score + epsilon*(1/val_score + 1/cv_score) 1053 If higher scoring metric is better, minimize the function result. 1054 If lower scoring metric is better, maximize the function result. 
1055 """ 1056 if scoring == "accuracy": 1057 scoring_func = accuracy_score 1058 elif scoring == "balanced_accuracy": 1059 scoring_func = balanced_accuracy_score 1060 elif scoring == "f1": 1061 scoring_func = f1_score 1062 elif scoring == "roc_auc": 1063 scoring_func = roc_auc_score 1064 elif scoring == "r2": 1065 scoring_func = r2_score 1066 elif scoring == "mse": 1067 scoring_func = mean_squared_error 1068 elif scoring == "mae": 1069 scoring_func = mean_absolute_error 1070 elif scoring == "mape": 1071 scoring_func = mean_absolute_percentage_error 1072 elif scoring == "rmse": 1073 1074 def scoring_func(y_true, y_pred): 1075 return np.sqrt(mean_squared_error(y_true, y_pred)) 1076 1077 X_train, X_val, y_train, y_val = train_test_split( 1078 X, y, test_size=0.2, random_state=random_state 1079 ) 1080 1081 res = cross_val_score( 1082 self, X_train, y_train, cv=cv, scoring=scoring, n_jobs=n_jobs 1083 ) # cross-validation error 1084 1085 if penalized == False: 1086 return res 1087 1088 DescribeResult = namedtuple( 1089 "DescribeResult", ["cv_score", "val_score", "penalized_score"] 1090 ) 1091 1092 numerator = res.mean() 1093 1094 # Evaluate on the (cv+1)-th fold 1095 preds_val = self.fit(X_train, y_train).predict(X_val) 1096 try: 1097 denominator = scoring(y_val, preds_val) # validation error 1098 except Exception as e: 1099 denominator = scoring_func(y_val, preds_val) 1100 1101 # if higher is better 1102 if objective == "abs": 1103 penalized_score = np.abs(numerator - denominator) + epsilon * ( 1104 1 / denominator + 1 / numerator 1105 ) 1106 elif objective == "relative": 1107 ratio = numerator / denominator 1108 penalized_score = np.abs(ratio - 1) + epsilon * ( 1109 1 / denominator + 1 / numerator 1110 ) 1111 1112 return DescribeResult( 1113 cv_score=numerator, 1114 val_score=denominator, 1115 penalized_score=penalized_score, 1116 )
Base model from which all the other classes inherit.
This class contains the most important data preprocessing/feature engineering methods.
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or
not (False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original features are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for type_clust='kmeans' or type_clust='gmm'
clustering (could be 0: no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot);
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax') or robust scaling ('robust') or max absolute scaling ('maxabs')
col_sample: float
percentage of features randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform', clustering and dropout
backend: str
"cpu" or "gpu" or "tpu"
def encode_clusters(self, X=None, predict=False, scaler=None, **kwargs):
    """Create new covariates with kmeans or GMM clustering

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        predict: boolean
            is False on training set and True on test set

        scaler: {object} of class StandardScaler, MinMaxScaler, RobustScaler or MaxAbsScaler
            if scaler has already been fitted on training data (online training), it can be passed here

        **kwargs:
            additional parameters to be passed to the
            clustering method

    Returns:

        Clusters' matrix, one-hot encoded: {array-like}

    """

    np.random.seed(self.seed)

    if X is None:
        X = self.X_

    if isinstance(X, pd.DataFrame):
        X = copy.deepcopy(X.values.astype(float))

    if len(X.shape) == 1:
        X = X.reshape(1, -1)

    if predict is False:  # encode training set
        # scale input data before clustering
        self.clustering_scaler_, scaled_X = mo.scale_covariates(
            X, choice=self.type_scaling[2], scaler=self.clustering_scaler_
        )

        self.clustering_obj_, X_clustered = mo.cluster_covariates(
            scaled_X,
            self.n_clusters,
            self.seed,
            type_clust=self.type_clust,
            **kwargs
        )

        if self.cluster_encode:
            return mo.one_hot_encode(X_clustered, self.n_clusters).astype(
                np.float16
            )

        return X_clustered.astype(np.float16)

    # if predict is True, encode test set
    X_clustered = self.clustering_obj_.predict(
        self.clustering_scaler_.transform(X)
    )

    if self.cluster_encode:
        return mo.one_hot_encode(X_clustered, self.n_clusters).astype(
            np.float16
        )

    return X_clustered.astype(np.float16)
Create new covariates with kmeans or GMM clustering
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
predict: boolean
is False on training set and True on test set
scaler: {object} of class StandardScaler, MinMaxScaler, RobustScaler or MaxAbsScaler
if scaler has already been fitted on training data (online training), it can be passed here
**kwargs:
additional parameters to be passed to the
clustering method
Returns:
Clusters' matrix, one-hot encoded: {array-like}
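A minimal usage sketch, assuming a fitted estimator so that the clustering scaler and clustering object are available; `X_new` is hypothetical new data with the same number of columns as the training data:

```python
import numpy as np
import nnetsauce as ns

rng = np.random.default_rng(123)
X = rng.normal(size=(60, 4))
y = rng.normal(size=60)
X_new = rng.normal(size=(5, 4))

obj = ns.BaseRegressor(n_clusters=3, type_clust="kmeans", seed=123)
obj.fit(X, y)  # fits the clustering scaler and the clustering object internally

clusters_new = obj.encode_clusters(X_new, predict=True)
print(clusters_new.shape)  # (5, 3): one-hot columns since cluster_encode=True by default
```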
317 def create_layer(self, scaled_X, W=None): 318 """Create hidden layer. 319 320 Parameters: 321 322 scaled_X: {array-like}, shape = [n_samples, n_features] 323 Training vectors, where n_samples is the number 324 of samples and n_features is the number of features 325 326 W: {array-like}, shape = [n_features, hidden_features] 327 if provided, constructs the hidden layer with W; otherwise computed internally 328 329 Returns: 330 331 Hidden layer matrix: {array-like} 332 333 """ 334 335 n_features = scaled_X.shape[1] 336 337 # hash_sim = { 338 # "sobol": generate_sobol, 339 # "hammersley": generate_hammersley, 340 # "uniform": generate_uniform, 341 # "halton": generate_halton 342 # } 343 344 if self.bias is False: # no bias term in the hidden layer 345 if W is None: 346 if self.nodes_sim == "sobol": 347 self.W_ = generate_sobol( 348 n_dims=n_features, 349 n_points=self.n_hidden_features, 350 seed=self.seed, 351 ) 352 elif self.nodes_sim == "hammersley": 353 self.W_ = generate_hammersley( 354 n_dims=n_features, 355 n_points=self.n_hidden_features, 356 seed=self.seed, 357 ) 358 elif self.nodes_sim == "uniform": 359 self.W_ = generate_uniform( 360 n_dims=n_features, 361 n_points=self.n_hidden_features, 362 seed=self.seed, 363 ) 364 else: 365 self.W_ = generate_halton( 366 n_dims=n_features, 367 n_points=self.n_hidden_features, 368 seed=self.seed, 369 ) 370 371 assert ( 372 scaled_X.shape[1] == self.W_.shape[0] 373 ), "check dimensions of covariates X and matrix W" 374 375 return mo.dropout( 376 x=self.activation_func( 377 mo.safe_sparse_dot( 378 a=scaled_X, b=self.W_, backend=self.backend 379 ) 380 ), 381 drop_prob=self.dropout, 382 seed=self.seed, 383 ) 384 385 # W is not none 386 assert ( 387 scaled_X.shape[1] == W.shape[0] 388 ), "check dimensions of covariates X and matrix W" 389 390 # self.W_ = W 391 return mo.dropout( 392 x=self.activation_func( 393 mo.safe_sparse_dot(a=scaled_X, b=W, backend=self.backend) 394 ), 395 drop_prob=self.dropout, 396 seed=self.seed, 397 ) 398 399 # with bias term in the hidden layer 400 if W is None: 401 n_features_1 = n_features + 1 402 403 if self.nodes_sim == "sobol": 404 self.W_ = generate_sobol( 405 n_dims=n_features_1, 406 n_points=self.n_hidden_features, 407 seed=self.seed, 408 ) 409 elif self.nodes_sim == "hammersley": 410 self.W_ = generate_hammersley( 411 n_dims=n_features_1, 412 n_points=self.n_hidden_features, 413 seed=self.seed, 414 ) 415 elif self.nodes_sim == "uniform": 416 self.W_ = generate_uniform( 417 n_dims=n_features_1, 418 n_points=self.n_hidden_features, 419 seed=self.seed, 420 ) 421 else: 422 self.W_ = generate_halton( 423 n_dims=n_features_1, 424 n_points=self.n_hidden_features, 425 seed=self.seed, 426 ) 427 428 # self.W_ = hash_sim[self.nodes_sim]( 429 # n_dims=n_features_1, 430 # n_points=self.n_hidden_features, 431 # seed=self.seed, 432 # ) 433 434 return mo.dropout( 435 x=self.activation_func( 436 mo.safe_sparse_dot( 437 a=mo.cbind( 438 np.ones(scaled_X.shape[0]), 439 scaled_X, 440 backend=self.backend, 441 ), 442 b=self.W_, 443 backend=self.backend, 444 ) 445 ), 446 drop_prob=self.dropout, 447 seed=self.seed, 448 ) 449 450 # W is not None 451 # self.W_ = W 452 return mo.dropout( 453 x=self.activation_func( 454 mo.safe_sparse_dot( 455 a=mo.cbind( 456 np.ones(scaled_X.shape[0]), 457 scaled_X, 458 backend=self.backend, 459 ), 460 b=W, 461 backend=self.backend, 462 ) 463 ), 464 drop_prob=self.dropout, 465 seed=self.seed, 466 )
Create hidden layer.
Parameters:
scaled_X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features
W: {array-like}, shape = [n_features, hidden_features]
if provided, constructs the hidden layer with W; otherwise computed internally
Returns:
Hidden layer matrix: {array-like}
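A minimal sketch of inspecting the hidden-layer matrix of a fitted estimator. It assumes `n_clusters=0` and `col_sample=1` so that the scaler fitted during training (`nn_scaler_`) applies to the raw features, and it reuses the stored weights `W_`:

```python
import numpy as np
import nnetsauce as ns

rng = np.random.default_rng(123)
X = rng.normal(size=(80, 5))
y = rng.normal(size=80)

obj = ns.BaseRegressor(n_hidden_features=10, n_clusters=0, seed=123)
obj.fit(X, y)  # cook_training_set sets nn_scaler_ and W_

scaled_X = obj.nn_scaler_.transform(X)      # same scaling as during training
Phi = obj.create_layer(scaled_X, W=obj.W_)  # activated hidden features (plus dropout if any)
print(Phi.shape)  # (80, 10)
```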
Create new hidden features for training set, with hidden layer, center the response.
Parameters:
y: array-like, shape = [n_samples]
Target values
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features
W: {array-like}, shape = [n_features, hidden_features]
if provided, constructs the hidden layer via W
Returns:
(centered response, direct link + hidden layer matrix): {tuple}
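A minimal sketch; `fit` performs exactly this call internally, so repeating it on a fitted estimator returns the centered response and the scaled design matrix (original features, cluster encodings and hidden features when direct_link=True):

```python
import numpy as np
import nnetsauce as ns

rng = np.random.default_rng(123)
X = rng.normal(size=(50, 4))
y = rng.normal(size=50)

obj = ns.BaseRegressor(n_hidden_features=10, n_clusters=2, seed=123)
obj.fit(X, y)  # calls cook_training_set internally

centered_y, scaled_Z = obj.cook_training_set(y=y, X=X)
print(scaled_Z.shape)  # (50, 16) here: 4 inputs + 2 cluster encodings + 10 hidden features
```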
Transform data from test set, with hidden layer.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features
**kwargs: additional parameters to be passed to self.encode_clusters
Returns:
Transformed test set: {array-like}
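The companion transformation for held-out data, as a small sketch (the new rows must have the same number of columns as the training data):

```python
import numpy as np
import nnetsauce as ns

rng = np.random.default_rng(123)
X = rng.normal(size=(50, 4))
y = rng.normal(size=50)
X_new = rng.normal(size=(5, 4))

obj = ns.BaseRegressor(n_hidden_features=10, n_clusters=2, seed=123)
obj.fit(X, y)                     # fits the scalers, the clustering and W_

Z_new = obj.cook_test_set(X_new)  # same transformations applied to the new rows
print(Z_new.shape)                # (5, 16) with the settings above
```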
15class BaseRegressor(Base, RegressorMixin): 16 """Random Vector Functional Link Network regression without shrinkage 17 18 Parameters: 19 20 n_hidden_features: int 21 number of nodes in the hidden layer 22 23 activation_name: str 24 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 25 26 a: float 27 hyperparameter for 'prelu' or 'elu' activation function 28 29 nodes_sim: str 30 type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton', 31 'uniform' 32 33 bias: boolean 34 indicates if the hidden layer contains a bias term (True) or 35 not (False) 36 37 dropout: float 38 regularization parameter; (random) percentage of nodes dropped out 39 of the training 40 41 direct_link: boolean 42 indicates if the original features are included (True) in model's 43 fitting or not (False) 44 45 n_clusters: int 46 number of clusters for type_clust='kmeans' or type_clust='gmm' 47 clustering (could be 0: no clustering) 48 49 cluster_encode: bool 50 defines how the variable containing clusters is treated (default is one-hot); 51 if `False`, then labels are used, without one-hot encoding 52 53 type_clust: str 54 type of clustering method: currently k-means ('kmeans') or Gaussian 55 Mixture Model ('gmm') 56 57 type_scaling: a tuple of 3 strings 58 scaling methods for inputs, hidden layer, and clustering respectively 59 (and when relevant). 60 Currently available: standardization ('std') or MinMax scaling ('minmax') 61 62 col_sample: float 63 percentage of features randomly chosen for training 64 65 row_sample: float 66 percentage of rows chosen for training, by stratified bootstrapping 67 68 seed: int 69 reproducibility seed for nodes_sim=='uniform', clustering and dropout 70 71 backend: str 72 "cpu" or "gpu" or "tpu" 73 74 Attributes: 75 76 beta_: vector 77 regression coefficients 78 79 GCV_: float 80 Generalized Cross-Validation error 81 82 """ 83 84 # construct the object ----- 85 86 def __init__( 87 self, 88 n_hidden_features=5, 89 activation_name="relu", 90 a=0.01, 91 nodes_sim="sobol", 92 bias=True, 93 dropout=0, 94 direct_link=True, 95 n_clusters=2, 96 cluster_encode=True, 97 type_clust="kmeans", 98 type_scaling=("std", "std", "std"), 99 col_sample=1, 100 row_sample=1, 101 seed=123, 102 backend="cpu", 103 ): 104 super().__init__( 105 n_hidden_features=n_hidden_features, 106 activation_name=activation_name, 107 a=a, 108 nodes_sim=nodes_sim, 109 bias=bias, 110 dropout=dropout, 111 direct_link=direct_link, 112 n_clusters=n_clusters, 113 cluster_encode=cluster_encode, 114 type_clust=type_clust, 115 type_scaling=type_scaling, 116 col_sample=col_sample, 117 row_sample=row_sample, 118 seed=seed, 119 backend=backend, 120 ) 121 122 def fit(self, X, y, **kwargs): 123 """Fit BaseRegressor to training data (X, y) 124 125 Parameters: 126 127 X: {array-like}, shape = [n_samples, n_features] 128 Training vectors, where n_samples is the number 129 of samples and n_features is the number of features 130 131 y: array-like, shape = [n_samples] 132 Target values 133 134 **kwargs: additional parameters to be passed to self.cook_training_set 135 136 Returns: 137 138 self: object 139 """ 140 141 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 142 143 fit_obj = lmf.beta_Sigma_hat( 144 X=scaled_Z, y=centered_y, backend=self.backend 145 ) 146 147 self.beta_ = fit_obj["beta_hat"] 148 149 self.GCV_ = fit_obj["GCV"] 150 151 return self 152 153 def predict(self, X, **kwargs): 154 """Predict test data X. 
155 156 Parameters: 157 158 X: {array-like}, shape = [n_samples, n_features] 159 Training vectors, where n_samples is the number 160 of samples and n_features is the number of features 161 162 **kwargs: additional parameters to be passed to self.cook_test_set 163 164 Returns: 165 166 model predictions: {array-like} 167 """ 168 169 if len(X.shape) == 1: 170 n_features = X.shape[0] 171 new_X = mo.rbind( 172 X.reshape(1, n_features), 173 np.ones(n_features).reshape(1, n_features), 174 ) 175 176 return ( 177 self.y_mean_ 178 + mo.safe_sparse_dot( 179 a=self.cook_test_set(new_X, **kwargs), 180 b=self.beta_, 181 backend=self.backend, 182 ) 183 )[0] 184 185 return self.y_mean_ + mo.safe_sparse_dot( 186 a=self.cook_test_set(X, **kwargs), 187 b=self.beta_, 188 backend=self.backend, 189 )
Random Vector Functional Link Network regression without shrinkage
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or
not (False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original features are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for type_clust='kmeans' or type_clust='gmm'
clustering (could be 0: no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot);
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of features randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform', clustering and dropout
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
beta_: vector
regression coefficients
GCV_: float
Generalized Cross-Validation error
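A minimal end-to-end sketch on synthetic data, showing the fitted attributes listed above:

```python
import numpy as np
import nnetsauce as ns

rng = np.random.default_rng(123)
X = rng.normal(size=(100, 5))
y = X @ rng.normal(size=5) + 0.1 * rng.normal(size=100)

reg = ns.BaseRegressor(n_hidden_features=10, n_clusters=2, seed=123)
reg.fit(X, y)

print(reg.GCV_)        # generalized cross-validation error stored by fit
preds = reg.predict(X)
print(preds[:3])
```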
Fit BaseRegressor to training data (X, y)
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features
y: array-like, shape = [n_samples]
Target values
**kwargs: additional parameters to be passed to self.cook_training_set
Returns:
self: object
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features
**kwargs: additional parameters to be passed to self.cook_test_set
Returns:
model predictions: {array-like}
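As the source above shows, `predict` also accepts a single observation passed as a 1-D array; a short self-contained sketch:

```python
import numpy as np
import nnetsauce as ns

rng = np.random.default_rng(123)
X = rng.normal(size=(100, 5))
y = rng.normal(size=100)

reg = ns.BaseRegressor(n_hidden_features=10, n_clusters=2, seed=123).fit(X, y)

one_pred = reg.predict(X[0])  # 1-D input: a single prediction is returned
print(float(one_pred))
```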
15class BayesianRVFLRegressor(Base, RegressorMixin): 16 """Bayesian Random Vector Functional Link Network regression with one prior 17 18 Parameters: 19 20 n_hidden_features: int 21 number of nodes in the hidden layer 22 23 activation_name: str 24 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 25 26 a: float 27 hyperparameter for 'prelu' or 'elu' activation function 28 29 nodes_sim: str 30 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform' 31 32 bias: boolean 33 indicates if the hidden layer contains a bias term (True) or not (False) 34 35 dropout: float 36 regularization parameter; (random) percentage of nodes dropped out 37 of the training 38 39 direct_link: boolean 40 indicates if the original features are included (True) in model''s fitting or not (False) 41 42 n_clusters: int 43 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering) 44 45 cluster_encode: bool 46 defines how the variable containing clusters is treated (default is one-hot) 47 if `False`, then labels are used, without one-hot encoding 48 49 type_clust: str 50 type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm') 51 52 type_scaling: a tuple of 3 strings 53 scaling methods for inputs, hidden layer, and clustering respectively 54 (and when relevant). 55 Currently available: standardization ('std') or MinMax scaling ('minmax') 56 57 seed: int 58 reproducibility seed for nodes_sim=='uniform' 59 60 s: float 61 std. dev. of regression parameters in Bayesian Ridge Regression 62 63 sigma: float 64 std. dev. of residuals in Bayesian Ridge Regression 65 66 return_std: boolean 67 if True, uncertainty around predictions is evaluated 68 69 backend: str 70 "cpu" or "gpu" or "tpu" 71 72 Attributes: 73 74 beta_: array-like 75 regression''s coefficients 76 77 Sigma_: array-like 78 covariance of the distribution of fitted parameters 79 80 GCV_: float 81 Generalized cross-validation error 82 83 y_mean_: float 84 average response 85 86 Examples: 87 88 ```python 89 TBD 90 ``` 91 92 """ 93 94 # construct the object ----- 95 96 def __init__( 97 self, 98 n_hidden_features=5, 99 activation_name="relu", 100 a=0.01, 101 nodes_sim="sobol", 102 bias=True, 103 dropout=0, 104 direct_link=True, 105 n_clusters=2, 106 cluster_encode=True, 107 type_clust="kmeans", 108 type_scaling=("std", "std", "std"), 109 seed=123, 110 s=0.1, 111 sigma=0.05, 112 return_std=True, 113 backend="cpu", 114 ): 115 super().__init__( 116 n_hidden_features=n_hidden_features, 117 activation_name=activation_name, 118 a=a, 119 nodes_sim=nodes_sim, 120 bias=bias, 121 dropout=dropout, 122 direct_link=direct_link, 123 n_clusters=n_clusters, 124 cluster_encode=cluster_encode, 125 type_clust=type_clust, 126 type_scaling=type_scaling, 127 seed=seed, 128 backend=backend, 129 ) 130 self.s = s 131 self.sigma = sigma 132 self.beta_ = None 133 self.Sigma_ = None 134 self.GCV_ = None 135 self.return_std = return_std 136 137 def fit(self, X, y, **kwargs): 138 """Fit BayesianRVFLRegressor to training data (X, y). 139 140 Parameters: 141 142 X: {array-like}, shape = [n_samples, n_features] 143 Training vectors, where n_samples is the number 144 of samples and n_features is the number of features. 145 146 y: array-like, shape = [n_samples] 147 Target values. 
148 149 **kwargs: additional parameters to be passed to 150 self.cook_training_set 151 152 Returns: 153 154 self: object 155 156 """ 157 158 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 159 160 fit_obj = lmf.beta_Sigma_hat_rvfl( 161 X=scaled_Z, 162 y=centered_y, 163 s=self.s, 164 sigma=self.sigma, 165 fit_intercept=False, 166 return_cov=self.return_std, 167 backend=self.backend, 168 ) 169 170 self.beta_ = fit_obj["beta_hat"] 171 172 if self.return_std == True: 173 self.Sigma_ = fit_obj["Sigma_hat"] 174 175 self.GCV_ = fit_obj["GCV"] 176 177 return self 178 179 def predict(self, X, return_std=False, **kwargs): 180 """Predict test data X. 181 182 Parameters: 183 184 X: {array-like}, shape = [n_samples, n_features] 185 Training vectors, where n_samples is the number 186 of samples and n_features is the number of features. 187 188 return_std: {boolean}, standard dev. is returned or not 189 190 **kwargs: additional parameters to be passed to 191 self.cook_test_set 192 193 Returns: 194 195 model predictions: {array-like} 196 197 """ 198 199 if len(X.shape) == 1: # one observation in the test set only 200 n_features = X.shape[0] 201 new_X = mo.rbind( 202 x=X.reshape(1, n_features), 203 y=np.ones(n_features).reshape(1, n_features), 204 backend=self.backend, 205 ) 206 207 self.return_std = return_std 208 209 if self.return_std == False: 210 if len(X.shape) == 1: 211 return ( 212 self.y_mean_ 213 + mo.safe_sparse_dot( 214 a=self.cook_test_set(new_X, **kwargs), 215 b=self.beta_, 216 backend=self.backend, 217 ) 218 )[0] 219 220 return self.y_mean_ + mo.safe_sparse_dot( 221 a=self.cook_test_set(X, **kwargs), 222 b=self.beta_, 223 backend=self.backend, 224 ) 225 226 else: # confidence interval required for preds? 227 if len(X.shape) == 1: 228 Z = self.cook_test_set(new_X, **kwargs) 229 230 pred_obj = lmf.beta_Sigma_hat_rvfl( 231 s=self.s, 232 sigma=self.sigma, 233 X_star=Z, 234 return_cov=True, 235 beta_hat_=self.beta_, 236 Sigma_hat_=self.Sigma_, 237 backend=self.backend, 238 ) 239 240 return ( 241 self.y_mean_ + pred_obj["preds"][0], 242 pred_obj["preds_std"][0], 243 ) 244 245 Z = self.cook_test_set(X, **kwargs) 246 247 pred_obj = lmf.beta_Sigma_hat_rvfl( 248 s=self.s, 249 sigma=self.sigma, 250 X_star=Z, 251 return_cov=True, 252 beta_hat_=self.beta_, 253 Sigma_hat_=self.Sigma_, 254 backend=self.backend, 255 ) 256 257 return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])
Bayesian Random Vector Functional Link Network regression with one prior
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not (False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original features are included (True) in the model's fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
seed: int
reproducibility seed for nodes_sim=='uniform'
s: float
std. dev. of regression parameters in Bayesian Ridge Regression
sigma: float
std. dev. of residuals in Bayesian Ridge Regression
return_std: boolean
if True, uncertainty around predictions is evaluated
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
beta_: array-like
regression coefficients
Sigma_: array-like
covariance of the distribution of fitted parameters
GCV_: float
Generalized cross-validation error
y_mean_: float
average response
Examples:
TBD
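The docstring marks the example as TBD; a minimal hedged sketch on synthetic data, with the prediction standard deviation returned alongside the mean:

```python
import numpy as np
import nnetsauce as ns

rng = np.random.default_rng(123)
X = rng.normal(size=(100, 4))
y = X @ rng.normal(size=4) + 0.1 * rng.normal(size=100)

obj = ns.BayesianRVFLRegressor(n_hidden_features=5, s=0.1, sigma=0.05, seed=123)
obj.fit(X, y)

mean_pred, std_pred = obj.predict(X, return_std=True)
print(mean_pred[:3], std_pred[:3])
```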
Fit BayesianRVFLRegressor to training data (X, y).
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set
Returns:
self: object
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
return_std: boolean; if True, the standard deviation of the predictions is also returned
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
15class BayesianRVFL2Regressor(Base, RegressorMixin): 16 """Bayesian Random Vector Functional Link Network regression with two priors 17 18 Parameters: 19 20 n_hidden_features: int 21 number of nodes in the hidden layer 22 23 activation_name: str 24 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 25 26 a: float 27 hyperparameter for 'prelu' or 'elu' activation function 28 29 nodes_sim: str 30 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform' 31 32 bias: boolean 33 indicates if the hidden layer contains a bias term (True) or not (False) 34 35 dropout: float 36 regularization parameter; (random) percentage of nodes dropped out 37 of the training 38 39 direct_link: boolean 40 indicates if the original features are included (True) in model''s fitting or not (False) 41 42 n_clusters: int 43 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering) 44 45 cluster_encode: bool 46 defines how the variable containing clusters is treated (default is one-hot) 47 if `False`, then labels are used, without one-hot encoding 48 49 type_clust: str 50 type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm') 51 52 type_scaling: a tuple of 3 strings 53 scaling methods for inputs, hidden layer, and clustering respectively 54 (and when relevant). 55 Currently available: standardization ('std') or MinMax scaling ('minmax') 56 57 seed: int 58 reproducibility seed for nodes_sim=='uniform' 59 60 s1: float 61 std. dev. of init. regression parameters in Bayesian Ridge Regression 62 63 s2: float 64 std. dev. of augmented regression parameters in Bayesian Ridge Regression 65 66 sigma: float 67 std. dev. of residuals in Bayesian Ridge Regression 68 69 return_std: boolean 70 if True, uncertainty around predictions is evaluated 71 72 backend: str 73 "cpu" or "gpu" or "tpu" 74 75 Attributes: 76 77 beta_: array-like 78 regression''s coefficients 79 80 Sigma_: array-like 81 covariance of the distribution of fitted parameters 82 83 GCV_: float 84 Generalized cross-validation error 85 86 y_mean_: float 87 average response 88 89 Examples: 90 91 ```python 92 TBD 93 ``` 94 95 """ 96 97 # construct the object ----- 98 99 def __init__( 100 self, 101 n_hidden_features=5, 102 activation_name="relu", 103 a=0.01, 104 nodes_sim="sobol", 105 bias=True, 106 dropout=0, 107 direct_link=True, 108 n_clusters=0, 109 cluster_encode=True, 110 type_clust="kmeans", 111 type_scaling=("std", "std", "std"), 112 seed=123, 113 s1=0.1, 114 s2=0.1, 115 sigma=0.05, 116 return_std=True, 117 backend="cpu", 118 ): 119 super().__init__( 120 n_hidden_features=n_hidden_features, 121 activation_name=activation_name, 122 a=a, 123 nodes_sim=nodes_sim, 124 bias=bias, 125 dropout=dropout, 126 direct_link=direct_link, 127 n_clusters=n_clusters, 128 cluster_encode=cluster_encode, 129 type_clust=type_clust, 130 type_scaling=type_scaling, 131 seed=seed, 132 backend=backend, 133 ) 134 135 self.s1 = s1 136 self.s2 = s2 137 self.sigma = sigma 138 self.beta_ = None 139 self.Sigma_ = None 140 self.GCV_ = None 141 self.return_std = return_std 142 self.coef_ = None 143 144 def fit(self, X, y, **kwargs): 145 """Fit BayesianRVFL2Regressor to training data (X, y) 146 147 Parameters: 148 149 X: {array-like}, shape = [n_samples, n_features] 150 Training vectors, where n_samples is the number 151 of samples and n_features is the number of features 152 153 y: array-like, shape = [n_samples] 154 Target values 155 156 **kwargs: additional parameters to be passed to 157 
self.cook_training_set 158 159 Returns: 160 161 self: object 162 163 """ 164 165 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 166 167 n, p = X.shape 168 q = self.n_hidden_features 169 170 if self.direct_link == True: 171 r = p + self.n_clusters 172 173 block11 = (self.s1**2) * np.eye(r) 174 block12 = np.zeros((r, q)) 175 block21 = np.zeros((q, r)) 176 block22 = (self.s2**2) * np.eye(q) 177 178 Sigma_prior = mo.rbind( 179 x=mo.cbind(x=block11, y=block12, backend=self.backend), 180 y=mo.cbind(x=block21, y=block22, backend=self.backend), 181 backend=self.backend, 182 ) 183 184 else: 185 Sigma_prior = (self.s2**2) * np.eye(q) 186 187 fit_obj = lmf.beta_Sigma_hat_rvfl2( 188 X=scaled_Z, 189 y=centered_y, 190 Sigma=Sigma_prior, 191 sigma=self.sigma, 192 fit_intercept=False, 193 return_cov=self.return_std, 194 backend=self.backend, 195 ) 196 197 self.beta_ = fit_obj["beta_hat"] 198 199 self.coef_ = self.beta_ 200 201 if self.return_std == True: 202 self.Sigma_ = fit_obj["Sigma_hat"] 203 204 self.GCV_ = fit_obj["GCV"] 205 206 return self 207 208 def predict(self, X, return_std=False, **kwargs): 209 """Predict test data X. 210 211 Parameters: 212 213 X: {array-like}, shape = [n_samples, n_features] 214 Training vectors, where n_samples is the number 215 of samples and n_features is the number of features. 216 217 return_std: {boolean}, standard dev. is returned or not 218 219 **kwargs: additional parameters to be passed to 220 self.cook_test_set 221 222 Returns: 223 224 model predictions: {array-like} 225 226 """ 227 228 if len(X.shape) == 1: # one observation in the test set only 229 n_features = X.shape[0] 230 new_X = mo.rbind( 231 x=X.reshape(1, n_features), 232 y=np.ones(n_features).reshape(1, n_features), 233 backend=self.backend, 234 ) 235 236 self.return_std = return_std 237 238 if self.return_std == False: 239 if len(X.shape) == 1: 240 return ( 241 self.y_mean_ 242 + mo.safe_sparse_dot( 243 self.cook_test_set(new_X, **kwargs), 244 self.beta_, 245 backend=self.backend, 246 ) 247 )[0] 248 249 return self.y_mean_ + mo.safe_sparse_dot( 250 self.cook_test_set(X, **kwargs), 251 self.beta_, 252 backend=self.backend, 253 ) 254 255 else: # confidence interval required for preds? 256 if len(X.shape) == 1: 257 Z = self.cook_test_set(new_X, **kwargs) 258 259 pred_obj = lmf.beta_Sigma_hat_rvfl2( 260 X_star=Z, 261 return_cov=self.return_std, 262 beta_hat_=self.beta_, 263 Sigma_hat_=self.Sigma_, 264 backend=self.backend, 265 ) 266 267 return ( 268 self.y_mean_ + pred_obj["preds"][0], 269 pred_obj["preds_std"][0], 270 ) 271 272 Z = self.cook_test_set(X, **kwargs) 273 274 pred_obj = lmf.beta_Sigma_hat_rvfl2( 275 X_star=Z, 276 return_cov=self.return_std, 277 beta_hat_=self.beta_, 278 Sigma_hat_=self.Sigma_, 279 backend=self.backend, 280 ) 281 282 return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])
Bayesian Random Vector Functional Link Network regression with two priors
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not (False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original features are included (True) in the model's fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
seed: int
reproducibility seed for nodes_sim=='uniform'
s1: float
std. dev. of init. regression parameters in Bayesian Ridge Regression
s2: float
std. dev. of augmented regression parameters in Bayesian Ridge Regression
sigma: float
std. dev. of residuals in Bayesian Ridge Regression
return_std: boolean
if True, uncertainty around predictions is evaluated
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
beta_: array-like
regression coefficients
Sigma_: array-like
covariance of the distribution of fitted parameters
GCV_: float
Generalized cross-validation error
y_mean_: float
average response
Examples:
TBD
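Here too the docstring example is TBD; a minimal hedged sketch with the two prior standard deviations set explicitly:

```python
import numpy as np
import nnetsauce as ns

rng = np.random.default_rng(123)
X = rng.normal(size=(100, 4))
y = X @ rng.normal(size=4) + 0.1 * rng.normal(size=100)

obj = ns.BayesianRVFL2Regressor(
    n_hidden_features=5, s1=0.1, s2=0.1, sigma=0.05, seed=123
)
obj.fit(X, y)

mean_pred, std_pred = obj.predict(X, return_std=True)
```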
Fit BayesianRVFL2Regressor to training data (X, y)
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features
y: array-like, shape = [n_samples]
Target values
**kwargs: additional parameters to be passed to
self.cook_training_set
Returns:
self: object
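Once fitted, the attributes listed above can be inspected; a tiny sketch, continuing the illustrative example given earlier:

```python
regr.fit(X_train, y_train)
print(regr.beta_[:5])  # fitted regression coefficients (regr.coef_ is an alias)
```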
208 def predict(self, X, return_std=False, **kwargs): 209 """Predict test data X. 210 211 Parameters: 212 213 X: {array-like}, shape = [n_samples, n_features] 214 Training vectors, where n_samples is the number 215 of samples and n_features is the number of features. 216 217 return_std: {boolean}, standard dev. is returned or not 218 219 **kwargs: additional parameters to be passed to 220 self.cook_test_set 221 222 Returns: 223 224 model predictions: {array-like} 225 226 """ 227 228 if len(X.shape) == 1: # one observation in the test set only 229 n_features = X.shape[0] 230 new_X = mo.rbind( 231 x=X.reshape(1, n_features), 232 y=np.ones(n_features).reshape(1, n_features), 233 backend=self.backend, 234 ) 235 236 self.return_std = return_std 237 238 if self.return_std == False: 239 if len(X.shape) == 1: 240 return ( 241 self.y_mean_ 242 + mo.safe_sparse_dot( 243 self.cook_test_set(new_X, **kwargs), 244 self.beta_, 245 backend=self.backend, 246 ) 247 )[0] 248 249 return self.y_mean_ + mo.safe_sparse_dot( 250 self.cook_test_set(X, **kwargs), 251 self.beta_, 252 backend=self.backend, 253 ) 254 255 else: # confidence interval required for preds? 256 if len(X.shape) == 1: 257 Z = self.cook_test_set(new_X, **kwargs) 258 259 pred_obj = lmf.beta_Sigma_hat_rvfl2( 260 X_star=Z, 261 return_cov=self.return_std, 262 beta_hat_=self.beta_, 263 Sigma_hat_=self.Sigma_, 264 backend=self.backend, 265 ) 266 267 return ( 268 self.y_mean_ + pred_obj["preds"][0], 269 pred_obj["preds_std"][0], 270 ) 271 272 Z = self.cook_test_set(X, **kwargs) 273 274 pred_obj = lmf.beta_Sigma_hat_rvfl2( 275 X_star=Z, 276 return_cov=self.return_std, 277 beta_hat_=self.beta_, 278 Sigma_hat_=self.Sigma_, 279 backend=self.backend, 280 ) 281 282 return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Vectors to predict on, where n_samples is the number
of samples and n_features is the number of features.
return_std: {boolean}, whether the standard deviation of the predictions is returned
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
42class ClassicalMTS(MTS): 43 """Time series with statistical models (statsmodels), mostly for benchmarks 44 45 Parameters: 46 47 model: type of model: str. 48 currently, 'VAR', 'VECM', 'ARIMA', 'ETS', 'Theta' 49 Default is None 50 51 obj: object 52 A time series model from statsmodels 53 54 Attributes: 55 56 df_: data frame 57 the input data frame, in case a data.frame is provided to `fit` 58 59 level_: int 60 level of confidence for prediction intervals (default is 95) 61 62 Examples: 63 See examples/classical_mts_timeseries.py 64 """ 65 66 # construct the object ----- 67 68 def __init__(self, model="VAR", obj=None): 69 if obj is not None: 70 self.model = None 71 self.obj = obj 72 else: 73 self.model = model 74 if self.model == "VAR": 75 self.obj = VAR 76 elif self.model == "VECM": 77 self.obj = VECM 78 elif self.model == "ARIMA": 79 self.obj = ARIMA 80 elif self.model == "ETS": 81 self.obj = ExponentialSmoothing 82 elif self.model == "Theta": 83 self.obj = ThetaModel 84 else: 85 raise ValueError("model not recognized") 86 self.n_series = None 87 self.replications = None 88 self.mean_ = None 89 self.upper_ = None 90 self.lower_ = None 91 self.output_dates_ = None 92 self.alpha_ = None 93 self.df_ = None 94 self.residuals_ = [] 95 self.sims_ = None 96 self.level_ = None 97 98 def fit(self, X, **kwargs): 99 """Fit ClassicalMTS model to training data X, with optional regressors xreg 100 101 Parameters: 102 103 X: {array-like}, shape = [n_samples, n_features] 104 Training time series, where n_samples is the number 105 of samples and n_features is the number of features; 106 X must be in increasing order (most recent observations last) 107 108 **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity) 109 110 Returns: 111 112 self: object 113 """ 114 115 try: 116 self.n_series = X.shape[1] 117 except Exception: 118 self.n_series = 1 119 120 if (isinstance(X, pd.DataFrame) is False) and isinstance( 121 X, pd.Series 122 ) is False: # input data set is a numpy array 123 X = pd.DataFrame(X) 124 if self.n_series > 1: 125 self.series_names = [ 126 "series" + str(i) for i in range(X.shape[1]) 127 ] 128 else: 129 self.series_names = "series0" 130 131 else: # input data set is a DataFrame or Series with column names 132 X_index = None 133 if X.index is not None and len(X.shape) > 1: 134 X_index = X.index 135 X = copy.deepcopy(mo.convert_df_to_numeric(X)) 136 if X_index is not None: 137 try: 138 X.index = X_index 139 except Exception: 140 pass 141 if isinstance(X, pd.DataFrame): 142 self.series_names = X.columns.tolist() 143 else: 144 self.series_names = X.name 145 146 if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series): 147 self.df_ = X 148 X = X.values 149 self.df_.columns = self.series_names 150 self.input_dates = ts.compute_input_dates(self.df_) 151 else: 152 self.df_ = pd.DataFrame(X, columns=self.series_names) 153 154 if self.model == "Theta": 155 try: 156 self.obj = self.obj(self.df_, **kwargs).fit() 157 except Exception as e: 158 self.obj = self.obj(self.df_.values, **kwargs).fit() 159 self.residuals_ = None 160 else: 161 self.obj = self.obj(X, **kwargs).fit() 162 try: 163 self.residuals_ = self.obj.resid 164 except Exception as e: # Theta 165 self.residuals_ = None 166 167 return self 168 169 def predict(self, h=5, level=95, **kwargs): 170 """Forecast all the time series, h steps ahead 171 172 Parameters: 173 174 h: {integer} 175 Forecasting horizon 176 177 **kwargs: additional parameters to be passed to 178 
self.cook_test_set 179 180 Returns: 181 182 model predictions for horizon = h: {array-like} 183 184 """ 185 186 self.output_dates_, frequency = ts.compute_output_dates(self.df_, h) 187 self.level_ = level 188 self.lower_ = None # do not remove (/!\) 189 self.upper_ = None # do not remove (/!\) 190 self.sims_ = None # do not remove (/!\) 191 self.level_ = level 192 self.alpha_ = 100 - level 193 194 pi_multiplier = norm.ppf(1 - self.alpha_ / 200) 195 196 # Named tuple for forecast results 197 DescribeResult = namedtuple( 198 "DescribeResult", ("mean", "lower", "upper") 199 ) 200 201 if ( 202 self.obj is not None 203 ): # try all the special cases of the else section (there's probably a better way) 204 try: 205 ( 206 mean_forecast, 207 lower_bound, 208 upper_bound, 209 ) = self.obj.forecast_interval( 210 self.obj.endog, steps=h, alpha=self.alpha_ / 100, **kwargs 211 ) 212 213 except Exception as e: 214 try: 215 forecast_result = self.obj.predict(steps=h) 216 mean_forecast = forecast_result 217 ( 218 lower_bound, 219 upper_bound, 220 ) = self._compute_confidence_intervals( 221 forecast_result, alpha=self.alpha_ / 100, **kwargs 222 ) 223 224 except Exception as e: 225 try: 226 forecast_result = self.obj.get_forecast(steps=h) 227 mean_forecast = forecast_result.predicted_mean 228 lower_bound = forecast_result.conf_int()[:, 0] 229 upper_bound = forecast_result.conf_int()[:, 1] 230 231 except Exception as e: 232 try: 233 forecast_result = self.obj.forecast(steps=h) 234 residuals = self.obj.resid 235 std_errors = np.std(residuals) 236 mean_forecast = forecast_result 237 lower_bound = ( 238 forecast_result - pi_multiplier * std_errors 239 ) 240 upper_bound = ( 241 forecast_result + pi_multiplier * std_errors 242 ) 243 244 except Exception as e: 245 try: 246 mean_forecast = self.obj.forecast( 247 steps=h 248 ).values 249 forecast_result = self.obj.prediction_intervals( 250 steps=h, alpha=self.alpha_ / 100, **kwargs 251 ) 252 lower_bound = forecast_result["lower"].values 253 upper_bound = forecast_result["upper"].values 254 except Exception: 255 mean_forecast = self.obj.forecast(steps=h) 256 forecast_result = self.obj.prediction_intervals( 257 steps=h, alpha=self.alpha_ / 100, **kwargs 258 ) 259 lower_bound = forecast_result["lower"] 260 upper_bound = forecast_result["upper"] 261 262 else: 263 if self.model == "VAR": 264 ( 265 mean_forecast, 266 lower_bound, 267 upper_bound, 268 ) = self.obj.forecast_interval( 269 self.obj.endog, steps=h, alpha=self.alpha_ / 100, **kwargs 270 ) 271 272 elif self.model == "VECM": 273 forecast_result = self.obj.predict(steps=h) 274 mean_forecast = forecast_result 275 lower_bound, upper_bound = self._compute_confidence_intervals( 276 forecast_result, alpha=self.alpha_ / 100, **kwargs 277 ) 278 279 elif self.model == "ARIMA": 280 forecast_result = self.obj.get_forecast(steps=h) 281 mean_forecast = forecast_result.predicted_mean 282 lower_bound = forecast_result.conf_int()[:, 0] 283 upper_bound = forecast_result.conf_int()[:, 1] 284 285 elif self.model == "ETS": 286 forecast_result = self.obj.forecast(steps=h) 287 residuals = self.obj.resid 288 std_errors = np.std(residuals) 289 mean_forecast = forecast_result 290 lower_bound = forecast_result - pi_multiplier * std_errors 291 upper_bound = forecast_result + pi_multiplier * std_errors 292 293 elif self.model == "Theta": 294 try: 295 mean_forecast = self.obj.forecast(steps=h).values 296 forecast_result = self.obj.prediction_intervals( 297 steps=h, alpha=self.alpha_ / 100, **kwargs 298 ) 299 lower_bound = 
forecast_result["lower"].values 300 upper_bound = forecast_result["upper"].values 301 except Exception: 302 mean_forecast = self.obj.forecast(steps=h) 303 forecast_result = self.obj.prediction_intervals( 304 steps=h, alpha=self.alpha_ / 100, **kwargs 305 ) 306 lower_bound = forecast_result["lower"] 307 upper_bound = forecast_result["upper"] 308 309 else: 310 raise ValueError("model not recognized") 311 312 try: 313 self.mean_ = pd.DataFrame( 314 mean_forecast, 315 columns=self.series_names, 316 index=self.output_dates_, 317 ) 318 self.lower_ = pd.DataFrame( 319 lower_bound, columns=self.series_names, index=self.output_dates_ 320 ) 321 self.upper_ = pd.DataFrame( 322 upper_bound, columns=self.series_names, index=self.output_dates_ 323 ) 324 except Exception: 325 self.mean_ = pd.Series( 326 mean_forecast, name=self.series_names, index=self.output_dates_ 327 ) 328 self.lower_ = pd.Series( 329 lower_bound, name=self.series_names, index=self.output_dates_ 330 ) 331 self.upper_ = pd.Series( 332 upper_bound, name=self.series_names, index=self.output_dates_ 333 ) 334 335 return DescribeResult( 336 mean=self.mean_, lower=self.lower_, upper=self.upper_ 337 ) 338 339 def _compute_confidence_intervals(self, forecast_result, alpha): 340 """ 341 Compute confidence intervals for VECM forecasts. 342 Uses the covariance of residuals to approximate the confidence intervals. 343 """ 344 residuals = self.obj.resid 345 cov_matrix = np.cov(residuals.T) # Covariance matrix of residuals 346 std_errors = np.sqrt(np.diag(cov_matrix)) # Standard errors 347 348 z_value = norm.ppf(1 - alpha / 2) # Z-score for the given alpha level 349 lower_bound = forecast_result - z_value * std_errors 350 upper_bound = forecast_result + z_value * std_errors 351 352 return lower_bound, upper_bound 353 354 def score(self, X, training_index, testing_index, scoring=None, **kwargs): 355 """Train on training_index, score on testing_index.""" 356 357 assert ( 358 bool(set(training_index).intersection(set(testing_index))) == False 359 ), "Non-overlapping 'training_index' and 'testing_index' required" 360 361 # Dimensions 362 try: 363 # multivariate time series 364 n, p = X.shape 365 except: 366 # univariate time series 367 n = X.shape[0] 368 p = 1 369 370 # Training and testing sets 371 if p > 1: 372 X_train = X[training_index, :] 373 X_test = X[testing_index, :] 374 else: 375 X_train = X[training_index] 376 X_test = X[testing_index] 377 378 # Horizon 379 h = len(testing_index) 380 assert ( 381 len(training_index) + h 382 ) <= n, "Please check lengths of training and testing windows" 383 384 # Fit and predict 385 self.fit(X_train, **kwargs) 386 preds = self.predict(h=h, **kwargs) 387 388 if scoring is None: 389 scoring = "neg_root_mean_squared_error" 390 391 # check inputs 392 assert scoring in ( 393 "explained_variance", 394 "neg_mean_absolute_error", 395 "neg_mean_squared_error", 396 "neg_root_mean_squared_error", 397 "neg_mean_squared_log_error", 398 "neg_median_absolute_error", 399 "r2", 400 ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \ 401 'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \ 402 'neg_median_absolute_error', 'r2')" 403 404 scoring_options = { 405 "explained_variance": skm2.explained_variance_score, 406 "neg_mean_absolute_error": skm2.mean_absolute_error, 407 "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2), 408 "neg_root_mean_squared_error": lambda x, y: np.sqrt( 409 np.mean((x - y) ** 2) 410 ), 411 "neg_mean_squared_log_error": 
skm2.mean_squared_log_error, 412 "neg_median_absolute_error": skm2.median_absolute_error, 413 "r2": skm2.r2_score, 414 } 415 416 # if p > 1: 417 # return tuple( 418 # [ 419 # scoring_options[scoring]( 420 # X_test[:, i], preds[:, i]#, **kwargs 421 # ) 422 # for i in range(p) 423 # ] 424 # ) 425 # else: 426 return scoring_options[scoring](X_test, preds) 427 428 def plot(self, series=None, type_axis="dates", type_plot="pi"): 429 """Plot time series forecast 430 431 Parameters: 432 433 series: {integer} or {string} 434 series index or name 435 436 """ 437 438 assert all( 439 [ 440 self.mean_ is not None, 441 self.lower_ is not None, 442 self.upper_ is not None, 443 self.output_dates_ is not None, 444 ] 445 ), "model forecasting must be obtained first (with predict)" 446 447 if series is None: 448 assert ( 449 self.n_series == 1 450 ), "please specify series index or name (n_series > 1)" 451 series = 0 452 453 if isinstance(series, str): 454 assert ( 455 series in self.series_names 456 ), f"series {series} doesn't exist in the input dataset" 457 series_idx = self.df_.columns.get_loc(series) 458 else: 459 assert isinstance(series, int) and ( 460 0 <= series < self.n_series 461 ), f"check series index (< {self.n_series})" 462 series_idx = series 463 464 if isinstance(self.df_, pd.DataFrame): 465 y_all = list(self.df_.iloc[:, series_idx]) + list( 466 self.mean_.iloc[:, series_idx] 467 ) 468 y_test = list(self.mean_.iloc[:, series_idx]) 469 else: 470 y_all = list(self.df_.values) + list(self.mean_.values) 471 y_test = list(self.mean_.values) 472 n_points_all = len(y_all) 473 n_points_train = self.df_.shape[0] 474 475 if type_axis == "numeric": 476 x_all = [i for i in range(n_points_all)] 477 x_test = [i for i in range(n_points_train, n_points_all)] 478 479 if type_axis == "dates": # use dates 480 x_all = np.concatenate( 481 (self.input_dates.values, self.output_dates_.values), axis=None 482 ) 483 x_test = self.output_dates_.values 484 485 if type_plot == "pi": 486 fig, ax = plt.subplots() 487 ax.plot(x_all, y_all, "-") 488 ax.plot(x_test, y_test, "-", color="orange") 489 try: 490 ax.fill_between( 491 x_test, 492 self.lower_.iloc[:, series_idx], 493 self.upper_.iloc[:, series_idx], 494 alpha=0.2, 495 color="orange", 496 ) 497 except Exception: 498 ax.fill_between( 499 x_test, 500 self.lower_.values, 501 self.upper_.values, 502 alpha=0.2, 503 color="orange", 504 ) 505 if self.replications is None: 506 if self.n_series > 1: 507 plt.title( 508 f"prediction intervals for {series}", 509 loc="left", 510 fontsize=12, 511 fontweight=0, 512 color="black", 513 ) 514 else: 515 plt.title( 516 f"prediction intervals for input time series", 517 loc="left", 518 fontsize=12, 519 fontweight=0, 520 color="black", 521 ) 522 plt.show() 523 else: # self.replications is not None 524 if self.n_series > 1: 525 plt.title( 526 f"prediction intervals for {self.replications} simulations of {series}", 527 loc="left", 528 fontsize=12, 529 fontweight=0, 530 color="black", 531 ) 532 else: 533 plt.title( 534 f"prediction intervals for {self.replications} simulations of input time series", 535 loc="left", 536 fontsize=12, 537 fontweight=0, 538 color="black", 539 ) 540 plt.show() 541 542 if type_plot == "spaghetti": 543 palette = plt.get_cmap("Set1") 544 sims_ix = getsims(self.sims_, series_idx) 545 plt.plot(x_all, y_all, "-") 546 for col_ix in range( 547 sims_ix.shape[1] 548 ): # avoid this when there are thousands of simulations 549 plt.plot( 550 x_test, 551 sims_ix[:, col_ix], 552 "-", 553 color=palette(col_ix), 554 linewidth=1, 
555 alpha=0.9, 556 ) 557 plt.plot(x_all, y_all, "-", color="black") 558 plt.plot(x_test, y_test, "-", color="blue") 559 # Add titles 560 if self.n_series > 1: 561 plt.title( 562 f"{self.replications} simulations of {series}", 563 loc="left", 564 fontsize=12, 565 fontweight=0, 566 color="black", 567 ) 568 else: 569 plt.title( 570 f"{self.replications} simulations of input time series", 571 loc="left", 572 fontsize=12, 573 fontweight=0, 574 color="black", 575 ) 576 plt.xlabel("Time") 577 plt.ylabel("Values") 578 # Show the graph 579 plt.show() 580 581 def cross_val_score( 582 self, 583 X, 584 scoring="root_mean_squared_error", 585 n_jobs=None, 586 verbose=0, 587 xreg=None, 588 initial_window=5, 589 horizon=3, 590 fixed_window=False, 591 show_progress=True, 592 level=95, 593 **kwargs, 594 ): 595 """Evaluate a score by time series cross-validation. 596 597 Parameters: 598 599 X: {array-like, sparse matrix} of shape (n_samples, n_features) 600 The data to fit. 601 602 scoring: str or a function 603 A str in ('root_mean_squared_error', 'mean_squared_error', 'mean_error', 604 'mean_absolute_error', 'mean_error', 'mean_percentage_error', 605 'mean_absolute_percentage_error', 'winkler_score', 'coverage') 606 Or a function defined as 'coverage' and 'winkler_score' in `utils.timeseries` 607 608 n_jobs: int, default=None 609 Number of jobs to run in parallel. 610 611 verbose: int, default=0 612 The verbosity level. 613 614 xreg: array-like, optional (default=None) 615 Additional (external) regressors to be passed to `fit` 616 xreg must be in 'increasing' order (most recent observations last) 617 618 initial_window: int 619 initial number of consecutive values in each training set sample 620 621 horizon: int 622 number of consecutive values in test set sample 623 624 fixed_window: boolean 625 if False, all training samples start at index 0, and the training 626 window's size is increasing. 
627 if True, the training window's size is fixed, and the window is 628 rolling forward 629 630 show_progress: boolean 631 if True, a progress bar is printed 632 633 **kwargs: dict 634 additional parameters to be passed to `fit` and `predict` 635 636 Returns: 637 638 A tuple: descriptive statistics or errors and raw errors 639 640 """ 641 tscv = TimeSeriesSplit() 642 643 tscv_obj = tscv.split( 644 X, 645 initial_window=initial_window, 646 horizon=horizon, 647 fixed_window=fixed_window, 648 ) 649 650 if isinstance(scoring, str): 651 assert scoring in ( 652 "root_mean_squared_error", 653 "mean_squared_error", 654 "mean_error", 655 "mean_absolute_error", 656 "mean_percentage_error", 657 "mean_absolute_percentage_error", 658 "winkler_score", 659 "coverage", 660 ), "must have scoring in ('root_mean_squared_error', 'mean_squared_error', 'mean_error', 'mean_absolute_error', 'mean_error', 'mean_percentage_error', 'mean_absolute_percentage_error', 'winkler_score', 'coverage')" 661 662 def err_func(X_test, X_pred, scoring): 663 if (self.replications is not None) or ( 664 self.type_pi == "gaussian" 665 ): # probabilistic 666 if scoring == "winkler_score": 667 return winkler_score(X_pred, X_test, level=level) 668 elif scoring == "coverage": 669 return coverage(X_pred, X_test, level=level) 670 else: 671 return mean_errors( 672 pred=X_pred.mean, actual=X_test, scoring=scoring 673 ) 674 else: # not probabilistic 675 return mean_errors( 676 pred=X_pred, actual=X_test, scoring=scoring 677 ) 678 679 else: # isinstance(scoring, str) = False 680 err_func = scoring 681 682 errors = [] 683 684 train_indices = [] 685 686 test_indices = [] 687 688 for train_index, test_index in tscv_obj: 689 train_indices.append(train_index) 690 test_indices.append(test_index) 691 692 if show_progress is True: 693 iterator = tqdm( 694 zip(train_indices, test_indices), total=len(train_indices) 695 ) 696 else: 697 iterator = zip(train_indices, test_indices) 698 699 for train_index, test_index in iterator: 700 if verbose == 1: 701 print(f"TRAIN: {train_index}") 702 print(f"TEST: {test_index}") 703 704 if isinstance(X, pd.DataFrame): 705 self.fit(X.iloc[train_index, :], xreg=xreg, **kwargs) 706 X_test = X.iloc[test_index, :] 707 else: 708 self.fit(X[train_index, :], xreg=xreg, **kwargs) 709 X_test = X[test_index, :] 710 X_pred = self.predict(h=int(len(test_index)), level=level, **kwargs) 711 712 errors.append(err_func(X_test, X_pred, scoring)) 713 714 res = np.asarray(errors) 715 716 return res, describe(res)
Time series with statistical models (statsmodels), mostly for benchmarks
Parameters:
model: str
type of model: currently 'VAR', 'VECM', 'ARIMA', 'ETS' or 'Theta'.
Default is 'VAR'
obj: object
A time series model from statsmodels
Attributes:
df_: data frame
the input data frame, in case a DataFrame is provided to `fit`
level_: int
level of confidence for prediction intervals (default is 95)
Examples: See examples/classical_mts_timeseries.py
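For orientation, here is a minimal sketch of the typical workflow (the synthetic data below is an illustrative assumption, not taken from the example file):

```python
# Illustrative sketch: fit a VAR benchmark on toy data and forecast 5 steps ahead.
import numpy as np
import pandas as pd
import nnetsauce as ns

rng = np.random.default_rng(123)
dates = pd.date_range("2020-01-01", periods=100, freq="D")
df = pd.DataFrame(
    rng.normal(size=(100, 2)).cumsum(axis=0),  # two random-walk series
    columns=["series1", "series2"],
    index=dates,
)

obj = ns.ClassicalMTS(model="VAR")
obj.fit(df)
res = obj.predict(h=5, level=95)  # namedtuple with fields mean, lower, upper
print(res.mean)
```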
98 def fit(self, X, **kwargs): 99 """Fit ClassicalMTS model to training data X, with optional regressors xreg 100 101 Parameters: 102 103 X: {array-like}, shape = [n_samples, n_features] 104 Training time series, where n_samples is the number 105 of samples and n_features is the number of features; 106 X must be in increasing order (most recent observations last) 107 108 **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity) 109 110 Returns: 111 112 self: object 113 """ 114 115 try: 116 self.n_series = X.shape[1] 117 except Exception: 118 self.n_series = 1 119 120 if (isinstance(X, pd.DataFrame) is False) and isinstance( 121 X, pd.Series 122 ) is False: # input data set is a numpy array 123 X = pd.DataFrame(X) 124 if self.n_series > 1: 125 self.series_names = [ 126 "series" + str(i) for i in range(X.shape[1]) 127 ] 128 else: 129 self.series_names = "series0" 130 131 else: # input data set is a DataFrame or Series with column names 132 X_index = None 133 if X.index is not None and len(X.shape) > 1: 134 X_index = X.index 135 X = copy.deepcopy(mo.convert_df_to_numeric(X)) 136 if X_index is not None: 137 try: 138 X.index = X_index 139 except Exception: 140 pass 141 if isinstance(X, pd.DataFrame): 142 self.series_names = X.columns.tolist() 143 else: 144 self.series_names = X.name 145 146 if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series): 147 self.df_ = X 148 X = X.values 149 self.df_.columns = self.series_names 150 self.input_dates = ts.compute_input_dates(self.df_) 151 else: 152 self.df_ = pd.DataFrame(X, columns=self.series_names) 153 154 if self.model == "Theta": 155 try: 156 self.obj = self.obj(self.df_, **kwargs).fit() 157 except Exception as e: 158 self.obj = self.obj(self.df_.values, **kwargs).fit() 159 self.residuals_ = None 160 else: 161 self.obj = self.obj(X, **kwargs).fit() 162 try: 163 self.residuals_ = self.obj.resid 164 except Exception as e: # Theta 165 self.residuals_ = None 166 167 return self
Fit ClassicalMTS model to training data X, with optional regressors xreg
Parameters:
X: {array-like}, shape = [n_samples, n_features] Training time series, where n_samples is the number of samples and n_features is the number of features; X must be in increasing order (most recent observations last)
**kwargs: for now, additional parameters to be passed for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)
Returns:
self: object
169 def predict(self, h=5, level=95, **kwargs): 170 """Forecast all the time series, h steps ahead 171 172 Parameters: 173 174 h: {integer} 175 Forecasting horizon 176 177 **kwargs: additional parameters to be passed to 178 self.cook_test_set 179 180 Returns: 181 182 model predictions for horizon = h: {array-like} 183 184 """ 185 186 self.output_dates_, frequency = ts.compute_output_dates(self.df_, h) 187 self.level_ = level 188 self.lower_ = None # do not remove (/!\) 189 self.upper_ = None # do not remove (/!\) 190 self.sims_ = None # do not remove (/!\) 191 self.level_ = level 192 self.alpha_ = 100 - level 193 194 pi_multiplier = norm.ppf(1 - self.alpha_ / 200) 195 196 # Named tuple for forecast results 197 DescribeResult = namedtuple( 198 "DescribeResult", ("mean", "lower", "upper") 199 ) 200 201 if ( 202 self.obj is not None 203 ): # try all the special cases of the else section (there's probably a better way) 204 try: 205 ( 206 mean_forecast, 207 lower_bound, 208 upper_bound, 209 ) = self.obj.forecast_interval( 210 self.obj.endog, steps=h, alpha=self.alpha_ / 100, **kwargs 211 ) 212 213 except Exception as e: 214 try: 215 forecast_result = self.obj.predict(steps=h) 216 mean_forecast = forecast_result 217 ( 218 lower_bound, 219 upper_bound, 220 ) = self._compute_confidence_intervals( 221 forecast_result, alpha=self.alpha_ / 100, **kwargs 222 ) 223 224 except Exception as e: 225 try: 226 forecast_result = self.obj.get_forecast(steps=h) 227 mean_forecast = forecast_result.predicted_mean 228 lower_bound = forecast_result.conf_int()[:, 0] 229 upper_bound = forecast_result.conf_int()[:, 1] 230 231 except Exception as e: 232 try: 233 forecast_result = self.obj.forecast(steps=h) 234 residuals = self.obj.resid 235 std_errors = np.std(residuals) 236 mean_forecast = forecast_result 237 lower_bound = ( 238 forecast_result - pi_multiplier * std_errors 239 ) 240 upper_bound = ( 241 forecast_result + pi_multiplier * std_errors 242 ) 243 244 except Exception as e: 245 try: 246 mean_forecast = self.obj.forecast( 247 steps=h 248 ).values 249 forecast_result = self.obj.prediction_intervals( 250 steps=h, alpha=self.alpha_ / 100, **kwargs 251 ) 252 lower_bound = forecast_result["lower"].values 253 upper_bound = forecast_result["upper"].values 254 except Exception: 255 mean_forecast = self.obj.forecast(steps=h) 256 forecast_result = self.obj.prediction_intervals( 257 steps=h, alpha=self.alpha_ / 100, **kwargs 258 ) 259 lower_bound = forecast_result["lower"] 260 upper_bound = forecast_result["upper"] 261 262 else: 263 if self.model == "VAR": 264 ( 265 mean_forecast, 266 lower_bound, 267 upper_bound, 268 ) = self.obj.forecast_interval( 269 self.obj.endog, steps=h, alpha=self.alpha_ / 100, **kwargs 270 ) 271 272 elif self.model == "VECM": 273 forecast_result = self.obj.predict(steps=h) 274 mean_forecast = forecast_result 275 lower_bound, upper_bound = self._compute_confidence_intervals( 276 forecast_result, alpha=self.alpha_ / 100, **kwargs 277 ) 278 279 elif self.model == "ARIMA": 280 forecast_result = self.obj.get_forecast(steps=h) 281 mean_forecast = forecast_result.predicted_mean 282 lower_bound = forecast_result.conf_int()[:, 0] 283 upper_bound = forecast_result.conf_int()[:, 1] 284 285 elif self.model == "ETS": 286 forecast_result = self.obj.forecast(steps=h) 287 residuals = self.obj.resid 288 std_errors = np.std(residuals) 289 mean_forecast = forecast_result 290 lower_bound = forecast_result - pi_multiplier * std_errors 291 upper_bound = forecast_result + pi_multiplier * std_errors 292 293 elif 
self.model == "Theta": 294 try: 295 mean_forecast = self.obj.forecast(steps=h).values 296 forecast_result = self.obj.prediction_intervals( 297 steps=h, alpha=self.alpha_ / 100, **kwargs 298 ) 299 lower_bound = forecast_result["lower"].values 300 upper_bound = forecast_result["upper"].values 301 except Exception: 302 mean_forecast = self.obj.forecast(steps=h) 303 forecast_result = self.obj.prediction_intervals( 304 steps=h, alpha=self.alpha_ / 100, **kwargs 305 ) 306 lower_bound = forecast_result["lower"] 307 upper_bound = forecast_result["upper"] 308 309 else: 310 raise ValueError("model not recognized") 311 312 try: 313 self.mean_ = pd.DataFrame( 314 mean_forecast, 315 columns=self.series_names, 316 index=self.output_dates_, 317 ) 318 self.lower_ = pd.DataFrame( 319 lower_bound, columns=self.series_names, index=self.output_dates_ 320 ) 321 self.upper_ = pd.DataFrame( 322 upper_bound, columns=self.series_names, index=self.output_dates_ 323 ) 324 except Exception: 325 self.mean_ = pd.Series( 326 mean_forecast, name=self.series_names, index=self.output_dates_ 327 ) 328 self.lower_ = pd.Series( 329 lower_bound, name=self.series_names, index=self.output_dates_ 330 ) 331 self.upper_ = pd.Series( 332 upper_bound, name=self.series_names, index=self.output_dates_ 333 ) 334 335 return DescribeResult( 336 mean=self.mean_, lower=self.lower_, upper=self.upper_ 337 )
Forecast all the time series, h steps ahead
Parameters:
h: {integer} Forecasting horizon
**kwargs: additional parameters to be passed to the underlying
statsmodels forecasting methods
Returns:
a namedtuple `DescribeResult(mean, lower, upper)` containing the point forecasts
and the prediction-interval bounds for horizon = h
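The returned fields can be used directly; continuing the sketch above:

```python
res = obj.predict(h=5, level=95)
print(res.mean.head())   # point forecasts, indexed by the output dates
print(res.lower.head())  # lower bound of the 95% prediction interval
print(res.upper.head())  # upper bound of the 95% prediction interval
```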
354 def score(self, X, training_index, testing_index, scoring=None, **kwargs): 355 """Train on training_index, score on testing_index.""" 356 357 assert ( 358 bool(set(training_index).intersection(set(testing_index))) == False 359 ), "Non-overlapping 'training_index' and 'testing_index' required" 360 361 # Dimensions 362 try: 363 # multivariate time series 364 n, p = X.shape 365 except: 366 # univariate time series 367 n = X.shape[0] 368 p = 1 369 370 # Training and testing sets 371 if p > 1: 372 X_train = X[training_index, :] 373 X_test = X[testing_index, :] 374 else: 375 X_train = X[training_index] 376 X_test = X[testing_index] 377 378 # Horizon 379 h = len(testing_index) 380 assert ( 381 len(training_index) + h 382 ) <= n, "Please check lengths of training and testing windows" 383 384 # Fit and predict 385 self.fit(X_train, **kwargs) 386 preds = self.predict(h=h, **kwargs) 387 388 if scoring is None: 389 scoring = "neg_root_mean_squared_error" 390 391 # check inputs 392 assert scoring in ( 393 "explained_variance", 394 "neg_mean_absolute_error", 395 "neg_mean_squared_error", 396 "neg_root_mean_squared_error", 397 "neg_mean_squared_log_error", 398 "neg_median_absolute_error", 399 "r2", 400 ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \ 401 'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \ 402 'neg_median_absolute_error', 'r2')" 403 404 scoring_options = { 405 "explained_variance": skm2.explained_variance_score, 406 "neg_mean_absolute_error": skm2.mean_absolute_error, 407 "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2), 408 "neg_root_mean_squared_error": lambda x, y: np.sqrt( 409 np.mean((x - y) ** 2) 410 ), 411 "neg_mean_squared_log_error": skm2.mean_squared_log_error, 412 "neg_median_absolute_error": skm2.median_absolute_error, 413 "r2": skm2.r2_score, 414 } 415 416 # if p > 1: 417 # return tuple( 418 # [ 419 # scoring_options[scoring]( 420 # X_test[:, i], preds[:, i]#, **kwargs 421 # ) 422 # for i in range(p) 423 # ] 424 # ) 425 # else: 426 return scoring_options[scoring](X_test, preds)
Train on training_index, score on testing_index; the two index sets must not overlap.
16class CustomClassifier(Custom, ClassifierMixin): 17 """Custom Classification model 18 19 Attributes: 20 21 obj: object 22 any object containing a method fit (obj.fit()) and a method predict 23 (obj.predict()) 24 25 n_hidden_features: int 26 number of nodes in the hidden layer 27 28 activation_name: str 29 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 30 31 a: float 32 hyperparameter for 'prelu' or 'elu' activation function 33 34 nodes_sim: str 35 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 36 'uniform' 37 38 bias: boolean 39 indicates if the hidden layer contains a bias term (True) or not 40 (False) 41 42 dropout: float 43 regularization parameter; (random) percentage of nodes dropped out 44 of the training 45 46 direct_link: boolean 47 indicates if the original predictors are included (True) in model''s 48 fitting or not (False) 49 50 n_clusters: int 51 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: 52 no clustering) 53 54 cluster_encode: bool 55 defines how the variable containing clusters is treated (default is one-hot) 56 if `False`, then labels are used, without one-hot encoding 57 58 type_clust: str 59 type of clustering method: currently k-means ('kmeans') or Gaussian 60 Mixture Model ('gmm') 61 62 type_scaling: a tuple of 3 strings 63 scaling methods for inputs, hidden layer, and clustering respectively 64 (and when relevant). 65 Currently available: standardization ('std') or MinMax scaling ('minmax') 66 67 col_sample: float 68 percentage of covariates randomly chosen for training 69 70 row_sample: float 71 percentage of rows chosen for training, by stratified bootstrapping 72 73 cv_calibration: int, cross-validation generator, or iterable, default=2 74 Determines the cross-validation splitting strategy. Same as 75 `sklearn.calibration.CalibratedClassifierCV` 76 77 calibration_method: str 78 {‘sigmoid’, ‘isotonic’}, default=’sigmoid’ 79 The method to use for calibration. 
Same as 80 `sklearn.calibration.CalibratedClassifierCV` 81 82 seed: int 83 reproducibility seed for nodes_sim=='uniform' 84 85 backend: str 86 "cpu" or "gpu" or "tpu" 87 88 Examples: 89 90 Note: it's better to use the `DeepClassifier` or `LazyDeepClassifier` classes directly 91 92 ```python 93 import nnetsauce as ns 94 from sklearn.ensemble import RandomForestClassifier 95 from sklearn.model_selection import train_test_split 96 from sklearn.datasets import load_digits 97 from time import time 98 99 digits = load_digits() 100 X = digits.data 101 y = digits.target 102 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, 103 random_state=123) 104 105 # layer 1 (base layer) ---- 106 layer1_regr = RandomForestClassifier(n_estimators=10, random_state=123) 107 108 start = time() 109 110 layer1_regr.fit(X_train, y_train) 111 112 # Accuracy in layer 1 113 print(layer1_regr.score(X_test, y_test)) 114 115 # layer 2 using layer 1 ---- 116 layer2_regr = ns.CustomClassifier(obj = layer1_regr, n_hidden_features=5, 117 direct_link=True, bias=True, 118 nodes_sim='uniform', activation_name='relu', 119 n_clusters=2, seed=123) 120 layer2_regr.fit(X_train, y_train) 121 122 # Accuracy in layer 2 123 print(layer2_regr.score(X_test, y_test)) 124 125 # layer 3 using layer 2 ---- 126 layer3_regr = ns.CustomClassifier(obj = layer2_regr, n_hidden_features=10, 127 direct_link=True, bias=True, dropout=0.7, 128 nodes_sim='uniform', activation_name='relu', 129 n_clusters=2, seed=123) 130 layer3_regr.fit(X_train, y_train) 131 132 # Accuracy in layer 3 133 print(layer3_regr.score(X_test, y_test)) 134 135 print(f"Elapsed {time() - start}") 136 ``` 137 138 """ 139 140 # construct the object ----- 141 _estimator_type = "classifier" 142 143 def __init__( 144 self, 145 obj, 146 n_hidden_features=5, 147 activation_name="relu", 148 a=0.01, 149 nodes_sim="sobol", 150 bias=True, 151 dropout=0, 152 direct_link=True, 153 n_clusters=2, 154 cluster_encode=True, 155 type_clust="kmeans", 156 type_scaling=("std", "std", "std"), 157 col_sample=1, 158 row_sample=1, 159 cv_calibration=2, 160 calibration_method="sigmoid", 161 seed=123, 162 backend="cpu", 163 ): 164 super().__init__( 165 obj=obj, 166 n_hidden_features=n_hidden_features, 167 activation_name=activation_name, 168 a=a, 169 nodes_sim=nodes_sim, 170 bias=bias, 171 dropout=dropout, 172 direct_link=direct_link, 173 n_clusters=n_clusters, 174 cluster_encode=cluster_encode, 175 type_clust=type_clust, 176 type_scaling=type_scaling, 177 col_sample=col_sample, 178 row_sample=row_sample, 179 seed=seed, 180 backend=backend, 181 ) 182 self.coef_ = None 183 self.intercept_ = None 184 self.type_fit = "classification" 185 self.cv_calibration = cv_calibration 186 self.calibration_method = calibration_method 187 188 def __sklearn_clone__(self): 189 """Create a clone of the estimator. 190 191 This is required for scikit-learn's calibration system to work properly. 
192 """ 193 # Create a new instance with the same parameters 194 clone = CustomClassifier( 195 obj=self.obj, 196 n_hidden_features=self.n_hidden_features, 197 activation_name=self.activation_name, 198 a=self.a, 199 nodes_sim=self.nodes_sim, 200 bias=self.bias, 201 dropout=self.dropout, 202 direct_link=self.direct_link, 203 n_clusters=self.n_clusters, 204 cluster_encode=self.cluster_encode, 205 type_clust=self.type_clust, 206 type_scaling=self.type_scaling, 207 col_sample=self.col_sample, 208 row_sample=self.row_sample, 209 cv_calibration=self.cv_calibration, 210 calibration_method=self.calibration_method, 211 seed=self.seed, 212 backend=self.backend, 213 ) 214 return clone 215 216 def fit(self, X, y, sample_weight=None, **kwargs): 217 """Fit custom model to training data (X, y). 218 219 Parameters: 220 221 X: {array-like}, shape = [n_samples, n_features] 222 Training vectors, where n_samples is the number 223 of samples and n_features is the number of features. 224 225 y: array-like, shape = [n_samples] 226 Target values. 227 228 sample_weight: array-like, shape = [n_samples] 229 Sample weights. 230 231 **kwargs: additional parameters to be passed to 232 self.cook_training_set or self.obj.fit 233 234 Returns: 235 236 self: object 237 """ 238 239 if len(X.shape) == 1: 240 if isinstance(X, pd.DataFrame): 241 X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns) 242 else: 243 X = X.reshape(1, -1) 244 245 output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 246 self.classes_ = np.unique(y) 247 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 248 249 # Wrap in CalibratedClassifierCV if needed 250 if self.cv_calibration is not None: 251 self.obj = CalibratedClassifierCV( 252 self.obj, cv=self.cv_calibration, method=self.calibration_method 253 ) 254 255 # if sample_weights, else: (must use self.row_index) 256 if sample_weight is not None: 257 self.obj.fit( 258 scaled_Z, 259 output_y, 260 sample_weight=sample_weight[self.index_row_].ravel(), 261 **kwargs 262 ) 263 return self 264 265 # if sample_weight is None: 266 self.obj.fit(scaled_Z, output_y, **kwargs) 267 self.classes_ = np.unique(y) # for compatibility with sklearn 268 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 269 270 if hasattr(self.obj, "coef_"): 271 self.coef_ = self.obj.coef_ 272 273 if hasattr(self.obj, "intercept_"): 274 self.intercept_ = self.obj.intercept_ 275 276 return self 277 278 def partial_fit(self, X, y, sample_weight=None, **kwargs): 279 """Partial fit custom model to training data (X, y). 280 281 Parameters: 282 283 X: {array-like}, shape = [n_samples, n_features] 284 Subset of training vectors, where n_samples is the number 285 of samples and n_features is the number of features. 286 287 y: array-like, shape = [n_samples] 288 Subset of target values. 289 290 sample_weight: array-like, shape = [n_samples] 291 Sample weights. 
292 293 **kwargs: additional parameters to be passed to 294 self.cook_training_set or self.obj.fit 295 296 Returns: 297 298 self: object 299 """ 300 301 if len(X.shape) == 1: 302 if isinstance(X, pd.DataFrame): 303 X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns) 304 else: 305 X = X.reshape(1, -1) 306 y = np.array([y], dtype=np.integer) 307 308 output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 309 self.n_classes_ = len(np.unique(y)) # for compatibility with sklearn 310 311 # if sample_weights, else: (must use self.row_index) 312 if sample_weight is not None: 313 try: 314 self.obj.partial_fit( 315 scaled_Z, 316 output_y, 317 sample_weight=sample_weight[self.index_row_].ravel(), 318 # **kwargs 319 ) 320 except: 321 NotImplementedError 322 323 return self 324 325 # if sample_weight is None: 326 # try: 327 self.obj.partial_fit(scaled_Z, output_y) 328 # except: 329 # raise NotImplementedError 330 331 self.classes_ = np.unique(y) # for compatibility with sklearn 332 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 333 334 return self 335 336 def predict(self, X, **kwargs): 337 """Predict test data X. 338 339 Parameters: 340 341 X: {array-like}, shape = [n_samples, n_features] 342 Training vectors, where n_samples is the number 343 of samples and n_features is the number of features. 344 345 **kwargs: additional parameters to be passed to 346 self.cook_test_set 347 348 Returns: 349 350 model predictions: {array-like} 351 """ 352 353 if len(X.shape) == 1: 354 n_features = X.shape[0] 355 new_X = mo.rbind( 356 X.reshape(1, n_features), 357 np.ones(n_features).reshape(1, n_features), 358 ) 359 360 return ( 361 self.obj.predict(self.cook_test_set(new_X, **kwargs), **kwargs) 362 )[0] 363 364 return self.obj.predict(self.cook_test_set(X, **kwargs), **kwargs) 365 366 def predict_proba(self, X, **kwargs): 367 """Predict probabilities for test data X. 368 369 Args: 370 371 X: {array-like}, shape = [n_samples, n_features] 372 Training vectors, where n_samples is the number 373 of samples and n_features is the number of features. 374 375 **kwargs: additional parameters to be passed to 376 self.cook_test_set 377 378 Returns: 379 380 probability estimates for test data: {array-like} 381 """ 382 383 if len(X.shape) == 1: 384 n_features = X.shape[0] 385 new_X = mo.rbind( 386 X.reshape(1, n_features), 387 np.ones(n_features).reshape(1, n_features), 388 ) 389 return ( 390 self.obj.predict_proba( 391 self.cook_test_set(new_X, **kwargs), **kwargs 392 ) 393 )[0] 394 return self.obj.predict_proba(self.cook_test_set(X, **kwargs), **kwargs) 395 396 def decision_function(self, X, **kwargs): 397 """Compute the decision function of X. 398 399 Parameters: 400 X: {array-like}, shape = [n_samples, n_features] 401 Samples to compute decision function for. 402 403 **kwargs: additional parameters to be passed to 404 self.cook_test_set 405 406 Returns: 407 array-like of shape (n_samples,) or (n_samples, n_classes) 408 Decision function of the input samples. The order of outputs is the same 409 as that of the classes passed to fit. 
410 """ 411 if not hasattr(self.obj, "decision_function"): 412 # If base classifier doesn't have decision_function, use predict_proba 413 proba = self.predict_proba(X, **kwargs) 414 if proba.shape[1] == 2: 415 return proba[:, 1] # For binary classification 416 return proba # For multiclass 417 418 if len(X.shape) == 1: 419 n_features = X.shape[0] 420 new_X = mo.rbind( 421 X.reshape(1, n_features), 422 np.ones(n_features).reshape(1, n_features), 423 ) 424 425 return ( 426 self.obj.decision_function( 427 self.cook_test_set(new_X, **kwargs), **kwargs 428 ) 429 )[0] 430 431 return self.obj.decision_function( 432 self.cook_test_set(X, **kwargs), **kwargs 433 ) 434 435 def score(self, X, y, scoring=None): 436 """Scoring function for classification. 437 438 Args: 439 440 X: {array-like}, shape = [n_samples, n_features] 441 Training vectors, where n_samples is the number 442 of samples and n_features is the number of features. 443 444 y: array-like, shape = [n_samples] 445 Target values. 446 447 scoring: str 448 scoring method (default is accuracy) 449 450 Returns: 451 452 score: float 453 """ 454 455 if scoring is None: 456 scoring = "accuracy" 457 458 if scoring == "accuracy": 459 return skm2.accuracy_score(y, self.predict(X)) 460 461 if scoring == "f1": 462 return skm2.f1_score(y, self.predict(X)) 463 464 if scoring == "precision": 465 return skm2.precision_score(y, self.predict(X)) 466 467 if scoring == "recall": 468 return skm2.recall_score(y, self.predict(X)) 469 470 if scoring == "roc_auc": 471 return skm2.roc_auc_score(y, self.predict(X)) 472 473 if scoring == "log_loss": 474 return skm2.log_loss(y, self.predict_proba(X)) 475 476 if scoring == "balanced_accuracy": 477 return skm2.balanced_accuracy_score(y, self.predict(X)) 478 479 if scoring == "average_precision": 480 return skm2.average_precision_score(y, self.predict(X)) 481 482 if scoring == "neg_brier_score": 483 return -skm2.brier_score_loss(y, self.predict_proba(X)) 484 485 if scoring == "neg_log_loss": 486 return -skm2.log_loss(y, self.predict_proba(X)) 487 488 @property 489 def _estimator_type(self): 490 return "classifier"
Custom Classification model
Parameters:
obj: object
any object containing a method fit (obj.fit()) and a method predict
(obj.predict())
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
cv_calibration: int, cross-validation generator, or iterable, default=2
Determines the cross-validation splitting strategy. Same as
`sklearn.calibration.CalibratedClassifierCV`
calibration_method: str
{‘sigmoid’, ‘isotonic’}, default=’sigmoid’
The method to use for calibration. Same as
`sklearn.calibration.CalibratedClassifierCV`
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu" or "gpu" or "tpu"
Examples:
Note: it's better to use the DeepClassifier or LazyDeepClassifier classes directly
import nnetsauce as ns
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_digits
from time import time
digits = load_digits()
X = digits.data
y = digits.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
random_state=123)
# layer 1 (base layer) ----
layer1_regr = RandomForestClassifier(n_estimators=10, random_state=123)
start = time()
layer1_regr.fit(X_train, y_train)
# Accuracy in layer 1
print(layer1_regr.score(X_test, y_test))
# layer 2 using layer 1 ----
layer2_regr = ns.CustomClassifier(obj = layer1_regr, n_hidden_features=5,
direct_link=True, bias=True,
nodes_sim='uniform', activation_name='relu',
n_clusters=2, seed=123)
layer2_regr.fit(X_train, y_train)
# Accuracy in layer 2
print(layer2_regr.score(X_test, y_test))
# layer 3 using layer 2 ----
layer3_regr = ns.CustomClassifier(obj = layer2_regr, n_hidden_features=10,
direct_link=True, bias=True, dropout=0.7,
nodes_sim='uniform', activation_name='relu',
n_clusters=2, seed=123)
layer3_regr.fit(X_train, y_train)
# Accuracy in layer 3
print(layer3_regr.score(X_test, y_test))
print(f"Elapsed {time() - start}")
216 def fit(self, X, y, sample_weight=None, **kwargs): 217 """Fit custom model to training data (X, y). 218 219 Parameters: 220 221 X: {array-like}, shape = [n_samples, n_features] 222 Training vectors, where n_samples is the number 223 of samples and n_features is the number of features. 224 225 y: array-like, shape = [n_samples] 226 Target values. 227 228 sample_weight: array-like, shape = [n_samples] 229 Sample weights. 230 231 **kwargs: additional parameters to be passed to 232 self.cook_training_set or self.obj.fit 233 234 Returns: 235 236 self: object 237 """ 238 239 if len(X.shape) == 1: 240 if isinstance(X, pd.DataFrame): 241 X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns) 242 else: 243 X = X.reshape(1, -1) 244 245 output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 246 self.classes_ = np.unique(y) 247 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 248 249 # Wrap in CalibratedClassifierCV if needed 250 if self.cv_calibration is not None: 251 self.obj = CalibratedClassifierCV( 252 self.obj, cv=self.cv_calibration, method=self.calibration_method 253 ) 254 255 # if sample_weights, else: (must use self.row_index) 256 if sample_weight is not None: 257 self.obj.fit( 258 scaled_Z, 259 output_y, 260 sample_weight=sample_weight[self.index_row_].ravel(), 261 **kwargs 262 ) 263 return self 264 265 # if sample_weight is None: 266 self.obj.fit(scaled_Z, output_y, **kwargs) 267 self.classes_ = np.unique(y) # for compatibility with sklearn 268 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 269 270 if hasattr(self.obj, "coef_"): 271 self.coef_ = self.obj.coef_ 272 273 if hasattr(self.obj, "intercept_"): 274 self.intercept_ = self.obj.intercept_ 275 276 return self
Fit custom model to training data (X, y).
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
sample_weight: array-like, shape = [n_samples]
Sample weights.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
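An illustrative sketch of `fit` with sample weights, continuing the digits example above (the weighting scheme is an arbitrary assumption):

```python
import numpy as np

# give more weight to the first half of the training set (purely illustrative)
weights = np.ones(X_train.shape[0])
weights[: X_train.shape[0] // 2] = 2.0

clf = ns.CustomClassifier(obj=RandomForestClassifier(n_estimators=10, random_state=123))
clf.fit(X_train, y_train, sample_weight=weights)
print(clf.score(X_test, y_test))
```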
def predict(self, X, **kwargs):
    """Predict test data X.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        **kwargs: additional parameters to be passed to
            self.cook_test_set

    Returns:

        model predictions: {array-like}
    """

    if len(X.shape) == 1:
        n_features = X.shape[0]
        new_X = mo.rbind(
            X.reshape(1, n_features),
            np.ones(n_features).reshape(1, n_features),
        )

        return (
            self.obj.predict(self.cook_test_set(new_X, **kwargs), **kwargs)
        )[0]

    return self.obj.predict(self.cook_test_set(X, **kwargs), **kwargs)
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Vectors to predict on, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
def predict_proba(self, X, **kwargs):
    """Predict probabilities for test data X.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        **kwargs: additional parameters to be passed to
            self.cook_test_set

    Returns:

        probability estimates for test data: {array-like}
    """

    if len(X.shape) == 1:
        n_features = X.shape[0]
        new_X = mo.rbind(
            X.reshape(1, n_features),
            np.ones(n_features).reshape(1, n_features),
        )
        return (
            self.obj.predict_proba(
                self.cook_test_set(new_X, **kwargs), **kwargs
            )
        )[0]

    return self.obj.predict_proba(self.cook_test_set(X, **kwargs), **kwargs)
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Vectors to predict on, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
435 def score(self, X, y, scoring=None): 436 """Scoring function for classification. 437 438 Args: 439 440 X: {array-like}, shape = [n_samples, n_features] 441 Training vectors, where n_samples is the number 442 of samples and n_features is the number of features. 443 444 y: array-like, shape = [n_samples] 445 Target values. 446 447 scoring: str 448 scoring method (default is accuracy) 449 450 Returns: 451 452 score: float 453 """ 454 455 if scoring is None: 456 scoring = "accuracy" 457 458 if scoring == "accuracy": 459 return skm2.accuracy_score(y, self.predict(X)) 460 461 if scoring == "f1": 462 return skm2.f1_score(y, self.predict(X)) 463 464 if scoring == "precision": 465 return skm2.precision_score(y, self.predict(X)) 466 467 if scoring == "recall": 468 return skm2.recall_score(y, self.predict(X)) 469 470 if scoring == "roc_auc": 471 return skm2.roc_auc_score(y, self.predict(X)) 472 473 if scoring == "log_loss": 474 return skm2.log_loss(y, self.predict_proba(X)) 475 476 if scoring == "balanced_accuracy": 477 return skm2.balanced_accuracy_score(y, self.predict(X)) 478 479 if scoring == "average_precision": 480 return skm2.average_precision_score(y, self.predict(X)) 481 482 if scoring == "neg_brier_score": 483 return -skm2.brier_score_loss(y, self.predict_proba(X)) 484 485 if scoring == "neg_log_loss": 486 return -skm2.log_loss(y, self.predict_proba(X))
Scoring function for classification.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method (default is accuracy)
Returns:
score: float
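For instance, continuing the digits example above (metrics such as 'f1' or 'roc_auc' assume a binary target, so multiclass-safe options are shown):

```python
print(layer3_regr.score(X_test, y_test))                               # accuracy (default)
print(layer3_regr.score(X_test, y_test, scoring="balanced_accuracy"))
print(layer3_regr.score(X_test, y_test, scoring="neg_log_loss"))
```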
18class CustomRegressor(Custom, RegressorMixin): 19 """Custom Regression model 20 21 This class is used to 'augment' any regression model with transformed features. 22 23 Parameters: 24 25 obj: object 26 any object containing a method fit (obj.fit()) and a method predict 27 (obj.predict()) 28 29 n_hidden_features: int 30 number of nodes in the hidden layer 31 32 activation_name: str 33 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 34 35 a: float 36 hyperparameter for 'prelu' or 'elu' activation function 37 38 nodes_sim: str 39 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 40 'uniform' 41 42 bias: boolean 43 indicates if the hidden layer contains a bias term (True) or not 44 (False) 45 46 dropout: float 47 regularization parameter; (random) percentage of nodes dropped out 48 of the training 49 50 direct_link: boolean 51 indicates if the original predictors are included (True) in model's 52 fitting or not (False) 53 54 n_clusters: int 55 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: 56 no clustering) 57 58 cluster_encode: bool 59 defines how the variable containing clusters is treated (default is one-hot) 60 if `False`, then labels are used, without one-hot encoding 61 62 type_clust: str 63 type of clustering method: currently k-means ('kmeans') or Gaussian 64 Mixture Model ('gmm') 65 66 type_scaling: a tuple of 3 strings 67 scaling methods for inputs, hidden layer, and clustering respectively 68 (and when relevant). 69 Currently available: standardization ('std') or MinMax scaling ('minmax') 70 71 type_pi: str. 72 type of prediction interval; currently `None` (split or local 73 conformal without simulation), "kde" or "bootstrap" (simulated split 74 conformal). 75 76 replications: int. 77 number of replications (if needed) for predictive simulation. 78 Used only in `self.predict`, for `self.kernel` in ('gaussian', 79 'tophat') and `self.type_pi = 'kde'`. Default is `None`. 80 81 kernel: str. 82 the kernel to use for kernel density estimation (used for predictive 83 simulation in `self.predict`, with `method='splitconformal'` and 84 `type_pi = 'kde'`). Currently, either 'gaussian' or 'tophat'. 85 86 type_split: str. 87 Type of splitting for conformal prediction. 
None (default), or 88 "random" (random split of data) or "sequential" (sequential split of data) 89 90 col_sample: float 91 percentage of covariates randomly chosen for training 92 93 row_sample: float 94 percentage of rows chosen for training, by stratified bootstrapping 95 96 level: float 97 confidence level for prediction intervals 98 99 pi_method: str 100 method for prediction intervals: 'splitconformal' or 'localconformal' 101 102 seed: int 103 reproducibility seed for nodes_sim=='uniform' 104 105 type_fit: str 106 'regression' 107 108 backend: str 109 "cpu" or "gpu" or "tpu" 110 111 Examples: 112 113 See [https://thierrymoudiki.github.io/blog/2024/03/18/python/conformal-and-bayesian-regression](https://thierrymoudiki.github.io/blog/2024/03/18/python/conformal-and-bayesian-regression) 114 115 """ 116 117 # construct the object ----- 118 119 def __init__( 120 self, 121 obj, 122 n_hidden_features=5, 123 activation_name="relu", 124 a=0.01, 125 nodes_sim="sobol", 126 bias=True, 127 dropout=0, 128 direct_link=True, 129 n_clusters=2, 130 cluster_encode=True, 131 type_clust="kmeans", 132 type_scaling=("std", "std", "std"), 133 type_pi=None, 134 replications=None, 135 kernel=None, 136 type_split=None, 137 col_sample=1, 138 row_sample=1, 139 level=None, 140 pi_method=None, 141 seed=123, 142 backend="cpu", 143 ): 144 super().__init__( 145 obj=obj, 146 n_hidden_features=n_hidden_features, 147 activation_name=activation_name, 148 a=a, 149 nodes_sim=nodes_sim, 150 bias=bias, 151 dropout=dropout, 152 direct_link=direct_link, 153 n_clusters=n_clusters, 154 cluster_encode=cluster_encode, 155 type_clust=type_clust, 156 type_scaling=type_scaling, 157 col_sample=col_sample, 158 row_sample=row_sample, 159 seed=seed, 160 backend=backend, 161 ) 162 163 self.type_fit = "regression" 164 self.type_pi = type_pi 165 self.replications = replications 166 self.kernel = kernel 167 self.type_split = type_split 168 self.level = level 169 self.pi_method = pi_method 170 self.coef_ = None 171 self.intercept_ = None 172 self.X_ = None 173 self.y_ = None 174 self.aic_ = None 175 self.aicc_ = None 176 self.bic_ = None 177 178 def fit(self, X, y, sample_weight=None, **kwargs): 179 """Fit custom model to training data (X, y). 180 181 Parameters: 182 183 X: {array-like}, shape = [n_samples, n_features] 184 Training vectors, where n_samples is the number 185 of samples and n_features is the number of features. 186 187 y: array-like, shape = [n_samples] 188 Target values. 189 190 sample_weight: array-like, shape = [n_samples] 191 Sample weights. 
192 193 **kwargs: additional parameters to be passed to 194 self.cook_training_set or self.obj.fit 195 196 Returns: 197 198 self: object 199 200 """ 201 202 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 203 204 if self.level is not None: 205 self.obj = PredictionInterval( 206 obj=self.obj, method=self.pi_method, level=self.level 207 ) 208 209 # if sample_weights, else: (must use self.row_index) 210 if sample_weight is not None: 211 self.obj.fit( 212 scaled_Z, 213 centered_y, 214 sample_weight=sample_weight[self.index_row_].ravel(), 215 **kwargs 216 ) 217 218 return self 219 220 self.obj.fit(scaled_Z, centered_y, **kwargs) 221 222 self.X_ = X 223 224 self.y_ = y 225 226 # Compute SSE 227 centered_y_pred = self.obj.predict(scaled_Z) 228 self.sse_ = np.sum((centered_y - centered_y_pred) ** 2) 229 230 # Get number of parameters 231 n_params = ( 232 self.n_hidden_features + X.shape[1] 233 ) # hidden features + original features 234 if self.n_clusters > 0: 235 n_params += self.n_clusters # add clusters if used 236 237 # Compute information criteria 238 n_samples = X.shape[0] 239 temp = n_samples * np.log(self.sse_ / n_samples) 240 self.aic_ = temp + 2 * n_params 241 self.bic_ = temp + np.log(n_samples) * n_params 242 243 if hasattr(self.obj, "coef_"): 244 self.coef_ = self.obj.coef_ 245 246 if hasattr(self.obj, "intercept_"): 247 self.intercept_ = self.obj.intercept_ 248 249 return self 250 251 def partial_fit(self, X, y, **kwargs): 252 """Partial fit custom model to training data (X, y). 253 254 Parameters: 255 256 X: {array-like}, shape = [n_samples, n_features] 257 Subset of training vectors, where n_samples is the number 258 of samples and n_features is the number of features. 259 260 y: array-like, shape = [n_samples] 261 Subset of target values. 262 263 **kwargs: additional parameters to be passed to 264 self.cook_training_set or self.obj.fit 265 266 Returns: 267 268 self: object 269 270 """ 271 272 if len(X.shape) == 1: 273 if isinstance(X, pd.DataFrame): 274 X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns) 275 else: 276 X = X.reshape(1, -1) 277 y = np.array([y]) 278 279 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 280 281 self.obj.partial_fit(scaled_Z, centered_y, **kwargs) 282 283 self.X_ = X 284 285 self.y_ = y 286 287 return self 288 289 def predict(self, X, level=95, method="splitconformal", **kwargs): 290 """Predict test data X. 291 292 Parameters: 293 294 X: {array-like}, shape = [n_samples, n_features] 295 Training vectors, where n_samples is the number 296 of samples and n_features is the number of features. 
297 298 level: int 299 Level of confidence (default = 95) 300 301 method: str 302 'splitconformal', 'localconformal' 303 prediction (if you specify `return_pi = True`) 304 305 **kwargs: additional parameters 306 `return_pi = True` for conformal prediction, 307 with `method` in ('splitconformal', 'localconformal') 308 or `return_std = True` for `self.obj` in 309 (`sklearn.linear_model.BayesianRidge`, 310 `sklearn.linear_model.ARDRegressor`, 311 `sklearn.gaussian_process.GaussianProcessRegressor`)` 312 313 Returns: 314 315 model predictions: 316 an array if uncertainty quantification is not requested, 317 or a tuple if with prediction intervals and simulations 318 if `return_std = True` (mean, standard deviation, 319 lower and upper prediction interval) or `return_pi = True` 320 () 321 322 """ 323 324 if "return_std" in kwargs: 325 alpha = 100 - level 326 pi_multiplier = norm.ppf(1 - alpha / 200) 327 328 if len(X.shape) == 1: 329 n_features = X.shape[0] 330 new_X = mo.rbind( 331 X.reshape(1, n_features), 332 np.ones(n_features).reshape(1, n_features), 333 ) 334 335 mean_, std_ = self.obj.predict( 336 self.cook_test_set(new_X, **kwargs), return_std=True 337 )[0] 338 339 preds = self.y_mean_ + mean_ 340 lower = self.y_mean_ + (mean_ - pi_multiplier * std_) 341 upper = self.y_mean_ + (mean_ + pi_multiplier * std_) 342 343 DescribeResults = namedtuple( 344 "DescribeResults", ["mean", "std", "lower", "upper"] 345 ) 346 347 return DescribeResults(preds, std_, lower, upper) 348 349 # len(X.shape) > 1 350 mean_, std_ = self.obj.predict( 351 self.cook_test_set(X, **kwargs), return_std=True 352 ) 353 354 preds = self.y_mean_ + mean_ 355 lower = self.y_mean_ + (mean_ - pi_multiplier * std_) 356 upper = self.y_mean_ + (mean_ + pi_multiplier * std_) 357 358 DescribeResults = namedtuple( 359 "DescribeResults", ["mean", "std", "lower", "upper"] 360 ) 361 362 return DescribeResults(preds, std_, lower, upper) 363 364 if "return_pi" in kwargs: 365 assert method in ( 366 "splitconformal", 367 "localconformal", 368 ), "method must be in ('splitconformal', 'localconformal')" 369 self.pi = PredictionInterval( 370 obj=self, 371 method=method, 372 level=level, 373 type_pi=self.type_pi, 374 replications=self.replications, 375 kernel=self.kernel, 376 ) 377 378 if len(self.X_.shape) == 1: 379 if isinstance(X, pd.DataFrame): 380 self.X_ = pd.DataFrame( 381 self.X_.values.reshape(1, -1), columns=self.X_.columns 382 ) 383 else: 384 self.X_ = self.X_.reshape(1, -1) 385 self.y_ = np.array([self.y_]) 386 387 self.pi.fit(self.X_, self.y_) 388 # self.X_ = None # consumes memory to keep, dangerous to delete (side effect) 389 # self.y_ = None # consumes memory to keep, dangerous to delete (side effect) 390 preds = self.pi.predict(X, return_pi=True) 391 return preds 392 393 # "return_std" not in kwargs 394 if len(X.shape) == 1: 395 n_features = X.shape[0] 396 new_X = mo.rbind( 397 X.reshape(1, n_features), 398 np.ones(n_features).reshape(1, n_features), 399 ) 400 401 return ( 402 self.y_mean_ 403 + self.obj.predict( 404 self.cook_test_set(new_X, **kwargs), **kwargs 405 ) 406 )[0] 407 408 # len(X.shape) > 1 409 return self.y_mean_ + self.obj.predict( 410 self.cook_test_set(X, **kwargs), **kwargs 411 ) 412 413 def score(self, X, y, scoring=None): 414 """Compute the score of the model. 415 416 Parameters: 417 418 X: {array-like}, shape = [n_samples, n_features] 419 Training vectors, where n_samples is the number 420 of samples and n_features is the number of features. 421 422 y: array-like, shape = [n_samples] 423 Target values. 
424 425 scoring: str 426 scoring method 427 428 Returns: 429 430 score: float 431 432 """ 433 434 if scoring is None: 435 return np.sqrt(np.mean((self.predict(X) - y) ** 2)) 436 437 return skm2.get_scorer(scoring)(self, X, y)
Custom Regression model
This class is used to 'augment' any regression model with transformed features.
Parameters:
obj: object
any object containing a method fit (obj.fit()) and a method predict
(obj.predict())
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
during training
direct_link: boolean
indicates if the original predictors are included (True) in the model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
type_pi: str.
type of prediction interval; currently `None` (split or local
conformal without simulation), "kde" or "bootstrap" (simulated split
conformal).
replications: int.
number of replications (if needed) for predictive simulation.
Used only in `self.predict`, for `self.kernel` in ('gaussian',
'tophat') and `self.type_pi = 'kde'`. Default is `None`.
kernel: str.
the kernel to use for kernel density estimation (used for predictive
simulation in `self.predict`, with `method='splitconformal'` and
`type_pi = 'kde'`). Currently, either 'gaussian' or 'tophat'.
type_split: str.
Type of splitting for conformal prediction. None (default), or
"random" (random split of data) or "sequential" (sequential split of data)
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
level: float
confidence level for prediction intervals
pi_method: str
method for prediction intervals: 'splitconformal' or 'localconformal'
seed: int
reproducibility seed for nodes_sim=='uniform'
type_fit: str
'regression'
backend: str
"cpu" or "gpu" or "tpu"
Examples:
See https://thierrymoudiki.github.io/blog/2024/03/18/python/conformal-and-bayesian-regression
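A minimal, self-contained sketch of the workflow described above; `RidgeCV` and the diabetes data are only illustrative choices, and the hyperparameter values are not tuned:

```python
# Minimal sketch: augment a scikit-learn regressor with transformed features
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.linear_model import RidgeCV
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123
)

reg = ns.CustomRegressor(obj=RidgeCV(), n_hidden_features=10, dropout=0.1)
reg.fit(X_train, y_train)
print(reg.predict(X_test)[:5])    # point predictions
print(reg.score(X_test, y_test))  # RMSE (the default when scoring=None)
```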
CustomRegressor.fit(X, y, sample_weight=None, **kwargs)
Fit custom model to training data (X, y).
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
sample_weight: array-like, shape = [n_samples]
Sample weights.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
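Continuing the sketch above, `fit` also forwards sample weights to the base learner (a hedged illustration, assuming the base learner accepts `sample_weight`):

```python
# Sketch: sample weights are sliced with the training row index and passed on
import numpy as np

rng = np.random.default_rng(123)
w = rng.uniform(0.5, 1.5, size=X_train.shape[0])  # X_train, y_train from the sketch above
reg.fit(X_train, y_train, sample_weight=w)
```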
CustomRegressor.predict(X, level=95, method="splitconformal", **kwargs)
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
level: int
Level of confidence (default = 95)
method: str
'splitconformal' or 'localconformal' conformal
prediction (used only if `return_pi = True` is specified)
**kwargs: additional parameters
`return_pi = True` for conformal prediction,
with `method` in ('splitconformal', 'localconformal')
or `return_std = True` for `self.obj` in
(`sklearn.linear_model.BayesianRidge`,
`sklearn.linear_model.ARDRegression`,
`sklearn.gaussian_process.GaussianProcessRegressor`)
Returns:
model predictions:
an array if uncertainty quantification is not requested;
a named tuple (mean, standard deviation, lower and upper
prediction bounds) if `return_std = True`; prediction intervals
(and simulations, when requested) if `return_pi = True`
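Two hedged illustrations of the uncertainty options listed above, reusing the data from the first sketch; `BayesianRidge` is one of the base learners that supports `return_std`:

```python
# return_std: Gaussian-style intervals from a Bayesian base learner
from sklearn.linear_model import BayesianRidge

reg_std = ns.CustomRegressor(obj=BayesianRidge()).fit(X_train, y_train)
res = reg_std.predict(X_test, level=95, return_std=True)
print(res.mean[:3], res.lower[:3], res.upper[:3])  # namedtuple: mean, std, lower, upper

# return_pi: split conformal prediction intervals
reg_cp = ns.CustomRegressor(obj=RidgeCV()).fit(X_train, y_train)
pi_preds = reg_cp.predict(X_test, level=95, method="splitconformal", return_pi=True)
```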
CustomRegressor.score(X, y, scoring=None)
Compute the score of the model.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method
Returns:
score: float
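Continuing the first sketch, the default score is the RMSE; any scikit-learn scorer name can be passed instead (a hedged illustration):

```python
# Default scoring (scoring=None) returns the RMSE; named scorers use sklearn's get_scorer
print(reg.score(X_test, y_test))                # RMSE
print(reg.score(X_test, y_test, scoring="r2"))  # coefficient of determination
```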
20class CustomBackPropRegressor(Custom, RegressorMixin): 21 """ 22 Finite difference trainer for nnetsauce models. 23 24 Parameters 25 ---------- 26 27 base_model : str 28 The name of the base model (e.g., 'RidgeCV'). 29 30 type_grad : {'finitediff', 'autodiff'}, optional 31 Type of gradient computation to use (default='finitediff'). 32 33 lr : float, optional 34 Learning rate for optimization (default=1e-4). 35 36 optimizer : {'gd', 'sgd', 'adam', 'cd'}, optional 37 Optimization algorithm: gradient descent ('gd'), stochastic gradient descent ('sgd'), 38 Adam ('adam'), or coordinate descent ('cd'). Default is 'gd'. 39 40 eps : float, optional 41 Scaling factor for adaptive finite difference step size (default=1e-3). 42 43 batch_size : int, optional 44 Batch size for 'sgd' optimizer (default=32). 45 46 alpha : float, optional 47 Elastic net penalty strength (default=0.0). 48 49 l1_ratio : float, optional 50 Elastic net mixing parameter (0 = Ridge, 1 = Lasso, default=0.0). 51 52 type_loss : {'mse', 'quantile'}, optional 53 Type of loss function to use (default='mse'). 54 55 q : float, optional 56 Quantile for quantile loss (default=0.5). 57 58 **kwargs 59 Additional parameters to pass to the scikit-learn model. 60 61 """ 62 63 def __init__( 64 self, 65 base_model, 66 type_grad="finitediff", 67 lr=1e-4, 68 optimizer="gd", 69 eps=1e-3, 70 batch_size=32, 71 alpha=0.0, 72 l1_ratio=0.0, 73 type_loss="mse", 74 q=0.5, 75 backend="cpu", 76 **kwargs, 77 ): 78 super().__init__(base_model, True, **kwargs) 79 self.base_model = base_model 80 self.custom_kwargs = kwargs 81 self.backend = backend 82 self.model = ns.CustomRegressor( 83 self.base_model, backend=self.backend, **self.custom_kwargs 84 ) 85 assert isinstance( 86 self.model, ns.CustomRegressor 87 ), "'model' must be of class ns.CustomRegressor" 88 self.type_grad = type_grad 89 self.lr = lr 90 self.optimizer = optimizer 91 self.eps = eps 92 self.loss_history_ = [] 93 self.opt_state = None 94 self.batch_size = batch_size # for SGD 95 self.loss_history_ = [] 96 self._cd_index = 0 # For coordinate descent 97 self.alpha = alpha 98 self.l1_ratio = l1_ratio 99 self.type_loss = type_loss 100 self.q = q 101 102 def _loss(self, X, y, **kwargs): 103 """ 104 Compute the loss (with elastic net penalty) for the current model. 105 106 Parameters 107 ---------- 108 109 X : array-like of shape (n_samples, n_features) 110 Input data. 111 112 y : array-like of shape (n_samples,) 113 Target values. 114 115 **kwargs 116 Additional keyword arguments for loss calculation. 117 118 Returns 119 ------- 120 float 121 The computed loss value. 122 """ 123 y_pred = self.model.predict(X) 124 if self.type_loss == "mse": 125 loss = np.mean((y - y_pred) ** 2) 126 elif self.type_loss == "quantile": 127 loss = mean_pinball_loss(y, y_pred, alpha=self.q, **kwargs) 128 W = self.model.W_ 129 l1 = np.sum(np.abs(W)) 130 l2 = np.sum(W**2) 131 return loss + self.alpha * ( 132 self.l1_ratio * l1 + 0.5 * (1 - self.l1_ratio) * l2 133 ) 134 135 def _compute_grad(self, X, y): 136 """ 137 Compute the gradient of the loss with respect to W_ using finite differences. 138 139 Parameters 140 ---------- 141 142 X : array-like of shape (n_samples, n_features) 143 Input data. 144 145 y : array-like of shape (n_samples,) 146 Target values. 147 148 Returns 149 ------- 150 151 ndarray 152 Gradient array with the same shape as W_. 153 """ 154 if self.type_grad == "autodiff": 155 raise NotImplementedError( 156 "Automatic differentiation is not implemented yet." 
157 ) 158 # Use JAX for automatic differentiation 159 W = deepcopy(self.model.W_) 160 W_flat = W.flatten() 161 n_params = W_flat.size 162 163 def loss_fn(W_flat): 164 W_reshaped = W_flat.reshape(W.shape) 165 self.model.W_ = W_reshaped 166 return self._loss(X, y) 167 168 grad_fn = jax.grad(loss_fn) 169 grad_flat = grad_fn(W_flat) 170 grad = grad_flat.reshape(W.shape) 171 172 # Add elastic net gradient 173 l1_grad = self.alpha * self.l1_ratio * np.sign(W) 174 l2_grad = self.alpha * (1 - self.l1_ratio) * W 175 grad += l1_grad + l2_grad 176 177 self.model.W_ = W 178 return grad 179 180 # Finite difference gradient computation 181 W = deepcopy(self.model.W_) 182 shape = W.shape 183 W_flat = W.flatten() 184 n_params = W_flat.size 185 186 # Adaptive finite difference step 187 h_vec = self.eps * np.maximum(1.0, np.abs(W_flat)) 188 eye = np.eye(n_params) 189 190 loss_plus = np.zeros(n_params) 191 loss_minus = np.zeros(n_params) 192 193 for i in range(n_params): 194 h_i = h_vec[i] 195 Wp = W_flat.copy() 196 Wp[i] += h_i 197 Wm = W_flat.copy() 198 Wm[i] -= h_i 199 200 self.model.W_ = Wp.reshape(shape) 201 loss_plus[i] = self._loss(X, y) 202 203 self.model.W_ = Wm.reshape(shape) 204 loss_minus[i] = self._loss(X, y) 205 206 grad = ((loss_plus - loss_minus) / (2 * h_vec)).reshape(shape) 207 208 # Add elastic net gradient 209 l1_grad = self.alpha * self.l1_ratio * np.sign(W) 210 l2_grad = self.alpha * (1 - self.l1_ratio) * W 211 grad += l1_grad + l2_grad 212 213 self.model.W_ = W # restore original 214 return grad 215 216 def fit( 217 self, 218 X, 219 y, 220 epochs=10, 221 verbose=True, 222 show_progress=True, 223 sample_weight=None, 224 **kwargs, 225 ): 226 """ 227 Fit the model using finite difference optimization. 228 229 Parameters 230 ---------- 231 232 X : array-like of shape (n_samples, n_features) 233 Training data. 234 235 y : array-like of shape (n_samples,) 236 Target values. 237 238 epochs : int, optional 239 Number of optimization steps (default=10). 240 241 verbose : bool, optional 242 Whether to print progress messages (default=True). 243 244 show_progress : bool, optional 245 Whether to show tqdm progress bar (default=True). 246 247 sample_weight : array-like, optional 248 Sample weights. 249 250 **kwargs 251 Additional keyword arguments. 252 253 Returns 254 ------- 255 256 self : object 257 Returns self. 
258 """ 259 260 self.model.fit(X, y) 261 262 iterator = tqdm(range(epochs)) if show_progress else range(epochs) 263 264 for epoch in iterator: 265 grad = self._compute_grad(X, y) 266 267 if self.optimizer == "gd": 268 self.model.W_ -= self.lr * grad 269 self.model.W_ = np.clip(self.model.W_, 0, 1) 270 # print("self.model.W_", self.model.W_) 271 272 elif self.optimizer == "sgd": 273 # Sample a mini-batch for stochastic gradient 274 n_samples = X.shape[0] 275 idxs = np.random.choice( 276 n_samples, self.batch_size, replace=False 277 ) 278 if isinstance(X, pd.DataFrame): 279 X_batch = X.iloc[idxs, :] 280 else: 281 X_batch = X[idxs, :] 282 y_batch = y[idxs] 283 grad = self._compute_grad(X_batch, y_batch) 284 285 self.model.W_ -= self.lr * grad 286 self.model.W_ = np.clip(self.model.W_, 0, 1) 287 288 elif self.optimizer == "adam": 289 if self.opt_state is None: 290 self.opt_state = { 291 "m": np.zeros_like(grad), 292 "v": np.zeros_like(grad), 293 "t": 0, 294 } 295 beta1, beta2, eps = 0.9, 0.999, 1e-8 296 self.opt_state["t"] += 1 297 self.opt_state["m"] = ( 298 beta1 * self.opt_state["m"] + (1 - beta1) * grad 299 ) 300 self.opt_state["v"] = beta2 * self.opt_state["v"] + ( 301 1 - beta2 302 ) * (grad**2) 303 m_hat = self.opt_state["m"] / (1 - beta1 ** self.opt_state["t"]) 304 v_hat = self.opt_state["v"] / (1 - beta2 ** self.opt_state["t"]) 305 306 self.model.W_ -= self.lr * m_hat / (np.sqrt(v_hat) + eps) 307 self.model.W_ = np.clip(self.model.W_, 0, 1) 308 # print("self.model.W_", self.model.W_) 309 310 elif self.optimizer == "cd": # coordinate descent 311 W_shape = self.model.W_.shape 312 W_flat_size = self.model.W_.size 313 W_flat = self.model.W_.flatten() 314 grad_flat = grad.flatten() 315 316 # Update only one coordinate per epoch (cyclic) 317 idx = self._cd_index % W_flat_size 318 W_flat[idx] -= self.lr * grad_flat[idx] 319 # Clip the updated value 320 W_flat[idx] = np.clip(W_flat[idx], 0, 1) 321 322 # Restore W_ 323 self.model.W_ = W_flat.reshape(W_shape) 324 325 self._cd_index += 1 326 327 else: 328 raise ValueError(f"Unsupported optimizer: {self.optimizer}") 329 330 loss = self._loss(X, y) 331 self.loss_history_.append(loss) 332 333 if verbose: 334 print(f"Epoch {epoch+1}: Loss = {loss:.6f}") 335 336 # if sample_weights, else: (must use self.row_index) 337 if sample_weight in kwargs: 338 self.model.fit( 339 X, 340 y, 341 sample_weight=sample_weight[self.index_row_].ravel(), 342 **kwargs, 343 ) 344 345 return self 346 347 return self 348 349 def predict(self, X, level=95, method="splitconformal", **kwargs): 350 """ 351 Predict using the trained model. 352 353 Parameters 354 ---------- 355 356 X : array-like of shape (n_samples, n_features) 357 Input data. 358 359 level : int, optional 360 Level of confidence for prediction intervals (default=95). 361 362 method : {'splitconformal', 'localconformal'}, optional 363 Method for conformal prediction (default='splitconformal'). 364 365 **kwargs 366 Additional keyword arguments. Use `return_pi=True` for prediction intervals, 367 or `return_std=True` for standard deviation estimates. 368 369 Returns 370 ------- 371 372 array or tuple 373 Model predictions, or a tuple with prediction intervals or standard deviations if requested. 
374 """ 375 if "return_std" in kwargs: 376 alpha = 100 - level 377 pi_multiplier = norm.ppf(1 - alpha / 200) 378 379 if len(X.shape) == 1: 380 n_features = X.shape[0] 381 new_X = mo.rbind( 382 X.reshape(1, n_features), 383 np.ones(n_features).reshape(1, n_features), 384 ) 385 386 mean_, std_ = self.model.predict(new_X, return_std=True)[0] 387 388 preds = mean_ 389 lower = mean_ - pi_multiplier * std_ 390 upper = mean_ + pi_multiplier * std_ 391 392 DescribeResults = namedtuple( 393 "DescribeResults", ["mean", "std", "lower", "upper"] 394 ) 395 396 return DescribeResults(preds, std_, lower, upper) 397 398 # len(X.shape) > 1 399 mean_, std_ = self.model.predict(X, return_std=True) 400 401 preds = mean_ 402 lower = mean_ - pi_multiplier * std_ 403 upper = mean_ + pi_multiplier * std_ 404 405 DescribeResults = namedtuple( 406 "DescribeResults", ["mean", "std", "lower", "upper"] 407 ) 408 409 return DescribeResults(preds, std_, lower, upper) 410 411 if "return_pi" in kwargs: 412 assert method in ( 413 "splitconformal", 414 "localconformal", 415 ), "method must be in ('splitconformal', 'localconformal')" 416 self.pi = ns.PredictionInterval( 417 obj=self, 418 method=method, 419 level=level, 420 type_pi=self.type_pi, 421 replications=self.replications, 422 kernel=self.kernel, 423 ) 424 425 if len(self.X_.shape) == 1: 426 if isinstance(X, pd.DataFrame): 427 self.X_ = pd.DataFrame( 428 self.X_.values.reshape(1, -1), columns=self.X_.columns 429 ) 430 else: 431 self.X_ = self.X_.reshape(1, -1) 432 self.y_ = np.array([self.y_]) 433 434 self.pi.fit(self.X_, self.y_) 435 # self.X_ = None # consumes memory to keep, dangerous to delete (side effect) 436 # self.y_ = None # consumes memory to keep, dangerous to delete (side effect) 437 preds = self.pi.predict(X, return_pi=True) 438 return preds 439 440 # "return_std" not in kwargs 441 if len(X.shape) == 1: 442 n_features = X.shape[0] 443 new_X = mo.rbind( 444 X.reshape(1, n_features), 445 np.ones(n_features).reshape(1, n_features), 446 ) 447 448 return (0 + self.model.predict(new_X, **kwargs))[0] 449 450 # len(X.shape) > 1 451 return self.model.predict(X, **kwargs)
Finite difference trainer for nnetsauce models.
Parameters
base_model : object The base estimator to augment (e.g., `RidgeCV()`); an object with `fit` and `predict` methods, wrapped internally in a `CustomRegressor`.
type_grad : {'finitediff', 'autodiff'}, optional Type of gradient computation to use (default='finitediff').
lr : float, optional Learning rate for optimization (default=1e-4).
optimizer : {'gd', 'sgd', 'adam', 'cd'}, optional Optimization algorithm: gradient descent ('gd'), stochastic gradient descent ('sgd'), Adam ('adam'), or coordinate descent ('cd'). Default is 'gd'.
eps : float, optional Scaling factor for adaptive finite difference step size (default=1e-3).
batch_size : int, optional Batch size for 'sgd' optimizer (default=32).
alpha : float, optional Elastic net penalty strength (default=0.0).
l1_ratio : float, optional Elastic net mixing parameter (0 = Ridge, 1 = Lasso, default=0.0).
type_loss : {'mse', 'quantile'}, optional Type of loss function to use (default='mse').
q : float, optional Quantile for quantile loss (default=0.5).
**kwargs Additional parameters to pass to the scikit-learn model.
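A minimal usage sketch, assuming the base learner is passed as a scikit-learn estimator instance (it is wrapped in a `CustomRegressor` internally); the hyperparameter values are illustrative only:

```python
# Sketch: finite-difference training of the hidden-layer weights W_, with an elastic net penalty
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.linear_model import RidgeCV
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123
)

bp = ns.CustomBackPropRegressor(
    RidgeCV(), optimizer="adam", lr=1e-3, alpha=0.01, l1_ratio=0.5
)
bp.fit(X_train, y_train, epochs=5, verbose=False)
print(bp.predict(X_test)[:5])
print(bp.loss_history_)  # one loss value per epoch
```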
CustomBackPropRegressor.fit(X, y, epochs=10, verbose=True, show_progress=True, sample_weight=None, **kwargs)
Fit the model using finite difference optimization.
Parameters
X : array-like of shape (n_samples, n_features) Training data.
y : array-like of shape (n_samples,) Target values.
epochs : int, optional Number of optimization steps (default=10).
verbose : bool, optional Whether to print progress messages (default=True).
show_progress : bool, optional Whether to show tqdm progress bar (default=True).
sample_weight : array-like, optional Sample weights.
**kwargs Additional keyword arguments.
Returns
self : object Returns self.
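Continuing the sketch above, a hedged illustration of the quantile (pinball) loss combined with mini-batch stochastic gradient descent:

```python
# Sketch: optimize the 90th-percentile pinball loss with SGD on mini-batches
bp_q = ns.CustomBackPropRegressor(
    RidgeCV(), type_loss="quantile", q=0.9, optimizer="sgd", batch_size=64, lr=1e-3
)
bp_q.fit(X_train, y_train, epochs=3, verbose=False, show_progress=False)
print(bp_q.loss_history_)
```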
CustomBackPropRegressor.predict(X, level=95, method="splitconformal", **kwargs)
Predict using the trained model.
Parameters
X : array-like of shape (n_samples, n_features) Input data.
level : int, optional Level of confidence for prediction intervals (default=95).
method : {'splitconformal', 'localconformal'}, optional Method for conformal prediction (default='splitconformal').
**kwargs
Additional keyword arguments. Use return_pi=True for prediction intervals,
or return_std=True for standard deviation estimates.
Returns
array or tuple Model predictions, or a tuple with prediction intervals or standard deviations if requested.
36class DeepClassifier(CustomClassifier, ClassifierMixin): 37 """ 38 Deep Classifier 39 40 Parameters: 41 42 obj: an object 43 A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification 44 45 n_layers: int (default=3) 46 Number of layers. `n_layers = 1` is a simple `CustomClassifier` 47 48 verbose : int, optional (default=0) 49 Monitor progress when fitting. 50 51 All the other parameters are nnetsauce `CustomClassifier`'s 52 53 Examples: 54 55 ```python 56 import nnetsauce as ns 57 from sklearn.datasets import load_breast_cancer 58 from sklearn.model_selection import train_test_split 59 from sklearn.linear_model import LogisticRegressionCV 60 data = load_breast_cancer() 61 X = data.data 62 y= data.target 63 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123) 64 obj = LogisticRegressionCV() 65 clf = ns.DeepClassifier(obj) 66 clf.fit(X_train, y_train) 67 print(clf.score(clf.predict(X_test), y_test)) 68 ``` 69 """ 70 71 _estimator_type = "classifier" 72 73 def __init__( 74 self, 75 obj, 76 # Defining depth 77 n_layers=3, 78 verbose=0, 79 # CustomClassifier attributes 80 n_hidden_features=5, 81 activation_name="relu", 82 a=0.01, 83 nodes_sim="sobol", 84 bias=True, 85 dropout=0, 86 direct_link=True, 87 n_clusters=2, 88 cluster_encode=True, 89 type_clust="kmeans", 90 type_scaling=("std", "std", "std"), 91 col_sample=1, 92 row_sample=1, 93 cv_calibration=2, 94 calibration_method="sigmoid", 95 seed=123, 96 backend="cpu", 97 ): 98 super().__init__( 99 obj=obj, 100 n_hidden_features=n_hidden_features, 101 activation_name=activation_name, 102 a=a, 103 nodes_sim=nodes_sim, 104 bias=bias, 105 dropout=dropout, 106 direct_link=direct_link, 107 n_clusters=n_clusters, 108 cluster_encode=cluster_encode, 109 type_clust=type_clust, 110 type_scaling=type_scaling, 111 col_sample=col_sample, 112 row_sample=row_sample, 113 seed=seed, 114 backend=backend, 115 ) 116 self.coef_ = None 117 self.intercept_ = None 118 self.type_fit = "classification" 119 self.cv_calibration = cv_calibration 120 self.calibration_method = calibration_method 121 122 # Only wrap in CalibratedClassifierCV if not already wrapped 123 # if not isinstance(obj, CalibratedClassifierCV): 124 # self.obj = CalibratedClassifierCV( 125 # self.obj, 126 # cv=self.cv_calibration, 127 # method=self.calibration_method 128 # ) 129 # else: 130 self.coef_ = None 131 self.intercept_ = None 132 self.type_fit = "classification" 133 self.cv_calibration = cv_calibration 134 self.calibration_method = calibration_method 135 self.obj = obj 136 137 assert n_layers >= 1, "must have n_layers >= 1" 138 self.stacked_obj = obj 139 self.verbose = verbose 140 self.n_layers = n_layers 141 self.classes_ = None 142 self.n_classes_ = None 143 144 def fit(self, X, y, **kwargs): 145 """Fit Classification algorithms to X and y. 146 Parameters 147 ---------- 148 X : array-like, 149 Training vectors, where rows is the number of samples 150 and columns is the number of features. 151 y : array-like, 152 Training vectors, where rows is the number of samples 153 and columns is the number of features. 154 **kwargs: dict 155 Additional parameters to be passed to the fit method 156 of the base learner. For example, `sample_weight`. 
157 158 Returns 159 ------- 160 A fitted object 161 """ 162 163 self.classes_ = np.unique(y) 164 self.n_classes_ = len( 165 self.classes_ 166 ) # for compatibility with scikit-learn 167 168 if isinstance(X, np.ndarray): 169 X = pd.DataFrame(X) 170 171 # init layer 172 self.stacked_obj = CustomClassifier( 173 obj=self.stacked_obj, 174 n_hidden_features=self.n_hidden_features, 175 activation_name=self.activation_name, 176 a=self.a, 177 nodes_sim=self.nodes_sim, 178 bias=self.bias, 179 dropout=self.dropout, 180 direct_link=self.direct_link, 181 n_clusters=self.n_clusters, 182 cluster_encode=self.cluster_encode, 183 type_clust=self.type_clust, 184 type_scaling=self.type_scaling, 185 col_sample=self.col_sample, 186 row_sample=self.row_sample, 187 cv_calibration=None, 188 calibration_method=None, 189 seed=self.seed, 190 backend=self.backend, 191 ) 192 193 if self.verbose > 0: 194 iterator = tqdm(range(self.n_layers - 1)) 195 else: 196 iterator = range(self.n_layers - 1) 197 198 for _ in iterator: 199 self.stacked_obj = deepcopy( 200 CustomClassifier( 201 obj=self.stacked_obj, 202 n_hidden_features=self.n_hidden_features, 203 activation_name=self.activation_name, 204 a=self.a, 205 nodes_sim=self.nodes_sim, 206 bias=self.bias, 207 dropout=self.dropout, 208 direct_link=self.direct_link, 209 n_clusters=self.n_clusters, 210 cluster_encode=self.cluster_encode, 211 type_clust=self.type_clust, 212 type_scaling=self.type_scaling, 213 col_sample=self.col_sample, 214 row_sample=self.row_sample, 215 cv_calibration=None, 216 calibration_method=None, 217 seed=self.seed, 218 backend=self.backend, 219 ) 220 ) 221 self.stacked_obj.fit(X, y, **kwargs) 222 223 return self 224 225 def partial_fit(self, X, y, **kwargs): 226 """Fit Regression algorithms to X and y. 227 Parameters 228 ---------- 229 X : array-like, 230 Training vectors, where rows is the number of samples 231 and columns is the number of features. 232 y : array-like, 233 Training vectors, where rows is the number of samples 234 and columns is the number of features. 235 **kwargs: dict 236 Additional parameters to be passed to the fit method 237 of the base learner. For example, `sample_weight`. 238 Returns 239 ------- 240 A fitted object 241 """ 242 assert hasattr(self, "stacked_obj"), "model must be fitted first" 243 current_obj = self.stacked_obj 244 for _ in range(self.n_layers): 245 try: 246 input_X = current_obj.obj.cook_test_set(X) 247 current_obj.obj.partial_fit(input_X, y, **kwargs) 248 try: 249 current_obj = current_obj.obj 250 except AttributeError: 251 pass 252 except ValueError: 253 pass 254 return self 255 256 def predict(self, X): 257 return self.stacked_obj.predict(X) 258 259 def predict_proba(self, X): 260 return self.stacked_obj.predict_proba(X) 261 262 def score(self, X, y, scoring=None): 263 return self.stacked_obj.score(X, y, scoring) 264 265 def cross_val_optim( 266 self, 267 X_train, 268 y_train, 269 X_test=None, 270 y_test=None, 271 scoring="accuracy", 272 surrogate_obj=None, 273 cv=5, 274 n_jobs=None, 275 n_init=10, 276 n_iter=190, 277 abs_tol=1e-3, 278 verbose=2, 279 seed=123, 280 **kwargs, 281 ): 282 """Cross-validation function and hyperparameters' search 283 284 Parameters: 285 286 X_train: array-like, 287 Training vectors, where rows is the number of samples 288 and columns is the number of features. 289 290 y_train: array-like, 291 Training vectors, where rows is the number of samples 292 and columns is the number of features. 
293 294 X_test: array-like, 295 Testing vectors, where rows is the number of samples 296 and columns is the number of features. 297 298 y_test: array-like, 299 Testing vectors, where rows is the number of samples 300 and columns is the number of features. 301 302 scoring: str 303 scoring metric; see https://scikit-learn.org/stable/modules/model_evaluation.html#the-scoring-parameter-defining-model-evaluation-rules 304 305 surrogate_obj: an object; 306 An ML model for estimating the uncertainty around the objective function 307 308 cv: int; 309 number of cross-validation folds 310 311 n_jobs: int; 312 number of jobs for parallel execution 313 314 n_init: an integer; 315 number of points in the initial setting, when `x_init` and `y_init` are not provided 316 317 n_iter: an integer; 318 number of iterations of the minimization algorithm 319 320 abs_tol: a float; 321 tolerance for convergence of the optimizer (early stopping based on acquisition function) 322 323 verbose: int 324 controls verbosity 325 326 seed: int 327 reproducibility seed 328 329 **kwargs: dict 330 additional parameters to be passed to the estimator 331 332 Examples: 333 334 ```python 335 ``` 336 """ 337 338 num_to_activation_name = {1: "relu", 2: "sigmoid", 3: "tanh"} 339 num_to_nodes_sim = {1: "sobol", 2: "uniform", 3: "hammersley"} 340 num_to_type_clust = {1: "kmeans", 2: "gmm"} 341 342 def deepclassifier_cv( 343 X_train, 344 y_train, 345 # Defining depth 346 n_layers=3, 347 # CustomClassifier attributes 348 n_hidden_features=5, 349 activation_name="relu", 350 nodes_sim="sobol", 351 dropout=0, 352 n_clusters=2, 353 type_clust="kmeans", 354 cv=5, 355 n_jobs=None, 356 scoring="accuracy", 357 seed=123, 358 ): 359 self.set_params( 360 **{ 361 "n_layers": n_layers, 362 # CustomClassifier attributes 363 "n_hidden_features": n_hidden_features, 364 "activation_name": activation_name, 365 "nodes_sim": nodes_sim, 366 "dropout": dropout, 367 "n_clusters": n_clusters, 368 "type_clust": type_clust, 369 **kwargs, 370 } 371 ) 372 return -cross_val_score( 373 estimator=self, 374 X=X_train, 375 y=y_train, 376 scoring=scoring, 377 cv=cv, 378 n_jobs=n_jobs, 379 verbose=0, 380 ).mean() 381 382 # objective function for hyperparams tuning 383 def crossval_objective(xx): 384 return deepclassifier_cv( 385 X_train=X_train, 386 y_train=y_train, 387 # Defining depth 388 n_layers=int(np.ceil(xx[0])), 389 # CustomClassifier attributes 390 n_hidden_features=int(np.ceil(xx[1])), 391 activation_name=num_to_activation_name[np.ceil(xx[2])], 392 nodes_sim=num_to_nodes_sim[int(np.ceil(xx[3]))], 393 dropout=xx[4], 394 n_clusters=int(np.ceil(xx[5])), 395 type_clust=num_to_type_clust[int(np.ceil(xx[6]))], 396 cv=cv, 397 n_jobs=n_jobs, 398 scoring=scoring, 399 seed=seed, 400 ) 401 402 if surrogate_obj is None: 403 gp_opt = gp.GPOpt( 404 objective_func=crossval_objective, 405 lower_bound=np.array([0, 3, 0, 0, 0.0, 0, 0]), 406 upper_bound=np.array([5, 100, 3, 3, 0.4, 5, 2]), 407 params_names=[ 408 "n_layers", 409 # CustomClassifier attributes 410 "n_hidden_features", 411 "activation_name", 412 "nodes_sim", 413 "dropout", 414 "n_clusters", 415 "type_clust", 416 ], 417 method="bayesian", 418 n_init=n_init, 419 n_iter=n_iter, 420 seed=seed, 421 ) 422 else: 423 gp_opt = gp.GPOpt( 424 objective_func=crossval_objective, 425 lower_bound=np.array([0, 3, 0, 0, 0.0, 0, 0]), 426 upper_bound=np.array([5, 100, 3, 3, 0.4, 5, 2]), 427 params_names=[ 428 "n_layers", 429 # CustomClassifier attributes 430 "n_hidden_features", 431 "activation_name", 432 "nodes_sim", 433 "dropout", 
434 "n_clusters", 435 "type_clust", 436 ], 437 acquisition="ucb", 438 method="splitconformal", 439 surrogate_obj=ns.PredictionInterval( 440 obj=surrogate_obj, method="splitconformal" 441 ), 442 n_init=n_init, 443 n_iter=n_iter, 444 seed=seed, 445 ) 446 447 res = gp_opt.optimize(verbose=verbose, abs_tol=abs_tol) 448 res.best_params["n_layers"] = int(np.ceil(res.best_params["n_layers"])) 449 res.best_params["n_hidden_features"] = int( 450 np.ceil(res.best_params["n_hidden_features"]) 451 ) 452 res.best_params["activation_name"] = num_to_activation_name[ 453 np.ceil(res.best_params["activation_name"]) 454 ] 455 res.best_params["nodes_sim"] = num_to_nodes_sim[ 456 int(np.ceil(res.best_params["nodes_sim"])) 457 ] 458 res.best_params["dropout"] = res.best_params["dropout"] 459 res.best_params["n_clusters"] = int( 460 np.ceil(res.best_params["n_clusters"]) 461 ) 462 res.best_params["type_clust"] = num_to_type_clust[ 463 int(np.ceil(res.best_params["type_clust"])) 464 ] 465 466 # out-of-sample error 467 if X_test is not None and y_test is not None: 468 self.set_params(**res.best_params, verbose=0, seed=seed) 469 preds = self.fit(X_train, y_train).predict(X_test) 470 # check error on y_test 471 oos_err = getattr(metrics, scoring + "_score")( 472 y_true=y_test, y_pred=preds 473 ) 474 result = namedtuple("result", res._fields + ("test_" + scoring,)) 475 return result(*res, oos_err) 476 else: 477 return res 478 479 def lazy_cross_val_optim( 480 self, 481 X_train, 482 y_train, 483 X_test=None, 484 y_test=None, 485 scoring="accuracy", 486 surrogate_objs=None, 487 customize=False, 488 cv=5, 489 n_jobs=None, 490 n_init=10, 491 n_iter=190, 492 abs_tol=1e-3, 493 verbose=1, 494 seed=123, 495 ): 496 """Automated Cross-validation function and hyperparameters' search using multiple surrogates 497 498 Parameters: 499 500 X_train: array-like, 501 Training vectors, where rows is the number of samples 502 and columns is the number of features. 503 504 y_train: array-like, 505 Training vectors, where rows is the number of samples 506 and columns is the number of features. 507 508 X_test: array-like, 509 Testing vectors, where rows is the number of samples 510 and columns is the number of features. 511 512 y_test: array-like, 513 Testing vectors, where rows is the number of samples 514 and columns is the number of features. 
515 516 scoring: str 517 scoring metric; see https://scikit-learn.org/stable/modules/model_evaluation.html#the-scoring-parameter-defining-model-evaluation-rules 518 519 surrogate_objs: object names as a list of strings; 520 ML models for estimating the uncertainty around the objective function 521 522 customize: boolean 523 if True, the surrogate is transformed into a quasi-randomized network (default is False) 524 525 cv: int; 526 number of cross-validation folds 527 528 n_jobs: int; 529 number of jobs for parallel execution 530 531 n_init: an integer; 532 number of points in the initial setting, when `x_init` and `y_init` are not provided 533 534 n_iter: an integer; 535 number of iterations of the minimization algorithm 536 537 abs_tol: a float; 538 tolerance for convergence of the optimizer (early stopping based on acquisition function) 539 540 verbose: int 541 controls verbosity 542 543 seed: int 544 reproducibility seed 545 546 Examples: 547 548 ```python 549 ``` 550 """ 551 552 removed_regressors = [ 553 "TheilSenRegressor", 554 "ARDRegression", 555 "CCA", 556 "GaussianProcessRegressor", 557 "GradientBoostingRegressor", 558 "HistGradientBoostingRegressor", 559 "IsotonicRegression", 560 "MultiOutputRegressor", 561 "MultiTaskElasticNet", 562 "MultiTaskElasticNetCV", 563 "MultiTaskLasso", 564 "MultiTaskLassoCV", 565 "OrthogonalMatchingPursuit", 566 "OrthogonalMatchingPursuitCV", 567 "PLSCanonical", 568 "PLSRegression", 569 "RadiusNeighborsRegressor", 570 "RegressorChain", 571 "StackingRegressor", 572 "VotingRegressor", 573 ] 574 575 results = [] 576 577 for est in all_estimators(): 578 if surrogate_objs is None: 579 if issubclass(est[1], RegressorMixin) and ( 580 est[0] not in removed_regressors 581 ): 582 try: 583 if customize == True: 584 surr_obj = ns.CustomClassifier(obj=est[1]()) 585 else: 586 surr_obj = est[1]() 587 res = self.cross_val_optim( 588 X_train=X_train, 589 y_train=y_train, 590 X_test=X_test, 591 y_test=y_test, 592 surrogate_obj=surr_obj, 593 cv=cv, 594 n_jobs=n_jobs, 595 scoring=scoring, 596 n_init=n_init, 597 n_iter=n_iter, 598 abs_tol=abs_tol, 599 verbose=verbose, 600 seed=seed, 601 ) 602 if customize == True: 603 results.append((f"CustomClassifier({est[0]})", res)) 604 else: 605 results.append((est[0], res)) 606 except: 607 pass 608 609 else: 610 if ( 611 issubclass(est[1], RegressorMixin) 612 and (est[0] not in removed_regressors) 613 and est[0] in surrogate_objs 614 ): 615 try: 616 if customize == True: 617 surr_obj = ns.CustomClassifier(obj=est[1]()) 618 else: 619 surr_obj = est[1]() 620 res = self.cross_val_optim( 621 X_train=X_train, 622 y_train=y_train, 623 X_test=X_test, 624 y_test=y_test, 625 surrogate_obj=surr_obj, 626 cv=cv, 627 n_jobs=n_jobs, 628 scoring=scoring, 629 n_init=n_init, 630 n_iter=n_iter, 631 abs_tol=abs_tol, 632 verbose=verbose, 633 seed=seed, 634 ) 635 if customize == True: 636 results.append((f"CustomClassifier({est[0]})", res)) 637 else: 638 results.append((est[0], res)) 639 except: 640 pass 641 642 return results 643 644 @property 645 def _estimator_type(self): 646 return "classifier"
Deep Classifier
Parameters:
obj: an object
A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification
n_layers: int (default=3)
Number of layers. `n_layers = 1` is a simple `CustomClassifier`
verbose : int, optional (default=0)
Monitor progress when fitting.
All the other parameters are nnetsauce `CustomClassifier`'s
Examples:
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegressionCV
data = load_breast_cancer()
X = data.data
y= data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
obj = LogisticRegressionCV()
clf = ns.DeepClassifier(obj)
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))
DeepClassifier.fit(X, y, **kwargs)
Fit Classification algorithms to X and y.
Parameters
X : array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
y : array-like,
Target values (class labels), one per sample.
**kwargs: dict
Additional parameters to be passed to the fit method
of the base learner. For example, sample_weight.
Returns
A fitted object
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
Scoring function for classification.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method (default is accuracy)
Returns:
score: float
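Continuing the breast-cancer example above, a short hedged illustration of the probabilistic output and the default (accuracy) scoring:

```python
# Class probabilities and accuracy from the fitted DeepClassifier
proba = clf.predict_proba(X_test)  # shape (n_samples, n_classes)
print(proba[:3])
print(clf.score(X_test, y_test))   # accuracy by default
```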
13class DeepRegressor(CustomRegressor, RegressorMixin): 14 """ 15 Deep Regressor 16 17 Parameters: 18 19 obj: an object 20 A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification 21 22 verbose : int, optional (default=0) 23 Monitor progress when fitting. 24 25 n_layers: int (default=2) 26 Number of layers. `n_layers = 1` is a simple `CustomRegressor` 27 28 All the other parameters are nnetsauce `CustomRegressor`'s 29 30 Examples: 31 32 ```python 33 import nnetsauce as ns 34 from sklearn.datasets import load_diabetes 35 from sklearn.model_selection import train_test_split 36 from sklearn.linear_model import RidgeCV 37 data = load_diabetes() 38 X = data.data 39 y= data.target 40 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123) 41 obj = RidgeCV() 42 clf = ns.DeepRegressor(obj) 43 clf.fit(X_train, y_train) 44 print(clf.score(clf.predict(X_test), y_test)) 45 ``` 46 47 """ 48 49 def __init__( 50 self, 51 obj, 52 # Defining depth 53 n_layers=2, 54 verbose=0, 55 # CustomRegressor attributes 56 n_hidden_features=5, 57 activation_name="relu", 58 a=0.01, 59 nodes_sim="sobol", 60 bias=True, 61 dropout=0, 62 direct_link=True, 63 n_clusters=2, 64 cluster_encode=True, 65 type_clust="kmeans", 66 type_scaling=("std", "std", "std"), 67 col_sample=1, 68 row_sample=1, 69 level=None, 70 pi_method="splitconformal", 71 seed=123, 72 backend="cpu", 73 ): 74 super().__init__( 75 obj=obj, 76 n_hidden_features=n_hidden_features, 77 activation_name=activation_name, 78 a=a, 79 nodes_sim=nodes_sim, 80 bias=bias, 81 dropout=dropout, 82 direct_link=direct_link, 83 n_clusters=n_clusters, 84 cluster_encode=cluster_encode, 85 type_clust=type_clust, 86 type_scaling=type_scaling, 87 col_sample=col_sample, 88 row_sample=row_sample, 89 level=level, 90 pi_method=pi_method, 91 seed=seed, 92 backend=backend, 93 ) 94 95 assert n_layers >= 1, "must have n_layers >= 1" 96 97 self.stacked_obj = deepcopy(obj) 98 self.verbose = verbose 99 self.n_layers = n_layers 100 self.level = level 101 self.pi_method = pi_method 102 self.coef_ = None 103 104 def fit(self, X, y, **kwargs): 105 """Fit Regression algorithms to X and y. 106 Parameters 107 ---------- 108 X : array-like, 109 Training vectors, where rows is the number of samples 110 and columns is the number of features. 111 y : array-like, 112 Training vectors, where rows is the number of samples 113 and columns is the number of features. 114 **kwargs: dict 115 Additional parameters to be passed to the fit method 116 of the base learner. For example, `sample_weight`. 
117 Returns 118 ------- 119 A fitted object 120 """ 121 122 if isinstance(X, np.ndarray): 123 X = pd.DataFrame(X) 124 125 # init layer 126 self.stacked_obj = CustomRegressor( 127 obj=self.stacked_obj, 128 n_hidden_features=self.n_hidden_features, 129 activation_name=self.activation_name, 130 a=self.a, 131 nodes_sim=self.nodes_sim, 132 bias=self.bias, 133 dropout=self.dropout, 134 direct_link=self.direct_link, 135 n_clusters=self.n_clusters, 136 cluster_encode=self.cluster_encode, 137 type_clust=self.type_clust, 138 type_scaling=self.type_scaling, 139 col_sample=self.col_sample, 140 row_sample=self.row_sample, 141 seed=self.seed, 142 backend=self.backend, 143 ) 144 145 if self.verbose > 0: 146 iterator = tqdm(range(self.n_layers - 1)) 147 else: 148 iterator = range(self.n_layers - 1) 149 150 for _ in iterator: 151 self.stacked_obj = deepcopy( 152 CustomRegressor( 153 obj=self.stacked_obj, 154 n_hidden_features=self.n_hidden_features, 155 activation_name=self.activation_name, 156 a=self.a, 157 nodes_sim=self.nodes_sim, 158 bias=self.bias, 159 dropout=self.dropout, 160 direct_link=self.direct_link, 161 n_clusters=self.n_clusters, 162 cluster_encode=self.cluster_encode, 163 type_clust=self.type_clust, 164 type_scaling=self.type_scaling, 165 col_sample=self.col_sample, 166 row_sample=self.row_sample, 167 seed=self.seed, 168 backend=self.backend, 169 ) 170 ) 171 172 self.stacked_obj.fit(X, y, **kwargs) 173 174 if self.level is not None: 175 self.stacked_obj = PredictionInterval( 176 obj=self.stacked_obj, method=self.pi_method, level=self.level 177 ) 178 179 if hasattr(self.stacked_obj, "clustering_obj_"): 180 self.clustering_obj_ = self.stacked_obj.clustering_obj_ 181 182 if hasattr(self.stacked_obj, "coef_"): 183 self.coef_ = self.stacked_obj.coef_ 184 185 if hasattr(self.stacked_obj, "scaler_"): 186 self.scaler_ = self.stacked_obj.scaler_ 187 188 if hasattr(self.stacked_obj, "nn_scaler_"): 189 self.nn_scaler_ = self.stacked_obj.nn_scaler_ 190 191 if hasattr(self.stacked_obj, "clustering_scaler_"): 192 self.clustering_scaler_ = self.stacked_obj.clustering_scaler_ 193 194 return self 195 196 def partial_fit(self, X, y, **kwargs): 197 """Fit Regression algorithms to X and y. 198 Parameters 199 ---------- 200 X : array-like, 201 Training vectors, where rows is the number of samples 202 and columns is the number of features. 203 y : array-like, 204 Training vectors, where rows is the number of samples 205 and columns is the number of features. 206 **kwargs: dict 207 Additional parameters to be passed to the fit method 208 of the base learner. For example, `sample_weight`. 209 Returns 210 ------- 211 A fitted object 212 """ 213 assert hasattr(self, "stacked_obj"), "model must be fitted first" 214 current_obj = self.stacked_obj 215 for _ in range(self.n_layers): 216 try: 217 input_X = current_obj.obj.cook_test_set(X) 218 current_obj.obj.partial_fit(input_X, y, **kwargs) 219 try: 220 current_obj = current_obj.obj 221 except AttributeError: 222 pass 223 except ValueError as e: 224 print(e) 225 pass 226 return self 227 228 def predict(self, X, **kwargs): 229 if self.level is not None: 230 return self.stacked_obj.predict(X, return_pi=True) 231 return self.stacked_obj.predict(X, **kwargs) 232 233 def score(self, X, y, scoring=None): 234 return self.stacked_obj.score(X, y, scoring)
Deep Regressor
Parameters:
obj: an object
A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification
verbose : int, optional (default=0)
Monitor progress when fitting.
n_layers: int (default=2)
Number of layers. `n_layers = 1` is a simple `CustomRegressor`
All the other parameters are nnetsauce `CustomRegressor`'s
Examples:

```python
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.linear_model import RidgeCV
data = load_diabetes()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
obj = RidgeCV()
clf = ns.DeepRegressor(obj)
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))
```
Fit Regression algorithms to X and y.
Parameters
X : array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
y : array-like,
Target values (response).
**kwargs: dict
Additional parameters to be passed to the fit method
of the base learner. For example, sample_weight.
Returns
A fitted object
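The `**kwargs` forwarding can be illustrated with `sample_weight`; the snippet below is only a minimal sketch under the assumption, stated in the docstring above, that extra keyword arguments reach the base learner's `fit`:

```python
import numpy as np
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.linear_model import RidgeCV

X, y = load_diabetes(return_X_y=True)

reg = ns.DeepRegressor(RidgeCV(), n_layers=2)
# uniform observation weights, passed through **kwargs to the base learner's fit
reg.fit(X, y, sample_weight=np.ones(len(y)))
```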
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
level: int
Level of confidence (default = 95)
method: str
'splitconformal' or 'localconformal', used for conformal
prediction (if you specify `return_pi = True`)
**kwargs: additional parameters
`return_pi = True` for conformal prediction,
with `method` in ('splitconformal', 'localconformal'),
or `return_std = True` for `self.obj` in
(`sklearn.linear_model.BayesianRidge`,
`sklearn.linear_model.ARDRegression`,
`sklearn.gaussian_process.GaussianProcessRegressor`)
Returns:
model predictions:
an array if uncertainty quantification is not requested,
or a tuple (mean, standard deviation, lower and upper
prediction bounds) if `return_std = True` or `return_pi = True`
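As an illustration of the split conformal option described above (a minimal sketch, not part of the original documentation; it assumes a scikit-learn `Ridge` base learner and the diabetes data):

```python
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

# 95% split-conformal prediction intervals, requested at construction time
reg = ns.DeepRegressor(Ridge(), n_layers=2, level=95, pi_method="splitconformal")
reg.fit(X_train, y_train)
preds = reg.predict(X_test)  # point forecasts plus lower/upper bounds when `level` is set
```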
Compute the score of the model.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method
Returns:
score: float
11class DeepMTS(MTS): 12 """Univariate and multivariate time series (DeepMTS) forecasting with Quasi-Randomized networks (Work in progress) 13 14 Parameters: 15 16 obj: object. 17 any object containing a method fit (obj.fit()) and a method predict 18 (obj.predict()). 19 20 n_layers: int. 21 number of layers in the neural network. 22 23 n_hidden_features: int. 24 number of nodes in the hidden layer. 25 26 activation_name: str. 27 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'. 28 29 a: float. 30 hyperparameter for 'prelu' or 'elu' activation function. 31 32 nodes_sim: str. 33 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 34 'uniform'. 35 36 bias: boolean. 37 indicates if the hidden layer contains a bias term (True) or not 38 (False). 39 40 dropout: float. 41 regularization parameter; (random) percentage of nodes dropped out 42 of the training. 43 44 direct_link: boolean. 45 indicates if the original predictors are included (True) in model's fitting or not (False). 46 47 n_clusters: int. 48 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering). 49 50 cluster_encode: bool. 51 defines how the variable containing clusters is treated (default is one-hot) 52 if `False`, then labels are used, without one-hot encoding. 53 54 type_clust: str. 55 type of clustering method: currently k-means ('kmeans') or Gaussian 56 Mixture Model ('gmm'). 57 58 type_scaling: a tuple of 3 strings. 59 scaling methods for inputs, hidden layer, and clustering respectively 60 (and when relevant). 61 Currently available: standardization ('std') or MinMax scaling ('minmax'). 62 63 lags: int. 64 number of lags used for each time series. 65 66 type_pi: str. 67 type of prediction interval; currently: 68 - "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case 69 - "kde": based on Kernel Density Estimation of in-sample residuals 70 - "bootstrap": based on independent bootstrap of in-sample residuals 71 - "block-bootstrap": based on basic block bootstrap of in-sample residuals 72 - "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals 73 - "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals 74 - "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals 75 - "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals 76 - "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals 77 - "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals 78 79 block_size: int. 80 size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap"). 81 Default is round(3.15*(n_residuals^1/3)) 82 83 replications: int. 84 number of replications (if needed, for predictive simulation). Default is 'None'. 85 86 kernel: str. 87 the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'. 88 89 agg: str. 90 either "mean" or "median" for simulation of bootstrap aggregating 91 92 seed: int. 93 reproducibility seed for nodes_sim=='uniform' or predictive simulation. 94 95 backend: str. 96 "cpu" or "gpu" or "tpu". 97 98 verbose: int. 
99 0: not printing; 1: printing 100 101 show_progress: bool. 102 True: progress bar when fitting each series; False: no progress bar when fitting each series 103 104 Attributes: 105 106 fit_objs_: dict 107 objects adjusted to each individual time series 108 109 y_: {array-like} 110 DeepMTS responses (most recent observations first) 111 112 X_: {array-like} 113 DeepMTS lags 114 115 xreg_: {array-like} 116 external regressors 117 118 y_means_: dict 119 a dictionary of each series mean values 120 121 preds_: {array-like} 122 successive model predictions 123 124 preds_std_: {array-like} 125 standard deviation around the predictions 126 127 return_std_: boolean 128 return uncertainty or not (set in predict) 129 130 df_: data frame 131 the input data frame, in case a data.frame is provided to `fit` 132 133 Examples: 134 135 Example 1: 136 137 ```python 138 import nnetsauce as ns 139 import numpy as np 140 from sklearn import linear_model 141 np.random.seed(123) 142 143 M = np.random.rand(10, 3) 144 M[:,0] = 10*M[:,0] 145 M[:,2] = 25*M[:,2] 146 print(M) 147 148 # Adjust Bayesian Ridge 149 regr4 = linear_model.BayesianRidge() 150 obj_DeepMTS = ns.DeepMTS(regr4, lags = 1, n_hidden_features=5) 151 obj_DeepMTS.fit(M) 152 print(obj_DeepMTS.predict()) 153 154 # with credible intervals 155 print(obj_DeepMTS.predict(return_std=True, level=80)) 156 157 print(obj_DeepMTS.predict(return_std=True, level=95)) 158 ``` 159 160 Example 2: 161 162 ```python 163 import nnetsauce as ns 164 import numpy as np 165 from sklearn import linear_model 166 167 dataset = { 168 'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'], 169 'series1' : [34, 30, 35.6, 33.3, 38.1], 170 'series2' : [4, 5.5, 5.6, 6.3, 5.1], 171 'series3' : [100, 100.5, 100.6, 100.2, 100.1]} 172 df = pd.DataFrame(dataset).set_index('date') 173 print(df) 174 175 # Adjust Bayesian Ridge 176 regr5 = linear_model.BayesianRidge() 177 obj_DeepMTS = ns.DeepMTS(regr5, lags = 1, n_hidden_features=5) 178 obj_DeepMTS.fit(df) 179 print(obj_DeepMTS.predict()) 180 181 # with credible intervals 182 print(obj_DeepMTS.predict(return_std=True, level=80)) 183 184 print(obj_DeepMTS.predict(return_std=True, level=95)) 185 ``` 186 187 """ 188 189 # construct the object ----- 190 191 def __init__( 192 self, 193 obj, 194 n_layers=3, 195 n_hidden_features=5, 196 activation_name="relu", 197 a=0.01, 198 nodes_sim="sobol", 199 bias=True, 200 dropout=0, 201 direct_link=True, 202 n_clusters=2, 203 cluster_encode=True, 204 type_clust="kmeans", 205 type_scaling=("std", "std", "std"), 206 lags=1, 207 type_pi="kde", 208 block_size=None, 209 replications=None, 210 kernel=None, 211 agg="mean", 212 seed=123, 213 backend="cpu", 214 verbose=0, 215 show_progress=True, 216 ): 217 assert int(lags) == lags, "parameter 'lags' should be an integer" 218 assert n_layers >= 1, "must have n_layers >= 1" 219 self.n_layers = int(n_layers) 220 221 if self.n_layers > 1: 222 for _ in range(self.n_layers - 1): 223 obj = CustomRegressor( 224 obj=deepcopy(obj), 225 n_hidden_features=n_hidden_features, 226 activation_name=activation_name, 227 a=a, 228 nodes_sim=nodes_sim, 229 bias=bias, 230 dropout=dropout, 231 direct_link=direct_link, 232 n_clusters=n_clusters, 233 cluster_encode=cluster_encode, 234 type_clust=type_clust, 235 type_scaling=type_scaling, 236 seed=seed, 237 backend=backend, 238 ) 239 240 self.obj = deepcopy(obj) 241 super().__init__( 242 obj=self.obj, 243 n_hidden_features=n_hidden_features, 244 activation_name=activation_name, 245 a=a, 246 nodes_sim=nodes_sim, 247 
bias=bias, 248 dropout=dropout, 249 direct_link=direct_link, 250 n_clusters=n_clusters, 251 cluster_encode=cluster_encode, 252 type_clust=type_clust, 253 type_scaling=type_scaling, 254 lags=lags, 255 type_pi=type_pi, 256 block_size=block_size, 257 replications=replications, 258 kernel=kernel, 259 agg=agg, 260 seed=seed, 261 backend=backend, 262 verbose=verbose, 263 show_progress=show_progress, 264 )
Univariate and multivariate time series (DeepMTS) forecasting with Quasi-Randomized networks (Work in progress)
Parameters:
obj: object.
any object containing a method fit (obj.fit()) and a method predict
(obj.predict()).
n_layers: int.
number of layers in the neural network.
n_hidden_features: int.
number of nodes in the hidden layer.
activation_name: str.
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'.
a: float.
hyperparameter for 'prelu' or 'elu' activation function.
nodes_sim: str.
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'.
bias: boolean.
indicates if the hidden layer contains a bias term (True) or not
(False).
dropout: float.
regularization parameter; (random) percentage of nodes dropped out
of the training.
direct_link: boolean.
indicates if the original predictors are included (True) in model's fitting or not (False).
n_clusters: int.
number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering).
cluster_encode: bool.
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding.
type_clust: str.
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm').
type_scaling: a tuple of 3 strings.
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax').
lags: int.
number of lags used for each time series.
type_pi: str.
type of prediction interval; currently:
- "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case
- "kde": based on Kernel Density Estimation of in-sample residuals
- "bootstrap": based on independent bootstrap of in-sample residuals
- "block-bootstrap": based on basic block bootstrap of in-sample residuals
- "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals
- "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals
- "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals
- "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals
- "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals
- "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals
block_size: int.
size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap").
Default is round(3.15 * n_residuals^(1/3))
replications: int.
number of replications (if needed, for predictive simulation). Default is 'None'.
kernel: str.
the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'.
agg: str.
either "mean" or "median" for simulation of bootstrap aggregating
seed: int.
reproducibility seed for nodes_sim=='uniform' or predictive simulation.
backend: str.
"cpu" or "gpu" or "tpu".
verbose: int.
0: not printing; 1: printing
show_progress: bool.
True: progress bar when fitting each series; False: no progress bar when fitting each series
Attributes:
fit_objs_: dict
objects adjusted to each individual time series
y_: {array-like}
DeepMTS responses (most recent observations first)
X_: {array-like}
DeepMTS lags
xreg_: {array-like}
external regressors
y_means_: dict
a dictionary of each series mean values
preds_: {array-like}
successive model predictions
preds_std_: {array-like}
standard deviation around the predictions
return_std_: boolean
return uncertainty or not (set in predict)
df_: data frame
the input data frame, in case a data.frame is provided to `fit`
Examples:
Example 1:

```python
import nnetsauce as ns
import numpy as np
from sklearn import linear_model
np.random.seed(123)

M = np.random.rand(10, 3)
M[:,0] = 10*M[:,0]
M[:,2] = 25*M[:,2]
print(M)

# Adjust Bayesian Ridge
regr4 = linear_model.BayesianRidge()
obj_DeepMTS = ns.DeepMTS(regr4, lags = 1, n_hidden_features=5)
obj_DeepMTS.fit(M)
print(obj_DeepMTS.predict())

# with credible intervals
print(obj_DeepMTS.predict(return_std=True, level=80))
print(obj_DeepMTS.predict(return_std=True, level=95))
```
Example 2:

```python
import nnetsauce as ns
import numpy as np
import pandas as pd
from sklearn import linear_model

dataset = {
'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
'series1' : [34, 30, 35.6, 33.3, 38.1],
'series2' : [4, 5.5, 5.6, 6.3, 5.1],
'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
df = pd.DataFrame(dataset).set_index('date')
print(df)

# Adjust Bayesian Ridge
regr5 = linear_model.BayesianRidge()
obj_DeepMTS = ns.DeepMTS(regr5, lags = 1, n_hidden_features=5)
obj_DeepMTS.fit(df)
print(obj_DeepMTS.predict())

# with credible intervals
print(obj_DeepMTS.predict(return_std=True, level=80))
print(obj_DeepMTS.predict(return_std=True, level=95))
```
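The bootstrap-based `type_pi` options listed above are not covered by these examples; the snippet below is a brief sketch only (it assumes a scikit-learn `Ridge` base learner and synthetic data, with simulation-based intervals obtained through `replications`):

```python
import nnetsauce as ns
import numpy as np
from sklearn.linear_model import Ridge

np.random.seed(123)
M = np.random.rand(50, 3)  # 50 observations of 3 series

# block bootstrap of in-sample residuals, 100 simulated paths
obj = ns.DeepMTS(Ridge(), n_layers=2, lags=2,
                 type_pi="block-bootstrap", replications=100)
obj.fit(M)
print(obj.predict())
```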
6class Downloader: 7 """Download datasets from data sources (R-universe for now)""" 8 9 def __init__(self): 10 self.pkgname = None 11 self.dataset = None 12 self.source = None 13 self.url = None 14 self.request = None 15 16 def download( 17 self, 18 pkgname="MASS", 19 dataset="Boston", 20 source="https://cran.r-universe.dev/", 21 **kwargs 22 ): 23 """Download datasets from data sources (R-universe for now) 24 25 Examples: 26 27 ```python 28 import nnetsauce as ns 29 30 downloader = ns.Downloader() 31 df = downloader.download(pkgname="MASS", dataset="Boston") 32 ``` 33 34 """ 35 self.pkgname = pkgname 36 self.dataset = dataset 37 self.source = source 38 self.url = source + pkgname + "/data/" + dataset + "/json" 39 self.request = requests.get(self.url) 40 return pd.DataFrame(self.request.json(), **kwargs)
Download datasets from data sources (R-universe for now)
Examples:

```python
import nnetsauce as ns

downloader = ns.Downloader()
df = downloader.download(pkgname="MASS", dataset="Boston")
```
21class GLMClassifier(GLM, ClassifierMixin): 22 """Generalized 'linear' models using quasi-randomized networks (classification) 23 24 Parameters: 25 26 n_hidden_features: int 27 number of nodes in the hidden layer 28 29 lambda1: float 30 regularization parameter for GLM coefficients on original features 31 32 alpha1: float 33 controls compromize between l1 and l2 norm of GLM coefficients on original features 34 35 lambda2: float 36 regularization parameter for GLM coefficients on nonlinear features 37 38 alpha2: float 39 controls compromize between l1 and l2 norm of GLM coefficients on nonlinear features 40 41 activation_name: str 42 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 43 44 a: float 45 hyperparameter for 'prelu' or 'elu' activation function 46 47 nodes_sim: str 48 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 49 'uniform' 50 51 bias: boolean 52 indicates if the hidden layer contains a bias term (True) or not 53 (False) 54 55 dropout: float 56 regularization parameter; (random) percentage of nodes dropped out 57 of the training 58 59 direct_link: boolean 60 indicates if the original predictors are included (True) in model's 61 fitting or not (False) 62 63 n_clusters: int 64 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: 65 no clustering) 66 67 cluster_encode: bool 68 defines how the variable containing clusters is treated (default is one-hot) 69 if `False`, then labels are used, without one-hot encoding 70 71 type_clust: str 72 type of clustering method: currently k-means ('kmeans') or Gaussian 73 Mixture Model ('gmm') 74 75 type_scaling: a tuple of 3 strings 76 scaling methods for inputs, hidden layer, and clustering respectively 77 (and when relevant). 78 Currently available: standardization ('std') or MinMax scaling ('minmax') 79 80 optimizer: object 81 optimizer, from class nnetsauce.Optimizer 82 83 backend: str. 84 "cpu" or "gpu" or "tpu". 
85 86 seed: int 87 reproducibility seed for nodes_sim=='uniform' 88 89 Attributes: 90 91 beta_: vector 92 regression coefficients 93 94 Examples: 95 96 See [https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_classification.py) 97 98 """ 99 100 # construct the object ----- 101 _estimator_type = "classifier" 102 103 def __init__( 104 self, 105 n_hidden_features=5, 106 lambda1=0.01, 107 alpha1=0.5, 108 lambda2=0.01, 109 alpha2=0.5, 110 family="expit", 111 activation_name="relu", 112 a=0.01, 113 nodes_sim="sobol", 114 bias=True, 115 dropout=0, 116 direct_link=True, 117 n_clusters=2, 118 cluster_encode=True, 119 type_clust="kmeans", 120 type_scaling=("std", "std", "std"), 121 optimizer=Optimizer(), 122 backend="cpu", 123 seed=123, 124 ): 125 super().__init__( 126 n_hidden_features=n_hidden_features, 127 lambda1=lambda1, 128 alpha1=alpha1, 129 lambda2=lambda2, 130 alpha2=alpha2, 131 activation_name=activation_name, 132 a=a, 133 nodes_sim=nodes_sim, 134 bias=bias, 135 dropout=dropout, 136 direct_link=direct_link, 137 n_clusters=n_clusters, 138 cluster_encode=cluster_encode, 139 type_clust=type_clust, 140 type_scaling=type_scaling, 141 optimizer=optimizer, 142 backend=backend, 143 seed=seed, 144 ) 145 146 self.family = family 147 148 def logit_loss(self, Y, row_index, XB): 149 self.n_classes = Y.shape[1] # len(np.unique(y)) 150 # Y = mo.one_hot_encode2(y, self.n_classes) 151 # Y = self.optimizer.one_hot_encode(y, self.n_classes) 152 153 # max_double = 709.0 # only if softmax 154 # XB[XB > max_double] = max_double 155 XB[XB > 709.0] = 709.0 156 157 if row_index is None: 158 return -np.mean(np.sum(Y * XB, axis=1) - logsumexp(XB)) 159 160 return -np.mean(np.sum(Y[row_index, :] * XB, axis=1) - logsumexp(XB)) 161 162 def expit_erf_loss(self, Y, row_index, XB): 163 # self.n_classes = len(np.unique(y)) 164 # Y = mo.one_hot_encode2(y, self.n_classes) 165 # Y = self.optimizer.one_hot_encode(y, self.n_classes) 166 self.n_classes = Y.shape[1] 167 168 if row_index is None: 169 return -np.mean(np.sum(Y * XB, axis=1) - logsumexp(XB)) 170 171 return -np.mean(np.sum(Y[row_index, :] * XB, axis=1) - logsumexp(XB)) 172 173 def loss_func( 174 self, 175 beta, 176 group_index, 177 X, 178 Y, 179 y, 180 row_index=None, 181 type_loss="logit", 182 **kwargs 183 ): 184 res = { 185 "logit": self.logit_loss, 186 "expit": self.expit_erf_loss, 187 "erf": self.expit_erf_loss, 188 } 189 190 if row_index is None: 191 row_index = range(len(y)) 192 XB = self.compute_XB( 193 X, 194 beta=np.reshape(beta, (X.shape[1], self.n_classes), order="F"), 195 ) 196 197 return res[type_loss](Y, row_index, XB) + self.compute_penalty( 198 group_index=group_index, beta=beta 199 ) 200 201 XB = self.compute_XB( 202 X, 203 beta=np.reshape(beta, (X.shape[1], self.n_classes), order="F"), 204 row_index=row_index, 205 ) 206 207 return res[type_loss](Y, row_index, XB) + self.compute_penalty( 208 group_index=group_index, beta=beta 209 ) 210 211 def fit(self, X, y, **kwargs): 212 """Fit GLM model to training data (X, y). 213 214 Args: 215 216 X: {array-like}, shape = [n_samples, n_features] 217 Training vectors, where n_samples is the number 218 of samples and n_features is the number of features. 219 220 y: array-like, shape = [n_samples] 221 Target values. 
222 223 **kwargs: additional parameters to be passed to 224 self.cook_training_set or self.obj.fit 225 226 Returns: 227 228 self: object 229 230 """ 231 232 assert mx.is_factor( 233 y 234 ), "y must contain only integers" # change is_factor and subsampling everywhere 235 236 self.classes_ = np.unique(y) # for compatibility with sklearn 237 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 238 239 self.beta_ = None 240 241 n, p = X.shape 242 243 self.group_index = n * X.shape[1] 244 245 self.n_classes = len(np.unique(y)) 246 247 output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 248 249 # Y = mo.one_hot_encode2(output_y, self.n_classes) 250 Y = self.optimizer.one_hot_encode(output_y, self.n_classes) 251 252 # initialization 253 if self.backend == "cpu": 254 beta_ = np.linalg.lstsq(scaled_Z, Y, rcond=None)[0] 255 else: 256 beta_ = jnp.linalg.lstsq(scaled_Z, Y, rcond=None)[0] 257 258 # optimization 259 # fit(self, loss_func, response, x0, **kwargs): 260 # loss_func(self, beta, group_index, X, y, 261 # row_index=None, type_loss="gaussian", 262 # **kwargs) 263 self.optimizer.fit( 264 self.loss_func, 265 response=y, 266 x0=beta_.flatten(order="F"), 267 group_index=self.group_index, 268 X=scaled_Z, 269 Y=Y, 270 y=y, 271 type_loss=self.family, 272 ) 273 274 self.beta_ = self.optimizer.results[0] 275 self.classes_ = np.unique(y) 276 277 return self 278 279 def predict(self, X, **kwargs): 280 """Predict test data X. 281 282 Args: 283 284 X: {array-like}, shape = [n_samples, n_features] 285 Training vectors, where n_samples is the number 286 of samples and n_features is the number of features. 287 288 **kwargs: additional parameters to be passed to 289 self.cook_test_set 290 291 Returns: 292 293 model predictions: {array-like} 294 295 """ 296 297 return np.argmax(self.predict_proba(X, **kwargs), axis=1) 298 299 def predict_proba(self, X, **kwargs): 300 """Predict probabilities for test data X. 301 302 Args: 303 304 X: {array-like}, shape = [n_samples, n_features] 305 Training vectors, where n_samples is the number 306 of samples and n_features is the number of features. 307 308 **kwargs: additional parameters to be passed to 309 self.cook_test_set 310 311 Returns: 312 313 probability estimates for test data: {array-like} 314 315 """ 316 if len(X.shape) == 1: 317 n_features = X.shape[0] 318 new_X = mo.rbind( 319 X.reshape(1, n_features), 320 np.ones(n_features).reshape(1, n_features), 321 ) 322 323 Z = self.cook_test_set(new_X, **kwargs) 324 325 else: 326 Z = self.cook_test_set(X, **kwargs) 327 328 ZB = mo.safe_sparse_dot( 329 Z, 330 self.beta_.reshape( 331 self.n_classes, 332 X.shape[1] + self.n_hidden_features + self.n_clusters, 333 ).T, 334 ) 335 336 if self.family == "logit": 337 exp_ZB = np.exp(ZB) 338 339 return exp_ZB / exp_ZB.sum(axis=1)[:, None] 340 341 if self.family == "expit": 342 exp_ZB = expit(ZB) 343 344 return exp_ZB / exp_ZB.sum(axis=1)[:, None] 345 346 if self.family == "erf": 347 exp_ZB = 0.5 * (1 + erf(ZB)) 348 349 return exp_ZB / exp_ZB.sum(axis=1)[:, None] 350 351 def score(self, X, y, scoring=None): 352 """Scoring function for classification. 353 354 Args: 355 356 X: {array-like}, shape = [n_samples, n_features] 357 Training vectors, where n_samples is the number 358 of samples and n_features is the number of features. 359 360 y: array-like, shape = [n_samples] 361 Target values. 
362 363 scoring: str 364 scoring method (default is accuracy) 365 366 Returns: 367 368 score: float 369 """ 370 371 if scoring is None: 372 scoring = "accuracy" 373 374 if scoring == "accuracy": 375 return skm2.accuracy_score(y, self.predict(X)) 376 377 if scoring == "f1": 378 return skm2.f1_score(y, self.predict(X)) 379 380 if scoring == "precision": 381 return skm2.precision_score(y, self.predict(X)) 382 383 if scoring == "recall": 384 return skm2.recall_score(y, self.predict(X)) 385 386 if scoring == "roc_auc": 387 return skm2.roc_auc_score(y, self.predict(X)) 388 389 if scoring == "log_loss": 390 return skm2.log_loss(y, self.predict_proba(X)) 391 392 if scoring == "balanced_accuracy": 393 return skm2.balanced_accuracy_score(y, self.predict(X)) 394 395 if scoring == "average_precision": 396 return skm2.average_precision_score(y, self.predict(X)) 397 398 if scoring == "neg_brier_score": 399 return -skm2.brier_score_loss(y, self.predict_proba(X)) 400 401 if scoring == "neg_log_loss": 402 return -skm2.log_loss(y, self.predict_proba(X)) 403 404 @property 405 def _estimator_type(self): 406 return "classifier"
Generalized 'linear' models using quasi-randomized networks (classification)
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
lambda1: float
regularization parameter for GLM coefficients on original features
alpha1: float
controls compromise between l1 and l2 norm of GLM coefficients on original features
lambda2: float
regularization parameter for GLM coefficients on nonlinear features
alpha2: float
controls compromise between l1 and l2 norm of GLM coefficients on nonlinear features
family: str
'expit' (default), 'logit' or 'erf'; loss and link used for the class probabilities
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
optimizer: object
optimizer, from class nnetsauce.Optimizer
backend: str.
"cpu" or "gpu" or "tpu".
seed: int
reproducibility seed for nodes_sim=='uniform'
Attributes:
beta_: vector
regression coefficients
Examples:
See https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_classification.py
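For a quick, self-contained illustration (a minimal sketch, assuming default settings and scikit-learn's breast cancer data; the linked example file remains the reference):

```python
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

clf = ns.GLMClassifier(n_hidden_features=5, family="expit")
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))       # accuracy by default
print(clf.predict_proba(X_test)[:5])   # class membership probabilities
```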
Fit GLM model to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
Scoring function for classification.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method (default is accuracy)
Returns:
score: float
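The `scoring` strings map to scikit-learn metrics, as in the source above; a short hypothetical sketch (in-sample, for brevity):

```python
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer

X, y = load_breast_cancer(return_X_y=True)
clf = ns.GLMClassifier().fit(X, y)

print(clf.score(X, y))                          # default: accuracy
print(clf.score(X, y, scoring="f1"))            # binary F1 score
print(clf.score(X, y, scoring="neg_log_loss"))  # computed from predict_proba
```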
20class GLMRegressor(GLM, RegressorMixin): 21 """Generalized 'linear' models using quasi-randomized networks (regression) 22 23 Attributes: 24 25 n_hidden_features: int 26 number of nodes in the hidden layer 27 28 lambda1: float 29 regularization parameter for GLM coefficients on original features 30 31 alpha1: float 32 controls compromize between l1 and l2 norm of GLM coefficients on original features 33 34 lambda2: float 35 regularization parameter for GLM coefficients on nonlinear features 36 37 alpha2: float 38 controls compromize between l1 and l2 norm of GLM coefficients on nonlinear features 39 40 family: str 41 "gaussian", "laplace", "poisson", or "quantile" (for now) 42 43 level: int, default=50 44 The level of the quantiles to compute for family = "quantile". 45 Default is the median. 46 47 activation_name: str 48 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 49 50 a: float 51 hyperparameter for 'prelu' or 'elu' activation function 52 53 nodes_sim: str 54 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 55 'uniform' 56 57 bias: boolean 58 indicates if the hidden layer contains a bias term (True) or not 59 (False) 60 61 dropout: float 62 regularization parameter; (random) percentage of nodes dropped out 63 of the training 64 65 direct_link: boolean 66 indicates if the original predictors are included (True) in model's 67 fitting or not (False) 68 69 n_clusters: int 70 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: 71 no clustering) 72 73 cluster_encode: bool 74 defines how the variable containing clusters is treated (default is one-hot) 75 if `False`, then labels are used, without one-hot encoding 76 77 type_clust: str 78 type of clustering method: currently k-means ('kmeans') or Gaussian 79 Mixture Model ('gmm') 80 81 type_scaling: a tuple of 3 strings 82 scaling methods for inputs, hidden layer, and clustering respectively 83 (and when relevant). 84 Currently available: standardization ('std') or MinMax scaling ('minmax') 85 86 optimizer: object 87 optimizer, from class nnetsauce.utils.Optimizer 88 89 backend: str. 90 "cpu" or "gpu" or "tpu". 
91 92 seed: int 93 reproducibility seed for nodes_sim=='uniform' 94 95 backend: str 96 "cpu", "gpu", "tpu" 97 98 Attributes: 99 100 beta_: vector 101 regression coefficients 102 103 Examples: 104 105 See [https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py) 106 107 """ 108 109 # construct the object ----- 110 111 def __init__( 112 self, 113 n_hidden_features=5, 114 lambda1=0.01, 115 alpha1=0.5, 116 lambda2=0.01, 117 alpha2=0.5, 118 family="gaussian", 119 level=50, 120 activation_name="relu", 121 a=0.01, 122 nodes_sim="sobol", 123 bias=True, 124 dropout=0, 125 direct_link=True, 126 n_clusters=2, 127 cluster_encode=True, 128 type_clust="kmeans", 129 type_scaling=("std", "std", "std"), 130 optimizer=Optimizer(), 131 backend="cpu", 132 seed=123, 133 ): 134 super().__init__( 135 n_hidden_features=n_hidden_features, 136 lambda1=lambda1, 137 alpha1=alpha1, 138 lambda2=lambda2, 139 alpha2=alpha2, 140 activation_name=activation_name, 141 a=a, 142 nodes_sim=nodes_sim, 143 bias=bias, 144 dropout=dropout, 145 direct_link=direct_link, 146 n_clusters=n_clusters, 147 cluster_encode=cluster_encode, 148 type_clust=type_clust, 149 type_scaling=type_scaling, 150 optimizer=optimizer, 151 backend=backend, 152 seed=seed, 153 ) 154 155 self.family = family 156 self.level = level 157 self.q = self.level / 100 158 159 def gaussian_loss(self, y, row_index, XB): 160 return 0.5 * np.mean(np.square(y[row_index] - XB)) 161 162 def laplace_loss(self, y, row_index, XB): 163 return 0.5 * np.mean(np.abs(y[row_index] - XB)) 164 165 def poisson_loss(self, y, row_index, XB): 166 return -np.mean(y[row_index] * XB - np.exp(XB)) 167 168 def pinball_loss(self, y, row_index, XB, tau=0.5): 169 y = np.array(y[row_index]) 170 y_pred = np.array(XB) 171 return mean_pinball_loss(y, y_pred, alpha=tau) 172 # return np.mean(np.maximum(tau * residuals, (tau - 1) * residuals)) 173 174 def loss_func( 175 self, 176 beta, 177 group_index, 178 X, 179 y, 180 row_index=None, 181 type_loss="gaussian", 182 **kwargs 183 ): 184 res = { 185 "gaussian": self.gaussian_loss, 186 "laplace": self.laplace_loss, 187 "poisson": self.poisson_loss, 188 "quantile": self.pinball_loss, 189 } 190 191 if type_loss != "quantile": 192 if row_index is None: 193 row_index = range(len(y)) 194 XB = self.compute_XB(X, beta=beta) 195 196 return res[type_loss](y, row_index, XB) + self.compute_penalty( 197 group_index=group_index, beta=beta 198 ) 199 200 XB = self.compute_XB(X, beta=beta, row_index=row_index) 201 202 return res[type_loss](y, row_index, XB) + self.compute_penalty( 203 group_index=group_index, beta=beta 204 ) 205 206 else: # quantile 207 assert ( 208 self.q > 0 and self.q < 1 209 ), "'tau' must be comprised 0 < tau < 1" 210 211 if row_index is None: 212 row_index = range(len(y)) 213 XB = self.compute_XB(X, beta=beta) 214 return res[type_loss](y, row_index, XB, self.q) 215 216 XB = self.compute_XB(X, beta=beta, row_index=row_index) 217 return res[type_loss](y, row_index, XB, self.q) 218 219 def fit(self, X, y, **kwargs): 220 """Fit GLM model to training data (X, y). 221 222 Args: 223 224 X: {array-like}, shape = [n_samples, n_features] 225 Training vectors, where n_samples is the number 226 of samples and n_features is the number of features. 227 228 y: array-like, shape = [n_samples] 229 Target values. 
230 231 **kwargs: additional parameters to be passed to 232 self.cook_training_set or self.obj.fit 233 234 Returns: 235 236 self: object 237 238 """ 239 self.beta_ = None 240 self.n_iter = 0 241 242 _, self.group_index = X.shape 243 244 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 245 # initialization 246 if self.backend == "cpu": 247 beta_ = np.linalg.lstsq(scaled_Z, centered_y, rcond=None)[0] 248 else: 249 beta_ = jnp.linalg.lstsq(scaled_Z, centered_y, rcond=None)[0] 250 # optimization 251 # fit(self, loss_func, response, x0, **kwargs): 252 # loss_func(self, beta, group_index, X, y, 253 # row_index=None, type_loss="gaussian", 254 # **kwargs) 255 self.optimizer.fit( 256 self.loss_func, 257 response=centered_y, 258 x0=beta_, 259 group_index=self.group_index, 260 X=scaled_Z, 261 y=centered_y, 262 type_loss=self.family, 263 **kwargs 264 ) 265 266 self.beta_ = self.optimizer.results[0] 267 268 return self 269 270 def predict(self, X, **kwargs): 271 """Predict test data X. 272 273 Args: 274 275 X: {array-like}, shape = [n_samples, n_features] 276 Training vectors, where n_samples is the number 277 of samples and n_features is the number of features. 278 279 **kwargs: additional parameters to be passed to 280 self.cook_test_set 281 282 Returns: 283 284 model predictions: {array-like} 285 286 """ 287 288 if len(X.shape) == 1: 289 n_features = X.shape[0] 290 new_X = mo.rbind( 291 X.reshape(1, n_features), 292 np.ones(n_features).reshape(1, n_features), 293 ) 294 295 return ( 296 self.y_mean_ 297 + np.dot(self.cook_test_set(new_X, **kwargs), self.beta_) 298 )[0] 299 300 return self.y_mean_ + np.dot( 301 self.cook_test_set(X, **kwargs), self.beta_ 302 ) 303 304 def score(self, X, y, scoring=None): 305 """Compute the score of the model. 306 307 Parameters: 308 309 X: {array-like}, shape = [n_samples, n_features] 310 Training vectors, where n_samples is the number 311 of samples and n_features is the number of features. 312 313 y: array-like, shape = [n_samples] 314 Target values. 315 316 scoring: str 317 scoring method 318 319 Returns: 320 321 score: float 322 323 """ 324 325 if scoring is None: 326 return np.sqrt(np.mean((self.predict(X) - y) ** 2)) 327 328 return skm2.get_scorer(scoring)(self, X, y)
Generalized 'linear' models using quasi-randomized networks (regression)
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
lambda1: float
regularization parameter for GLM coefficients on original features
alpha1: float
controls compromise between l1 and l2 norm of GLM coefficients on original features
lambda2: float
regularization parameter for GLM coefficients on nonlinear features
alpha2: float
controls compromise between l1 and l2 norm of GLM coefficients on nonlinear features
family: str
"gaussian", "laplace", "poisson", or "quantile" (for now)
level: int, default=50
The level of the quantiles to compute for family = "quantile".
Default is the median.
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
optimizer: object
optimizer, from class nnetsauce.utils.Optimizer
backend: str
"cpu", "gpu" or "tpu"
seed: int
reproducibility seed for nodes_sim=='uniform'
Attributes:
beta_: vector
regression coefficients
Examples:
See [https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py)
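As a quick sketch complementing the linked example (assumptions: default settings, the diabetes data, and the quantile family for a median fit):

```python
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

# Gaussian family (least-squares loss), default settings otherwise
reg = ns.GLMRegressor(n_hidden_features=5, family="gaussian")
reg.fit(X_train, y_train)
print(reg.score(X_test, y_test))   # RMSE when `scoring` is None

# median regression through the pinball loss
reg_q = ns.GLMRegressor(family="quantile", level=50)
reg_q.fit(X_train, y_train)
print(reg_q.predict(X_test)[:5])
```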
Fit GLM model to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
Compute the score of the model.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method; if None, the RMSE is returned
Returns:
score: float
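A hypothetical sketch of the two paths in the source (RMSE when `scoring` is None, otherwise dispatch through `sklearn.metrics.get_scorer`):

```python
import nnetsauce as ns
from sklearn.datasets import load_diabetes

X, y = load_diabetes(return_X_y=True)
reg = ns.GLMRegressor().fit(X, y)

print(reg.score(X, y))                # scoring=None -> in-sample RMSE
print(reg.score(X, y, scoring="r2"))  # any scikit-learn scorer name
```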
class KernelRidge(BaseEstimator, RegressorMixin):
    """
    Kernel Ridge Regression with optional GPU support, Matérn kernels, and automatic input standardization.

    Parameters:

    - alpha: float
        Regularization parameter.
    - kernel: str
        Kernel type ("linear", "rbf", or "matern").
    - gamma: float
        Kernel coefficient for "rbf". Ignored for other kernels.
    - nu: float
        Smoothness parameter for the Matérn kernel. Default is 1.5.
    - length_scale: float
        Length scale parameter for the Matérn kernel. Default is 1.0.
    - backend: str
        "cpu" or "gpu" (uses JAX if "gpu").
    """

    def __init__(
        self,
        alpha=1.0,
        kernel="rbf",
        gamma=None,
        nu=1.5,
        length_scale=1.0,
        backend="cpu",
    ):
        self.alpha = alpha
        self.alpha_ = alpha
        self.kernel = kernel
        self.gamma = gamma
        self.nu = nu
        self.length_scale = length_scale
        self.backend = backend
        self.scaler = StandardScaler()

        if backend == "gpu" and not JAX_AVAILABLE:
            raise ImportError(
                "JAX is not installed. Please install JAX to use the GPU backend."
            )

    def _linear_kernel(self, X, Y):
        return jnp.dot(X, Y.T) if self.backend == "gpu" else np.dot(X, Y.T)

    def _rbf_kernel(self, X, Y):
        if self.gamma is None:
            self.gamma = 1.0 / X.shape[1]
        if self.backend == "gpu":
            sq_dists = (
                jnp.sum(X**2, axis=1)[:, None]
                + jnp.sum(Y**2, axis=1)
                - 2 * jnp.dot(X, Y.T)
            )
            return jnp.exp(-self.gamma * sq_dists)
        else:
            sq_dists = (
                np.sum(X**2, axis=1)[:, None]
                + np.sum(Y**2, axis=1)
                - 2 * np.dot(X, Y.T)
            )
            return np.exp(-self.gamma * sq_dists)

    def _matern_kernel(self, X, Y):
        """
        Compute the Matérn kernel using JAX for GPU or NumPy for CPU.

        Parameters:
        - X: array-like, shape (n_samples_X, n_features)
        - Y: array-like, shape (n_samples_Y, n_features)

        Returns:
        - Kernel matrix, shape (n_samples_X, n_samples_Y)
        """
        if self.backend == "gpu":
            # Compute pairwise distances
            dists = jnp.sqrt(
                jnp.sum((X[:, None, :] - Y[None, :, :]) ** 2, axis=2)
            )
            scaled_dists = jnp.sqrt(2 * self.nu) * dists / self.length_scale

            # Matérn kernel formula
            coeff = (2 ** (1 - self.nu)) / jnp.exp(gammaln(self.nu))
            matern_kernel = (
                coeff * (scaled_dists**self.nu) * kv(self.nu, scaled_dists)
            )
            matern_kernel = jnp.where(
                dists == 0, 1.0, matern_kernel
            )  # Handle the case where distance is 0
            return matern_kernel
        else:
            # Use NumPy for CPU
            from scipy.special import (
                gammaln,
                kv,
            )  # Ensure scipy.special is used for CPU

            dists = np.sqrt(
                np.sum((X[:, None, :] - Y[None, :, :]) ** 2, axis=2)
            )
            scaled_dists = np.sqrt(2 * self.nu) * dists / self.length_scale

            # Matérn kernel formula
            coeff = (2 ** (1 - self.nu)) / np.exp(gammaln(self.nu))
            matern_kernel = (
                coeff * (scaled_dists**self.nu) * kv(self.nu, scaled_dists)
            )
            matern_kernel = np.where(
                dists == 0, 1.0, matern_kernel
            )  # Handle the case where distance is 0
            return matern_kernel

    def _get_kernel(self, X, Y):
        if self.kernel == "linear":
            return self._linear_kernel(X, Y)
        elif self.kernel == "rbf":
            return self._rbf_kernel(X, Y)
        elif self.kernel == "matern":
            return self._matern_kernel(X, Y)
        else:
            raise ValueError(f"Unsupported kernel: {self.kernel}")

    def fit(self, X, y):
        """
        Fit the Kernel Ridge Regression model.

        Parameters:
        - X: array-like, shape (n_samples, n_features)
            Training data.
        - y: array-like, shape (n_samples,)
            Target values.
        """
        # Standardize the inputs
        X = self.scaler.fit_transform(X)
        self.X_fit_ = X

        # Center the response
        self.y_mean_ = np.mean(y)
        y_centered = y - self.y_mean_

        n_samples = X.shape[0]

        # Compute the kernel matrix
        K = self._get_kernel(X, X)
        self.K_ = K
        self.y_fit_ = y_centered

        if isinstance(self.alpha, (list, np.ndarray)):
            # If alpha is a list or array, compute LOOE for each alpha
            self.alphas_ = self.alpha  # Store the list of alphas
            self.dual_coefs_ = []  # Store dual coefficients for each alpha
            self.looe_ = []  # Store LOOE for each alpha

            for alpha in self.alpha:
                G = K + alpha * np.eye(n_samples)
                G_inv = np.linalg.inv(G)
                diag_G_inv = np.diag(G_inv)
                dual_coef = np.linalg.solve(G, y_centered)
                looe = np.sum((dual_coef / diag_G_inv) ** 2)  # Compute LOOE
                self.dual_coefs_.append(dual_coef)
                self.looe_.append(looe)

            # Select the best alpha based on the smallest LOOE
            best_index = np.argmin(self.looe_)
            self.alpha_ = self.alpha[best_index]
            self.dual_coef_ = self.dual_coefs_[best_index]
        else:
            # If alpha is a single value, proceed as usual
            if self.backend == "gpu":
                self.dual_coef_ = jnp.linalg.solve(
                    K + self.alpha * jnp.eye(n_samples), y_centered
                )
            else:
                self.dual_coef_ = np.linalg.solve(
                    K + self.alpha * np.eye(n_samples), y_centered
                )

        return self

    def predict(self, X, probs=False):
        """
        Predict using the Kernel Ridge Regression model.

        Parameters:
        - X: array-like, shape (n_samples, n_features)
            Test data.

        Returns:
        - Predicted values, shape (n_samples,).
        """
        # Standardize the inputs
        X = self.scaler.transform(X)
        K = self._get_kernel(X, self.X_fit_)
        if self.backend == "gpu":
            preds = jnp.dot(K, self.dual_coef_) + self.y_mean_
            if probs:
                # Compute similarity to self.X_fit_
                similarities = jnp.dot(
                    preds, self.X_fit_.T
                )  # Shape: (n_samples, n_fit_)
                # Apply softmax to get probabilities
                return jaxsoftmax(similarities, axis=1)
            return preds
        else:
            preds = np.dot(K, self.dual_coef_) + self.y_mean_
            if probs:
                # Compute similarity to self.X_fit_
                similarities = np.dot(
                    preds, self.X_fit_.T
                )  # Shape: (n_samples, n_fit_)
                # Apply softmax to get probabilities
                return softmax(similarities, axis=1)
            return preds

    def partial_fit(self, X, y):
        """
        Incrementally fit the Kernel Ridge Regression model with new data using a recursive approach.

        Parameters:
        - X: array-like, shape (n_samples, n_features)
            New training data.
        - y: array-like, shape (n_samples,)
            New target values.

        Returns:
        - self: object
            The updated model.
        """
        # Standardize the inputs
        X = (
            self.scaler.fit_transform(X)
            if not hasattr(self, "X_fit_")
            else self.scaler.transform(X)
        )

        if not hasattr(self, "X_fit_"):
            # Initialize with the first batch of data
            self.X_fit_ = X

            # Center the response
            self.y_mean_ = np.mean(y)
            y_centered = y - self.y_mean_
            self.y_fit_ = y_centered

            n_samples = X.shape[0]

            # Compute the kernel matrix for the initial data
            self.K_ = self._get_kernel(X, X)

            # Initialize dual coefficients for each alpha
            if isinstance(self.alpha, (list, np.ndarray)):
                self.dual_coefs_ = [np.zeros(n_samples) for _ in self.alpha]
            else:
                self.dual_coef_ = np.zeros(n_samples)
        else:
            # Incrementally update with new data
            y_centered = y - self.y_mean_  # Center the new batch of responses
            for x_new, y_new in zip(X, y_centered):
                x_new = x_new.reshape(1, -1)  # Ensure x_new is 2D
                k_new = self._get_kernel(self.X_fit_, x_new).flatten()

                # Compute the kernel value for the new data point
                k_self = self._get_kernel(x_new, x_new).item()

                if isinstance(self.alpha, (list, np.ndarray)):
                    # Update dual coefficients for each alpha
                    for idx, alpha in enumerate(self.alpha):
                        gamma_new = 1 / (k_self + alpha)
                        residual = y_new - np.dot(self.dual_coefs_[idx], k_new)
                        self.dual_coefs_[idx] = np.append(
                            self.dual_coefs_[idx], gamma_new * residual
                        )
                else:
                    # Update dual coefficients for a single alpha
                    gamma_new = 1 / (k_self + self.alpha)
                    residual = y_new - np.dot(self.dual_coef_, k_new)
                    self.dual_coef_ = np.append(
                        self.dual_coef_, gamma_new * residual
                    )

                # Update the kernel matrix
                self.K_ = np.block(
                    [
                        [self.K_, k_new[:, None]],
                        [k_new[None, :], np.array([[k_self]])],
                    ]
                )

                # Update the stored data
                self.X_fit_ = np.vstack([self.X_fit_, x_new])
                self.y_fit_ = np.append(self.y_fit_, y_new)

            # Select the best alpha based on LOOE after the batch
            if isinstance(self.alpha, (list, np.ndarray)):
                self.looe_ = []
                for idx, alpha in enumerate(self.alpha):
                    G = self.K_ + alpha * np.eye(self.K_.shape[0])
                    G_inv = np.linalg.inv(G)
                    diag_G_inv = np.diag(G_inv)
                    looe = np.sum((self.dual_coefs_[idx] / diag_G_inv) ** 2)
                    self.looe_.append(looe)

                # Select the best alpha
                best_index = np.argmin(self.looe_)
                self.alpha_ = self.alpha[best_index]
                self.dual_coef_ = self.dual_coefs_[best_index]

        return self
Kernel Ridge Regression with optional GPU support, Matérn kernels, and automatic input standardization.
Parameters:
- alpha: float Regularization parameter.
- kernel: str Kernel type ("linear", "rbf", or "matern").
- gamma: float Kernel coefficient for "rbf". Ignored for other kernels.
- nu: float Smoothness parameter for the Matérn kernel. Default is 1.5.
- length_scale: float Length scale parameter for the Matérn kernel. Default is 1.0.
- backend: str "cpu" or "gpu" (uses JAX if "gpu").
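As a quick illustration of these parameters, here is a minimal usage sketch (not taken from the library's documentation; it assumes scikit-learn's diabetes data and the package-level KernelRidge export). Passing a list of alpha values makes fit() select the regularization by leave-one-out error, as described in the fit section below.

```python
# Minimal sketch (assumed data): Matérn-kernel ridge on CPU with an alpha grid,
# so that fit() picks the regularization level by leave-one-out error (LOOE).
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

reg = ns.KernelRidge(
    alpha=[0.01, 0.1, 1.0, 10.0],   # grid -> LOOE-based selection in fit()
    kernel="matern", nu=1.5, length_scale=1.0,
    backend="cpu",
)
reg.fit(X_train, y_train)
print(reg.alpha_)                   # alpha retained (smallest LOOE)
print(reg.predict(X_test)[:5])
```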
fit(X, y): Fit the Kernel Ridge Regression model. If alpha is a list or array, the best value is selected by leave-one-out error (LOOE).
Parameters:
- X: array-like, shape (n_samples, n_features) Training data.
- y: array-like, shape (n_samples,) Target values.
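The alpha selection in fit() relies on the closed-form leave-one-out residual for kernel ridge: with G = K + alpha * I and dual coefficients c = G^{-1} y, the residual for sample i is c_i / (G^{-1})_{ii}, and the stored LOOE is the sum of their squares. The standalone NumPy sketch below (synthetic data and a linear kernel, chosen here only for illustration) checks that identity against an explicit leave-one-out refit.

```python
# Sketch: verify the closed-form LOO residual c_i / (G^{-1})_{ii} used by fit().
import numpy as np

rng = np.random.default_rng(0)
n, p, alpha = 30, 4, 0.5
X = rng.normal(size=(n, p))
y = X @ rng.normal(size=p) + 0.1 * rng.normal(size=n)

K = X @ X.T                                   # linear kernel for simplicity
G = K + alpha * np.eye(n)
G_inv = np.linalg.inv(G)
c = np.linalg.solve(G, y)                     # dual coefficients
loo_closed_form = c / np.diag(G_inv)          # closed-form LOO residuals

loo_explicit = np.empty(n)                    # explicit leave-one-out refits
for i in range(n):
    idx = np.delete(np.arange(n), i)
    c_i = np.linalg.solve(K[np.ix_(idx, idx)] + alpha * np.eye(n - 1), y[idx])
    loo_explicit[i] = y[i] - K[i, idx] @ c_i

print(np.allclose(loo_closed_form, loo_explicit))  # expected: True
```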
predict(X, probs=False): Predict using the Kernel Ridge Regression model.
Parameters:
- X: array-like, shape (n_samples, n_features) Test data.
- probs: bool, optional (default=False). If True, return softmax-normalized similarities to the training inputs instead of the raw predictions.
Returns:
- Predicted values, shape (n_samples,).
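The class also exposes a partial_fit method (see the source above) that updates the dual coefficients one observation at a time after an initial batch. A minimal streaming sketch, on synthetic data and under the same assumptions as the previous examples:

```python
# Sketch (synthetic stream): incremental updates with partial_fit, then predict.
import nnetsauce as ns
import numpy as np

rng = np.random.default_rng(42)
X = rng.normal(size=(60, 3))
y = X @ np.array([1.0, -2.0, 0.5]) + 0.1 * rng.normal(size=60)

model = ns.KernelRidge(alpha=1.0, kernel="rbf", backend="cpu")
model.partial_fit(X[:40], y[:40])       # first call initializes the model
model.partial_fit(X[40:50], y[40:50])   # later calls update it recursively
print(model.predict(X[50:])[:5])
```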
757class LazyClassifier(LazyDeepClassifier): 758 """ 759 Fitting -- almost -- all the classification algorithms with 760 nnetsauce's CustomClassifier and returning their scores (no layers). 761 762 Parameters: 763 764 verbose: int, optional (default=0) 765 Any positive number for verbosity. 766 767 ignore_warnings: bool, optional (default=True) 768 When set to True, the warning related to algorigms that are not able to run are ignored. 769 770 custom_metric: function, optional (default=None) 771 When function is provided, models are evaluated based on the custom evaluation metric provided. 772 773 predictions: bool, optional (default=False) 774 When set to True, the predictions of all the models models are returned as dataframe. 775 776 sort_by: string, optional (default='Accuracy') 777 Sort models by a metric. Available options are 'Accuracy', 'Balanced Accuracy', 'ROC AUC', 'F1 Score' 778 or a custom metric identified by its name and provided by custom_metric. 779 780 random_state: int, optional (default=42) 781 Reproducibiility seed. 782 783 estimators: list, optional (default='all') 784 list of Estimators names or just 'all' (default='all') 785 786 preprocess: bool 787 preprocessing is done when set to True 788 789 n_jobs : int, when possible, run in parallel 790 For now, only used by individual models that support it. 791 792 All the other parameters are the same as CustomClassifier's. 793 794 Attributes: 795 796 models_: dict-object 797 Returns a dictionary with each model pipeline as value 798 with key as name of models. 799 800 best_model_: object 801 Returns the best model pipeline based on the sort_by metric. 802 803 Examples: 804 805 import nnetsauce as ns 806 import numpy as np 807 from sklearn import datasets 808 from sklearn.utils import shuffle 809 810 dataset = datasets.load_iris() 811 X = dataset.data 812 y = dataset.target 813 X, y = shuffle(X, y, random_state=123) 814 X = X.astype(np.float32) 815 y = y.astype(np.float32) 816 X_train, X_test = X[:100], X[100:] 817 y_train, y_test = y[:100], y[100:] 818 819 clf = ns.LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None) 820 models, predictions = clf.fit(X_train, X_test, y_train, y_test) 821 model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test) 822 print(models) 823 824 """ 825 826 def __init__( 827 self, 828 verbose=0, 829 ignore_warnings=True, 830 custom_metric=None, 831 predictions=False, 832 sort_by="Accuracy", 833 random_state=42, 834 estimators="all", 835 preprocess=False, 836 n_jobs=None, 837 # CustomClassifier attributes 838 obj=None, 839 n_hidden_features=5, 840 activation_name="relu", 841 a=0.01, 842 nodes_sim="sobol", 843 bias=True, 844 dropout=0, 845 direct_link=True, 846 n_clusters=2, 847 cluster_encode=True, 848 type_clust="kmeans", 849 type_scaling=("std", "std", "std"), 850 col_sample=1, 851 row_sample=1, 852 seed=123, 853 backend="cpu", 854 ): 855 super().__init__( 856 verbose=verbose, 857 ignore_warnings=ignore_warnings, 858 custom_metric=custom_metric, 859 predictions=predictions, 860 sort_by=sort_by, 861 random_state=random_state, 862 estimators=estimators, 863 preprocess=preprocess, 864 n_jobs=n_jobs, 865 n_layers=1, 866 obj=obj, 867 n_hidden_features=n_hidden_features, 868 activation_name=activation_name, 869 a=a, 870 nodes_sim=nodes_sim, 871 bias=bias, 872 dropout=dropout, 873 direct_link=direct_link, 874 n_clusters=n_clusters, 875 cluster_encode=cluster_encode, 876 type_clust=type_clust, 877 type_scaling=type_scaling, 878 col_sample=col_sample, 879 row_sample=row_sample, 
880 seed=seed, 881 backend=backend, 882 )
Fitting -- almost -- all the classification algorithms with nnetsauce's CustomClassifier and returning their scores (no layers).
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, warnings related to algorithms that are not able to run are ignored.
custom_metric: function, optional (default=None)
When function is provided, models are evaluated based on the custom evaluation metric provided.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models are returned as a dataframe.
sort_by: string, optional (default='Accuracy')
Sort models by a metric. Available options are 'Accuracy', 'Balanced Accuracy', 'ROC AUC', 'F1 Score'
or a custom metric identified by its name and provided by custom_metric.
random_state: int, optional (default=42)
Reproducibility seed.
estimators: list, optional (default='all')
list of estimator names, or just 'all' (default='all')
preprocess: bool
preprocessing is applied when set to True
n_jobs: int, when possible, run in parallel
For now, only used by individual models that support it.
All the other parameters are the same as CustomClassifier's.
Attributes:
models_: dict-object
Returns a dictionary with each model pipeline as value
and the model name as key.
best_model_: object
Returns the best model pipeline based on the sort_by metric.
Examples:
import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle
dataset = datasets.load_iris()
X = dataset.data
y = dataset.target
X, y = shuffle(X, y, random_state=123)
X = X.astype(np.float32)
y = y.astype(np.float32)
X_train, X_test = X[:100], X[100:]
y_train, y_test = y[:100], y[100:]
clf = ns.LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)
model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test)
print(models)
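The estimators parameter described above can also be restricted to a few scikit-learn base learners by name instead of 'all'. A minimal sketch (assumed data and estimator names; with the default predictions=False, fit returns only the scores table):

```python
# Sketch: limit LazyClassifier to a couple of named scikit-learn base learners.
import nnetsauce as ns
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123)

clf = ns.LazyClassifier(
    estimators=["LogisticRegression", "ExtraTreesClassifier"],
    verbose=0, ignore_warnings=True,
)
scores = clf.fit(X_train, X_test, y_train, y_test)
print(scores)
```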
654class LazyRegressor(LazyDeepRegressor): 655 """ 656 Fitting -- almost -- all the regression algorithms with 657 nnetsauce's CustomRegressor and returning their scores. 658 659 Parameters: 660 661 verbose: int, optional (default=0) 662 Any positive number for verbosity. 663 664 ignore_warnings: bool, optional (default=True) 665 When set to True, the warning related to algorigms that are not able to run are ignored. 666 667 custom_metric: function, optional (default=None) 668 When function is provided, models are evaluated based on the custom evaluation metric provided. 669 670 predictions: bool, optional (default=False) 671 When set to True, the predictions of all the models models are returned as dataframe. 672 673 sort_by: string, optional (default='RMSE') 674 Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken' and 'Custom Metric'. 675 or a custom metric identified by its name and provided by custom_metric. 676 677 random_state: int, optional (default=42) 678 Reproducibiility seed. 679 680 estimators: list, optional (default='all') 681 list of Estimators names or just 'all' (default='all') 682 683 preprocess: bool 684 preprocessing is done when set to True 685 686 n_jobs : int, when possible, run in parallel 687 For now, only used by individual models that support it. 688 689 All the other parameters are the same as CustomRegressor's. 690 691 Attributes: 692 693 models_: dict-object 694 Returns a dictionary with each model pipeline as value 695 with key as name of models. 696 697 best_model_: object 698 Returns the best model pipeline based on the sort_by metric. 699 700 Examples: 701 702 import nnetsauce as ns 703 import numpy as np 704 from sklearn import datasets 705 from sklearn.utils import shuffle 706 707 diabetes = datasets.load_diabetes() 708 X, y = shuffle(diabetes.data, diabetes.target, random_state=13) 709 X = X.astype(np.float32) 710 711 offset = int(X.shape[0] * 0.9) 712 X_train, y_train = X[:offset], y[:offset] 713 X_test, y_test = X[offset:], y[offset:] 714 715 reg = ns.LazyRegressor(verbose=0, ignore_warnings=False, 716 custom_metric=None) 717 models, predictions = reg.fit(X_train, X_test, y_train, y_test) 718 print(models) 719 720 """ 721 722 def __init__( 723 self, 724 verbose=0, 725 ignore_warnings=True, 726 custom_metric=None, 727 predictions=False, 728 sort_by="RMSE", 729 random_state=42, 730 estimators="all", 731 preprocess=False, 732 n_jobs=None, 733 # CustomRegressor attributes 734 obj=None, 735 n_hidden_features=5, 736 activation_name="relu", 737 a=0.01, 738 nodes_sim="sobol", 739 bias=True, 740 dropout=0, 741 direct_link=True, 742 n_clusters=2, 743 cluster_encode=True, 744 type_clust="kmeans", 745 type_scaling=("std", "std", "std"), 746 col_sample=1, 747 row_sample=1, 748 seed=123, 749 backend="cpu", 750 ): 751 super().__init__( 752 verbose=verbose, 753 ignore_warnings=ignore_warnings, 754 custom_metric=custom_metric, 755 predictions=predictions, 756 sort_by=sort_by, 757 random_state=random_state, 758 estimators=estimators, 759 preprocess=preprocess, 760 n_jobs=n_jobs, 761 n_layers=1, 762 obj=obj, 763 n_hidden_features=n_hidden_features, 764 activation_name=activation_name, 765 a=a, 766 nodes_sim=nodes_sim, 767 bias=bias, 768 dropout=dropout, 769 direct_link=direct_link, 770 n_clusters=n_clusters, 771 cluster_encode=cluster_encode, 772 type_clust=type_clust, 773 type_scaling=type_scaling, 774 col_sample=col_sample, 775 row_sample=row_sample, 776 seed=seed, 777 backend=backend, 778 )
Fitting -- almost -- all the regression algorithms with nnetsauce's CustomRegressor and returning their scores.
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, warnings related to algorithms that are not able to run are ignored.
custom_metric: function, optional (default=None)
When function is provided, models are evaluated based on the custom evaluation metric provided.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models are returned as a dataframe.
sort_by: string, optional (default='RMSE')
Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken' and 'Custom Metric',
or a custom metric identified by its name and provided by custom_metric.
random_state: int, optional (default=42)
Reproducibility seed.
estimators: list, optional (default='all')
list of estimator names, or just 'all' (default='all')
preprocess: bool
preprocessing is applied when set to True
n_jobs: int, when possible, run in parallel
For now, only used by individual models that support it.
All the other parameters are the same as CustomRegressor's.
Attributes:
models_: dict-object
Returns a dictionary with each model pipeline as value
and the model name as key.
best_model_: object
Returns the best model pipeline based on the sort_by metric.
Examples:
import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle
diabetes = datasets.load_diabetes()
X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
X = X.astype(np.float32)
offset = int(X.shape[0] * 0.9)
X_train, y_train = X[:offset], y[:offset]
X_test, y_test = X[offset:], y[offset:]
reg = ns.LazyRegressor(verbose=0, ignore_warnings=False,
custom_metric=None)
models, predictions = reg.fit(X_train, X_test, y_train, y_test)
print(models)
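Setting predictions=True, as described above, also returns each model's test-set predictions alongside the scores. A minimal sketch, assuming the same diabetes data:

```python
# Sketch: retrieve per-model predictions in addition to the scores table.
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=13)

reg = ns.LazyRegressor(verbose=0, ignore_warnings=True, predictions=True)
scores, predictions = reg.fit(X_train, X_test, y_train, y_test)
print(scores.head())
print(list(predictions)[:3])   # names of the first few models with stored predictions
```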
94class LazyDeepClassifier(Custom, ClassifierMixin): 95 """ 96 97 Fitting -- almost -- all the classification algorithms with layers of 98 nnetsauce's CustomClassifier and returning their scores. 99 100 Parameters: 101 102 verbose: int, optional (default=0) 103 Any positive number for verbosity. 104 105 ignore_warnings: bool, optional (default=True) 106 When set to True, the warning related to algorigms that are not 107 able to run are ignored. 108 109 custom_metric: function, optional (default=None) 110 When function is provided, models are evaluated based on the custom 111 evaluation metric provided. 112 113 predictions: bool, optional (default=False) 114 When set to True, the predictions of all the models models are 115 returned as data frame. 116 117 sort_by: string, optional (default='Accuracy') 118 Sort models by a metric. Available options are 'Accuracy', 119 'Balanced Accuracy', 'ROC AUC', 'F1 Score' or a custom metric 120 identified by its name and provided by custom_metric. 121 122 random_state: int, optional (default=42) 123 Reproducibiility seed. 124 125 estimators: list, optional (default='all') 126 list of Estimators names or just 'all' for > 90 classifiers 127 (default='all') 128 129 preprocess: bool, preprocessing is done when set to True 130 131 n_jobs: int, when possible, run in parallel 132 For now, only used by individual models that support it. 133 134 n_layers: int, optional (default=3) 135 Number of layers of CustomClassifiers to be used. 136 137 All the other parameters are the same as CustomClassifier's. 138 139 Attributes: 140 141 models_: dict-object 142 Returns a dictionary with each model pipeline as value 143 with key as name of models. 144 145 best_model_: object 146 Returns the best model pipeline. 147 148 Examples 149 150 ```python 151 import nnetsauce as ns 152 from sklearn.datasets import load_breast_cancer 153 from sklearn.model_selection import train_test_split 154 data = load_breast_cancer() 155 X = data.data 156 y= data.target 157 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, 158 random_state=123) 159 clf = ns.LazyDeepClassifier(verbose=0, ignore_warnings=True, custom_metric=None) 160 models, predictions = clf.fit(X_train, X_test, y_train, y_test) 161 model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test) 162 print(models) 163 ``` 164 165 """ 166 167 def __init__( 168 self, 169 verbose=0, 170 ignore_warnings=True, 171 custom_metric=None, 172 predictions=False, 173 sort_by="Accuracy", 174 random_state=42, 175 estimators="all", 176 preprocess=False, 177 n_jobs=None, 178 # Defining depth 179 n_layers=3, 180 # CustomClassifier attributes 181 obj=None, 182 n_hidden_features=5, 183 activation_name="relu", 184 a=0.01, 185 nodes_sim="sobol", 186 bias=True, 187 dropout=0, 188 direct_link=True, 189 n_clusters=2, 190 cluster_encode=True, 191 type_clust="kmeans", 192 type_scaling=("std", "std", "std"), 193 col_sample=1, 194 row_sample=1, 195 seed=123, 196 backend="cpu", 197 ): 198 self.verbose = verbose 199 self.ignore_warnings = ignore_warnings 200 self.custom_metric = custom_metric 201 self.predictions = predictions 202 self.sort_by = sort_by 203 self.models_ = {} 204 self.best_model_ = None 205 self.random_state = random_state 206 self.estimators = estimators 207 self.preprocess = preprocess 208 self.n_layers = n_layers - 1 209 self.n_jobs = n_jobs 210 super().__init__( 211 obj=obj, 212 n_hidden_features=n_hidden_features, 213 activation_name=activation_name, 214 a=a, 215 nodes_sim=nodes_sim, 216 bias=bias, 217 
dropout=dropout, 218 direct_link=direct_link, 219 n_clusters=n_clusters, 220 cluster_encode=cluster_encode, 221 type_clust=type_clust, 222 type_scaling=type_scaling, 223 col_sample=col_sample, 224 row_sample=row_sample, 225 seed=seed, 226 backend=backend, 227 ) 228 229 def fit(self, X_train, X_test, y_train, y_test): 230 """Fit classifiers to X_train and y_train, predict and score on X_test, 231 y_test. 232 233 Parameters: 234 235 X_train: array-like, 236 Training vectors, where rows is the number of samples 237 and columns is the number of features. 238 239 X_test: array-like, 240 Testing vectors, where rows is the number of samples 241 and columns is the number of features. 242 243 y_train: array-like, 244 Training vectors, where rows is the number of samples 245 and columns is the number of features. 246 247 y_test: array-like, 248 Testing vectors, where rows is the number of samples 249 and columns is the number of features. 250 251 Returns: 252 253 scores: Pandas DataFrame 254 Returns metrics of all the models in a Pandas DataFrame. 255 256 predictions: Pandas DataFrame 257 Returns predictions of all the models in a Pandas DataFrame. 258 """ 259 Accuracy = [] 260 B_Accuracy = [] 261 ROC_AUC = [] 262 F1 = [] 263 names = [] 264 TIME = [] 265 predictions = {} 266 267 if self.custom_metric is not None: 268 CUSTOM_METRIC = [] 269 270 if isinstance(X_train, np.ndarray): 271 X_train = pd.DataFrame(X_train) 272 X_test = pd.DataFrame(X_test) 273 274 numeric_features = X_train.select_dtypes(include=[np.number]).columns 275 categorical_features = X_train.select_dtypes(include=["object"]).columns 276 277 categorical_low, categorical_high = get_card_split( 278 X_train, categorical_features 279 ) 280 281 if self.preprocess is True: 282 preprocessor = ColumnTransformer( 283 transformers=[ 284 ("numeric", numeric_transformer, numeric_features), 285 ( 286 "categorical_low", 287 categorical_transformer_low, 288 categorical_low, 289 ), 290 ( 291 "categorical_high", 292 categorical_transformer_high, 293 categorical_high, 294 ), 295 ] 296 ) 297 298 # baseline models 299 try: 300 baseline_names = ["RandomForestClassifier", "XGBClassifier"] 301 baseline_models = [RandomForestClassifier(), xgb.XGBClassifier()] 302 except Exception as exception: 303 baseline_names = ["RandomForestClassifier"] 304 baseline_models = [RandomForestClassifier()] 305 306 for name, model in zip(baseline_names, baseline_models): 307 start = time.time() 308 try: 309 model.fit(X_train, y_train) 310 self.models_[name] = model 311 y_pred = model.predict(X_test) 312 accuracy = accuracy_score(y_test, y_pred, normalize=True) 313 b_accuracy = balanced_accuracy_score(y_test, y_pred) 314 f1 = f1_score(y_test, y_pred, average="weighted") 315 try: 316 roc_auc = roc_auc_score(y_test, y_pred) 317 except Exception as exception: 318 roc_auc = None 319 if self.ignore_warnings is False: 320 print("ROC AUC couldn't be calculated for " + name) 321 print(exception) 322 names.append(name) 323 Accuracy.append(accuracy) 324 B_Accuracy.append(b_accuracy) 325 ROC_AUC.append(roc_auc) 326 F1.append(f1) 327 TIME.append(time.time() - start) 328 if self.custom_metric is not None: 329 custom_metric = self.custom_metric(y_test, y_pred) 330 CUSTOM_METRIC.append(custom_metric) 331 if self.verbose > 0: 332 if self.custom_metric is not None: 333 print( 334 { 335 "Model": name, 336 "Accuracy": accuracy, 337 "Balanced Accuracy": b_accuracy, 338 "ROC AUC": roc_auc, 339 "F1 Score": f1, 340 self.custom_metric.__name__: custom_metric, 341 "Time taken": time.time() - start, 
342 } 343 ) 344 else: 345 print( 346 { 347 "Model": name, 348 "Accuracy": accuracy, 349 "Balanced Accuracy": b_accuracy, 350 "ROC AUC": roc_auc, 351 "F1 Score": f1, 352 "Time taken": time.time() - start, 353 } 354 ) 355 if self.predictions: 356 predictions[name] = y_pred 357 except Exception as exception: 358 if self.ignore_warnings is False: 359 print(name + " model failed to execute") 360 print(exception) 361 362 if self.estimators == "all": 363 self.classifiers = [ 364 item 365 for sublist in [ 366 DEEPCLASSIFIERS, 367 DEEPMULTITASKCLASSIFIERS, 368 DEEPSIMPLEMULTITASKCLASSIFIERS, 369 ] 370 for item in sublist 371 ] 372 else: 373 self.classifiers = ( 374 [ 375 ("DeepCustomClassifier(" + est[0] + ")", est[1]) 376 for est in all_estimators() 377 if ( 378 issubclass(est[1], ClassifierMixin) 379 and (est[0] in self.estimators) 380 ) 381 ] 382 + [ 383 ( 384 "DeepMultitaskClassifier(" + est[0] + ")", 385 partial(MultitaskClassifier, obj=est[1]()), 386 ) 387 for est in all_estimators() 388 if ( 389 issubclass(est[1], RegressorMixin) 390 and (est[0] in self.estimators) 391 ) 392 ] 393 + [ 394 ( 395 "DeepSimpleMultitaskClassifier(" + est[0] + ")", 396 partial(SimpleMultitaskClassifier, obj=est[1]()), 397 ) 398 for est in all_estimators() 399 if ( 400 issubclass(est[1], RegressorMixin) 401 and (est[0] in self.estimators) 402 ) 403 ] 404 ) 405 406 if self.preprocess is True: 407 for name, model in tqdm(self.classifiers): # do parallel exec 408 other_args = ( 409 {} 410 ) # use this trick for `random_state` too --> refactor 411 try: 412 if ( 413 "n_jobs" in model().get_params().keys() 414 and name.find("LogisticRegression") == -1 415 ): 416 other_args["n_jobs"] = self.n_jobs 417 except Exception: 418 pass 419 420 start = time.time() 421 422 try: 423 if "random_state" in model().get_params().keys(): 424 layer_clf = CustomClassifier( 425 obj=model(random_state=self.random_state), 426 n_hidden_features=self.n_hidden_features, 427 activation_name=self.activation_name, 428 a=self.a, 429 nodes_sim=self.nodes_sim, 430 bias=self.bias, 431 dropout=self.dropout, 432 direct_link=self.direct_link, 433 n_clusters=self.n_clusters, 434 cluster_encode=self.cluster_encode, 435 type_clust=self.type_clust, 436 type_scaling=self.type_scaling, 437 col_sample=self.col_sample, 438 row_sample=self.row_sample, 439 seed=self.seed, 440 backend=self.backend, 441 cv_calibration=None, 442 ) 443 444 else: 445 layer_clf = CustomClassifier( 446 obj=model(), 447 n_hidden_features=self.n_hidden_features, 448 activation_name=self.activation_name, 449 a=self.a, 450 nodes_sim=self.nodes_sim, 451 bias=self.bias, 452 dropout=self.dropout, 453 direct_link=self.direct_link, 454 n_clusters=self.n_clusters, 455 cluster_encode=self.cluster_encode, 456 type_clust=self.type_clust, 457 type_scaling=self.type_scaling, 458 col_sample=self.col_sample, 459 row_sample=self.row_sample, 460 seed=self.seed, 461 backend=self.backend, 462 cv_calibration=None, 463 ) 464 465 layer_clf.fit(X_train, y_train) 466 467 for _ in range(self.n_layers): 468 layer_clf = deepcopy( 469 CustomClassifier( 470 obj=layer_clf, 471 n_hidden_features=self.n_hidden_features, 472 activation_name=self.activation_name, 473 a=self.a, 474 nodes_sim=self.nodes_sim, 475 bias=self.bias, 476 dropout=self.dropout, 477 direct_link=self.direct_link, 478 n_clusters=self.n_clusters, 479 cluster_encode=self.cluster_encode, 480 type_clust=self.type_clust, 481 type_scaling=self.type_scaling, 482 col_sample=self.col_sample, 483 row_sample=self.row_sample, 484 seed=self.seed, 485 
backend=self.backend, 486 cv_calibration=None, 487 ) 488 ) 489 490 pipe = Pipeline( 491 [ 492 ("preprocessor", preprocessor), 493 ("classifier", layer_clf), 494 ] 495 ) 496 497 pipe.fit(X_train, y_train) 498 self.models_[name] = pipe 499 y_pred = pipe.predict(X_test) 500 accuracy = accuracy_score(y_test, y_pred, normalize=True) 501 b_accuracy = balanced_accuracy_score(y_test, y_pred) 502 f1 = f1_score(y_test, y_pred, average="weighted") 503 try: 504 roc_auc = roc_auc_score(y_test, y_pred) 505 except Exception as exception: 506 roc_auc = None 507 if self.ignore_warnings is False: 508 print("ROC AUC couldn't be calculated for " + name) 509 print(exception) 510 names.append(name) 511 Accuracy.append(accuracy) 512 B_Accuracy.append(b_accuracy) 513 ROC_AUC.append(roc_auc) 514 F1.append(f1) 515 TIME.append(time.time() - start) 516 if self.custom_metric is not None: 517 custom_metric = self.custom_metric(y_test, y_pred) 518 CUSTOM_METRIC.append(custom_metric) 519 if self.verbose > 0: 520 if self.custom_metric is not None: 521 print( 522 { 523 "Model": name, 524 "Accuracy": accuracy, 525 "Balanced Accuracy": b_accuracy, 526 "ROC AUC": roc_auc, 527 "F1 Score": f1, 528 self.custom_metric.__name__: custom_metric, 529 "Time taken": time.time() - start, 530 } 531 ) 532 else: 533 print( 534 { 535 "Model": name, 536 "Accuracy": accuracy, 537 "Balanced Accuracy": b_accuracy, 538 "ROC AUC": roc_auc, 539 "F1 Score": f1, 540 "Time taken": time.time() - start, 541 } 542 ) 543 if self.predictions: 544 predictions[name] = y_pred 545 except Exception as exception: 546 if self.ignore_warnings is False: 547 print(name + " model failed to execute") 548 print(exception) 549 550 else: # no preprocessing 551 for name, model in tqdm(self.classifiers): # do parallel exec 552 start = time.time() 553 try: 554 if "random_state" in model().get_params().keys(): 555 layer_clf = CustomClassifier( 556 obj=model(random_state=self.random_state), 557 n_hidden_features=self.n_hidden_features, 558 activation_name=self.activation_name, 559 a=self.a, 560 nodes_sim=self.nodes_sim, 561 bias=self.bias, 562 dropout=self.dropout, 563 direct_link=self.direct_link, 564 n_clusters=self.n_clusters, 565 cluster_encode=self.cluster_encode, 566 type_clust=self.type_clust, 567 type_scaling=self.type_scaling, 568 col_sample=self.col_sample, 569 row_sample=self.row_sample, 570 seed=self.seed, 571 backend=self.backend, 572 cv_calibration=None, 573 ) 574 575 else: 576 layer_clf = CustomClassifier( 577 obj=model(), 578 n_hidden_features=self.n_hidden_features, 579 activation_name=self.activation_name, 580 a=self.a, 581 nodes_sim=self.nodes_sim, 582 bias=self.bias, 583 dropout=self.dropout, 584 direct_link=self.direct_link, 585 n_clusters=self.n_clusters, 586 cluster_encode=self.cluster_encode, 587 type_clust=self.type_clust, 588 type_scaling=self.type_scaling, 589 col_sample=self.col_sample, 590 row_sample=self.row_sample, 591 seed=self.seed, 592 backend=self.backend, 593 cv_calibration=None, 594 ) 595 596 layer_clf.fit(X_train, y_train) 597 598 for _ in range(self.n_layers): 599 layer_clf = deepcopy( 600 CustomClassifier( 601 obj=layer_clf, 602 n_hidden_features=self.n_hidden_features, 603 activation_name=self.activation_name, 604 a=self.a, 605 nodes_sim=self.nodes_sim, 606 bias=self.bias, 607 dropout=self.dropout, 608 direct_link=self.direct_link, 609 n_clusters=self.n_clusters, 610 cluster_encode=self.cluster_encode, 611 type_clust=self.type_clust, 612 type_scaling=self.type_scaling, 613 col_sample=self.col_sample, 614 row_sample=self.row_sample, 
615 seed=self.seed, 616 backend=self.backend, 617 cv_calibration=None, 618 ) 619 ) 620 621 # layer_clf.fit(X_train, y_train) 622 623 layer_clf.fit(X_train, y_train) 624 625 self.models_[name] = layer_clf 626 y_pred = layer_clf.predict(X_test) 627 accuracy = accuracy_score(y_test, y_pred, normalize=True) 628 b_accuracy = balanced_accuracy_score(y_test, y_pred) 629 f1 = f1_score(y_test, y_pred, average="weighted") 630 try: 631 roc_auc = roc_auc_score(y_test, y_pred) 632 except Exception as exception: 633 roc_auc = None 634 if self.ignore_warnings is False: 635 print("ROC AUC couldn't be calculated for " + name) 636 print(exception) 637 names.append(name) 638 Accuracy.append(accuracy) 639 B_Accuracy.append(b_accuracy) 640 ROC_AUC.append(roc_auc) 641 F1.append(f1) 642 TIME.append(time.time() - start) 643 if self.custom_metric is not None: 644 custom_metric = self.custom_metric(y_test, y_pred) 645 CUSTOM_METRIC.append(custom_metric) 646 if self.verbose > 0: 647 if self.custom_metric is not None: 648 print( 649 { 650 "Model": name, 651 "Accuracy": accuracy, 652 "Balanced Accuracy": b_accuracy, 653 "ROC AUC": roc_auc, 654 "F1 Score": f1, 655 self.custom_metric.__name__: custom_metric, 656 "Time taken": time.time() - start, 657 } 658 ) 659 else: 660 print( 661 { 662 "Model": name, 663 "Accuracy": accuracy, 664 "Balanced Accuracy": b_accuracy, 665 "ROC AUC": roc_auc, 666 "F1 Score": f1, 667 "Time taken": time.time() - start, 668 } 669 ) 670 if self.predictions: 671 predictions[name] = y_pred 672 except Exception as exception: 673 if self.ignore_warnings is False: 674 print(name + " model failed to execute") 675 print(exception) 676 677 if self.custom_metric is None: 678 scores = pd.DataFrame( 679 { 680 "Model": names, 681 "Accuracy": Accuracy, 682 "Balanced Accuracy": B_Accuracy, 683 "ROC AUC": ROC_AUC, 684 "F1 Score": F1, 685 "Time Taken": TIME, 686 } 687 ) 688 else: 689 scores = pd.DataFrame( 690 { 691 "Model": names, 692 "Accuracy": Accuracy, 693 "Balanced Accuracy": B_Accuracy, 694 "ROC AUC": ROC_AUC, 695 "F1 Score": F1, 696 "Custom metric": CUSTOM_METRIC, 697 "Time Taken": TIME, 698 } 699 ) 700 scores = scores.sort_values(by=self.sort_by, ascending=False).set_index( 701 "Model" 702 ) 703 704 self.best_model_ = self.models_[scores.index[0]] 705 706 if self.predictions is True: 707 return scores, predictions 708 709 return scores 710 711 def get_best_model(self): 712 """ 713 This function returns the best model pipeline based on the sort_by metric. 714 715 Returns: 716 717 best_model: object, 718 Returns the best model pipeline based on the sort_by metric. 719 720 """ 721 return self.best_model_ 722 723 def provide_models(self, X_train, X_test, y_train, y_test): 724 """Returns all the model objects trained. If fit hasn't been called yet, 725 then it's called to return the models. 726 727 Parameters: 728 729 X_train: array-like, 730 Training vectors, where rows is the number of samples 731 and columns is the number of features. 732 733 X_test: array-like, 734 Testing vectors, where rows is the number of samples 735 and columns is the number of features. 736 737 y_train: array-like, 738 Training vectors, where rows is the number of samples 739 and columns is the number of features. 740 741 y_test: array-like, 742 Testing vectors, where rows is the number of samples 743 and columns is the number of features. 744 745 Returns: 746 747 models: dict-object, 748 Returns a dictionary with each model's pipeline as value 749 and key = name of the model. 
750 """ 751 if len(self.models_.keys()) == 0: 752 self.fit(X_train, X_test, y_train, y_test) 753 754 return self.models_
Fitting -- almost -- all the classification algorithms with layers of nnetsauce's CustomClassifier and returning their scores.
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, warnings related to algorithms that are not
able to run are ignored.
custom_metric: function, optional (default=None)
When function is provided, models are evaluated based on the custom
evaluation metric provided.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models are
returned as a data frame.
sort_by: string, optional (default='Accuracy')
Sort models by a metric. Available options are 'Accuracy',
'Balanced Accuracy', 'ROC AUC', 'F1 Score' or a custom metric
identified by its name and provided by custom_metric.
random_state: int, optional (default=42)
Reproducibility seed.
estimators: list, optional (default='all')
list of estimator names, or just 'all' for > 90 classifiers
(default='all')
preprocess: bool, preprocessing is done when set to True
n_jobs: int, when possible, run in parallel
For now, only used by individual models that support it.
n_layers: int, optional (default=3)
Number of layers of CustomClassifiers to be used.
All the other parameters are the same as CustomClassifier's.
Attributes:
models_: dict-object
Returns a dictionary with each model pipeline as value
and the model name as key.
best_model_: object
Returns the best model pipeline.
Examples:
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
data = load_breast_cancer()
X = data.data
y= data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2,
random_state=123)
clf = ns.LazyDeepClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)
model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test)
print(models)
fit(X_train, X_test, y_train, y_test): Fit classifiers to X_train and y_train, then predict and score on X_test and y_test.
Parameters:
X_train: array-like,
Training vectors, where the number of rows is the number of samples
and the number of columns is the number of features.
X_test: array-like,
Testing vectors, where the number of rows is the number of samples
and the number of columns is the number of features.
y_train: array-like,
Training targets, one value per training sample.
y_test: array-like,
Testing targets, one value per testing sample.
Returns:
scores: Pandas DataFrame
Returns metrics of all the models in a Pandas DataFrame.
predictions: Pandas DataFrame
Returns predictions of all the models in a Pandas DataFrame (only returned when the predictions attribute is set to True).
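When a custom_metric function is supplied, it is evaluated on (y_test, y_pred) for every model and reported in an extra column of the scores table. A minimal sketch, assuming scikit-learn's matthews_corrcoef as the custom metric and the breast cancer data:

```python
# Sketch: score every model with an additional user-supplied metric.
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import matthews_corrcoef
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

clf = ns.LazyDeepClassifier(
    verbose=0, ignore_warnings=True,
    custom_metric=matthews_corrcoef,   # called as custom_metric(y_test, y_pred)
    n_layers=2,
)
scores = clf.fit(X_train, X_test, y_train, y_test)
print(scores.head())
```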
provide_models(X_train, X_test, y_train, y_test): Returns all the trained model objects. If fit hasn't been called yet, it is called first, then the models are returned.
Parameters:
X_train: array-like, Training vectors, where the number of rows is the number of samples and the number of columns is the number of features.
X_test: array-like, Testing vectors, where the number of rows is the number of samples and the number of columns is the number of features.
y_train: array-like, Training targets, one value per training sample.
y_test: array-like, Testing targets, one value per testing sample.
Returns:
models: dict-object,
Returns a dictionary with each model's pipeline as value
and key = name of the model.
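Since provide_models calls fit when no model has been trained yet, the returned dictionary can be inspected directly and the best pipeline retrieved afterwards with get_best_model. A minimal sketch, assuming the same breast cancer split as in the class example:

```python
# Sketch: inspect the fitted pipelines and retrieve the best one.
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

clf = ns.LazyDeepClassifier(verbose=0, ignore_warnings=True)
models = clf.provide_models(X_train, X_test, y_train, y_test)  # fits if needed
for name in list(models)[:5]:
    print(name)                       # names of the fitted models
print(clf.get_best_model())           # pipeline ranked first by the sort_by metric
```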
90class LazyDeepRegressor(Custom, RegressorMixin): 91 """ 92 Fitting -- almost -- all the regression algorithms with layers of 93 nnetsauce's CustomRegressor and returning their scores. 94 95 Parameters: 96 97 verbose: int, optional (default=0) 98 Any positive number for verbosity. 99 100 ignore_warnings: bool, optional (default=True) 101 When set to True, the warning related to algorigms that are not able to run are ignored. 102 103 custom_metric: function, optional (default=None) 104 When function is provided, models are evaluated based on the custom evaluation metric provided. 105 106 predictions: bool, optional (default=False) 107 When set to True, the predictions of all the models models are returned as dataframe. 108 109 sort_by: string, optional (default='RMSE') 110 Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken' and 'Custom Metric'. 111 or a custom metric identified by its name and provided by custom_metric. 112 113 random_state: int, optional (default=42) 114 Reproducibiility seed. 115 116 estimators: list, optional (default='all') 117 list of Estimators names or just 'all' (default='all') 118 119 preprocess: bool 120 preprocessing is done when set to True 121 122 n_jobs : int, when possible, run in parallel 123 For now, only used by individual models that support it. 124 125 n_layers: int, optional (default=3) 126 Number of layers of CustomRegressors to be used. 127 128 All the other parameters are the same as CustomRegressor's. 129 130 Attributes: 131 132 models_: dict-object 133 Returns a dictionary with each model pipeline as value 134 with key as name of models. 135 136 best_model_: object 137 Returns the best model pipeline based on the sort_by metric. 138 139 Examples: 140 141 import nnetsauce as ns 142 import numpy as np 143 from sklearn import datasets 144 from sklearn.utils import shuffle 145 146 diabetes = datasets.load_diabetes() 147 X, y = shuffle(diabetes.data, diabetes.target, random_state=13) 148 X = X.astype(np.float32) 149 150 offset = int(X.shape[0] * 0.9) 151 X_train, y_train = X[:offset], y[:offset] 152 X_test, y_test = X[offset:], y[offset:] 153 154 reg = ns.LazyDeepRegressor(verbose=0, ignore_warnings=False, custom_metric=None) 155 models, predictions = reg.fit(X_train, X_test, y_train, y_test) 156 print(models) 157 158 """ 159 160 def __init__( 161 self, 162 verbose=0, 163 ignore_warnings=True, 164 custom_metric=None, 165 predictions=False, 166 sort_by="RMSE", 167 random_state=42, 168 estimators="all", 169 preprocess=False, 170 n_jobs=None, 171 # Defining depth 172 n_layers=3, 173 # CustomRegressor attributes 174 obj=None, 175 n_hidden_features=5, 176 activation_name="relu", 177 a=0.01, 178 nodes_sim="sobol", 179 bias=True, 180 dropout=0, 181 direct_link=True, 182 n_clusters=2, 183 cluster_encode=True, 184 type_clust="kmeans", 185 type_scaling=("std", "std", "std"), 186 col_sample=1, 187 row_sample=1, 188 seed=123, 189 backend="cpu", 190 ): 191 self.verbose = verbose 192 self.ignore_warnings = ignore_warnings 193 self.custom_metric = custom_metric 194 self.predictions = predictions 195 self.sort_by = sort_by 196 self.models_ = {} 197 self.best_model_ = None 198 self.random_state = random_state 199 self.estimators = estimators 200 self.preprocess = preprocess 201 self.n_layers = n_layers - 1 202 self.n_jobs = n_jobs 203 super().__init__( 204 obj=obj, 205 n_hidden_features=n_hidden_features, 206 activation_name=activation_name, 207 a=a, 208 nodes_sim=nodes_sim, 209 bias=bias, 210 dropout=dropout, 211 
direct_link=direct_link, 212 n_clusters=n_clusters, 213 cluster_encode=cluster_encode, 214 type_clust=type_clust, 215 type_scaling=type_scaling, 216 col_sample=col_sample, 217 row_sample=row_sample, 218 seed=seed, 219 backend=backend, 220 ) 221 222 def fit(self, X_train, X_test, y_train, y_test): 223 """Fit Regression algorithms to X_train and y_train, predict and score on X_test, y_test. 224 225 Parameters: 226 227 X_train : array-like, 228 Training vectors, where rows is the number of samples 229 and columns is the number of features. 230 231 X_test : array-like, 232 Testing vectors, where rows is the number of samples 233 and columns is the number of features. 234 235 y_train : array-like, 236 Training vectors, where rows is the number of samples 237 and columns is the number of features. 238 239 y_test : array-like, 240 Testing vectors, where rows is the number of samples 241 and columns is the number of features. 242 243 Returns: 244 ------- 245 scores: Pandas DataFrame 246 Returns metrics of all the models in a Pandas DataFrame. 247 248 predictions : Pandas DataFrame 249 Returns predictions of all the models in a Pandas DataFrame. 250 251 """ 252 R2 = [] 253 ADJR2 = [] 254 RMSE = [] 255 # WIN = [] 256 names = [] 257 TIME = [] 258 predictions = {} 259 260 if self.custom_metric: 261 CUSTOM_METRIC = [] 262 263 if isinstance(X_train, np.ndarray): 264 X_train = pd.DataFrame(X_train) 265 X_test = pd.DataFrame(X_test) 266 267 numeric_features = X_train.select_dtypes(include=[np.number]).columns 268 categorical_features = X_train.select_dtypes(include=["object"]).columns 269 270 categorical_low, categorical_high = get_card_split( 271 X_train, categorical_features 272 ) 273 274 if self.preprocess is True: 275 preprocessor = ColumnTransformer( 276 transformers=[ 277 ("numeric", numeric_transformer, numeric_features), 278 ( 279 "categorical_low", 280 categorical_transformer_low, 281 categorical_low, 282 ), 283 ( 284 "categorical_high", 285 categorical_transformer_high, 286 categorical_high, 287 ), 288 ] 289 ) 290 291 # base models 292 try: 293 baseline_names = ["RandomForestRegressor", "XGBRegressor"] 294 baseline_models = [RandomForestRegressor(), xgb.XGBRegressor()] 295 except Exception as exception: 296 baseline_names = ["RandomForestRegressor"] 297 baseline_models = [RandomForestRegressor()] 298 299 for name, model in zip(baseline_names, baseline_models): 300 start = time.time() 301 try: 302 model.fit(X_train, y_train) 303 self.models_[name] = model 304 y_pred = model.predict(X_test) 305 r_squared = r2_score(y_test, y_pred) 306 adj_rsquared = adjusted_rsquared( 307 r_squared, X_test.shape[0], X_test.shape[1] 308 ) 309 rmse = np.sqrt(np.mean((y_test - y_pred) ** 2)) 310 311 names.append(name) 312 R2.append(r_squared) 313 ADJR2.append(adj_rsquared) 314 RMSE.append(rmse) 315 TIME.append(time.time() - start) 316 317 if self.custom_metric: 318 custom_metric = self.custom_metric(y_test, y_pred) 319 CUSTOM_METRIC.append(custom_metric) 320 321 if self.verbose > 0: 322 scores_verbose = { 323 "Model": name, 324 "R-Squared": r_squared, 325 "Adjusted R-Squared": adj_rsquared, 326 "RMSE": rmse, 327 "Time taken": time.time() - start, 328 } 329 330 if self.custom_metric: 331 scores_verbose[ 332 self.custom_metric.__name__ 333 ] = custom_metric 334 335 print(scores_verbose) 336 if self.predictions: 337 predictions[name] = y_pred 338 except Exception as exception: 339 if self.ignore_warnings is False: 340 print(name + " model failed to execute") 341 print(exception) 342 343 if self.estimators == "all": 344 
self.regressors = DEEPREGRESSORS 345 else: 346 self.regressors = [ 347 ("DeepCustomRegressor(" + est[0] + ")", est[1]) 348 for est in all_estimators() 349 if ( 350 issubclass(est[1], RegressorMixin) 351 and (est[0] in self.estimators) 352 ) 353 ] 354 355 if self.preprocess is True: 356 for name, model in tqdm(self.regressors): # do parallel exec 357 start = time.time() 358 try: 359 if "random_state" in model().get_params().keys(): 360 layer_regr = CustomRegressor( 361 obj=model(random_state=self.random_state), 362 n_hidden_features=self.n_hidden_features, 363 activation_name=self.activation_name, 364 a=self.a, 365 nodes_sim=self.nodes_sim, 366 bias=self.bias, 367 dropout=self.dropout, 368 direct_link=self.direct_link, 369 n_clusters=self.n_clusters, 370 cluster_encode=self.cluster_encode, 371 type_clust=self.type_clust, 372 type_scaling=self.type_scaling, 373 col_sample=self.col_sample, 374 row_sample=self.row_sample, 375 seed=self.seed, 376 backend=self.backend, 377 ) 378 else: 379 layer_regr = CustomRegressor( 380 obj=model(), 381 n_hidden_features=self.n_hidden_features, 382 activation_name=self.activation_name, 383 a=self.a, 384 nodes_sim=self.nodes_sim, 385 bias=self.bias, 386 dropout=self.dropout, 387 direct_link=self.direct_link, 388 n_clusters=self.n_clusters, 389 cluster_encode=self.cluster_encode, 390 type_clust=self.type_clust, 391 type_scaling=self.type_scaling, 392 col_sample=self.col_sample, 393 row_sample=self.row_sample, 394 seed=self.seed, 395 backend=self.backend, 396 ) 397 398 for _ in range(self.n_layers): 399 layer_regr = deepcopy( 400 CustomRegressor( 401 obj=layer_regr, 402 n_hidden_features=self.n_hidden_features, 403 activation_name=self.activation_name, 404 a=self.a, 405 nodes_sim=self.nodes_sim, 406 bias=self.bias, 407 dropout=self.dropout, 408 direct_link=self.direct_link, 409 n_clusters=self.n_clusters, 410 cluster_encode=self.cluster_encode, 411 type_clust=self.type_clust, 412 type_scaling=self.type_scaling, 413 col_sample=self.col_sample, 414 row_sample=self.row_sample, 415 seed=self.seed, 416 backend=self.backend, 417 ) 418 ) 419 420 layer_regr.fit(X_train, y_train) 421 422 pipe = Pipeline( 423 steps=[ 424 ("preprocessor", preprocessor), 425 ("regressor", layer_regr), 426 ] 427 ) 428 429 pipe.fit(X_train, y_train) 430 431 self.models_[name] = pipe 432 y_pred = pipe.predict(X_test) 433 r_squared = r2_score(y_test, y_pred) 434 adj_rsquared = adjusted_rsquared( 435 r_squared, X_test.shape[0], X_test.shape[1] 436 ) 437 rmse = np.sqrt(np.mean((y_test - y_pred) ** 2)) 438 439 names.append(name) 440 R2.append(r_squared) 441 ADJR2.append(adj_rsquared) 442 RMSE.append(rmse) 443 TIME.append(time.time() - start) 444 445 if self.custom_metric: 446 custom_metric = self.custom_metric(y_test, y_pred) 447 CUSTOM_METRIC.append(custom_metric) 448 449 if self.verbose > 0: 450 scores_verbose = { 451 "Model": name, 452 "R-Squared": r_squared, 453 "Adjusted R-Squared": adj_rsquared, 454 "RMSE": rmse, 455 "Time taken": time.time() - start, 456 } 457 458 if self.custom_metric: 459 scores_verbose[ 460 self.custom_metric.__name__ 461 ] = custom_metric 462 463 print(scores_verbose) 464 if self.predictions: 465 predictions[name] = y_pred 466 except Exception as exception: 467 if self.ignore_warnings is False: 468 print(name + " model failed to execute") 469 print(exception) 470 471 else: # no preprocessing 472 for name, model in tqdm(self.regressors): # do parallel exec 473 start = time.time() 474 try: 475 if "random_state" in model().get_params().keys(): 476 layer_regr = 
CustomRegressor( 477 obj=model(random_state=self.random_state), 478 n_hidden_features=self.n_hidden_features, 479 activation_name=self.activation_name, 480 a=self.a, 481 nodes_sim=self.nodes_sim, 482 bias=self.bias, 483 dropout=self.dropout, 484 direct_link=self.direct_link, 485 n_clusters=self.n_clusters, 486 cluster_encode=self.cluster_encode, 487 type_clust=self.type_clust, 488 type_scaling=self.type_scaling, 489 col_sample=self.col_sample, 490 row_sample=self.row_sample, 491 seed=self.seed, 492 backend=self.backend, 493 ) 494 else: 495 layer_regr = CustomRegressor( 496 obj=model(), 497 n_hidden_features=self.n_hidden_features, 498 activation_name=self.activation_name, 499 a=self.a, 500 nodes_sim=self.nodes_sim, 501 bias=self.bias, 502 dropout=self.dropout, 503 direct_link=self.direct_link, 504 n_clusters=self.n_clusters, 505 cluster_encode=self.cluster_encode, 506 type_clust=self.type_clust, 507 type_scaling=self.type_scaling, 508 col_sample=self.col_sample, 509 row_sample=self.row_sample, 510 seed=self.seed, 511 backend=self.backend, 512 ) 513 514 layer_regr.fit(X_train, y_train) 515 516 for _ in range(self.n_layers): 517 layer_regr = deepcopy( 518 CustomRegressor( 519 obj=layer_regr, 520 n_hidden_features=self.n_hidden_features, 521 activation_name=self.activation_name, 522 a=self.a, 523 nodes_sim=self.nodes_sim, 524 bias=self.bias, 525 dropout=self.dropout, 526 direct_link=self.direct_link, 527 n_clusters=self.n_clusters, 528 cluster_encode=self.cluster_encode, 529 type_clust=self.type_clust, 530 type_scaling=self.type_scaling, 531 col_sample=self.col_sample, 532 row_sample=self.row_sample, 533 seed=self.seed, 534 backend=self.backend, 535 ) 536 ) 537 538 # layer_regr.fit(X_train, y_train) 539 540 layer_regr.fit(X_train, y_train) 541 542 self.models_[name] = layer_regr 543 y_pred = layer_regr.predict(X_test) 544 545 r_squared = r2_score(y_test, y_pred) 546 adj_rsquared = adjusted_rsquared( 547 r_squared, X_test.shape[0], X_test.shape[1] 548 ) 549 rmse = np.sqrt(np.mean((y_test - y_pred) ** 2)) 550 551 names.append(name) 552 R2.append(r_squared) 553 ADJR2.append(adj_rsquared) 554 RMSE.append(rmse) 555 TIME.append(time.time() - start) 556 557 if self.custom_metric: 558 custom_metric = self.custom_metric(y_test, y_pred) 559 CUSTOM_METRIC.append(custom_metric) 560 561 if self.verbose > 0: 562 scores_verbose = { 563 "Model": name, 564 "R-Squared": r_squared, 565 "Adjusted R-Squared": adj_rsquared, 566 "RMSE": rmse, 567 "Time taken": time.time() - start, 568 } 569 570 if self.custom_metric: 571 scores_verbose[ 572 self.custom_metric.__name__ 573 ] = custom_metric 574 575 print(scores_verbose) 576 if self.predictions: 577 predictions[name] = y_pred 578 except Exception as exception: 579 if self.ignore_warnings is False: 580 print(name + " model failed to execute") 581 print(exception) 582 583 scores = { 584 "Model": names, 585 "Adjusted R-Squared": ADJR2, 586 "R-Squared": R2, 587 "RMSE": RMSE, 588 "Time Taken": TIME, 589 } 590 591 if self.custom_metric: 592 scores["Custom metric"] = CUSTOM_METRIC 593 594 scores = pd.DataFrame(scores) 595 scores = scores.sort_values(by=self.sort_by, ascending=True).set_index( 596 "Model" 597 ) 598 599 self.best_model_ = self.models_[scores.index[0]] 600 601 if self.predictions is True: 602 return scores, predictions 603 604 return scores 605 606 def get_best_model(self): 607 """ 608 This function returns the best model pipeline based on the sort_by metric. 
609 610 Returns: 611 612 best_model: object, 613 Returns the best model pipeline based on the sort_by metric. 614 615 """ 616 return self.best_model_ 617 618 def provide_models(self, X_train, X_test, y_train, y_test): 619 """ 620 This function returns all the model objects trained in fit function. 621 If fit is not called already, then we call fit and then return the models. 622 623 Parameters: 624 625 X_train : array-like, 626 Training vectors, where rows is the number of samples 627 and columns is the number of features. 628 629 X_test : array-like, 630 Testing vectors, where rows is the number of samples 631 and columns is the number of features. 632 633 y_train : array-like, 634 Training vectors, where rows is the number of samples 635 and columns is the number of features. 636 637 y_test : array-like, 638 Testing vectors, where rows is the number of samples 639 and columns is the number of features. 640 641 Returns: 642 643 models: dict-object, 644 Returns a dictionary with each model pipeline as value 645 with key as name of models. 646 647 """ 648 if len(self.models_.keys()) == 0: 649 self.fit(X_train, X_test, y_train, y_test) 650 651 return self.models_
Fitting -- almost -- all the regression algorithms with layers of nnetsauce's CustomRegressor and returning their scores.
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, warnings related to algorithms that are not able to run are ignored.
custom_metric: function, optional (default=None)
When a function is provided, models are evaluated based on that custom evaluation metric.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models are returned as a data frame.
sort_by: string, optional (default='RMSE')
Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken'
and 'Custom metric', i.e. the column created when a custom_metric function is provided.
random_state: int, optional (default=42)
Reproducibility seed.
estimators: list, optional (default='all')
List of estimator names, or just 'all'.
preprocess: bool
Preprocessing is done when set to True.
n_jobs: int, optional (default=None)
When possible, run in parallel; for now, only used by individual models that support it.
n_layers: int, optional (default=3)
Number of layers of CustomRegressors to be used.
All the other parameters are the same as CustomRegressor's.
Attributes:
models_: dict-object
Returns a dictionary with each model pipeline as value
and the model name as key.
best_model_: object
Returns the best model pipeline based on the sort_by metric.
Examples:
import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle
diabetes = datasets.load_diabetes()
X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
X = X.astype(np.float32)
offset = int(X.shape[0] * 0.9)
X_train, y_train = X[:offset], y[:offset]
X_test, y_test = X[offset:], y[offset:]
reg = ns.LazyDeepRegressor(verbose=0, ignore_warnings=False, custom_metric=None, predictions=True)
models, predictions = reg.fit(X_train, X_test, y_train, y_test)
print(models)
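For reference, a further minimal sketch (not part of the original docstring): it assumes that the names passed to estimators follow scikit-learn class names, since they are matched against sklearn.utils.all_estimators, and that custom_metric is called as custom_metric(y_test, y_pred).

import nnetsauce as ns
from sklearn import datasets
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split

X, y = datasets.load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Restrict the search to two linear models and add MAE as a custom metric
reg = ns.LazyDeepRegressor(
    verbose=0,
    ignore_warnings=True,
    custom_metric=mean_absolute_error,   # reported in a 'Custom metric' column
    estimators=["Ridge", "ElasticNet"],  # assumed to match scikit-learn class names
    n_layers=2,
)
scores = reg.fit(X_train, X_test, y_train, y_test)  # scores only, since predictions=False
print(scores)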
Fit regression algorithms to X_train and y_train, then predict and score on X_test and y_test.
Parameters:
X_train : array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
X_test : array-like,
Testing vectors, where rows is the number of samples
and columns is the number of features.
y_train : array-like,
Training target values.
y_test : array-like,
Testing target values.
Returns:
scores: Pandas DataFrame. Metrics of all the models, sorted by sort_by.
predictions: dict. Test-set predictions of all the models, keyed by model name; only returned when predictions=True.
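Since the second return value only exists when predictions=True, here is a short, hedged sketch (not from the original docs) of that calling pattern:

import nnetsauce as ns
from sklearn import datasets
from sklearn.model_selection import train_test_split

X, y = datasets.load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=13)

reg = ns.LazyDeepRegressor(predictions=True, ignore_warnings=True)
scores, preds = reg.fit(X_train, X_test, y_train, y_test)
print(scores.head())  # metrics, sorted by RMSE (the default sort_by)
print(list(preds))    # names of the models whose test-set predictions were kept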
This function returns all the model objects trained by the fit method. If fit has not been called yet, it is called first, and the models are then returned.
Parameters:
X_train : array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
X_test : array-like,
Testing vectors, where rows is the number of samples
and columns is the number of features.
y_train : array-like,
Training target values.
y_test : array-like,
Testing target values.
Returns:
models: dict-object,
Returns a dictionary with each model pipeline as value
and the model name as key.
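A hedged usage sketch (not from the original docs), reusing the diabetes data from the class example:

import nnetsauce as ns
from sklearn import datasets
from sklearn.model_selection import train_test_split

X, y = datasets.load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

reg = ns.LazyDeepRegressor(ignore_warnings=True)
reg.fit(X_train, X_test, y_train, y_test)

fitted = reg.provide_models(X_train, X_test, y_train, y_test)  # dict: model name -> fitted model
best = reg.get_best_model()                                    # best model according to sort_by
print(sorted(fitted.keys())[:5])
print(best.predict(X_test)[:5])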
998class LazyMTS(LazyDeepMTS): 999 """ 1000 Fitting -- almost -- all the regression algorithms to multivariate time series 1001 and returning their scores (no layers). 1002 1003 Parameters: 1004 1005 verbose: int, optional (default=0) 1006 Any positive number for verbosity. 1007 1008 ignore_warnings: bool, optional (default=True) 1009 When set to True, the warning related to algorigms that are not 1010 able to run are ignored. 1011 1012 custom_metric: function, optional (default=None) 1013 When function is provided, models are evaluated based on the custom 1014 evaluation metric provided. 1015 1016 predictions: bool, optional (default=False) 1017 When set to True, the predictions of all the models models are returned as dataframe. 1018 1019 sort_by: string, optional (default='RMSE') 1020 Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE', 1021 'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and 1022 provided by custom_metric. 1023 1024 random_state: int, optional (default=42) 1025 Reproducibiility seed. 1026 1027 estimators: list, optional (default='all') 1028 list of Estimators (regression algorithms) names or just 'all' (default='all') 1029 1030 preprocess: bool, preprocessing is done when set to True 1031 1032 h: int, optional (default=None) 1033 Number of steps ahead to predict (when used, must be > 0 and < X_test.shape[0]). 1034 1035 All the other parameters are the same as MTS's. 1036 1037 Attributes: 1038 1039 models_: dict-object 1040 Returns a dictionary with each model pipeline as value 1041 with key as name of models. 1042 1043 best_model_: object 1044 Returns the best model pipeline based on the sort_by metric. 1045 1046 Examples: 1047 1048 See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict 1049 1050 """ 1051 1052 def __init__( 1053 self, 1054 verbose=0, 1055 ignore_warnings=True, 1056 custom_metric=None, 1057 predictions=False, 1058 sort_by=None, # leave it as is 1059 random_state=42, 1060 estimators="all", 1061 preprocess=False, 1062 h=None, 1063 # MTS attributes 1064 obj=None, 1065 n_hidden_features=5, 1066 activation_name="relu", 1067 a=0.01, 1068 nodes_sim="sobol", 1069 bias=True, 1070 dropout=0, 1071 direct_link=True, 1072 n_clusters=2, 1073 cluster_encode=True, 1074 type_clust="kmeans", 1075 type_scaling=("std", "std", "std"), 1076 lags=15, 1077 type_pi="scp2-kde", 1078 block_size=None, 1079 replications=None, 1080 kernel=None, 1081 agg="mean", 1082 seed=123, 1083 backend="cpu", 1084 show_progress=False, 1085 ): 1086 super().__init__( 1087 verbose=verbose, 1088 ignore_warnings=ignore_warnings, 1089 custom_metric=custom_metric, 1090 predictions=predictions, 1091 sort_by=sort_by, 1092 random_state=random_state, 1093 estimators=estimators, 1094 preprocess=preprocess, 1095 n_layers=1, 1096 h=h, 1097 obj=obj, 1098 n_hidden_features=n_hidden_features, 1099 activation_name=activation_name, 1100 a=a, 1101 nodes_sim=nodes_sim, 1102 bias=bias, 1103 dropout=dropout, 1104 direct_link=direct_link, 1105 n_clusters=n_clusters, 1106 cluster_encode=cluster_encode, 1107 type_clust=type_clust, 1108 type_scaling=type_scaling, 1109 lags=lags, 1110 type_pi=type_pi, 1111 block_size=block_size, 1112 replications=replications, 1113 kernel=kernel, 1114 agg=agg, 1115 seed=seed, 1116 backend=backend, 1117 show_progress=show_progress, 1118 )
Fitting -- almost -- all the regression algorithms to multivariate time series and returning their scores (no layers).
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, warnings related to algorithms that are not
able to run are ignored.
custom_metric: function, optional (default=None)
When a function is provided, models are evaluated based on that custom
evaluation metric.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models are returned as a data frame.
sort_by: string, optional (default=None)
Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE',
'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and
provided by custom_metric. When left as None, models are sorted by 'WINKLERSCORE' if
prediction intervals are computed (replications is not None or type_pi == 'gaussian'),
and by 'RMSE' otherwise.
random_state: int, optional (default=42)
Reproducibility seed.
estimators: list, optional (default='all')
List of estimator (regression algorithm) names, or just 'all'.
preprocess: bool. Preprocessing is done when set to True.
h: int, optional (default=None)
Number of steps ahead to predict (when used, must be > 0 and < X_test.shape[0]).
All the other parameters are the same as MTS's.
Attributes:
models_: dict-object
Returns a dictionary with each model pipeline as value
and the model name as key.
best_model_: object
Returns the best model pipeline based on the sort_by metric.
Examples:
See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict
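The linked post walks through a complete example; the snippet below is only a rough sketch on synthetic data, in which the parameter values (lags, type_pi, replications, kernel) and the scikit-learn class names used in estimators are illustrative assumptions, not prescriptions from the original documentation:

import nnetsauce as ns
import numpy as np
import pandas as pd

# two small, correlated synthetic series
rng = np.random.default_rng(123)
n = 120
x1 = np.cumsum(rng.normal(size=n))
x2 = 0.5 * x1 + rng.normal(scale=0.5, size=n)
df = pd.DataFrame({"series1": x1, "series2": x2})
df_train, df_test = df.iloc[:100], df.iloc[100:]

regr_mts = ns.LazyMTS(
    verbose=0, ignore_warnings=True, show_progress=False,
    lags=4, type_pi="scp2-kde", replications=100, kernel="gaussian",
    estimators=["Ridge", "ElasticNet"],
)
scores = regr_mts.fit(df_train, df_test)  # sorted by 'WINKLERSCORE' since replications is set
print(scores)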
104class LazyDeepMTS(MTS): 105 """ 106 107 Fitting -- almost -- all the regression algorithms with layers of 108 nnetsauce's CustomRegressor to multivariate time series 109 and returning their scores. 110 111 Parameters: 112 113 verbose: int, optional (default=0) 114 Any positive number for verbosity. 115 116 ignore_warnings: bool, optional (default=True) 117 When set to True, the warning related to algorigms that are not 118 able to run are ignored. 119 120 custom_metric: function, optional (default=None) 121 When function is provided, models are evaluated based on the custom 122 evaluation metric provided. 123 124 predictions: bool, optional (default=False) 125 When set to True, the predictions of all the models models are returned as dataframe. 126 127 sort_by: string, optional (default='RMSE') 128 Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE', 129 'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and 130 provided by custom_metric. 131 132 random_state: int, optional (default=42) 133 Reproducibiility seed. 134 135 estimators: list, optional (default='all') 136 list of Estimators (regression algorithms) names or just 'all' (default='all') 137 138 preprocess: bool, preprocessing is done when set to True 139 140 n_layers: int, optional (default=1) 141 Number of layers in the network. When set to 1, the model is equivalent to a MTS. 142 143 h: int, optional (default=None) 144 Number of steps ahead to predict (when used, must be > 0 and < X_test.shape[0]). 145 146 All the other parameters are the same as MTS's. 147 148 Attributes: 149 150 models_: dict-object 151 Returns a dictionary with each model pipeline as value 152 with key as name of models. 153 154 best_model_: object 155 Returns the best model pipeline based on the sort_by metric. 
156 157 Examples: 158 159 See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict 160 161 """ 162 163 def __init__( 164 self, 165 verbose=0, 166 ignore_warnings=True, 167 custom_metric=None, 168 predictions=False, 169 sort_by=None, # leave it as is 170 random_state=42, 171 estimators="all", 172 preprocess=False, 173 n_layers=1, 174 h=None, 175 # MTS attributes 176 obj=None, 177 n_hidden_features=5, 178 activation_name="relu", 179 a=0.01, 180 nodes_sim="sobol", 181 bias=True, 182 dropout=0, 183 direct_link=True, 184 n_clusters=2, 185 cluster_encode=True, 186 type_clust="kmeans", 187 type_scaling=("std", "std", "std"), 188 lags=15, 189 type_pi="scp2-kde", 190 block_size=None, 191 replications=None, 192 kernel=None, 193 agg="mean", 194 seed=123, 195 backend="cpu", 196 show_progress=False, 197 ): 198 self.verbose = verbose 199 self.ignore_warnings = ignore_warnings 200 self.custom_metric = custom_metric 201 self.predictions = predictions 202 self.sort_by = sort_by 203 self.models_ = {} 204 self.best_model_ = None 205 self.random_state = random_state 206 self.estimators = estimators 207 self.preprocess = preprocess 208 self.n_layers = n_layers 209 self.h = h 210 super().__init__( 211 obj=obj, 212 n_hidden_features=n_hidden_features, 213 activation_name=activation_name, 214 a=a, 215 nodes_sim=nodes_sim, 216 bias=bias, 217 dropout=dropout, 218 direct_link=direct_link, 219 n_clusters=n_clusters, 220 cluster_encode=cluster_encode, 221 type_clust=type_clust, 222 type_scaling=type_scaling, 223 seed=seed, 224 backend=backend, 225 lags=lags, 226 type_pi=type_pi, 227 block_size=block_size, 228 replications=replications, 229 kernel=kernel, 230 agg=agg, 231 verbose=verbose, 232 show_progress=show_progress, 233 ) 234 if self.replications is not None or self.type_pi == "gaussian": 235 if self.sort_by is None: 236 self.sort_by = "WINKLERSCORE" 237 else: 238 if self.sort_by is None: 239 self.sort_by = "RMSE" 240 241 def fit(self, X_train, X_test, xreg=None, per_series=False, **kwargs): 242 """Fit Regression algorithms to X_train, predict and score on X_test. 243 244 Parameters: 245 246 X_train: array-like or data frame, 247 Training vectors, where rows is the number of samples 248 and columns is the number of features. 249 250 X_test: array-like or data frame, 251 Testing vectors, where rows is the number of samples 252 and columns is the number of features. 253 254 xreg: array-like, optional (default=None) 255 Additional (external) regressors to be passed to self.obj 256 xreg must be in 'increasing' order (most recent observations last) 257 258 per_series: bool, optional (default=False) 259 When set to True, the metrics are computed series by series. 260 261 **kwargs: dict, optional (default=None) 262 Additional parameters to be passed to `fit` method of `obj`. 263 264 Returns: 265 266 scores: Pandas DataFrame 267 Returns metrics of all the models in a Pandas DataFrame. 268 269 predictions: Pandas DataFrame 270 Returns predictions of all the models in a Pandas DataFrame. 271 272 """ 273 R2 = [] 274 ADJR2 = [] 275 ME = [] 276 MPL = [] 277 RMSE = [] 278 MAE = [] 279 MPE = [] 280 MAPE = [] 281 WINKLERSCORE = [] 282 COVERAGE = [] 283 284 # WIN = [] 285 names = [] 286 TIME = [] 287 predictions = {} 288 289 if self.custom_metric is not None: 290 CUSTOM_METRIC = [] 291 292 if self.h is None: 293 assert X_test is not None, "If h is None, X_test must be provided." 
294 295 if isinstance(X_train, np.ndarray): 296 X_train = pd.DataFrame(X_train) 297 X_test = pd.DataFrame(X_test) 298 299 self.series_names = X_train.columns.tolist() 300 301 X_train = convert_df_to_numeric(X_train) 302 X_test = convert_df_to_numeric(X_test) 303 304 numeric_features = X_train.select_dtypes(include=[np.number]).columns 305 categorical_features = X_train.select_dtypes(include=["object"]).columns 306 307 categorical_low, categorical_high = get_card_split( 308 X_train, categorical_features 309 ) 310 311 if self.preprocess: 312 preprocessor = ColumnTransformer( 313 transformers=[ 314 ("numeric", numeric_transformer, numeric_features), 315 ( 316 "categorical_low", 317 categorical_transformer_low, 318 categorical_low, 319 ), 320 ( 321 "categorical_high", 322 categorical_transformer_high, 323 categorical_high, 324 ), 325 ] 326 ) 327 328 # baselines (Classical MTS) ---- 329 for i, name in enumerate(["ARIMA", "ETS", "Theta", "VAR", "VECM"]): 330 try: 331 start = time.time() 332 regr = ClassicalMTS(model=name) 333 regr.fit(X_train, **kwargs) 334 self.models_[name] = regr 335 if self.h is None: 336 X_pred = regr.predict(h=X_test.shape[0], **kwargs) 337 else: 338 assert self.h > 0, "h must be > 0" 339 X_pred = regr.predict(h=self.h, **kwargs) 340 try: 341 X_test = X_test[0: self.h, :] 342 except Exception as e: 343 X_test = X_test.iloc[0: self.h, :] 344 345 rmse = mean_errors( 346 actual=X_test, 347 pred=X_pred, 348 scoring="root_mean_squared_error", 349 per_series=per_series, 350 ) 351 mae = mean_errors( 352 actual=X_test, 353 pred=X_pred, 354 scoring="mean_absolute_error", 355 per_series=per_series, 356 ) 357 mpl = mean_errors( 358 actual=X_test, 359 pred=X_pred, 360 scoring="mean_pinball_loss", 361 per_series=per_series, 362 ) 363 except Exception: 364 continue 365 366 names.append(name) 367 RMSE.append(rmse) 368 MAE.append(mae) 369 MPL.append(mpl) 370 371 if self.custom_metric is not None: 372 try: 373 if self.h is None: 374 custom_metric = self.custom_metric(X_test, X_pred) 375 else: 376 custom_metric = self.custom_metric(X_test_h, X_pred) 377 CUSTOM_METRIC.append(custom_metric) 378 except Exception as e: 379 custom_metric = np.iinfo(np.float32).max 380 CUSTOM_METRIC.append(np.iinfo(np.float32).max) 381 382 if (self.replications is not None) or (self.type_pi == "gaussian"): 383 if per_series == False: 384 winklerscore = winkler_score( 385 obj=X_pred, actual=X_test, level=95 386 ) 387 coveragecalc = coverage(X_pred, X_test, level=95) 388 else: 389 winklerscore = winkler_score( 390 obj=X_pred, actual=X_test, level=95, per_series=True 391 ) 392 coveragecalc = coverage( 393 X_pred, X_test, level=95, per_series=True 394 ) 395 WINKLERSCORE.append(winklerscore) 396 COVERAGE.append(coveragecalc) 397 TIME.append(time.time() - start) 398 399 if self.estimators == "all": 400 if self.n_layers <= 1: 401 self.regressors = REGRESSORSMTS 402 else: 403 self.regressors = DEEPREGRESSORSMTS 404 else: 405 if self.n_layers <= 1: 406 self.regressors = [ 407 ("MTS(" + est[0] + ")", est[1]) 408 for est in all_estimators() 409 if ( 410 issubclass(est[1], RegressorMixin) 411 and (est[0] in self.estimators) 412 ) 413 ] 414 else: # self.n_layers > 1 415 self.regressors = [ 416 ("DeepMTS(" + est[0] + ")", est[1]) 417 for est in all_estimators() 418 if ( 419 issubclass(est[1], RegressorMixin) 420 and (est[0] in self.estimators) 421 ) 422 ] 423 424 if self.preprocess is True: 425 for name, model in tqdm(self.regressors): # do parallel exec 426 start = time.time() 427 try: 428 if "random_state" in 
model().get_params().keys(): 429 pipe = Pipeline( 430 steps=[ 431 ("preprocessor", preprocessor), 432 ( 433 "regressor", 434 DeepMTS( 435 obj=model( 436 random_state=self.random_state, 437 **kwargs, 438 ), 439 n_layers=self.n_layers, 440 n_hidden_features=self.n_hidden_features, 441 activation_name=self.activation_name, 442 a=self.a, 443 nodes_sim=self.nodes_sim, 444 bias=self.bias, 445 dropout=self.dropout, 446 direct_link=self.direct_link, 447 n_clusters=self.n_clusters, 448 cluster_encode=self.cluster_encode, 449 type_clust=self.type_clust, 450 type_scaling=self.type_scaling, 451 lags=self.lags, 452 type_pi=self.type_pi, 453 block_size=self.block_size, 454 replications=self.replications, 455 kernel=self.kernel, 456 agg=self.agg, 457 seed=self.seed, 458 backend=self.backend, 459 show_progress=self.show_progress, 460 ), 461 ), 462 ] 463 ) 464 else: # "random_state" in model().get_params().keys() 465 pipe = Pipeline( 466 steps=[ 467 ("preprocessor", preprocessor), 468 ( 469 "regressor", 470 DeepMTS( 471 obj=model(**kwargs), 472 n_layers=self.n_layers, 473 n_hidden_features=self.n_hidden_features, 474 activation_name=self.activation_name, 475 a=self.a, 476 nodes_sim=self.nodes_sim, 477 bias=self.bias, 478 dropout=self.dropout, 479 direct_link=self.direct_link, 480 n_clusters=self.n_clusters, 481 cluster_encode=self.cluster_encode, 482 type_clust=self.type_clust, 483 type_scaling=self.type_scaling, 484 lags=self.lags, 485 type_pi=self.type_pi, 486 block_size=self.block_size, 487 replications=self.replications, 488 kernel=self.kernel, 489 agg=self.agg, 490 seed=self.seed, 491 backend=self.backend, 492 show_progress=self.show_progress, 493 ), 494 ), 495 ] 496 ) 497 498 pipe.fit(X_train, **kwargs) 499 # pipe.fit(X_train, xreg=xreg) 500 501 self.models_[name] = pipe 502 503 if self.h is None: 504 X_pred = pipe["regressor"].predict(h=self.h, **kwargs) 505 else: 506 assert self.h > 0, "h must be > 0" 507 X_pred = pipe["regressor"].predict(h=self.h, **kwargs) 508 509 if (self.replications is not None) or ( 510 self.type_pi == "gaussian" 511 ): 512 rmse = mean_errors( 513 actual=X_test, 514 pred=X_pred, 515 scoring="root_mean_squared_error", 516 per_series=per_series, 517 ) 518 mae = mean_errors( 519 actual=X_test, 520 pred=X_pred, 521 scoring="mean_absolute_error", 522 per_series=per_series, 523 ) 524 mpl = mean_errors( 525 actual=X_test, 526 pred=X_pred, 527 scoring="mean_pinball_loss", 528 per_series=per_series, 529 ) 530 winklerscore = winkler_score( 531 obj=X_pred, 532 actual=X_test, 533 level=95, 534 per_series=per_series, 535 ) 536 coveragecalc = coverage( 537 X_pred, X_test, level=95, per_series=per_series 538 ) 539 else: 540 rmse = mean_errors( 541 actual=X_test, 542 pred=X_pred, 543 scoring="root_mean_squared_error", 544 per_series=per_series, 545 ) 546 mae = mean_errors( 547 actual=X_test, 548 pred=X_pred, 549 scoring="mean_absolute_error", 550 per_series=per_series, 551 ) 552 mpl = mean_errors( 553 actual=X_test, 554 pred=X_pred, 555 scoring="mean_pinball_loss", 556 per_series=per_series, 557 ) 558 559 names.append(name) 560 RMSE.append(rmse) 561 MAE.append(mae) 562 MPL.append(mpl) 563 564 if (self.replications is not None) or ( 565 self.type_pi == "gaussian" 566 ): 567 WINKLERSCORE.append(winklerscore) 568 COVERAGE.append(coveragecalc) 569 TIME.append(time.time() - start) 570 571 if self.custom_metric is not None: 572 try: 573 custom_metric = self.custom_metric(X_test, X_pred) 574 CUSTOM_METRIC.append(custom_metric) 575 except Exception as e: 576 custom_metric = np.iinfo(np.float32).max 
577 CUSTOM_METRIC.append(custom_metric) 578 579 if self.verbose > 0: 580 if (self.replications is not None) or ( 581 self.type_pi == "gaussian" 582 ): 583 scores_verbose = { 584 "Model": name, 585 "RMSE": rmse, 586 "MAE": mae, 587 "MPL": mpl, 588 "WINKLERSCORE": winklerscore, 589 "COVERAGE": coveragecalc, 590 "Time taken": time.time() - start, 591 } 592 else: 593 scores_verbose = { 594 "Model": name, 595 "RMSE": rmse, 596 "MAE": mae, 597 "MPL": mpl, 598 "Time taken": time.time() - start, 599 } 600 601 if self.custom_metric is not None: 602 scores_verbose["Custom metric"] = custom_metric 603 604 if self.predictions: 605 predictions[name] = X_pred 606 except Exception as exception: 607 if self.ignore_warnings is False: 608 print(name + " model failed to execute") 609 print(exception) 610 611 else: # no preprocessing 612 for name, model in tqdm(self.regressors): # do parallel exec 613 start = time.time() 614 try: 615 if "random_state" in model().get_params().keys(): 616 pipe = DeepMTS( 617 obj=model(random_state=self.random_state, **kwargs), 618 n_layers=self.n_layers, 619 n_hidden_features=self.n_hidden_features, 620 activation_name=self.activation_name, 621 a=self.a, 622 nodes_sim=self.nodes_sim, 623 bias=self.bias, 624 dropout=self.dropout, 625 direct_link=self.direct_link, 626 n_clusters=self.n_clusters, 627 cluster_encode=self.cluster_encode, 628 type_clust=self.type_clust, 629 type_scaling=self.type_scaling, 630 lags=self.lags, 631 type_pi=self.type_pi, 632 block_size=self.block_size, 633 replications=self.replications, 634 kernel=self.kernel, 635 agg=self.agg, 636 seed=self.seed, 637 backend=self.backend, 638 show_progress=self.show_progress, 639 ) 640 else: 641 pipe = DeepMTS( 642 obj=model(**kwargs), 643 n_layers=self.n_layers, 644 n_hidden_features=self.n_hidden_features, 645 activation_name=self.activation_name, 646 a=self.a, 647 nodes_sim=self.nodes_sim, 648 bias=self.bias, 649 dropout=self.dropout, 650 direct_link=self.direct_link, 651 n_clusters=self.n_clusters, 652 cluster_encode=self.cluster_encode, 653 type_clust=self.type_clust, 654 type_scaling=self.type_scaling, 655 lags=self.lags, 656 type_pi=self.type_pi, 657 block_size=self.block_size, 658 replications=self.replications, 659 kernel=self.kernel, 660 agg=self.agg, 661 seed=self.seed, 662 backend=self.backend, 663 show_progress=self.show_progress, 664 ) 665 666 pipe.fit(X_train, xreg, **kwargs) 667 # pipe.fit(X_train, xreg=xreg) # DO xreg like in `ahead` 668 669 self.models_[name] = pipe 670 671 if self.preprocess is True: 672 if self.h is None: 673 X_pred = pipe["regressor"].predict( 674 h=X_test.shape[0], **kwargs 675 ) 676 else: 677 assert ( 678 self.h > 0 and self.h <= X_test.shape[0] 679 ), "h must be > 0 and < X_test.shape[0]" 680 X_pred = pipe["regressor"].predict( 681 h=self.h, **kwargs 682 ) 683 684 else: 685 if self.h is None: 686 X_pred = pipe.predict( 687 h=X_test.shape[0], 688 **kwargs, 689 # X_pred = pipe.predict(h=X_test.shape[0], new_xreg=new_xreg) ## DO xreg like in `ahead` 690 ) 691 else: 692 assert ( 693 self.h > 0 and self.h <= X_test.shape[0] 694 ), "h must be > 0 and < X_test.shape[0]" 695 X_pred = pipe.predict(h=self.h, **kwargs) 696 697 if self.h is None: 698 if (self.replications is not None) or ( 699 self.type_pi == "gaussian" 700 ): 701 rmse = mean_errors( 702 actual=X_test, 703 pred=X_pred.mean, 704 scoring="root_mean_squared_error", 705 per_series=per_series, 706 ) 707 mae = mean_errors( 708 actual=X_test, 709 pred=X_pred.mean, 710 scoring="mean_absolute_error", 711 per_series=per_series, 712 ) 
713 mpl = mean_errors( 714 actual=X_test, 715 pred=X_pred.mean, 716 scoring="mean_pinball_loss", 717 per_series=per_series, 718 ) 719 winklerscore = winkler_score( 720 obj=X_pred, 721 actual=X_test, 722 level=95, 723 per_series=per_series, 724 ) 725 coveragecalc = coverage( 726 X_pred, X_test, level=95, per_series=per_series 727 ) 728 else: # no prediction interval 729 rmse = mean_errors( 730 actual=X_test, 731 pred=X_pred, 732 scoring="root_mean_squared_error", 733 per_series=per_series, 734 ) 735 mae = mean_errors( 736 actual=X_test, 737 pred=X_pred, 738 scoring="mean_absolute_error", 739 per_series=per_series, 740 ) 741 mpl = mean_errors( 742 actual=X_test, 743 pred=X_pred, 744 scoring="mean_pinball_loss", 745 per_series=per_series, 746 ) 747 else: # self.h is not None 748 if (self.replications is not None) or ( 749 self.type_pi == "gaussian" 750 ): 751 if isinstance(X_test, pd.DataFrame): 752 X_test_h = X_test.iloc[0: self.h, :] 753 rmse = mean_errors( 754 actual=X_test_h, 755 pred=X_pred, 756 scoring="root_mean_squared_error", 757 per_series=per_series, 758 ) 759 mae = mean_errors( 760 actual=X_test_h, 761 pred=X_pred, 762 scoring="mean_absolute_error", 763 per_series=per_series, 764 ) 765 mpl = mean_errors( 766 actual=X_test_h, 767 pred=X_pred, 768 scoring="mean_pinball_loss", 769 per_series=per_series, 770 ) 771 winklerscore = winkler_score( 772 obj=X_pred, 773 actual=X_test_h, 774 level=95, 775 per_series=per_series, 776 ) 777 coveragecalc = coverage( 778 X_pred, 779 X_test_h, 780 level=95, 781 per_series=per_series, 782 ) 783 else: 784 X_test_h = X_test[0: self.h, :] 785 rmse = mean_errors( 786 actual=X_test_h, 787 pred=X_pred, 788 scoring="root_mean_squared_error", 789 per_series=per_series, 790 ) 791 mae = mean_errors( 792 actual=X_test_h, 793 pred=X_pred, 794 scoring="mean_absolute_error", 795 per_series=per_series, 796 ) 797 mpl = mean_errors( 798 actual=X_test_h, 799 pred=X_pred, 800 scoring="mean_pinball_loss", 801 per_series=per_series, 802 ) 803 winklerscore = winkler_score( 804 obj=X_pred, 805 actual=X_test_h, 806 level=95, 807 per_series=per_series, 808 ) 809 coveragecalc = coverage( 810 X_pred, 811 X_test_h, 812 level=95, 813 per_series=per_series, 814 ) 815 else: # no prediction interval 816 if isinstance(X_test, pd.DataFrame): 817 X_test_h = X_test.iloc[0: self.h, :] 818 rmse = mean_errors( 819 actual=X_test_h, 820 pred=X_pred, 821 scoring="root_mean_squared_error", 822 per_series=per_series, 823 ) 824 mae = mean_errors( 825 actual=X_test_h, 826 pred=X_pred, 827 scoring="mean_absolute_error", 828 per_series=per_series, 829 ) 830 mpl = mean_errors( 831 actual=X_test_h, 832 pred=X_pred, 833 scoring="mean_pinball_loss", 834 per_series=per_series, 835 ) 836 else: 837 X_test_h = X_test[0: self.h, :] 838 rmse = mean_errors( 839 actual=X_test_h, 840 pred=X_pred, 841 scoring="root_mean_squared_error", 842 per_series=per_series, 843 ) 844 mae = mean_errors( 845 actual=X_test_h, 846 pred=X_pred, 847 scoring="mean_absolute_error", 848 per_series=per_series, 849 ) 850 851 names.append(name) 852 RMSE.append(rmse) 853 MAE.append(mae) 854 MPL.append(mpl) 855 if (self.replications is not None) or ( 856 self.type_pi == "gaussian" 857 ): 858 WINKLERSCORE.append(winklerscore) 859 COVERAGE.append(coveragecalc) 860 TIME.append(time.time() - start) 861 862 if self.custom_metric is not None: 863 try: 864 if self.h is None: 865 custom_metric = self.custom_metric( 866 X_test, X_pred 867 ) 868 else: 869 custom_metric = self.custom_metric( 870 X_test_h, X_pred 871 ) 872 
CUSTOM_METRIC.append(custom_metric) 873 except Exception as e: 874 custom_metric = np.iinfo(np.float32).max 875 CUSTOM_METRIC.append(np.iinfo(np.float32).max) 876 877 if self.verbose > 0: 878 if (self.replications is not None) or ( 879 self.type_pi == "gaussian" 880 ): 881 scores_verbose = { 882 "Model": name, 883 "RMSE": rmse, 884 "MAE": mae, 885 "MPL": mpl, 886 "WINKLERSCORE": winklerscore, 887 "COVERAGE": coveragecalc, 888 "Time taken": time.time() - start, 889 } 890 else: 891 scores_verbose = { 892 "Model": name, 893 "RMSE": rmse, 894 "MAE": mae, 895 "MPL": mpl, 896 "Time taken": time.time() - start, 897 } 898 899 if self.custom_metric is not None: 900 scores_verbose["Custom metric"] = custom_metric 901 902 if self.predictions: 903 predictions[name] = X_pred 904 905 except Exception as exception: 906 if self.ignore_warnings is False: 907 print(name + " model failed to execute") 908 print(exception) 909 910 if (self.replications is not None) or (self.type_pi == "gaussian"): 911 scores = { 912 "Model": names, 913 "RMSE": RMSE, 914 "MAE": MAE, 915 "MPL": MPL, 916 "WINKLERSCORE": WINKLERSCORE, 917 "COVERAGE": COVERAGE, 918 "Time Taken": TIME, 919 } 920 else: 921 scores = { 922 "Model": names, 923 "RMSE": RMSE, 924 "MAE": MAE, 925 "MPL": MPL, 926 "Time Taken": TIME, 927 } 928 929 if self.custom_metric is not None: 930 scores["Custom metric"] = CUSTOM_METRIC 931 932 if per_series: 933 scores = dict_to_dataframe_series(scores, self.series_names) 934 else: 935 scores = pd.DataFrame(scores) 936 937 try: # case per_series, can't be sorted 938 scores = scores.sort_values( 939 by=self.sort_by, ascending=True 940 ).set_index("Model") 941 942 self.best_model_ = self.models_[scores.index[0]] 943 except Exception as e: 944 pass 945 946 if self.predictions is True: 947 return scores, predictions 948 949 return scores 950 951 def get_best_model(self): 952 """ 953 This function returns the best model pipeline based on the sort_by metric. 954 955 Returns: 956 957 best_model: object, 958 Returns the best model pipeline based on the sort_by metric. 959 960 """ 961 return self.best_model_ 962 963 def provide_models(self, X_train, X_test): 964 """ 965 This function returns all the model objects trained in fit function. 966 If fit is not called already, then we call fit and then return the models. 967 968 Parameters: 969 970 X_train : array-like, 971 Training vectors, where rows is the number of samples 972 and columns is the number of features. 973 974 X_test : array-like, 975 Testing vectors, where rows is the number of samples 976 and columns is the number of features. 977 978 Returns: 979 980 models: dict-object, 981 Returns a dictionary with each model pipeline as value 982 with key as name of models. 983 984 """ 985 if self.h is None: 986 if len(self.models_.keys()) == 0: 987 self.fit(X_train, X_test) 988 else: 989 if len(self.models_.keys()) == 0: 990 if isinstance(X_test, pd.DataFrame): 991 self.fit(X_train, X_test.iloc[0: self.h, :]) 992 else: 993 self.fit(X_train, X_test[0: self.h, :]) 994 995 return self.models_
Fitting -- almost -- all the regression algorithms with layers of nnetsauce's CustomRegressor to multivariate time series and returning their scores.
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, warnings related to algorithms that are not
able to run are ignored.
custom_metric: function, optional (default=None)
When a function is provided, models are evaluated based on that custom
evaluation metric.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models are returned as a data frame.
sort_by: string, optional (default=None)
Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE',
'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and
provided by custom_metric. When left as None, models are sorted by 'WINKLERSCORE' if
prediction intervals are computed (replications is not None or type_pi == 'gaussian'),
and by 'RMSE' otherwise.
random_state: int, optional (default=42)
Reproducibility seed.
estimators: list, optional (default='all')
List of estimator (regression algorithm) names, or just 'all'.
preprocess: bool. Preprocessing is done when set to True.
n_layers: int, optional (default=1)
Number of layers in the network. When set to 1, the model is equivalent to an MTS.
h: int, optional (default=None)
Number of steps ahead to predict (when used, must be > 0 and < X_test.shape[0]).
All the other parameters are the same as MTS's.
Attributes:
models_: dict-object
Returns a dictionary with each model pipeline as value
and the model name as key.
best_model_: object
Returns the best model pipeline based on the sort_by metric.
Examples:
See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict
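Here too, only a rough, hedged sketch on synthetic data; n_layers and the other parameter values are illustrative assumptions rather than recommendations from the original documentation:

import nnetsauce as ns
import numpy as np
import pandas as pd

rng = np.random.default_rng(42)
n = 150
trend = np.cumsum(rng.normal(size=n))
df = pd.DataFrame({
    "y1": trend + rng.normal(scale=0.3, size=n),
    "y2": 0.7 * trend + rng.normal(scale=0.3, size=n),
})
df_train, df_test = df.iloc[:130], df.iloc[130:]

deep_mts = ns.LazyDeepMTS(
    verbose=0, ignore_warnings=True, show_progress=False,
    n_layers=2, lags=5, type_pi="scp2-kde", replications=100, kernel="gaussian",
    estimators=["Ridge", "ElasticNet"],
)
scores = deep_mts.fit(df_train, df_test)
print(scores)
print(deep_mts.get_best_model())  # None if no model could be scored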
241 def fit(self, X_train, X_test, xreg=None, per_series=False, **kwargs): 242 """Fit Regression algorithms to X_train, predict and score on X_test. 243 244 Parameters: 245 246 X_train: array-like or data frame, 247 Training vectors, where rows is the number of samples 248 and columns is the number of features. 249 250 X_test: array-like or data frame, 251 Testing vectors, where rows is the number of samples 252 and columns is the number of features. 253 254 xreg: array-like, optional (default=None) 255 Additional (external) regressors to be passed to self.obj 256 xreg must be in 'increasing' order (most recent observations last) 257 258 per_series: bool, optional (default=False) 259 When set to True, the metrics are computed series by series. 260 261 **kwargs: dict, optional (default=None) 262 Additional parameters to be passed to `fit` method of `obj`. 263 264 Returns: 265 266 scores: Pandas DataFrame 267 Returns metrics of all the models in a Pandas DataFrame. 268 269 predictions: Pandas DataFrame 270 Returns predictions of all the models in a Pandas DataFrame. 271 272 """ 273 R2 = [] 274 ADJR2 = [] 275 ME = [] 276 MPL = [] 277 RMSE = [] 278 MAE = [] 279 MPE = [] 280 MAPE = [] 281 WINKLERSCORE = [] 282 COVERAGE = [] 283 284 # WIN = [] 285 names = [] 286 TIME = [] 287 predictions = {} 288 289 if self.custom_metric is not None: 290 CUSTOM_METRIC = [] 291 292 if self.h is None: 293 assert X_test is not None, "If h is None, X_test must be provided." 294 295 if isinstance(X_train, np.ndarray): 296 X_train = pd.DataFrame(X_train) 297 X_test = pd.DataFrame(X_test) 298 299 self.series_names = X_train.columns.tolist() 300 301 X_train = convert_df_to_numeric(X_train) 302 X_test = convert_df_to_numeric(X_test) 303 304 numeric_features = X_train.select_dtypes(include=[np.number]).columns 305 categorical_features = X_train.select_dtypes(include=["object"]).columns 306 307 categorical_low, categorical_high = get_card_split( 308 X_train, categorical_features 309 ) 310 311 if self.preprocess: 312 preprocessor = ColumnTransformer( 313 transformers=[ 314 ("numeric", numeric_transformer, numeric_features), 315 ( 316 "categorical_low", 317 categorical_transformer_low, 318 categorical_low, 319 ), 320 ( 321 "categorical_high", 322 categorical_transformer_high, 323 categorical_high, 324 ), 325 ] 326 ) 327 328 # baselines (Classical MTS) ---- 329 for i, name in enumerate(["ARIMA", "ETS", "Theta", "VAR", "VECM"]): 330 try: 331 start = time.time() 332 regr = ClassicalMTS(model=name) 333 regr.fit(X_train, **kwargs) 334 self.models_[name] = regr 335 if self.h is None: 336 X_pred = regr.predict(h=X_test.shape[0], **kwargs) 337 else: 338 assert self.h > 0, "h must be > 0" 339 X_pred = regr.predict(h=self.h, **kwargs) 340 try: 341 X_test = X_test[0: self.h, :] 342 except Exception as e: 343 X_test = X_test.iloc[0: self.h, :] 344 345 rmse = mean_errors( 346 actual=X_test, 347 pred=X_pred, 348 scoring="root_mean_squared_error", 349 per_series=per_series, 350 ) 351 mae = mean_errors( 352 actual=X_test, 353 pred=X_pred, 354 scoring="mean_absolute_error", 355 per_series=per_series, 356 ) 357 mpl = mean_errors( 358 actual=X_test, 359 pred=X_pred, 360 scoring="mean_pinball_loss", 361 per_series=per_series, 362 ) 363 except Exception: 364 continue 365 366 names.append(name) 367 RMSE.append(rmse) 368 MAE.append(mae) 369 MPL.append(mpl) 370 371 if self.custom_metric is not None: 372 try: 373 if self.h is None: 374 custom_metric = self.custom_metric(X_test, X_pred) 375 else: 376 custom_metric = self.custom_metric(X_test_h, 
X_pred) 377 CUSTOM_METRIC.append(custom_metric) 378 except Exception as e: 379 custom_metric = np.iinfo(np.float32).max 380 CUSTOM_METRIC.append(np.iinfo(np.float32).max) 381 382 if (self.replications is not None) or (self.type_pi == "gaussian"): 383 if per_series == False: 384 winklerscore = winkler_score( 385 obj=X_pred, actual=X_test, level=95 386 ) 387 coveragecalc = coverage(X_pred, X_test, level=95) 388 else: 389 winklerscore = winkler_score( 390 obj=X_pred, actual=X_test, level=95, per_series=True 391 ) 392 coveragecalc = coverage( 393 X_pred, X_test, level=95, per_series=True 394 ) 395 WINKLERSCORE.append(winklerscore) 396 COVERAGE.append(coveragecalc) 397 TIME.append(time.time() - start) 398 399 if self.estimators == "all": 400 if self.n_layers <= 1: 401 self.regressors = REGRESSORSMTS 402 else: 403 self.regressors = DEEPREGRESSORSMTS 404 else: 405 if self.n_layers <= 1: 406 self.regressors = [ 407 ("MTS(" + est[0] + ")", est[1]) 408 for est in all_estimators() 409 if ( 410 issubclass(est[1], RegressorMixin) 411 and (est[0] in self.estimators) 412 ) 413 ] 414 else: # self.n_layers > 1 415 self.regressors = [ 416 ("DeepMTS(" + est[0] + ")", est[1]) 417 for est in all_estimators() 418 if ( 419 issubclass(est[1], RegressorMixin) 420 and (est[0] in self.estimators) 421 ) 422 ] 423 424 if self.preprocess is True: 425 for name, model in tqdm(self.regressors): # do parallel exec 426 start = time.time() 427 try: 428 if "random_state" in model().get_params().keys(): 429 pipe = Pipeline( 430 steps=[ 431 ("preprocessor", preprocessor), 432 ( 433 "regressor", 434 DeepMTS( 435 obj=model( 436 random_state=self.random_state, 437 **kwargs, 438 ), 439 n_layers=self.n_layers, 440 n_hidden_features=self.n_hidden_features, 441 activation_name=self.activation_name, 442 a=self.a, 443 nodes_sim=self.nodes_sim, 444 bias=self.bias, 445 dropout=self.dropout, 446 direct_link=self.direct_link, 447 n_clusters=self.n_clusters, 448 cluster_encode=self.cluster_encode, 449 type_clust=self.type_clust, 450 type_scaling=self.type_scaling, 451 lags=self.lags, 452 type_pi=self.type_pi, 453 block_size=self.block_size, 454 replications=self.replications, 455 kernel=self.kernel, 456 agg=self.agg, 457 seed=self.seed, 458 backend=self.backend, 459 show_progress=self.show_progress, 460 ), 461 ), 462 ] 463 ) 464 else: # "random_state" in model().get_params().keys() 465 pipe = Pipeline( 466 steps=[ 467 ("preprocessor", preprocessor), 468 ( 469 "regressor", 470 DeepMTS( 471 obj=model(**kwargs), 472 n_layers=self.n_layers, 473 n_hidden_features=self.n_hidden_features, 474 activation_name=self.activation_name, 475 a=self.a, 476 nodes_sim=self.nodes_sim, 477 bias=self.bias, 478 dropout=self.dropout, 479 direct_link=self.direct_link, 480 n_clusters=self.n_clusters, 481 cluster_encode=self.cluster_encode, 482 type_clust=self.type_clust, 483 type_scaling=self.type_scaling, 484 lags=self.lags, 485 type_pi=self.type_pi, 486 block_size=self.block_size, 487 replications=self.replications, 488 kernel=self.kernel, 489 agg=self.agg, 490 seed=self.seed, 491 backend=self.backend, 492 show_progress=self.show_progress, 493 ), 494 ), 495 ] 496 ) 497 498 pipe.fit(X_train, **kwargs) 499 # pipe.fit(X_train, xreg=xreg) 500 501 self.models_[name] = pipe 502 503 if self.h is None: 504 X_pred = pipe["regressor"].predict(h=self.h, **kwargs) 505 else: 506 assert self.h > 0, "h must be > 0" 507 X_pred = pipe["regressor"].predict(h=self.h, **kwargs) 508 509 if (self.replications is not None) or ( 510 self.type_pi == "gaussian" 511 ): 512 rmse = 
mean_errors( 513 actual=X_test, 514 pred=X_pred, 515 scoring="root_mean_squared_error", 516 per_series=per_series, 517 ) 518 mae = mean_errors( 519 actual=X_test, 520 pred=X_pred, 521 scoring="mean_absolute_error", 522 per_series=per_series, 523 ) 524 mpl = mean_errors( 525 actual=X_test, 526 pred=X_pred, 527 scoring="mean_pinball_loss", 528 per_series=per_series, 529 ) 530 winklerscore = winkler_score( 531 obj=X_pred, 532 actual=X_test, 533 level=95, 534 per_series=per_series, 535 ) 536 coveragecalc = coverage( 537 X_pred, X_test, level=95, per_series=per_series 538 ) 539 else: 540 rmse = mean_errors( 541 actual=X_test, 542 pred=X_pred, 543 scoring="root_mean_squared_error", 544 per_series=per_series, 545 ) 546 mae = mean_errors( 547 actual=X_test, 548 pred=X_pred, 549 scoring="mean_absolute_error", 550 per_series=per_series, 551 ) 552 mpl = mean_errors( 553 actual=X_test, 554 pred=X_pred, 555 scoring="mean_pinball_loss", 556 per_series=per_series, 557 ) 558 559 names.append(name) 560 RMSE.append(rmse) 561 MAE.append(mae) 562 MPL.append(mpl) 563 564 if (self.replications is not None) or ( 565 self.type_pi == "gaussian" 566 ): 567 WINKLERSCORE.append(winklerscore) 568 COVERAGE.append(coveragecalc) 569 TIME.append(time.time() - start) 570 571 if self.custom_metric is not None: 572 try: 573 custom_metric = self.custom_metric(X_test, X_pred) 574 CUSTOM_METRIC.append(custom_metric) 575 except Exception as e: 576 custom_metric = np.iinfo(np.float32).max 577 CUSTOM_METRIC.append(custom_metric) 578 579 if self.verbose > 0: 580 if (self.replications is not None) or ( 581 self.type_pi == "gaussian" 582 ): 583 scores_verbose = { 584 "Model": name, 585 "RMSE": rmse, 586 "MAE": mae, 587 "MPL": mpl, 588 "WINKLERSCORE": winklerscore, 589 "COVERAGE": coveragecalc, 590 "Time taken": time.time() - start, 591 } 592 else: 593 scores_verbose = { 594 "Model": name, 595 "RMSE": rmse, 596 "MAE": mae, 597 "MPL": mpl, 598 "Time taken": time.time() - start, 599 } 600 601 if self.custom_metric is not None: 602 scores_verbose["Custom metric"] = custom_metric 603 604 if self.predictions: 605 predictions[name] = X_pred 606 except Exception as exception: 607 if self.ignore_warnings is False: 608 print(name + " model failed to execute") 609 print(exception) 610 611 else: # no preprocessing 612 for name, model in tqdm(self.regressors): # do parallel exec 613 start = time.time() 614 try: 615 if "random_state" in model().get_params().keys(): 616 pipe = DeepMTS( 617 obj=model(random_state=self.random_state, **kwargs), 618 n_layers=self.n_layers, 619 n_hidden_features=self.n_hidden_features, 620 activation_name=self.activation_name, 621 a=self.a, 622 nodes_sim=self.nodes_sim, 623 bias=self.bias, 624 dropout=self.dropout, 625 direct_link=self.direct_link, 626 n_clusters=self.n_clusters, 627 cluster_encode=self.cluster_encode, 628 type_clust=self.type_clust, 629 type_scaling=self.type_scaling, 630 lags=self.lags, 631 type_pi=self.type_pi, 632 block_size=self.block_size, 633 replications=self.replications, 634 kernel=self.kernel, 635 agg=self.agg, 636 seed=self.seed, 637 backend=self.backend, 638 show_progress=self.show_progress, 639 ) 640 else: 641 pipe = DeepMTS( 642 obj=model(**kwargs), 643 n_layers=self.n_layers, 644 n_hidden_features=self.n_hidden_features, 645 activation_name=self.activation_name, 646 a=self.a, 647 nodes_sim=self.nodes_sim, 648 bias=self.bias, 649 dropout=self.dropout, 650 direct_link=self.direct_link, 651 n_clusters=self.n_clusters, 652 cluster_encode=self.cluster_encode, 653 type_clust=self.type_clust, 
654 type_scaling=self.type_scaling, 655 lags=self.lags, 656 type_pi=self.type_pi, 657 block_size=self.block_size, 658 replications=self.replications, 659 kernel=self.kernel, 660 agg=self.agg, 661 seed=self.seed, 662 backend=self.backend, 663 show_progress=self.show_progress, 664 ) 665 666 pipe.fit(X_train, xreg, **kwargs) 667 # pipe.fit(X_train, xreg=xreg) # DO xreg like in `ahead` 668 669 self.models_[name] = pipe 670 671 if self.preprocess is True: 672 if self.h is None: 673 X_pred = pipe["regressor"].predict( 674 h=X_test.shape[0], **kwargs 675 ) 676 else: 677 assert ( 678 self.h > 0 and self.h <= X_test.shape[0] 679 ), "h must be > 0 and < X_test.shape[0]" 680 X_pred = pipe["regressor"].predict( 681 h=self.h, **kwargs 682 ) 683 684 else: 685 if self.h is None: 686 X_pred = pipe.predict( 687 h=X_test.shape[0], 688 **kwargs, 689 # X_pred = pipe.predict(h=X_test.shape[0], new_xreg=new_xreg) ## DO xreg like in `ahead` 690 ) 691 else: 692 assert ( 693 self.h > 0 and self.h <= X_test.shape[0] 694 ), "h must be > 0 and < X_test.shape[0]" 695 X_pred = pipe.predict(h=self.h, **kwargs) 696 697 if self.h is None: 698 if (self.replications is not None) or ( 699 self.type_pi == "gaussian" 700 ): 701 rmse = mean_errors( 702 actual=X_test, 703 pred=X_pred.mean, 704 scoring="root_mean_squared_error", 705 per_series=per_series, 706 ) 707 mae = mean_errors( 708 actual=X_test, 709 pred=X_pred.mean, 710 scoring="mean_absolute_error", 711 per_series=per_series, 712 ) 713 mpl = mean_errors( 714 actual=X_test, 715 pred=X_pred.mean, 716 scoring="mean_pinball_loss", 717 per_series=per_series, 718 ) 719 winklerscore = winkler_score( 720 obj=X_pred, 721 actual=X_test, 722 level=95, 723 per_series=per_series, 724 ) 725 coveragecalc = coverage( 726 X_pred, X_test, level=95, per_series=per_series 727 ) 728 else: # no prediction interval 729 rmse = mean_errors( 730 actual=X_test, 731 pred=X_pred, 732 scoring="root_mean_squared_error", 733 per_series=per_series, 734 ) 735 mae = mean_errors( 736 actual=X_test, 737 pred=X_pred, 738 scoring="mean_absolute_error", 739 per_series=per_series, 740 ) 741 mpl = mean_errors( 742 actual=X_test, 743 pred=X_pred, 744 scoring="mean_pinball_loss", 745 per_series=per_series, 746 ) 747 else: # self.h is not None 748 if (self.replications is not None) or ( 749 self.type_pi == "gaussian" 750 ): 751 if isinstance(X_test, pd.DataFrame): 752 X_test_h = X_test.iloc[0: self.h, :] 753 rmse = mean_errors( 754 actual=X_test_h, 755 pred=X_pred, 756 scoring="root_mean_squared_error", 757 per_series=per_series, 758 ) 759 mae = mean_errors( 760 actual=X_test_h, 761 pred=X_pred, 762 scoring="mean_absolute_error", 763 per_series=per_series, 764 ) 765 mpl = mean_errors( 766 actual=X_test_h, 767 pred=X_pred, 768 scoring="mean_pinball_loss", 769 per_series=per_series, 770 ) 771 winklerscore = winkler_score( 772 obj=X_pred, 773 actual=X_test_h, 774 level=95, 775 per_series=per_series, 776 ) 777 coveragecalc = coverage( 778 X_pred, 779 X_test_h, 780 level=95, 781 per_series=per_series, 782 ) 783 else: 784 X_test_h = X_test[0: self.h, :] 785 rmse = mean_errors( 786 actual=X_test_h, 787 pred=X_pred, 788 scoring="root_mean_squared_error", 789 per_series=per_series, 790 ) 791 mae = mean_errors( 792 actual=X_test_h, 793 pred=X_pred, 794 scoring="mean_absolute_error", 795 per_series=per_series, 796 ) 797 mpl = mean_errors( 798 actual=X_test_h, 799 pred=X_pred, 800 scoring="mean_pinball_loss", 801 per_series=per_series, 802 ) 803 winklerscore = winkler_score( 804 obj=X_pred, 805 actual=X_test_h, 806 level=95, 
807 per_series=per_series, 808 ) 809 coveragecalc = coverage( 810 X_pred, 811 X_test_h, 812 level=95, 813 per_series=per_series, 814 ) 815 else: # no prediction interval 816 if isinstance(X_test, pd.DataFrame): 817 X_test_h = X_test.iloc[0: self.h, :] 818 rmse = mean_errors( 819 actual=X_test_h, 820 pred=X_pred, 821 scoring="root_mean_squared_error", 822 per_series=per_series, 823 ) 824 mae = mean_errors( 825 actual=X_test_h, 826 pred=X_pred, 827 scoring="mean_absolute_error", 828 per_series=per_series, 829 ) 830 mpl = mean_errors( 831 actual=X_test_h, 832 pred=X_pred, 833 scoring="mean_pinball_loss", 834 per_series=per_series, 835 ) 836 else: 837 X_test_h = X_test[0: self.h, :] 838 rmse = mean_errors( 839 actual=X_test_h, 840 pred=X_pred, 841 scoring="root_mean_squared_error", 842 per_series=per_series, 843 ) 844 mae = mean_errors( 845 actual=X_test_h, 846 pred=X_pred, 847 scoring="mean_absolute_error", 848 per_series=per_series, 849 ) 850 851 names.append(name) 852 RMSE.append(rmse) 853 MAE.append(mae) 854 MPL.append(mpl) 855 if (self.replications is not None) or ( 856 self.type_pi == "gaussian" 857 ): 858 WINKLERSCORE.append(winklerscore) 859 COVERAGE.append(coveragecalc) 860 TIME.append(time.time() - start) 861 862 if self.custom_metric is not None: 863 try: 864 if self.h is None: 865 custom_metric = self.custom_metric( 866 X_test, X_pred 867 ) 868 else: 869 custom_metric = self.custom_metric( 870 X_test_h, X_pred 871 ) 872 CUSTOM_METRIC.append(custom_metric) 873 except Exception as e: 874 custom_metric = np.iinfo(np.float32).max 875 CUSTOM_METRIC.append(np.iinfo(np.float32).max) 876 877 if self.verbose > 0: 878 if (self.replications is not None) or ( 879 self.type_pi == "gaussian" 880 ): 881 scores_verbose = { 882 "Model": name, 883 "RMSE": rmse, 884 "MAE": mae, 885 "MPL": mpl, 886 "WINKLERSCORE": winklerscore, 887 "COVERAGE": coveragecalc, 888 "Time taken": time.time() - start, 889 } 890 else: 891 scores_verbose = { 892 "Model": name, 893 "RMSE": rmse, 894 "MAE": mae, 895 "MPL": mpl, 896 "Time taken": time.time() - start, 897 } 898 899 if self.custom_metric is not None: 900 scores_verbose["Custom metric"] = custom_metric 901 902 if self.predictions: 903 predictions[name] = X_pred 904 905 except Exception as exception: 906 if self.ignore_warnings is False: 907 print(name + " model failed to execute") 908 print(exception) 909 910 if (self.replications is not None) or (self.type_pi == "gaussian"): 911 scores = { 912 "Model": names, 913 "RMSE": RMSE, 914 "MAE": MAE, 915 "MPL": MPL, 916 "WINKLERSCORE": WINKLERSCORE, 917 "COVERAGE": COVERAGE, 918 "Time Taken": TIME, 919 } 920 else: 921 scores = { 922 "Model": names, 923 "RMSE": RMSE, 924 "MAE": MAE, 925 "MPL": MPL, 926 "Time Taken": TIME, 927 } 928 929 if self.custom_metric is not None: 930 scores["Custom metric"] = CUSTOM_METRIC 931 932 if per_series: 933 scores = dict_to_dataframe_series(scores, self.series_names) 934 else: 935 scores = pd.DataFrame(scores) 936 937 try: # case per_series, can't be sorted 938 scores = scores.sort_values( 939 by=self.sort_by, ascending=True 940 ).set_index("Model") 941 942 self.best_model_ = self.models_[scores.index[0]] 943 except Exception as e: 944 pass 945 946 if self.predictions is True: 947 return scores, predictions 948 949 return scores
Fit regression algorithms to X_train, then predict and score on X_test.
Parameters:
X_train: array-like or data frame,
Training vectors, where the rows are the samples
and the columns are the features.
X_test: array-like or data frame,
Testing vectors, where the rows are the samples
and the columns are the features.
xreg: array-like, optional (default=None)
Additional (external) regressors to be passed to self.obj
xreg must be in 'increasing' order (most recent observations last)
per_series: bool, optional (default=False)
When set to True, the metrics are computed series by series.
**kwargs: dict, optional (default=None)
Additional parameters to be passed to the `fit` method of `obj`.
Returns:
scores: Pandas DataFrame
Returns metrics of all the models in a Pandas DataFrame.
predictions: Pandas DataFrame
Returns predictions of all the models in a Pandas DataFrame.
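For orientation, here is a minimal, hedged usage sketch of `fit` on synthetic data; the constructor arguments shown (`verbose`, `ignore_warnings`, `predictions`) and the returned pair of data frames are assumptions based on the lazypredict-style interface documented above.

```python
# Minimal sketch (synthetic data, hypothetical constructor defaults).
import numpy as np
import nnetsauce as ns

np.random.seed(123)
M = np.random.rand(100, 3)               # 100 observations of 3 series
X_train, X_test = M[:80, :], M[80:, :]   # chronological train/test split

regr_mts = ns.LazyDeepMTS(verbose=0, ignore_warnings=True, predictions=True)
scores, predictions = regr_mts.fit(X_train, X_test)
print(scores.head())   # RMSE / MAE / MPL / time, one row per candidate model
```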
963 def provide_models(self, X_train, X_test): 964 """ 965 This function returns all the model objects trained in fit function. 966 If fit is not called already, then we call fit and then return the models. 967 968 Parameters: 969 970 X_train : array-like, 971 Training vectors, where rows is the number of samples 972 and columns is the number of features. 973 974 X_test : array-like, 975 Testing vectors, where rows is the number of samples 976 and columns is the number of features. 977 978 Returns: 979 980 models: dict-object, 981 Returns a dictionary with each model pipeline as value 982 with key as name of models. 983 984 """ 985 if self.h is None: 986 if len(self.models_.keys()) == 0: 987 self.fit(X_train, X_test) 988 else: 989 if len(self.models_.keys()) == 0: 990 if isinstance(X_test, pd.DataFrame): 991 self.fit(X_train, X_test.iloc[0: self.h, :]) 992 else: 993 self.fit(X_train, X_test[0: self.h, :]) 994 995 return self.models_
Returns all the model objects trained by the `fit` method. If `fit` has not been called yet, it is called first and the fitted models are then returned.
Parameters:
X_train : array-like,
Training vectors, where the rows are the samples
and the columns are the features.
X_test : array-like,
Testing vectors, where the rows are the samples
and the columns are the features.
Returns:
models: dict-object,
Returns a dictionary mapping each model name (key)
to its fitted pipeline (value).
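A brief hedged sketch of `provide_models`, reusing the synthetic split and the `regr_mts` object from the previous snippet: if `fit` has not been called, it is triggered internally before the dictionary of fitted pipelines is returned.

```python
# Sketch: X_train / X_test and regr_mts come from the previous snippet.
models = regr_mts.provide_models(X_train, X_test)
print(list(models.keys()))        # e.g. "ARIMA", "ETS", "MTS(Ridge)", ...
first_name = list(models.keys())[0]
print(type(models[first_name]))   # a fitted pipeline / DeepMTS object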
10class MLARCH: 11 """Machine Learning-agnostic ARCH for nearly-stationary time series (e.g., returns) 12 13 Parameters 14 ---------- 15 model_mean : object 16 Model for mean component 17 model_sigma : object 18 Model for volatility component (sklearn regressor) 19 model_residuals : object 20 Model for standardized residuals 21 lags_vol : int, default=10 22 Number of lags for squared residuals in volatility model 23 """ 24 25 def __init__(self, model_mean, model_sigma, model_residuals, lags_vol=10): 26 self.model_mean = model_mean 27 self.model_sigma = model_sigma 28 self.model_residuals = model_residuals 29 self.lags_vol = lags_vol 30 31 def _create_lags(self, y, lags): 32 """Create lagged feature matrix""" 33 n = len(y) 34 if n <= lags: 35 raise ValueError(f"Series length {n} must be > lags {lags}") 36 X = np.zeros((n - lags, lags)) 37 for i in range(lags): 38 X[:, i] = y[i: (n - lags + i)] 39 return X 40 41 def fit(self, y, **kwargs): 42 """Fit the MLARCH model 43 44 Parameters 45 ---------- 46 y : array-like 47 Target time series (should be stationary, e.g., returns) 48 49 Returns 50 ------- 51 self 52 """ 53 # Format input 54 if isinstance(y, (pd.Series, pd.DataFrame)): 55 y = y.values 56 y = y.ravel() 57 58 if len(y) < self.lags_vol + 20: 59 raise ValueError(f"Need at least {self.lags_vol + 20} observations") 60 61 # Step 1: Fit mean model 62 self.model_mean.fit(y.reshape(-1, 1)) 63 mean_residuals = self.model_mean.residuals_.ravel() 64 65 # Step 2: Fit ARCH volatility model on lagged squared residuals 66 resid_squared = mean_residuals**2 67 X_vol = self._create_lags(resid_squared, self.lags_vol) 68 y_vol = np.log(resid_squared[self.lags_vol:] + 1e-8) 69 70 self.model_sigma.fit(X_vol, y_vol) 71 72 # Get fitted volatility 73 fitted_log_sigma = self.model_sigma.predict(X_vol) 74 fitted_sigma = np.exp(fitted_log_sigma) 75 76 # Step 3: Compute standardized residuals with proper scaling 77 standardized_residuals = mean_residuals[self.lags_vol:] / np.sqrt( 78 fitted_sigma 79 ) 80 81 # Enforce zero mean and unit variance 82 self.z_mean_ = np.mean(standardized_residuals) 83 self.z_std_ = np.std(standardized_residuals) 84 standardized_residuals = ( 85 standardized_residuals - self.z_mean_ 86 ) / self.z_std_ 87 88 # Step 4: Fit residuals model 89 self.model_residuals.fit(standardized_residuals.reshape(-1, 1)) 90 91 # Store for prediction 92 self.last_residuals_squared_ = resid_squared[-self.lags_vol:] 93 94 # Store diagnostics 95 self.fitted_volatility_mean_ = np.mean(np.sqrt(fitted_sigma)) 96 self.fitted_volatility_std_ = np.std(np.sqrt(fitted_sigma)) 97 98 return self 99 100 def predict(self, h=5, level=95, return_sims=False): 101 """Predict future values 102 103 Parameters 104 ---------- 105 h : int 106 Forecast horizon 107 level : int 108 Confidence level for prediction intervals 109 return_sims : bool 110 If True, return full simulation paths 111 112 Returns 113 ------- 114 DescribeResult 115 Named tuple with mean, sims, lower, upper 116 """ 117 DescribeResult = namedtuple( 118 "DescribeResult", ("mean", "sims", "lower", "upper") 119 ) 120 121 # Get mean forecast 122 mean_forecast = self.model_mean.predict(h=h).values.ravel() 123 124 # Recursive ARCH volatility forecasting 125 sigma_forecast = np.zeros(h) 126 current_lags = self.last_residuals_squared_.copy() 127 128 for i in range(h): 129 X_t = current_lags.reshape(1, -1) 130 log_sigma_t = self.model_sigma.predict(X_t)[0] 131 sigma_forecast[i] = np.exp(log_sigma_t) 132 # Update lags with predicted variance 133 current_lags = 
np.append(current_lags[1:], sigma_forecast[i]) 134 135 # Predict standardized residuals and rescale 136 z_forecast_normalized = self.model_residuals.predict(h=h).values.ravel() 137 z_forecast = z_forecast_normalized * self.z_std_ + self.z_mean_ 138 139 # Combine: μ + z × σ 140 point_forecast = mean_forecast + z_forecast * np.sqrt(sigma_forecast) 141 142 # Generate prediction intervals 143 sims = None 144 if return_sims: 145 preds_z_for_sims = self.model_residuals.predict(h=h) 146 if hasattr(preds_z_for_sims, "sims") and isinstance( 147 preds_z_for_sims.sims, pd.DataFrame 148 ): 149 sims_z_normalized = preds_z_for_sims.sims 150 n_sims = sims_z_normalized.shape[1] 151 152 sims = np.zeros((h, n_sims)) 153 for sim_idx in range(n_sims): 154 # Rescale simulations 155 z_sim = ( 156 sims_z_normalized.iloc[:, sim_idx].values * self.z_std_ 157 + self.z_mean_ 158 ) 159 sims[:, sim_idx] = mean_forecast + z_sim * np.sqrt( 160 sigma_forecast 161 ) 162 163 alpha = 1 - level / 100 164 lower_bound = np.quantile(sims, alpha / 2, axis=1) 165 upper_bound = np.quantile(sims, 1 - alpha / 2, axis=1) 166 else: 167 # Fallback to Gaussian 168 z_score = norm.ppf(1 - (1 - level / 100) / 2) 169 margin = z_score * np.sqrt(sigma_forecast) * self.z_std_ 170 lower_bound = point_forecast - margin 171 upper_bound = point_forecast + margin 172 else: 173 # Gaussian intervals with proper scaling 174 z_score = norm.ppf(1 - (1 - level / 100) / 2) 175 margin = z_score * np.sqrt(sigma_forecast) * self.z_std_ 176 lower_bound = point_forecast - margin 177 upper_bound = point_forecast + margin 178 179 return DescribeResult(point_forecast, sims, lower_bound, upper_bound)
Machine Learning-agnostic ARCH for nearly-stationary time series (e.g., returns)
Parameters
model_mean : object
Model for mean component
model_sigma : object
Model for volatility component (sklearn regressor)
model_residuals : object
Model for standardized residuals
lags_vol : int, default=10
Number of lags for squared residuals in volatility model
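A hedged wiring sketch on synthetic returns; it assumes `model_mean` and `model_residuals` are forecasting objects exposing `fit(y)`, a `residuals_` attribute and `predict(h=...)` (for instance `ns.MTS` around a scikit-learn regressor), and that `model_sigma` is a plain scikit-learn regressor, as the parameter list above suggests.

```python
# Hedged sketch of MLARCH wiring (synthetic data, illustrative models).
import numpy as np
import nnetsauce as ns
from sklearn.linear_model import Ridge, RidgeCV

np.random.seed(42)
returns = np.random.normal(scale=0.01, size=300)   # synthetic "returns"

arch = ns.MLARCH(
    model_mean=ns.MTS(Ridge(), lags=2),        # mean component
    model_sigma=RidgeCV(),                     # log-volatility regressor
    model_residuals=ns.MTS(Ridge(), lags=2),   # standardized residuals
    lags_vol=5,
)
arch.fit(returns)
res = arch.predict(h=10, level=95)
print(res.mean)              # point forecasts: mu + z * sigma
print(res.lower, res.upper)  # prediction interval bounds
```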
41 def fit(self, y, **kwargs): 42 """Fit the MLARCH model 43 44 Parameters 45 ---------- 46 y : array-like 47 Target time series (should be stationary, e.g., returns) 48 49 Returns 50 ------- 51 self 52 """ 53 # Format input 54 if isinstance(y, (pd.Series, pd.DataFrame)): 55 y = y.values 56 y = y.ravel() 57 58 if len(y) < self.lags_vol + 20: 59 raise ValueError(f"Need at least {self.lags_vol + 20} observations") 60 61 # Step 1: Fit mean model 62 self.model_mean.fit(y.reshape(-1, 1)) 63 mean_residuals = self.model_mean.residuals_.ravel() 64 65 # Step 2: Fit ARCH volatility model on lagged squared residuals 66 resid_squared = mean_residuals**2 67 X_vol = self._create_lags(resid_squared, self.lags_vol) 68 y_vol = np.log(resid_squared[self.lags_vol:] + 1e-8) 69 70 self.model_sigma.fit(X_vol, y_vol) 71 72 # Get fitted volatility 73 fitted_log_sigma = self.model_sigma.predict(X_vol) 74 fitted_sigma = np.exp(fitted_log_sigma) 75 76 # Step 3: Compute standardized residuals with proper scaling 77 standardized_residuals = mean_residuals[self.lags_vol:] / np.sqrt( 78 fitted_sigma 79 ) 80 81 # Enforce zero mean and unit variance 82 self.z_mean_ = np.mean(standardized_residuals) 83 self.z_std_ = np.std(standardized_residuals) 84 standardized_residuals = ( 85 standardized_residuals - self.z_mean_ 86 ) / self.z_std_ 87 88 # Step 4: Fit residuals model 89 self.model_residuals.fit(standardized_residuals.reshape(-1, 1)) 90 91 # Store for prediction 92 self.last_residuals_squared_ = resid_squared[-self.lags_vol:] 93 94 # Store diagnostics 95 self.fitted_volatility_mean_ = np.mean(np.sqrt(fitted_sigma)) 96 self.fitted_volatility_std_ = np.std(np.sqrt(fitted_sigma)) 97 98 return self
Fit the MLARCH model
Parameters
y : array-like
Target time series (should be stationary, e.g., returns)
Returns
self
100 def predict(self, h=5, level=95, return_sims=False): 101 """Predict future values 102 103 Parameters 104 ---------- 105 h : int 106 Forecast horizon 107 level : int 108 Confidence level for prediction intervals 109 return_sims : bool 110 If True, return full simulation paths 111 112 Returns 113 ------- 114 DescribeResult 115 Named tuple with mean, sims, lower, upper 116 """ 117 DescribeResult = namedtuple( 118 "DescribeResult", ("mean", "sims", "lower", "upper") 119 ) 120 121 # Get mean forecast 122 mean_forecast = self.model_mean.predict(h=h).values.ravel() 123 124 # Recursive ARCH volatility forecasting 125 sigma_forecast = np.zeros(h) 126 current_lags = self.last_residuals_squared_.copy() 127 128 for i in range(h): 129 X_t = current_lags.reshape(1, -1) 130 log_sigma_t = self.model_sigma.predict(X_t)[0] 131 sigma_forecast[i] = np.exp(log_sigma_t) 132 # Update lags with predicted variance 133 current_lags = np.append(current_lags[1:], sigma_forecast[i]) 134 135 # Predict standardized residuals and rescale 136 z_forecast_normalized = self.model_residuals.predict(h=h).values.ravel() 137 z_forecast = z_forecast_normalized * self.z_std_ + self.z_mean_ 138 139 # Combine: μ + z × σ 140 point_forecast = mean_forecast + z_forecast * np.sqrt(sigma_forecast) 141 142 # Generate prediction intervals 143 sims = None 144 if return_sims: 145 preds_z_for_sims = self.model_residuals.predict(h=h) 146 if hasattr(preds_z_for_sims, "sims") and isinstance( 147 preds_z_for_sims.sims, pd.DataFrame 148 ): 149 sims_z_normalized = preds_z_for_sims.sims 150 n_sims = sims_z_normalized.shape[1] 151 152 sims = np.zeros((h, n_sims)) 153 for sim_idx in range(n_sims): 154 # Rescale simulations 155 z_sim = ( 156 sims_z_normalized.iloc[:, sim_idx].values * self.z_std_ 157 + self.z_mean_ 158 ) 159 sims[:, sim_idx] = mean_forecast + z_sim * np.sqrt( 160 sigma_forecast 161 ) 162 163 alpha = 1 - level / 100 164 lower_bound = np.quantile(sims, alpha / 2, axis=1) 165 upper_bound = np.quantile(sims, 1 - alpha / 2, axis=1) 166 else: 167 # Fallback to Gaussian 168 z_score = norm.ppf(1 - (1 - level / 100) / 2) 169 margin = z_score * np.sqrt(sigma_forecast) * self.z_std_ 170 lower_bound = point_forecast - margin 171 upper_bound = point_forecast + margin 172 else: 173 # Gaussian intervals with proper scaling 174 z_score = norm.ppf(1 - (1 - level / 100) / 2) 175 margin = z_score * np.sqrt(sigma_forecast) * self.z_std_ 176 lower_bound = point_forecast - margin 177 upper_bound = point_forecast + margin 178 179 return DescribeResult(point_forecast, sims, lower_bound, upper_bound)
Predict future values
Parameters
h : int
Forecast horizon
level : int
Confidence level for prediction intervals
return_sims : bool
If True, return full simulation paths
Returns
DescribeResult
Named tuple with mean, sims, lower, upper
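A short hedged follow-up, reusing the fitted `arch` object from the MLARCH construction sketch above: with `return_sims=True` the intervals come from simulated paths when `model_residuals` can provide a `sims` data frame, otherwise the method falls back to Gaussian intervals.

```python
# Sketch: `arch` is the fitted MLARCH from the earlier construction example.
res_sims = arch.predict(h=20, level=90, return_sims=True)
if res_sims.sims is not None:
    print(res_sims.sims.shape)              # (h, n_sims) simulated paths
else:
    print("no simulations available, Gaussian intervals used")
print(res_sims.lower[:3], res_sims.upper[:3])
```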
6class MedianVotingRegressor(VotingRegressor): 7 def predict(self, X): 8 """ 9 Predict using the median of the base regressors' predictions. 10 11 Parameters: 12 X (array-like): Feature matrix for predictions. 13 14 Returns: 15 y_pred (array): Median of predictions from the base regressors. 16 """ 17 predictions = np.asarray( 18 [regressor.predict(X) for regressor in self.estimators_] 19 ) 20 return np.median(predictions, axis=0)
Prediction voting regressor for unfitted estimators.
A voting regressor is an ensemble meta-estimator that fits several base regressors, each on the whole dataset. Then it averages the individual predictions to form a final prediction.
Read more in the scikit-learn User Guide (voting_regressor).
New in version 0.21.
Parameters
estimators : list of (str, estimator) tuples
Invoking the fit method on the VotingRegressor will fit clones
of those original estimators that will be stored in the class attribute
self.estimators_. An estimator can be set to 'drop' using
set_params().
*Changed in version 0.21:*
`'drop'` is accepted. Using None was deprecated in 0.22 and
support was removed in 0.24.
weights : array-like of shape (n_regressors,), default=None
Sequence of weights (float or int) to weight the occurrences of
predicted values before averaging. Uses uniform weights if None.
n_jobs : int, default=None
The number of jobs to run in parallel for fit.
None means 1 unless in a joblib.parallel_backend context.
-1 means using all processors. See the scikit-learn Glossary entry on n_jobs for more details.
verbose : bool, default=False
If True, the time elapsed while fitting will be printed as it is completed.
*New in version 0.23.*
Attributes
estimators_ : list of regressors
The collection of fitted sub-estimators as defined in estimators
that are not 'drop'.
named_estimators_ : sklearn.utils.Bunch
Attribute to access any fitted sub-estimators by name.
*New in version 0.20.*
n_features_in_ : int
Number of features seen during fit. Only defined if the
underlying regressor exposes such an attribute when fit.
*New in version 0.24.*
feature_names_in_ : ndarray of shape (n_features_in_,)
Names of features seen during fit. Only defined if the
underlying estimators expose such an attribute when fit.
*New in version 1.0.*
See Also
VotingClassifier : Soft Voting/Majority Rule classifier.
Examples
>>> import numpy as np
>>> from sklearn.linear_model import LinearRegression
>>> from sklearn.ensemble import RandomForestRegressor
>>> from sklearn.ensemble import VotingRegressor
>>> from sklearn.neighbors import KNeighborsRegressor
>>> r1 = LinearRegression()
>>> r2 = RandomForestRegressor(n_estimators=10, random_state=1)
>>> r3 = KNeighborsRegressor()
>>> X = np.array([[1, 1], [2, 4], [3, 9], [4, 16], [5, 25], [6, 36]])
>>> y = np.array([2, 6, 12, 20, 30, 42])
>>> er = VotingRegressor([('lr', r1), ('rf', r2), ('r3', r3)])
>>> print(er.fit(X, y).predict(X))
[ 6.8... 8.4... 12.5... 17.8... 26... 34...]
In the following example, we drop the 'lr' estimator with
VotingRegressor.set_params() and fit the remaining two estimators:
>>> er = er.set_params(lr='drop')
>>> er = er.fit(X, y)
>>> len(er.estimators_)
2
7 def predict(self, X): 8 """ 9 Predict using the median of the base regressors' predictions. 10 11 Parameters: 12 X (array-like): Feature matrix for predictions. 13 14 Returns: 15 y_pred (array): Median of predictions from the base regressors. 16 """ 17 predictions = np.asarray( 18 [regressor.predict(X) for regressor in self.estimators_] 19 ) 20 return np.median(predictions, axis=0)
Predict using the median of the base regressors' predictions.
Parameters: X (array-like): Feature matrix for predictions.
Returns: y_pred (array): Median of predictions from the base regressors.
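A hedged usage sketch, reusing the small dataset from the VotingRegressor example above: `MedianVotingRegressor` is constructed exactly like its parent, but `predict` aggregates the base predictions with an element-wise median instead of a (weighted) mean.

```python
# Sketch: same toy data as the VotingRegressor doctest above.
import numpy as np
import nnetsauce as ns
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor

X = np.array([[1, 1], [2, 4], [3, 9], [4, 16], [5, 25], [6, 36]])
y = np.array([2, 6, 12, 20, 30, 42])

er = ns.MedianVotingRegressor(
    [("lr", LinearRegression()),
     ("rf", RandomForestRegressor(n_estimators=10, random_state=1)),
     ("knn", KNeighborsRegressor())]
)
print(er.fit(X, y).predict(X))   # element-wise median of the three predictions
```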
31class MTS(Base): 32 """Univariate and multivariate time series (MTS) forecasting with Quasi-Randomized networks 33 34 Parameters: 35 36 obj: object. 37 any object containing a method fit (obj.fit()) and a method predict 38 (obj.predict()). 39 40 n_hidden_features: int. 41 number of nodes in the hidden layer. 42 43 activation_name: str. 44 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'. 45 46 a: float. 47 hyperparameter for 'prelu' or 'elu' activation function. 48 49 nodes_sim: str. 50 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 51 'uniform'. 52 53 bias: boolean. 54 indicates if the hidden layer contains a bias term (True) or not 55 (False). 56 57 dropout: float. 58 regularization parameter; (random) percentage of nodes dropped out 59 of the training. 60 61 direct_link: boolean. 62 indicates if the original predictors are included (True) in model's fitting or not (False). 63 64 n_clusters: int. 65 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering). 66 67 cluster_encode: bool. 68 defines how the variable containing clusters is treated (default is one-hot) 69 if `False`, then labels are used, without one-hot encoding. 70 71 type_clust: str. 72 type of clustering method: currently k-means ('kmeans') or Gaussian 73 Mixture Model ('gmm'). 74 75 type_scaling: a tuple of 3 strings. 76 scaling methods for inputs, hidden layer, and clustering respectively 77 (and when relevant). 78 Currently available: standardization ('std') or MinMax scaling ('minmax'). 79 80 lags: int. 81 number of lags used for each time series. 82 If string, lags must be one of 'AIC', 'AICc', or 'BIC'. 83 84 type_pi: str. 85 type of prediction interval; currently: 86 - "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case 87 - "quantile": use model-agnostic quantile regression under the hood 88 - "kde": based on Kernel Density Estimation of in-sample residuals 89 - "bootstrap": based on independent bootstrap of in-sample residuals 90 - "block-bootstrap": based on basic block bootstrap of in-sample residuals 91 - "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals 92 - "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals 93 - "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals 94 - "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals 95 - "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals 96 - "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals 97 - based on copulas of in-sample residuals: 'vine-tll', 'vine-bb1', 'vine-bb6', 'vine-bb7', 'vine-bb8', 'vine-clayton', 98 'vine-frank', 'vine-gaussian', 'vine-gumbel', 'vine-indep', 'vine-joe', 'vine-student' 99 - 'scp-vine-tll', 'scp-vine-bb1', 'scp-vine-bb6', 'scp-vine-bb7', 'scp-vine-bb8', 'scp-vine-clayton', 100 'scp-vine-frank', 'scp-vine-gaussian', 'scp-vine-gumbel', 'scp-vine-indep', 'scp-vine-joe', 'scp-vine-student' 101 - 'scp2-vine-tll', 'scp2-vine-bb1', 'scp2-vine-bb6', 'scp2-vine-bb7', 'scp2-vine-bb8', 'scp2-vine-clayton', 102 'scp2-vine-frank', 'scp2-vine-gaussian', 'scp2-vine-gumbel', 'scp2-vine-indep', 'scp2-vine-joe', 'scp2-vine-student' 103 104 level: int. 
105 level of confidence for `type_pi == 'quantile'` (default is `95`) 106 107 block_size: int. 108 size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap"). 109 Default is round(3.15*(n_residuals^1/3)) 110 111 replications: int. 112 number of replications (if needed, for predictive simulation). Default is 'None'. 113 114 kernel: str. 115 the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'. 116 117 agg: str. 118 either "mean" or "median" for simulation of bootstrap aggregating 119 120 seed: int. 121 reproducibility seed for nodes_sim=='uniform' or predictive simulation. 122 123 backend: str. 124 "cpu" or "gpu" or "tpu". 125 126 verbose: int. 127 0: not printing; 1: printing 128 129 show_progress: bool. 130 True: progress bar when fitting each series; False: no progress bar when fitting each series 131 132 Attributes: 133 134 fit_objs_: dict 135 objects adjusted to each individual time series 136 137 y_: {array-like} 138 MTS responses (most recent observations first) 139 140 X_: {array-like} 141 MTS lags 142 143 xreg_: {array-like} 144 external regressors 145 146 y_means_: dict 147 a dictionary of each series mean values 148 149 preds_: {array-like} 150 successive model predictions 151 152 preds_std_: {array-like} 153 standard deviation around the predictions for Bayesian base learners (`obj`) 154 155 gaussian_preds_std_: {array-like} 156 standard deviation around the predictions for `type_pi='gaussian'` 157 158 return_std_: boolean 159 return uncertainty or not (set in predict) 160 161 df_: data frame 162 the input data frame, in case a data.frame is provided to `fit` 163 164 n_obs_: int 165 number of time series observations (number of rows for multivariate) 166 167 level_: int 168 level of confidence for prediction intervals (default is 95) 169 170 residuals_: {array-like} 171 in-sample residuals (for `type_pi` not conformal prediction) or calibrated residuals 172 (for `type_pi` in conformal prediction) 173 174 residuals_sims_: tuple of {array-like} 175 simulations of in-sample residuals (for `type_pi` not conformal prediction) or 176 calibrated residuals (for `type_pi` in conformal prediction) 177 178 kde_: A scikit-learn object, see https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KernelDensity.html 179 180 residuals_std_dev_: residuals standard deviation 181 182 Examples: 183 184 Example 1: 185 186 ```python 187 import nnetsauce as ns 188 import numpy as np 189 from sklearn import linear_model 190 np.random.seed(123) 191 192 M = np.random.rand(10, 3) 193 M[:,0] = 10*M[:,0] 194 M[:,2] = 25*M[:,2] 195 print(M) 196 197 # Adjust Bayesian Ridge 198 regr4 = linear_model.BayesianRidge() 199 obj_MTS = ns.MTS(regr4, lags = 1, n_hidden_features=5) 200 obj_MTS.fit(M) 201 print(obj_MTS.predict()) 202 203 # with credible intervals 204 print(obj_MTS.predict(return_std=True, level=80)) 205 206 print(obj_MTS.predict(return_std=True, level=95)) 207 ``` 208 209 Example 2: 210 211 ```python 212 import nnetsauce as ns 213 import numpy as np 214 from sklearn import linear_model 215 216 dataset = { 217 'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'], 218 'series1' : [34, 30, 35.6, 33.3, 38.1], 219 'series2' : [4, 5.5, 5.6, 6.3, 5.1], 220 'series3' : [100, 100.5, 100.6, 100.2, 100.1]} 221 df = pd.DataFrame(dataset).set_index('date') 222 print(df) 223 224 # Adjust Bayesian Ridge 225 regr5 = linear_model.BayesianRidge() 226 obj_MTS = ns.MTS(regr5, 
lags = 1, n_hidden_features=5) 227 obj_MTS.fit(df) 228 print(obj_MTS.predict()) 229 230 # with credible intervals 231 print(obj_MTS.predict(return_std=True, level=80)) 232 233 print(obj_MTS.predict(return_std=True, level=95)) 234 ``` 235 """ 236 237 # construct the object ----- 238 239 def __init__( 240 self, 241 obj, 242 n_hidden_features=5, 243 activation_name="relu", 244 a=0.01, 245 nodes_sim="sobol", 246 bias=True, 247 dropout=0, 248 direct_link=True, 249 n_clusters=2, 250 cluster_encode=True, 251 type_clust="kmeans", 252 type_scaling=("std", "std", "std"), 253 lags=1, 254 type_pi="kde", 255 level=95, 256 block_size=None, 257 replications=None, 258 kernel="gaussian", 259 agg="mean", 260 seed=123, 261 backend="cpu", 262 verbose=0, 263 show_progress=True, 264 ): 265 super().__init__( 266 n_hidden_features=n_hidden_features, 267 activation_name=activation_name, 268 a=a, 269 nodes_sim=nodes_sim, 270 bias=bias, 271 dropout=dropout, 272 direct_link=direct_link, 273 n_clusters=n_clusters, 274 cluster_encode=cluster_encode, 275 type_clust=type_clust, 276 type_scaling=type_scaling, 277 seed=seed, 278 backend=backend, 279 ) 280 281 # Add validation for lags parameter 282 if isinstance(lags, str): 283 assert lags in ( 284 "AIC", 285 "AICc", 286 "BIC", 287 ), "if string, lags must be one of 'AIC', 'AICc', or 'BIC'" 288 else: 289 assert ( 290 int(lags) == lags 291 ), "if numeric, lags parameter should be an integer" 292 293 self.obj = obj 294 self.n_series = None 295 self.lags = lags 296 self.type_pi = type_pi 297 self.level = level 298 if self.type_pi == "quantile": 299 self.obj = QuantileRegressor( 300 self.obj, level=self.level, scoring="conformal" 301 ) 302 self.block_size = block_size 303 self.replications = replications 304 self.kernel = kernel 305 self.agg = agg 306 self.verbose = verbose 307 self.show_progress = show_progress 308 self.series_names = ["series0"] 309 self.input_dates = None 310 self.quantiles = None 311 self.fit_objs_ = {} 312 self.y_ = None # MTS responses (most recent observations first) 313 self.X_ = None # MTS lags 314 self.xreg_ = None 315 self.y_means_ = {} 316 self.mean_ = None 317 self.median_ = None 318 self.upper_ = None 319 self.lower_ = None 320 self.output_dates_ = None 321 self.preds_std_ = [] 322 self.gaussian_preds_std_ = None 323 self.alpha_ = None 324 self.return_std_ = None 325 self.df_ = None 326 self.residuals_ = [] 327 self.abs_calib_residuals_ = None 328 self.calib_residuals_quantile_ = None 329 self.residuals_sims_ = None 330 self.kde_ = None 331 self.sims_ = None 332 self.residuals_std_dev_ = None 333 self.n_obs_ = None 334 self.level_ = None 335 self.init_n_series_ = None 336 337 def fit(self, X, xreg=None, **kwargs): 338 """Fit MTS model to training data X, with optional regressors xreg 339 340 Parameters: 341 342 X: {array-like}, shape = [n_samples, n_features] 343 Training time series, where n_samples is the number 344 of samples and n_features is the number of features; 345 X must be in increasing order (most recent observations last) 346 347 xreg: {array-like}, shape = [n_samples, n_features_xreg] 348 Additional (external) regressors to be passed to self.obj 349 xreg must be in 'increasing' order (most recent observations last) 350 351 **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity) 352 353 Returns: 354 355 self: object 356 """ 357 try: 358 self.init_n_series_ = X.shape[1] 359 except IndexError as e: 360 self.init_n_series_ = 1 361 362 # Automatic lag 
selection if requested 363 if isinstance(self.lags, str): 364 max_lags = min(25, X.shape[0] // 4) 365 best_ic = float("inf") 366 best_lags = 1 367 368 if self.verbose: 369 print( 370 f"\nSelecting optimal number of lags using {self.lags}..." 371 ) 372 iterator = tqdm(range(1, max_lags + 1)) 373 else: 374 iterator = range(1, max_lags + 1) 375 376 for lag in iterator: 377 # Convert DataFrame to numpy array before reversing 378 if isinstance(X, pd.DataFrame): 379 X_values = X.values[::-1] 380 else: 381 X_values = X[::-1] 382 383 # Try current lag value 384 if self.init_n_series_ > 1: 385 mts_input = ts.create_train_inputs(X_values, lag) 386 else: 387 mts_input = ts.create_train_inputs( 388 X_values.reshape(-1, 1), lag 389 ) 390 391 # Cook training set and fit model 392 dummy_y, scaled_Z = self.cook_training_set( 393 y=np.ones(mts_input[0].shape[0]), X=mts_input[1] 394 ) 395 residuals_ = [] 396 397 for i in range(self.init_n_series_): 398 y_mean = np.mean(mts_input[0][:, i]) 399 centered_y_i = mts_input[0][:, i] - y_mean 400 self.obj.fit(X=scaled_Z, y=centered_y_i) 401 residuals_.append( 402 (centered_y_i - self.obj.predict(scaled_Z)).tolist() 403 ) 404 405 self.residuals_ = np.asarray(residuals_).T 406 ic = self._compute_information_criterion( 407 curr_lags=lag, criterion=self.lags 408 ) 409 410 if self.verbose: 411 print(f"Trying lags={lag}, {self.lags}={ic:.2f}") 412 413 if ic < best_ic: 414 best_ic = ic 415 best_lags = lag 416 417 if self.verbose: 418 print( 419 f"\nSelected {best_lags} lags with {self.lags}={best_ic:.2f}" 420 ) 421 422 self.lags = best_lags 423 424 self.input_dates = None 425 self.df_ = None 426 427 if isinstance(X, pd.DataFrame) is False: 428 # input data set is a numpy array 429 if xreg is None: 430 X = pd.DataFrame(X) 431 self.series_names = [ 432 "series" + str(i) for i in range(X.shape[1]) 433 ] 434 else: 435 # xreg is not None 436 X = mo.cbind(X, xreg) 437 self.xreg_ = xreg 438 439 else: # input data set is a DataFrame with column names 440 X_index = None 441 if X.index is not None: 442 X_index = X.index 443 if xreg is None: 444 X = copy.deepcopy(mo.convert_df_to_numeric(X)) 445 else: 446 X = copy.deepcopy(mo.cbind(mo.convert_df_to_numeric(X), xreg)) 447 self.xreg_ = xreg 448 if X_index is not None: 449 X.index = X_index 450 self.series_names = X.columns.tolist() 451 452 if isinstance(X, pd.DataFrame): 453 if self.df_ is None: 454 self.df_ = X 455 X = X.values 456 else: 457 input_dates_prev = pd.DatetimeIndex(self.df_.index.values) 458 frequency = pd.infer_freq(input_dates_prev) 459 self.df_ = pd.concat([self.df_, X], axis=0) 460 self.input_dates = pd.date_range( 461 start=input_dates_prev[0], 462 periods=len(input_dates_prev) + X.shape[0], 463 freq=frequency, 464 ).values.tolist() 465 self.df_.index = self.input_dates 466 X = self.df_.values 467 self.df_.columns = self.series_names 468 else: 469 if self.df_ is None: 470 self.df_ = pd.DataFrame(X, columns=self.series_names) 471 else: 472 self.df_ = pd.concat( 473 [self.df_, pd.DataFrame(X, columns=self.series_names)], 474 axis=0, 475 ) 476 477 self.input_dates = ts.compute_input_dates(self.df_) 478 479 try: 480 # multivariate time series 481 n, p = X.shape 482 except: 483 # univariate time series 484 n = X.shape[0] 485 p = 1 486 self.n_obs_ = n 487 488 rep_1_n = np.repeat(1, n) 489 490 self.y_ = None 491 self.X_ = None 492 self.n_series = p 493 self.fit_objs_.clear() 494 self.y_means_.clear() 495 residuals_ = [] 496 self.residuals_ = None 497 self.residuals_sims_ = None 498 self.kde_ = None 499 self.sims_ = None 500 
self.scaled_Z_ = None 501 self.centered_y_is_ = [] 502 503 if self.init_n_series_ > 1: 504 # multivariate time series 505 mts_input = ts.create_train_inputs(X[::-1], self.lags) 506 else: 507 # univariate time series 508 mts_input = ts.create_train_inputs( 509 X.reshape(-1, 1)[::-1], self.lags 510 ) 511 512 self.y_ = mts_input[0] 513 514 self.X_ = mts_input[1] 515 516 dummy_y, scaled_Z = self.cook_training_set(y=rep_1_n, X=self.X_) 517 518 self.scaled_Z_ = scaled_Z 519 520 # loop on all the time series and adjust self.obj.fit 521 if self.verbose > 0: 522 print( 523 f"\n Adjusting {type(self.obj).__name__} to multivariate time series... \n" 524 ) 525 526 if self.show_progress is True: 527 iterator = tqdm(range(self.init_n_series_)) 528 else: 529 iterator = range(self.init_n_series_) 530 531 if self.type_pi in ( 532 "gaussian", 533 "kde", 534 "bootstrap", 535 "block-bootstrap", 536 ) or self.type_pi.startswith("vine"): 537 for i in iterator: 538 y_mean = np.mean(self.y_[:, i]) 539 self.y_means_[i] = y_mean 540 centered_y_i = self.y_[:, i] - y_mean 541 self.centered_y_is_.append(centered_y_i) 542 self.obj.fit(X=scaled_Z, y=centered_y_i) 543 self.fit_objs_[i] = deepcopy(self.obj) 544 residuals_.append( 545 ( 546 centered_y_i - self.fit_objs_[i].predict(scaled_Z) 547 ).tolist() 548 ) 549 550 if self.type_pi == "quantile": 551 for i in iterator: 552 y_mean = np.mean(self.y_[:, i]) 553 self.y_means_[i] = y_mean 554 centered_y_i = self.y_[:, i] - y_mean 555 self.centered_y_is_.append(centered_y_i) 556 self.obj.fit(X=scaled_Z, y=centered_y_i) 557 self.fit_objs_[i] = deepcopy(self.obj) 558 559 if self.type_pi.startswith("scp"): 560 # split conformal prediction 561 for i in iterator: 562 n_y = self.y_.shape[0] 563 n_y_half = n_y // 2 564 first_half_idx = range(0, n_y_half) 565 second_half_idx = range(n_y_half, n_y) 566 y_mean_temp = np.mean(self.y_[first_half_idx, i]) 567 centered_y_i_temp = self.y_[first_half_idx, i] - y_mean_temp 568 self.obj.fit(X=scaled_Z[first_half_idx, :], y=centered_y_i_temp) 569 # calibrated residuals actually 570 residuals_.append( 571 ( 572 self.y_[second_half_idx, i] 573 - ( 574 y_mean_temp 575 + self.obj.predict(scaled_Z[second_half_idx, :]) 576 ) 577 ).tolist() 578 ) 579 # fit on the second half 580 y_mean = np.mean(self.y_[second_half_idx, i]) 581 self.y_means_[i] = y_mean 582 centered_y_i = self.y_[second_half_idx, i] - y_mean 583 self.obj.fit(X=scaled_Z[second_half_idx, :], y=centered_y_i) 584 self.fit_objs_[i] = deepcopy(self.obj) 585 586 self.residuals_ = np.asarray(residuals_).T 587 588 if self.type_pi == "gaussian": 589 self.gaussian_preds_std_ = np.std(self.residuals_, axis=0) 590 591 if self.type_pi.startswith("scp2"): 592 # Calculate mean and standard deviation for each column 593 data_mean = np.mean(self.residuals_, axis=0) 594 self.residuals_std_dev_ = np.std(self.residuals_, axis=0) 595 # Center and scale the array using broadcasting 596 self.residuals_ = ( 597 self.residuals_ - data_mean[np.newaxis, :] 598 ) / self.residuals_std_dev_[np.newaxis, :] 599 600 if self.replications != None and "kde" in self.type_pi: 601 if self.verbose > 0: 602 print(f"\n Simulate residuals using {self.kernel} kernel... 
\n") 603 assert self.kernel in ( 604 "gaussian", 605 "tophat", 606 ), "currently, 'kernel' must be either 'gaussian' or 'tophat'" 607 kernel_bandwidths = {"bandwidth": np.logspace(-6, 6, 150)} 608 grid = GridSearchCV( 609 KernelDensity(kernel=self.kernel, **kwargs), 610 param_grid=kernel_bandwidths, 611 ) 612 grid.fit(self.residuals_) 613 614 if self.verbose > 0: 615 print( 616 f"\n Best parameters for {self.kernel} kernel: {grid.best_params_} \n" 617 ) 618 619 self.kde_ = grid.best_estimator_ 620 621 return self 622 623 def partial_fit(self, X, xreg=None, **kwargs): 624 """partial_fit MTS model to training data X, with optional regressors xreg 625 626 Parameters: 627 628 X: {array-like}, shape = [n_samples, n_features] 629 Training time series, where n_samples is the number 630 of samples and n_features is the number of features; 631 X must be in increasing order (most recent observations last) 632 633 xreg: {array-like}, shape = [n_samples, n_features_xreg] 634 Additional (external) regressors to be passed to self.obj 635 xreg must be in 'increasing' order (most recent observations last) 636 637 **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity) 638 639 Returns: 640 641 self: object 642 """ 643 try: 644 self.init_n_series_ = X.shape[1] 645 except IndexError as e: 646 self.init_n_series_ = 1 647 648 # Automatic lag selection if requested 649 if isinstance(self.lags, str): 650 max_lags = min(25, X.shape[0] // 4) 651 best_ic = float("inf") 652 best_lags = 1 653 654 if self.verbose: 655 print( 656 f"\nSelecting optimal number of lags using {self.lags}..." 657 ) 658 iterator = tqdm(range(1, max_lags + 1)) 659 else: 660 iterator = range(1, max_lags + 1) 661 662 for lag in iterator: 663 # Convert DataFrame to numpy array before reversing 664 if isinstance(X, pd.DataFrame): 665 X_values = X.values[::-1] 666 else: 667 X_values = X[::-1] 668 669 # Try current lag value 670 if self.init_n_series_ > 1: 671 mts_input = ts.create_train_inputs(X_values, lag) 672 else: 673 mts_input = ts.create_train_inputs( 674 X_values.reshape(-1, 1), lag 675 ) 676 677 # Cook training set and partial_fit model 678 dummy_y, scaled_Z = self.cook_training_set( 679 y=np.ones(mts_input[0].shape[0]), X=mts_input[1] 680 ) 681 residuals_ = [] 682 683 for i in range(self.init_n_series_): 684 y_mean = np.mean(mts_input[0][:, i]) 685 centered_y_i = mts_input[0][:, i] - y_mean 686 self.obj.partial_fit(X=scaled_Z, y=centered_y_i) 687 residuals_.append( 688 (centered_y_i - self.obj.predict(scaled_Z)).tolist() 689 ) 690 691 self.residuals_ = np.asarray(residuals_).T 692 ic = self._compute_information_criterion( 693 curr_lags=lag, criterion=self.lags 694 ) 695 696 if self.verbose: 697 print(f"Trying lags={lag}, {self.lags}={ic:.2f}") 698 699 if ic < best_ic: 700 best_ic = ic 701 best_lags = lag 702 703 if self.verbose: 704 print( 705 f"\nSelected {best_lags} lags with {self.lags}={best_ic:.2f}" 706 ) 707 708 self.lags = best_lags 709 710 self.input_dates = None 711 self.df_ = None 712 713 if isinstance(X, pd.DataFrame) is False: 714 # input data set is a numpy array 715 if xreg is None: 716 X = pd.DataFrame(X) 717 if len(X.shape) > 1: 718 self.series_names = [ 719 "series" + str(i) for i in range(X.shape[1]) 720 ] 721 else: 722 self.series_names = ["series0"] 723 else: 724 # xreg is not None 725 X = mo.cbind(X, xreg) 726 self.xreg_ = xreg 727 728 else: # input data set is a DataFrame with column names 729 X_index = None 730 if X.index is not None: 
731 X_index = X.index 732 if xreg is None: 733 X = copy.deepcopy(mo.convert_df_to_numeric(X)) 734 else: 735 X = copy.deepcopy(mo.cbind(mo.convert_df_to_numeric(X), xreg)) 736 self.xreg_ = xreg 737 if X_index is not None: 738 X.index = X_index 739 self.series_names = X.columns.tolist() 740 741 if isinstance(X, pd.DataFrame): 742 if self.df_ is None: 743 self.df_ = X 744 X = X.values 745 else: 746 input_dates_prev = pd.DatetimeIndex(self.df_.index.values) 747 frequency = pd.infer_freq(input_dates_prev) 748 self.df_ = pd.concat([self.df_, X], axis=0) 749 self.input_dates = pd.date_range( 750 start=input_dates_prev[0], 751 periods=len(input_dates_prev) + X.shape[0], 752 freq=frequency, 753 ).values.tolist() 754 self.df_.index = self.input_dates 755 X = self.df_.values 756 self.df_.columns = self.series_names 757 else: 758 if self.df_ is None: 759 self.df_ = pd.DataFrame(X, columns=self.series_names) 760 else: 761 self.df_ = pd.concat( 762 [self.df_, pd.DataFrame(X, columns=self.series_names)], 763 axis=0, 764 ) 765 766 self.input_dates = ts.compute_input_dates(self.df_) 767 768 try: 769 # multivariate time series 770 n, p = X.shape 771 except: 772 # univariate time series 773 n = X.shape[0] 774 p = 1 775 self.n_obs_ = n 776 777 rep_1_n = np.repeat(1, n) 778 779 self.y_ = None 780 self.X_ = None 781 self.n_series = p 782 self.fit_objs_.clear() 783 self.y_means_.clear() 784 residuals_ = [] 785 self.residuals_ = None 786 self.residuals_sims_ = None 787 self.kde_ = None 788 self.sims_ = None 789 self.scaled_Z_ = None 790 self.centered_y_is_ = [] 791 792 if self.init_n_series_ > 1: 793 # multivariate time series 794 mts_input = ts.create_train_inputs(X[::-1], self.lags) 795 else: 796 # univariate time series 797 mts_input = ts.create_train_inputs( 798 X.reshape(-1, 1)[::-1], self.lags 799 ) 800 801 self.y_ = mts_input[0] 802 803 self.X_ = mts_input[1] 804 805 dummy_y, scaled_Z = self.cook_training_set(y=rep_1_n, X=self.X_) 806 807 self.scaled_Z_ = scaled_Z 808 809 # loop on all the time series and adjust self.obj.partial_fit 810 if self.verbose > 0: 811 print( 812 f"\n Adjusting {type(self.obj).__name__} to multivariate time series... 
\n" 813 ) 814 815 if self.show_progress is True: 816 iterator = tqdm(range(self.init_n_series_)) 817 else: 818 iterator = range(self.init_n_series_) 819 820 if self.type_pi in ( 821 "gaussian", 822 "kde", 823 "bootstrap", 824 "block-bootstrap", 825 ) or self.type_pi.startswith("vine"): 826 for i in iterator: 827 y_mean = np.mean(self.y_[:, i]) 828 self.y_means_[i] = y_mean 829 centered_y_i = self.y_[:, i] - y_mean 830 self.centered_y_is_.append(centered_y_i) 831 self.obj.partial_fit(X=scaled_Z, y=centered_y_i) 832 self.fit_objs_[i] = deepcopy(self.obj) 833 residuals_.append( 834 ( 835 centered_y_i - self.fit_objs_[i].predict(scaled_Z) 836 ).tolist() 837 ) 838 839 if self.type_pi == "quantile": 840 for i in iterator: 841 y_mean = np.mean(self.y_[:, i]) 842 self.y_means_[i] = y_mean 843 centered_y_i = self.y_[:, i] - y_mean 844 self.centered_y_is_.append(centered_y_i) 845 self.obj.partial_fit(X=scaled_Z, y=centered_y_i) 846 self.fit_objs_[i] = deepcopy(self.obj) 847 848 if self.type_pi.startswith("scp"): 849 # split conformal prediction 850 for i in iterator: 851 n_y = self.y_.shape[0] 852 n_y_half = n_y // 2 853 first_half_idx = range(0, n_y_half) 854 second_half_idx = range(n_y_half, n_y) 855 y_mean_temp = np.mean(self.y_[first_half_idx, i]) 856 centered_y_i_temp = self.y_[first_half_idx, i] - y_mean_temp 857 self.obj.partial_fit( 858 X=scaled_Z[first_half_idx, :], y=centered_y_i_temp 859 ) 860 # calibrated residuals actually 861 residuals_.append( 862 ( 863 self.y_[second_half_idx, i] 864 - ( 865 y_mean_temp 866 + self.obj.predict(scaled_Z[second_half_idx, :]) 867 ) 868 ).tolist() 869 ) 870 # partial_fit on the second half 871 y_mean = np.mean(self.y_[second_half_idx, i]) 872 self.y_means_[i] = y_mean 873 centered_y_i = self.y_[second_half_idx, i] - y_mean 874 self.obj.partial_fit( 875 X=scaled_Z[second_half_idx, :], y=centered_y_i 876 ) 877 self.fit_objs_[i] = deepcopy(self.obj) 878 879 self.residuals_ = np.asarray(residuals_).T 880 881 if self.type_pi == "gaussian": 882 self.gaussian_preds_std_ = np.std(self.residuals_, axis=0) 883 884 if self.type_pi.startswith("scp2"): 885 # Calculate mean and standard deviation for each column 886 data_mean = np.mean(self.residuals_, axis=0) 887 self.residuals_std_dev_ = np.std(self.residuals_, axis=0) 888 # Center and scale the array using broadcasting 889 self.residuals_ = ( 890 self.residuals_ - data_mean[np.newaxis, :] 891 ) / self.residuals_std_dev_[np.newaxis, :] 892 893 if self.replications != None and "kde" in self.type_pi: 894 if self.verbose > 0: 895 print(f"\n Simulate residuals using {self.kernel} kernel... 
\n") 896 assert self.kernel in ( 897 "gaussian", 898 "tophat", 899 ), "currently, 'kernel' must be either 'gaussian' or 'tophat'" 900 kernel_bandwidths = {"bandwidth": np.logspace(-6, 6, 150)} 901 grid = GridSearchCV( 902 KernelDensity(kernel=self.kernel, **kwargs), 903 param_grid=kernel_bandwidths, 904 ) 905 grid.fit(self.residuals_) 906 907 if self.verbose > 0: 908 print( 909 f"\n Best parameters for {self.kernel} kernel: {grid.best_params_} \n" 910 ) 911 912 self.kde_ = grid.best_estimator_ 913 914 return self 915 916 def _predict_quantiles(self, h, quantiles, **kwargs): 917 """Predict arbitrary quantiles from simulated paths.""" 918 # Ensure output dates are set 919 self.output_dates_, _ = ts.compute_output_dates(self.df_, h) 920 921 # Trigger full prediction to generate self.sims_ 922 if not hasattr(self, "sims_") or self.sims_ is None: 923 _ = self.predict(h=h, level=95, **kwargs) # Any level triggers sim 924 925 result_dict = {} 926 927 # Stack simulations: (R, h, n_series) 928 sims_array = np.stack([sim.values for sim in self.sims_], axis=0) 929 930 # Compute quantiles over replication axis 931 q_values = np.quantile( 932 sims_array, quantiles, axis=0 933 ) # (n_q, h, n_series) 934 935 for i, q in enumerate(quantiles): 936 # Clean label: 0.05 → "05", 0.1 → "10", 0.95 → "95" 937 q_label = ( 938 f"{int(q * 100):02d}" 939 if (q * 100).is_integer() 940 else f"{q:.3f}".replace(".", "_") 941 ) 942 for series_id in range(self.init_n_series_): 943 series_name = self.series_names[series_id] 944 col_name = f"quantile_{q_label}_{series_name}" 945 result_dict[col_name] = q_values[i, :, series_id] 946 947 df_return_quantiles = pd.DataFrame( 948 result_dict, index=self.output_dates_ 949 ) 950 951 return df_return_quantiles 952 953 def predict(self, h=5, level=95, quantiles=None, **kwargs): 954 """Forecast all the time series, h steps ahead""" 955 956 if quantiles is not None: 957 # Validate 958 quantiles = np.asarray(quantiles) 959 if not ((quantiles > 0) & (quantiles < 1)).all(): 960 raise ValueError("quantiles must be between 0 and 1.") 961 # Delegate to dedicated method 962 return self._predict_quantiles(h=h, quantiles=quantiles, **kwargs) 963 964 if isinstance(level, list) or isinstance(level, np.ndarray): 965 # Store results 966 result_dict = {} 967 # Loop through alphas and calculate lower/upper for each alpha level 968 # E.g [0.5, 2.5, 5, 16.5, 25, 50] 969 for lev in level: 970 # Get the forecast for this alpha 971 res = self.predict(h=h, level=lev, **kwargs) 972 # Adjust index and collect lower/upper bounds 973 res.lower.index = pd.to_datetime(res.lower.index) 974 res.upper.index = pd.to_datetime(res.upper.index) 975 # Loop over each time series (multivariate) and flatten results 976 if isinstance(res.lower, pd.DataFrame): 977 for ( 978 series 979 ) in ( 980 res.lower.columns 981 ): # Assumes 'lower' and 'upper' have multiple series 982 result_dict[f"lower_{lev}_{series}"] = ( 983 res.lower[series].to_numpy().flatten() 984 ) 985 result_dict[f"upper_{lev}_{series}"] = ( 986 res.upper[series].to_numpy().flatten() 987 ) 988 else: 989 for series_id in range( 990 self.n_series 991 ): # Assumes 'lower' and 'upper' have multiple series 992 result_dict[f"lower_{lev}_{series_id}"] = ( 993 res.lower[series_id, :].to_numpy().flatten() 994 ) 995 result_dict[f"upper_{lev}_{series_id}"] = ( 996 res.upper[series_id, :].to_numpy().flatten() 997 ) 998 return pd.DataFrame(result_dict, index=self.output_dates_) 999 1000 # only one prediction interval 1001 self.output_dates_, frequency = 
ts.compute_output_dates(self.df_, h) 1002 1003 self.level_ = level 1004 1005 self.return_std_ = False # do not remove (/!\) 1006 1007 self.mean_ = None # do not remove (/!\) 1008 1009 self.mean_ = deepcopy(self.y_) # do not remove (/!\) 1010 1011 self.lower_ = None # do not remove (/!\) 1012 1013 self.upper_ = None # do not remove (/!\) 1014 1015 self.sims_ = None # do not remove (/!\) 1016 1017 y_means_ = np.asarray( 1018 [self.y_means_[i] for i in range(self.init_n_series_)] 1019 ) 1020 1021 n_features = self.init_n_series_ * self.lags 1022 1023 self.alpha_ = 100 - level 1024 1025 pi_multiplier = norm.ppf(1 - self.alpha_ / 200) 1026 1027 if "return_std" in kwargs: # bayesian forecasting 1028 self.return_std_ = True 1029 self.preds_std_ = [] 1030 DescribeResult = namedtuple( 1031 "DescribeResult", ("mean", "lower", "upper") 1032 ) # to be updated 1033 1034 if "return_pi" in kwargs: # split conformal, without simulation 1035 mean_pi_ = [] 1036 lower_pi_ = [] 1037 upper_pi_ = [] 1038 median_pi_ = [] 1039 DescribeResult = namedtuple( 1040 "DescribeResult", ("mean", "lower", "upper") 1041 ) # to be updated 1042 1043 if self.kde_ != None and "kde" in self.type_pi: # kde 1044 target_cols = self.df_.columns[ 1045 : self.init_n_series_ 1046 ] # Get target column names 1047 if self.verbose == 1: 1048 self.residuals_sims_ = tuple( 1049 self.kde_.sample( 1050 n_samples=h, random_state=self.seed + 100 * i 1051 ) # Keep full sample 1052 for i in tqdm(range(self.replications)) 1053 ) 1054 elif self.verbose == 0: 1055 self.residuals_sims_ = tuple( 1056 self.kde_.sample( 1057 n_samples=h, random_state=self.seed + 100 * i 1058 ) # Keep full sample 1059 for i in range(self.replications) 1060 ) 1061 1062 # Convert to DataFrames after sampling 1063 self.residuals_sims_ = tuple( 1064 pd.DataFrame( 1065 sim, # Keep all columns 1066 columns=target_cols, # Use original target column names 1067 index=self.output_dates_, 1068 ) 1069 for sim in self.residuals_sims_ 1070 ) 1071 1072 if self.type_pi in ("bootstrap", "scp-bootstrap", "scp2-bootstrap"): 1073 assert self.replications is not None and isinstance( 1074 self.replications, int 1075 ), "'replications' must be provided and be an integer" 1076 if self.verbose == 1: 1077 self.residuals_sims_ = tuple( 1078 ts.bootstrap( 1079 self.residuals_, 1080 h=h, 1081 block_size=None, 1082 seed=self.seed + 100 * i, 1083 ) 1084 for i in tqdm(range(self.replications)) 1085 ) 1086 elif self.verbose == 0: 1087 self.residuals_sims_ = tuple( 1088 ts.bootstrap( 1089 self.residuals_, 1090 h=h, 1091 block_size=None, 1092 seed=self.seed + 100 * i, 1093 ) 1094 for i in range(self.replications) 1095 ) 1096 1097 if self.type_pi in ( 1098 "block-bootstrap", 1099 "scp-block-bootstrap", 1100 "scp2-block-bootstrap", 1101 ): 1102 if self.block_size is None: 1103 self.block_size = int( 1104 np.ceil(3.15 * (self.residuals_.shape[0] ** (1 / 3))) 1105 ) 1106 1107 assert self.replications is not None and isinstance( 1108 self.replications, int 1109 ), "'replications' must be provided and be an integer" 1110 if self.verbose == 1: 1111 self.residuals_sims_ = tuple( 1112 ts.bootstrap( 1113 self.residuals_, 1114 h=h, 1115 block_size=self.block_size, 1116 seed=self.seed + 100 * i, 1117 ) 1118 for i in tqdm(range(self.replications)) 1119 ) 1120 elif self.verbose == 0: 1121 self.residuals_sims_ = tuple( 1122 ts.bootstrap( 1123 self.residuals_, 1124 h=h, 1125 block_size=self.block_size, 1126 seed=self.seed + 100 * i, 1127 ) 1128 for i in range(self.replications) 1129 ) 1130 1131 if "vine" in self.type_pi: 
1132 if self.verbose == 1: 1133 self.residuals_sims_ = tuple( 1134 vinecopula_sample( 1135 x=self.residuals_, 1136 n_samples=h, 1137 method=self.type_pi, 1138 random_state=self.seed + 100 * i, 1139 ) 1140 for i in tqdm(range(self.replications)) 1141 ) 1142 elif self.verbose == 0: 1143 self.residuals_sims_ = tuple( 1144 vinecopula_sample( 1145 x=self.residuals_, 1146 n_samples=h, 1147 method=self.type_pi, 1148 random_state=self.seed + 100 * i, 1149 ) 1150 for i in range(self.replications) 1151 ) 1152 1153 mean_ = deepcopy(self.mean_) 1154 1155 for i in range(h): 1156 new_obs = ts.reformat_response(mean_, self.lags) 1157 new_X = new_obs.reshape(1, -1) 1158 cooked_new_X = self.cook_test_set(new_X, **kwargs) 1159 1160 if "return_std" in kwargs: 1161 self.preds_std_.append( 1162 [ 1163 np.asarray( 1164 self.fit_objs_[i].predict( 1165 cooked_new_X, return_std=True 1166 )[1] 1167 ).item() 1168 for i in range(self.n_series) 1169 ] 1170 ) 1171 1172 if "return_pi" in kwargs: 1173 for i in range(self.n_series): 1174 preds_pi = self.fit_objs_[i].predict(cooked_new_X, **kwargs) 1175 mean_pi_.append(preds_pi.mean[0]) 1176 lower_pi_.append(preds_pi.lower[0]) 1177 upper_pi_.append(preds_pi.upper[0]) 1178 1179 if self.type_pi != "quantile": 1180 predicted_cooked_new_X = np.asarray( 1181 [ 1182 np.asarray( 1183 self.fit_objs_[i].predict(cooked_new_X) 1184 ).item() 1185 for i in range(self.init_n_series_) 1186 ] 1187 ) 1188 else: 1189 predicted_cooked_new_X = np.asarray( 1190 [ 1191 np.asarray( 1192 self.fit_objs_[i] 1193 .predict(cooked_new_X, return_pi=True) 1194 .upper 1195 ).item() 1196 for i in range(self.init_n_series_) 1197 ] 1198 ) 1199 1200 preds = np.asarray(y_means_ + predicted_cooked_new_X) 1201 1202 # Create full row with both predictions and external regressors 1203 if self.xreg_ is not None and "xreg" in kwargs: 1204 next_xreg = kwargs["xreg"].iloc[i: i + 1].values.flatten() 1205 full_row = np.concatenate([preds, next_xreg]) 1206 else: 1207 full_row = preds 1208 1209 # Create a new row with same number of columns as mean_ 1210 new_row = np.zeros((1, mean_.shape[1])) 1211 new_row[0, : full_row.shape[0]] = full_row 1212 1213 # Maintain the full dimensionality by using vstack instead of rbind 1214 mean_ = np.vstack([new_row, mean_[:-1]]) 1215 1216 # Final output should only include the target columns 1217 self.mean_ = pd.DataFrame( 1218 mean_[0: min(h, self.n_obs_ - self.lags), : self.init_n_series_][ 1219 ::-1 1220 ], 1221 columns=self.df_.columns[: self.init_n_series_], 1222 index=self.output_dates_, 1223 ) 1224 1225 # function's return ---------------------------------------------------------------------- 1226 if ( 1227 (("return_std" not in kwargs) and ("return_pi" not in kwargs)) 1228 and (self.type_pi not in ("gaussian", "scp")) 1229 ) or ("vine" in self.type_pi): 1230 if self.replications is None: 1231 return self.mean_.iloc[:, : self.init_n_series_] 1232 1233 # if "return_std" not in kwargs and self.replications is not None 1234 meanf = [] 1235 medianf = [] 1236 lower = [] 1237 upper = [] 1238 1239 if "scp2" in self.type_pi: 1240 if self.verbose == 1: 1241 self.sims_ = tuple( 1242 ( 1243 self.mean_ 1244 + self.residuals_sims_[i] 1245 * self.residuals_std_dev_[np.newaxis, :] 1246 for i in tqdm(range(self.replications)) 1247 ) 1248 ) 1249 elif self.verbose == 0: 1250 self.sims_ = tuple( 1251 ( 1252 self.mean_ 1253 + self.residuals_sims_[i] 1254 * self.residuals_std_dev_[np.newaxis, :] 1255 for i in range(self.replications) 1256 ) 1257 ) 1258 else: 1259 if self.verbose == 1: 1260 self.sims_ 
= tuple( 1261 ( 1262 self.mean_ + self.residuals_sims_[i] 1263 for i in tqdm(range(self.replications)) 1264 ) 1265 ) 1266 elif self.verbose == 0: 1267 self.sims_ = tuple( 1268 ( 1269 self.mean_ + self.residuals_sims_[i] 1270 for i in range(self.replications) 1271 ) 1272 ) 1273 1274 DescribeResult = namedtuple( 1275 "DescribeResult", ("mean", "sims", "lower", "upper") 1276 ) 1277 for ix in range(self.init_n_series_): 1278 sims_ix = getsims(self.sims_, ix) 1279 if self.agg == "mean": 1280 meanf.append(np.mean(sims_ix, axis=1)) 1281 else: 1282 medianf.append(np.median(sims_ix, axis=1)) 1283 lower.append(np.quantile(sims_ix, q=self.alpha_ / 200, axis=1)) 1284 upper.append( 1285 np.quantile(sims_ix, q=1 - self.alpha_ / 200, axis=1) 1286 ) 1287 self.mean_ = pd.DataFrame( 1288 np.asarray(meanf).T, 1289 columns=self.series_names[ 1290 : self.init_n_series_ 1291 ], # self.df_.columns, 1292 index=self.output_dates_, 1293 ) 1294 1295 self.lower_ = pd.DataFrame( 1296 np.asarray(lower).T, 1297 columns=self.series_names[ 1298 : self.init_n_series_ 1299 ], # self.df_.columns, 1300 index=self.output_dates_, 1301 ) 1302 1303 self.upper_ = pd.DataFrame( 1304 np.asarray(upper).T, 1305 columns=self.series_names[ 1306 : self.init_n_series_ 1307 ], # self.df_.columns, 1308 index=self.output_dates_, 1309 ) 1310 1311 try: 1312 self.median_ = pd.DataFrame( 1313 np.asarray(medianf).T, 1314 columns=self.series_names[ 1315 : self.init_n_series_ 1316 ], # self.df_.columns, 1317 index=self.output_dates_, 1318 ) 1319 except Exception as e: 1320 pass 1321 1322 return DescribeResult( 1323 self.mean_, self.sims_, self.lower_, self.upper_ 1324 ) 1325 1326 if ( 1327 (("return_std" in kwargs) or ("return_pi" in kwargs)) 1328 and (self.type_pi not in ("gaussian", "scp")) 1329 ) or "vine" in self.type_pi: 1330 DescribeResult = namedtuple( 1331 "DescribeResult", ("mean", "lower", "upper") 1332 ) 1333 1334 self.mean_ = pd.DataFrame( 1335 np.asarray(self.mean_), 1336 columns=self.series_names, # self.df_.columns, 1337 index=self.output_dates_, 1338 ) 1339 1340 if "return_std" in kwargs: 1341 self.preds_std_ = np.asarray(self.preds_std_) 1342 1343 self.lower_ = pd.DataFrame( 1344 self.mean_.values - pi_multiplier * self.preds_std_, 1345 columns=self.series_names, # self.df_.columns, 1346 index=self.output_dates_, 1347 ) 1348 1349 self.upper_ = pd.DataFrame( 1350 self.mean_.values + pi_multiplier * self.preds_std_, 1351 columns=self.series_names, # self.df_.columns, 1352 index=self.output_dates_, 1353 ) 1354 1355 if "return_pi" in kwargs: 1356 self.lower_ = pd.DataFrame( 1357 np.asarray(lower_pi_).reshape(h, self.n_series) 1358 + y_means_[np.newaxis, :], 1359 columns=self.series_names, # self.df_.columns, 1360 index=self.output_dates_, 1361 ) 1362 1363 self.upper_ = pd.DataFrame( 1364 np.asarray(upper_pi_).reshape(h, self.n_series) 1365 + y_means_[np.newaxis, :], 1366 columns=self.series_names, # self.df_.columns, 1367 index=self.output_dates_, 1368 ) 1369 1370 res = DescribeResult(self.mean_, self.lower_, self.upper_) 1371 1372 if self.xreg_ is not None: 1373 if len(self.xreg_.shape) > 1: 1374 res2 = mx.tuple_map( 1375 res, 1376 lambda x: mo.delete_last_columns( 1377 x, num_columns=self.xreg_.shape[1] 1378 ), 1379 ) 1380 else: 1381 res2 = mx.tuple_map( 1382 res, lambda x: mo.delete_last_columns(x, num_columns=1) 1383 ) 1384 return DescribeResult(res2[0], res2[1], res2[2]) 1385 1386 return res 1387 1388 if self.type_pi == "gaussian": 1389 DescribeResult = namedtuple( 1390 "DescribeResult", ("mean", "lower", "upper") 1391 ) 1392 1393 
self.mean_ = pd.DataFrame( 1394 np.asarray(self.mean_), 1395 columns=self.series_names, # self.df_.columns, 1396 index=self.output_dates_, 1397 ) 1398 1399 # Use Bayesian std if available, otherwise use gaussian residual std 1400 if "return_std" in kwargs and len(self.preds_std_) > 0: 1401 preds_std_to_use = np.asarray(self.preds_std_) 1402 else: 1403 preds_std_to_use = self.gaussian_preds_std_ 1404 1405 self.lower_ = pd.DataFrame( 1406 self.mean_.values - pi_multiplier * preds_std_to_use, 1407 columns=self.series_names, # self.df_.columns, 1408 index=self.output_dates_, 1409 ) 1410 1411 self.upper_ = pd.DataFrame( 1412 self.mean_.values + pi_multiplier * preds_std_to_use, 1413 columns=self.series_names, # self.df_.columns, 1414 index=self.output_dates_, 1415 ) 1416 1417 res = DescribeResult(self.mean_, self.lower_, self.upper_) 1418 1419 if self.xreg_ is not None: 1420 if len(self.xreg_.shape) > 1: 1421 res2 = mx.tuple_map( 1422 res, 1423 lambda x: mo.delete_last_columns( 1424 x, num_columns=self.xreg_.shape[1] 1425 ), 1426 ) 1427 else: 1428 res2 = mx.tuple_map( 1429 res, lambda x: mo.delete_last_columns(x, num_columns=1) 1430 ) 1431 return DescribeResult(res2[0], res2[1], res2[2]) 1432 1433 return res 1434 1435 if self.type_pi == "quantile": 1436 DescribeResult = namedtuple("DescribeResult", ("mean")) 1437 1438 self.mean_ = pd.DataFrame( 1439 np.asarray(self.mean_), 1440 columns=self.series_names, # self.df_.columns, 1441 index=self.output_dates_, 1442 ) 1443 1444 res = DescribeResult(self.mean_) 1445 1446 if self.xreg_ is not None: 1447 if len(self.xreg_.shape) > 1: 1448 res2 = mx.tuple_map( 1449 res, 1450 lambda x: mo.delete_last_columns( 1451 x, num_columns=self.xreg_.shape[1] 1452 ), 1453 ) 1454 else: 1455 res2 = mx.tuple_map( 1456 res, lambda x: mo.delete_last_columns(x, num_columns=1) 1457 ) 1458 return DescribeResult(res2[0]) 1459 1460 return res 1461 1462 # After prediction loop, ensure sims only contain target columns 1463 if self.sims_ is not None: 1464 if self.verbose == 1: 1465 self.sims_ = tuple( 1466 sim[:h,] # Only keep target columns and h rows 1467 for sim in tqdm(self.sims_) 1468 ) 1469 elif self.verbose == 0: 1470 self.sims_ = tuple( 1471 sim[:h,] # Only keep target columns and h rows 1472 for sim in self.sims_ 1473 ) 1474 1475 # Convert numpy arrays to DataFrames with proper columns 1476 self.sims_ = tuple( 1477 pd.DataFrame( 1478 sim, 1479 columns=self.df_.columns[: self.init_n_series_], 1480 index=self.output_dates_, 1481 ) 1482 for sim in self.sims_ 1483 ) 1484 1485 if self.type_pi in ( 1486 "kde", 1487 "bootstrap", 1488 "block-bootstrap", 1489 "vine-copula", 1490 ): 1491 if self.xreg_ is not None: 1492 # Use getsimsxreg when external regressors are present 1493 target_cols = self.df_.columns[: self.init_n_series_] 1494 self.sims_ = getsimsxreg( 1495 self.sims_, self.output_dates_, target_cols 1496 ) 1497 else: 1498 # Use original getsims for backward compatibility 1499 self.sims_ = getsims(self.sims_) 1500 1501 def _crps_ensemble(self, y_true, simulations, axis=0): 1502 """ 1503 Compute the Continuous Ranked Probability Score (CRPS) for an ensemble of simulations. 1504 1505 The CRPS is a measure of the distance between the cumulative distribution 1506 function (CDF) of a forecast and the CDF of the observed value. This method 1507 computes the CRPS in a vectorized form for an ensemble of simulations, efficiently 1508 handling the case where there is only one simulation. 
1509 1510 Parameters 1511 ---------- 1512 y_true : array_like, shape (n,) 1513 A 1D array of true values (observations). 1514 Each element represents the true value for a given sample. 1515 1516 simulations : array_like, shape (n, R) 1517 A 2D array of simulated values. Each row corresponds to a different sample 1518 and each column corresponds to a different simulation of that sample. 1519 1520 axis : int, optional, default=0 1521 Axis along which to transpose the simulations if needed. 1522 If axis=0, the simulations are transposed to shape (R, n). 1523 1524 Returns 1525 ------- 1526 crps : ndarray, shape (n,) 1527 A 1D array of CRPS scores, one for each sample. 1528 1529 Notes 1530 ----- 1531 The CRPS score is computed as: 1532 1533 CRPS(y_true, simulations) = E[|X - y|] - 0.5 * E[|X - X'|] 1534 1535 Where: 1536 - `X` is the ensemble of simulations. 1537 - `y` is the true value. 1538 - `X'` is a second independent sample from the ensemble. 1539 1540 The calculation is vectorized to optimize performance for large datasets. 1541 1542 The edge case where `R=1` (only one simulation) is handled by returning 1543 only `term1` (i.e., no ensemble spread). 1544 """ 1545 sims = np.asarray(simulations) # Convert simulations to numpy array 1546 if axis == 0: 1547 sims = sims.T # Transpose if the axis is 0 1548 n, R = sims.shape # n = number of samples, R = number of simulations 1549 # Term 1: E|X - y|, average absolute difference between simulations and true value 1550 term1 = np.mean(np.abs(sims - y_true[:, np.newaxis]), axis=1) 1551 # Handle edge case: if R == 1, return term1 (no spread in ensemble) 1552 if R == 1: 1553 return term1 1554 # Term 2: 0.5 * E|X - X'|, using efficient sorted formula 1555 sims_sorted = np.sort(sims, axis=1) # Sort simulations along each row 1556 # Correct coefficients for efficient calculation 1557 j = np.arange(R) # 0-indexed positions in the sorted simulations 1558 coefficients = (2 * (j + 1) - R - 1) / ( 1559 R * (R - 1) 1560 ) # Efficient coefficient calculation 1561 # Dot product along the second axis (over the simulations) 1562 term2 = np.dot(sims_sorted, coefficients) 1563 # Return CRPS score: term1 - 0.5 * term2 1564 return term1 - 0.5 * term2 1565 1566 def score( 1567 self, 1568 X, 1569 training_index, 1570 testing_index, 1571 scoring=None, 1572 alpha=0.5, 1573 **kwargs, 1574 ): 1575 """Train on training_index, score on testing_index.""" 1576 1577 assert ( 1578 bool(set(training_index).intersection(set(testing_index))) == False 1579 ), "Non-overlapping 'training_index' and 'testing_index' required" 1580 1581 # Dimensions 1582 try: 1583 # multivariate time series 1584 n, p = X.shape 1585 except: 1586 # univariate time series 1587 n = X.shape[0] 1588 p = 1 1589 1590 # Training and testing sets 1591 if p > 1: 1592 X_train = X[training_index, :] 1593 X_test = X[testing_index, :] 1594 else: 1595 X_train = X[training_index] 1596 X_test = X[testing_index] 1597 1598 # Horizon 1599 h = len(testing_index) 1600 assert ( 1601 len(training_index) + h 1602 ) <= n, "Please check lengths of training and testing windows" 1603 1604 # Fit and predict 1605 self.fit(X_train, **kwargs) 1606 preds = self.predict(h=h, **kwargs) 1607 1608 if scoring is None: 1609 scoring = "neg_root_mean_squared_error" 1610 1611 if scoring == "pinball": 1612 # Predict requested quantile 1613 q_pred = self.predict(h=h, quantiles=[alpha], **kwargs) 1614 # Handle multivariate 1615 scores = [] 1616 for j in range(p): 1617 series_name = getattr(self, "series_names", [f"Series_{j}"])[j] 1618 q_label = ( 
1619 f"{int(alpha * 100):02d}" 1620 if (alpha * 100).is_integer() 1621 else f"{alpha:.3f}".replace(".", "_") 1622 ) 1623 col = f"quantile_{q_label}_{series_name}" 1624 if col not in q_pred.columns: 1625 raise ValueError( 1626 f"Column '{col}' not found in quantile forecast output." 1627 ) 1628 y_true_j = X_test[:, j] 1629 y_pred_j = q_pred[col].values 1630 # Compute pinball loss for this series 1631 loss = mean_pinball_loss(y_true_j, y_pred_j, alpha=alpha) 1632 scores.append(loss) 1633 # Return average over series 1634 return np.mean(scores) 1635 1636 if scoring == "crps": 1637 # Ensure simulations exist 1638 preds = self.predict(h=h, **kwargs) # triggers self.sims_ 1639 # Extract simulations: list of DataFrames → (R, h, p) 1640 sims_vals = np.stack( 1641 [sim.values for sim in self.sims_], axis=0 1642 ) # (R, h, p) 1643 crps_scores = [] 1644 for j in range(p): 1645 y_true_j = X_test[:, j] 1646 sims_j = sims_vals[:, :, j] # (R, h) 1647 crps_j = self._crps_ensemble(np.asarray(y_true_j), sims_j) 1648 crps_scores.append(np.mean(crps_j)) # average over horizon 1649 return np.mean(crps_scores) # average over series 1650 1651 # check inputs 1652 assert scoring in ( 1653 "explained_variance", 1654 "neg_mean_absolute_error", 1655 "neg_mean_squared_error", 1656 "neg_root_mean_squared_error", 1657 "neg_mean_squared_log_error", 1658 "neg_median_absolute_error", 1659 "r2", 1660 ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \ 1661 'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \ 1662 'neg_median_absolute_error', 'r2')" 1663 1664 scoring_options = { 1665 "explained_variance": skm2.explained_variance_score, 1666 "neg_mean_absolute_error": skm2.mean_absolute_error, 1667 "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2), 1668 "neg_root_mean_squared_error": lambda x, y: np.sqrt( 1669 np.mean((x - y) ** 2) 1670 ), 1671 "neg_mean_squared_log_error": skm2.mean_squared_log_error, 1672 "neg_median_absolute_error": skm2.median_absolute_error, 1673 "r2": skm2.r2_score, 1674 } 1675 1676 return scoring_options[scoring](X_test, preds) 1677 1678 def plot(self, series=None, type_axis="dates", type_plot="pi"): 1679 """Plot time series forecast 1680 1681 Parameters: 1682 1683 series: {integer} or {string} 1684 series index or name 1685 1686 """ 1687 1688 assert all( 1689 [ 1690 self.mean_ is not None, 1691 self.lower_ is not None, 1692 self.upper_ is not None, 1693 self.output_dates_ is not None, 1694 ] 1695 ), "model forecasting must be obtained first (with predict)" 1696 1697 if series is None: 1698 # assert ( 1699 # self.init_n_series_ == 1 1700 # ), "please specify series index or name (n_series > 1)" 1701 series = 0 1702 1703 if isinstance(series, str): 1704 assert ( 1705 series in self.series_names 1706 ), f"series {series} doesn't exist in the input dataset" 1707 series_idx = self.df_.columns.get_loc(series) 1708 else: 1709 assert isinstance(series, int) and ( 1710 0 <= series < self.n_series 1711 ), f"check series index (< {self.n_series})" 1712 series_idx = series 1713 1714 y_all = list(self.df_.iloc[:, series_idx]) + list( 1715 self.mean_.iloc[:, series_idx] 1716 ) 1717 y_test = list(self.mean_.iloc[:, series_idx]) 1718 n_points_all = len(y_all) 1719 n_points_train = self.df_.shape[0] 1720 1721 if type_axis == "numeric": 1722 x_all = [i for i in range(n_points_all)] 1723 x_test = [i for i in range(n_points_train, n_points_all)] 1724 1725 if type_axis == "dates": # use dates 1726 x_all = np.concatenate( 1727 
(self.input_dates.values, self.output_dates_.values), axis=None 1728 ) 1729 x_test = self.output_dates_.values 1730 1731 if type_plot == "pi": 1732 fig, ax = plt.subplots() 1733 ax.plot(x_all, y_all, "-") 1734 ax.plot(x_test, y_test, "-", color="orange") 1735 ax.fill_between( 1736 x_test, 1737 self.lower_.iloc[:, series_idx], 1738 self.upper_.iloc[:, series_idx], 1739 alpha=0.2, 1740 color="orange", 1741 ) 1742 if self.replications is None: 1743 if self.n_series > 1: 1744 plt.title( 1745 f"prediction intervals for {series}", 1746 loc="left", 1747 fontsize=12, 1748 fontweight=0, 1749 color="black", 1750 ) 1751 else: 1752 plt.title( 1753 f"prediction intervals for input time series", 1754 loc="left", 1755 fontsize=12, 1756 fontweight=0, 1757 color="black", 1758 ) 1759 plt.show() 1760 else: # self.replications is not None 1761 if self.n_series > 1: 1762 plt.title( 1763 f"prediction intervals for {self.replications} simulations of {series}", 1764 loc="left", 1765 fontsize=12, 1766 fontweight=0, 1767 color="black", 1768 ) 1769 else: 1770 plt.title( 1771 f"prediction intervals for {self.replications} simulations of input time series", 1772 loc="left", 1773 fontsize=12, 1774 fontweight=0, 1775 color="black", 1776 ) 1777 plt.show() 1778 1779 if type_plot == "spaghetti": 1780 palette = plt.get_cmap("Set1") 1781 sims_ix = getsims(self.sims_, series_idx) 1782 plt.plot(x_all, y_all, "-") 1783 for col_ix in range( 1784 sims_ix.shape[1] 1785 ): # avoid this when there are thousands of simulations 1786 plt.plot( 1787 x_test, 1788 sims_ix[:, col_ix], 1789 "-", 1790 color=palette(col_ix), 1791 linewidth=1, 1792 alpha=0.9, 1793 ) 1794 plt.plot(x_all, y_all, "-", color="black") 1795 plt.plot(x_test, y_test, "-", color="blue") 1796 # Add titles 1797 if self.n_series > 1: 1798 plt.title( 1799 f"{self.replications} simulations of {series}", 1800 loc="left", 1801 fontsize=12, 1802 fontweight=0, 1803 color="black", 1804 ) 1805 else: 1806 plt.title( 1807 f"{self.replications} simulations of input time series", 1808 loc="left", 1809 fontsize=12, 1810 fontweight=0, 1811 color="black", 1812 ) 1813 plt.xlabel("Time") 1814 plt.ylabel("Values") 1815 # Show the graph 1816 plt.show() 1817 1818 def cross_val_score( 1819 self, 1820 X, 1821 scoring="root_mean_squared_error", 1822 n_jobs=None, 1823 verbose=0, 1824 xreg=None, 1825 initial_window=5, 1826 horizon=3, 1827 fixed_window=False, 1828 show_progress=True, 1829 level=95, 1830 alpha=0.5, 1831 **kwargs, 1832 ): 1833 """Evaluate a score by time series cross-validation. 1834 1835 Parameters: 1836 1837 X: {array-like, sparse matrix} of shape (n_samples, n_features) 1838 The data to fit. 1839 1840 scoring: str or a function 1841 A str in ('root_mean_squared_error', 'mean_squared_error', 'mean_error', 1842 'mean_absolute_error', 'mean_error', 'mean_percentage_error', 1843 'mean_absolute_percentage_error', 'winkler_score', 'coverage') 1844 Or a function defined as 'coverage' and 'winkler_score' in `utils.timeseries` 1845 1846 n_jobs: int, default=None 1847 Number of jobs to run in parallel. 1848 1849 verbose: int, default=0 1850 The verbosity level. 
1851 1852 xreg: array-like, optional (default=None) 1853 Additional (external) regressors to be passed to `fit` 1854 xreg must be in 'increasing' order (most recent observations last) 1855 1856 initial_window: int 1857 initial number of consecutive values in each training set sample 1858 1859 horizon: int 1860 number of consecutive values in test set sample 1861 1862 fixed_window: boolean 1863 if False, all training samples start at index 0, and the training 1864 window's size is increasing. 1865 if True, the training window's size is fixed, and the window is 1866 rolling forward 1867 1868 show_progress: boolean 1869 if True, a progress bar is printed 1870 1871 level: int 1872 confidence level for prediction intervals 1873 1874 alpha: float 1875 quantile level for pinball loss if scoring='pinball' 1876 0 < alpha < 1 1877 1878 **kwargs: dict 1879 additional parameters to be passed to `fit` and `predict` 1880 1881 Returns: 1882 1883 A tuple: descriptive statistics or errors and raw errors 1884 1885 """ 1886 tscv = TimeSeriesSplit() 1887 1888 tscv_obj = tscv.split( 1889 X, 1890 initial_window=initial_window, 1891 horizon=horizon, 1892 fixed_window=fixed_window, 1893 ) 1894 1895 if isinstance(scoring, str): 1896 assert scoring in ( 1897 "pinball", 1898 "crps", 1899 "root_mean_squared_error", 1900 "mean_squared_error", 1901 "mean_error", 1902 "mean_absolute_error", 1903 "mean_percentage_error", 1904 "mean_absolute_percentage_error", 1905 "winkler_score", 1906 "coverage", 1907 ), "must have scoring in ('pinball', 'crps', 'root_mean_squared_error', 'mean_squared_error', 'mean_error', 'mean_absolute_error', 'mean_error', 'mean_percentage_error', 'mean_absolute_percentage_error', 'winkler_score', 'coverage')" 1908 1909 def err_func(X_test, X_pred, scoring, alpha=0.5): 1910 if (self.replications is not None) or ( 1911 self.type_pi == "gaussian" 1912 ): # probabilistic 1913 if scoring == "pinball": 1914 # Predict requested quantile 1915 q_pred = self.predict( 1916 h=len(X_test), quantiles=[alpha], **kwargs 1917 ) 1918 # Handle multivariate 1919 scores = [] 1920 p = X_test.shape[1] if len(X_test.shape) > 1 else 1 1921 for j in range(p): 1922 series_name = getattr( 1923 self, "series_names", [f"Series_{j}"] 1924 )[j] 1925 q_label = ( 1926 f"{int(alpha * 100):02d}" 1927 if (alpha * 100).is_integer() 1928 else f"{alpha:.3f}".replace(".", "_") 1929 ) 1930 col = f"quantile_{q_label}_{series_name}" 1931 if col not in q_pred.columns: 1932 raise ValueError( 1933 f"Column '{col}' not found in quantile forecast output." 
1934 ) 1935 try: 1936 y_true_j = X_test[:, j] if p > 1 else X_test 1937 except: 1938 y_true_j = ( 1939 X_test.iloc[:, j] 1940 if p > 1 1941 else X_test.values 1942 ) 1943 y_pred_j = q_pred[col].values 1944 # Compute pinball loss for this series 1945 loss = mean_pinball_loss( 1946 y_true_j, y_pred_j, alpha=alpha 1947 ) 1948 scores.append(loss) 1949 # Return average over series 1950 return np.mean(scores) 1951 elif scoring == "crps": 1952 # Ensure simulations exist 1953 _ = self.predict( 1954 h=len(X_test), **kwargs 1955 ) # triggers self.sims_ 1956 # Extract simulations: list of DataFrames → (R, h, p) 1957 sims_vals = np.stack( 1958 [sim.values for sim in self.sims_], axis=0 1959 ) # (R, h, p) 1960 crps_scores = [] 1961 p = X_test.shape[1] if len(X_test.shape) > 1 else 1 1962 for j in range(p): 1963 try: 1964 y_true_j = X_test[:, j] if p > 1 else X_test 1965 except Exception as e: 1966 y_true_j = ( 1967 X_test.iloc[:, j] 1968 if p > 1 1969 else X_test.values 1970 ) 1971 sims_j = sims_vals[:, :, j] # (R, h) 1972 crps_j = self._crps_ensemble( 1973 np.asarray(y_true_j), sims_j 1974 ) 1975 crps_scores.append( 1976 np.mean(crps_j) 1977 ) # average over horizon 1978 return np.mean(crps_scores) # average over series 1979 if scoring == "winkler_score": 1980 return winkler_score(X_pred, X_test, level=level) 1981 elif scoring == "coverage": 1982 return coverage(X_pred, X_test, level=level) 1983 else: 1984 return mean_errors( 1985 pred=X_pred.mean, actual=X_test, scoring=scoring 1986 ) 1987 else: # not probabilistic 1988 return mean_errors( 1989 pred=X_pred, actual=X_test, scoring=scoring 1990 ) 1991 1992 else: # isinstance(scoring, str) = False 1993 err_func = scoring 1994 1995 errors = [] 1996 1997 train_indices = [] 1998 1999 test_indices = [] 2000 2001 for train_index, test_index in tscv_obj: 2002 train_indices.append(train_index) 2003 test_indices.append(test_index) 2004 2005 if show_progress is True: 2006 iterator = tqdm( 2007 zip(train_indices, test_indices), total=len(train_indices) 2008 ) 2009 else: 2010 iterator = zip(train_indices, test_indices) 2011 2012 for train_index, test_index in iterator: 2013 if verbose == 1: 2014 print(f"TRAIN: {train_index}") 2015 print(f"TEST: {test_index}") 2016 2017 if isinstance(X, pd.DataFrame): 2018 self.fit(X.iloc[train_index, :], xreg=xreg, **kwargs) 2019 X_test = X.iloc[test_index, :] 2020 else: 2021 self.fit(X[train_index, :], xreg=xreg, **kwargs) 2022 X_test = X[test_index, :] 2023 X_pred = self.predict(h=int(len(test_index)), level=level, **kwargs) 2024 2025 errors.append(err_func(X_test, X_pred, scoring, alpha=alpha)) 2026 2027 res = np.asarray(errors) 2028 2029 return res, describe(res) 2030 2031 def _compute_information_criterion(self, curr_lags, criterion="AIC"): 2032 """Compute information criterion using existing residuals 2033 2034 Parameters 2035 ---------- 2036 curr_lags : int 2037 Current number of lags being evaluated 2038 criterion : str 2039 One of 'AIC', 'AICc', or 'BIC' 2040 2041 Returns 2042 ------- 2043 float 2044 Information criterion value or inf if parameters exceed observations 2045 """ 2046 # Get dimensions 2047 n_obs = self.residuals_.shape[0] 2048 n_features = int(self.init_n_series_ * curr_lags) 2049 n_hidden = int(self.n_hidden_features) 2050 # Calculate number of parameters 2051 term1 = int(n_features * n_hidden) 2052 term2 = int(n_hidden * self.init_n_series_) 2053 n_params = term1 + term2 2054 # Check if we have enough observations for the number of parameters 2055 if n_obs <= n_params + 1: 2056 return float("inf") # Return 
infinity if too many parameters 2057 # Compute RSS using existing residuals 2058 rss = np.sum(self.residuals_**2) 2059 # Compute criterion 2060 if criterion == "AIC": 2061 ic = n_obs * np.log(rss / n_obs) + 2 * n_params 2062 elif criterion == "AICc": 2063 ic = n_obs * np.log(rss / n_obs) + 2 * n_params * ( 2064 n_obs / (n_obs - n_params - 1) 2065 ) 2066 else: # BIC 2067 ic = n_obs * np.log(rss / n_obs) + n_params * np.log(n_obs) 2068 2069 return ic
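For illustration, here is a minimal sketch of the automatic lag selection that relies on `_compute_information_criterion` (assumptions: a scikit-learn `Ridge` base learner and synthetic data; passing a criterion name to `lags` instead of an integer triggers the selection):
import nnetsauce as ns
import numpy as np
from sklearn.linear_model import Ridge
np.random.seed(42)
X = np.random.rand(60, 2)
# 'AIC' (or 'AICc', 'BIC') requests automatic lag selection at fit time
obj_MTS = ns.MTS(Ridge(), lags="AIC", n_hidden_features=3)
obj_MTS.fit(X)
print(obj_MTS.lags)  # number of lags retained after minimizing the criterion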
Univariate and multivariate time series (MTS) forecasting with Quasi-Randomized networks
Parameters:
obj: object.
any object containing a method fit (obj.fit()) and a method predict
(obj.predict()).
n_hidden_features: int.
number of nodes in the hidden layer.
activation_name: str.
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'.
a: float.
hyperparameter for 'prelu' or 'elu' activation function.
nodes_sim: str.
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'.
bias: boolean.
indicates if the hidden layer contains a bias term (True) or not
(False).
dropout: float.
regularization parameter; (random) percentage of nodes dropped out
of the training.
direct_link: boolean.
indicates whether the original predictors are included (True) in the model's fitting or not (False).
n_clusters: int.
number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering).
cluster_encode: bool.
defines how the variable containing clusters is treated (default is one-hot);
if `False`, then labels are used, without one-hot encoding.
type_clust: str.
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm').
type_scaling: a tuple of 3 strings.
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax').
lags: int or str.
number of lags used for each time series; if a string, it must be one of 'AIC', 'AICc', or 'BIC',
and the number of lags is then selected automatically by minimizing that criterion.
type_pi: str.
type of prediction interval; currently:
- "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case
- "quantile": use model-agnostic quantile regression under the hood
- "kde": based on Kernel Density Estimation of in-sample residuals
- "bootstrap": based on independent bootstrap of in-sample residuals
- "block-bootstrap": based on basic block bootstrap of in-sample residuals
- "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals
- "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals
- "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals
- "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals
- "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals
- "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals
- based on copulas of in-sample residuals: 'vine-tll', 'vine-bb1', 'vine-bb6', 'vine-bb7', 'vine-bb8', 'vine-clayton',
'vine-frank', 'vine-gaussian', 'vine-gumbel', 'vine-indep', 'vine-joe', 'vine-student'
- 'scp-vine-tll', 'scp-vine-bb1', 'scp-vine-bb6', 'scp-vine-bb7', 'scp-vine-bb8', 'scp-vine-clayton',
'scp-vine-frank', 'scp-vine-gaussian', 'scp-vine-gumbel', 'scp-vine-indep', 'scp-vine-joe', 'scp-vine-student'
- 'scp2-vine-tll', 'scp2-vine-bb1', 'scp2-vine-bb6', 'scp2-vine-bb7', 'scp2-vine-bb8', 'scp2-vine-clayton',
'scp2-vine-frank', 'scp2-vine-gaussian', 'scp2-vine-gumbel', 'scp2-vine-indep', 'scp2-vine-joe', 'scp2-vine-student'
level: int.
level of confidence for `type_pi == 'quantile'` (default is `95`)
block_size: int.
size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap").
Default is round(3.15*(n_residuals^1/3))
replications: int.
number of replications (if needed, for predictive simulation). Default is `None`.
kernel: str.
the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'.
agg: str.
either "mean" or "median" for simulation of bootstrap aggregating
seed: int.
reproducibility seed for nodes_sim=='uniform' or predictive simulation.
backend: str.
"cpu" or "gpu" or "tpu".
verbose: int.
0: not printing; 1: printing
show_progress: bool.
True: progress bar when fitting each series; False: no progress bar when fitting each series
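To make the interplay between `type_pi`, `replications` and `block_size` more concrete, here is a hedged sketch (assuming a scikit-learn `Ridge` base learner and random data) of a simulation-based prediction interval:
import nnetsauce as ns
import numpy as np
from sklearn.linear_model import Ridge
np.random.seed(1)
X = np.random.rand(60, 2)
obj_MTS = ns.MTS(Ridge(), lags=2, n_hidden_features=5,
                 type_pi="block-bootstrap", replications=100, block_size=5)
obj_MTS.fit(X)
res = obj_MTS.predict(h=5, level=95)
print(res.mean)   # point forecasts (aggregated over the simulated paths)
print(res.lower)  # lower bound of the 95% prediction interval
print(res.upper)  # upper bound of the 95% prediction interval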
Attributes:
fit_objs_: dict
objects adjusted to each individual time series
y_: {array-like}
MTS responses (most recent observations first)
X_: {array-like}
MTS lags
xreg_: {array-like}
external regressors
y_means_: dict
a dictionary of each series' mean values
preds_: {array-like}
successive model predictions
preds_std_: {array-like}
standard deviation around the predictions for Bayesian base learners (`obj`)
gaussian_preds_std_: {array-like}
standard deviation around the predictions for `type_pi='gaussian'`
return_std_: boolean
return uncertainty or not (set in predict)
df_: data frame
the input data frame, in case a DataFrame is provided to `fit`
n_obs_: int
number of time series observations (number of rows for multivariate)
level_: int
level of confidence for prediction intervals (default is 95)
residuals_: {array-like}
in-sample residuals (for `type_pi` not conformal prediction) or calibrated residuals
(for `type_pi` in conformal prediction)
residuals_sims_: tuple of {array-like}
simulations of in-sample residuals (for `type_pi` not conformal prediction) or
calibrated residuals (for `type_pi` in conformal prediction)
kde_: A scikit-learn object, see https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KernelDensity.html
residuals_std_dev_: residuals standard deviation
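As a hedged sketch, after fitting and predicting with a simulation-based `type_pi` (for instance the block-bootstrap configuration sketched above), several of these attributes are populated and can be inspected directly:
obj_MTS.predict(h=5, level=95)
print(obj_MTS.mean_)             # forecast means, one column per series
print(obj_MTS.lower_)            # lower bounds of the prediction intervals
print(obj_MTS.upper_)            # upper bounds of the prediction intervals
print(obj_MTS.residuals_.shape)  # in-sample (or calibrated) residuals
print(len(obj_MTS.sims_))        # number of simulated future paths (= replications)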
Examples:
Example 1:
import nnetsauce as ns
import numpy as np
from sklearn import linear_model
np.random.seed(123)
M = np.random.rand(10, 3)
M[:,0] = 10*M[:,0]
M[:,2] = 25*M[:,2]
print(M)
# Adjust Bayesian Ridge
regr4 = linear_model.BayesianRidge()
obj_MTS = ns.MTS(regr4, lags = 1, n_hidden_features=5)
obj_MTS.fit(M)
print(obj_MTS.predict())
# with credible intervals
print(obj_MTS.predict(return_std=True, level=80))
print(obj_MTS.predict(return_std=True, level=95))
Example 2:
import nnetsauce as ns
import numpy as np
import pandas as pd
from sklearn import linear_model
dataset = {
'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
'series1' : [34, 30, 35.6, 33.3, 38.1],
'series2' : [4, 5.5, 5.6, 6.3, 5.1],
'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
df = pd.DataFrame(dataset).set_index('date')
print(df)
# Adjust Bayesian Ridge
regr5 = linear_model.BayesianRidge()
obj_MTS = ns.MTS(regr5, lags = 1, n_hidden_features=5)
obj_MTS.fit(df)
print(obj_MTS.predict())
# with credible intervals
print(obj_MTS.predict(return_std=True, level=80))
print(obj_MTS.predict(return_std=True, level=95))
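Example 3 (a hedged sketch, not part of the original examples), illustrating quantile forecasts obtained from simulated residuals (kernel density estimation combined with `replications`):
import nnetsauce as ns
import numpy as np
from sklearn import linear_model
np.random.seed(456)
M = np.random.rand(50, 3)
regr6 = linear_model.BayesianRidge()
obj_MTS = ns.MTS(regr6, lags=2, n_hidden_features=5,
                 type_pi="kde", replications=100)
obj_MTS.fit(M)
# arbitrary quantiles are computed from the simulated forecast paths
print(obj_MTS.predict(h=5, quantiles=[0.1, 0.5, 0.9]))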
337 def fit(self, X, xreg=None, **kwargs): 338 """Fit MTS model to training data X, with optional regressors xreg 339 340 Parameters: 341 342 X: {array-like}, shape = [n_samples, n_features] 343 Training time series, where n_samples is the number 344 of samples and n_features is the number of features; 345 X must be in increasing order (most recent observations last) 346 347 xreg: {array-like}, shape = [n_samples, n_features_xreg] 348 Additional (external) regressors to be passed to self.obj 349 xreg must be in 'increasing' order (most recent observations last) 350 351 **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity) 352 353 Returns: 354 355 self: object 356 """ 357 try: 358 self.init_n_series_ = X.shape[1] 359 except IndexError as e: 360 self.init_n_series_ = 1 361 362 # Automatic lag selection if requested 363 if isinstance(self.lags, str): 364 max_lags = min(25, X.shape[0] // 4) 365 best_ic = float("inf") 366 best_lags = 1 367 368 if self.verbose: 369 print( 370 f"\nSelecting optimal number of lags using {self.lags}..." 371 ) 372 iterator = tqdm(range(1, max_lags + 1)) 373 else: 374 iterator = range(1, max_lags + 1) 375 376 for lag in iterator: 377 # Convert DataFrame to numpy array before reversing 378 if isinstance(X, pd.DataFrame): 379 X_values = X.values[::-1] 380 else: 381 X_values = X[::-1] 382 383 # Try current lag value 384 if self.init_n_series_ > 1: 385 mts_input = ts.create_train_inputs(X_values, lag) 386 else: 387 mts_input = ts.create_train_inputs( 388 X_values.reshape(-1, 1), lag 389 ) 390 391 # Cook training set and fit model 392 dummy_y, scaled_Z = self.cook_training_set( 393 y=np.ones(mts_input[0].shape[0]), X=mts_input[1] 394 ) 395 residuals_ = [] 396 397 for i in range(self.init_n_series_): 398 y_mean = np.mean(mts_input[0][:, i]) 399 centered_y_i = mts_input[0][:, i] - y_mean 400 self.obj.fit(X=scaled_Z, y=centered_y_i) 401 residuals_.append( 402 (centered_y_i - self.obj.predict(scaled_Z)).tolist() 403 ) 404 405 self.residuals_ = np.asarray(residuals_).T 406 ic = self._compute_information_criterion( 407 curr_lags=lag, criterion=self.lags 408 ) 409 410 if self.verbose: 411 print(f"Trying lags={lag}, {self.lags}={ic:.2f}") 412 413 if ic < best_ic: 414 best_ic = ic 415 best_lags = lag 416 417 if self.verbose: 418 print( 419 f"\nSelected {best_lags} lags with {self.lags}={best_ic:.2f}" 420 ) 421 422 self.lags = best_lags 423 424 self.input_dates = None 425 self.df_ = None 426 427 if isinstance(X, pd.DataFrame) is False: 428 # input data set is a numpy array 429 if xreg is None: 430 X = pd.DataFrame(X) 431 self.series_names = [ 432 "series" + str(i) for i in range(X.shape[1]) 433 ] 434 else: 435 # xreg is not None 436 X = mo.cbind(X, xreg) 437 self.xreg_ = xreg 438 439 else: # input data set is a DataFrame with column names 440 X_index = None 441 if X.index is not None: 442 X_index = X.index 443 if xreg is None: 444 X = copy.deepcopy(mo.convert_df_to_numeric(X)) 445 else: 446 X = copy.deepcopy(mo.cbind(mo.convert_df_to_numeric(X), xreg)) 447 self.xreg_ = xreg 448 if X_index is not None: 449 X.index = X_index 450 self.series_names = X.columns.tolist() 451 452 if isinstance(X, pd.DataFrame): 453 if self.df_ is None: 454 self.df_ = X 455 X = X.values 456 else: 457 input_dates_prev = pd.DatetimeIndex(self.df_.index.values) 458 frequency = pd.infer_freq(input_dates_prev) 459 self.df_ = pd.concat([self.df_, X], axis=0) 460 self.input_dates = pd.date_range( 461 start=input_dates_prev[0], 462 
periods=len(input_dates_prev) + X.shape[0], 463 freq=frequency, 464 ).values.tolist() 465 self.df_.index = self.input_dates 466 X = self.df_.values 467 self.df_.columns = self.series_names 468 else: 469 if self.df_ is None: 470 self.df_ = pd.DataFrame(X, columns=self.series_names) 471 else: 472 self.df_ = pd.concat( 473 [self.df_, pd.DataFrame(X, columns=self.series_names)], 474 axis=0, 475 ) 476 477 self.input_dates = ts.compute_input_dates(self.df_) 478 479 try: 480 # multivariate time series 481 n, p = X.shape 482 except: 483 # univariate time series 484 n = X.shape[0] 485 p = 1 486 self.n_obs_ = n 487 488 rep_1_n = np.repeat(1, n) 489 490 self.y_ = None 491 self.X_ = None 492 self.n_series = p 493 self.fit_objs_.clear() 494 self.y_means_.clear() 495 residuals_ = [] 496 self.residuals_ = None 497 self.residuals_sims_ = None 498 self.kde_ = None 499 self.sims_ = None 500 self.scaled_Z_ = None 501 self.centered_y_is_ = [] 502 503 if self.init_n_series_ > 1: 504 # multivariate time series 505 mts_input = ts.create_train_inputs(X[::-1], self.lags) 506 else: 507 # univariate time series 508 mts_input = ts.create_train_inputs( 509 X.reshape(-1, 1)[::-1], self.lags 510 ) 511 512 self.y_ = mts_input[0] 513 514 self.X_ = mts_input[1] 515 516 dummy_y, scaled_Z = self.cook_training_set(y=rep_1_n, X=self.X_) 517 518 self.scaled_Z_ = scaled_Z 519 520 # loop on all the time series and adjust self.obj.fit 521 if self.verbose > 0: 522 print( 523 f"\n Adjusting {type(self.obj).__name__} to multivariate time series... \n" 524 ) 525 526 if self.show_progress is True: 527 iterator = tqdm(range(self.init_n_series_)) 528 else: 529 iterator = range(self.init_n_series_) 530 531 if self.type_pi in ( 532 "gaussian", 533 "kde", 534 "bootstrap", 535 "block-bootstrap", 536 ) or self.type_pi.startswith("vine"): 537 for i in iterator: 538 y_mean = np.mean(self.y_[:, i]) 539 self.y_means_[i] = y_mean 540 centered_y_i = self.y_[:, i] - y_mean 541 self.centered_y_is_.append(centered_y_i) 542 self.obj.fit(X=scaled_Z, y=centered_y_i) 543 self.fit_objs_[i] = deepcopy(self.obj) 544 residuals_.append( 545 ( 546 centered_y_i - self.fit_objs_[i].predict(scaled_Z) 547 ).tolist() 548 ) 549 550 if self.type_pi == "quantile": 551 for i in iterator: 552 y_mean = np.mean(self.y_[:, i]) 553 self.y_means_[i] = y_mean 554 centered_y_i = self.y_[:, i] - y_mean 555 self.centered_y_is_.append(centered_y_i) 556 self.obj.fit(X=scaled_Z, y=centered_y_i) 557 self.fit_objs_[i] = deepcopy(self.obj) 558 559 if self.type_pi.startswith("scp"): 560 # split conformal prediction 561 for i in iterator: 562 n_y = self.y_.shape[0] 563 n_y_half = n_y // 2 564 first_half_idx = range(0, n_y_half) 565 second_half_idx = range(n_y_half, n_y) 566 y_mean_temp = np.mean(self.y_[first_half_idx, i]) 567 centered_y_i_temp = self.y_[first_half_idx, i] - y_mean_temp 568 self.obj.fit(X=scaled_Z[first_half_idx, :], y=centered_y_i_temp) 569 # calibrated residuals actually 570 residuals_.append( 571 ( 572 self.y_[second_half_idx, i] 573 - ( 574 y_mean_temp 575 + self.obj.predict(scaled_Z[second_half_idx, :]) 576 ) 577 ).tolist() 578 ) 579 # fit on the second half 580 y_mean = np.mean(self.y_[second_half_idx, i]) 581 self.y_means_[i] = y_mean 582 centered_y_i = self.y_[second_half_idx, i] - y_mean 583 self.obj.fit(X=scaled_Z[second_half_idx, :], y=centered_y_i) 584 self.fit_objs_[i] = deepcopy(self.obj) 585 586 self.residuals_ = np.asarray(residuals_).T 587 588 if self.type_pi == "gaussian": 589 self.gaussian_preds_std_ = np.std(self.residuals_, axis=0) 590 591 if 
self.type_pi.startswith("scp2"): 592 # Calculate mean and standard deviation for each column 593 data_mean = np.mean(self.residuals_, axis=0) 594 self.residuals_std_dev_ = np.std(self.residuals_, axis=0) 595 # Center and scale the array using broadcasting 596 self.residuals_ = ( 597 self.residuals_ - data_mean[np.newaxis, :] 598 ) / self.residuals_std_dev_[np.newaxis, :] 599 600 if self.replications != None and "kde" in self.type_pi: 601 if self.verbose > 0: 602 print(f"\n Simulate residuals using {self.kernel} kernel... \n") 603 assert self.kernel in ( 604 "gaussian", 605 "tophat", 606 ), "currently, 'kernel' must be either 'gaussian' or 'tophat'" 607 kernel_bandwidths = {"bandwidth": np.logspace(-6, 6, 150)} 608 grid = GridSearchCV( 609 KernelDensity(kernel=self.kernel, **kwargs), 610 param_grid=kernel_bandwidths, 611 ) 612 grid.fit(self.residuals_) 613 614 if self.verbose > 0: 615 print( 616 f"\n Best parameters for {self.kernel} kernel: {grid.best_params_} \n" 617 ) 618 619 self.kde_ = grid.best_estimator_ 620 621 return self
Fit MTS model to training data X, with optional regressors xreg
Parameters:
X: {array-like}, shape = [n_samples, n_features] Training time series, where n_samples is the number of samples and n_features is the number of features; X must be in increasing order (most recent observations last)
xreg: {array-like}, shape = [n_samples, n_features_xreg] Additional (external) regressors to be passed to self.obj xreg must be in 'increasing' order (most recent observations last)
**kwargs: for now, additional parameters to be passed for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)
Returns:
self: object
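A minimal hedged sketch of fitting with external regressors (assumptions: `xreg` shares the rows and increasing ordering of `X`; how future values of the regressors are handled at prediction time is not covered here):
import nnetsauce as ns
import numpy as np
import pandas as pd
from sklearn.linear_model import Ridge
np.random.seed(7)
dates = pd.date_range("2020-01-01", periods=40, freq="D")
X = pd.DataFrame(np.random.rand(40, 2), columns=["series1", "series2"], index=dates)
xreg = pd.DataFrame(np.random.rand(40, 1), columns=["exog1"], index=dates)
obj_MTS = ns.MTS(Ridge(), lags=2, n_hidden_features=5, type_pi="kde")
obj_MTS.fit(X, xreg=xreg)
print(obj_MTS.predict(h=5))  # forecasts are returned for the target series only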
Forecast all the time series, h steps ahead.
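A minimal usage sketch on toy data (the exact keyword defaults may differ across versions): with `type_pi="gaussian"`, `predict()` returns a `DescribeResult` namedtuple whose `mean`/`lower`/`upper` fields are DataFrames indexed by the forecast dates, as in the source above.

```python
# Minimal usage sketch (toy data): Gaussian prediction intervals from MTS.predict
import numpy as np
import pandas as pd
import nnetsauce as ns
from sklearn.linear_model import Ridge

rng = np.random.default_rng(42)
dates = pd.date_range("2023-01-01", periods=100, freq="D")
df = pd.DataFrame(
    {"series1": rng.normal(size=100).cumsum(),
     "series2": rng.normal(size=100).cumsum()},
    index=dates,
)

obj = ns.MTS(Ridge(), lags=5, type_pi="gaussian")
obj.fit(df)
res = obj.predict(h=10, level=95)   # DescribeResult(mean, lower, upper)
print(res.mean.head())              # point forecasts
print(res.lower.head())             # lower bound of the 95% interval
print(res.upper.head())             # upper bound of the 95% interval
```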
1566 def score( 1567 self, 1568 X, 1569 training_index, 1570 testing_index, 1571 scoring=None, 1572 alpha=0.5, 1573 **kwargs, 1574 ): 1575 """Train on training_index, score on testing_index.""" 1576 1577 assert ( 1578 bool(set(training_index).intersection(set(testing_index))) == False 1579 ), "Non-overlapping 'training_index' and 'testing_index' required" 1580 1581 # Dimensions 1582 try: 1583 # multivariate time series 1584 n, p = X.shape 1585 except: 1586 # univariate time series 1587 n = X.shape[0] 1588 p = 1 1589 1590 # Training and testing sets 1591 if p > 1: 1592 X_train = X[training_index, :] 1593 X_test = X[testing_index, :] 1594 else: 1595 X_train = X[training_index] 1596 X_test = X[testing_index] 1597 1598 # Horizon 1599 h = len(testing_index) 1600 assert ( 1601 len(training_index) + h 1602 ) <= n, "Please check lengths of training and testing windows" 1603 1604 # Fit and predict 1605 self.fit(X_train, **kwargs) 1606 preds = self.predict(h=h, **kwargs) 1607 1608 if scoring is None: 1609 scoring = "neg_root_mean_squared_error" 1610 1611 if scoring == "pinball": 1612 # Predict requested quantile 1613 q_pred = self.predict(h=h, quantiles=[alpha], **kwargs) 1614 # Handle multivariate 1615 scores = [] 1616 for j in range(p): 1617 series_name = getattr(self, "series_names", [f"Series_{j}"])[j] 1618 q_label = ( 1619 f"{int(alpha * 100):02d}" 1620 if (alpha * 100).is_integer() 1621 else f"{alpha:.3f}".replace(".", "_") 1622 ) 1623 col = f"quantile_{q_label}_{series_name}" 1624 if col not in q_pred.columns: 1625 raise ValueError( 1626 f"Column '{col}' not found in quantile forecast output." 1627 ) 1628 y_true_j = X_test[:, j] 1629 y_pred_j = q_pred[col].values 1630 # Compute pinball loss for this series 1631 loss = mean_pinball_loss(y_true_j, y_pred_j, alpha=alpha) 1632 scores.append(loss) 1633 # Return average over series 1634 return np.mean(scores) 1635 1636 if scoring == "crps": 1637 # Ensure simulations exist 1638 preds = self.predict(h=h, **kwargs) # triggers self.sims_ 1639 # Extract simulations: list of DataFrames → (R, h, p) 1640 sims_vals = np.stack( 1641 [sim.values for sim in self.sims_], axis=0 1642 ) # (R, h, p) 1643 crps_scores = [] 1644 for j in range(p): 1645 y_true_j = X_test[:, j] 1646 sims_j = sims_vals[:, :, j] # (R, h) 1647 crps_j = self._crps_ensemble(np.asarray(y_true_j), sims_j) 1648 crps_scores.append(np.mean(crps_j)) # average over horizon 1649 return np.mean(crps_scores) # average over series 1650 1651 # check inputs 1652 assert scoring in ( 1653 "explained_variance", 1654 "neg_mean_absolute_error", 1655 "neg_mean_squared_error", 1656 "neg_root_mean_squared_error", 1657 "neg_mean_squared_log_error", 1658 "neg_median_absolute_error", 1659 "r2", 1660 ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \ 1661 'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \ 1662 'neg_median_absolute_error', 'r2')" 1663 1664 scoring_options = { 1665 "explained_variance": skm2.explained_variance_score, 1666 "neg_mean_absolute_error": skm2.mean_absolute_error, 1667 "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2), 1668 "neg_root_mean_squared_error": lambda x, y: np.sqrt( 1669 np.mean((x - y) ** 2) 1670 ), 1671 "neg_mean_squared_log_error": skm2.mean_squared_log_error, 1672 "neg_median_absolute_error": skm2.median_absolute_error, 1673 "r2": skm2.r2_score, 1674 } 1675 1676 return scoring_options[scoring](X_test, preds)
Train on training_index, score on testing_index.
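The `score` method above wraps a single train/forecast split: `training_index` and `testing_index` must not overlap, and `len(testing_index)` becomes the forecast horizon `h`. Besides the scikit-learn style metrics, the branches shown above also handle `scoring="pinball"` (quantile loss at level `alpha`) and `scoring="crps"` (which requires simulations). A minimal sketch on toy data, assuming the default MTS settings return point forecasts compatible with the metric functions:

```python
# Minimal sketch (toy data): hold-out scoring with MTS.score
import numpy as np
import nnetsauce as ns
from sklearn.linear_model import Ridge

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 2)).cumsum(axis=0)   # two toy series, most recent rows last

obj = ns.MTS(Ridge(), lags=3)
training_index = np.arange(0, 80)
testing_index = np.arange(80, 100)             # horizon h = 20, no overlap with training

# Default scoring key is "neg_root_mean_squared_error" (computed as an RMSE here)
print(obj.score(X, training_index, testing_index))
```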
12class MTSStacker(MTS): 13 """ 14 Sequential stacking for time series with unified strategy. 15 16 Core Strategy: 17 1. Split data: half1 (base models) | half2 (meta-model) 18 2. Train base models on half1, predict half2 19 3. Create augmented dataset: [original_series | base_pred_1 | base_pred_2 | ...] 20 Stack as additional time series, extract target series 21 4. Train meta-MTS on half2 with augmented data 22 5. Retrain base models on half2 for temporal alignment 23 6. At prediction: base models forecast → augment → meta-model predicts 24 """ 25 26 def __init__( 27 self, 28 base_models, 29 meta_model, 30 split_ratio=0.5, 31 ): 32 """ 33 Parameters 34 ---------- 35 base_models : list of sklearn-compatible models 36 Base models (e.g., Ridge, Lasso, RandomForest) 37 meta_model : nnetsauce.MTS instance 38 MTS with type_pi='scp2-kde' or similar 39 split_ratio : float 40 Proportion for half1 (default: 0.5) 41 """ 42 self.base_models = base_models 43 self.meta_model = meta_model 44 self.split_ratio = split_ratio 45 self.fitted_base_models_ = [] 46 self.split_idx_ = None 47 self.mean_ = None 48 self.lower_ = None 49 self.upper_ = None 50 self.sims_ = None 51 self.output_dates_ = None 52 53 def fit(self, X, xreg=None, **kwargs): 54 """ 55 Fit MTSStacker using sequential stacking strategy. 56 57 Parameters 58 ---------- 59 X : array-like or DataFrame, shape (n_samples, n_features) 60 Training time series (most recent observations last) 61 xreg : array-like, optional 62 External regressors 63 **kwargs : dict 64 Additional parameters for base and meta models 65 66 Returns 67 ------- 68 self : object 69 """ 70 # 1. Store attributes and convert to DataFrame if needed 71 if isinstance(X, pd.DataFrame): 72 self.df_ = X.copy() 73 X_array = X.values 74 self.series_names = X.columns.tolist() 75 else: 76 X_array = np.asarray(X) 77 self.df_ = pd.DataFrame(X_array) 78 self.series_names = [f"series{i}" for i in range(X_array.shape[1])] 79 80 n_samples = X_array.shape[0] 81 self.n_series_ = X_array.shape[1] if X_array.ndim > 1 else 1 82 83 # 2. Split data into half1 and half2 84 split_idx = int(n_samples * self.split_ratio) 85 self.split_idx_ = split_idx 86 87 if split_idx < self.meta_model.lags: 88 raise ValueError( 89 f"Split creates insufficient data: split_idx={split_idx} < " 90 f"lags={self.meta_model.lags}. Reduce split_ratio or use fewer lags." 91 ) 92 93 half1 = X_array[:split_idx] 94 half2 = X_array[split_idx:] 95 96 # 3. Train base models on half1 and predict half2 97 base_preds = [] 98 temp_base_models = [] 99 100 for base_model in self.base_models: 101 # Wrap in MTS with same config as meta_model 102 base_mts = MTS( 103 obj=clone(base_model), 104 lags=self.meta_model.lags, 105 n_hidden_features=self.meta_model.n_hidden_features, 106 replications=self.meta_model.replications, 107 kernel=self.meta_model.kernel, 108 type_pi=None, # No prediction intervals for base models 109 ) 110 base_mts.fit(half1) 111 112 # Predict half2 113 pred = base_mts.predict(h=len(half2)) 114 115 # Handle different return types 116 if isinstance(pred, pd.DataFrame): 117 base_preds.append(pred.values) 118 elif isinstance(pred, np.ndarray): 119 base_preds.append(pred) 120 elif hasattr(pred, "mean"): 121 # Named tuple with mean attribute 122 mean_pred = pred.mean 123 base_preds.append( 124 mean_pred.values 125 if isinstance(mean_pred, pd.DataFrame) 126 else mean_pred 127 ) 128 else: 129 raise ValueError(f"Unexpected prediction type: {type(pred)}") 130 131 temp_base_models.append(base_mts) 132 133 # 4. 
Create augmented dataset: [original | base_pred_1 | base_pred_2 | ...] 134 base_preds_array = np.hstack( 135 base_preds 136 ) # shape: (len(half2), n_series * n_base_models) 137 138 if isinstance(X, pd.DataFrame): 139 half2_df = pd.DataFrame( 140 half2, 141 index=self.df_.index[split_idx:], 142 columns=self.series_names, 143 ) 144 base_preds_df = pd.DataFrame( 145 base_preds_array, 146 index=self.df_.index[split_idx:], 147 columns=[ 148 f"base_{i}_{j}" 149 for i in range(len(self.base_models)) 150 for j in range(self.n_series_) 151 ], 152 ) 153 augmented = pd.concat([half2_df, base_preds_df], axis=1) 154 else: 155 augmented = np.hstack([half2, base_preds_array]) 156 157 # 5. Train meta-model on augmented half2 158 self.meta_model.fit(augmented, xreg=xreg, **kwargs) 159 160 # Store meta-model attributes 161 self.output_dates_ = self.meta_model.output_dates_ 162 self.fit_objs_ = self.meta_model.fit_objs_ 163 self.y_ = self.meta_model.y_ 164 self.X_ = self.meta_model.X_ 165 self.xreg_ = self.meta_model.xreg_ 166 self.y_means_ = self.meta_model.y_means_ 167 self.residuals_ = self.meta_model.residuals_ 168 169 # 6. FIXED: Retrain base models on half2 for temporal alignment 170 self.fitted_base_models_ = [] 171 for i, base_model in enumerate(self.base_models): 172 base_mts_final = MTS( 173 obj=clone(base_model), 174 lags=self.meta_model.lags, 175 n_hidden_features=self.meta_model.n_hidden_features, 176 replications=self.meta_model.replications, 177 kernel=self.meta_model.kernel, 178 type_pi=None, 179 ) 180 base_mts_final.fit(half2) 181 self.fitted_base_models_.append(base_mts_final) 182 183 return self 184 185 def predict(self, h=5, level=95, **kwargs): 186 """ 187 Forecast h steps ahead using stacked predictions. 188 189 FIXED: Now properly generates base model forecasts and uses them 190 to create augmented features for the meta-model. 
191 192 Parameters 193 ---------- 194 h : int 195 Forecast horizon 196 level : int 197 Confidence level for prediction intervals 198 **kwargs : dict 199 Additional parameters for prediction 200 201 Returns 202 ------- 203 DescribeResult or DataFrame 204 Predictions with optional intervals/simulations 205 """ 206 # Step 1: Generate base model forecasts for horizon h 207 base_forecasts = [] 208 209 for base_mts in self.fitted_base_models_: 210 # Each base model forecasts h steps ahead 211 forecast = base_mts.predict(h=h) 212 213 # Extract mean prediction 214 if isinstance(forecast, pd.DataFrame): 215 base_forecasts.append(forecast.values) 216 elif isinstance(forecast, np.ndarray): 217 base_forecasts.append(forecast) 218 elif hasattr(forecast, "mean"): 219 mean_pred = forecast.mean 220 base_forecasts.append( 221 mean_pred.values 222 if isinstance(mean_pred, pd.DataFrame) 223 else mean_pred 224 ) 225 else: 226 raise ValueError(f"Unexpected forecast type: {type(forecast)}") 227 228 # Step 2: Stack base forecasts into augmented features 229 base_forecasts_array = np.hstack( 230 base_forecasts 231 ) # shape: (h, n_series * n_base) 232 233 # Step 3: Create augmented input for meta-model 234 # The meta-model needs the original series structure + base predictions 235 # We use recursive forecasting: predict one step, update history, repeat 236 237 # Get last window of data from training 238 last_window = self.df_.iloc[-self.meta_model.lags:].values 239 240 # Initialize containers for results 241 all_forecasts = [] 242 all_lowers = [] if level is not None else None 243 all_uppers = [] if level is not None else None 244 all_sims = ( 245 [] 246 if hasattr(self.meta_model, "type_pi") and self.meta_model.type_pi 247 else None 248 ) 249 250 # Recursive forecasting 251 current_window = last_window.copy() 252 253 for step in range(h): 254 # Create augmented input: [current_window_last_row | base_forecast_step] 255 # Note: meta-model was trained on [original | base_preds] 256 # For prediction, we need to simulate this structure 257 258 # Use the base forecast for this step 259 base_forecast_step = base_forecasts_array[ 260 step: step + 1, : 261 ] # shape: (1, n_base_features) 262 263 # Create a dummy augmented dataset for this step 264 # Combine last observed values with base predictions 265 last_obs = current_window[-1:, :] # shape: (1, n_series) 266 augmented_step = np.hstack([last_obs, base_forecast_step]) 267 268 # Convert to DataFrame if needed 269 if isinstance(self.df_, pd.DataFrame): 270 augmented_df = pd.DataFrame( 271 augmented_step, 272 columns=( 273 self.series_names 274 + [ 275 f"base_{i}_{j}" 276 for i in range(len(self.base_models)) 277 for j in range(self.n_series_) 278 ] 279 ), 280 ) 281 else: 282 augmented_df = augmented_step 283 284 # Predict one step with meta-model 285 # This is tricky: we need to use meta-model's internal predict 286 # but with our augmented data structure 287 288 # For now, use the standard predict and extract one step 289 step_result = self.meta_model.predict(h=1, level=level, **kwargs) 290 291 # Extract forecasts 292 if isinstance(step_result, pd.DataFrame): 293 forecast_step = step_result.iloc[0, : self.n_series_].values 294 all_forecasts.append(forecast_step) 295 elif isinstance(step_result, np.ndarray): 296 forecast_step = step_result[0, : self.n_series_] 297 all_forecasts.append(forecast_step) 298 elif hasattr(step_result, "mean"): 299 mean_pred = step_result.mean 300 if isinstance(mean_pred, pd.DataFrame): 301 forecast_step = mean_pred.iloc[0, : 
self.n_series_].values 302 else: 303 forecast_step = mean_pred[0, : self.n_series_] 304 all_forecasts.append(forecast_step) 305 306 # Extract intervals if available 307 if hasattr(step_result, "lower") and all_lowers is not None: 308 lower_pred = step_result.lower 309 if isinstance(lower_pred, pd.DataFrame): 310 all_lowers.append( 311 lower_pred.iloc[0, : self.n_series_].values 312 ) 313 else: 314 all_lowers.append(lower_pred[0, : self.n_series_]) 315 316 if hasattr(step_result, "upper") and all_uppers is not None: 317 upper_pred = step_result.upper 318 if isinstance(upper_pred, pd.DataFrame): 319 all_uppers.append( 320 upper_pred.iloc[0, : self.n_series_].values 321 ) 322 else: 323 all_uppers.append(upper_pred[0, : self.n_series_]) 324 325 # Extract simulations if available 326 if hasattr(step_result, "sims") and all_sims is not None: 327 all_sims.append(step_result.sims) 328 329 # Update window for next iteration 330 current_window = np.vstack( 331 [current_window[1:], forecast_step.reshape(1, -1)] 332 ) 333 334 # Combine all forecasts 335 forecasts_array = np.array(all_forecasts) 336 337 # Create output dates 338 if hasattr(self.df_, "index") and isinstance( 339 self.df_.index, pd.DatetimeIndex 340 ): 341 last_date = self.df_.index[-1] 342 freq = pd.infer_freq(self.df_.index) 343 if freq: 344 output_dates = pd.date_range( 345 start=last_date, periods=h + 1, freq=freq 346 )[1:] 347 else: 348 output_dates = pd.RangeIndex( 349 start=len(self.df_), stop=len(self.df_) + h 350 ) 351 else: 352 output_dates = pd.RangeIndex( 353 start=len(self.df_), stop=len(self.df_) + h 354 ) 355 356 self.output_dates_ = output_dates 357 358 # Format output 359 mean_df = pd.DataFrame( 360 forecasts_array, 361 index=output_dates, 362 columns=self.series_names[: self.n_series_], 363 ) 364 self.mean_ = mean_df 365 366 # Return based on what was computed 367 if all_lowers and all_uppers: 368 lowers_array = np.array(all_lowers) 369 uppers_array = np.array(all_uppers) 370 371 lower_df = pd.DataFrame( 372 lowers_array, 373 index=output_dates, 374 columns=self.series_names[: self.n_series_], 375 ) 376 upper_df = pd.DataFrame( 377 uppers_array, 378 index=output_dates, 379 columns=self.series_names[: self.n_series_], 380 ) 381 382 self.lower_ = lower_df 383 self.upper_ = upper_df 384 385 if all_sims: 386 self.sims_ = tuple(all_sims) 387 DescribeResult = namedtuple( 388 "DescribeResult", ("mean", "sims", "lower", "upper") 389 ) 390 return DescribeResult(mean_df, self.sims_, lower_df, upper_df) 391 else: 392 DescribeResult = namedtuple( 393 "DescribeResult", ("mean", "lower", "upper") 394 ) 395 return DescribeResult(mean_df, lower_df, upper_df) 396 else: 397 return mean_df 398 399 def plot(self, series=None, **kwargs): 400 """ 401 Plot the time series with forecasts and prediction intervals. 
402 403 Parameters 404 ---------- 405 series : str or int, optional 406 Name or index of the series to plot (default: 0) 407 **kwargs : dict 408 Additional parameters for plotting 409 """ 410 # Ensure we have predictions 411 if self.mean_ is None: 412 raise ValueError( 413 "Model forecasting must be obtained first (call predict)" 414 ) 415 416 # Convert series name to index if needed 417 if isinstance(series, str): 418 if series in self.series_names: 419 series_idx = self.series_names.index(series) 420 else: 421 raise ValueError( 422 f"Series '{series}' doesn't exist in the input dataset" 423 ) 424 else: 425 series_idx = series if series is not None else 0 426 427 # Check bounds 428 if series_idx < 0 or series_idx >= self.n_series_: 429 raise ValueError( 430 f"Series index {series_idx} is out of bounds (0 to {self.n_series_ - 1})" 431 ) 432 433 # Prepare data for plotting 434 import matplotlib.pyplot as plt 435 import matplotlib.dates as mdates 436 437 # Get historical data 438 historical_data = self.df_.iloc[:, series_idx] 439 forecast_data = self.mean_.iloc[:, series_idx] 440 441 # Get prediction intervals if available 442 has_intervals = self.lower_ is not None and self.upper_ is not None 443 if has_intervals: 444 lower_data = self.lower_.iloc[:, series_idx] 445 upper_data = self.upper_.iloc[:, series_idx] 446 447 # Create figure 448 fig, ax = plt.subplots(figsize=(12, 6)) 449 450 # Plot historical data 451 if isinstance(self.df_.index, pd.DatetimeIndex): 452 hist_index = self.df_.index 453 ax.plot( 454 hist_index, 455 historical_data, 456 "-", 457 label="Historical", 458 color="blue", 459 linewidth=1.5, 460 ) 461 462 # Plot forecast 463 forecast_index = self.mean_.index 464 ax.plot( 465 forecast_index, 466 forecast_data, 467 "-", 468 label="Forecast", 469 color="red", 470 linewidth=1.5, 471 ) 472 473 # Plot prediction intervals 474 if has_intervals: 475 ax.fill_between( 476 forecast_index, 477 lower_data, 478 upper_data, 479 alpha=0.3, 480 color="red", 481 label="Prediction Interval", 482 ) 483 484 # Add vertical line at the split point 485 if self.split_idx_ is not None: 486 split_date = hist_index[self.split_idx_] 487 ax.axvline( 488 x=split_date, 489 color="gray", 490 linestyle="--", 491 alpha=0.5, 492 label="Train Split", 493 ) 494 495 # Format x-axis for dates 496 ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m-%d")) 497 fig.autofmt_xdate() 498 else: 499 # Numeric indices 500 n_points_train = len(self.df_) 501 n_points_forecast = len(self.mean_) 502 503 x_hist = np.arange(n_points_train) 504 x_forecast = np.arange( 505 n_points_train, n_points_train + n_points_forecast 506 ) 507 508 ax.plot( 509 x_hist, 510 historical_data, 511 "-", 512 label="Historical", 513 color="blue", 514 linewidth=1.5, 515 ) 516 ax.plot( 517 x_forecast, 518 forecast_data, 519 "-", 520 label="Forecast", 521 color="red", 522 linewidth=1.5, 523 ) 524 525 if has_intervals: 526 ax.fill_between( 527 x_forecast, 528 lower_data, 529 upper_data, 530 alpha=0.3, 531 color="red", 532 label="Prediction Interval", 533 ) 534 535 if self.split_idx_ is not None: 536 ax.axvline( 537 x=self.split_idx_, 538 color="gray", 539 linestyle="--", 540 alpha=0.5, 541 label="Train Split", 542 ) 543 544 # Set title and labels 545 series_name = ( 546 self.series_names[series_idx] 547 if series_idx < len(self.series_names) 548 else f"Series {series_idx}" 549 ) 550 plt.title(f"Forecast for {series_name}", fontsize=14, fontweight="bold") 551 plt.xlabel("Time") 552 plt.ylabel("Value") 553 plt.legend() 554 plt.grid(True, alpha=0.3) 
555 plt.tight_layout() 556 plt.show()
Sequential stacking for time series with a unified strategy.
Core Strategy (a usage sketch follows this list):
- Split data: half1 (base models) | half2 (meta-model)
- Train base models on half1, predict half2
- Create augmented dataset [original_series | base_pred_1 | base_pred_2 | ...]: base-model predictions are stacked as additional time series, and the target series are extracted at prediction time
- Train meta-MTS on half2 with augmented data
- Retrain base models on half2 for temporal alignment
- At prediction: base models forecast → augment → meta-model predicts
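A minimal usage sketch of this strategy on toy data (Ridge and Lasso stand in for arbitrary scikit-learn regressors; `type_pi="scp2-kde"` is the option suggested in the constructor docstring):

```python
# Minimal usage sketch (toy data): two base learners stacked under an MTS meta-model
import numpy as np
import pandas as pd
import nnetsauce as ns
from sklearn.linear_model import Ridge, Lasso

rng = np.random.default_rng(1)
df = pd.DataFrame(
    rng.normal(size=(120, 2)).cumsum(axis=0),
    columns=["series1", "series2"],
    index=pd.date_range("2023-01-01", periods=120, freq="D"),
)

meta = ns.MTS(Ridge(), lags=5, replications=100, type_pi="scp2-kde")
stacker = ns.MTSStacker(base_models=[Ridge(), Lasso()],
                        meta_model=meta,
                        split_ratio=0.5)
stacker.fit(df)
res = stacker.predict(h=10, level=95)   # DataFrame, or DescribeResult when intervals are available
```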
53 def fit(self, X, xreg=None, **kwargs): 54 """ 55 Fit MTSStacker using sequential stacking strategy. 56 57 Parameters 58 ---------- 59 X : array-like or DataFrame, shape (n_samples, n_features) 60 Training time series (most recent observations last) 61 xreg : array-like, optional 62 External regressors 63 **kwargs : dict 64 Additional parameters for base and meta models 65 66 Returns 67 ------- 68 self : object 69 """ 70 # 1. Store attributes and convert to DataFrame if needed 71 if isinstance(X, pd.DataFrame): 72 self.df_ = X.copy() 73 X_array = X.values 74 self.series_names = X.columns.tolist() 75 else: 76 X_array = np.asarray(X) 77 self.df_ = pd.DataFrame(X_array) 78 self.series_names = [f"series{i}" for i in range(X_array.shape[1])] 79 80 n_samples = X_array.shape[0] 81 self.n_series_ = X_array.shape[1] if X_array.ndim > 1 else 1 82 83 # 2. Split data into half1 and half2 84 split_idx = int(n_samples * self.split_ratio) 85 self.split_idx_ = split_idx 86 87 if split_idx < self.meta_model.lags: 88 raise ValueError( 89 f"Split creates insufficient data: split_idx={split_idx} < " 90 f"lags={self.meta_model.lags}. Reduce split_ratio or use fewer lags." 91 ) 92 93 half1 = X_array[:split_idx] 94 half2 = X_array[split_idx:] 95 96 # 3. Train base models on half1 and predict half2 97 base_preds = [] 98 temp_base_models = [] 99 100 for base_model in self.base_models: 101 # Wrap in MTS with same config as meta_model 102 base_mts = MTS( 103 obj=clone(base_model), 104 lags=self.meta_model.lags, 105 n_hidden_features=self.meta_model.n_hidden_features, 106 replications=self.meta_model.replications, 107 kernel=self.meta_model.kernel, 108 type_pi=None, # No prediction intervals for base models 109 ) 110 base_mts.fit(half1) 111 112 # Predict half2 113 pred = base_mts.predict(h=len(half2)) 114 115 # Handle different return types 116 if isinstance(pred, pd.DataFrame): 117 base_preds.append(pred.values) 118 elif isinstance(pred, np.ndarray): 119 base_preds.append(pred) 120 elif hasattr(pred, "mean"): 121 # Named tuple with mean attribute 122 mean_pred = pred.mean 123 base_preds.append( 124 mean_pred.values 125 if isinstance(mean_pred, pd.DataFrame) 126 else mean_pred 127 ) 128 else: 129 raise ValueError(f"Unexpected prediction type: {type(pred)}") 130 131 temp_base_models.append(base_mts) 132 133 # 4. Create augmented dataset: [original | base_pred_1 | base_pred_2 | ...] 134 base_preds_array = np.hstack( 135 base_preds 136 ) # shape: (len(half2), n_series * n_base_models) 137 138 if isinstance(X, pd.DataFrame): 139 half2_df = pd.DataFrame( 140 half2, 141 index=self.df_.index[split_idx:], 142 columns=self.series_names, 143 ) 144 base_preds_df = pd.DataFrame( 145 base_preds_array, 146 index=self.df_.index[split_idx:], 147 columns=[ 148 f"base_{i}_{j}" 149 for i in range(len(self.base_models)) 150 for j in range(self.n_series_) 151 ], 152 ) 153 augmented = pd.concat([half2_df, base_preds_df], axis=1) 154 else: 155 augmented = np.hstack([half2, base_preds_array]) 156 157 # 5. Train meta-model on augmented half2 158 self.meta_model.fit(augmented, xreg=xreg, **kwargs) 159 160 # Store meta-model attributes 161 self.output_dates_ = self.meta_model.output_dates_ 162 self.fit_objs_ = self.meta_model.fit_objs_ 163 self.y_ = self.meta_model.y_ 164 self.X_ = self.meta_model.X_ 165 self.xreg_ = self.meta_model.xreg_ 166 self.y_means_ = self.meta_model.y_means_ 167 self.residuals_ = self.meta_model.residuals_ 168 169 # 6. 
FIXED: Retrain base models on half2 for temporal alignment 170 self.fitted_base_models_ = [] 171 for i, base_model in enumerate(self.base_models): 172 base_mts_final = MTS( 173 obj=clone(base_model), 174 lags=self.meta_model.lags, 175 n_hidden_features=self.meta_model.n_hidden_features, 176 replications=self.meta_model.replications, 177 kernel=self.meta_model.kernel, 178 type_pi=None, 179 ) 180 base_mts_final.fit(half2) 181 self.fitted_base_models_.append(base_mts_final) 182 183 return self
Fit MTSStacker using sequential stacking strategy.
Parameters
X : array-like or DataFrame, shape (n_samples, n_features)
Training time series (most recent observations last)
xreg : array-like, optional
External regressors
**kwargs : dict
Additional parameters for base and meta models
Returns
self : object
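As the source above shows, `fit` also accepts a plain numpy array, in which case the series are auto-named `series0`, `series1`, ... A short sketch reusing the `stacker` instance from the earlier example:

```python
# Sketch: fitting on a numpy array instead of a DataFrame (columns auto-named series0, series1, ...)
import numpy as np

X = np.random.default_rng(2).normal(size=(120, 2)).cumsum(axis=0)
stacker.fit(X)   # `stacker` from the usage sketch above
```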
185 def predict(self, h=5, level=95, **kwargs): 186 """ 187 Forecast h steps ahead using stacked predictions. 188 189 FIXED: Now properly generates base model forecasts and uses them 190 to create augmented features for the meta-model. 191 192 Parameters 193 ---------- 194 h : int 195 Forecast horizon 196 level : int 197 Confidence level for prediction intervals 198 **kwargs : dict 199 Additional parameters for prediction 200 201 Returns 202 ------- 203 DescribeResult or DataFrame 204 Predictions with optional intervals/simulations 205 """ 206 # Step 1: Generate base model forecasts for horizon h 207 base_forecasts = [] 208 209 for base_mts in self.fitted_base_models_: 210 # Each base model forecasts h steps ahead 211 forecast = base_mts.predict(h=h) 212 213 # Extract mean prediction 214 if isinstance(forecast, pd.DataFrame): 215 base_forecasts.append(forecast.values) 216 elif isinstance(forecast, np.ndarray): 217 base_forecasts.append(forecast) 218 elif hasattr(forecast, "mean"): 219 mean_pred = forecast.mean 220 base_forecasts.append( 221 mean_pred.values 222 if isinstance(mean_pred, pd.DataFrame) 223 else mean_pred 224 ) 225 else: 226 raise ValueError(f"Unexpected forecast type: {type(forecast)}") 227 228 # Step 2: Stack base forecasts into augmented features 229 base_forecasts_array = np.hstack( 230 base_forecasts 231 ) # shape: (h, n_series * n_base) 232 233 # Step 3: Create augmented input for meta-model 234 # The meta-model needs the original series structure + base predictions 235 # We use recursive forecasting: predict one step, update history, repeat 236 237 # Get last window of data from training 238 last_window = self.df_.iloc[-self.meta_model.lags:].values 239 240 # Initialize containers for results 241 all_forecasts = [] 242 all_lowers = [] if level is not None else None 243 all_uppers = [] if level is not None else None 244 all_sims = ( 245 [] 246 if hasattr(self.meta_model, "type_pi") and self.meta_model.type_pi 247 else None 248 ) 249 250 # Recursive forecasting 251 current_window = last_window.copy() 252 253 for step in range(h): 254 # Create augmented input: [current_window_last_row | base_forecast_step] 255 # Note: meta-model was trained on [original | base_preds] 256 # For prediction, we need to simulate this structure 257 258 # Use the base forecast for this step 259 base_forecast_step = base_forecasts_array[ 260 step: step + 1, : 261 ] # shape: (1, n_base_features) 262 263 # Create a dummy augmented dataset for this step 264 # Combine last observed values with base predictions 265 last_obs = current_window[-1:, :] # shape: (1, n_series) 266 augmented_step = np.hstack([last_obs, base_forecast_step]) 267 268 # Convert to DataFrame if needed 269 if isinstance(self.df_, pd.DataFrame): 270 augmented_df = pd.DataFrame( 271 augmented_step, 272 columns=( 273 self.series_names 274 + [ 275 f"base_{i}_{j}" 276 for i in range(len(self.base_models)) 277 for j in range(self.n_series_) 278 ] 279 ), 280 ) 281 else: 282 augmented_df = augmented_step 283 284 # Predict one step with meta-model 285 # This is tricky: we need to use meta-model's internal predict 286 # but with our augmented data structure 287 288 # For now, use the standard predict and extract one step 289 step_result = self.meta_model.predict(h=1, level=level, **kwargs) 290 291 # Extract forecasts 292 if isinstance(step_result, pd.DataFrame): 293 forecast_step = step_result.iloc[0, : self.n_series_].values 294 all_forecasts.append(forecast_step) 295 elif isinstance(step_result, np.ndarray): 296 forecast_step = 
step_result[0, : self.n_series_] 297 all_forecasts.append(forecast_step) 298 elif hasattr(step_result, "mean"): 299 mean_pred = step_result.mean 300 if isinstance(mean_pred, pd.DataFrame): 301 forecast_step = mean_pred.iloc[0, : self.n_series_].values 302 else: 303 forecast_step = mean_pred[0, : self.n_series_] 304 all_forecasts.append(forecast_step) 305 306 # Extract intervals if available 307 if hasattr(step_result, "lower") and all_lowers is not None: 308 lower_pred = step_result.lower 309 if isinstance(lower_pred, pd.DataFrame): 310 all_lowers.append( 311 lower_pred.iloc[0, : self.n_series_].values 312 ) 313 else: 314 all_lowers.append(lower_pred[0, : self.n_series_]) 315 316 if hasattr(step_result, "upper") and all_uppers is not None: 317 upper_pred = step_result.upper 318 if isinstance(upper_pred, pd.DataFrame): 319 all_uppers.append( 320 upper_pred.iloc[0, : self.n_series_].values 321 ) 322 else: 323 all_uppers.append(upper_pred[0, : self.n_series_]) 324 325 # Extract simulations if available 326 if hasattr(step_result, "sims") and all_sims is not None: 327 all_sims.append(step_result.sims) 328 329 # Update window for next iteration 330 current_window = np.vstack( 331 [current_window[1:], forecast_step.reshape(1, -1)] 332 ) 333 334 # Combine all forecasts 335 forecasts_array = np.array(all_forecasts) 336 337 # Create output dates 338 if hasattr(self.df_, "index") and isinstance( 339 self.df_.index, pd.DatetimeIndex 340 ): 341 last_date = self.df_.index[-1] 342 freq = pd.infer_freq(self.df_.index) 343 if freq: 344 output_dates = pd.date_range( 345 start=last_date, periods=h + 1, freq=freq 346 )[1:] 347 else: 348 output_dates = pd.RangeIndex( 349 start=len(self.df_), stop=len(self.df_) + h 350 ) 351 else: 352 output_dates = pd.RangeIndex( 353 start=len(self.df_), stop=len(self.df_) + h 354 ) 355 356 self.output_dates_ = output_dates 357 358 # Format output 359 mean_df = pd.DataFrame( 360 forecasts_array, 361 index=output_dates, 362 columns=self.series_names[: self.n_series_], 363 ) 364 self.mean_ = mean_df 365 366 # Return based on what was computed 367 if all_lowers and all_uppers: 368 lowers_array = np.array(all_lowers) 369 uppers_array = np.array(all_uppers) 370 371 lower_df = pd.DataFrame( 372 lowers_array, 373 index=output_dates, 374 columns=self.series_names[: self.n_series_], 375 ) 376 upper_df = pd.DataFrame( 377 uppers_array, 378 index=output_dates, 379 columns=self.series_names[: self.n_series_], 380 ) 381 382 self.lower_ = lower_df 383 self.upper_ = upper_df 384 385 if all_sims: 386 self.sims_ = tuple(all_sims) 387 DescribeResult = namedtuple( 388 "DescribeResult", ("mean", "sims", "lower", "upper") 389 ) 390 return DescribeResult(mean_df, self.sims_, lower_df, upper_df) 391 else: 392 DescribeResult = namedtuple( 393 "DescribeResult", ("mean", "lower", "upper") 394 ) 395 return DescribeResult(mean_df, lower_df, upper_df) 396 else: 397 return mean_df
Forecast h steps ahead using stacked predictions.
Base-model forecasts are generated first, then used to build the augmented features consumed by the meta-model.
Parameters
h : int
Forecast horizon
level : int
Confidence level for prediction intervals
**kwargs : dict
Additional parameters for prediction
Returns
DescribeResult or DataFrame
Predictions with optional intervals/simulations
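Depending on whether intervals (and simulations) were computed, `predict` returns either a plain DataFrame of point forecasts or a `DescribeResult` namedtuple. A small sketch of handling both cases, reusing `stacker` from the earlier example:

```python
# Sketch: handling both return types of MTSStacker.predict
res = stacker.predict(h=10, level=95)
if hasattr(res, "mean"):                 # DescribeResult with intervals (and possibly sims)
    mean_df, lower_df, upper_df = res.mean, res.lower, res.upper
else:                                    # plain DataFrame of point forecasts
    mean_df = res
```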
16class MultitaskClassifier(Base, ClassifierMixin): 17 """Multitask Classification model based on regression models, with shared covariates 18 19 Parameters: 20 21 obj: object 22 any object (must be a regression model) containing a method fit (obj.fit()) 23 and a method predict (obj.predict()) 24 25 n_hidden_features: int 26 number of nodes in the hidden layer 27 28 activation_name: str 29 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 30 31 a: float 32 hyperparameter for 'prelu' or 'elu' activation function 33 34 nodes_sim: str 35 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 36 'uniform' 37 38 bias: boolean 39 indicates if the hidden layer contains a bias term (True) or not 40 (False) 41 42 dropout: float 43 regularization parameter; (random) percentage of nodes dropped out 44 of the training 45 46 direct_link: boolean 47 indicates if the original predictors are included (True) in model's 48 fitting or not (False) 49 50 n_clusters: int 51 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: 52 no clustering) 53 54 cluster_encode: bool 55 defines how the variable containing clusters is treated (default is one-hot) 56 if `False`, then labels are used, without one-hot encoding 57 58 type_clust: str 59 type of clustering method: currently k-means ('kmeans') or Gaussian 60 Mixture Model ('gmm') 61 62 type_scaling: a tuple of 3 strings 63 scaling methods for inputs, hidden layer, and clustering respectively 64 (and when relevant). 65 Currently available: standardization ('std') or MinMax scaling ('minmax') 66 67 col_sample: float 68 percentage of covariates randomly chosen for training 69 70 row_sample: float 71 percentage of rows chosen for training, by stratified bootstrapping 72 73 seed: int 74 reproducibility seed for nodes_sim=='uniform' 75 76 backend: str 77 "cpu" or "gpu" or "tpu" 78 79 Attributes: 80 81 fit_objs_: dict 82 objects adjusted to each individual time series 83 84 n_classes_: int 85 number of classes for the classifier 86 87 Examples: 88 89 See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/mtask_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/mtask_classification.py) 90 91 ```python 92 import nnetsauce as ns 93 import numpy as np 94 from sklearn.datasets import load_breast_cancer 95 from sklearn.linear_model import LinearRegression 96 from sklearn.model_selection import train_test_split 97 from sklearn import metrics 98 from time import time 99 100 breast_cancer = load_breast_cancer() 101 Z = breast_cancer.data 102 t = breast_cancer.target 103 104 X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2, 105 random_state=123+2*10) 106 107 # Linear Regression is used 108 regr = LinearRegression() 109 fit_obj = ns.MultitaskClassifier(regr, n_hidden_features=5, 110 n_clusters=2, type_clust="gmm") 111 112 start = time() 113 fit_obj.fit(X_train, y_train) 114 print(f"Elapsed {time() - start}") 115 116 print(fit_obj.score(X_test, y_test)) 117 print(fit_obj.score(X_test, y_test, scoring="roc_auc")) 118 119 start = time() 120 preds = fit_obj.predict(X_test) 121 print(f"Elapsed {time() - start}") 122 print(metrics.classification_report(preds, y_test)) 123 ``` 124 125 """ 126 127 # construct the object ----- 128 _estimator_type = "classifier" 129 130 def __init__( 131 self, 132 obj, 133 n_hidden_features=5, 134 activation_name="relu", 135 a=0.01, 136 nodes_sim="sobol", 137 bias=True, 138 dropout=0, 139 direct_link=True, 140 n_clusters=2, 141 cluster_encode=True, 142 
type_clust="kmeans", 143 type_scaling=("std", "std", "std"), 144 col_sample=1, 145 row_sample=1, 146 seed=123, 147 backend="cpu", 148 ): 149 super().__init__( 150 n_hidden_features=n_hidden_features, 151 activation_name=activation_name, 152 a=a, 153 nodes_sim=nodes_sim, 154 bias=bias, 155 dropout=dropout, 156 direct_link=direct_link, 157 n_clusters=n_clusters, 158 cluster_encode=cluster_encode, 159 type_clust=type_clust, 160 type_scaling=type_scaling, 161 col_sample=col_sample, 162 row_sample=row_sample, 163 seed=seed, 164 backend=backend, 165 ) 166 167 self.type_fit = "classification" 168 self.obj = obj 169 self.fit_objs_ = {} 170 171 def fit(self, X, y, sample_weight=None, **kwargs): 172 """Fit MultitaskClassifier to training data (X, y). 173 174 Args: 175 176 X: {array-like}, shape = [n_samples, n_features] 177 Training vectors, where n_samples is the number 178 of samples and n_features is the number of features. 179 180 y: array-like, shape = [n_samples] 181 Target values. 182 183 **kwargs: additional parameters to be passed to 184 self.cook_training_set or self.obj.fit 185 186 Returns: 187 188 self: object 189 190 """ 191 192 assert mx.is_factor(y), "y must contain only integers" 193 194 output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 195 196 self.classes_ = np.unique(y) # for compatibility with sklearn 197 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 198 199 # multitask response 200 Y = mo.one_hot_encode2(output_y, self.n_classes_) 201 202 # if sample_weight is None: 203 for i in range(self.n_classes_): 204 self.fit_objs_[i] = deepcopy( 205 self.obj.fit(scaled_Z, Y[:, i], **kwargs) 206 ) 207 208 self.classes_ = np.unique(y) 209 return self 210 211 def predict(self, X, **kwargs): 212 """Predict test data X. 213 214 Args: 215 216 X: {array-like}, shape = [n_samples, n_features] 217 Training vectors, where n_samples is the number 218 of samples and n_features is the number of features. 219 220 **kwargs: additional parameters to be passed to 221 self.cook_test_set 222 223 Returns: 224 225 model predictions: {array-like} 226 227 """ 228 return np.argmax(self.predict_proba(X, **kwargs), axis=1) 229 230 def predict_proba(self, X, **kwargs): 231 """Predict probabilities for test data X. 232 233 Args: 234 235 X: {array-like}, shape = [n_samples, n_features] 236 Training vectors, where n_samples is the number 237 of samples and n_features is the number of features. 238 239 **kwargs: additional parameters to be passed to 240 self.cook_test_set 241 242 Returns: 243 244 probability estimates for test data: {array-like} 245 246 """ 247 248 shape_X = X.shape 249 250 probs = np.zeros((shape_X[0], self.n_classes_)) 251 252 if len(shape_X) == 1: 253 n_features = shape_X[0] 254 255 new_X = mo.rbind( 256 X.reshape(1, n_features), 257 np.ones(n_features).reshape(1, n_features), 258 ) 259 260 Z = self.cook_test_set(new_X, **kwargs) 261 262 # loop on all the classes 263 for i in range(self.n_classes_): 264 probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0] 265 266 else: 267 Z = self.cook_test_set(X, **kwargs) 268 269 # loop on all the classes 270 for i in range(self.n_classes_): 271 probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs) 272 273 expit_raw_probs = expit(probs) 274 275 return expit_raw_probs / expit_raw_probs.sum(axis=1)[:, None] 276 277 def decision_function(self, X, **kwargs): 278 """Compute the decision function of X. 279 280 Parameters: 281 X: {array-like}, shape = [n_samples, n_features] 282 Samples to compute decision function for. 
283 284 **kwargs: additional parameters to be passed to 285 self.cook_test_set 286 287 Returns: 288 array-like of shape (n_samples,) or (n_samples, n_classes) 289 Decision function of the input samples. The order of outputs is the same 290 as that of the classes passed to fit. 291 """ 292 if not hasattr(self.obj, "decision_function"): 293 # If base classifier doesn't have decision_function, use predict_proba 294 proba = self.predict_proba(X, **kwargs) 295 if proba.shape[1] == 2: 296 return proba[:, 1] # For binary classification 297 return proba # For multiclass 298 299 if len(X.shape) == 1: 300 n_features = X.shape[0] 301 new_X = mo.rbind( 302 X.reshape(1, n_features), 303 np.ones(n_features).reshape(1, n_features), 304 ) 305 306 return ( 307 self.obj.decision_function( 308 self.cook_test_set(new_X, **kwargs), **kwargs 309 ) 310 )[0] 311 312 return self.obj.decision_function( 313 self.cook_test_set(X, **kwargs), **kwargs 314 ) 315 316 @property 317 def _estimator_type(self): 318 return "classifier"
Multitask Classification model based on regression models, with shared covariates
Parameters:
obj: object
any object (must be a regression model) containing a method fit (obj.fit())
and a method predict (obj.predict())
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated: one-hot encoded by default;
if `False`, the raw cluster labels are used instead
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
fit_objs_: dict
fitted regression objects, one per class
n_classes_: int
number of classes for the classifier
Examples:
See also https://github.com/Techtonique/nnetsauce/blob/master/examples/mtask_classification.py
```python
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time

breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target

X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
                                                    random_state=123+2*10)

# Linear Regression is used
regr = LinearRegression()
fit_obj = ns.MultitaskClassifier(regr, n_hidden_features=5,
                                 n_clusters=2, type_clust="gmm")

start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")

print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))

start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
```
171 def fit(self, X, y, sample_weight=None, **kwargs): 172 """Fit MultitaskClassifier to training data (X, y). 173 174 Args: 175 176 X: {array-like}, shape = [n_samples, n_features] 177 Training vectors, where n_samples is the number 178 of samples and n_features is the number of features. 179 180 y: array-like, shape = [n_samples] 181 Target values. 182 183 **kwargs: additional parameters to be passed to 184 self.cook_training_set or self.obj.fit 185 186 Returns: 187 188 self: object 189 190 """ 191 192 assert mx.is_factor(y), "y must contain only integers" 193 194 output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 195 196 self.classes_ = np.unique(y) # for compatibility with sklearn 197 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 198 199 # multitask response 200 Y = mo.one_hot_encode2(output_y, self.n_classes_) 201 202 # if sample_weight is None: 203 for i in range(self.n_classes_): 204 self.fit_objs_[i] = deepcopy( 205 self.obj.fit(scaled_Z, Y[:, i], **kwargs) 206 ) 207 208 self.classes_ = np.unique(y) 209 return self
Fit MultitaskClassifier to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
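The multitask mechanism behind `fit` can be summarised in a few lines: the integer targets are one-hot encoded and a clone of `obj` is regressed on each indicator column. A standalone sketch of that encoding (plain numpy, not the library's internal helpers):

```python
# Standalone sketch of the multitask idea: one regression problem per class
import numpy as np

y = np.array([0, 2, 1, 0, 2])
n_classes = len(np.unique(y))
Y = np.eye(n_classes)[y]     # one-hot response, shape (n_samples, n_classes)
# fit() then calls self.obj.fit(scaled_Z, Y[:, i]) for each class i, as in the source above
print(Y)
```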
211 def predict(self, X, **kwargs): 212 """Predict test data X. 213 214 Args: 215 216 X: {array-like}, shape = [n_samples, n_features] 217 Training vectors, where n_samples is the number 218 of samples and n_features is the number of features. 219 220 **kwargs: additional parameters to be passed to 221 self.cook_test_set 222 223 Returns: 224 225 model predictions: {array-like} 226 227 """ 228 return np.argmax(self.predict_proba(X, **kwargs), axis=1)
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
230 def predict_proba(self, X, **kwargs): 231 """Predict probabilities for test data X. 232 233 Args: 234 235 X: {array-like}, shape = [n_samples, n_features] 236 Training vectors, where n_samples is the number 237 of samples and n_features is the number of features. 238 239 **kwargs: additional parameters to be passed to 240 self.cook_test_set 241 242 Returns: 243 244 probability estimates for test data: {array-like} 245 246 """ 247 248 shape_X = X.shape 249 250 probs = np.zeros((shape_X[0], self.n_classes_)) 251 252 if len(shape_X) == 1: 253 n_features = shape_X[0] 254 255 new_X = mo.rbind( 256 X.reshape(1, n_features), 257 np.ones(n_features).reshape(1, n_features), 258 ) 259 260 Z = self.cook_test_set(new_X, **kwargs) 261 262 # loop on all the classes 263 for i in range(self.n_classes_): 264 probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0] 265 266 else: 267 Z = self.cook_test_set(X, **kwargs) 268 269 # loop on all the classes 270 for i in range(self.n_classes_): 271 probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs) 272 273 expit_raw_probs = expit(probs) 274 275 return expit_raw_probs / expit_raw_probs.sum(axis=1)[:, None]
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
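Per the source above, the raw per-class regression outputs are squashed through a sigmoid (`expit`) and then row-normalised so each row sums to one. A tiny numeric sketch:

```python
# Numeric sketch of the probability post-processing used in predict_proba
import numpy as np
from scipy.special import expit

raw = np.array([[0.2, -1.0, 0.5]])        # one sample, three per-class regression outputs
probs = expit(raw)
probs = probs / probs.sum(axis=1)[:, None]
print(probs, probs.sum())                 # the row sums to 1
```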
110class NeuralNetRegressor(BaseEstimator, RegressorMixin): 111 """ 112 (Pretrained) Neural Network Regressor. 113 114 Parameters: 115 116 hidden_layer_sizes : tuple, default=(100,) 117 The number of neurons in each hidden layer. 118 max_iter : int, default=100 119 The maximum number of iterations to train the model. 120 learning_rate : float, default=0.01 121 The learning rate for the optimizer. 122 l1_ratio : float, default=0.5 123 The ratio of L1 regularization. 124 alpha : float, default=1e-6 125 The regularization parameter. 126 activation_name : str, default="relu" 127 The activation function to use. 128 dropout : float, default=0.0 129 The dropout rate. 130 random_state : int, default=None 131 The random state for the random number generator. 132 weights : list, default=None 133 The weights to initialize the model with. 134 135 Attributes: 136 137 weights : list 138 The weights of the model. 139 params : list 140 The parameters of the model. 141 scaler_ : sklearn.preprocessing.StandardScaler 142 The scaler used to standardize the input features. 143 y_mean_ : float 144 The mean of the target variable. 145 146 Methods: 147 148 fit(X, y) 149 Fit the model to the data. 150 predict(X) 151 Predict the target variable. 152 get_weights() 153 Get the weights of the model. 154 set_weights(weights) 155 Set the weights of the model. 156 """ 157 158 def __init__( 159 self, 160 hidden_layer_sizes=None, 161 max_iter=100, 162 learning_rate=0.01, 163 l1_ratio=0.5, 164 alpha=1e-6, 165 activation_name="relu", 166 dropout=0, 167 weights=None, 168 random_state=None, 169 ): 170 if weights is None and hidden_layer_sizes is None: 171 hidden_layer_sizes = (100,) # default value if neither is provided 172 self.hidden_layer_sizes = hidden_layer_sizes 173 self.max_iter = max_iter 174 self.learning_rate = learning_rate 175 self.l1_ratio = l1_ratio 176 self.alpha = alpha 177 self.activation_name = activation_name 178 self.dropout = dropout 179 self.weights = weights 180 self.random_state = random_state 181 self.params = None 182 self.scaler_ = StandardScaler() 183 self.y_mean_ = None 184 185 def _validate_weights(self, input_dim): 186 """Validate that weights dimensions are coherent.""" 187 if not self.weights: 188 return False 189 190 try: 191 # Check each layer's weights and biases 192 prev_dim = input_dim 193 for W, b in self.weights: 194 # Check weight matrix dimensions 195 if W.shape[0] != prev_dim: 196 raise ValueError( 197 f"Weight matrix input dimension {W.shape[0]} does not match, previous layer output dimension {prev_dim}" 198 ) 199 # Check bias dimension matches weight matrix output 200 if W.shape[1] != b.shape[0]: 201 raise ValueError( 202 f"Bias dimension {b.shape[0]} does not match weight matrix, output dimension {W.shape[1]}" 203 ) 204 prev_dim = W.shape[1] 205 206 # Check final output dimension is 1 for regression 207 if prev_dim != 1: 208 raise ValueError( 209 f"Final layer output dimension {prev_dim} must be 1 for regression" 210 ) 211 212 return True 213 except (AttributeError, IndexError): 214 raise ValueError( 215 "Weights format is invalid. 
Expected list of (weight, bias) tuples" 216 ) 217 218 def fit(self, X, y): 219 # Standardize the input features 220 X = self.scaler_.fit_transform(X) 221 # Ensure y is 2D for consistency 222 y = y.reshape(-1, 1) 223 self.y_mean_ = jnp.mean(y) 224 y = y - self.y_mean_ 225 # Validate or initialize weights 226 if self.weights is not None: 227 if self._validate_weights(X.shape[1]): 228 self.params = self.weights 229 else: 230 if self.hidden_layer_sizes is None: 231 raise ValueError( 232 "Either weights or hidden_layer_sizes must be provided" 233 ) 234 self.params = initialize_params( 235 X.shape[1], self.hidden_layer_sizes, self.random_state 236 ) 237 loss_fn = partial(loss, l1_ratio=self.l1_ratio, alpha=self.alpha) 238 grad_loss = jit(grad(loss_fn)) # compiled gradient evaluation function 239 perex_grads = jit( 240 vmap(grad_loss, in_axes=(None, 0, 0)) 241 ) # fast per-example grads 242 # Training loop 243 for _ in range(self.max_iter): 244 grads = perex_grads(self.params, X, y) 245 # Average gradients across examples 246 grads = jax.tree_map(lambda g: jnp.mean(g, axis=0), grads) 247 # Update parameters 248 self.params = [ 249 (W - self.learning_rate * dW, b - self.learning_rate * db) 250 for (W, b), (dW, db) in zip(self.params, grads) 251 ] 252 # Store final weights 253 self.weights = self.params 254 return self 255 256 def get_weights(self): 257 """Return the current weights of the model.""" 258 if self.weights is None: 259 raise ValueError( 260 "No weights available. Model has not been fitted yet." 261 ) 262 return self.weights 263 264 def set_weights(self, weights): 265 """Set the weights of the model manually.""" 266 self.weights = weights 267 self.params = weights 268 269 def predict(self, X): 270 X = self.scaler_.transform(X) 271 if self.params is None: 272 raise ValueError("Model has not been fitted yet.") 273 predictions = predict_internal( 274 self.params, 275 X, 276 activation_func=self.activation_name, 277 dropout=self.dropout, 278 seed=self.random_state, 279 ) 280 return predictions.reshape(-1) + self.y_mean_
(Pretrained) Neural Network Regressor.
Parameters:
hidden_layer_sizes : tuple, default=(100,)
The number of neurons in each hidden layer.
max_iter : int, default=100
The maximum number of iterations to train the model.
learning_rate : float, default=0.01
The learning rate for the optimizer.
l1_ratio : float, default=0.5
The ratio of L1 regularization.
alpha : float, default=1e-6
The regularization parameter.
activation_name : str, default="relu"
The activation function to use.
dropout : float, default=0.0
The dropout rate.
random_state : int, default=None
The random state for the random number generator.
weights : list, default=None
The weights to initialize the model with.
Attributes:
weights : list
The weights of the model.
params : list
The parameters of the model.
scaler_ : sklearn.preprocessing.StandardScaler
The scaler used to standardize the input features.
y_mean_ : float
The mean of the target variable.
Methods:
fit(X, y)
Fit the model to the data.
predict(X)
Predict the target variable.
get_weights()
Get the weights of the model.
set_weights(weights)
Set the weights of the model.
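A minimal usage sketch on toy data (shapes and hyperparameters here are illustrative): the weights returned by `get_weights` can warm-start another instance, which is the "pretrained" use case named in the summary.

```python
# Minimal usage sketch (toy data): fit, predict, and reuse weights to warm-start a new instance
import numpy as np
import nnetsauce as ns

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 3))
y = X @ np.array([1.0, -2.0, 0.5]) + 0.1 * rng.normal(size=200)

reg = ns.NeuralNetRegressor(hidden_layer_sizes=(32, 16), max_iter=200, learning_rate=0.05)
reg.fit(X, y)
preds = reg.predict(X)

w = reg.get_weights()                     # list of (weight, bias) tuples
reg2 = ns.NeuralNetRegressor(weights=w)   # warm-started: hidden_layer_sizes may be omitted
reg2.fit(X, y)
```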
218 def fit(self, X, y): 219 # Standardize the input features 220 X = self.scaler_.fit_transform(X) 221 # Ensure y is 2D for consistency 222 y = y.reshape(-1, 1) 223 self.y_mean_ = jnp.mean(y) 224 y = y - self.y_mean_ 225 # Validate or initialize weights 226 if self.weights is not None: 227 if self._validate_weights(X.shape[1]): 228 self.params = self.weights 229 else: 230 if self.hidden_layer_sizes is None: 231 raise ValueError( 232 "Either weights or hidden_layer_sizes must be provided" 233 ) 234 self.params = initialize_params( 235 X.shape[1], self.hidden_layer_sizes, self.random_state 236 ) 237 loss_fn = partial(loss, l1_ratio=self.l1_ratio, alpha=self.alpha) 238 grad_loss = jit(grad(loss_fn)) # compiled gradient evaluation function 239 perex_grads = jit( 240 vmap(grad_loss, in_axes=(None, 0, 0)) 241 ) # fast per-example grads 242 # Training loop 243 for _ in range(self.max_iter): 244 grads = perex_grads(self.params, X, y) 245 # Average gradients across examples 246 grads = jax.tree_map(lambda g: jnp.mean(g, axis=0), grads) 247 # Update parameters 248 self.params = [ 249 (W - self.learning_rate * dW, b - self.learning_rate * db) 250 for (W, b), (dW, db) in zip(self.params, grads) 251 ] 252 # Store final weights 253 self.weights = self.params 254 return self
269 def predict(self, X): 270 X = self.scaler_.transform(X) 271 if self.params is None: 272 raise ValueError("Model has not been fitted yet.") 273 predictions = predict_internal( 274 self.params, 275 X, 276 activation_func=self.activation_name, 277 dropout=self.dropout, 278 seed=self.random_state, 279 ) 280 return predictions.reshape(-1) + self.y_mean_
10class NeuralNetClassifier(BaseEstimator, ClassifierMixin): 11 """ 12 (Pretrained) Neural Network Classifier. 13 14 Parameters: 15 16 hidden_layer_sizes : tuple, default=(100,) 17 The number of neurons in each hidden layer. 18 max_iter : int, default=100 19 The maximum number of iterations to train the model. 20 learning_rate : float, default=0.01 21 The learning rate for the optimizer. 22 l1_ratio : float, default=0.5 23 The ratio of L1 regularization. 24 alpha : float, default=1e-6 25 The regularization parameter. 26 activation_name : str, default="relu" 27 The activation function to use. 28 dropout : float, default=0.0 29 The dropout rate. 30 random_state : int, default=None 31 The random state for the random number generator. 32 weights : list, default=None 33 The weights to initialize the model with. 34 35 Attributes: 36 37 weights : list 38 The weights of the model. 39 params : list 40 The parameters of the model. 41 scaler_ : sklearn.preprocessing.StandardScaler 42 The scaler used to standardize the input features. 43 y_mean_ : float 44 The mean of the target variable. 45 46 Methods: 47 48 fit(X, y) 49 Fit the model to the data. 50 predict(X) 51 Predict the target variable. 52 predict_proba(X) 53 Predict the probability of the target variable. 54 get_weights() 55 Get the weights of the model. 56 set_weights(weights) 57 Set the weights of the model. 58 """ 59 60 _estimator_type = "classifier" 61 62 def __init__( 63 self, 64 hidden_layer_sizes=(100,), 65 max_iter=100, 66 learning_rate=0.01, 67 weights=None, 68 l1_ratio=0.5, 69 alpha=1e-6, 70 activation_name="relu", 71 dropout=0.0, 72 random_state=None, 73 ): 74 self.hidden_layer_sizes = hidden_layer_sizes 75 self.max_iter = max_iter 76 self.learning_rate = learning_rate 77 self.weights = weights 78 self.l1_ratio = l1_ratio 79 self.alpha = alpha 80 self.activation_name = activation_name 81 self.dropout = dropout 82 self.random_state = random_state 83 self.regr = None 84 85 def fit(self, X, y): 86 """Fit the model to the data. 87 88 Parameters: 89 90 X: {array-like}, shape = [n_samples, n_features] 91 Training vectors, where n_samples is the number of samples and 92 n_features is the number of features. 93 y: array-like, shape = [n_samples] 94 Target values. 95 """ 96 regressor = NeuralNetRegressor( 97 hidden_layer_sizes=self.hidden_layer_sizes, 98 max_iter=self.max_iter, 99 learning_rate=self.learning_rate, 100 weights=self.weights, 101 l1_ratio=self.l1_ratio, 102 alpha=self.alpha, 103 activation_name=self.activation_name, 104 dropout=self.dropout, 105 random_state=self.random_state, 106 ) 107 self.regr = SimpleMultitaskClassifier(regressor) 108 self.regr.fit(X, y) 109 self.classes_ = np.unique(y) 110 self.n_classes_ = len(self.classes_) 111 self.n_tasks_ = 1 112 self.n_features_in_ = X.shape[1] 113 self.n_outputs_ = 1 114 self.n_samples_fit_ = X.shape[0] 115 self.n_samples_test_ = X.shape[0] 116 self.n_features_out_ = 1 117 self.n_outputs_ = 1 118 self.n_features_in_ = X.shape[1] 119 self.n_features_out_ = 1 120 self.n_outputs_ = 1 121 return self 122 123 def predict_proba(self, X): 124 """Predict the probability of the target variable. 125 126 Parameters: 127 128 X: {array-like}, shape = [n_samples, n_features] 129 Training vectors, where n_samples is the number of samples and 130 n_features is the number of features. 131 """ 132 return self.regr.predict_proba(X) 133 134 def predict(self, X): 135 """Predict the target variable. 
136 137 Parameters: 138 139 X: {array-like}, shape = [n_samples, n_features] 140 Training vectors, where n_samples is the number of samples and 141 n_features is the number of features. 142 """ 143 return self.regr.predict(X) 144 145 @property 146 def _estimator_type(self): 147 return "classifier"
(Pretrained) Neural Network Classifier.
Parameters:
hidden_layer_sizes : tuple, default=(100,)
The number of neurons in each hidden layer.
max_iter : int, default=100
The maximum number of iterations to train the model.
learning_rate : float, default=0.01
The learning rate for the optimizer.
l1_ratio : float, default=0.5
The ratio of L1 regularization.
alpha : float, default=1e-6
The regularization parameter.
activation_name : str, default="relu"
The activation function to use.
dropout : float, default=0.0
The dropout rate.
random_state : int, default=None
The random state for the random number generator.
weights : list, default=None
The weights to initialize the model with.
Attributes:
weights : list
The weights of the model.
params : list
The parameters of the model.
scaler_ : sklearn.preprocessing.StandardScaler
The scaler used to standardize the input features.
y_mean_ : float
The mean of the target variable.
Methods:
fit(X, y)
Fit the model to the data.
predict(X)
Predict the target variable.
predict_proba(X)
Predict the probability of the target variable.
get_weights()
Get the weights of the model.
set_weights(weights)
Set the weights of the model.
85 def fit(self, X, y): 86 """Fit the model to the data. 87 88 Parameters: 89 90 X: {array-like}, shape = [n_samples, n_features] 91 Training vectors, where n_samples is the number of samples and 92 n_features is the number of features. 93 y: array-like, shape = [n_samples] 94 Target values. 95 """ 96 regressor = NeuralNetRegressor( 97 hidden_layer_sizes=self.hidden_layer_sizes, 98 max_iter=self.max_iter, 99 learning_rate=self.learning_rate, 100 weights=self.weights, 101 l1_ratio=self.l1_ratio, 102 alpha=self.alpha, 103 activation_name=self.activation_name, 104 dropout=self.dropout, 105 random_state=self.random_state, 106 ) 107 self.regr = SimpleMultitaskClassifier(regressor) 108 self.regr.fit(X, y) 109 self.classes_ = np.unique(y) 110 self.n_classes_ = len(self.classes_) 111 self.n_tasks_ = 1 112 self.n_features_in_ = X.shape[1] 113 self.n_outputs_ = 1 114 self.n_samples_fit_ = X.shape[0] 115 self.n_samples_test_ = X.shape[0] 116 self.n_features_out_ = 1 117 self.n_outputs_ = 1 118 self.n_features_in_ = X.shape[1] 119 self.n_features_out_ = 1 120 self.n_outputs_ = 1 121 return self
Fit the model to the data.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
123 def predict_proba(self, X): 124 """Predict the probability of the target variable. 125 126 Parameters: 127 128 X: {array-like}, shape = [n_samples, n_features] 129 Training vectors, where n_samples is the number of samples and 130 n_features is the number of features. 131 """ 132 return self.regr.predict_proba(X)
Predict the probability of the target variable.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
134 def predict(self, X): 135 """Predict the target variable. 136 137 Parameters: 138 139 X: {array-like}, shape = [n_samples, n_features] 140 Training vectors, where n_samples is the number of samples and 141 n_features is the number of features. 142 """ 143 return self.regr.predict(X)
Predict the target variable.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
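For orientation, here is a minimal usage sketch of `NeuralNetClassifier`; the dataset (scikit-learn's iris), the train/test split and the hyperparameter values are illustrative choices, not recommendations.

```python
import nnetsauce as ns
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123
)

# Illustrative hyperparameters; targets must be integer-encoded classes
clf = ns.NeuralNetClassifier(
    hidden_layer_sizes=(50,),
    max_iter=100,
    learning_rate=0.01,
    activation_name="relu",
    random_state=123,
)
clf.fit(X_train, y_train)

probs = clf.predict_proba(X_test)   # shape (n_test, n_classes), rows sum to 1
preds = clf.predict(X_test)         # argmax of the probabilities
print(accuracy_score(y_test, preds))
```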
20class PredictionInterval(BaseEstimator, RegressorMixin): 21 """Class PredictionInterval: Obtain prediction intervals. 22 23 Attributes: 24 25 obj: an object; 26 fitted object containing methods `fit` and `predict` 27 28 method: a string; 29 method for constructing the prediction intervals. 30 Currently "splitconformal" (default) and "localconformal" 31 32 level: a float; 33 Confidence level for prediction intervals. Default is 95, 34 equivalent to a miscoverage error of 5 (%) 35 36 replications: an integer; 37 Number of replications for simulated conformal (default is `None`) 38 39 type_pi: a string; 40 type of prediction interval: currently `None` 41 (split conformal without simulation) 42 for type_pi in: 43 - 'bootstrap': Bootstrap resampling. 44 - 'kde': Kernel Density Estimation. 45 46 type_split: a string; 47 "random" (random split of data) or "sequential" (sequential split of data) 48 49 seed: an integer; 50 Reproducibility of fit (there's a random split between fitting and calibration data) 51 """ 52 53 def __init__( 54 self, 55 obj, 56 method="splitconformal", 57 level=95, 58 type_pi=None, 59 type_split="random", 60 replications=None, 61 kernel=None, 62 agg="mean", 63 seed=123, 64 ): 65 self.obj = obj 66 self.method = method 67 self.level = level 68 self.type_pi = type_pi 69 self.type_split = type_split 70 self.replications = replications 71 self.kernel = kernel 72 self.agg = agg 73 self.seed = seed 74 self.alpha_ = 1 - self.level / 100 75 self.quantile_ = None 76 self.icp_ = None 77 self.calibrated_residuals_ = None 78 self.scaled_calibrated_residuals_ = None 79 self.calibrated_residuals_scaler_ = None 80 self.kde_ = None 81 self.aic_ = None 82 self.aicc_ = None 83 self.bic_ = None 84 self.sse_ = None 85 86 def fit(self, X, y, sample_weight=None, **kwargs): 87 """Fit the `method` to training data (X, y). 88 89 Args: 90 91 X: array-like, shape = [n_samples, n_features]; 92 Training set vectors, where n_samples is the number 93 of samples and n_features is the number of features. 94 95 y: array-like, shape = [n_samples, ]; Target values. 96 97 sample_weight: array-like, shape = [n_samples] 98 Sample weights. 
99 100 """ 101 102 if self.type_split == "random": 103 X_train, X_calibration, y_train, y_calibration = train_test_split( 104 X, y, test_size=0.5, random_state=self.seed 105 ) 106 107 elif self.type_split == "sequential": 108 n_x = X.shape[0] 109 n_x_half = n_x // 2 110 first_half_idx = range(0, n_x_half) 111 second_half_idx = range(n_x_half, n_x) 112 X_train = X[first_half_idx, :] 113 X_calibration = X[second_half_idx, :] 114 y_train = y[first_half_idx] 115 y_calibration = y[second_half_idx] 116 117 if self.method == "splitconformal": 118 self.obj.fit(X_train, y_train) 119 preds_calibration = self.obj.predict(X_calibration) 120 self.calibrated_residuals_ = y_calibration - preds_calibration 121 absolute_residuals = np.abs(self.calibrated_residuals_) 122 self.calibrated_residuals_scaler_ = StandardScaler( 123 with_mean=True, with_std=True 124 ) 125 self.scaled_calibrated_residuals_ = ( 126 self.calibrated_residuals_scaler_.fit_transform( 127 self.calibrated_residuals_.reshape(-1, 1) 128 ).ravel() 129 ) 130 try: 131 # numpy version >= 1.22 132 self.quantile_ = np.quantile( 133 a=absolute_residuals, q=self.level / 100, method="higher" 134 ) 135 except Exception: 136 # numpy version < 1.22 137 self.quantile_ = np.quantile( 138 a=absolute_residuals, 139 q=self.level / 100, 140 interpolation="higher", 141 ) 142 143 if self.method == "localconformal": 144 mad_estimator = ExtraTreesRegressor() 145 normalizer = RegressorNormalizer( 146 self.obj, mad_estimator, AbsErrorErrFunc() 147 ) 148 nc = RegressorNc(self.obj, AbsErrorErrFunc(), normalizer) 149 self.icp_ = IcpRegressor(nc) 150 self.icp_.fit(X_train, y_train) 151 self.icp_.calibrate(X_calibration, y_calibration) 152 153 # Calculate AIC 154 # Get predictions 155 preds = self.obj.predict(X_calibration) 156 157 # Calculate SSE 158 self.sse_ = np.sum((y_calibration - preds) ** 2) 159 160 # Get number of parameters from the base model 161 n_params = ( 162 getattr(self.obj, "n_hidden_features", 0) + X_calibration.shape[1] 163 ) 164 165 # Calculate AIC 166 n_samples = len(y_calibration) 167 temp = n_samples * np.log(self.sse_ / n_samples) 168 self.aic_ = temp + 2 * n_params 169 self.bic_ = temp + np.log(n_samples) * n_params 170 171 return self 172 173 def predict(self, X, return_pi=False): 174 """Obtain predictions and prediction intervals 175 176 Args: 177 178 X: array-like, shape = [n_samples, n_features]; 179 Testing set vectors, where n_samples is the number 180 of samples and n_features is the number of features. 181 182 return_pi: boolean 183 Whether the prediction interval is returned or not. 184 Default is False, for compatibility with other _estimators_. 185 If True, a tuple containing the predictions + lower and upper 186 bounds is returned. 
187 188 """ 189 190 if self.method == "splitconformal": 191 pred = self.obj.predict(X) 192 193 if self.method == "localconformal": 194 pred = self.icp_.predict(X) 195 196 if self.method == "splitconformal": 197 if ( 198 self.replications is None and self.type_pi is None 199 ): # type_pi is not used here, no bootstrap or kde 200 if return_pi: 201 DescribeResult = namedtuple( 202 "DescribeResult", ("mean", "lower", "upper") 203 ) 204 return DescribeResult( 205 pred, pred - self.quantile_, pred + self.quantile_ 206 ) 207 208 else: 209 return pred 210 211 else: # self.method == "splitconformal" and if self.replications is not None, type_pi must be used 212 raise NotImplementedError 213 214 if self.type_pi is None: 215 self.type_pi = "kde" 216 raise Warning("type_pi must be set, setting to 'kde'") 217 218 if self.replications is None: 219 self.replications = 100 220 raise Warning("replications must be set, setting to 100") 221 222 assert self.type_pi in ( 223 "bootstrap", 224 "kde", 225 "normal", 226 "ecdf", 227 "permutation", 228 "smooth-bootstrap", 229 ), "`self.type_pi` must be in ('bootstrap', 'kde', 'normal', 'ecdf', 'permutation', 'smooth-bootstrap')" 230 231 if self.type_pi == "bootstrap": 232 np.random.seed(self.seed) 233 self.residuals_sims_ = np.asarray( 234 [ 235 np.random.choice( 236 a=self.scaled_calibrated_residuals_, 237 size=X.shape[0], 238 ) 239 for _ in range(self.replications) 240 ] 241 ).T 242 self.sims_ = np.asarray( 243 [ 244 pred 245 + self.calibrated_residuals_scaler_.scale_[0] 246 * self.residuals_sims_[:, i].ravel() 247 for i in range(self.replications) 248 ] 249 ).T 250 elif self.type_pi == "kde": 251 self.kde_ = gaussian_kde( 252 dataset=self.scaled_calibrated_residuals_ 253 ) 254 self.sims_ = np.asarray( 255 [ 256 pred 257 + self.calibrated_residuals_scaler_.scale_[0] 258 * self.kde_.resample( 259 size=X.shape[0], seed=self.seed + i 260 ).ravel() 261 for i in range(self.replications) 262 ] 263 ).T 264 else: # self.type_pi == "normal" or "ecdf" or "permutation" or "smooth-bootstrap" 265 self.residuals_sims_ = np.asarray( 266 simulate_replications( 267 data=self.scaled_calibrated_residuals_, 268 method=self.type_pi, 269 num_replications=self.replications, 270 n_obs=X.shape[0], 271 seed=self.seed, 272 ) 273 ).T 274 self.sims_ = np.asarray( 275 [ 276 pred 277 + self.calibrated_residuals_scaler_.scale_[0] 278 * self.residuals_sims_[:, i].ravel() 279 for i in range(self.replications) 280 ] 281 ).T 282 283 self.mean_ = np.mean(self.sims_, axis=1) 284 self.lower_ = np.quantile( 285 self.sims_, q=self.alpha_ / 200, axis=1 286 ) 287 self.upper_ = np.quantile( 288 self.sims_, q=1 - self.alpha_ / 200, axis=1 289 ) 290 291 DescribeResult = namedtuple( 292 "DescribeResult", ("mean", "sims", "lower", "upper") 293 ) 294 295 return DescribeResult( 296 self.mean_, self.sims_, self.lower_, self.upper_ 297 ) 298 299 if self.method == "localconformal": 300 if self.replications is None: 301 if return_pi: 302 predictions_bounds = self.icp_.predict( 303 X, significance=1 - self.level 304 ) 305 DescribeResult = namedtuple( 306 "DescribeResult", ("mean", "lower", "upper") 307 ) 308 return DescribeResult( 309 pred, predictions_bounds[:, 0], predictions_bounds[:, 1] 310 ) 311 312 else: 313 return pred 314 315 else: # (self.method == "localconformal") and if self.replications is not None 316 raise NotImplementedError( 317 "When self.method == 'localconformal', there are no simulations" 318 )
Class PredictionInterval: Obtain prediction intervals.
Attributes:
obj: an object;
fitted object containing methods `fit` and `predict`
method: a string;
method for constructing the prediction intervals.
Currently "splitconformal" (default) and "localconformal"
level: a float;
Confidence level for prediction intervals. Default is 95,
equivalent to a miscoverage error of 5 (%)
replications: an integer;
Number of replications for simulated conformal (default is `None`)
type_pi: a string;
type of prediction interval. Default is `None`
(split conformal without simulation). Otherwise, one of:
- 'bootstrap': Bootstrap resampling.
- 'kde': Kernel Density Estimation.
- 'normal', 'ecdf', 'permutation', 'smooth-bootstrap': other residual simulation methods (see `predict` and the configuration sketch below).
type_split: a string;
"random" (random split of data) or "sequential" (sequential split of data)
seed: an integer;
Reproducibility of fit (there's a random split between fitting and calibration data)
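To make the interplay of these attributes concrete, here is a configuration sketch (the base learner and parameter values are arbitrary illustrations): a plain split-conformal interval needs only `obj` and `level`, while simulated intervals also require `replications` and `type_pi`.

```python
import nnetsauce as ns
from sklearn.linear_model import Ridge

# Plain split conformal: symmetric intervals built from a calibration quantile
pi_split = ns.PredictionInterval(obj=Ridge(), method="splitconformal", level=95)

# Simulated split conformal: calibration residuals are resampled
# `replications` times ('bootstrap' or 'kde', among others)
pi_boot = ns.PredictionInterval(
    obj=Ridge(),
    method="splitconformal",
    level=95,
    type_pi="bootstrap",
    replications=250,
    seed=123,
)

# Locally weighted conformal intervals (no simulation available)
pi_local = ns.PredictionInterval(obj=Ridge(), method="localconformal", level=95)
```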
86 def fit(self, X, y, sample_weight=None, **kwargs): 87 """Fit the `method` to training data (X, y). 88 89 Args: 90 91 X: array-like, shape = [n_samples, n_features]; 92 Training set vectors, where n_samples is the number 93 of samples and n_features is the number of features. 94 95 y: array-like, shape = [n_samples, ]; Target values. 96 97 sample_weight: array-like, shape = [n_samples] 98 Sample weights. 99 100 """ 101 102 if self.type_split == "random": 103 X_train, X_calibration, y_train, y_calibration = train_test_split( 104 X, y, test_size=0.5, random_state=self.seed 105 ) 106 107 elif self.type_split == "sequential": 108 n_x = X.shape[0] 109 n_x_half = n_x // 2 110 first_half_idx = range(0, n_x_half) 111 second_half_idx = range(n_x_half, n_x) 112 X_train = X[first_half_idx, :] 113 X_calibration = X[second_half_idx, :] 114 y_train = y[first_half_idx] 115 y_calibration = y[second_half_idx] 116 117 if self.method == "splitconformal": 118 self.obj.fit(X_train, y_train) 119 preds_calibration = self.obj.predict(X_calibration) 120 self.calibrated_residuals_ = y_calibration - preds_calibration 121 absolute_residuals = np.abs(self.calibrated_residuals_) 122 self.calibrated_residuals_scaler_ = StandardScaler( 123 with_mean=True, with_std=True 124 ) 125 self.scaled_calibrated_residuals_ = ( 126 self.calibrated_residuals_scaler_.fit_transform( 127 self.calibrated_residuals_.reshape(-1, 1) 128 ).ravel() 129 ) 130 try: 131 # numpy version >= 1.22 132 self.quantile_ = np.quantile( 133 a=absolute_residuals, q=self.level / 100, method="higher" 134 ) 135 except Exception: 136 # numpy version < 1.22 137 self.quantile_ = np.quantile( 138 a=absolute_residuals, 139 q=self.level / 100, 140 interpolation="higher", 141 ) 142 143 if self.method == "localconformal": 144 mad_estimator = ExtraTreesRegressor() 145 normalizer = RegressorNormalizer( 146 self.obj, mad_estimator, AbsErrorErrFunc() 147 ) 148 nc = RegressorNc(self.obj, AbsErrorErrFunc(), normalizer) 149 self.icp_ = IcpRegressor(nc) 150 self.icp_.fit(X_train, y_train) 151 self.icp_.calibrate(X_calibration, y_calibration) 152 153 # Calculate AIC 154 # Get predictions 155 preds = self.obj.predict(X_calibration) 156 157 # Calculate SSE 158 self.sse_ = np.sum((y_calibration - preds) ** 2) 159 160 # Get number of parameters from the base model 161 n_params = ( 162 getattr(self.obj, "n_hidden_features", 0) + X_calibration.shape[1] 163 ) 164 165 # Calculate AIC 166 n_samples = len(y_calibration) 167 temp = n_samples * np.log(self.sse_ / n_samples) 168 self.aic_ = temp + 2 * n_params 169 self.bic_ = temp + np.log(n_samples) * n_params 170 171 return self
Fit the method to training data (X, y).
Args:
X: array-like, shape = [n_samples, n_features];
Training set vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples, ]; Target values.
sample_weight: array-like, shape = [n_samples]
Sample weights.
173 def predict(self, X, return_pi=False): 174 """Obtain predictions and prediction intervals 175 176 Args: 177 178 X: array-like, shape = [n_samples, n_features]; 179 Testing set vectors, where n_samples is the number 180 of samples and n_features is the number of features. 181 182 return_pi: boolean 183 Whether the prediction interval is returned or not. 184 Default is False, for compatibility with other _estimators_. 185 If True, a tuple containing the predictions + lower and upper 186 bounds is returned. 187 188 """ 189 190 if self.method == "splitconformal": 191 pred = self.obj.predict(X) 192 193 if self.method == "localconformal": 194 pred = self.icp_.predict(X) 195 196 if self.method == "splitconformal": 197 if ( 198 self.replications is None and self.type_pi is None 199 ): # type_pi is not used here, no bootstrap or kde 200 if return_pi: 201 DescribeResult = namedtuple( 202 "DescribeResult", ("mean", "lower", "upper") 203 ) 204 return DescribeResult( 205 pred, pred - self.quantile_, pred + self.quantile_ 206 ) 207 208 else: 209 return pred 210 211 else: # self.method == "splitconformal" and if self.replications is not None, type_pi must be used 212 raise NotImplementedError 213 214 if self.type_pi is None: 215 self.type_pi = "kde" 216 raise Warning("type_pi must be set, setting to 'kde'") 217 218 if self.replications is None: 219 self.replications = 100 220 raise Warning("replications must be set, setting to 100") 221 222 assert self.type_pi in ( 223 "bootstrap", 224 "kde", 225 "normal", 226 "ecdf", 227 "permutation", 228 "smooth-bootstrap", 229 ), "`self.type_pi` must be in ('bootstrap', 'kde', 'normal', 'ecdf', 'permutation', 'smooth-bootstrap')" 230 231 if self.type_pi == "bootstrap": 232 np.random.seed(self.seed) 233 self.residuals_sims_ = np.asarray( 234 [ 235 np.random.choice( 236 a=self.scaled_calibrated_residuals_, 237 size=X.shape[0], 238 ) 239 for _ in range(self.replications) 240 ] 241 ).T 242 self.sims_ = np.asarray( 243 [ 244 pred 245 + self.calibrated_residuals_scaler_.scale_[0] 246 * self.residuals_sims_[:, i].ravel() 247 for i in range(self.replications) 248 ] 249 ).T 250 elif self.type_pi == "kde": 251 self.kde_ = gaussian_kde( 252 dataset=self.scaled_calibrated_residuals_ 253 ) 254 self.sims_ = np.asarray( 255 [ 256 pred 257 + self.calibrated_residuals_scaler_.scale_[0] 258 * self.kde_.resample( 259 size=X.shape[0], seed=self.seed + i 260 ).ravel() 261 for i in range(self.replications) 262 ] 263 ).T 264 else: # self.type_pi == "normal" or "ecdf" or "permutation" or "smooth-bootstrap" 265 self.residuals_sims_ = np.asarray( 266 simulate_replications( 267 data=self.scaled_calibrated_residuals_, 268 method=self.type_pi, 269 num_replications=self.replications, 270 n_obs=X.shape[0], 271 seed=self.seed, 272 ) 273 ).T 274 self.sims_ = np.asarray( 275 [ 276 pred 277 + self.calibrated_residuals_scaler_.scale_[0] 278 * self.residuals_sims_[:, i].ravel() 279 for i in range(self.replications) 280 ] 281 ).T 282 283 self.mean_ = np.mean(self.sims_, axis=1) 284 self.lower_ = np.quantile( 285 self.sims_, q=self.alpha_ / 200, axis=1 286 ) 287 self.upper_ = np.quantile( 288 self.sims_, q=1 - self.alpha_ / 200, axis=1 289 ) 290 291 DescribeResult = namedtuple( 292 "DescribeResult", ("mean", "sims", "lower", "upper") 293 ) 294 295 return DescribeResult( 296 self.mean_, self.sims_, self.lower_, self.upper_ 297 ) 298 299 if self.method == "localconformal": 300 if self.replications is None: 301 if return_pi: 302 predictions_bounds = self.icp_.predict( 303 X, significance=1 - self.level 
304 ) 305 DescribeResult = namedtuple( 306 "DescribeResult", ("mean", "lower", "upper") 307 ) 308 return DescribeResult( 309 pred, predictions_bounds[:, 0], predictions_bounds[:, 1] 310 ) 311 312 else: 313 return pred 314 315 else: # (self.method == "localconformal") and if self.replications is not None 316 raise NotImplementedError( 317 "When self.method == 'localconformal', there are no simulations" 318 )
Obtain predictions and prediction intervals
Args:
X: array-like, shape = [n_samples, n_features];
Testing set vectors, where n_samples is the number
of samples and n_features is the number of features.
return_pi: boolean
Whether the prediction interval is returned or not.
Default is False, for compatibility with other estimators.
If True, a tuple containing the predictions + lower and upper
bounds is returned.
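And a minimal end-to-end sketch of `fit`/`predict`, assuming a scikit-learn regressor as the base `obj` (dataset and settings are illustrative):

```python
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123
)

pi = ns.PredictionInterval(
    obj=RandomForestRegressor(random_state=123),
    method="splitconformal",
    level=95,
    seed=123,
)
pi.fit(X_train, y_train)

point_preds = pi.predict(X_test)            # point predictions only
res = pi.predict(X_test, return_pi=True)    # namedtuple (mean, lower, upper)

# Empirical coverage of the 95% intervals on the test set
coverage = ((y_test >= res.lower) & (y_test <= res.upper)).mean()
print(coverage)
```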
20class PredictionSet(BaseEstimator, ClassifierMixin): 21 """Class PredictionSet: Obtain prediction sets. 22 23 Attributes: 24 25 obj: an object; 26 fitted object containing methods `fit` and `predict` 27 28 method: a string; 29 method for constructing the prediction sets. 30 Currently "icp" (default, inductive conformal) and "tcp" (transductive conformal) 31 32 level: a float; 33 Confidence level for prediction sets. Default is None, 34 95 is equivalent to a miscoverage error of 5 (%) 35 36 seed: an integer; 37 Reproducibility of fit (there's a random split between fitting and calibration data) 38 """ 39 40 def __init__( 41 self, 42 obj, 43 method="icp", 44 level=None, 45 seed=123, 46 ): 47 self.obj = obj 48 self.method = method 49 self.level = level 50 self.seed = seed 51 if self.level is not None: 52 self.alpha_ = 1 - self.level / 100 53 self.quantile_ = None 54 self.icp_ = None 55 self.tcp_ = None 56 57 if self.method == "icp": 58 self.icp_ = IcpClassifier( 59 ClassifierNc(ClassifierAdapter(self.obj), MarginErrFunc()), 60 ) 61 elif self.method == "tcp": 62 self.tcp_ = TcpClassifier( 63 ClassifierNc(ClassifierAdapter(self.obj), MarginErrFunc()), 64 ) 65 else: 66 raise ValueError("`self.method` must be in ('icp', 'tcp')") 67 68 def fit(self, X, y, sample_weight=None, **kwargs): 69 """Fit the `method` to training data (X, y). 70 71 Args: 72 73 X: array-like, shape = [n_samples, n_features]; 74 Training set vectors, where n_samples is the number 75 of samples and n_features is the number of features. 76 77 y: array-like, shape = [n_samples, ]; Target values. 78 79 sample_weight: array-like, shape = [n_samples] 80 Sample weights. 81 82 """ 83 if self.method == "icp": 84 X_train, X_calibration, y_train, y_calibration = train_test_split( 85 X, y, test_size=0.5, random_state=self.seed 86 ) 87 self.icp_.fit(X_train, y_train) 88 self.icp_.calibrate(X_calibration, y_calibration) 89 90 elif self.method == "tcp": 91 self.tcp_.fit(X, y) 92 93 return self 94 95 def predict(self, X, **kwargs): 96 """Obtain predictions and prediction sets 97 98 Args: 99 100 X: array-like, shape = [n_samples, n_features]; 101 Testing set vectors, where n_samples is the number 102 of samples and n_features is the number of features. 103 104 """ 105 106 if self.method == "icp": 107 return self.icp_.predict(X, significance=self.alpha_, **kwargs) 108 109 elif self.method == "tcp": 110 return self.tcp_.predict(X, significance=self.alpha_, **kwargs) 111 112 else: 113 raise ValueError("`self.method` must be in ('icp', 'tcp')") 114 115 def predict_proba(self, X): 116 predictions = self.predict(X) 117 return np.eye(len(np.unique(predictions)))[predictions]
Class PredictionSet: Obtain prediction sets.
Attributes:
obj: an object;
fitted object containing methods `fit` and `predict`
method: a string;
method for constructing the prediction sets.
Currently "icp" (default, inductive conformal) and "tcp" (transductive conformal)
level: a float;
Confidence level for prediction sets. Default is None,
95 is equivalent to a miscoverage error of 5 (%)
seed: an integer;
Reproducibility of fit (there's a random split between fitting and calibration data)
68 def fit(self, X, y, sample_weight=None, **kwargs): 69 """Fit the `method` to training data (X, y). 70 71 Args: 72 73 X: array-like, shape = [n_samples, n_features]; 74 Training set vectors, where n_samples is the number 75 of samples and n_features is the number of features. 76 77 y: array-like, shape = [n_samples, ]; Target values. 78 79 sample_weight: array-like, shape = [n_samples] 80 Sample weights. 81 82 """ 83 if self.method == "icp": 84 X_train, X_calibration, y_train, y_calibration = train_test_split( 85 X, y, test_size=0.5, random_state=self.seed 86 ) 87 self.icp_.fit(X_train, y_train) 88 self.icp_.calibrate(X_calibration, y_calibration) 89 90 elif self.method == "tcp": 91 self.tcp_.fit(X, y) 92 93 return self
Fit the method to training data (X, y).
Args:
X: array-like, shape = [n_samples, n_features];
Training set vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples, ]; Target values.
sample_weight: array-like, shape = [n_samples]
Sample weights.
95 def predict(self, X, **kwargs): 96 """Obtain predictions and prediction sets 97 98 Args: 99 100 X: array-like, shape = [n_samples, n_features]; 101 Testing set vectors, where n_samples is the number 102 of samples and n_features is the number of features. 103 104 """ 105 106 if self.method == "icp": 107 return self.icp_.predict(X, significance=self.alpha_, **kwargs) 108 109 elif self.method == "tcp": 110 return self.tcp_.predict(X, significance=self.alpha_, **kwargs) 111 112 else: 113 raise ValueError("`self.method` must be in ('icp', 'tcp')")
Obtain predictions and prediction sets
Args:
X: array-like, shape = [n_samples, n_features];
Testing set vectors, where n_samples is the number
of samples and n_features is the number of features.
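A minimal usage sketch of `PredictionSet`, assuming a scikit-learn classifier as the base `obj`; note that `level` must be provided at construction time, since `predict` relies on the derived `alpha_`.

```python
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123
)

# Inductive conformal prediction sets at the 95% level
ps = ns.PredictionSet(
    obj=RandomForestClassifier(random_state=123),
    method="icp",
    level=95,
    seed=123,
)
ps.fit(X_train, y_train)

# One row per test point, one column per class: True means the class
# belongs to the prediction set
sets = ps.predict(X_test)
print(sets[:5])
```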
18class SimpleMultitaskClassifier(Base, ClassifierMixin): 19 """Multitask Classification model based on regression models, with shared covariates 20 21 Parameters: 22 23 obj: object 24 any object (must be a regression model) containing a method fit (obj.fit()) 25 and a method predict (obj.predict()) 26 27 seed: int 28 reproducibility seed 29 30 Attributes: 31 32 fit_objs_: dict 33 objects adjusted to each individual time series 34 35 n_classes_: int 36 number of classes for the classifier 37 38 Examples: 39 40 ```python 41 import nnetsauce as ns 42 import numpy as np 43 from sklearn.datasets import load_breast_cancer 44 from sklearn.linear_model import LinearRegression 45 from sklearn.model_selection import train_test_split 46 from sklearn import metrics 47 from time import time 48 49 breast_cancer = load_breast_cancer() 50 Z = breast_cancer.data 51 t = breast_cancer.target 52 53 X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2, 54 random_state=123+2*10) 55 56 # Linear Regression is used 57 regr = LinearRegression() 58 fit_obj = ns.SimpleMultitaskClassifier(regr) 59 60 start = time() 61 fit_obj.fit(X_train, y_train) 62 print(f"Elapsed {time() - start}") 63 64 print(fit_obj.score(X_test, y_test)) 65 print(fit_obj.score(X_test, y_test, scoring="roc_auc")) 66 67 start = time() 68 preds = fit_obj.predict(X_test) 69 print(f"Elapsed {time() - start}") 70 print(metrics.classification_report(preds, y_test)) 71 ``` 72 73 """ 74 75 # construct the object ----- 76 _estimator_type = "classifier" 77 78 def __init__( 79 self, 80 obj, 81 ): 82 self.type_fit = "classification" 83 self.obj = obj 84 self.fit_objs_ = {} 85 self.X_scaler_ = StandardScaler() 86 self.scaled_X_ = None 87 88 def fit(self, X, y, sample_weight=None, **kwargs): 89 """Fit SimpleMultitaskClassifier to training data (X, y). 90 91 Args: 92 93 X: {array-like}, shape = [n_samples, n_features] 94 Training vectors, where n_samples is the number 95 of samples and n_features is the number of features. 96 97 y: array-like, shape = [n_samples] 98 Target values. 99 100 **kwargs: additional parameters to be passed to 101 self.cook_training_set or self.obj.fit 102 103 Returns: 104 105 self: object 106 107 """ 108 109 assert mx.is_factor(y), "y must contain only integers" 110 111 self.classes_ = np.unique(y) # for compatibility with sklearn 112 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 113 114 self.scaled_X_ = self.X_scaler_.fit_transform(X) 115 116 # multitask response 117 Y = mo.one_hot_encode2(y, self.n_classes_) 118 119 try: 120 for i in range(self.n_classes_): 121 self.fit_objs_[i] = deepcopy( 122 self.obj.fit( 123 self.scaled_X_, 124 Y[:, i], 125 sample_weight=sample_weight, 126 **kwargs 127 ) 128 ) 129 except Exception as e: 130 for i in range(self.n_classes_): 131 self.fit_objs_[i] = deepcopy( 132 self.obj.fit(self.scaled_X_, Y[:, i], **kwargs) 133 ) 134 return self 135 136 def predict(self, X, **kwargs): 137 """Predict test data X. 138 139 Args: 140 141 X: {array-like}, shape = [n_samples, n_features] 142 Training vectors, where n_samples is the number 143 of samples and n_features is the number of features. 144 145 **kwargs: additional parameters 146 147 Returns: 148 149 model predictions: {array-like} 150 151 """ 152 return np.argmax(self.predict_proba(X, **kwargs), axis=1) 153 154 def predict_proba(self, X, **kwargs): 155 """Predict probabilities for test data X. 
156 157 Args: 158 159 X: {array-like}, shape = [n_samples, n_features] 160 Training vectors, where n_samples is the number 161 of samples and n_features is the number of features. 162 163 **kwargs: additional parameters 164 165 Returns: 166 167 probability estimates for test data: {array-like} 168 169 """ 170 171 shape_X = X.shape 172 173 probs = np.zeros((shape_X[0], self.n_classes_)) 174 175 if len(shape_X) == 1: # one example 176 n_features = shape_X[0] 177 178 new_X = mo.rbind( 179 X.reshape(1, n_features), 180 np.ones(n_features).reshape(1, n_features), 181 ) 182 183 Z = self.X_scaler_.transform(new_X, **kwargs) 184 185 # Fallback to standard model 186 for i in range(self.n_classes_): 187 probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0] 188 189 else: # multiple rows 190 Z = self.X_scaler_.transform(X, **kwargs) 191 192 # Fallback to standard model 193 for i in range(self.n_classes_): 194 probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs) 195 196 expit_raw_probs = expit(probs) 197 198 # Add small epsilon to avoid division by zero 199 row_sums = expit_raw_probs.sum(axis=1)[:, None] 200 row_sums[row_sums < 1e-10] = 1e-10 201 202 return expit_raw_probs / row_sums 203 204 def decision_function(self, X, **kwargs): 205 """Compute the decision function of X. 206 207 Parameters: 208 X: {array-like}, shape = [n_samples, n_features] 209 Samples to compute decision function for. 210 211 **kwargs: additional parameters to be passed to 212 self.cook_test_set 213 214 Returns: 215 array-like of shape (n_samples,) or (n_samples, n_classes) 216 Decision function of the input samples. The order of outputs is the same 217 as that of the classes passed to fit. 218 """ 219 if not hasattr(self.obj, "decision_function"): 220 # If base classifier doesn't have decision_function, use predict_proba 221 proba = self.predict_proba(X, **kwargs) 222 if proba.shape[1] == 2: 223 return proba[:, 1] # For binary classification 224 return proba # For multiclass 225 226 if len(X.shape) == 1: 227 n_features = X.shape[0] 228 new_X = mo.rbind( 229 X.reshape(1, n_features), 230 np.ones(n_features).reshape(1, n_features), 231 ) 232 233 return ( 234 self.obj.decision_function( 235 self.cook_test_set(new_X, **kwargs), **kwargs 236 ) 237 )[0] 238 239 return self.obj.decision_function( 240 self.cook_test_set(X, **kwargs), **kwargs 241 ) 242 243 @property 244 def _estimator_type(self): 245 return "classifier"
Multitask Classification model based on regression models, with shared covariates
Parameters:
obj: object
any object (must be a regression model) containing a method fit (obj.fit())
and a method predict (obj.predict())
seed: int
reproducibility seed
Attributes:
fit_objs_: dict
objects adjusted to each individual class (one regression model per class)
n_classes_: int
number of classes for the classifier
Examples:
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time
breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
random_state=123+2*10)
# Linear Regression is used
regr = LinearRegression()
fit_obj = ns.SimpleMultitaskClassifier(regr)
start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")
print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
88 def fit(self, X, y, sample_weight=None, **kwargs): 89 """Fit SimpleMultitaskClassifier to training data (X, y). 90 91 Args: 92 93 X: {array-like}, shape = [n_samples, n_features] 94 Training vectors, where n_samples is the number 95 of samples and n_features is the number of features. 96 97 y: array-like, shape = [n_samples] 98 Target values. 99 100 **kwargs: additional parameters to be passed to 101 self.cook_training_set or self.obj.fit 102 103 Returns: 104 105 self: object 106 107 """ 108 109 assert mx.is_factor(y), "y must contain only integers" 110 111 self.classes_ = np.unique(y) # for compatibility with sklearn 112 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 113 114 self.scaled_X_ = self.X_scaler_.fit_transform(X) 115 116 # multitask response 117 Y = mo.one_hot_encode2(y, self.n_classes_) 118 119 try: 120 for i in range(self.n_classes_): 121 self.fit_objs_[i] = deepcopy( 122 self.obj.fit( 123 self.scaled_X_, 124 Y[:, i], 125 sample_weight=sample_weight, 126 **kwargs 127 ) 128 ) 129 except Exception as e: 130 for i in range(self.n_classes_): 131 self.fit_objs_[i] = deepcopy( 132 self.obj.fit(self.scaled_X_, Y[:, i], **kwargs) 133 ) 134 return self
Fit SimpleMultitaskClassifier to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
136 def predict(self, X, **kwargs): 137 """Predict test data X. 138 139 Args: 140 141 X: {array-like}, shape = [n_samples, n_features] 142 Training vectors, where n_samples is the number 143 of samples and n_features is the number of features. 144 145 **kwargs: additional parameters 146 147 Returns: 148 149 model predictions: {array-like} 150 151 """ 152 return np.argmax(self.predict_proba(X, **kwargs), axis=1)
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters
Returns:
model predictions: {array-like}
154 def predict_proba(self, X, **kwargs): 155 """Predict probabilities for test data X. 156 157 Args: 158 159 X: {array-like}, shape = [n_samples, n_features] 160 Training vectors, where n_samples is the number 161 of samples and n_features is the number of features. 162 163 **kwargs: additional parameters 164 165 Returns: 166 167 probability estimates for test data: {array-like} 168 169 """ 170 171 shape_X = X.shape 172 173 probs = np.zeros((shape_X[0], self.n_classes_)) 174 175 if len(shape_X) == 1: # one example 176 n_features = shape_X[0] 177 178 new_X = mo.rbind( 179 X.reshape(1, n_features), 180 np.ones(n_features).reshape(1, n_features), 181 ) 182 183 Z = self.X_scaler_.transform(new_X, **kwargs) 184 185 # Fallback to standard model 186 for i in range(self.n_classes_): 187 probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0] 188 189 else: # multiple rows 190 Z = self.X_scaler_.transform(X, **kwargs) 191 192 # Fallback to standard model 193 for i in range(self.n_classes_): 194 probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs) 195 196 expit_raw_probs = expit(probs) 197 198 # Add small epsilon to avoid division by zero 199 row_sums = expit_raw_probs.sum(axis=1)[:, None] 200 row_sums[row_sums < 1e-10] = 1e-10 201 202 return expit_raw_probs / row_sums
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters
Returns:
probability estimates for test data: {array-like}
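As a complement to the docstring example above, this short sketch (with an illustrative `Ridge` base learner) highlights what `predict_proba` returns: sigmoid-transformed per-class regression outputs, renormalized so that each row sums to one.

```python
import numpy as np
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123
)

clf = ns.SimpleMultitaskClassifier(Ridge())  # one Ridge regression per class
clf.fit(X_train, y_train)

probs = clf.predict_proba(X_test)
print(probs.shape)                           # (n_test, 2)
print(np.allclose(probs.sum(axis=1), 1.0))   # rows sum to one
print((probs.argmax(axis=1) == clf.predict(X_test)).all())
```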
9class Optimizer: 10 """Optimizer class 11 12 Attributes: 13 14 type_optim: str 15 type of optimizer, (currently) either 'sgd' (stochastic minibatch gradient descent) 16 or 'scd' (stochastic minibatch coordinate descent) 17 18 num_iters: int 19 number of iterations of the optimizer 20 21 learning_rate: float 22 step size 23 24 batch_prop: float 25 proportion of the initial data used at each optimization step 26 27 learning_method: str 28 "poly" - learning rate decreasing as a polynomial function 29 of # of iterations (default) 30 "exp" - learning rate decreasing as an exponential function 31 of # of iterations 32 "momentum" - gradient descent using momentum 33 34 randomization: str 35 type of randomization applied at each step 36 "strat" - stratified subsampling (default) 37 "shuffle" - random subsampling 38 39 mass: float 40 mass on velocity, for `method` == "momentum" 41 42 decay: float 43 coefficient of decrease of the learning rate for 44 `method` == "poly" and `method` == "exp" 45 46 tolerance: float 47 early stopping parameter (convergence of loss function) 48 49 verbose: int 50 controls verbosity of gradient descent 51 0 - nothing is printed 52 1 - a progress bar is printed 53 2 - successive loss function values are printed 54 55 """ 56 57 # construct the object ----- 58 59 def __init__( 60 self, 61 type_optim="sgd", 62 num_iters=100, 63 learning_rate=0.01, 64 batch_prop=1.0, 65 learning_method="momentum", 66 randomization="strat", 67 mass=0.9, 68 decay=0.1, 69 tolerance=1e-3, 70 verbose=1, 71 ): 72 self.type_optim = type_optim 73 self.num_iters = num_iters 74 self.learning_rate = learning_rate 75 self.batch_prop = batch_prop 76 self.learning_method = learning_method 77 self.randomization = randomization 78 self.mass = mass 79 self.decay = decay 80 self.tolerance = tolerance 81 self.verbose = verbose 82 self.opt = None 83 84 def fit(self, loss_func, response, x0, q=None, **kwargs): 85 """Fit GLM model to training data (X, y). 86 87 Args: 88 89 loss_func: loss function 90 91 response: array-like, shape = [n_samples] 92 target variable (used for subsampling) 93 94 x0: array-like, shape = [n_features] 95 initial value provided to the optimizer 96 97 **kwargs: additional parameters to be passed to 98 loss function 99 100 Returns: 101 102 self: object 103 104 """ 105 106 if self.type_optim == "scd": 107 self.results = scd( 108 loss_func, 109 response=response, 110 x=x0, 111 num_iters=self.num_iters, 112 batch_prop=self.batch_prop, 113 learning_rate=self.learning_rate, 114 learning_method=self.learning_method, 115 mass=self.mass, 116 decay=self.decay, 117 randomization=self.randomization, 118 tolerance=self.tolerance, 119 verbose=self.verbose, 120 **kwargs 121 ) 122 123 if self.type_optim == "sgd": 124 self.results = sgd( 125 loss_func, 126 response=response, 127 x=x0, 128 num_iters=self.num_iters, 129 batch_prop=self.batch_prop, 130 learning_rate=self.learning_rate, 131 learning_method=self.learning_method, 132 mass=self.mass, 133 decay=self.decay, 134 randomization=self.randomization, 135 tolerance=self.tolerance, 136 verbose=self.verbose, 137 **kwargs 138 ) 139 140 return self 141 142 def one_hot_encode(self, y, n_classes): 143 return one_hot_encode(y, n_classes)
Optimizer class
Attributes:
type_optim: str
type of optimizer, (currently) either 'sgd' (stochastic minibatch gradient descent)
or 'scd' (stochastic minibatch coordinate descent)
num_iters: int
number of iterations of the optimizer
learning_rate: float
step size
batch_prop: float
proportion of the initial data used at each optimization step
learning_method: str
"poly" - learning rate decreasing as a polynomial function
of # of iterations (default)
"exp" - learning rate decreasing as an exponential function
of # of iterations
"momentum" - gradient descent using momentum
randomization: str
type of randomization applied at each step
"strat" - stratified subsampling (default)
"shuffle" - random subsampling
mass: float
mass on velocity, for `method` == "momentum"
decay: float
coefficient of decrease of the learning rate for
`method` == "poly" and `method` == "exp"
tolerance: float
early stopping parameter (convergence of loss function)
verbose: int
controls verbosity of gradient descent
0 - nothing is printed
1 - a progress bar is printed
2 - successive loss function values are printed
84 def fit(self, loss_func, response, x0, q=None, **kwargs): 85 """Fit GLM model to training data (X, y). 86 87 Args: 88 89 loss_func: loss function 90 91 response: array-like, shape = [n_samples] 92 target variable (used for subsampling) 93 94 x0: array-like, shape = [n_features] 95 initial value provided to the optimizer 96 97 **kwargs: additional parameters to be passed to 98 loss function 99 100 Returns: 101 102 self: object 103 104 """ 105 106 if self.type_optim == "scd": 107 self.results = scd( 108 loss_func, 109 response=response, 110 x=x0, 111 num_iters=self.num_iters, 112 batch_prop=self.batch_prop, 113 learning_rate=self.learning_rate, 114 learning_method=self.learning_method, 115 mass=self.mass, 116 decay=self.decay, 117 randomization=self.randomization, 118 tolerance=self.tolerance, 119 verbose=self.verbose, 120 **kwargs 121 ) 122 123 if self.type_optim == "sgd": 124 self.results = sgd( 125 loss_func, 126 response=response, 127 x=x0, 128 num_iters=self.num_iters, 129 batch_prop=self.batch_prop, 130 learning_rate=self.learning_rate, 131 learning_method=self.learning_method, 132 mass=self.mass, 133 decay=self.decay, 134 randomization=self.randomization, 135 tolerance=self.tolerance, 136 verbose=self.verbose, 137 **kwargs 138 ) 139 140 return self
Fit the loss function on training data, using the chosen stochastic optimizer ('sgd' or 'scd').
Args:
loss_func: loss function
response: array-like, shape = [n_samples]
target variable (used for subsampling)
x0: array-like, shape = [n_features]
initial value provided to the optimizer
**kwargs: additional parameters to be passed to
loss function
Returns:
self: object
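A configuration sketch for `Optimizer` (values are illustrative). The loss function eventually passed to `fit` must be compatible with the internal `sgd`/`scd` helpers, so the call is only indicated in a comment here.

```python
import nnetsauce as ns

# Stochastic minibatch gradient descent with momentum,
# using 80% of the observations at each iteration
opt = ns.Optimizer(
    type_optim="sgd",
    num_iters=200,
    learning_rate=0.01,
    batch_prop=0.8,
    learning_method="momentum",
    randomization="strat",
    mass=0.9,
    tolerance=1e-4,
    verbose=0,
)

# Coordinate-descent variant with a polynomially decaying learning rate
opt_scd = ns.Optimizer(type_optim="scd", learning_method="poly", decay=0.1)

# opt.fit(loss_func, response=y, x0=beta_init) would then minimize `loss_func`
# starting from `beta_init`, subsampling `response` at each step; the results
# are stored in opt.results.
```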
37class QuantileRegressor(BaseEstimator, RegressorMixin): 38 """ 39 Quantile Regressor. 40 41 Parameters: 42 43 obj: base model (regression model) 44 The base regressor from which to build a 45 quantile regressor. 46 47 level: int, default=95 48 The level of the quantiles to compute. 49 50 scoring: str, default="predictions" 51 The scoring to use for the optimization and constructing 52 prediction intervals (predictions, residuals, conformal, 53 studentized, conformal-studentized). 54 55 Attributes: 56 57 obj_ : base model (regression model) 58 The base regressor from which to build a 59 quantile regressor. 60 61 offset_multipliers_ : list 62 The multipliers for the offset. 63 64 scoring_residuals_ : list 65 The residuals for the scoring. 66 67 student_multiplier_ : float 68 The multiplier for the student. 69 70 """ 71 72 def __init__(self, obj, level=95, scoring="predictions"): 73 assert scoring in ( 74 "predictions", 75 "residuals", 76 "conformal", 77 "studentized", 78 "conformal-studentized", 79 ), "scoring must be 'predictions' or 'residuals'" 80 self.obj = obj 81 low_risk_level = (1 - level / 100) / 2 82 self.quantiles = [low_risk_level, 0.5, 1 - low_risk_level] 83 self.scoring = scoring 84 self.offset_multipliers_ = None 85 self.obj_ = None 86 self.scoring_residuals_ = None 87 self.student_multiplier_ = None 88 89 def _compute_quantile_loss(self, residuals, quantile): 90 """ 91 Compute the quantile loss for a given set of residuals and quantile. 92 """ 93 return np.mean( 94 residuals 95 * (quantile * (residuals >= 0) + (quantile - 1) * (residuals < 0)) 96 ) 97 98 def _optimize_multiplier( 99 self, 100 y, 101 base_predictions, 102 prev_predictions, 103 scoring_residuals=None, 104 quantile=0.5, 105 ): 106 """ 107 Optimize the multiplier for a given quantile. 108 """ 109 if not 0 < quantile < 1: 110 raise ValueError("Quantile should be between 0 and 1.") 111 112 n = len(y) 113 114 def objective(log_multiplier): 115 """ 116 Objective function for optimization. 
117 """ 118 # Convert to positive multiplier using exp 119 multiplier = np.exp(log_multiplier[0]) 120 if self.scoring == "predictions": 121 assert ( 122 base_predictions is not None 123 ), "base_predictions must be not None" 124 # Calculate predictions 125 if prev_predictions is None: 126 # For first quantile, subtract from conditional expectation 127 predictions = base_predictions - multiplier * np.abs( 128 base_predictions 129 ) 130 else: 131 # For other quantiles, add to previous quantile 132 offset = multiplier * np.abs(base_predictions) 133 predictions = prev_predictions + offset 134 elif self.scoring in ("residuals", "conformal"): 135 assert ( 136 scoring_residuals is not None 137 ), "scoring_residuals must be not None" 138 # print("scoring_residuals", scoring_residuals) 139 # Calculate predictions 140 if prev_predictions is None: 141 # For first quantile, subtract from conditional expectation 142 predictions = base_predictions - multiplier * np.std( 143 scoring_residuals 144 ) / np.sqrt(len(scoring_residuals)) 145 # print("predictions", predictions) 146 else: 147 # For other quantiles, add to previous quantile 148 offset = ( 149 multiplier 150 * np.std(scoring_residuals) 151 / np.sqrt(len(scoring_residuals)) 152 ) 153 predictions = prev_predictions + offset 154 elif self.scoring in ("studentized", "conformal-studentized"): 155 assert ( 156 scoring_residuals is not None 157 ), "scoring_residuals must be not None" 158 # Calculate predictions 159 if prev_predictions is None: 160 # For first quantile, subtract from conditional expectation 161 predictions = ( 162 base_predictions - multiplier * self.student_multiplier_ 163 ) 164 # print("predictions", predictions) 165 else: 166 # For other quantiles, add to previous quantile 167 offset = multiplier * self.student_multiplier_ 168 predictions = prev_predictions + offset 169 else: 170 raise ValueError("Invalid argument 'scoring'") 171 172 return self._compute_quantile_loss(y - predictions, quantile) 173 174 # Optimize in log space for numerical stability 175 # bounds = [(-10, 10)] # log space bounds 176 bounds = [(-100, 100)] # log space bounds 177 result = differential_evolution( 178 objective, 179 bounds, 180 # popsize=15, 181 # maxiter=100, 182 # tol=1e-4, 183 popsize=25, 184 maxiter=200, 185 tol=1e-6, 186 disp=False, 187 ) 188 189 return np.exp(result.x[0]) 190 191 def fit(self, X, y): 192 """Fit the model to the data. 193 194 Parameters: 195 196 X: {array-like}, shape = [n_samples, n_features] 197 Training vectors, where n_samples is the number of samples and 198 n_features is the number of features. 199 y: array-like, shape = [n_samples] 200 Target values. 
201 """ 202 self.obj_ = clone(self.obj) 203 204 if self.scoring in ("predictions", "residuals"): 205 self.obj_.fit(X, y) 206 base_predictions = self.obj_.predict(X) 207 scoring_residuals = y - base_predictions 208 self.scoring_residuals_ = scoring_residuals 209 210 elif self.scoring == "conformal": 211 X_train, X_calib, y_train, y_calib = train_test_split( 212 X, y, test_size=0.5, random_state=42 213 ) 214 self.obj_.fit(X_train, y_train) 215 scoring_residuals = y_calib - self.obj_.predict( 216 X_calib 217 ) # These are calibration predictions 218 self.scoring_residuals_ = scoring_residuals 219 # Update base_predictions to use training predictions for optimization 220 self.obj_.fit(X_calib, y_calib) 221 base_predictions = self.obj_.predict(X_calib) 222 223 elif self.scoring in ("studentized", "conformal-studentized"): 224 # Calculate student multiplier 225 if self.scoring == "conformal-studentized": 226 X_train, X_calib, y_train, y_calib = train_test_split( 227 X, y, test_size=0.5, random_state=42 228 ) 229 self.obj_.fit(X_train, y_train) 230 scoring_residuals = y_calib - self.obj_.predict(X_calib) 231 # Calculate studentized multiplier using calibration data 232 self.student_multiplier_ = np.std(y_calib, ddof=1) / np.sqrt( 233 len(y_calib) - 1 234 ) 235 self.obj_.fit(X_calib, y_calib) 236 base_predictions = self.obj_.predict(X_calib) 237 else: # regular studentized 238 self.obj_.fit(X, y) 239 base_predictions = self.obj_.predict(X) 240 scoring_residuals = y - base_predictions 241 self.student_multiplier_ = np.std(y, ddof=1) / np.sqrt( 242 len(y) - 1 243 ) 244 245 # Initialize storage for multipliers 246 self.offset_multipliers_ = [] 247 # Keep track of current predictions for each quantile 248 current_predictions = None 249 250 # Fit each quantile sequentially 251 for i, quantile in enumerate(self.quantiles): 252 if self.scoring == "predictions": 253 multiplier = self._optimize_multiplier( 254 y=y, 255 base_predictions=base_predictions, 256 prev_predictions=current_predictions, 257 quantile=quantile, 258 ) 259 260 self.offset_multipliers_.append(multiplier) 261 262 # Update current predictions 263 if current_predictions is None: 264 # First quantile (lowest) 265 current_predictions = ( 266 base_predictions - multiplier * np.abs(base_predictions) 267 ) 268 else: 269 # Subsequent quantiles 270 offset = multiplier * np.abs(base_predictions) 271 current_predictions = current_predictions + offset 272 273 elif self.scoring == "residuals": 274 multiplier = self._optimize_multiplier( 275 y=y, 276 base_predictions=base_predictions, 277 scoring_residuals=scoring_residuals, 278 prev_predictions=current_predictions, 279 quantile=quantile, 280 ) 281 282 self.offset_multipliers_.append(multiplier) 283 284 # Update current predictions 285 if current_predictions is None: 286 # First quantile (lowest) 287 current_predictions = ( 288 base_predictions 289 - multiplier 290 * np.std(scoring_residuals) 291 / np.sqrt(len(scoring_residuals)) 292 ) 293 else: 294 # Subsequent quantiles 295 offset = ( 296 multiplier 297 * np.std(scoring_residuals) 298 / np.sqrt(len(scoring_residuals)) 299 ) 300 current_predictions = current_predictions + offset 301 302 elif self.scoring == "conformal": 303 multiplier = self._optimize_multiplier( 304 y=y_calib, 305 base_predictions=base_predictions, 306 scoring_residuals=scoring_residuals, 307 prev_predictions=current_predictions, 308 quantile=quantile, 309 ) 310 311 self.offset_multipliers_.append(multiplier) 312 313 # Update current predictions 314 if current_predictions is None: 
315 # First quantile (lowest) 316 current_predictions = ( 317 base_predictions 318 - multiplier 319 * np.std(scoring_residuals) 320 / np.sqrt(len(scoring_residuals)) 321 ) 322 else: 323 # Subsequent quantiles 324 offset = ( 325 multiplier 326 * np.std(scoring_residuals) 327 / np.sqrt(len(scoring_residuals)) 328 ) 329 current_predictions = current_predictions + offset 330 331 elif self.scoring in ("studentized", "conformal-studentized"): 332 multiplier = self._optimize_multiplier( 333 y=y_calib if self.scoring == "conformal-studentized" else y, 334 base_predictions=base_predictions, 335 scoring_residuals=scoring_residuals, 336 prev_predictions=current_predictions, 337 quantile=quantile, 338 ) 339 340 self.offset_multipliers_.append(multiplier) 341 342 # Update current predictions 343 if current_predictions is None: 344 current_predictions = ( 345 base_predictions - multiplier * self.student_multiplier_ 346 ) 347 else: 348 offset = multiplier * self.student_multiplier_ 349 current_predictions = current_predictions + offset 350 351 return self 352 353 def predict(self, X, return_pi=False): 354 """Predict the target variable. 355 356 Parameters: 357 358 X: {array-like}, shape = [n_samples, n_features] 359 Training vectors, where n_samples is the number of samples and 360 n_features is the number of features. 361 362 return_pi: bool, default=True 363 Whether to return the prediction intervals. 364 """ 365 if self.obj_ is None or self.offset_multipliers_ is None: 366 raise ValueError("Model not fitted yet.") 367 368 base_predictions = self.obj_.predict(X) 369 all_predictions = [] 370 371 if self.scoring == "predictions": 372 # Generate first quantile 373 current_predictions = base_predictions - self.offset_multipliers_[ 374 0 375 ] * np.abs(base_predictions) 376 all_predictions.append(current_predictions) 377 378 # Generate remaining quantiles 379 for multiplier in self.offset_multipliers_[1:]: 380 offset = multiplier * np.abs(base_predictions) 381 current_predictions = current_predictions + offset 382 all_predictions.append(current_predictions) 383 384 elif self.scoring in ("residuals", "conformal"): 385 # Generate first quantile 386 current_predictions = base_predictions - self.offset_multipliers_[ 387 0 388 ] * np.std(self.scoring_residuals_) / np.sqrt( 389 len(self.scoring_residuals_) 390 ) 391 all_predictions.append(current_predictions) 392 393 # Generate remaining quantiles 394 for multiplier in self.offset_multipliers_[1:]: 395 offset = ( 396 multiplier 397 * np.std(self.scoring_residuals_) 398 / np.sqrt(len(self.scoring_residuals_)) 399 ) 400 current_predictions = current_predictions + offset 401 all_predictions.append(current_predictions) 402 403 elif self.scoring in ("studentized", "conformal-studentized"): 404 # Generate first quantile 405 current_predictions = ( 406 base_predictions 407 - self.offset_multipliers_[0] * self.student_multiplier_ 408 ) 409 all_predictions.append(current_predictions) 410 411 # Generate remaining quantiles 412 for multiplier in self.offset_multipliers_[1:]: 413 offset = multiplier * self.student_multiplier_ 414 current_predictions = current_predictions + offset 415 all_predictions.append(current_predictions) 416 417 if return_pi == False: 418 return np.asarray(all_predictions[1]) 419 420 DescribeResult = namedtuple( 421 "DecribeResult", ["mean", "lower", "upper", "median"] 422 ) 423 DescribeResult.mean = base_predictions 424 DescribeResult.lower = np.asarray(all_predictions[0]) 425 DescribeResult.median = np.asarray(all_predictions[1]) 426 
DescribeResult.upper = np.asarray(all_predictions[2]) 427 428 return DescribeResult
Quantile Regressor.
Parameters:
obj: base model (regression model)
The base regressor from which to build a
quantile regressor.
level: int, default=95
The confidence level (in %); the quantiles computed are (1 - level/100)/2,
0.5 and 1 - (1 - level/100)/2 (i.e. 2.5%, 50% and 97.5% for level=95).
scoring: str, default="predictions"
The scoring rule used to calibrate the quantile offsets and construct
prediction intervals; one of "predictions", "residuals", "conformal",
"studentized", "conformal-studentized".
Attributes:
obj_ : base model (regression model)
The base regressor from which to build a
quantile regressor.
offset_multipliers_ : list
The multipliers for the offset.
scoring_residuals_ : list
The residuals for the scoring.
student_multiplier_ : float
The studentized scaling factor (standard error of the response), used to
build the quantile offsets for the "studentized" and "conformal-studentized" scorings.
191 def fit(self, X, y): 192 """Fit the model to the data. 193 194 Parameters: 195 196 X: {array-like}, shape = [n_samples, n_features] 197 Training vectors, where n_samples is the number of samples and 198 n_features is the number of features. 199 y: array-like, shape = [n_samples] 200 Target values. 201 """ 202 self.obj_ = clone(self.obj) 203 204 if self.scoring in ("predictions", "residuals"): 205 self.obj_.fit(X, y) 206 base_predictions = self.obj_.predict(X) 207 scoring_residuals = y - base_predictions 208 self.scoring_residuals_ = scoring_residuals 209 210 elif self.scoring == "conformal": 211 X_train, X_calib, y_train, y_calib = train_test_split( 212 X, y, test_size=0.5, random_state=42 213 ) 214 self.obj_.fit(X_train, y_train) 215 scoring_residuals = y_calib - self.obj_.predict( 216 X_calib 217 ) # These are calibration predictions 218 self.scoring_residuals_ = scoring_residuals 219 # Update base_predictions to use training predictions for optimization 220 self.obj_.fit(X_calib, y_calib) 221 base_predictions = self.obj_.predict(X_calib) 222 223 elif self.scoring in ("studentized", "conformal-studentized"): 224 # Calculate student multiplier 225 if self.scoring == "conformal-studentized": 226 X_train, X_calib, y_train, y_calib = train_test_split( 227 X, y, test_size=0.5, random_state=42 228 ) 229 self.obj_.fit(X_train, y_train) 230 scoring_residuals = y_calib - self.obj_.predict(X_calib) 231 # Calculate studentized multiplier using calibration data 232 self.student_multiplier_ = np.std(y_calib, ddof=1) / np.sqrt( 233 len(y_calib) - 1 234 ) 235 self.obj_.fit(X_calib, y_calib) 236 base_predictions = self.obj_.predict(X_calib) 237 else: # regular studentized 238 self.obj_.fit(X, y) 239 base_predictions = self.obj_.predict(X) 240 scoring_residuals = y - base_predictions 241 self.student_multiplier_ = np.std(y, ddof=1) / np.sqrt( 242 len(y) - 1 243 ) 244 245 # Initialize storage for multipliers 246 self.offset_multipliers_ = [] 247 # Keep track of current predictions for each quantile 248 current_predictions = None 249 250 # Fit each quantile sequentially 251 for i, quantile in enumerate(self.quantiles): 252 if self.scoring == "predictions": 253 multiplier = self._optimize_multiplier( 254 y=y, 255 base_predictions=base_predictions, 256 prev_predictions=current_predictions, 257 quantile=quantile, 258 ) 259 260 self.offset_multipliers_.append(multiplier) 261 262 # Update current predictions 263 if current_predictions is None: 264 # First quantile (lowest) 265 current_predictions = ( 266 base_predictions - multiplier * np.abs(base_predictions) 267 ) 268 else: 269 # Subsequent quantiles 270 offset = multiplier * np.abs(base_predictions) 271 current_predictions = current_predictions + offset 272 273 elif self.scoring == "residuals": 274 multiplier = self._optimize_multiplier( 275 y=y, 276 base_predictions=base_predictions, 277 scoring_residuals=scoring_residuals, 278 prev_predictions=current_predictions, 279 quantile=quantile, 280 ) 281 282 self.offset_multipliers_.append(multiplier) 283 284 # Update current predictions 285 if current_predictions is None: 286 # First quantile (lowest) 287 current_predictions = ( 288 base_predictions 289 - multiplier 290 * np.std(scoring_residuals) 291 / np.sqrt(len(scoring_residuals)) 292 ) 293 else: 294 # Subsequent quantiles 295 offset = ( 296 multiplier 297 * np.std(scoring_residuals) 298 / np.sqrt(len(scoring_residuals)) 299 ) 300 current_predictions = current_predictions + offset 301 302 elif self.scoring == "conformal": 303 multiplier = 
self._optimize_multiplier( 304 y=y_calib, 305 base_predictions=base_predictions, 306 scoring_residuals=scoring_residuals, 307 prev_predictions=current_predictions, 308 quantile=quantile, 309 ) 310 311 self.offset_multipliers_.append(multiplier) 312 313 # Update current predictions 314 if current_predictions is None: 315 # First quantile (lowest) 316 current_predictions = ( 317 base_predictions 318 - multiplier 319 * np.std(scoring_residuals) 320 / np.sqrt(len(scoring_residuals)) 321 ) 322 else: 323 # Subsequent quantiles 324 offset = ( 325 multiplier 326 * np.std(scoring_residuals) 327 / np.sqrt(len(scoring_residuals)) 328 ) 329 current_predictions = current_predictions + offset 330 331 elif self.scoring in ("studentized", "conformal-studentized"): 332 multiplier = self._optimize_multiplier( 333 y=y_calib if self.scoring == "conformal-studentized" else y, 334 base_predictions=base_predictions, 335 scoring_residuals=scoring_residuals, 336 prev_predictions=current_predictions, 337 quantile=quantile, 338 ) 339 340 self.offset_multipliers_.append(multiplier) 341 342 # Update current predictions 343 if current_predictions is None: 344 current_predictions = ( 345 base_predictions - multiplier * self.student_multiplier_ 346 ) 347 else: 348 offset = multiplier * self.student_multiplier_ 349 current_predictions = current_predictions + offset 350 351 return self
Fit the model to the data.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
353 def predict(self, X, return_pi=False): 354 """Predict the target variable. 355 356 Parameters: 357 358 X: {array-like}, shape = [n_samples, n_features] 359 Training vectors, where n_samples is the number of samples and 360 n_features is the number of features. 361 362 return_pi: bool, default=True 363 Whether to return the prediction intervals. 364 """ 365 if self.obj_ is None or self.offset_multipliers_ is None: 366 raise ValueError("Model not fitted yet.") 367 368 base_predictions = self.obj_.predict(X) 369 all_predictions = [] 370 371 if self.scoring == "predictions": 372 # Generate first quantile 373 current_predictions = base_predictions - self.offset_multipliers_[ 374 0 375 ] * np.abs(base_predictions) 376 all_predictions.append(current_predictions) 377 378 # Generate remaining quantiles 379 for multiplier in self.offset_multipliers_[1:]: 380 offset = multiplier * np.abs(base_predictions) 381 current_predictions = current_predictions + offset 382 all_predictions.append(current_predictions) 383 384 elif self.scoring in ("residuals", "conformal"): 385 # Generate first quantile 386 current_predictions = base_predictions - self.offset_multipliers_[ 387 0 388 ] * np.std(self.scoring_residuals_) / np.sqrt( 389 len(self.scoring_residuals_) 390 ) 391 all_predictions.append(current_predictions) 392 393 # Generate remaining quantiles 394 for multiplier in self.offset_multipliers_[1:]: 395 offset = ( 396 multiplier 397 * np.std(self.scoring_residuals_) 398 / np.sqrt(len(self.scoring_residuals_)) 399 ) 400 current_predictions = current_predictions + offset 401 all_predictions.append(current_predictions) 402 403 elif self.scoring in ("studentized", "conformal-studentized"): 404 # Generate first quantile 405 current_predictions = ( 406 base_predictions 407 - self.offset_multipliers_[0] * self.student_multiplier_ 408 ) 409 all_predictions.append(current_predictions) 410 411 # Generate remaining quantiles 412 for multiplier in self.offset_multipliers_[1:]: 413 offset = multiplier * self.student_multiplier_ 414 current_predictions = current_predictions + offset 415 all_predictions.append(current_predictions) 416 417 if return_pi == False: 418 return np.asarray(all_predictions[1]) 419 420 DescribeResult = namedtuple( 421 "DecribeResult", ["mean", "lower", "upper", "median"] 422 ) 423 DescribeResult.mean = base_predictions 424 DescribeResult.lower = np.asarray(all_predictions[0]) 425 DescribeResult.median = np.asarray(all_predictions[1]) 426 DescribeResult.upper = np.asarray(all_predictions[2]) 427 428 return DescribeResult
Predict the target variable.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
return_pi: bool, default=False
Whether to return the prediction intervals.
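A minimal usage sketch (not taken from the library's own documentation): the California housing data and the Ridge base learner are illustrative choices, and the constructor is assumed to take the base regressor as `obj`, as in the attributes listed above. With `return_pi=True`, `predict` returns a namedtuple with `mean`, `lower`, `median` and `upper` fields, as in the predict code shown above.

```python
# Illustrative only: Ridge and the California housing data are arbitrary choices.
import nnetsauce as ns
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split

X, y = fetch_california_housing(return_X_y=True, as_frame=False)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=13
)

qr = ns.QuantileRegressor(obj=Ridge())  # default scoring assumed
qr.fit(X_train, y_train)

res = qr.predict(X_test, return_pi=True)  # namedtuple: mean, lower, median, upper
print(res.mean[:5])
print(res.lower[:5])
print(res.upper[:5])
```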
43class QuantileClassifier(BaseEstimator, ClassifierMixin): 44 """ 45 Quantile Classifier. 46 47 Parameters: 48 49 obj: base model (classification model) 50 The base classifier from which to build a 51 quantile classifier. 52 53 level: int, default=95 54 The level of the quantiles to compute. 55 56 scoring: str, default="predictions" 57 The scoring to use for the optimization and constructing 58 prediction intervals (predictions, residuals, conformal, 59 studentized, conformal-studentized). 60 61 Attributes: 62 63 obj_ : base model (classification model) 64 The base classifier from which to build a 65 quantile classifier. 66 67 offset_multipliers_ : list 68 The multipliers for the offset. 69 70 scoring_residuals_ : list 71 The residuals for the scoring. 72 73 student_multiplier_ : float 74 The multiplier for the student. 75 76 77 """ 78 79 def __init__(self, obj, level=95, scoring="predictions"): 80 assert scoring in ( 81 "predictions", 82 "residuals", 83 "conformal", 84 "studentized", 85 "conformal-studentized", 86 ), "scoring must be 'predictions' or 'residuals'" 87 self.obj = obj 88 quantileregressor = QuantileRegressor(self.obj) 89 quantileregressor.predict = partial( 90 quantileregressor.predict, return_pi=False 91 ) 92 self.obj_ = SimpleMultitaskClassifier(quantileregressor) 93 94 def fit(self, X, y, **kwargs): 95 self.obj_.fit(X, y, **kwargs) 96 97 def predict(self, X, **kwargs): 98 return self.obj_.predict(X, **kwargs) 99 100 def predict_proba(self, X, **kwargs): 101 return self.obj_.predict_proba(X, **kwargs)
Quantile Classifier.
Parameters:
obj: base model (classification model)
The base classifier from which to build a
quantile classifier.
level: int, default=95
The level of the quantiles to compute.
scoring: str, default="predictions"
The scoring to use for the optimization and constructing
prediction intervals (predictions, residuals, conformal,
studentized, conformal-studentized).
Attributes:
obj_ : base model (classification model)
The base classifier from which to build a
quantile classifier.
offset_multipliers_ : list
The multipliers for the offset.
scoring_residuals_ : list
The residuals for the scoring.
student_multiplier_ : float
The studentized multiplier used by the 'studentized' and 'conformal-studentized' scoring options.
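For the 'studentized' and 'conformal-studentized' options, this multiplier is computed in the underlying regressor's `fit` as the sample standard deviation of the targets divided by the square root of n - 1 (see the fit code above). A minimal numpy illustration with made-up targets:

```python
import numpy as np

y = np.array([3.1, 2.7, 4.0, 3.6, 2.9])  # illustrative targets
# student_multiplier_ as computed in fit() for scoring="studentized"
student_multiplier = np.std(y, ddof=1) / np.sqrt(len(y) - 1)
print(student_multiplier)
```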
18class RandomBagRegressor(RandomBag, RegressorMixin): 19 """Randomized 'Bagging' Regression model 20 21 Parameters: 22 23 obj: object 24 any object containing a method fit (obj.fit()) and a method predict 25 (obj.predict()) 26 27 n_estimators: int 28 number of boosting iterations 29 30 n_hidden_features: int 31 number of nodes in the hidden layer 32 33 activation_name: str 34 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 35 36 a: float 37 hyperparameter for 'prelu' or 'elu' activation function 38 39 nodes_sim: str 40 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 41 'uniform' 42 43 bias: boolean 44 indicates if the hidden layer contains a bias term (True) or not 45 (False) 46 47 dropout: float 48 regularization parameter; (random) percentage of nodes dropped out 49 of the training 50 51 direct_link: boolean 52 indicates if the original predictors are included (True) in model''s 53 fitting or not (False) 54 55 n_clusters: int 56 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: 57 no clustering) 58 59 cluster_encode: bool 60 defines how the variable containing clusters is treated (default is one-hot) 61 if `False`, then labels are used, without one-hot encoding 62 63 type_clust: str 64 type of clustering method: currently k-means ('kmeans') or Gaussian 65 Mixture Model ('gmm') 66 67 type_scaling: a tuple of 3 strings 68 scaling methods for inputs, hidden layer, and clustering respectively 69 (and when relevant). 70 Currently available: standardization ('std') or MinMax scaling ('minmax') 71 72 col_sample: float 73 percentage of covariates randomly chosen for training 74 75 row_sample: float 76 percentage of rows chosen for training, by stratified bootstrapping 77 78 seed: int 79 reproducibility seed for nodes_sim=='uniform' 80 81 backend: str 82 "cpu" or "gpu" or "tpu" 83 84 Attributes: 85 86 voter_: dict 87 dictionary containing all the fitted base-learners 88 89 90 Examples: 91 92 ```python 93 import numpy as np 94 import nnetsauce as ns 95 from sklearn.datasets import fetch_california_housing 96 from sklearn.tree import DecisionTreeRegressor 97 from sklearn.model_selection import train_test_split 98 99 X, y = fetch_california_housing(return_X_y=True, as_frame=False) 100 101 # split data into training test and test set 102 X_train, X_test, y_train, y_test = train_test_split(X, y, 103 test_size=0.2, random_state=13) 104 105 # Requires further tuning 106 obj = DecisionTreeRegressor(max_depth=3, random_state=123) 107 obj2 = ns.RandomBagRegressor(obj=obj, direct_link=False, 108 n_estimators=50, 109 col_sample=0.9, row_sample=0.9, 110 dropout=0, n_clusters=0, verbose=1) 111 112 obj2.fit(X_train, y_train) 113 114 print(np.sqrt(obj2.score(X_test, y_test))) # RMSE 115 116 ``` 117 118 """ 119 120 # construct the object ----- 121 122 def __init__( 123 self, 124 obj, 125 n_estimators=10, 126 n_hidden_features=1, 127 activation_name="relu", 128 a=0.01, 129 nodes_sim="sobol", 130 bias=True, 131 dropout=0, 132 direct_link=False, 133 n_clusters=2, 134 cluster_encode=True, 135 type_clust="kmeans", 136 type_scaling=("std", "std", "std"), 137 col_sample=1, 138 row_sample=1, 139 n_jobs=None, 140 seed=123, 141 verbose=1, 142 backend="cpu", 143 ): 144 super().__init__( 145 obj=obj, 146 n_estimators=n_estimators, 147 n_hidden_features=n_hidden_features, 148 activation_name=activation_name, 149 a=a, 150 nodes_sim=nodes_sim, 151 bias=bias, 152 dropout=dropout, 153 direct_link=direct_link, 154 n_clusters=n_clusters, 155 cluster_encode=cluster_encode, 156 
type_clust=type_clust, 157 type_scaling=type_scaling, 158 col_sample=col_sample, 159 row_sample=row_sample, 160 seed=seed, 161 backend=backend, 162 ) 163 164 self.type_fit = "regression" 165 self.verbose = verbose 166 self.n_jobs = n_jobs 167 self.voter_ = {} 168 169 def fit(self, X, y, **kwargs): 170 """Fit Random 'Bagging' model to training data (X, y). 171 172 Args: 173 174 X: {array-like}, shape = [n_samples, n_features] 175 Training vectors, where n_samples is the number 176 of samples and n_features is the number of features. 177 178 y: array-like, shape = [n_samples] 179 Target values. 180 181 **kwargs: additional parameters to be passed to 182 self.cook_training_set or self.obj.fit 183 184 Returns: 185 186 self: object 187 188 """ 189 190 base_learner = CustomRegressor( 191 self.obj, 192 n_hidden_features=self.n_hidden_features, 193 activation_name=self.activation_name, 194 a=self.a, 195 nodes_sim=self.nodes_sim, 196 bias=self.bias, 197 dropout=self.dropout, 198 direct_link=self.direct_link, 199 n_clusters=self.n_clusters, 200 type_clust=self.type_clust, 201 type_scaling=self.type_scaling, 202 col_sample=self.col_sample, 203 row_sample=self.row_sample, 204 seed=self.seed, 205 ) 206 207 # 1 - Sequential training ----- 208 209 if self.n_jobs is None: 210 self.voter_ = rbagloop_regression( 211 base_learner, X, y, self.n_estimators, self.verbose, self.seed 212 ) 213 214 self.n_estimators = len(self.voter_) 215 216 return self 217 218 # 2 - Parallel training ----- 219 # buggy 220 # if self.n_jobs is not None: 221 def fit_estimators(m): 222 base_learner__ = deepcopy(base_learner) 223 base_learner__.set_params(seed=self.seed + m * 1000) 224 base_learner__.fit(X, y, **kwargs) 225 return base_learner__ 226 227 if self.verbose == 1: 228 voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")( 229 delayed(fit_estimators)(m) 230 for m in tqdm(range(self.n_estimators)) 231 ) 232 else: 233 voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")( 234 delayed(fit_estimators)(m) for m in range(self.n_estimators) 235 ) 236 237 self.voter_ = {i: elt for i, elt in enumerate(voters_list)} 238 239 self.n_estimators = len(self.voter_) 240 241 return self 242 243 def predict(self, X, weights=None, **kwargs): 244 """Predict for test data X. 245 246 Args: 247 248 X: {array-like}, shape = [n_samples, n_features] 249 Training vectors, where n_samples is the number 250 of samples and n_features is the number of features. 251 252 **kwargs: additional parameters to be passed to 253 self.cook_test_set 254 255 Returns: 256 257 estimates for test data: {array-like} 258 259 """ 260 261 def calculate_preds(voter, weights=None): 262 ensemble_preds = 0 263 264 n_iter = len(voter) 265 266 assert n_iter > 0, "no estimator found in `RandomBag` ensemble" 267 268 if weights is None: 269 for idx, elt in voter.items(): 270 ensemble_preds += elt.predict(X) 271 272 return ensemble_preds / n_iter 273 274 # if weights is not None: 275 for idx, elt in voter.items(): 276 ensemble_preds += weights[idx] * elt.predict(X) 277 278 return ensemble_preds 279 280 # end calculate_preds ---- 281 282 if weights is None: 283 return calculate_preds(self.voter_) 284 285 # if weights is not None: 286 self.weights = weights 287 288 return calculate_preds(self.voter_, weights)
Randomized 'Bagging' Regression model
Parameters:
obj: object
any object containing a method fit (obj.fit()) and a method predict
(obj.predict())
n_estimators: int
number of bagging iterations (base learners in the ensemble)
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
voter_: dict
dictionary containing all the fitted base-learners
Examples:
import numpy as np
import nnetsauce as ns
from sklearn.datasets import fetch_california_housing
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
X, y = fetch_california_housing(return_X_y=True, as_frame=False)
# split data into training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y,
test_size=0.2, random_state=13)
# Requires further tuning
obj = DecisionTreeRegressor(max_depth=3, random_state=123)
obj2 = ns.RandomBagRegressor(obj=obj, direct_link=False,
n_estimators=50,
col_sample=0.9, row_sample=0.9,
dropout=0, n_clusters=0, verbose=1)
obj2.fit(X_train, y_train)
print(np.sqrt(obj2.score(X_test, y_test))) # RMSE
169 def fit(self, X, y, **kwargs): 170 """Fit Random 'Bagging' model to training data (X, y). 171 172 Args: 173 174 X: {array-like}, shape = [n_samples, n_features] 175 Training vectors, where n_samples is the number 176 of samples and n_features is the number of features. 177 178 y: array-like, shape = [n_samples] 179 Target values. 180 181 **kwargs: additional parameters to be passed to 182 self.cook_training_set or self.obj.fit 183 184 Returns: 185 186 self: object 187 188 """ 189 190 base_learner = CustomRegressor( 191 self.obj, 192 n_hidden_features=self.n_hidden_features, 193 activation_name=self.activation_name, 194 a=self.a, 195 nodes_sim=self.nodes_sim, 196 bias=self.bias, 197 dropout=self.dropout, 198 direct_link=self.direct_link, 199 n_clusters=self.n_clusters, 200 type_clust=self.type_clust, 201 type_scaling=self.type_scaling, 202 col_sample=self.col_sample, 203 row_sample=self.row_sample, 204 seed=self.seed, 205 ) 206 207 # 1 - Sequential training ----- 208 209 if self.n_jobs is None: 210 self.voter_ = rbagloop_regression( 211 base_learner, X, y, self.n_estimators, self.verbose, self.seed 212 ) 213 214 self.n_estimators = len(self.voter_) 215 216 return self 217 218 # 2 - Parallel training ----- 219 # buggy 220 # if self.n_jobs is not None: 221 def fit_estimators(m): 222 base_learner__ = deepcopy(base_learner) 223 base_learner__.set_params(seed=self.seed + m * 1000) 224 base_learner__.fit(X, y, **kwargs) 225 return base_learner__ 226 227 if self.verbose == 1: 228 voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")( 229 delayed(fit_estimators)(m) 230 for m in tqdm(range(self.n_estimators)) 231 ) 232 else: 233 voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")( 234 delayed(fit_estimators)(m) for m in range(self.n_estimators) 235 ) 236 237 self.voter_ = {i: elt for i, elt in enumerate(voters_list)} 238 239 self.n_estimators = len(self.voter_) 240 241 return self
Fit Random 'Bagging' model to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
243 def predict(self, X, weights=None, **kwargs): 244 """Predict for test data X. 245 246 Args: 247 248 X: {array-like}, shape = [n_samples, n_features] 249 Training vectors, where n_samples is the number 250 of samples and n_features is the number of features. 251 252 **kwargs: additional parameters to be passed to 253 self.cook_test_set 254 255 Returns: 256 257 estimates for test data: {array-like} 258 259 """ 260 261 def calculate_preds(voter, weights=None): 262 ensemble_preds = 0 263 264 n_iter = len(voter) 265 266 assert n_iter > 0, "no estimator found in `RandomBag` ensemble" 267 268 if weights is None: 269 for idx, elt in voter.items(): 270 ensemble_preds += elt.predict(X) 271 272 return ensemble_preds / n_iter 273 274 # if weights is not None: 275 for idx, elt in voter.items(): 276 ensemble_preds += weights[idx] * elt.predict(X) 277 278 return ensemble_preds 279 280 # end calculate_preds ---- 281 282 if weights is None: 283 return calculate_preds(self.voter_) 284 285 # if weights is not None: 286 self.weights = weights 287 288 return calculate_preds(self.voter_, weights)
Predict for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
weights: array-like of shape (n_estimators,), optional
per-estimator weights; when provided, predictions are a weighted
sum of the base learners instead of their simple average
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
estimates for test data: {array-like}
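As the predict code above shows, passing `weights` replaces the plain ensemble average with a weighted sum over the fitted base learners. A short sketch continuing the fitted `obj2` and `X_test` from the example earlier in this class's documentation (the uniform weights are an illustrative choice and reproduce the default average):

```python
import numpy as np

n = obj2.n_estimators  # number of fitted base learners
uniform_weights = np.repeat(1.0 / n, n)

# weighted sum over base learners; uniform weights equal the default average
preds_weighted = obj2.predict(X_test, weights=uniform_weights)
preds_default = obj2.predict(X_test)
print(np.allclose(preds_weighted, preds_default))
```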
18class RandomBagClassifier(RandomBag, ClassifierMixin): 19 """Randomized 'Bagging' Classification model 20 21 Parameters: 22 23 obj: object 24 any object containing a method fit (obj.fit()) and a method predict 25 (obj.predict()) 26 27 n_estimators: int 28 number of boosting iterations 29 30 n_hidden_features: int 31 number of nodes in the hidden layer 32 33 activation_name: str 34 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 35 36 a: float 37 hyperparameter for 'prelu' or 'elu' activation function 38 39 nodes_sim: str 40 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 41 'uniform' 42 43 bias: boolean 44 indicates if the hidden layer contains a bias term (True) or not 45 (False) 46 47 dropout: float 48 regularization parameter; (random) percentage of nodes dropped out 49 of the training 50 51 direct_link: boolean 52 indicates if the original predictors are included (True) in model's 53 fitting or not (False) 54 55 n_clusters: int 56 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: 57 no clustering) 58 59 cluster_encode: bool 60 defines how the variable containing clusters is treated (default is one-hot) 61 if `False`, then labels are used, without one-hot encoding 62 63 type_clust: str 64 type of clustering method: currently k-means ('kmeans') or Gaussian 65 Mixture Model ('gmm') 66 67 type_scaling: a tuple of 3 strings 68 scaling methods for inputs, hidden layer, and clustering respectively 69 (and when relevant). 70 Currently available: standardization ('std') or MinMax scaling ('minmax') 71 72 col_sample: float 73 percentage of covariates randomly chosen for training 74 75 row_sample: float 76 percentage of rows chosen for training, by stratified bootstrapping 77 78 seed: int 79 reproducibility seed for nodes_sim=='uniform' 80 81 backend: str 82 "cpu" or "gpu" or "tpu" 83 84 Attributes: 85 86 voter_: dict 87 dictionary containing all the fitted base-learners 88 89 90 Examples: 91 92 See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/randombag_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/randombag_classification.py) 93 94 ```python 95 import nnetsauce as ns 96 from sklearn.datasets import load_breast_cancer 97 from sklearn.tree import DecisionTreeClassifier 98 from sklearn.model_selection import train_test_split 99 from sklearn import metrics 100 from time import time 101 102 103 breast_cancer = load_breast_cancer() 104 Z = breast_cancer.data 105 t = breast_cancer.target 106 np.random.seed(123) 107 X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2) 108 109 # decision tree 110 clf = DecisionTreeClassifier(max_depth=2, random_state=123) 111 fit_obj = ns.RandomBagClassifier(clf, n_hidden_features=2, 112 direct_link=True, 113 n_estimators=100, 114 col_sample=0.9, row_sample=0.9, 115 dropout=0.3, n_clusters=0, verbose=1) 116 117 start = time() 118 fit_obj.fit(X_train, y_train) 119 print(f"Elapsed {time() - start}") 120 121 print(fit_obj.score(X_test, y_test)) 122 print(fit_obj.score(X_test, y_test, scoring="roc_auc")) 123 124 start = time() 125 preds = fit_obj.predict(X_test) 126 print(f"Elapsed {time() - start}") 127 print(metrics.classification_report(preds, y_test)) 128 ``` 129 130 """ 131 132 # construct the object ----- 133 _estimator_type = "classifier" 134 135 def __init__( 136 self, 137 obj, 138 n_estimators=10, 139 n_hidden_features=1, 140 activation_name="relu", 141 a=0.01, 142 nodes_sim="sobol", 143 bias=True, 144 dropout=0, 145 direct_link=False, 
146 n_clusters=2, 147 cluster_encode=True, 148 type_clust="kmeans", 149 type_scaling=("std", "std", "std"), 150 col_sample=1, 151 row_sample=1, 152 n_jobs=None, 153 seed=123, 154 verbose=1, 155 backend="cpu", 156 ): 157 super().__init__( 158 obj=obj, 159 n_estimators=n_estimators, 160 n_hidden_features=n_hidden_features, 161 activation_name=activation_name, 162 a=a, 163 nodes_sim=nodes_sim, 164 bias=bias, 165 dropout=dropout, 166 direct_link=direct_link, 167 n_clusters=n_clusters, 168 cluster_encode=cluster_encode, 169 type_clust=type_clust, 170 type_scaling=type_scaling, 171 col_sample=col_sample, 172 row_sample=row_sample, 173 seed=seed, 174 backend=backend, 175 ) 176 177 self.type_fit = "classification" 178 self.verbose = verbose 179 self.n_jobs = n_jobs 180 self.voter_ = {} 181 182 def fit(self, X, y, **kwargs): 183 """Fit Random 'Bagging' model to training data (X, y). 184 185 Args: 186 187 X: {array-like}, shape = [n_samples, n_features] 188 Training vectors, where n_samples is the number 189 of samples and n_features is the number of features. 190 191 y: array-like, shape = [n_samples] 192 Target values. 193 194 **kwargs: additional parameters to be passed to 195 self.cook_training_set or self.obj.fit 196 197 Returns: 198 199 self: object 200 201 """ 202 203 assert mx.is_factor(y), "y must contain only integers" 204 205 self.n_classes_ = len(np.unique(y)) # for compatibility with sklearn 206 207 # training 208 self.n_classes = len(np.unique(y)) 209 210 base_learner = CustomClassifier( 211 self.obj, 212 n_hidden_features=self.n_hidden_features, 213 activation_name=self.activation_name, 214 a=self.a, 215 nodes_sim=self.nodes_sim, 216 bias=self.bias, 217 dropout=self.dropout, 218 direct_link=self.direct_link, 219 n_clusters=self.n_clusters, 220 type_clust=self.type_clust, 221 type_scaling=self.type_scaling, 222 col_sample=self.col_sample, 223 row_sample=self.row_sample, 224 seed=self.seed, 225 cv_calibration=None, 226 ) 227 228 # 1 - Sequential training ----- 229 230 if self.n_jobs is None: 231 self.voter_ = rbagloop_classification( 232 base_learner, X, y, self.n_estimators, self.verbose, self.seed 233 ) 234 235 self.n_estimators = len(self.voter_) 236 237 return self 238 239 # 2 - Parallel training ----- 240 # buggy 241 # if self.n_jobs is not None: 242 def fit_estimators(m): 243 base_learner__ = deepcopy(base_learner) 244 base_learner__.set_params(seed=self.seed + m * 1000) 245 base_learner__.fit(X, y, **kwargs) 246 return base_learner__ 247 248 if self.verbose == 1: 249 voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")( 250 delayed(fit_estimators)(m) 251 for m in tqdm(range(self.n_estimators)) 252 ) 253 else: 254 voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")( 255 delayed(fit_estimators)(m) for m in range(self.n_estimators) 256 ) 257 258 self.voter_ = {idx: elt for idx, elt in enumerate(voters_list)} 259 260 self.n_estimators = len(self.voter_) 261 self.classes_ = np.unique(y) 262 return self 263 264 def predict(self, X, weights=None, **kwargs): 265 """Predict test data X. 266 267 Args: 268 269 X: {array-like}, shape = [n_samples, n_features] 270 Training vectors, where n_samples is the number 271 of samples and n_features is the number of features. 
272 273 **kwargs: additional parameters to be passed to 274 self.cook_test_set 275 276 Returns: 277 278 model predictions: {array-like} 279 280 """ 281 return self.predict_proba(X, weights, **kwargs).argmax(axis=1) 282 283 def predict_proba(self, X, weights=None, **kwargs): 284 """Predict probabilities for test data X. 285 286 Args: 287 288 X: {array-like}, shape = [n_samples, n_features] 289 Training vectors, where n_samples is the number 290 of samples and n_features is the number of features. 291 292 **kwargs: additional parameters to be passed to 293 self.cook_test_set 294 295 Returns: 296 297 probability estimates for test data: {array-like} 298 299 """ 300 301 def calculate_probas(voter, weights=None, verbose=None): 302 ensemble_proba = 0 303 304 n_iter = len(voter) 305 306 assert n_iter > 0, "no estimator found in `RandomBag` ensemble" 307 308 if weights is None: 309 for idx, elt in voter.items(): 310 try: 311 ensemble_proba += elt.predict_proba(X) 312 313 # if verbose == 1: 314 # pbar.update(idx) 315 316 except: 317 continue 318 319 # if verbose == 1: 320 # pbar.update(n_iter) 321 322 return ensemble_proba / n_iter 323 324 # if weights is not None: 325 for idx, elt in voter.items(): 326 ensemble_proba += weights[idx] * elt.predict_proba(X) 327 328 # if verbose == 1: 329 # pbar.update(idx) 330 331 # if verbose == 1: 332 # pbar.update(n_iter) 333 334 return ensemble_proba 335 336 # end calculate_probas ---- 337 338 if self.n_jobs is None: 339 # if self.verbose == 1: 340 # pbar = Progbar(self.n_estimators) 341 342 if weights is None: 343 return calculate_probas(self.voter_, verbose=self.verbose) 344 345 # if weights is not None: 346 self.weights = weights 347 348 return calculate_probas(self.voter_, weights, verbose=self.verbose) 349 350 # if self.n_jobs is not None: 351 def predict_estimator(m): 352 try: 353 return self.voter_[m].predict_proba(X) 354 except: 355 pass 356 357 if self.verbose == 1: 358 preds = Parallel(n_jobs=self.n_jobs, prefer="threads")( 359 delayed(predict_estimator)(m) 360 for m in tqdm(range(self.n_estimators)) 361 ) 362 363 else: 364 preds = Parallel(n_jobs=self.n_jobs, prefer="threads")( 365 delayed(predict_estimator)(m) for m in range(self.n_estimators) 366 ) 367 368 ensemble_proba = 0 369 370 if weights is None: 371 for i in range(self.n_estimators): 372 ensemble_proba += preds[i] 373 374 return ensemble_proba / self.n_estimators 375 376 for i in range(self.n_estimators): 377 ensemble_proba += weights[i] * preds[i] 378 379 return ensemble_proba 380 381 @property 382 def _estimator_type(self): 383 return "classifier"
Randomized 'Bagging' Classification model
Parameters:
obj: object
any object containing a method fit (obj.fit()) and a method predict
(obj.predict())
n_estimators: int
number of bagging iterations (base learners in the ensemble)
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
voter_: dict
dictionary containing all the fitted base-learners
Examples:
See also https://github.com/Techtonique/nnetsauce/blob/master/examples/randombag_classification.py
import numpy as np
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time
breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)
# decision tree
clf = DecisionTreeClassifier(max_depth=2, random_state=123)
fit_obj = ns.RandomBagClassifier(clf, n_hidden_features=2,
direct_link=True,
n_estimators=100,
col_sample=0.9, row_sample=0.9,
dropout=0.3, n_clusters=0, verbose=1)
start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")
print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
182 def fit(self, X, y, **kwargs): 183 """Fit Random 'Bagging' model to training data (X, y). 184 185 Args: 186 187 X: {array-like}, shape = [n_samples, n_features] 188 Training vectors, where n_samples is the number 189 of samples and n_features is the number of features. 190 191 y: array-like, shape = [n_samples] 192 Target values. 193 194 **kwargs: additional parameters to be passed to 195 self.cook_training_set or self.obj.fit 196 197 Returns: 198 199 self: object 200 201 """ 202 203 assert mx.is_factor(y), "y must contain only integers" 204 205 self.n_classes_ = len(np.unique(y)) # for compatibility with sklearn 206 207 # training 208 self.n_classes = len(np.unique(y)) 209 210 base_learner = CustomClassifier( 211 self.obj, 212 n_hidden_features=self.n_hidden_features, 213 activation_name=self.activation_name, 214 a=self.a, 215 nodes_sim=self.nodes_sim, 216 bias=self.bias, 217 dropout=self.dropout, 218 direct_link=self.direct_link, 219 n_clusters=self.n_clusters, 220 type_clust=self.type_clust, 221 type_scaling=self.type_scaling, 222 col_sample=self.col_sample, 223 row_sample=self.row_sample, 224 seed=self.seed, 225 cv_calibration=None, 226 ) 227 228 # 1 - Sequential training ----- 229 230 if self.n_jobs is None: 231 self.voter_ = rbagloop_classification( 232 base_learner, X, y, self.n_estimators, self.verbose, self.seed 233 ) 234 235 self.n_estimators = len(self.voter_) 236 237 return self 238 239 # 2 - Parallel training ----- 240 # buggy 241 # if self.n_jobs is not None: 242 def fit_estimators(m): 243 base_learner__ = deepcopy(base_learner) 244 base_learner__.set_params(seed=self.seed + m * 1000) 245 base_learner__.fit(X, y, **kwargs) 246 return base_learner__ 247 248 if self.verbose == 1: 249 voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")( 250 delayed(fit_estimators)(m) 251 for m in tqdm(range(self.n_estimators)) 252 ) 253 else: 254 voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")( 255 delayed(fit_estimators)(m) for m in range(self.n_estimators) 256 ) 257 258 self.voter_ = {idx: elt for idx, elt in enumerate(voters_list)} 259 260 self.n_estimators = len(self.voter_) 261 self.classes_ = np.unique(y) 262 return self
Fit Random 'Bagging' model to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
264 def predict(self, X, weights=None, **kwargs): 265 """Predict test data X. 266 267 Args: 268 269 X: {array-like}, shape = [n_samples, n_features] 270 Training vectors, where n_samples is the number 271 of samples and n_features is the number of features. 272 273 **kwargs: additional parameters to be passed to 274 self.cook_test_set 275 276 Returns: 277 278 model predictions: {array-like} 279 280 """ 281 return self.predict_proba(X, weights, **kwargs).argmax(axis=1)
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
283 def predict_proba(self, X, weights=None, **kwargs): 284 """Predict probabilities for test data X. 285 286 Args: 287 288 X: {array-like}, shape = [n_samples, n_features] 289 Training vectors, where n_samples is the number 290 of samples and n_features is the number of features. 291 292 **kwargs: additional parameters to be passed to 293 self.cook_test_set 294 295 Returns: 296 297 probability estimates for test data: {array-like} 298 299 """ 300 301 def calculate_probas(voter, weights=None, verbose=None): 302 ensemble_proba = 0 303 304 n_iter = len(voter) 305 306 assert n_iter > 0, "no estimator found in `RandomBag` ensemble" 307 308 if weights is None: 309 for idx, elt in voter.items(): 310 try: 311 ensemble_proba += elt.predict_proba(X) 312 313 # if verbose == 1: 314 # pbar.update(idx) 315 316 except: 317 continue 318 319 # if verbose == 1: 320 # pbar.update(n_iter) 321 322 return ensemble_proba / n_iter 323 324 # if weights is not None: 325 for idx, elt in voter.items(): 326 ensemble_proba += weights[idx] * elt.predict_proba(X) 327 328 # if verbose == 1: 329 # pbar.update(idx) 330 331 # if verbose == 1: 332 # pbar.update(n_iter) 333 334 return ensemble_proba 335 336 # end calculate_probas ---- 337 338 if self.n_jobs is None: 339 # if self.verbose == 1: 340 # pbar = Progbar(self.n_estimators) 341 342 if weights is None: 343 return calculate_probas(self.voter_, verbose=self.verbose) 344 345 # if weights is not None: 346 self.weights = weights 347 348 return calculate_probas(self.voter_, weights, verbose=self.verbose) 349 350 # if self.n_jobs is not None: 351 def predict_estimator(m): 352 try: 353 return self.voter_[m].predict_proba(X) 354 except: 355 pass 356 357 if self.verbose == 1: 358 preds = Parallel(n_jobs=self.n_jobs, prefer="threads")( 359 delayed(predict_estimator)(m) 360 for m in tqdm(range(self.n_estimators)) 361 ) 362 363 else: 364 preds = Parallel(n_jobs=self.n_jobs, prefer="threads")( 365 delayed(predict_estimator)(m) for m in range(self.n_estimators) 366 ) 367 368 ensemble_proba = 0 369 370 if weights is None: 371 for i in range(self.n_estimators): 372 ensemble_proba += preds[i] 373 374 return ensemble_proba / self.n_estimators 375 376 for i in range(self.n_estimators): 377 ensemble_proba += weights[i] * preds[i] 378 379 return ensemble_proba
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
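Continuing the fitted `fit_obj` and `X_test` from the example earlier in this class's documentation: `predict_proba` averages the base learners' class probabilities, and passing `weights` switches to a weighted sum (uniform weights, an illustrative choice, reproduce the default average):

```python
import numpy as np

probs = fit_obj.predict_proba(X_test)
print(probs[:3])

w = np.repeat(1.0 / fit_obj.n_estimators, fit_obj.n_estimators)
print(np.allclose(fit_obj.predict_proba(X_test, weights=w), probs))
```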
23class RandomFourierEstimator(BaseEstimator): 24 def __init__( 25 self, estimator, n_components=100, gamma=1.0, random_state=None 26 ): 27 """ 28 Random Fourier Features transformation with a given estimator. 29 30 Parameters: 31 - estimator: A scikit-learn estimator (classifier, regressor, etc.). 32 - n_components: Number of random Fourier features. 33 - gamma: Hyperparameter for RBF kernel approximation. 34 - random_state: Random state for reproducibility. 35 """ 36 self.estimator = estimator 37 self.n_components = n_components 38 self.gamma = gamma 39 self.random_state = random_state 40 41 # Dynamically set the estimator type and appropriate mixin 42 estimator_type = _get_estimator_type(estimator) 43 if estimator_type == "classifier": 44 self._estimator_type = "classifier" 45 # Add ClassifierMixin to the class hierarchy 46 if not isinstance(self, ClassifierMixin): 47 self.__class__ = type( 48 self.__class__.__name__, 49 (self.__class__, ClassifierMixin), 50 dict(self.__class__.__dict__), 51 ) 52 elif estimator_type == "regressor": 53 self._estimator_type = "regressor" 54 # Add RegressorMixin to the class hierarchy 55 if not isinstance(self, RegressorMixin): 56 self.__class__ = type( 57 self.__class__.__name__, 58 (self.__class__, RegressorMixin), 59 dict(self.__class__.__dict__), 60 ) 61 62 def fit(self, X, y=None): 63 """ 64 Fit the Random Fourier feature transformer and the estimator. 65 """ 66 X = check_array(X) 67 68 # Initialize and fit the Random Fourier Feature transformer 69 self.rff_ = RBFSampler( 70 n_components=self.n_components, 71 gamma=self.gamma, 72 random_state=self.random_state, 73 ) 74 X_transformed = self.rff_.fit_transform(X) 75 76 # Fit the underlying estimator on the transformed data 77 self.estimator.fit(X_transformed, y) 78 79 return self 80 81 def partial_fit(self, X, y, classes=None): 82 """ 83 Incrementally fit the Random Fourier feature transformer and the estimator. 84 """ 85 X = check_array(X) 86 87 # Check if RFF transformer is already fitted 88 if not hasattr(self, "rff_"): 89 # First call - fit the transformer 90 self.rff_ = RBFSampler( 91 n_components=self.n_components, 92 gamma=self.gamma, 93 random_state=self.random_state, 94 ) 95 X_transformed = self.rff_.fit_transform(X) 96 else: 97 # Subsequent calls - only transform 98 X_transformed = self.rff_.transform(X) 99 100 # If estimator supports partial_fit, we use it, otherwise raise an error 101 if hasattr(self.estimator, "partial_fit"): 102 self.estimator.partial_fit(X_transformed, y, classes=classes) 103 else: 104 raise ValueError( 105 f"The estimator {type(self.estimator).__name__} does not support partial_fit method." 106 ) 107 108 return self 109 110 def predict(self, X): 111 """ 112 Predict using the Random Fourier transformed data. 113 """ 114 check_is_fitted(self, ["rff_"]) 115 X = check_array(X) 116 117 # Transform the input data 118 X_transformed = self.rff_.transform(X) 119 120 # Predict using the underlying estimator 121 return self.estimator.predict(X_transformed) 122 123 def predict_proba(self, X): 124 """ 125 Predict class probabilities (only for classifiers). 126 """ 127 if ( 128 not hasattr(self, "_estimator_type") 129 or self._estimator_type != "classifier" 130 ): 131 raise AttributeError( 132 "predict_proba is not available for this estimator type." 133 ) 134 135 check_is_fitted(self, ["rff_"]) 136 X = check_array(X) 137 138 if not hasattr(self.estimator, "predict_proba"): 139 raise ValueError( 140 f"The estimator {type(self.estimator).__name__} does not support predict_proba." 
141 ) 142 143 # Transform the input data 144 X_transformed = self.rff_.transform(X) 145 146 # Predict probabilities using the underlying estimator 147 return self.estimator.predict_proba(X_transformed) 148 149 def predict_log_proba(self, X): 150 """ 151 Predict class log probabilities (only for classifiers). 152 """ 153 if ( 154 not hasattr(self, "_estimator_type") 155 or self._estimator_type != "classifier" 156 ): 157 raise AttributeError( 158 "predict_log_proba is not available for this estimator type." 159 ) 160 161 check_is_fitted(self, ["rff_"]) 162 X = check_array(X) 163 164 if not hasattr(self.estimator, "predict_log_proba"): 165 raise ValueError( 166 f"The estimator {type(self.estimator).__name__} does not support predict_log_proba." 167 ) 168 169 # Transform the input data 170 X_transformed = self.rff_.transform(X) 171 172 return self.estimator.predict_log_proba(X_transformed) 173 174 def decision_function(self, X): 175 """ 176 Decision function (only for classifiers). 177 """ 178 if ( 179 not hasattr(self, "_estimator_type") 180 or self._estimator_type != "classifier" 181 ): 182 raise AttributeError( 183 "decision_function is not available for this estimator type." 184 ) 185 186 check_is_fitted(self, ["rff_"]) 187 X = check_array(X) 188 189 if not hasattr(self.estimator, "decision_function"): 190 raise ValueError( 191 f"The estimator {type(self.estimator).__name__} does not support decision_function." 192 ) 193 194 # Transform the input data 195 X_transformed = self.rff_.transform(X) 196 197 return self.estimator.decision_function(X_transformed) 198 199 def score(self, X, y): 200 """ 201 Evaluate the model performance. 202 """ 203 check_is_fitted(self, ["rff_"]) 204 X = check_array(X) 205 206 # Transform the input data 207 X_transformed = self.rff_.transform(X) 208 209 # Evaluate using the underlying estimator's score method 210 return self.estimator.score(X_transformed, y) 211 212 @property 213 def classes_(self): 214 """Classes labels (only for classifiers).""" 215 if ( 216 hasattr(self, "_estimator_type") 217 and self._estimator_type == "classifier" 218 ): 219 return getattr(self.estimator, "classes_", None) 220 else: 221 raise AttributeError( 222 "classes_ is not available for this estimator type." 223 ) 224 225 def get_params(self, deep=True): 226 """ 227 Get parameters for this estimator. 228 """ 229 params = {} 230 231 # Get estimator parameters with proper prefixing 232 if deep: 233 estimator_params = self.estimator.get_params(deep=True) 234 for key, value in estimator_params.items(): 235 params[f"estimator__{key}"] = value 236 237 # Add our own parameters 238 params.update( 239 { 240 "estimator": self.estimator, 241 "n_components": self.n_components, 242 "gamma": self.gamma, 243 "random_state": self.random_state, 244 } 245 ) 246 247 return params 248 249 def set_params(self, **params): 250 """ 251 Set the parameters of this estimator. 
252 """ 253 # Separate our parameters from estimator parameters 254 our_params = {} 255 estimator_params = {} 256 257 for param, value in params.items(): 258 if param.startswith("estimator__"): 259 # Remove the 'estimator__' prefix 260 estimator_params[param[11:]] = value 261 elif param in [ 262 "estimator", 263 "n_components", 264 "gamma", 265 "random_state", 266 ]: 267 our_params[param] = value 268 else: 269 # Assume it's an estimator parameter without prefix 270 estimator_params[param] = value 271 272 # Set our parameters 273 for param, value in our_params.items(): 274 setattr(self, param, value) 275 276 # If estimator changed, update the estimator type 277 if "estimator" in our_params: 278 self.__init__( 279 self.estimator, self.n_components, self.gamma, self.random_state 280 ) 281 282 # Set estimator parameters 283 if estimator_params: 284 self.estimator.set_params(**estimator_params) 285 286 # If RFF parameters changed and model is fitted, we need to refit 287 if hasattr(self, "rff_") and ( 288 "n_components" in our_params 289 or "gamma" in our_params 290 or "random_state" in our_params 291 ): 292 # Remove the fitted transformer so it gets recreated on next fit 293 delattr(self, "rff_") 294 295 return self
Random Fourier Features transformation with a given estimator.
Parameters:
estimator: A scikit-learn estimator (classifier, regressor, etc.).
n_components: Number of random Fourier features.
gamma: Hyperparameter for RBF kernel approximation.
random_state: Random state for reproducibility.
    def fit(self, X, y=None):
        """
        Fit the Random Fourier feature transformer and the estimator.
        """
        X = check_array(X)

        # Initialize and fit the Random Fourier Feature transformer
        self.rff_ = RBFSampler(
            n_components=self.n_components,
            gamma=self.gamma,
            random_state=self.random_state,
        )
        X_transformed = self.rff_.fit_transform(X)

        # Fit the underlying estimator on the transformed data
        self.estimator.fit(X_transformed, y)

        return self
Fit the Random Fourier feature transformer and the estimator.
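The full class listing above also defines `partial_fit` for incremental learning when the wrapped estimator itself supports `partial_fit`. A minimal sketch, assuming scikit-learn's SGDClassifier as the base estimator and synthetic data (both are illustrative choices):

```python
import numpy as np
import nnetsauce as ns
from sklearn.linear_model import SGDClassifier

rng = np.random.default_rng(0)
X = rng.normal(size=(300, 5))
y = (X[:, 0] + X[:, 1] > 0).astype(int)

rfe = ns.RandomFourierEstimator(
    SGDClassifier(loss="log_loss"), n_components=50, random_state=42
)
# feed the data in mini-batches; the RBF sampler is fitted on the first batch,
# subsequent batches are only transformed
for X_batch, y_batch in zip(np.array_split(X, 3), np.array_split(y, 3)):
    rfe.partial_fit(X_batch, y_batch, classes=np.array([0, 1]))
print(rfe.predict(X[:5]))
```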
    def predict(self, X):
        """
        Predict using the Random Fourier transformed data.
        """
        check_is_fitted(self, ["rff_"])
        X = check_array(X)

        # Transform the input data
        X_transformed = self.rff_.transform(X)

        # Predict using the underlying estimator
        return self.estimator.predict(X_transformed)
Predict using the Random Fourier transformed data.
    def predict_proba(self, X):
        """
        Predict class probabilities (only for classifiers).
        """
        if (
            not hasattr(self, "_estimator_type")
            or self._estimator_type != "classifier"
        ):
            raise AttributeError(
                "predict_proba is not available for this estimator type."
            )

        check_is_fitted(self, ["rff_"])
        X = check_array(X)

        if not hasattr(self.estimator, "predict_proba"):
            raise ValueError(
                f"The estimator {type(self.estimator).__name__} does not support predict_proba."
            )

        # Transform the input data
        X_transformed = self.rff_.transform(X)

        # Predict probabilities using the underlying estimator
        return self.estimator.predict_proba(X_transformed)
Predict class probabilities (only for classifiers).
    def score(self, X, y):
        """
        Evaluate the model performance.
        """
        check_is_fitted(self, ["rff_"])
        X = check_array(X)

        # Transform the input data
        X_transformed = self.rff_.transform(X)

        # Evaluate using the underlying estimator's score method
        return self.estimator.score(X_transformed, y)
Evaluate the model performance.
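A minimal end-to-end sketch (the breast cancer data, the logistic regression base estimator, and the hyperparameters are illustrative choices; it is assumed that the estimator-type detection above recognizes LogisticRegression as a classifier):

```python
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123
)

rfe = ns.RandomFourierEstimator(
    LogisticRegression(max_iter=1000), n_components=200, gamma=0.1, random_state=42
)
rfe.fit(X_train, y_train)
print(rfe.score(X_test, y_test))      # delegated to the wrapped classifier (accuracy)
print(rfe.predict_proba(X_test)[:2])  # class probabilities on RFF-transformed inputs
```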
12class RandomFourierFeaturesRidge(BaseEstimator, RegressorMixin): 13 """ 14 Random Fourier Features with Bayesian Ridge Regression. 15 16 Implements both standard (MLE) and Bayesian versions with uncertainty quantification. 17 Uses data augmentation for L2 regularization via jnp.lstsq. 18 """ 19 20 def __init__( 21 self, 22 n_features: int = 100, 23 gamma: float = 1.0, 24 alpha: float = 1e-6, 25 include_bias: bool = True, 26 random_seed: int = 42, 27 ): 28 """ 29 Parameters: 30 ----------- 31 n_features : int 32 Number of random Fourier features (D) 33 gamma : float 34 RBF kernel parameter: k(x,y) = exp(-gamma * ||x-y||²) 35 alpha : float 36 Prior precision (inverse variance) for Bayesian version 37 Equivalent to regularization strength: lambda = alpha / beta 38 include_bias : bool 39 Whether to include a bias term 40 random_seed : int 41 Random seed for reproducibility 42 """ 43 self.n_features = n_features 44 self.gamma = gamma 45 self.alpha = alpha 46 self.include_bias = include_bias 47 self.key = random.PRNGKey(random_seed) 48 self.is_fitted = False 49 50 # Bayesian parameters 51 self.beta = None # Noise precision (will be estimated from data) 52 self.w_mean = None # Posterior mean of weights 53 self.w_cov = None # Posterior covariance of weights 54 self.S_N = None # Posterior precision matrix 55 56 def _compute_random_features( 57 self, X: jnp.ndarray, W: jnp.ndarray, b: jnp.ndarray 58 ) -> jnp.ndarray: 59 """Compute random Fourier features: sqrt(2/D) * cos(XW + b)""" 60 projection = jnp.dot(X, W) + b # Shape: (n_samples, n_features) 61 features = jnp.sqrt(2.0 / self.n_features) * jnp.cos(projection) 62 63 if self.include_bias: 64 features = jnp.concatenate( 65 [jnp.ones((X.shape[0], 1)), features], axis=1 66 ) 67 68 return features 69 70 def _init_random_weights( 71 self, input_dim: int 72 ) -> Tuple[jnp.ndarray, jnp.ndarray]: 73 """Initialize random weights and biases for RFF""" 74 # Sample from Gaussian distribution for RBF kernel 75 # Variance = 2 * gamma for RBF kernel 76 self.key, subkey = random.split(self.key) 77 W = random.normal( 78 subkey, shape=(input_dim, self.n_features) 79 ) * jnp.sqrt(2.0 * self.gamma) 80 81 self.key, subkey = random.split(self.key) 82 b = random.uniform( 83 subkey, shape=(1, self.n_features), minval=0, maxval=2 * jnp.pi 84 ) 85 86 return W, b 87 88 def fit( 89 self, 90 X: Union[jnp.ndarray, np.ndarray], 91 y: Union[jnp.ndarray, np.ndarray], 92 method: str = "bayesian", 93 noise_variance: Optional[float] = None, 94 ) -> "RandomFourierFeaturesRidge": 95 """ 96 Fit the model using either standard or Bayesian ridge regression. 
97 98 Parameters: 99 ----------- 100 X : array-like, shape (n_samples, n_features) 101 Training data 102 y : array-like, shape (n_samples,) or (n_samples, n_targets) 103 Target values 104 method : str, either "standard" or "bayesian" 105 "standard": Maximum likelihood estimation with L2 regularization 106 "bayesian": Full Bayesian inference with uncertainty quantification 107 noise_variance : float, optional 108 If provided, fixes the noise variance instead of estimating it 109 """ 110 # Convert to JAX arrays if needed 111 X = jnp.asarray(X) 112 y = jnp.asarray(y) 113 114 if len(y.shape) == 1: 115 y = y.reshape(-1, 1) 116 117 n_samples, input_dim = X.shape 118 119 # Initialize random Fourier weights 120 self.W, self.b = self._init_random_weights(input_dim) 121 122 # Compute random Fourier features 123 Phi = self._compute_random_features(X, self.W, self.b) 124 n_basis = Phi.shape[1] # D + 1 if bias included 125 126 # Store feature matrix and target values for Bayesian updates/likelihood computation 127 self.Phi_train = Phi 128 self.y_train = y # Store y_train 129 130 if method == "standard": 131 # Standard ridge regression using data augmentation for regularization 132 self._fit_standard(Phi, y) 133 elif method == "bayesian": 134 # Bayesian ridge regression 135 self._fit_bayesian(Phi, y, noise_variance) 136 else: 137 raise ValueError("method must be 'standard' or 'bayesian'") 138 139 self.is_fitted = True 140 self.method = method 141 self.input_dim = input_dim 142 143 return self 144 145 def _fit_standard(self, Phi: jnp.ndarray, y: jnp.ndarray) -> None: 146 """Standard ridge regression using lstsq with data augmentation""" 147 n_samples, n_basis = Phi.shape 148 149 # Create augmented data for L2 regularization 150 # This is equivalent to adding sqrt(alpha) * I to the design matrix 151 sqrt_alpha = jnp.sqrt(self.alpha) 152 Phi_aug = jnp.vstack([Phi, sqrt_alpha * jnp.eye(n_basis)]) 153 y_aug = jnp.vstack([y, jnp.zeros((n_basis, y.shape[1]))]) 154 155 # Solve using least squares 156 # Note: jnp.linalg.lstsq is more stable than explicit normal equations 157 weights, residuals, rank, s = jnp.linalg.lstsq( 158 Phi_aug, y_aug, rcond=None 159 ) 160 161 self.w_mean = weights 162 self.weights = weights # For compatibility 163 164 # Estimate noise variance from residuals 165 residuals = y - Phi @ weights 166 self.beta = 1.0 / jnp.maximum(jnp.var(residuals), 1e-8) 167 168 def _fit_bayesian( 169 self, 170 Phi: jnp.ndarray, 171 y: jnp.ndarray, 172 noise_variance: Optional[float] = None, 173 ) -> None: 174 """Bayesian ridge regression with evidence approximation""" 175 n_samples, n_basis = Phi.shape 176 177 # Initialize precision parameters 178 if noise_variance is not None: 179 self.beta = 1.0 / noise_variance 180 else: 181 # Initial estimate of beta from data 182 self.beta = 1.0 / jnp.maximum(jnp.var(y), 1e-8) 183 184 # Posterior precision matrix: S_N⁻¹ = alpha * I + beta * ΦᵀΦ 185 I = jnp.eye(n_basis) 186 PhiT_Phi = Phi.T @ Phi 187 188 # Initialize with prior 189 S_N_inv = self.alpha * I 190 191 # Evidence approximation to optimize alpha, beta 192 for _ in range(10): # Iterate to converge on alpha, beta 193 # Update posterior mean and covariance 194 S_N = jnp.linalg.inv(S_N_inv + self.beta * PhiT_Phi) 195 self.w_mean = self.beta * S_N @ Phi.T @ y 196 197 # Update gamma (effective number of parameters) 198 eigenvalues = jnp.linalg.eigvalsh(PhiT_Phi) 199 gamma_val = jnp.sum(eigenvalues / (self.alpha + eigenvalues)) 200 201 # Update alpha and beta (MacKay's fixed point updates) 202 if self.alpha > 0: 203 
self.alpha = gamma_val / jnp.sum(self.w_mean**2) 204 205 if noise_variance is None: 206 residuals = y - Phi @ self.w_mean 207 self.beta = (n_samples - gamma_val) / jnp.sum(residuals**2) 208 209 # Update precision matrix 210 S_N_inv = self.alpha * I 211 212 # Store final covariance 213 self.S_N = jnp.linalg.inv(self.alpha * I + self.beta * PhiT_Phi) 214 self.w_cov = self.S_N 215 216 # Also store for compatibility 217 self.weights = self.w_mean 218 219 def transform(self, X: Union[jnp.ndarray, np.ndarray]) -> jnp.ndarray: 220 """Transform input data to random Fourier feature space""" 221 if not self.is_fitted: 222 raise ValueError("Model must be fitted before transforming") 223 224 X = jnp.asarray(X) 225 return self._compute_random_features(X, self.W, self.b) 226 227 def predict( 228 self, 229 X: Union[jnp.ndarray, np.ndarray], 230 return_std: bool = False, 231 return_cov: bool = False, 232 ) -> Union[jnp.ndarray, Tuple[jnp.ndarray, jnp.ndarray]]: 233 """ 234 Make predictions, optionally with uncertainty quantification. 235 236 Parameters: 237 ----------- 238 X : array-like, shape (n_samples, n_features) 239 Input data 240 return_std : bool 241 If True, return standard deviation of predictive distribution 242 return_cov : bool 243 If True, return full covariance matrix of predictive distribution 244 245 Returns: 246 -------- 247 y_pred : jnp.ndarray 248 Predictive mean 249 y_std or y_cov : jnp.ndarray, optional 250 Predictive standard deviation or covariance 251 """ 252 if not self.is_fitted: 253 raise ValueError("Model must be fitted before prediction") 254 255 X = jnp.asarray(X) 256 Phi = self.transform(X) 257 258 # Predictive mean 259 y_pred = Phi @ self.w_mean 260 261 if not return_std and not return_cov: 262 return y_pred 263 264 if self.method != "bayesian": 265 raise ValueError( 266 "Uncertainty quantification only available for Bayesian method" 267 ) 268 269 # Predictive variance 270 if return_cov: 271 # Full predictive covariance 272 # Σ_pred = (1/β) * I + Φ @ S_N @ Φᵀ 273 pred_cov = (1.0 / self.beta) * jnp.eye( 274 Phi.shape[0] 275 ) + Phi @ self.S_N @ Phi.T 276 return y_pred, pred_cov 277 else: 278 # Diagonal of predictive covariance (standard deviations) 279 # σ²_pred = (1/β) + diag(Φ @ S_N @ Φᵀ) 280 var_diag = (1.0 / self.beta) + jnp.sum( 281 (Phi @ self.S_N) * Phi, axis=1 282 ) 283 y_std = jnp.sqrt(jnp.maximum(var_diag, 0.0)).reshape(-1, 1) 284 return y_pred, y_std 285 286 def sample_posterior( 287 self, 288 X: Union[jnp.ndarray, np.ndarray], 289 n_samples: int = 1, 290 key: Optional[jax.random.PRNGKey] = None, 291 ) -> jnp.ndarray: 292 """ 293 Sample from the posterior predictive distribution. 
294 295 Parameters: 296 ----------- 297 X : array-like 298 Input data 299 n_samples : int 300 Number of samples to draw 301 key : PRNGKey, optional 302 Random key for sampling 303 304 Returns: 305 -------- 306 samples : jnp.ndarray, shape (n_samples, n_test_samples) 307 Samples from posterior predictive distribution 308 """ 309 if self.method != "bayesian": 310 raise ValueError("Sampling only available for Bayesian method") 311 312 if key is None: 313 key = self.key 314 315 X = jnp.asarray(X) 316 Phi = self.transform(X) 317 n_test = Phi.shape[0] 318 319 # Sample weights from posterior 320 key, subkey = random.split(key) 321 w_samples = random.multivariate_normal( 322 subkey, self.w_mean.flatten(), self.S_N, shape=(n_samples,) 323 ) 324 325 # Generate predictions for each weight sample 326 samples = [] 327 for i in range(n_samples): 328 w_sample = w_samples[i].reshape(-1, 1) 329 # Add noise variance 330 key, subkey1, subkey2 = random.split(key, 3) 331 pred_mean = Phi @ w_sample 332 noise = random.normal(subkey2, shape=pred_mean.shape) / jnp.sqrt( 333 self.beta 334 ) 335 samples.append(pred_mean + noise) 336 337 return jnp.stack(samples, axis=0) 338 339 def log_marginal_likelihood(self) -> float: 340 """ 341 Compute log marginal likelihood (evidence) for Bayesian model. 342 343 Returns: 344 -------- 345 log_evidence : float 346 Log marginal likelihood p(y|X,α,β) 347 """ 348 if self.method != "bayesian": 349 raise ValueError( 350 "Log marginal likelihood only available for Bayesian method" 351 ) 352 353 n_samples = self.Phi_train.shape[0] 354 n_basis = self.Phi_train.shape[1] 355 356 # Log determinant term 357 I = jnp.eye(n_basis) 358 A = self.alpha * I + self.beta * self.Phi_train.T @ self.Phi_train 359 sign, logdet_A = jnp.linalg.slogdet(A) 360 logdet_term = 0.5 * (n_basis * jnp.log(self.alpha) - logdet_A) 361 362 # Data fit term 363 residuals = self.y_train - self.Phi_train @ self.w_mean 364 data_fit_term = -0.5 * self.beta * jnp.sum(residuals**2) 365 366 # Constant term 367 const_term = 0.5 * n_samples * jnp.log(self.beta / (2 * jnp.pi)) 368 369 return float(logdet_term + data_fit_term + const_term) 370 371 def get_params(self) -> Dict: 372 """Get model parameters""" 373 return { 374 "n_features": self.n_features, 375 "gamma": self.gamma, 376 "alpha": self.alpha, 377 "beta": self.beta if self.beta is not None else None, 378 "method": self.method if hasattr(self, "method") else None, 379 "input_dim": self.input_dim if hasattr(self, "input_dim") else None, 380 } 381 382 def set_params(self, **params) -> "RandomFourierFeaturesRidge": 383 """Set model parameters""" 384 for key, value in params.items(): 385 if hasattr(self, key): 386 setattr(self, key, value) 387 return self
Random Fourier Features with Bayesian Ridge Regression.
Implements both standard (MLE) and Bayesian versions with uncertainty quantification. Uses data augmentation for L2 regularization via jnp.linalg.lstsq; a short sketch of this augmentation trick follows.
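For intuition, here is a minimal, self-contained sketch (jax.numpy only) of that augmentation trick; `ridge_via_lstsq` is an illustrative helper, not part of the library, and it reproduces the same minimizer as the closed-form ridge solution.

```python
# Illustrative sketch (not library code): ridge regression via data augmentation.
# Stacking sqrt(alpha) * I under the design matrix and zeros under the targets
# turns a plain least-squares solve into the L2-regularized solution.
import jax.numpy as jnp

def ridge_via_lstsq(Phi, y, alpha=1.0):
    n_basis = Phi.shape[1]
    Phi_aug = jnp.vstack([Phi, jnp.sqrt(alpha) * jnp.eye(n_basis)])
    y_aug = jnp.vstack([y, jnp.zeros((n_basis, y.shape[1]))])
    w, *_ = jnp.linalg.lstsq(Phi_aug, y_aug, rcond=None)
    # Same minimizer as (Phi.T @ Phi + alpha * I)^{-1} @ Phi.T @ y
    return w
```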
Fit the model using either standard or Bayesian ridge regression.
Parameters:
X : array-like, shape (n_samples, n_features)
Training data
y : array-like, shape (n_samples,) or (n_samples, n_targets)
Target values
method : str, either "standard" or "bayesian"
"standard": Maximum likelihood estimation with L2 regularization
"bayesian": Full Bayesian inference with uncertainty quantification
noise_variance : float, optional
If provided, fixes the noise variance instead of estimating it
Make predictions, optionally with uncertainty quantification.
Parameters:
X : array-like, shape (n_samples, n_features)
Input data
return_std : bool
If True, return standard deviation of predictive distribution
return_cov : bool
If True, return full covariance matrix of predictive distribution
Returns:
y_pred : jnp.ndarray
Predictive mean
y_std or y_cov : jnp.ndarray, optional
Predictive standard deviation or covariance
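A hypothetical usage sketch, assuming the estimator is exposed as `nnetsauce.RandomFourierFeaturesRidge` and that the constructor keywords follow the positional order used in `RandomFourierFeaturesRidgeGCV.__init__` (n_features, gamma, alpha, ...):

```python
# Hypothetical usage sketch: Bayesian fit on toy data, then predictive
# means and standard deviations via return_std=True.
import numpy as np
import nnetsauce as ns

rng = np.random.default_rng(0)
X = rng.uniform(-3, 3, size=(200, 1))
y = np.sin(X).ravel() + 0.1 * rng.normal(size=200)

model = ns.RandomFourierFeaturesRidge(n_features=100, gamma=1.0, alpha=1.0)
model.fit(X, y, method="bayesian")

X_new = np.linspace(-3, 3, 50).reshape(-1, 1)
y_mean, y_std = model.predict(X_new, return_std=True)  # both shaped (50, 1)
print(model.log_marginal_likelihood())
```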
390class RandomFourierFeaturesRidgeGCV(RandomFourierFeaturesRidge): 391 """ 392 Extends RandomFourierFeaturesRidge with GCV for automatic 393 regularization parameter selection. 394 """ 395 396 def __init__( 397 self, 398 n_features: int = 100, 399 gamma: float = 1.0, 400 alpha: Optional[float] = None, 401 include_bias: bool = True, 402 random_seed: int = 42, 403 ): 404 super().__init__(n_features, gamma, alpha, include_bias, random_seed) 405 self.alpha_opt = None # Stores the GCV-optimized alpha 406 self.gcv_score = None # Stores the optimal GCV score 407 408 def _compute_gcv( 409 self, 410 alpha: float, 411 s_sq: jnp.ndarray, 412 U: jnp.ndarray, 413 y: jnp.ndarray, 414 n_samples: int, 415 ) -> float: 416 """ 417 Compute GCV score for a given alpha. 418 419 Parameters: 420 ----------- 421 alpha : float 422 Regularization parameter 423 s_sq : jnp.ndarray 424 Squared singular values of design matrix Φ 425 U : jnp.ndarray 426 Left singular vectors of Φ 427 y : jnp.ndarray 428 Target values 429 n_samples : int 430 Number of data points 431 432 Returns: 433 -------- 434 gcv : float 435 GCV score for this alpha 436 """ 437 # Degrees of freedom: df(α) = Σ(σ_j²/(σ_j² + α)) 438 df = jnp.sum(s_sq / (s_sq + alpha)) 439 440 # Compute residual sum of squares efficiently using SVD 441 # y_pred = U @ (S²/(S² + α)) @ (U.T @ y) 442 Uty = U.T @ y 443 shrinkage = s_sq / (s_sq + alpha) 444 y_pred = U @ (shrinkage * Uty) 445 residuals = y - y_pred 446 rss = jnp.sum(residuals**2) 447 448 # GCV formula 449 denom = (1.0 - df / n_samples) ** 2 450 gcv = (rss / n_samples) / denom 451 452 return float(gcv) 453 454 def fit_gcv( 455 self, 456 X: Union[jnp.ndarray, np.ndarray], 457 y: Union[jnp.ndarray, np.ndarray], 458 alpha_range: Tuple[float, float] = (1e-8, 1e4), 459 n_alphas: int = 50, 460 method: str = "standard", 461 optimize: bool = True, 462 ) -> "RandomFourierFeaturesRidgeGCV": 463 """ 464 Fit model with GCV-optimized regularization parameter. 
465 466 Parameters: 467 ----------- 468 X : array-like 469 Training data 470 y : array-like 471 Target values 472 alpha_range : tuple 473 (min_alpha, max_alpha) range to search 474 n_alphas : int 475 Number of alpha values to try in initial grid search 476 method : str 477 "standard" or "bayesian" 478 optimize : bool 479 If True, perform fine optimization after grid search 480 481 Returns: 482 -------- 483 self : fitted model 484 """ 485 # Convert to JAX arrays 486 X = jnp.asarray(X) 487 y = jnp.asarray(y) 488 489 if len(y.shape) == 1: 490 y = y.reshape(-1, 1) 491 492 n_samples, input_dim = X.shape 493 494 # Initialize random Fourier weights 495 self.W, self.b = self._init_random_weights(input_dim) 496 497 # Compute random Fourier features 498 Phi = self._compute_random_features(X, self.W, self.b) 499 500 # Compute SVD of design matrix for efficient GCV computation 501 # Φ = U @ diag(S) @ V.T 502 U, S, Vt = jnp.linalg.svd(Phi, full_matrices=False) 503 s_sq = S**2 # Squared singular values 504 505 # Grid search on log scale 506 alphas_grid = jnp.logspace( 507 jnp.log10(alpha_range[0]), jnp.log10(alpha_range[1]), n_alphas 508 ) 509 510 gcv_scores = [] 511 for alpha in alphas_grid: 512 score = self._compute_gcv(float(alpha), s_sq, U, y, n_samples) 513 gcv_scores.append(score) 514 515 # Find best alpha from grid 516 best_idx = jnp.argmin(jnp.array(gcv_scores)) 517 alpha_grid_opt = float(alphas_grid[best_idx]) 518 519 # Fine optimization using Brent's method 520 if optimize: 521 # Define objective for scipy optimizer 522 def gcv_objective(log_alpha): 523 alpha = 10**log_alpha 524 return self._compute_gcv(alpha, s_sq, U, y, n_samples) 525 526 # Optimize in log space 527 result = minimize_scalar( 528 gcv_objective, 529 bounds=(jnp.log10(alpha_range[0]), jnp.log10(alpha_range[1])), 530 method="bounded", 531 options={"xatol": 0.1}, # Tolerance in log10 space 532 ) 533 534 if result.success: 535 alpha_opt = 10**result.x 536 gcv_opt = result.fun 537 else: 538 alpha_opt = alpha_grid_opt 539 gcv_opt = gcv_scores[best_idx] 540 else: 541 alpha_opt = alpha_grid_opt 542 gcv_opt = gcv_scores[best_idx] 543 544 # Store optimized parameters 545 self.alpha_opt = alpha_opt 546 self.gcv_score = gcv_opt 547 self.alpha = alpha_opt # Set as the model's alpha 548 549 # Fit final model with optimized alpha 550 if method == "standard": 551 self._fit_standard(Phi, y) 552 elif method == "bayesian": 553 # For Bayesian version, we can use alpha as prior precision 554 # Optionally optimize beta too 555 self._fit_bayesian(Phi, y) 556 else: 557 raise ValueError("method must be 'standard' or 'bayesian'") 558 559 self.is_fitted = True 560 self.method = method 561 self.input_dim = input_dim 562 563 return self 564 565 def fit_gcv_with_path( 566 self, 567 X: Union[jnp.ndarray, np.ndarray], 568 y: Union[jnp.ndarray, np.ndarray], 569 alpha_range: Tuple[float, float] = (1e-8, 1e4), 570 n_alphas: int = 100, 571 method: str = "standard", 572 ) -> dict: 573 """ 574 Fit with GCV and return full regularization path. 
575 576 Returns: 577 -------- 578 path_info : dict 579 Dictionary with alpha values, GCV scores, and metrics 580 """ 581 X = jnp.asarray(X) 582 y = jnp.asarray(y) 583 584 if len(y.shape) == 1: 585 y = y.reshape(-1, 1) 586 587 n_samples, input_dim = X.shape 588 589 # Initialize random features 590 self.W, self.b = self._init_random_weights(input_dim) 591 Phi = self._compute_random_features(X, self.W, self.b) 592 593 # Compute SVD 594 U, S, Vt = jnp.linalg.svd(Phi, full_matrices=False) 595 s_sq = S**2 596 597 # Compute GCV path 598 alphas = jnp.logspace( 599 jnp.log10(alpha_range[0]), jnp.log10(alpha_range[1]), n_alphas 600 ) 601 602 gcv_scores = [] 603 train_errors = [] 604 effective_dof = [] 605 606 for alpha in alphas: 607 alpha_val = float(alpha) 608 609 # GCV score 610 gcv = self._compute_gcv(alpha_val, s_sq, U, y, n_samples) 611 gcv_scores.append(gcv) 612 613 # Effective degrees of freedom 614 df = float(jnp.sum(s_sq / (s_sq + alpha_val))) 615 effective_dof.append(df) 616 617 # Training error for this alpha 618 # Compute weights: w = V @ (S/(S² + α)) @ (U.T @ y) 619 Uty = U.T @ y 620 shrinkage = S / (s_sq + alpha_val) 621 w_alpha = Vt.T @ (shrinkage.reshape(-1, 1) * Uty) 622 y_pred = Phi @ w_alpha 623 train_err = float(jnp.mean((y - y_pred) ** 2)) 624 train_errors.append(train_err) 625 626 # Find optimal alpha 627 best_idx = jnp.argmin(jnp.array(gcv_scores)) 628 alpha_opt = float(alphas[best_idx]) 629 630 # Fit final model with optimal alpha 631 self.alpha = alpha_opt 632 if method == "standard": 633 self._fit_standard(Phi, y) 634 elif method == "bayesian": 635 self._fit_bayesian(Phi, y) 636 637 self.is_fitted = True 638 self.method = method 639 self.input_dim = input_dim 640 self.alpha_opt = alpha_opt 641 self.gcv_score = gcv_scores[best_idx] 642 643 # Return full path information 644 path_info = { 645 "alphas": np.array(alphas), 646 "gcv_scores": np.array(gcv_scores), 647 "train_errors": np.array(train_errors), 648 "effective_dof": np.array(effective_dof), 649 "alpha_opt": alpha_opt, 650 "gcv_opt": gcv_scores[best_idx], 651 "dof_opt": effective_dof[best_idx], 652 } 653 654 return path_info 655 656 def plot_gcv_path(self, path_info: dict, save_path: str = None): 657 """ 658 Plot GCV regularization path. 
659 """ 660 import matplotlib.pyplot as plt 661 662 fig, axes = plt.subplots(2, 2, figsize=(12, 10)) 663 664 # Plot 1: GCV score vs alpha 665 ax = axes[0, 0] 666 ax.semilogx( 667 path_info["alphas"], path_info["gcv_scores"], "b-", linewidth=2 668 ) 669 ax.axvline( 670 path_info["alpha_opt"], 671 color="r", 672 linestyle="--", 673 label=f'Optimal α = {path_info["alpha_opt"]:.2e}', 674 ) 675 ax.set_xlabel("Regularization α") 676 ax.set_ylabel("GCV Score") 677 ax.set_title("GCV Score vs Regularization") 678 ax.legend() 679 ax.grid(True, alpha=0.3) 680 681 # Plot 2: Training error vs alpha 682 ax = axes[0, 1] 683 ax.loglog( 684 path_info["alphas"], path_info["train_errors"], "g-", linewidth=2 685 ) 686 ax.axvline(path_info["alpha_opt"], color="r", linestyle="--") 687 ax.set_xlabel("Regularization α") 688 ax.set_ylabel("Training MSE") 689 ax.set_title("Training Error vs Regularization") 690 ax.grid(True, alpha=0.3) 691 692 # Plot 3: Effective DOF vs alpha 693 ax = axes[1, 0] 694 ax.semilogx( 695 path_info["alphas"], path_info["effective_dof"], "m-", linewidth=2 696 ) 697 ax.axvline(path_info["alpha_opt"], color="r", linestyle="--") 698 ax.axhline( 699 path_info["dof_opt"], 700 color="r", 701 linestyle=":", 702 label=f'DOF at optimum = {path_info["dof_opt"]:.1f}', 703 ) 704 ax.set_xlabel("Regularization α") 705 ax.set_ylabel("Effective Degrees of Freedom") 706 ax.set_title("Model Complexity vs Regularization") 707 ax.legend() 708 ax.grid(True, alpha=0.3) 709 710 # Plot 4: GCV vs DOF 711 ax = axes[1, 1] 712 ax.plot( 713 path_info["effective_dof"], 714 path_info["gcv_scores"], 715 "k-", 716 linewidth=2, 717 ) 718 ax.axvline(path_info["dof_opt"], color="r", linestyle="--") 719 ax.set_xlabel("Effective Degrees of Freedom") 720 ax.set_ylabel("GCV Score") 721 ax.set_title("GCV vs Model Complexity") 722 ax.grid(True, alpha=0.3) 723 724 plt.suptitle( 725 "GCV Regularization Path Analysis", fontsize=14, fontweight="bold" 726 ) 727 plt.tight_layout() 728 729 if save_path: 730 plt.savefig(save_path, dpi=150, bbox_inches="tight") 731 732 plt.show()
Extends RandomFourierFeaturesRidge with generalized cross-validation (GCV) for automatic selection of the regularization parameter; a brief usage sketch follows.
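The GCV criterion minimized here is GCV(α) = (RSS/n) / (1 − df(α)/n)² with df(α) = Σⱼ σⱼ²/(σⱼ² + α), computed from the SVD of Φ (as in `_compute_gcv` above). A hypothetical usage sketch, assuming the class is exposed as `nnetsauce.RandomFourierFeaturesRidgeGCV`:

```python
# Hypothetical usage sketch: GCV-based choice of alpha on toy data.
import numpy as np
import nnetsauce as ns

rng = np.random.default_rng(1)
X = rng.normal(size=(300, 5))
y = X @ rng.normal(size=5) + 0.3 * rng.normal(size=300)

model = ns.RandomFourierFeaturesRidgeGCV(n_features=200, gamma=0.5)
model.fit_gcv(X, y, alpha_range=(1e-6, 1e3), n_alphas=30)
print(model.alpha_opt, model.gcv_score)

# Full regularization path: alphas, GCV scores, training MSE, effective dof
path = model.fit_gcv_with_path(X, y, n_alphas=50)
```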
16class RegressorUpdater(BaseEstimator, RegressorMixin): 17 """ 18 Update a regression model with new observations 19 20 Parameters 21 ---------- 22 regr: object 23 A regression model with a coef_ attribute 24 alpha: float 25 Updating factor's exponent 26 27 Attributes 28 ---------- 29 n_obs_: int 30 Number of observations 31 coef_: np.ndarray 32 Coefficients of the model 33 updating_factor_: float 34 Updating factor 35 36 """ 37 38 def __init__(self, regr, alpha=0.5): 39 self.regr = regr 40 self.alpha = alpha 41 self.n_obs_ = None 42 self.coef_ = None 43 self.updating_factor_ = None 44 try: 45 self.coef_ = self.regr.coef_ 46 if isinstance(self.regr, Base): 47 self.n_obs_ = self.regr.scaler_.n_samples_seen_ 48 except AttributeError: 49 pass 50 51 def fit(self, X, y, **kwargs): 52 if isinstance( 53 self.regr, CustomRegressor 54 ): # nnetsauce model not deep --- 55 if check_is_fitted(self.regr) == False: 56 self.regr.fit(X, y, **kwargs) 57 self.n_obs_ = X.shape[0] 58 if hasattr(self.regr, "coef_"): 59 self.coef_ = self.regr.coef_ 60 return self 61 self.n_obs_ = self.regr.scaler_.n_samples_seen_ 62 if hasattr(self.regr, "coef_"): 63 self.coef_ = self.regr.coef_ 64 return self 65 66 if ( 67 hasattr(self.regr, "coef_") == False 68 ): # sklearn model or CustomRegressor model --- 69 self.regr.fit(X, y) 70 self.n_obs_ = X.shape[0] 71 self.regr.fit(X, y) 72 if hasattr(self.regr, "stacked_obj"): 73 self.coef_ = self.regr.stacked_obj.coef_ 74 else: 75 self.coef_ = self.regr.coef_ 76 return self 77 self.n_obs_ = X.shape[0] 78 if hasattr(self.regr, "coef_"): 79 self.coef_ = self.regr.coef_ 80 return self 81 82 def predict(self, X): 83 # assert hasattr(self.regr, "coef_"), "model must have coef_ attribute" 84 return self.regr.predict(X) 85 86 def partial_fit(self, X, y): 87 assert hasattr( 88 self.regr, "coef_" 89 ), "model must be fitted first (i.e have 'coef_' attribute)" 90 assert ( 91 self.n_obs_ is not None 92 ), "model must be fitted first (i.e have 'n_obs_' attribute)" 93 94 if len(X.shape) == 1: 95 X = X.reshape(1, -1) 96 97 assert X.shape[0] == 1, "X must have one row" 98 99 self.updating_factor_ = self.n_obs_ ** (-self.alpha) 100 101 if isinstance(self.regr, Base): # nnetsauce model --- 102 newX = deepcopy(X) 103 104 if isinstance( 105 self.regr, CustomRegressor 106 ): # other nnetsauce model (CustomRegressor) --- 107 newX = self.regr.cook_test_set(X=X) 108 if isinstance(X, pd.DataFrame): 109 newx = newX.values.ravel() 110 else: 111 newx = newX.ravel() 112 113 else: # an sklearn model --- 114 if isinstance(X, pd.DataFrame): 115 newx = X.values.ravel() 116 else: 117 newx = X.ravel() 118 119 new_coef = self.regr.coef_ + self.updating_factor_ * np.dot( 120 newx, y - np.dot(newx, self.regr.coef_) 121 ) 122 self.regr.coef_ = _update_mean(self.regr.coef_, self.n_obs_, new_coef) 123 self.coef_ = deepcopy(self.regr.coef_) 124 self.n_obs_ += 1 125 return self
Update a regression model with new observations
Parameters
regr: object
A regression model with a coef_ attribute
alpha: float
Updating factor's exponent
Attributes
n_obs_: int
Number of observations
coef_: np.ndarray
Coefficients of the model
updating_factor_: float
Updating factor
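A hypothetical usage sketch, assuming the wrapped estimator exposes a `coef_` attribute after fitting (as scikit-learn's `Ridge` does); each `partial_fit` call nudges the coefficients by `n_obs_ ** (-alpha) * x * residual` and averages the result into the running estimate:

```python
# Hypothetical usage sketch: stream updates into a fitted linear model.
import numpy as np
from sklearn.linear_model import Ridge
import nnetsauce as ns

rng = np.random.default_rng(0)
X, y = rng.normal(size=(100, 3)), rng.normal(size=100)

updater = ns.RegressorUpdater(Ridge(), alpha=0.5)
updater.fit(X[:80], y[:80])          # initial fit, records n_obs_
for i in range(80, 100):             # then one observation at a time
    updater.partial_fit(X[i], y[i])
print(updater.coef_)
```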
16class ClassifierUpdater(BaseEstimator, ClassifierMixin): 17 """ 18 Update a regression model with new observations 19 20 Parameters 21 ---------- 22 clf: object 23 A regression model with a coef_ attribute 24 alpha: float 25 Updating factor's exponent 26 27 Attributes 28 ---------- 29 n_obs_: int 30 Number of observations 31 coef_: np.ndarray 32 Coefficients of the model 33 updating_factor_: float 34 Updating factor 35 36 """ 37 38 _estimator_type = "classifier" 39 40 def __init__(self, clf, alpha=0.5): 41 self.clf = clf 42 self.alpha = alpha 43 self.n_obs_ = None 44 self.coef_ = None 45 self.updating_factor_ = None 46 try: 47 self.coef_ = self.clf.coef_ 48 if isinstance(self.clf, Base): 49 self.n_obs_ = self.clf.scaler_.n_samples_seen_ 50 except AttributeError: 51 pass 52 53 def fit(self, X, y, **kwargs): 54 raise NotImplementedError( 55 "fit method is not implemented for ClassifierUpdater" 56 ) 57 58 if isinstance( 59 self.clf, CustomClassifier 60 ): # nnetsauce model not deep --- 61 if check_is_fitted(self.clf) == False: 62 self.clf.fit(X, y, **kwargs) 63 self.n_obs_ = X.shape[0] 64 if hasattr(self.clf, "coef_"): 65 self.coef_ = self.clf.coef_ 66 return self 67 self.n_obs_ = self.clf.scaler_.n_samples_seen_ 68 if hasattr(self.clf, "coef_"): 69 self.coef_ = self.clf.coef_ 70 return self 71 72 if ( 73 hasattr(self.clf, "coef_") == False 74 ): # sklearn model or CustomClassifier model --- 75 self.clf.fit(X, y) 76 self.n_obs_ = X.shape[0] 77 self.clf.fit(X, y) 78 if hasattr(self.clf, "stacked_obj"): 79 self.coef_ = self.clf.stacked_obj.coef_ 80 else: 81 self.coef_ = self.clf.coef_ 82 return self 83 self.n_obs_ = X.shape[0] 84 if hasattr(self.clf, "coef_"): 85 self.coef_ = self.clf.coef_ 86 return self 87 88 def predict(self, X): 89 raise NotImplementedError( 90 "predict method is not implemented for ClassifierUpdater" 91 ) 92 # assert hasattr(self.clf, "coef_"), "model must have coef_ attribute" 93 return self.clf.predict(X) 94 95 def partial_fit(self, X, y): 96 raise NotImplementedError( 97 "partial_fit method is not implemented for ClassifierUpdater" 98 ) 99 100 assert hasattr( 101 self.clf, "coef_" 102 ), "model must be fitted first (i.e have 'coef_' attribute)" 103 assert ( 104 self.n_obs_ is not None 105 ), "model must be fitted first (i.e have 'n_obs_' attribute)" 106 107 if len(X.shape) == 1: 108 X = X.reshape(1, -1) 109 110 assert X.shape[0] == 1, "X must have one row" 111 112 self.updating_factor_ = self.n_obs_ ** (-self.alpha) 113 114 if isinstance(self.clf, Base): # nnetsauce model --- 115 newX = deepcopy(X) 116 117 if isinstance( 118 self.clf, CustomClassifier 119 ): # other nnetsauce model (CustomClassifier) --- 120 newX = self.clf.cook_test_set(X=X) 121 if isinstance(X, pd.DataFrame): 122 newx = newX.values.ravel() 123 else: 124 newx = newX.ravel() 125 126 else: # an sklearn model --- 127 if isinstance(X, pd.DataFrame): 128 newx = X.values.ravel() 129 else: 130 newx = X.ravel() 131 132 new_coef = self.clf.coef_ + self.updating_factor_ * np.dot( 133 newx, y - np.dot(newx, self.clf.coef_) 134 ) 135 self.clf.coef_ = _update_mean(self.clf.coef_, self.n_obs_, new_coef) 136 self.coef_ = deepcopy(self.clf.coef_) 137 self.n_obs_ += 1 138 return self
Update a classification model with new observations
Parameters
clf: object
A classification model with a coef_ attribute
alpha: float
Updating factor's exponent
Attributes
n_obs_: int
Number of observations
coef_: np.ndarray
Coefficients of the model
updating_factor_: float
Updating factor
24class RidgeRegressor(BaseEstimator, RegressorMixin): 25 """Ridge. 26 27 Attributes: 28 29 reg_lambda: float 30 regularization parameter. 31 32 backend: str 33 type of backend; must be in ('cpu', 'gpu', 'tpu') 34 35 """ 36 37 def __init__(self, reg_lambda=0.1, backend="cpu"): 38 assert backend in ( 39 "cpu", 40 "gpu", 41 "tpu", 42 ), "`backend` must be in ('cpu', 'gpu', 'tpu')" 43 44 sys_platform = platform.system() 45 46 if (sys_platform == "Windows") and (backend in ("gpu", "tpu")): 47 warnings.warn( 48 "No GPU/TPU computing on Windows yet, backend set to 'cpu'" 49 ) 50 backend = "cpu" 51 52 self.reg_lambda = reg_lambda 53 self.backend = backend 54 self.coef_ = None 55 56 def fit(self, X, y, **kwargs): 57 """Fit matrixops (classifier) to training data (X, y) 58 59 Args: 60 61 X: {array-like}, shape = [n_samples, n_features] 62 Training vectors, where n_samples is the number 63 of samples and n_features is the number of features. 64 65 y: array-like, shape = [n_samples] 66 Target values. 67 68 **kwargs: additional parameters to be passed to self.cook_training_set. 69 70 Returns: 71 72 self: object. 73 74 """ 75 self.ym, centered_y = mo.center_response(y) 76 self.xm = X.mean(axis=0) 77 self.xsd = X.std(axis=0) 78 self.xsd[self.xsd == 0] = 1 # avoid division by zero 79 X_ = (X - self.xm[None, :]) / self.xsd[None, :] 80 81 if self.backend == "cpu": 82 if len(centered_y.shape) <= 1: 83 eye_term = np.sqrt(self.reg_lambda) * np.eye(X.shape[1]) 84 X_ = np.row_stack((X_, eye_term)) 85 y_ = np.concatenate((centered_y, np.zeros(X.shape[1]))) 86 beta_info = get_beta(X_, y_) 87 self.coef_ = beta_info[0] 88 else: 89 try: 90 eye_term = np.sqrt(self.reg_lambda) * np.eye(X.shape[1]) 91 X_ = np.row_stack((X_, eye_term)) 92 y_ = np.row_stack( 93 ( 94 centered_y, 95 np.zeros((eye_term.shape[0], centered_y.shape[1])), 96 ) 97 ) 98 beta_info = get_beta(X_, y_) 99 self.coef_ = beta_info[0] 100 except Exception: 101 x = inv( 102 mo.crossprod(X_) + self.reg_lambda * np.eye(X_.shape[1]) 103 ) 104 hat_matrix = mo.tcrossprod(x, X_) 105 self.coef_ = mo.safe_sparse_dot(hat_matrix, centered_y) 106 return self 107 108 x = jinv( 109 mo.crossprod(X_, backend=self.backend) 110 + self.reg_lambda * jnp.eye(X_.shape[1]) 111 ) 112 113 hat_matrix = mo.tcrossprod(x, X_, backend=self.backend) 114 self.coef_ = mo.safe_sparse_dot( 115 hat_matrix, centered_y, backend=self.backend 116 ) 117 return self 118 119 def predict(self, X, **kwargs): 120 """Predict test data X. 121 122 Args: 123 124 X: {array-like}, shape = [n_samples, n_features] 125 Training vectors, where n_samples is the number 126 of samples and n_features is the number of features. 127 128 **kwargs: additional parameters to be passed to `predict_proba` 129 130 Returns: 131 132 model predictions: {array-like} 133 134 """ 135 X_ = (X - self.xm[None, :]) / self.xsd[None, :] 136 137 if self.backend == "cpu": 138 if isinstance(self.ym, float): 139 return self.ym + mo.safe_sparse_dot(X_, self.coef_) 140 return self.ym[None, :] + mo.safe_sparse_dot(X_, self.coef_) 141 142 # if self.backend in ("gpu", "tpu"): 143 if isinstance(self.ym, float): 144 return self.ym + mo.safe_sparse_dot( 145 X_, self.coef_, backend=self.backend 146 ) 147 return self.ym[None, :] + mo.safe_sparse_dot( 148 X_, self.coef_, backend=self.backend 149 )
Ridge regression (L2-regularized least squares). A brief usage sketch follows the attributes below.
Attributes:
reg_lambda: float
regularization parameter.
backend: str
type of backend; must be in ('cpu', 'gpu', 'tpu')
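A brief usage sketch (the class is exported at the package level, so `nnetsauce.RidgeRegressor` is assumed here); inputs are standardized and the response centered internally:

```python
# Hypothetical usage sketch for RidgeRegressor on toy data.
import numpy as np
import nnetsauce as ns

rng = np.random.default_rng(0)
X = rng.normal(size=(150, 4))
y = X @ np.array([1.0, -2.0, 0.5, 0.0]) + 0.1 * rng.normal(size=150)

model = ns.RidgeRegressor(reg_lambda=0.5, backend="cpu")
model.fit(X, y)
preds = model.predict(X[:5])
print(model.coef_)
```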
Fit the ridge regressor to training data (X, y)
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to self.cook_training_set.
Returns:
self: object.
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters (currently unused)
Returns:
model predictions: {array-like}
23class Ridge2Regressor(Ridge2, RegressorMixin): 24 """Ridge regression with 2 regularization parameters derived from class Ridge 25 26 Parameters: 27 28 n_hidden_features: int 29 number of nodes in the hidden layer 30 31 activation_name: str 32 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 33 34 a: float 35 hyperparameter for 'prelu' or 'elu' activation function 36 37 nodes_sim: str 38 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 39 'uniform' 40 41 bias: boolean 42 indicates if the hidden layer contains a bias term (True) or not 43 (False) 44 45 dropout: float 46 regularization parameter; (random) percentage of nodes dropped out 47 of the training 48 49 n_clusters: int 50 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: 51 no clustering) 52 53 cluster_encode: bool 54 defines how the variable containing clusters is treated (default is one-hot) 55 if `False`, then labels are used, without one-hot encoding 56 57 type_clust: str 58 type of clustering method: currently k-means ('kmeans') or Gaussian 59 Mixture Model ('gmm') 60 61 type_scaling: a tuple of 3 strings 62 scaling methods for inputs, hidden layer, and clustering respectively 63 (and when relevant). 64 Currently available: standardization ('std') or MinMax scaling ('minmax') 65 66 lambda1: float 67 regularization parameter on direct link 68 69 lambda2: float 70 regularization parameter on hidden layer 71 72 seed: int 73 reproducibility seed for nodes_sim=='uniform' 74 75 backend: str 76 'cpu' or 'gpu' or 'tpu' 77 78 Attributes: 79 80 beta_: {array-like} 81 regression coefficients 82 83 coef_: {array-like} 84 alias for `beta_`, regression coefficients 85 86 y_mean_: float 87 average response 88 89 """ 90 91 # construct the object ----- 92 93 def __init__( 94 self, 95 n_hidden_features=5, 96 activation_name="relu", 97 a=0.01, 98 nodes_sim="sobol", 99 bias=True, 100 dropout=0, 101 n_clusters=2, 102 cluster_encode=True, 103 type_clust="kmeans", 104 type_scaling=("std", "std", "std"), 105 lambda1=0.1, 106 lambda2=0.1, 107 seed=123, 108 backend="cpu", 109 ): 110 super().__init__( 111 n_hidden_features=n_hidden_features, 112 activation_name=activation_name, 113 a=a, 114 nodes_sim=nodes_sim, 115 bias=bias, 116 dropout=dropout, 117 n_clusters=n_clusters, 118 cluster_encode=cluster_encode, 119 type_clust=type_clust, 120 type_scaling=type_scaling, 121 lambda1=lambda1, 122 lambda2=lambda2, 123 seed=seed, 124 backend=backend, 125 ) 126 127 self.type_fit = "regression" 128 self.coef_ = None 129 130 def fit(self, X, y, **kwargs): 131 """Fit Ridge model to training data (X, y). 132 133 Args: 134 135 X: {array-like}, shape = [n_samples, n_features] 136 Training vectors, where n_samples is the number 137 of samples and n_features is the number of features. 138 139 y: array-like, shape = [n_samples] 140 Target values. 
141 142 **kwargs: additional parameters to be passed to 143 self.cook_training_set or self.obj.fit 144 145 Returns: 146 147 self: object 148 149 """ 150 151 sys_platform = platform.system() 152 153 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 154 155 n_X, p_X = X.shape 156 n_Z, p_Z = scaled_Z.shape 157 158 if self.n_clusters > 0: 159 if self.encode_clusters == True: 160 n_features = p_X + self.n_clusters 161 else: 162 n_features = p_X + 1 163 else: 164 n_features = p_X 165 166 X_ = scaled_Z[:, 0:n_features] 167 Phi_X_ = scaled_Z[:, n_features:p_Z] 168 169 B = mo.crossprod(x=X_, backend=self.backend) + self.lambda1 * np.diag( 170 np.repeat(1, n_features) 171 ) 172 C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend) 173 D = mo.crossprod( 174 x=Phi_X_, backend=self.backend 175 ) + self.lambda2 * np.diag(np.repeat(1, Phi_X_.shape[1])) 176 177 if sys_platform in ("Linux", "Darwin"): 178 B_inv = pinv(B) if self.backend == "cpu" else jpinv(B) 179 else: 180 B_inv = pinv(B) 181 182 W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend) 183 S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend) 184 185 if sys_platform in ("Linux", "Darwin"): 186 S_inv = pinv(S_mat) if self.backend == "cpu" else jpinv(S_mat) 187 else: 188 S_inv = pinv(S_mat) 189 190 Y = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend) 191 inv = mo.rbind( 192 mo.cbind( 193 x=B_inv + mo.crossprod(x=W, y=Y, backend=self.backend), 194 y=-np.transpose(Y), 195 backend=self.backend, 196 ), 197 mo.cbind(x=-Y, y=S_inv, backend=self.backend), 198 backend=self.backend, 199 ) 200 201 self.beta_ = mo.safe_sparse_dot( 202 a=inv, 203 b=mo.crossprod(x=scaled_Z, y=centered_y, backend=self.backend), 204 backend=self.backend, 205 ) 206 207 self.coef_ = self.beta_ # sklearn compatibility 208 209 return self 210 211 def predict(self, X, **kwargs): 212 """Predict test data X. 213 214 Args: 215 216 X: {array-like}, shape = [n_samples, n_features] 217 Training vectors, where n_samples is the number 218 of samples and n_features is the number of features. 219 220 **kwargs: additional parameters to be passed to 221 self.cook_test_set 222 223 Returns: 224 225 model predictions: {array-like} 226 227 """ 228 229 if len(X.shape) == 1: 230 n_features = X.shape[0] 231 new_X = mo.rbind( 232 x=X.reshape(1, n_features), 233 y=np.ones(n_features).reshape(1, n_features), 234 backend=self.backend, 235 ) 236 237 return ( 238 self.y_mean_ 239 + mo.safe_sparse_dot( 240 a=self.cook_test_set(new_X, **kwargs), 241 b=self.beta_, 242 backend=self.backend, 243 ) 244 )[0] 245 246 return self.y_mean_ + mo.safe_sparse_dot( 247 a=self.cook_test_set(X, **kwargs), 248 b=self.beta_, 249 backend=self.backend, 250 ) 251 252 def partial_fit(self, X, y, learning_rate=0.01, decay=0.001, **kwargs): 253 """Incrementally fit the Ridge model using SGD-style updates. 254 255 Uses the update rule: w_{n+1} = w_n + γ_n * x_n * [y_n - x_n^T * w_n] - γ_n * λ * w_n 256 for online learning with individual samples. 
257 258 Args: 259 X: {array-like}, shape = [n_samples, n_features] 260 Training vectors for this batch 261 262 y: array-like, shape = [n_samples] 263 Target values for this batch 264 265 learning_rate: float, default=0.01 266 Initial learning rate for SGD updates 267 268 decay: float, default=0.001 269 Learning rate decay parameter 270 271 **kwargs: additional parameters to be passed to self.cook_training_set 272 273 Returns: 274 self: object 275 """ 276 277 # Input validation 278 X = np.asarray(X) 279 y = np.asarray(y) 280 281 if X.shape[0] != y.shape[0]: 282 raise ValueError("X and y must have the same number of samples") 283 284 # Handle first call 285 if not self._is_fitted: 286 # Initialize learning parameters 287 self.initial_learning_rate = learning_rate 288 self.decay = decay 289 self._step_count = 0 290 self._is_fitted = True 291 292 # Process the batch 293 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 294 295 # Get dimensions 296 n_samples, n_features_total = scaled_Z.shape 297 n_original_features = X.shape[1] 298 299 # Determine feature dimensions for regularization 300 if self.n_clusters > 0: 301 if self.cluster_encode: 302 n_direct_features = n_original_features + self.n_clusters 303 else: 304 n_direct_features = n_original_features + 1 305 else: 306 n_direct_features = n_original_features 307 308 # Initialize beta_ if first time 309 if not hasattr(self, "beta_") or self.beta_ is None: 310 # For regression, beta_ is 1D (single output) 311 self.beta_ = np.zeros(n_features_total) 312 313 # Precompute indices for regularization 314 direct_indices = slice(0, n_direct_features) 315 hidden_indices = slice(n_direct_features, n_features_total) 316 317 # Process each sample with SGD 318 for i in range(n_samples): 319 self._step_count += 1 320 321 # Current learning rate with decay 322 current_lr = self.initial_learning_rate / ( 323 1 + self.decay * self._step_count 324 ) 325 326 # Current sample and target 327 x_i = scaled_Z[i, :] # Feature vector 328 y_i = ( 329 centered_y[i] if centered_y.ndim == 1 else centered_y[i, 0] 330 ) # Scalar target 331 332 # Prediction: x_i^T * beta 333 prediction = x_i @ self.beta_ 334 335 # Error: y_i - prediction 336 error = y_i - prediction 337 338 # Gradient update: current_lr * x_i * error 339 gradient_update = current_lr * x_i * error 340 341 # Regularization terms (more efficient indexing) 342 reg_update = np.zeros_like(self.beta_) 343 reg_update[direct_indices] = ( 344 current_lr * self.lambda1 * self.beta_[direct_indices] 345 ) 346 reg_update[hidden_indices] = ( 347 current_lr * self.lambda2 * self.beta_[hidden_indices] 348 ) 349 350 # Combined update: beta = beta + gradient_update - reg_update 351 self.beta_ += gradient_update - reg_update 352 353 self.coef_ = self.beta_ # sklearn compatibility 354 355 return self
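The `partial_fit` docstring at the end of the listing above describes a decayed SGD update. Here is a minimal standalone sketch of that rule, simplified to a single penalty `lam` (the actual method applies `lambda1` to the direct-link block and `lambda2` to the hidden-feature block):

```python
# Illustrative sketch of the decayed SGD ridge step used by partial_fit:
#   w <- w + lr_t * x * (y - x @ w) - lr_t * lam * w,  with lr_t = lr0 / (1 + decay * t)
import numpy as np

def sgd_ridge_step(w, x, y, t, lr0=0.01, decay=0.001, lam=0.1):
    lr_t = lr0 / (1 + decay * t)
    error = y - x @ w          # scalar residual for one observation
    return w + lr_t * x * error - lr_t * lam * w
```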
Ridge regression with two regularization parameters, derived from class Ridge2
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
lambda1: float
regularization parameter on direct link
lambda2: float
regularization parameter on hidden layer
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
'cpu' or 'gpu' or 'tpu'
Attributes:
beta_: {array-like}
regression coefficients
coef_: {array-like}
alias for `beta_`, regression coefficients
y_mean_: float
average response
Fit Ridge model to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
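A hypothetical end-to-end sketch for Ridge2Regressor (exported at the package level), using the constructor parameters documented above:

```python
# Hypothetical usage sketch: fit/predict with two regularization parameters.
import numpy as np
import nnetsauce as ns

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 3))
y = np.sin(X[:, 0]) + X[:, 1] ** 2 + 0.1 * rng.normal(size=200)

model = ns.Ridge2Regressor(
    n_hidden_features=20,
    lambda1=0.1,   # penalty on the direct link
    lambda2=1.0,   # penalty on the hidden layer
    n_clusters=2,
)
model.fit(X, y)
preds = model.predict(X[:10])
```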
18class Ridge2Classifier(Ridge2, ClassifierMixin): 19 """Multinomial logit classification with 2 regularization parameters 20 21 Parameters: 22 23 n_hidden_features: int 24 number of nodes in the hidden layer 25 26 activation_name: str 27 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 28 29 a: float 30 hyperparameter for 'prelu' or 'elu' activation function 31 32 nodes_sim: str 33 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 34 'uniform' 35 36 bias: boolean 37 indicates if the hidden layer contains a bias term (True) or not 38 (False) 39 40 dropout: float 41 regularization parameter; (random) percentage of nodes dropped out 42 of the training 43 44 direct_link: boolean 45 indicates if the original predictors are included (True) in model's 46 fitting or not (False) 47 48 n_clusters: int 49 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: 50 no clustering) 51 52 cluster_encode: bool 53 defines how the variable containing clusters is treated (default is one-hot) 54 if `False`, then labels are used, without one-hot encoding 55 56 type_clust: str 57 type of clustering method: currently k-means ('kmeans') or Gaussian 58 Mixture Model ('gmm') 59 60 type_scaling: a tuple of 3 strings 61 scaling methods for inputs, hidden layer, and clustering respectively 62 (and when relevant). 63 Currently available: standardization ('std') or MinMax scaling ('minmax') 64 65 lambda1: float 66 regularization parameter on direct link 67 68 lambda2: float 69 regularization parameter on hidden layer 70 71 solver: str 72 optimization function "L-BFGS-B", "Newton-CG", 73 "trust-ncg", "L-BFGS-B-lstsq", "Newton-CG-lstsq", 74 "trust-ncg-lstsq" (see scipy.optimize.minimize) 75 When using "L-BFGS-B-lstsq", "Newton-CG-lstsq", or "trust-ncg-lstsq", 76 the initial value for the optimization is set to the least squares solution 77 78 seed: int 79 reproducibility seed for nodes_sim=='uniform' 80 81 backend: str 82 "cpu" or "gpu" or "tpu" 83 84 Attributes: 85 86 beta_: {array-like} 87 regression coefficients 88 89 classes_: {array-like} 90 unique classes in the target variable 91 92 minloglik_: float 93 minimum value of the negative log-likelihood 94 95 Examples: 96 97 See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/ridge_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/ridge_classification.py) 98 99 ```python 100 import nnetsauce as ns 101 import numpy as np 102 from sklearn.datasets import load_breast_cancer 103 from sklearn.model_selection import train_test_split 104 from time import time 105 106 107 breast_cancer = load_breast_cancer() 108 X = breast_cancer.data 109 y = breast_cancer.target 110 111 # split data into training test and test set 112 np.random.seed(123) 113 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) 114 115 # create the model with nnetsauce 116 fit_obj = ns.Ridge2Classifier(lambda1 = 6.90185578e+04, 117 lambda2 = 3.17392781e+02, 118 n_hidden_features=95, 119 n_clusters=2, 120 dropout = 3.62817383e-01, 121 type_clust = "gmm") 122 123 # fit the model on training set 124 start = time() 125 fit_obj.fit(X_train, y_train) 126 print(f"Elapsed {time() - start}") 127 128 # get the accuracy on test set 129 start = time() 130 print(fit_obj.score(X_test, y_test)) 131 print(f"Elapsed {time() - start}") 132 133 # get area under the curve on test set (auc) 134 print(fit_obj.score(X_test, y_test, scoring="roc_auc")) 135 ``` 136 137 138 """ 139 140 _estimator_type = "classifier" 141 142 # 
construct the object ----- 143 144 def __init__( 145 self, 146 n_hidden_features=5, 147 activation_name="relu", 148 a=0.01, 149 nodes_sim="sobol", 150 bias=True, 151 dropout=0, 152 direct_link=True, 153 n_clusters=2, 154 cluster_encode=True, 155 type_clust="kmeans", 156 type_scaling=("std", "std", "std"), 157 lambda1=0.1, 158 lambda2=0.1, 159 solver="L-BFGS-B", 160 seed=123, 161 backend="cpu", 162 ): 163 super().__init__( 164 n_hidden_features=n_hidden_features, 165 activation_name=activation_name, 166 a=a, 167 nodes_sim=nodes_sim, 168 bias=bias, 169 dropout=dropout, 170 direct_link=direct_link, 171 n_clusters=n_clusters, 172 cluster_encode=cluster_encode, 173 type_clust=type_clust, 174 type_scaling=type_scaling, 175 lambda1=lambda1, 176 lambda2=lambda2, 177 seed=seed, 178 backend=backend, 179 ) 180 181 self.type_fit = "classification" 182 self.solver = solver 183 self.beta_ = None 184 self.classes_ = None 185 self.minloglik_ = None 186 self.coef_ = None 187 188 def loglik(self, X, Y, **kwargs): 189 """Log-likelihood for training data (X, Y). 190 191 Args: 192 193 X: {array-like}, shape = [n_samples, n_features] 194 Training vectors, where n_samples is the number 195 of samples and n_features is the number of features. 196 197 Y: array-like, shape = [n_samples] 198 One-hot encode target values. 199 200 **kwargs: additional parameters to be passed to 201 self.cook_training_set or self.obj.fit 202 203 Returns: 204 205 """ 206 207 def loglik_grad_hess(Y, X, B, XB, hessian=True, **kwargs): 208 # nobs, n_classes 209 n, K = Y.shape 210 211 # total number of covariates 212 p = X.shape[1] 213 214 # initial number of covariates 215 init_p = p - self.n_hidden_features 216 217 max_double = 709.0 218 XB[XB > max_double] = max_double 219 exp_XB = np.exp(XB) 220 probs = exp_XB / exp_XB.sum(axis=1)[:, None] 221 222 # gradient ----- 223 # (Y - p) -> (n, K) 224 # X -> (n, p) 225 # (K, n) %*% (n, p) -> (K, p) 226 if hessian is False: 227 grad = ( 228 -mo.safe_sparse_dot( 229 a=(Y - probs).T, b=X, backend=self.backend 230 ) 231 / n 232 ) 233 grad += self.lambda1 * B[0:init_p, :].sum(axis=0)[:, None] 234 grad += self.lambda2 * B[init_p:p, :].sum(axis=0)[:, None] 235 236 return grad.flatten() 237 238 # hessian ----- 239 if hessian is True: 240 Kp = K * p 241 hess = np.zeros((Kp, Kp), float) 242 for k1 in range(K): 243 x_index = range(k1 * p, (k1 + 1) * p) 244 for k2 in range(k1, K): 245 y_index = range(k2 * p, (k2 + 1) * p) 246 H_sub = ( 247 -mo.safe_sparse_dot( 248 a=X.T, 249 b=(probs[:, k1] * probs[:, k2])[:, None] * X, 250 backend=self.backend, 251 ) 252 / n 253 ) # do not store 254 hess[np.ix_(x_index, y_index)] = hess[ 255 np.ix_(y_index, x_index) 256 ] = H_sub 257 258 return hess + (self.lambda1 + self.lambda2) * np.identity(Kp) 259 260 # total number of covariates 261 p = X.shape[1] 262 263 # initial number of covariates 264 init_p = p - self.n_hidden_features 265 266 # log-likelihood (1st return) 267 def loglik_func(x): 268 # (p, K) 269 B = x.reshape(Y.shape[1], p).T 270 271 # (n, K) 272 XB = mo.safe_sparse_dot(X, B, backend=self.backend) 273 274 res = -(np.sum(Y * XB, axis=1) - logsumexp(XB)).mean() 275 276 res += ( 277 0.5 278 * self.lambda1 279 * mo.squared_norm(B[0:init_p, :], backend=self.backend) 280 ) 281 res += ( 282 0.5 283 * self.lambda2 284 * mo.squared_norm(B[init_p:p, :], backend=self.backend) 285 ) 286 287 return res 288 289 # gradient of log-likelihood 290 def grad_func(x): 291 # (p, K) 292 B = x.reshape(Y.shape[1], p).T 293 294 return loglik_grad_hess( 295 Y=Y, 296 X=X, 297 B=B, 298 
XB=mo.safe_sparse_dot(X, B, backend=self.backend), 299 hessian=False, 300 **kwargs 301 ) 302 303 # hessian of log-likelihood 304 def hessian_func(x): 305 # (p, K) 306 B = x.reshape(Y.shape[1], p).T 307 308 return loglik_grad_hess( 309 Y=Y, 310 X=X, 311 B=B, 312 XB=mo.safe_sparse_dot(X, B, backend=self.backend), 313 hessian=True, 314 **kwargs 315 ) 316 317 return loglik_func, grad_func, hessian_func 318 319 # newton-cg 320 # L-BFGS-B 321 def fit(self, X, y, **kwargs): 322 """Fit Ridge model to training data (X, y). 323 324 for beta: regression coeffs (beta11, ..., beta1p, ..., betaK1, ..., betaKp) 325 for K classes and p covariates. 326 327 Args: 328 329 X: {array-like}, shape = [n_samples, n_features] 330 Training vectors, where n_samples is the number 331 of samples and n_features is the number of features. 332 333 y: array-like, shape = [n_samples] 334 Target values. 335 336 **kwargs: additional parameters to be passed to 337 self.cook_training_set or self.obj.fit 338 339 Returns: 340 341 self: object 342 343 """ 344 345 assert mx.is_factor(y), "y must contain only integers" 346 347 output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 348 349 self.n_classes = len(np.unique(y)) 350 self.classes_ = np.unique(y) # for compatibility with sklearn 351 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 352 353 Y = mo.one_hot_encode2(output_y, self.n_classes) 354 355 # optimize for beta, minimize self.loglik (maximize loglik) ----- 356 loglik_func, grad_func, hessian_func = self.loglik(X=scaled_Z, Y=Y) 357 358 if self.solver == "L-BFGS-B": 359 opt = minimize( 360 fun=loglik_func, 361 x0=np.zeros(scaled_Z.shape[1] * self.n_classes), 362 jac=grad_func, 363 method=self.solver, 364 ) 365 self.beta_ = opt.x 366 self.minloglik_ = opt.fun 367 368 if self.solver in ("Newton-CG", "trust-ncg"): 369 opt = minimize( 370 fun=loglik_func, 371 x0=np.zeros(scaled_Z.shape[1] * self.n_classes), 372 jac=grad_func, 373 hess=hessian_func, 374 method=self.solver, 375 ) 376 self.beta_ = opt.x 377 self.minloglik_ = opt.fun 378 379 if self.solver == "L-BFGS-B-lstsq": 380 opt = minimize( 381 fun=loglik_func, 382 x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten( 383 order="F" 384 ), 385 jac=grad_func, 386 method="L-BFGS-B", 387 ) 388 self.beta_ = opt.x 389 self.minloglik_ = opt.fun 390 391 if self.solver in "Newton-CG-lstsq": 392 opt = minimize( 393 fun=loglik_func, 394 x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten( 395 order="F" 396 ), 397 jac=grad_func, 398 hess=hessian_func, 399 method="Newton-CG", 400 ) 401 self.beta_ = opt.x 402 self.minloglik_ = opt.fun 403 404 if self.solver in "trust-ncg-lstsq": 405 opt = minimize( 406 fun=loglik_func, 407 x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten( 408 order="F" 409 ), 410 jac=grad_func, 411 hess=hessian_func, 412 method="trust-ncg", 413 ) 414 self.beta_ = opt.x 415 self.minloglik_ = opt.fun 416 417 self.coef_ = self.beta_ 418 419 self.classes_ = np.unique(y) 420 421 return self 422 423 def predict(self, X, **kwargs): 424 """Predict test data X. 425 426 Args: 427 428 X: {array-like}, shape = [n_samples, n_features] 429 Training vectors, where n_samples is the number 430 of samples and n_features is the number of features. 431 432 **kwargs: additional parameters to be passed to 433 self.cook_test_set 434 435 Returns: 436 437 model predictions: {array-like} 438 """ 439 440 return np.argmax(self.predict_proba(X, **kwargs), axis=1) 441 442 def predict_proba(self, X, **kwargs): 443 """Predict probabilities for test data X. 
444 445 Args: 446 447 X: {array-like}, shape = [n_samples, n_features] 448 Training vectors, where n_samples is the number 449 of samples and n_features is the number of features. 450 451 **kwargs: additional parameters to be passed to 452 self.cook_test_set 453 454 Returns: 455 456 probability estimates for test data: {array-like} 457 458 """ 459 if len(X.shape) == 1: 460 n_features = X.shape[0] 461 new_X = mo.rbind( 462 X.reshape(1, n_features), 463 np.ones(n_features).reshape(1, n_features), 464 ) 465 466 Z = self.cook_test_set(new_X, **kwargs) 467 468 else: 469 Z = self.cook_test_set(X, **kwargs) 470 471 ZB = mo.safe_sparse_dot( 472 a=Z, 473 b=self.beta_.reshape( 474 self.n_classes, 475 X.shape[1] + self.n_hidden_features + self.n_clusters, 476 ).T, 477 backend=self.backend, 478 ) 479 480 exp_ZB = np.exp(ZB) 481 482 return exp_ZB / exp_ZB.sum(axis=1)[:, None] 483 484 @property 485 def _estimator_type(self): 486 return "classifier"
Multinomial logit classification with 2 regularization parameters
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in the model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
lambda1: float
regularization parameter on direct link
lambda2: float
regularization parameter on hidden layer
solver: str
optimization method: "L-BFGS-B", "Newton-CG",
"trust-ncg", "L-BFGS-B-lstsq", "Newton-CG-lstsq", or
"trust-ncg-lstsq" (see scipy.optimize.minimize).
With the "-lstsq" variants, the optimization is initialized
at the least squares solution
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
beta_: {array-like}
regression coefficients
classes_: {array-like}
unique classes in the target variable
minloglik_: float
minimum value of the negative log-likelihood
Examples:
See also https://github.com/Techtonique/nnetsauce/blob/master/examples/ridge_classification.py
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from time import time
breast_cancer = load_breast_cancer()
X = breast_cancer.data
y = breast_cancer.target
# split data into training set and test set
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
# create the model with nnetsauce
fit_obj = ns.Ridge2Classifier(lambda1 = 6.90185578e+04,
lambda2 = 3.17392781e+02,
n_hidden_features=95,
n_clusters=2,
dropout = 3.62817383e-01,
type_clust = "gmm")
# fit the model on training set
start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")
# get the accuracy on test set
start = time()
print(fit_obj.score(X_test, y_test))
print(f"Elapsed {time() - start}")
# get area under the curve on test set (auc)
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
Fit Ridge model to training data (X, y).
beta contains the regression coefficients (beta11, ..., beta1p, ..., betaK1, ..., betaKp) for K classes and p covariates.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
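A minimal sketch of how the fitted coefficients are laid out (illustrative only; it mirrors the reshape used internally by `predict_proba`, and the breast cancer data and hyperparameters are arbitrary):

```python
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer

X, y = load_breast_cancer(return_X_y=True)
clf = ns.Ridge2Classifier(n_hidden_features=10, n_clusters=2).fit(X, y)

# beta_ is stored as a flat vector of length K * p_total, where
# p_total counts original features, hidden features and cluster columns
K = clf.n_classes
p_total = X.shape[1] + clf.n_hidden_features + clf.n_clusters
B = clf.beta_.reshape(K, p_total).T  # one column of coefficients per class
print(B.shape)  # (p_total, K)
```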
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
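As a quick check (a minimal sketch, assuming default settings and arbitrary data), the returned rows are softmax probabilities that sum to one, and `predict` is simply their row-wise argmax:

```python
import numpy as np
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer

X, y = load_breast_cancer(return_X_y=True)
clf = ns.Ridge2Classifier().fit(X, y)

probs = clf.predict_proba(X)  # shape (n_samples, n_classes)
print(np.allclose(probs.sum(axis=1), 1.0))                   # rows sum to 1
print(np.array_equal(clf.predict(X), probs.argmax(axis=1)))  # predict = argmax
```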
23class Ridge2MultitaskClassifier(Ridge2, ClassifierMixin): 24 """Multitask Ridge classification with 2 regularization parameters 25 26 Parameters: 27 28 n_hidden_features: int 29 number of nodes in the hidden layer 30 31 activation_name: str 32 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 33 34 a: float 35 hyperparameter for 'prelu' or 'elu' activation function 36 37 nodes_sim: str 38 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 39 'uniform' 40 41 bias: boolean 42 indicates if the hidden layer contains a bias term (True) or not 43 (False) 44 45 dropout: float 46 regularization parameter; (random) percentage of nodes dropped out 47 of the training 48 49 n_clusters: int 50 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: 51 no clustering) 52 53 cluster_encode: bool 54 defines how the variable containing clusters is treated (default is one-hot) 55 if `False`, then labels are used, without one-hot encoding 56 57 type_clust: str 58 type of clustering method: currently k-means ('kmeans') or Gaussian 59 Mixture Model ('gmm') 60 61 type_scaling: a tuple of 3 strings 62 scaling methods for inputs, hidden layer, and clustering respectively 63 (and when relevant). 64 Currently available: standardization ('std') or MinMax scaling ('minmax') 65 66 lambda1: float 67 regularization parameter on direct link 68 69 lambda2: float 70 regularization parameter on hidden layer 71 72 seed: int 73 reproducibility seed for nodes_sim=='uniform' 74 75 backend: str 76 "cpu" or "gpu" or "tpu" 77 78 Attributes: 79 80 beta_: {array-like} 81 regression coefficients 82 83 coef_: {array-like} 84 alias for `beta_`, regression coefficients 85 86 Examples: 87 88 See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/ridgemtask_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/ridgemtask_classification.py) 89 90 ```python 91 import nnetsauce as ns 92 import numpy as np 93 from sklearn.datasets import load_breast_cancer 94 from sklearn.model_selection import train_test_split 95 from sklearn import metrics 96 from time import time 97 98 breast_cancer = load_breast_cancer() 99 Z = breast_cancer.data 100 t = breast_cancer.target 101 np.random.seed(123) 102 X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2) 103 104 fit_obj = ns.Ridge2MultitaskClassifier(n_hidden_features=int(9.83730469e+01), 105 dropout=4.31054687e-01, 106 n_clusters=int(1.71484375e+00), 107 lambda1=1.24023438e+01, lambda2=7.30263672e+03) 108 109 start = time() 110 fit_obj.fit(X_train, y_train) 111 print(f"Elapsed {time() - start}") 112 113 print(fit_obj.score(X_test, y_test)) 114 print(fit_obj.score(X_test, y_test, scoring="roc_auc")) 115 116 start = time() 117 preds = fit_obj.predict(X_test) 118 print(f"Elapsed {time() - start}") 119 print(metrics.classification_report(preds, y_test)) 120 ``` 121 122 """ 123 124 # construct the object ----- 125 _estimator_type = "classifier" 126 127 def __init__( 128 self, 129 n_hidden_features=5, 130 activation_name="relu", 131 a=0.01, 132 nodes_sim="sobol", 133 bias=True, 134 dropout=0, 135 n_clusters=2, 136 cluster_encode=True, 137 type_clust="kmeans", 138 type_scaling=("std", "std", "std"), 139 lambda1=0.1, 140 lambda2=0.1, 141 seed=123, 142 backend="cpu", 143 ): 144 super().__init__( 145 n_hidden_features=n_hidden_features, 146 activation_name=activation_name, 147 a=a, 148 nodes_sim=nodes_sim, 149 bias=bias, 150 dropout=dropout, 151 n_clusters=n_clusters, 152 cluster_encode=cluster_encode, 153 
type_clust=type_clust, 154 type_scaling=type_scaling, 155 lambda1=lambda1, 156 lambda2=lambda2, 157 seed=seed, 158 backend=backend, 159 ) 160 161 self.type_fit = "classification" 162 self.coef_ = None 163 164 def fit(self, X, y, **kwargs): 165 """Fit Ridge model to training data (X, y). 166 167 Args: 168 169 X: {array-like}, shape = [n_samples, n_features] 170 Training vectors, where n_samples is the number 171 of samples and n_features is the number of features. 172 173 y: array-like, shape = [n_samples] 174 Target values. 175 176 **kwargs: additional parameters to be passed to 177 self.cook_training_set or self.obj.fit 178 179 Returns: 180 181 self: object 182 183 """ 184 185 sys_platform = platform.system() 186 187 assert mx.is_factor(y), "y must contain only integers" 188 189 self.classes_ = np.unique(y) # for compatibility with sklearn 190 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 191 192 output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 193 194 n_X, p_X = X.shape 195 n_Z, p_Z = scaled_Z.shape 196 197 self.n_classes = len(np.unique(y)) 198 199 # multitask response 200 Y = mo.one_hot_encode2(output_y, self.n_classes) 201 202 if self.n_clusters > 0: 203 if self.encode_clusters == True: 204 n_features = p_X + self.n_clusters 205 else: 206 n_features = p_X + 1 207 else: 208 n_features = p_X 209 210 X_ = scaled_Z[:, 0:n_features] 211 Phi_X_ = scaled_Z[:, n_features:p_Z] 212 213 B = mo.crossprod(x=X_, backend=self.backend) + self.lambda1 * np.diag( 214 np.repeat(1, X_.shape[1]) 215 ) 216 C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend) 217 D = mo.crossprod( 218 x=Phi_X_, backend=self.backend 219 ) + self.lambda2 * np.diag(np.repeat(1, Phi_X_.shape[1])) 220 221 if sys_platform in ("Linux", "Darwin"): 222 B_inv = pinv(B) if self.backend == "cpu" else jpinv(B) 223 else: 224 B_inv = pinv(B) 225 226 W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend) 227 S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend) 228 229 if sys_platform in ("Linux", "Darwin"): 230 S_inv = pinv(S_mat) if self.backend == "cpu" else jpinv(S_mat) 231 else: 232 S_inv = pinv(S_mat) 233 234 Y2 = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend) 235 inv = mo.rbind( 236 mo.cbind( 237 x=B_inv + mo.crossprod(x=W, y=Y2, backend=self.backend), 238 y=-np.transpose(Y2), 239 backend=self.backend, 240 ), 241 mo.cbind(x=-Y2, y=S_inv, backend=self.backend), 242 backend=self.backend, 243 ) 244 245 self.beta_ = mo.safe_sparse_dot( 246 a=inv, 247 b=mo.crossprod(x=scaled_Z, y=Y, backend=self.backend), 248 backend=self.backend, 249 ) 250 self.coef_ = self.beta_ # sklearn compatibility 251 self.classes_ = np.unique(y) 252 self._is_fitted = True 253 return self 254 255 def predict(self, X, **kwargs): 256 """Predict test data X. 257 258 Args: 259 260 X: {array-like}, shape = [n_samples, n_features] 261 Training vectors, where n_samples is the number 262 of samples and n_features is the number of features. 263 264 **kwargs: additional parameters to be passed to 265 self.cook_test_set 266 267 Returns: 268 269 model predictions: {array-like} 270 271 """ 272 273 return np.argmax(self.predict_proba(X, **kwargs), axis=1) 274 275 def predict_proba(self, X, **kwargs): 276 """Predict probabilities for test data X. 277 278 Args: 279 280 X: {array-like}, shape = [n_samples, n_features] 281 Training vectors, where n_samples is the number 282 of samples and n_features is the number of features. 
283 284 **kwargs: additional parameters to be passed to 285 self.cook_test_set 286 287 Returns: 288 289 probability estimates for test data: {array-like} 290 291 """ 292 293 if len(X.shape) == 1: 294 n_features = X.shape[0] 295 new_X = mo.rbind( 296 x=X.reshape(1, n_features), 297 y=np.ones(n_features).reshape(1, n_features), 298 backend=self.backend, 299 ) 300 301 Z = self.cook_test_set(new_X, **kwargs) 302 303 else: 304 Z = self.cook_test_set(X, **kwargs) 305 306 ZB = mo.safe_sparse_dot(a=Z, b=self.beta_, backend=self.backend) 307 308 exp_ZB = np.exp(ZB) 309 310 return exp_ZB / exp_ZB.sum(axis=1)[:, None] 311 312 def score(self, X, y, scoring=None): 313 """Scoring function for classification. 314 315 Args: 316 317 X: {array-like}, shape = [n_samples, n_features] 318 Training vectors, where n_samples is the number 319 of samples and n_features is the number of features. 320 321 y: array-like, shape = [n_samples] 322 Target values. 323 324 scoring: str 325 scoring method (default is accuracy) 326 327 Returns: 328 329 score: float 330 """ 331 332 if scoring is None: 333 scoring = "accuracy" 334 335 if scoring == "accuracy": 336 return skm2.accuracy_score(y, self.predict(X)) 337 338 if scoring == "f1": 339 return skm2.f1_score(y, self.predict(X)) 340 341 if scoring == "precision": 342 return skm2.precision_score(y, self.predict(X)) 343 344 if scoring == "recall": 345 return skm2.recall_score(y, self.predict(X)) 346 347 if scoring == "roc_auc": 348 return skm2.roc_auc_score(y, self.predict(X)) 349 350 if scoring == "log_loss": 351 return skm2.log_loss(y, self.predict_proba(X)) 352 353 if scoring == "balanced_accuracy": 354 return skm2.balanced_accuracy_score(y, self.predict(X)) 355 356 if scoring == "average_precision": 357 return skm2.average_precision_score(y, self.predict(X)) 358 359 if scoring == "neg_brier_score": 360 return -skm2.brier_score_loss(y, self.predict_proba(X)) 361 362 if scoring == "neg_log_loss": 363 return -skm2.log_loss(y, self.predict_proba(X)) 364 365 @property 366 def _estimator_type(self): 367 return "classifier" 368 369 def partial_fit( 370 self, X, y, classes=None, learning_rate=0.01, decay=0.001, **kwargs 371 ): 372 """Incrementally fit the Ridge model using SGD-style updates. 373 374 Uses the update rule: w_{n+1} = w_n + γ_n * x_n * [y_n - x_n^T * w_n] - γ_n * λ * w_n 375 for online learning with individual samples. 376 377 Args: 378 X: {array-like}, shape = [n_samples, n_features] 379 Training vectors for this batch 380 381 y: array-like, shape = [n_samples] 382 Target values for this batch 383 384 classes: array-like, shape = [n_classes], optional 385 List of all possible target classes. Must be provided on first call 386 to partial_fit if not already fitted. 
387 388 learning_rate: float, default=0.01 389 Initial learning rate for SGD updates 390 391 decay: float, default=0.001 392 Learning rate decay parameter 393 394 **kwargs: additional parameters to be passed to self.cook_training_set 395 396 Returns: 397 self: object 398 """ 399 # Input validation 400 X = np.asarray(X) 401 y = np.asarray(y) 402 403 if X.shape[0] != y.shape[0]: 404 raise ValueError("X and y must have the same number of samples") 405 406 assert mx.is_factor(y), "y must contain only integers" 407 408 # Handle classes on first call 409 if not self._is_fitted: 410 if classes is not None: 411 self.classes_ = np.array(classes) 412 self.n_classes_ = len(self.classes_) 413 else: 414 self.classes_ = np.unique(y) 415 self.n_classes_ = len(self.classes_) 416 417 self.n_classes = len(self.classes_) 418 419 # Initialize learning parameters 420 self.initial_learning_rate = learning_rate 421 self.decay = decay 422 self._step_count = 0 423 self._is_fitted = True 424 425 else: 426 # Check for new classes 427 new_classes = np.setdiff1d(y, self.classes_) 428 if len(new_classes) > 0: 429 raise ValueError( 430 f"New classes {new_classes} encountered. " 431 "partial_fit cannot handle new classes after first call." 432 ) 433 434 # Process the batch 435 output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 436 437 # Get dimensions 438 n_samples, n_features_total = scaled_Z.shape 439 n_original_features = X.shape[1] 440 441 # Create one-hot encoded targets 442 Y = mo.one_hot_encode2(output_y, self.n_classes) 443 444 # Determine feature dimensions for regularization 445 if self.n_clusters > 0: 446 if self.cluster_encode: 447 n_direct_features = n_original_features + self.n_clusters 448 else: 449 n_direct_features = n_original_features + 1 450 else: 451 n_direct_features = n_original_features 452 453 # Initialize beta_ if first time 454 if not hasattr(self, "beta_") or self.beta_ is None: 455 self.beta_ = np.zeros((n_features_total, self.n_classes)) 456 457 # Precompute indices for regularization 458 direct_indices = slice(0, n_direct_features) 459 hidden_indices = slice(n_direct_features, n_features_total) 460 461 # Process each sample with SGD 462 for i in range(n_samples): 463 self._step_count += 1 464 465 # Current learning rate with decay 466 current_lr = self.initial_learning_rate / ( 467 1 + self.decay * self._step_count 468 ) 469 470 # Current sample and target 471 x_i = scaled_Z[i, :] # Feature vector 472 y_i = Y[i, :] # Target vector (one-hot) 473 474 # Prediction: x_i^T * beta 475 prediction = x_i @ self.beta_ 476 477 # Error: y_i - prediction 478 error = y_i - prediction 479 480 # Gradient update: current_lr * x_i * error 481 gradient_update = current_lr * np.outer(x_i, error) 482 483 # Regularization terms (more efficient indexing) 484 reg_update = np.zeros_like(self.beta_) 485 reg_update[direct_indices, :] = ( 486 current_lr * self.lambda1 * self.beta_[direct_indices, :] 487 ) 488 reg_update[hidden_indices, :] = ( 489 current_lr * self.lambda2 * self.beta_[hidden_indices, :] 490 ) 491 492 # Combined update: beta = beta + gradient_update - reg_update 493 self.beta_ += gradient_update - reg_update 494 495 self.coef_ = self.beta_ # sklearn compatibility 496 497 return self
Multitask Ridge classification with 2 regularization parameters
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
lambda1: float
regularization parameter on direct link
lambda2: float
regularization parameter on hidden layer
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
beta_: {array-like}
regression coefficients
coef_: {array-like}
alias for `beta_`, regression coefficients
Examples:
See also https://github.com/Techtonique/nnetsauce/blob/master/examples/ridgemtask_classification.py
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time
breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)
fit_obj = ns.Ridge2MultitaskClassifier(n_hidden_features=int(9.83730469e+01),
dropout=4.31054687e-01,
n_clusters=int(1.71484375e+00),
lambda1=1.24023438e+01, lambda2=7.30263672e+03)
start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")
print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
Fit Ridge model to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
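The fit above has a closed form: with Z the cooked training matrix and a block-diagonal penalty (lambda1 on the original/cluster columns, lambda2 on the hidden-layer columns), the coefficients solve a ridge system, which the library inverts blockwise. A minimal, illustrative sketch of the equivalent direct solve (all names and shapes below are hypothetical, and the one-hot cluster encoding is assumed):

```python
import numpy as np

rng = np.random.default_rng(0)
n, p_direct, p_hidden, K = 100, 12, 5, 3
Z = rng.normal(size=(n, p_direct + p_hidden))   # stand-in for the cooked features
Y = np.eye(K)[rng.integers(0, K, size=n)]       # one-hot encoded targets

lambda1, lambda2 = 0.1, 0.1
penalty = np.diag(np.concatenate([np.full(p_direct, lambda1),
                                  np.full(p_hidden, lambda2)]))

# ridge solution with two regularization parameters:
# beta = (Z'Z + diag(lambda1*I, lambda2*I))^{-1} Z'Y
beta = np.linalg.solve(Z.T @ Z + penalty, Z.T @ Y)
print(beta.shape)  # (p_direct + p_hidden, K)
```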
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
Scoring function for classification.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method: 'accuracy' (default), 'f1', 'precision', 'recall',
'roc_auc', 'log_loss', 'balanced_accuracy', 'average_precision',
'neg_brier_score', or 'neg_log_loss'
Returns:
score: float
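As a short usage note (reusing `fit_obj`, `X_test` and `y_test` from the example above), any of the scoring strings handled by this method can be passed by name:

```python
print(fit_obj.score(X_test, y_test))                         # accuracy (default)
print(fit_obj.score(X_test, y_test, scoring="f1"))
print(fit_obj.score(X_test, y_test, scoring="neg_log_loss"))
```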
15class Ridge2Forecaster: 16 """Vectorized Ridge2 RVFL for multivariate time series forecasting. 17 18 Parameters 19 ---------- 20 lags : int, optional 21 Number of lags to use for feature engineering, by default 1 22 nb_hidden : int, optional 23 Number of hidden units, by default 5 24 activ : str, optional 25 Activation function, by default 'relu' 26 lambda_1 : float, optional 27 Ridge regularization parameter for input features, by default 0.1 28 lambda_2 : float, optional 29 Ridge regularization parameter for hidden units, by default 0.1 30 nodes_sim : str, optional 31 Type of quasi-random sequence for weight initialization, by default 'sobol' 32 seed : int, optional 33 Random seed for reproducibility, by default 42 34 """ 35 36 def __init__( 37 self, 38 lags=1, 39 nb_hidden=5, 40 activ="relu", 41 lambda_1=0.1, 42 lambda_2=0.1, 43 nodes_sim="sobol", 44 seed=42, 45 ): 46 self.lags = lags 47 self.nb_hidden = nb_hidden 48 self.lambda_1 = lambda_1 49 self.lambda_2 = lambda_2 50 self.nodes_sim = nodes_sim 51 self.seed = seed 52 self.coef_ = None 53 54 # Activation functions 55 activations = { 56 "relu": lambda x: jnp.maximum(0, x), 57 "sigmoid": lambda x: 1 / (1 + jnp.exp(-x)), 58 "tanh": jnp.tanh, 59 "linear": lambda x: x, 60 } 61 self.activation = jax.jit(activations[activ]) 62 63 def _create_lags(self, y): 64 """Create lagged feature matrix (vectorized).""" 65 n, p = y.shape 66 X = jnp.concatenate( 67 [y[self.lags - i - 1: n - i - 1] for i in range(self.lags)], axis=1 68 ) 69 Y = y[self.lags:] 70 return X, Y 71 72 def _init_weights(self, n_features): 73 """Initialize hidden layer weights using quasi-random sequences.""" 74 total_dim = n_features * self.nb_hidden 75 76 if self.nodes_sim == "sobol": 77 sampler = qmc.Sobol(d=total_dim, scramble=False, seed=self.seed) 78 W = sampler.random(1).reshape(n_features, self.nb_hidden) 79 W = 2 * W - 1 80 else: 81 key = jax.random.PRNGKey(self.seed) 82 W = jax.random.uniform( 83 key, (n_features, self.nb_hidden), minval=-1, maxval=1 84 ) 85 86 return jnp.array(W) 87 88 @partial(jax.jit, static_argnums=(0,)) 89 def _compute_hidden(self, X, W): 90 """Compute hidden layer features (vectorized).""" 91 return self.activation(X @ W) 92 93 @partial(jax.jit, static_argnums=(0,)) 94 def _solve_ridge2(self, X, H, Y): 95 """Solve ridge regression with dual regularization.""" 96 n, p_x = X.shape 97 _, p_h = H.shape 98 99 Y_mean = jnp.mean(Y, axis=0) 100 Y_c = Y - Y_mean 101 102 X_mean = jnp.mean(X, axis=0) 103 X_std = jnp.std(X, axis=0) 104 X_std = jnp.where(X_std == 0, 1.0, X_std) 105 X_s = (X - X_mean) / X_std 106 107 H_mean = jnp.mean(H, axis=0) 108 H_std = jnp.std(H, axis=0) 109 H_std = jnp.where(H_std == 0, 1.0, H_std) 110 H_s = (H - H_mean) / H_std 111 112 XX = X_s.T @ X_s + self.lambda_1 * jnp.eye(p_x) 113 XH = X_s.T @ H_s 114 HH = H_s.T @ H_s + self.lambda_2 * jnp.eye(p_h) 115 116 XX_inv = jnp.linalg.inv(XX) 117 S = HH - XH.T @ XX_inv @ XH 118 S_inv = jnp.linalg.inv(S) 119 120 XY = X_s.T @ Y_c 121 HY = H_s.T @ Y_c 122 123 beta = XX_inv @ (XY - XH @ S_inv @ (HY - XH.T @ XX_inv @ XY)) 124 gamma = S_inv @ (HY - XH.T @ beta) 125 self.coef_ = jnp.concatenate([beta, gamma], axis=1) 126 127 return beta, gamma, Y_mean, X_mean, X_std, H_mean, H_std 128 129 def fit(self, y): 130 """Fit the Ridge2 model. 131 132 Parameters 133 ---------- 134 y : array-like of shape (n_samples,) 135 Target values. 
136 """ 137 y = jnp.array(y) 138 if y.ndim == 1: 139 y = y[:, None] 140 141 X, Y = self._create_lags(y) 142 self.n_series = Y.shape[1] 143 144 self.W = self._init_weights(X.shape[1]) 145 H = self._compute_hidden(X, self.W) 146 147 ( 148 self.beta, 149 self.gamma, 150 self.Y_mean, 151 self.X_mean, 152 self.X_std, 153 self.H_mean, 154 self.H_std, 155 ) = self._solve_ridge2(X, H, Y) 156 157 # Compute residuals for prediction intervals 158 X_s = (X - self.X_mean) / self.X_std 159 H_s = (H - self.H_mean) / self.H_std 160 fitted = X_s @ self.beta + H_s @ self.gamma + self.Y_mean 161 self.residuals = np.array(Y - fitted) 162 163 self.last_obs = y[-self.lags:] 164 return self 165 166 @partial(jax.jit, static_argnums=(0,)) 167 def _predict_step(self, x_new): 168 """Single prediction step (JIT-compiled). 169 170 Parameters 171 ---------- 172 x_new : array-like of shape (n_features,) 173 New input data. 174 175 Returns 176 ------- 177 y_next : float 178 Next-step prediction. 179 """ 180 x_s = (x_new - self.X_mean) / self.X_std 181 h = self.activation(x_s @ self.W) 182 h_s = (h - self.H_mean) / self.H_std 183 return x_s @ self.beta + h_s @ self.gamma + self.Y_mean 184 185 def _forecast(self, h=5): 186 """Generate h-step ahead recursive forecasts. 187 188 Parameters 189 ---------- 190 h : int, optional 191 Number of steps to forecast, by default 5 192 193 Returns 194 ------- 195 forecasts : array-like of shape (h,) 196 Forecasted values. 197 """ 198 forecasts = [] 199 current = self.last_obs.copy() 200 201 for _ in range(h): 202 x_new = current.flatten()[None, :] 203 y_next = self._predict_step(x_new)[0] 204 forecasts.append(y_next) 205 current = jnp.vstack([current[1:], y_next]) 206 207 return jnp.array(forecasts) 208 209 def predict(self, h=5, level=None, method="gaussian", B=100): 210 """Generate prediction intervals with proper uncertainty propagation. 211 212 Parameters 213 ---------- 214 h : int, optional 215 Number of steps to forecast, by default 5 216 level : float, optional 217 Confidence level for prediction intervals, by default None 218 method : str, optional 219 Method for prediction intervals ('gaussian' or 'bootstrap'), by default 'gaussian' 220 B : int, optional 221 Number of bootstrap samples, by default 100 222 223 Returns 224 ------- 225 point_forecast : array-like of shape (h,) 226 Point forecasted values. 227 lower : array-like of shape (h,) 228 Lower bounds of prediction intervals. 229 upper : array-like of shape (h,) 230 Upper bounds of prediction intervals. 
231 """ 232 233 point_forecast = self._forecast(h) 234 235 if level is None: 236 return point_forecast 237 238 # probabilistic prediction intervals 239 if method == "gaussian": 240 # Use residual std with horizon-dependent scaling 241 residual_std = np.std(self.residuals, axis=0) 242 z = norm.ppf(1 - (1 - level / 100) / 2) 243 244 # Scale uncertainty by sqrt(h) for each horizon 245 horizon_scale = np.sqrt(np.arange(1, h + 1))[:, None] 246 std_expanded = residual_std * horizon_scale 247 248 lower = point_forecast - z * std_expanded 249 upper = point_forecast + z * std_expanded 250 251 elif method == "bootstrap": 252 # Proper residual bootstrap 253 key = jax.random.PRNGKey(self.seed) 254 n_residuals = len(self.residuals) 255 sims = [] 256 257 for _ in range(B): 258 key, subkey = jax.random.split(key) 259 boot_indices = np.random.choice( 260 n_residuals, size=h, replace=True 261 ) 262 boot_resids = self.residuals[boot_indices] 263 264 current = self.last_obs.copy() 265 path = [] 266 267 for t in range(h): 268 x_new = current.flatten()[None, :] 269 y_pred = self._predict_step(x_new)[0] 270 y_sim = y_pred + boot_resids[t] 271 path.append(y_sim) 272 current = jnp.vstack([current[1:], y_sim]) 273 274 sims.append(jnp.array(path)) 275 276 sims = jnp.array(sims) 277 lower = jnp.percentile(sims, (100 - level) / 2, axis=0) 278 upper = jnp.percentile(sims, 100 - (100 - level) / 2, axis=0) 279 280 return { 281 "mean": np.array(point_forecast), 282 "lower": np.array(lower), 283 "upper": np.array(upper), 284 }
Vectorized Ridge2 RVFL for multivariate time series forecasting.
Parameters
lags : int, optional
Number of lags to use for feature engineering, by default 1
nb_hidden : int, optional
Number of hidden units, by default 5
activ : str, optional
Activation function, by default 'relu'
lambda_1 : float, optional
Ridge regularization parameter for input features, by default 0.1
lambda_2 : float, optional
Ridge regularization parameter for hidden units, by default 0.1
nodes_sim : str, optional
Type of quasi-random sequence for weight initialization, by default 'sobol'
seed : int, optional
Random seed for reproducibility, by default 42
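Unlike the classifiers above, this docstring ships no usage example; here is a minimal, illustrative sketch on simulated data (the series and hyperparameter values are arbitrary):

```python
import numpy as np
import nnetsauce as ns

# two simulated random-walk series, 120 observations each
rng = np.random.default_rng(42)
y = np.cumsum(rng.normal(size=(120, 2)), axis=0)

fcst = ns.Ridge2Forecaster(lags=3, nb_hidden=10, lambda_1=0.1, lambda_2=0.1)
fcst.fit(y)

# point forecasts only (shape (h, n_series)) when no level is requested
print(fcst.predict(h=5))
```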
Fit the Ridge2 model.
Parameters
y : array-like of shape (n_samples,) or (n_samples, n_series)
Time series values; a 1D array is treated as a single series.
Generate prediction intervals with proper uncertainty propagation.
Parameters
h : int, optional
Number of steps to forecast, by default 5
level : float, optional
Confidence level for prediction intervals, by default None
method : str, optional
Method for prediction intervals ('gaussian' or 'bootstrap'), by default 'gaussian'
B : int, optional
Number of bootstrap samples, by default 100
Returns
point_forecast : array-like of shape (h,)
Point forecasted values (returned alone when level is None).
lower : array-like of shape (h,)
Lower bounds of the prediction intervals.
upper : array-like of shape (h,)
Upper bounds of the prediction intervals.
When level is provided, the forecasts and bounds are returned together as a dict with keys "mean", "lower", and "upper".
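Continuing the sketch above (the fitted `fcst` object is assumed), intervals can be requested with either method; with the Gaussian method the half-width grows like the square root of the horizon, as described above:

```python
# Gaussian intervals: mean ± z * residual_std * sqrt(horizon)
res = fcst.predict(h=5, level=95, method="gaussian")
print(res["mean"].shape, res["lower"].shape, res["upper"].shape)

# residual bootstrap intervals, B simulated paths
res_boot = fcst.predict(h=5, level=95, method="bootstrap", B=250)
print(res_boot["lower"], res_boot["upper"])
```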
6class SubSampler: 7 """Subsampling class. 8 9 Attributes: 10 11 y: array-like, shape = [n_samples] 12 Target values. 13 14 row_sample: double 15 subsampling fraction 16 17 n_samples: int 18 subsampling by using the number of rows (supersedes row_sample) 19 20 seed: int 21 reproductibility seed 22 23 n_jobs: int 24 number of jobs to run in parallel 25 26 verbose: bool 27 print progress messages and bars 28 """ 29 30 def __init__( 31 self, 32 y, 33 row_sample=0.8, 34 n_samples=None, 35 seed=123, 36 n_jobs=None, 37 verbose=False, 38 ): 39 self.y = y 40 self.n_samples = n_samples 41 if self.n_samples is None: 42 assert ( 43 row_sample < 1 and row_sample >= 0 44 ), "'row_sample' must be provided, plus < 1 and >= 0" 45 self.row_sample = row_sample 46 else: 47 assert self.n_samples < len(y), "'n_samples' must be < len(y)" 48 self.row_sample = self.n_samples / len(y) 49 self.seed = seed 50 self.indices = None 51 self.n_jobs = n_jobs 52 self.verbose = verbose 53 54 def subsample(self): 55 """Returns indices of subsampled input data. 56 57 Examples: 58 59 <ul> 60 <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240105_subsampling.ipynb">20240105_subsampling.ipynb</a> </li> 61 <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240131_subsampling_nsamples.ipynb">20240131_subsampling_nsamples.ipynb</a> </li> 62 </ul> 63 64 """ 65 self.indices = dosubsample( 66 y=self.y, 67 row_sample=self.row_sample, 68 seed=self.seed, 69 n_jobs=self.n_jobs, 70 verbose=self.verbose, 71 ) 72 return self.indices
Subsampling class.
Attributes:
y: array-like, shape = [n_samples]
Target values.
row_sample: double
subsampling fraction
n_samples: int
subsampling by using the number of rows (supersedes row_sample)
seed: int
reproducibility seed
n_jobs: int
number of jobs to run in parallel
verbose: bool
print progress messages and bars
Returns indices of subsampled input data.
Examples:
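- https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240105_subsampling.ipynb
- https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240131_subsampling_nsamples.ipynb

Beyond the notebooks above, here is a minimal, illustrative sketch (arbitrary data) of subsampling either by fraction or by a fixed number of rows:

```python
import numpy as np
import nnetsauce as ns

y = np.repeat([0, 1], [80, 20])  # imbalanced labels, 100 rows

# keep roughly 50% of the rows
idx_frac = ns.SubSampler(y=y, row_sample=0.5, seed=123).subsample()

# or request an exact number of rows (supersedes row_sample)
idx_n = ns.SubSampler(y=y, n_samples=30, seed=123).subsample()

print(len(idx_frac), len(idx_n))
```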