nnetsauce
1from .base.base import Base 2from .base.baseRegressor import BaseRegressor 3from .boosting.adaBoostClassifier import AdaBoostClassifier 4from .custom.customClassifier import CustomClassifier 5from .custom.customRegressor import CustomRegressor 6from .custom.customBackpropRegressor import CustomBackPropRegressor 7from .datasets import Downloader 8from .deep.deepClassifier import DeepClassifier 9from .deep.deepRegressor import DeepRegressor 10from .deep.deepMTS import DeepMTS 11from .elasticnet2.enet2 import ElasticNet2Regressor 12from .glm.glmClassifier import GLMClassifier 13from .glm.glmRegressor import GLMRegressor 14from .kernel.kernel import KernelRidge 15from .lazypredict.lazydeepClassifier import LazyDeepClassifier, LazyClassifier 16from .lazypredict.lazydeepRegressor import LazyDeepRegressor, LazyRegressor 17from .lazypredict.lazydeepClassifier import LazyDeepClassifier 18from .lazypredict.lazydeepRegressor import LazyDeepRegressor 19from .lazypredict.lazydeepMTS import LazyDeepMTS, LazyMTS 20from .mts.mts import MTS 21from .mts.mlarch import MLARCH 22from .mts.classical import ClassicalMTS 23from .mts.stackedmts import MTSStacker 24from .mts.multioutputmts import MultiOutputMTS 25from .mts.discretetokenmts import DiscreteTokenMTS 26from .multitask.multitaskClassifier import MultitaskClassifier 27from .multitask.simplemultitaskClassifier import SimpleMultitaskClassifier 28from .neuralnet.neuralnetregression import NeuralNetRegressor 29from .neuralnet.neuralnetclassification import NeuralNetClassifier 30from .optimizers.optimizer import Optimizer 31from .predictioninterval import PredictionInterval 32from .predictionset import PredictionSet 33from .quantile.quantileregression import QuantileRegressor 34from .quantile.quantileclassification import QuantileClassifier 35from .randombag.randomBagClassifier import RandomBagClassifier 36from .randombag.randomBagRegressor import RandomBagRegressor 37from .randomfourier.randomfourier import RandomFourierEstimator 
38from .rff.rffridge import ( 39 RandomFourierFeaturesRidge, 40 RandomFourierFeaturesRidgeGCV, 41) 42from .ridge.ridge import RidgeRegressor 43from .ridge2.ridge2Classifier import Ridge2Classifier 44from .ridge2.ridge2Regressor import Ridge2Regressor 45from .ridge2.ridge2MultitaskClassifier import Ridge2MultitaskClassifier 46from .ridge2.ridge2MTSJAX import Ridge2Forecaster 47from .ridge2.ridge2multioutputregressor import Ridge2MultiOutputRegressor 48from .rvfl.bayesianrvflRegressor import BayesianRVFLRegressor 49from .rvfl.bayesianrvfl2Regressor import BayesianRVFL2Regressor 50from .sampling import SubSampler 51from .updater import RegressorUpdater, ClassifierUpdater 52from .votingregressor import MedianVotingRegressor 53 54__all__ = [ 55 "AdaBoostClassifier", 56 "Base", 57 "BaseRegressor", 58 "BayesianRVFLRegressor", 59 "BayesianRVFL2Regressor", 60 "ClassicalMTS", 61 "CustomClassifier", 62 "CustomRegressor", 63 "CustomBackPropRegressor", 64 "DeepClassifier", 65 "DeepRegressor", 66 "DeepMTS", 67 "DiscreteTokenMTS", 68 "Downloader", 69 "ElasticNet2Regressor", 70 "GLMClassifier", 71 "GLMRegressor", 72 "KernelRidge", 73 "LazyClassifier", 74 "LazyRegressor", 75 "LazyDeepClassifier", 76 "LazyDeepRegressor", 77 "LazyMTS", 78 "LazyDeepMTS", 79 "MLARCH", 80 "MedianVotingRegressor", 81 "MTS", 82 "MTSStacker", 83 "MultiOutputMTS", 84 "MultitaskClassifier", 85 "NeuralNetRegressor", 86 "NeuralNetClassifier", 87 "PredictionInterval", 88 "PredictionSet", 89 "SimpleMultitaskClassifier", 90 "Optimizer", 91 "QuantileRegressor", 92 "QuantileClassifier", 93 "RandomBagRegressor", 94 "RandomBagClassifier", 95 "RandomFourierEstimator", 96 "RandomFourierFeaturesRidge", 97 "RandomFourierFeaturesRidgeGCV", 98 "RegressorUpdater", 99 "ClassifierUpdater", 100 "RidgeRegressor", 101 "Ridge2Regressor", 102 "Ridge2MultiOutputRegressor", 103 "Ridge2Classifier", 104 "Ridge2MultitaskClassifier", 105 "Ridge2Forecaster", 106 "SubSampler", 107]
class AdaBoostClassifier(Boosting, ClassifierMixin):
    """AdaBoost Classification (SAMME) model class derived from class Boosting

    Parameters:

        obj: object
            any object containing a method fit (obj.fit()) and a method predict
            (obj.predict())

        n_estimators: int
            number of boosting iterations

        learning_rate: float
            learning rate of the boosting procedure

        n_hidden_features: int
            number of nodes in the hidden layer

        reg_lambda: float
            regularization parameter for weights

        reg_alpha: float
            controls compromise between l1 and l2 norm of weights

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
            'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not
            (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original predictors are included (True) in model's
            fitting or not (False)

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
                no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot);
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        col_sample: float
            percentage of covariates randomly chosen for training

        row_sample: float
            percentage of rows chosen for training, by stratified bootstrapping

        seed: int
            reproducibility seed for nodes_sim=='uniform'

        verbose: int
            0 for no output, 1 for a progress bar (default is 1)

        method: str
            type of Adaboost method, 'SAMME' (discrete) or 'SAMME.R' (real)

        backend: str
            "cpu" or "gpu" or "tpu"

    Attributes:

        alpha_: list
            AdaBoost coefficients alpha_m

        base_learners_: dict
            a dictionary containing the base learners

    Examples:

    See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/adaboost_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/adaboost_classification.py)

    ```python
    import nnetsauce as ns
    import numpy as np
    from sklearn.datasets import load_breast_cancer
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import train_test_split
    from sklearn import metrics
    from time import time

    breast_cancer = load_breast_cancer()
    Z = breast_cancer.data
    t = breast_cancer.target
    np.random.seed(123)
    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)

    # SAMME.R
    clf = LogisticRegression(solver='liblinear', multi_class = 'ovr',
                            random_state=123)
    fit_obj = ns.AdaBoostClassifier(clf,
                                    n_hidden_features=int(11.22338867),
                                    direct_link=True,
                                    n_estimators=250, learning_rate=0.01126343,
                                    col_sample=0.72684326, row_sample=0.86429443,
                                    dropout=0.63078613, n_clusters=2,
                                    type_clust="gmm",
                                    verbose=1, seed = 123,
                                    method="SAMME.R")

    start = time()
    fit_obj.fit(X_train, y_train)
    print(f"Elapsed {time() - start}")

    start = time()
    print(fit_obj.score(X_test, y_test))
    print(f"Elapsed {time() - start}")

    preds = fit_obj.predict(X_test)

    print(metrics.classification_report(preds, y_test))

    ```

    """

    # construct the object -----

    def __init__(
        self,
        obj,
        n_estimators=10,
        learning_rate=0.1,
        n_hidden_features=1,
        reg_lambda=0,
        reg_alpha=0.5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=False,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        seed=123,
        verbose=1,
        method="SAMME",
        backend="cpu",
    ):
        # Attributes specific to this class are stored before delegating the
        # shared hyperparameters to the parent constructor.
        self.type_fit = "classification"
        self.verbose = verbose
        self.method = method
        self.reg_lambda = reg_lambda
        self.reg_alpha = reg_alpha

        super().__init__(
            obj=obj,
            n_estimators=n_estimators,
            learning_rate=learning_rate,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )

        # Placeholders; both containers are rebuilt from scratch by fit().
        self.alpha_ = []
        self.base_learners_ = dict.fromkeys(range(n_estimators))

    def fit(self, X, y, sample_weight=None, **kwargs):
        """Fit Boosting model to training data (X, y).

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            sample_weight: array-like, shape = [n_samples], optional
                Initial observation weights. When None, every observation
                gets weight 1 / n_samples. The input is copied, so the
                caller's array is never modified.

            **kwargs: additional parameters to be passed to
                self.cook_training_set or self.obj.fit

        Returns:

            self: object
        """

        assert mx.is_factor(y), "y must contain only integers"

        assert self.method in (
            "SAMME",
            "SAMME.R",
        ), "`method` must be either 'SAMME' or 'SAMME.R'"

        assert (self.reg_lambda <= 1) & (
            self.reg_lambda >= 0
        ), "must have self.reg_lambda <= 1 & self.reg_lambda >= 0"

        assert (self.reg_alpha <= 1) & (
            self.reg_alpha >= 0
        ), "must have self.reg_alpha <= 1 & self.reg_alpha >= 0"

        # training
        n, _ = X.shape
        self.classes_ = np.unique(y)  # for compatibility with sklearn
        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
        self.n_classes = self.n_classes_

        # Start every fit from a clean state. Without this, a second call to
        # fit() keeps appending to alpha_ (predict_proba would then read the
        # coefficients of the first fit), and placeholder entries left by
        # __init__ for a different n_estimators would survive as None.
        self.alpha_ = []
        self.base_learners_ = {}

        if sample_weight is None:
            w_m = np.repeat(1.0 / n, n)
        else:
            # Private float copy: the SAMME.R loop updates w_m in place, which
            # would otherwise overwrite the caller's array (and fail outright
            # for integer-typed weights).
            w_m = np.array(sample_weight, dtype=float)

        # NOTE(review): `cluster_encode` and `backend` are not forwarded to
        # the base learner here -- confirm whether that is intentional.
        base_learner = CustomClassifier(
            self.obj,
            n_hidden_features=self.n_hidden_features,
            activation_name=self.activation_name,
            a=self.a,
            nodes_sim=self.nodes_sim,
            bias=self.bias,
            dropout=self.dropout,
            direct_link=self.direct_link,
            n_clusters=self.n_clusters,
            type_clust=self.type_clust,
            type_scaling=self.type_scaling,
            col_sample=self.col_sample,
            row_sample=self.row_sample,
            seed=self.seed,
        )

        if self.verbose == 1:
            pbar = Progbar(self.n_estimators)

        if self.method == "SAMME":
            # Upper bound applied to the weighted error before taking the log.
            err_bound = 1 - 1 / self.n_classes
            # NOTE(review): alpha_[0] is a fixed 1.0 and the coefficient of
            # round m is stored at alpha_[m + 1], while predict_proba weights
            # learner m with alpha_[m] -- confirm the offset is intended.
            self.alpha_.append(1.0)
            x_range_n = range(n)

            for m in range(self.n_estimators):
                preds = base_learner.fit(
                    X, y, sample_weight=w_m.ravel(), **kwargs
                ).predict(X)

                # Snapshot the fitted learner; `base_learner` is refitted on
                # the next round.
                self.base_learners_[m] = deepcopy(base_learner)

                # Misclassification indicator for each training observation.
                cond = [y[i] != preds[i] for i in x_range_n]

                # Weighted error, floored away from zero so the log below
                # stays finite.
                err_m = max(
                    sum(c * w for c, w in zip(cond, w_m)),
                    2.220446049250313e-16,
                )  # sum(w_m) == 1

                if self.reg_lambda > 0:
                    # Elastic-net style penalty on the observation weights.
                    err_m += self.reg_lambda * (
                        (1 - self.reg_alpha) * 0.5 * sum(x**2 for x in w_m)
                        + self.reg_alpha * sum(abs(x) for x in w_m)
                    )

                err_m = min(err_m, err_bound)

                alpha_m = self.learning_rate * log(
                    (self.n_classes - 1) * (1 - err_m) / err_m
                )

                self.alpha_.append(alpha_m)

                # NOTE(review): the new weights depend only on the current
                # round's errors; the previous w_m is not multiplied in.
                w_m_temp = [exp(alpha_m * c) for c in cond]

                sum_w_m = sum(w_m_temp)

                w_m = np.asarray([w / sum_w_m for w in w_m_temp])

                # A different seed for each boosting round.
                base_learner.set_params(seed=self.seed + (m + 1) * 1000)

                if self.verbose == 1:
                    pbar.update(m)

            if self.verbose == 1:
                pbar.update(self.n_estimators)

            return self

        if self.method == "SAMME.R":
            Y = mo.one_hot_encode2(y, self.n_classes)

            for m in range(self.n_estimators):
                probs = base_learner.fit(
                    X, y, sample_weight=w_m.ravel(), **kwargs
                ).predict_proba(X)

                # Keep probabilities strictly positive before taking logs.
                np.clip(
                    a=probs, a_min=2.220446049250313e-16, a_max=1.0, out=probs
                )

                self.base_learners_[m] = deepcopy(base_learner)

                # In-place update; safe because w_m is a private array.
                w_m *= np.exp(
                    -1.0
                    * self.learning_rate
                    * (1.0 - 1.0 / self.n_classes)
                    * xlogy(Y, probs).sum(axis=1)
                )

                w_m /= np.sum(w_m)

                base_learner.set_params(seed=self.seed + (m + 1) * 1000)

                if self.verbose == 1:
                    pbar.update(m)

            if self.verbose == 1:
                pbar.update(self.n_estimators)

            return self

    def predict(self, X, **kwargs):
        """Predict test data X.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters to be passed to
                self.cook_test_set

        Returns:

            model predictions: {array-like}
                for each row, the column index with the highest value
                returned by predict_proba
        """
        return self.predict_proba(X, **kwargs).argmax(axis=1)

    def predict_proba(self, X, **kwargs):
        """Predict probabilities for test data X.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters to be passed to
                self.cook_test_set

        Returns:

            probability estimates for test data: {array-like}
                ensemble scores passed through `expit` and rescaled so
                that each row sums to 1

        """

        if self.method == "SAMME":
            ensemble_learner = np.zeros((X.shape[0], self.n_classes))

            # Weighted vote: each learner adds alpha_[idx] to the column of
            # the class it predicts.
            for idx, base_learner in self.base_learners_.items():
                preds = base_learner.predict(X, **kwargs)

                ensemble_learner += self.alpha_[idx] * mo.one_hot_encode2(
                    preds, self.n_classes
                )
        else:
            # Any other value of `method` is handled as "SAMME.R".
            ensemble_learner = 0

            for base_learner in self.base_learners_.values():
                probs = base_learner.predict_proba(X, **kwargs)

                # Keep probabilities strictly positive before taking logs.
                np.clip(
                    a=probs, a_min=2.220446049250313e-16, a_max=1.0, out=probs
                )

                log_preds_proba = np.log(probs)

                # Row-centred log-probabilities, summed over the learners.
                ensemble_learner += (
                    log_preds_proba - log_preds_proba.mean(axis=1)[:, None]
                )

            ensemble_learner *= self.n_classes - 1

        expit_ensemble_learner = expit(ensemble_learner)

        sum_ensemble = expit_ensemble_learner.sum(axis=1)

        return expit_ensemble_learner / sum_ensemble[:, None]

    @property
    def _estimator_type(self):
        # Read-only marker identifying the estimator as a classifier.
        return "classifier"
AdaBoost Classification (SAMME) model class derived from class Boosting
Parameters:
obj: object
any object containing a method fit (obj.fit()) and a method predict
(obj.predict())
n_estimators: int
number of boosting iterations
learning_rate: float
learning rate of the boosting procedure
n_hidden_features: int
number of nodes in the hidden layer
reg_lambda: float
regularization parameter for weights
reg_alpha: float
controls compromise between l1 and l2 norm of weights
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform'
verbose: int
0 for no output, 1 for a progress bar (default is 1)
method: str
type of Adaboost method, 'SAMME' (discrete) or 'SAMME.R' (real)
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
alpha_: list
AdaBoost coefficients alpha_m
base_learners_: dict
a dictionary containing the base learners
Examples:
See also https://github.com/Techtonique/nnetsauce/blob/master/examples/adaboost_classification.py
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time
breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)
# SAMME.R
clf = LogisticRegression(solver='liblinear', multi_class = 'ovr',
random_state=123)
fit_obj = ns.AdaBoostClassifier(clf,
n_hidden_features=int(11.22338867),
direct_link=True,
n_estimators=250, learning_rate=0.01126343,
col_sample=0.72684326, row_sample=0.86429443,
dropout=0.63078613, n_clusters=2,
type_clust="gmm",
verbose=1, seed = 123,
method="SAMME.R")
start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")
start = time()
print(fit_obj.score(X_test, y_test))
print(f"Elapsed {time() - start}")
preds = fit_obj.predict(X_test)
print(metrics.classification_report(preds, y_test))
def fit(self, X, y, sample_weight=None, **kwargs):
    """Fit Boosting model to training data (X, y).

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        sample_weight: array-like, shape = [n_samples], optional
            Initial observation weights. When None, every observation
            gets weight 1 / n_samples. The input is copied, so the
            caller's array is never modified.

        **kwargs: additional parameters to be passed to
            self.cook_training_set or self.obj.fit

    Returns:

        self: object
    """

    assert mx.is_factor(y), "y must contain only integers"

    assert self.method in (
        "SAMME",
        "SAMME.R",
    ), "`method` must be either 'SAMME' or 'SAMME.R'"

    assert (self.reg_lambda <= 1) & (
        self.reg_lambda >= 0
    ), "must have self.reg_lambda <= 1 & self.reg_lambda >= 0"

    assert (self.reg_alpha <= 1) & (
        self.reg_alpha >= 0
    ), "must have self.reg_alpha <= 1 & self.reg_alpha >= 0"

    # training
    n, _ = X.shape
    self.classes_ = np.unique(y)  # for compatibility with sklearn
    self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
    self.n_classes = self.n_classes_

    # Start every fit from a clean state. Without this, a second call to
    # fit() keeps appending to alpha_ (predict_proba would then read the
    # coefficients of the first fit), and placeholder entries left by
    # __init__ for a different n_estimators would survive as None.
    self.alpha_ = []
    self.base_learners_ = {}

    if sample_weight is None:
        w_m = np.repeat(1.0 / n, n)
    else:
        # Private float copy: the SAMME.R loop updates w_m in place, which
        # would otherwise overwrite the caller's array (and fail outright
        # for integer-typed weights).
        w_m = np.array(sample_weight, dtype=float)

    # NOTE(review): `cluster_encode` and `backend` are not forwarded to
    # the base learner here -- confirm whether that is intentional.
    base_learner = CustomClassifier(
        self.obj,
        n_hidden_features=self.n_hidden_features,
        activation_name=self.activation_name,
        a=self.a,
        nodes_sim=self.nodes_sim,
        bias=self.bias,
        dropout=self.dropout,
        direct_link=self.direct_link,
        n_clusters=self.n_clusters,
        type_clust=self.type_clust,
        type_scaling=self.type_scaling,
        col_sample=self.col_sample,
        row_sample=self.row_sample,
        seed=self.seed,
    )

    if self.verbose == 1:
        pbar = Progbar(self.n_estimators)

    if self.method == "SAMME":
        # Upper bound applied to the weighted error before taking the log.
        err_bound = 1 - 1 / self.n_classes
        # NOTE(review): alpha_[0] is a fixed 1.0 and the coefficient of
        # round m is stored at alpha_[m + 1], while predict_proba weights
        # learner m with alpha_[m] -- confirm the offset is intended.
        self.alpha_.append(1.0)
        x_range_n = range(n)

        for m in range(self.n_estimators):
            preds = base_learner.fit(
                X, y, sample_weight=w_m.ravel(), **kwargs
            ).predict(X)

            # Snapshot the fitted learner; `base_learner` is refitted on
            # the next round.
            self.base_learners_[m] = deepcopy(base_learner)

            # Misclassification indicator for each training observation.
            cond = [y[i] != preds[i] for i in x_range_n]

            # Weighted error, floored away from zero so the log below
            # stays finite.
            err_m = max(
                sum(c * w for c, w in zip(cond, w_m)),
                2.220446049250313e-16,
            )  # sum(w_m) == 1

            if self.reg_lambda > 0:
                # Elastic-net style penalty on the observation weights.
                err_m += self.reg_lambda * (
                    (1 - self.reg_alpha) * 0.5 * sum(x**2 for x in w_m)
                    + self.reg_alpha * sum(abs(x) for x in w_m)
                )

            err_m = min(err_m, err_bound)

            alpha_m = self.learning_rate * log(
                (self.n_classes - 1) * (1 - err_m) / err_m
            )

            self.alpha_.append(alpha_m)

            # NOTE(review): the new weights depend only on the current
            # round's errors; the previous w_m is not multiplied in.
            w_m_temp = [exp(alpha_m * c) for c in cond]

            sum_w_m = sum(w_m_temp)

            w_m = np.asarray([w / sum_w_m for w in w_m_temp])

            # A different seed for each boosting round.
            base_learner.set_params(seed=self.seed + (m + 1) * 1000)

            if self.verbose == 1:
                pbar.update(m)

        if self.verbose == 1:
            pbar.update(self.n_estimators)

        return self

    if self.method == "SAMME.R":
        Y = mo.one_hot_encode2(y, self.n_classes)

        for m in range(self.n_estimators):
            probs = base_learner.fit(
                X, y, sample_weight=w_m.ravel(), **kwargs
            ).predict_proba(X)

            # Keep probabilities strictly positive before taking logs.
            np.clip(
                a=probs, a_min=2.220446049250313e-16, a_max=1.0, out=probs
            )

            self.base_learners_[m] = deepcopy(base_learner)

            # In-place update; safe because w_m is a private array.
            w_m *= np.exp(
                -1.0
                * self.learning_rate
                * (1.0 - 1.0 / self.n_classes)
                * xlogy(Y, probs).sum(axis=1)
            )

            w_m /= np.sum(w_m)

            base_learner.set_params(seed=self.seed + (m + 1) * 1000)

            if self.verbose == 1:
                pbar.update(m)

        if self.verbose == 1:
            pbar.update(self.n_estimators)

        return self
Fit Boosting model to training data (X, y).
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
def predict(self, X, **kwargs):
    """Predict test data X.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Test vectors, where n_samples is the number
            of samples and n_features is the number of features.

        **kwargs: additional parameters to be passed to
            self.cook_test_set

    Returns:

        model predictions: {array-like}
            for each row, the column index with the highest value
            returned by predict_proba
    """
    # Everything is delegated to predict_proba; only the position of the
    # row-wise maximum is kept.
    class_scores = self.predict_proba(X, **kwargs)
    winners = class_scores.argmax(axis=1)
    return winners
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
def predict_proba(self, X, **kwargs):
    """Predict probabilities for test data X.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Test vectors, where n_samples is the number
            of samples and n_features is the number of features.

        **kwargs: additional parameters to be passed to
            self.cook_test_set

    Returns:

        probability estimates for test data: {array-like}
            ensemble scores passed through `expit` and rescaled so
            that each row sums to 1

    """

    if self.method == "SAMME":
        ensemble_learner = np.zeros((X.shape[0], self.n_classes))

        # Weighted vote: each learner adds alpha_[idx] to the column of
        # the class it predicts.
        for idx, base_learner in self.base_learners_.items():
            preds = base_learner.predict(X, **kwargs)

            ensemble_learner += self.alpha_[idx] * mo.one_hot_encode2(
                preds, self.n_classes
            )
    else:
        # Any other value of `method` is handled as "SAMME.R".
        ensemble_learner = 0

        for base_learner in self.base_learners_.values():
            probs = base_learner.predict_proba(X, **kwargs)

            # Keep probabilities strictly positive before taking logs.
            np.clip(a=probs, a_min=2.220446049250313e-16, a_max=1.0, out=probs)

            log_preds_proba = np.log(probs)

            # Row-centred log-probabilities, summed over the learners.
            ensemble_learner += (
                log_preds_proba - log_preds_proba.mean(axis=1)[:, None]
            )

        ensemble_learner *= self.n_classes - 1

    expit_ensemble_learner = expit(ensemble_learner)

    sum_ensemble = expit_ensemble_learner.sum(axis=1)

    return expit_ensemble_learner / sum_ensemble[:, None]
Predict probabilities for test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
48class Base(BaseEstimator): 49 """Base model from which all the other classes inherit. 50 51 This class contains the most important data preprocessing/feature engineering methods. 52 53 Parameters: 54 55 n_hidden_features: int 56 number of nodes in the hidden layer 57 58 activation_name: str 59 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 60 61 a: float 62 hyperparameter for 'prelu' or 'elu' activation function 63 64 nodes_sim: str 65 type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton', 66 'uniform' 67 68 bias: boolean 69 indicates if the hidden layer contains a bias term (True) or 70 not (False) 71 72 dropout: float 73 regularization parameter; (random) percentage of nodes dropped out 74 of the training 75 76 direct_link: boolean 77 indicates if the original features are included (True) in model's 78 fitting or not (False) 79 80 n_clusters: int 81 number of clusters for type_clust='kmeans' or type_clust='gmm' 82 clustering (could be 0: no clustering) 83 84 cluster_encode: bool 85 defines how the variable containing clusters is treated (default is one-hot); 86 if `False`, then labels are used, without one-hot encoding 87 88 type_clust: str 89 type of clustering method: currently k-means ('kmeans') or Gaussian 90 Mixture Model ('gmm') 91 92 type_scaling: a tuple of 3 strings 93 scaling methods for inputs, hidden layer, and clustering respectively 94 (and when relevant). 
95 Currently available: standardization ('std') or MinMax scaling ('minmax') or robust scaling ('robust') or max absolute scaling ('maxabs') 96 97 col_sample: float 98 percentage of features randomly chosen for training 99 100 row_sample: float 101 percentage of rows chosen for training, by stratified bootstrapping 102 103 seed: int 104 reproducibility seed for nodes_sim=='uniform', clustering and dropout 105 106 backend: str 107 "cpu" or "gpu" or "tpu" 108 109 """ 110 111 # construct the object ----- 112 113 def __init__( 114 self, 115 n_hidden_features=5, 116 activation_name="relu", 117 a=0.01, 118 nodes_sim="sobol", 119 bias=True, 120 dropout=0, 121 direct_link=True, 122 n_clusters=2, 123 cluster_encode=True, 124 type_clust="kmeans", 125 type_scaling=("std", "std", "std"), 126 col_sample=1, 127 row_sample=1, 128 seed=123, 129 backend="cpu", 130 ): 131 if not JAX_AVAILABLE and backend != "cpu": 132 raise RuntimeError( 133 "JAX is required for this feature. Install with: pip install yourpackage[jax]" 134 ) 135 136 # input checks ----- 137 138 sys_platform = platform.system() 139 140 if (sys_platform == "Windows") and (backend in ("gpu", "tpu")): 141 warnings.warn( 142 "No GPU/TPU computing on Windows yet, backend set to 'cpu'" 143 ) 144 backend = "cpu" 145 146 assert activation_name in ( 147 "relu", 148 "tanh", 149 "sigmoid", 150 "prelu", 151 "elu", 152 ), "'activation_name' must be in ('relu', 'tanh', 'sigmoid','prelu', 'elu')" 153 154 assert nodes_sim in ( 155 "sobol", 156 "hammersley", 157 "uniform", 158 "halton", 159 ), "'nodes_sim' must be in ('sobol', 'hammersley', 'uniform', 'halton')" 160 161 assert type_clust in ( 162 "kmeans", 163 "gmm", 164 ), "'type_clust' must be in ('kmeans', 'gmm')" 165 166 assert (len(type_scaling) == 3) & all( 167 type_scaling[i] in ("minmax", "std", "robust", "maxabs") 168 for i in range(len(type_scaling)) 169 ), "'type_scaling' must have length 3, and available scaling methods are 'minmax' scaling, standardization ('std'), 
robust scaling ('robust') and max absolute ('maxabs')" 170 171 assert (col_sample >= 0) & ( 172 col_sample <= 1 173 ), "'col_sample' must be comprised between 0 and 1 (both included)" 174 175 assert backend in ( 176 "cpu", 177 "gpu", 178 "tpu", 179 ), "must have 'backend' in ('cpu', 'gpu', 'tpu')" 180 181 self.n_hidden_features = n_hidden_features 182 self.activation_name = activation_name 183 self.a = a 184 self.nodes_sim = nodes_sim 185 self.bias = bias 186 self.seed = seed 187 self.backend = backend 188 self.dropout = dropout 189 self.direct_link = direct_link 190 self.cluster_encode = cluster_encode 191 self.type_clust = type_clust 192 self.type_scaling = type_scaling 193 self.col_sample = col_sample 194 self.row_sample = row_sample 195 self.n_clusters = n_clusters 196 if isinstance(self, RegressorMixin): 197 self.type_fit = "regression" 198 elif isinstance(self, ClassifierMixin): 199 self.type_fit = "classification" 200 self.subsampler_ = None 201 self.index_col_ = None 202 self.index_row_ = True 203 self.clustering_obj_ = None 204 self.clustering_scaler_ = None 205 self.nn_scaler_ = None 206 self.scaler_ = None 207 self.encoder_ = None 208 self.W_ = None 209 self.X_ = None 210 self.y_ = None 211 self.y_mean_ = None 212 self.beta_ = None 213 214 # activation function ----- 215 216 activation_options = { 217 "relu": ac.relu if (self.backend == "cpu") else jnn.relu, 218 "tanh": np.tanh if (self.backend == "cpu") else jnp.tanh, 219 "sigmoid": (ac.sigmoid if (self.backend == "cpu") else jnn.sigmoid), 220 "prelu": partial(ac.prelu, a=a), 221 "elu": ( 222 partial(ac.elu, a=a) 223 if (self.backend == "cpu") 224 else partial(jnn.elu, a=a) 225 ), 226 } 227 228 self.activation_func = activation_options[activation_name] 229 230 # "preprocessing" methods to be inherited ----- 231 232 def encode_clusters(self, X=None, predict=False, scaler=None, **kwargs): # 233 """Create new covariates with kmeans or GMM clustering 234 235 Parameters: 236 237 X: {array-like}, shape = 
[n_samples, n_features] 238 Training vectors, where n_samples is the number 239 of samples and n_features is the number of features. 240 241 predict: boolean 242 is False on training set and True on test set 243 244 scaler: {object} of class StandardScaler, MinMaxScaler, RobustScaler or MaxAbsScaler 245 if scaler has already been fitted on training data (online training), it can be passed here 246 247 **kwargs: 248 additional parameters to be passed to the 249 clustering method 250 251 Returns: 252 253 Clusters' matrix, one-hot encoded: {array-like} 254 255 """ 256 257 np.random.seed(self.seed) 258 259 if X is None: 260 X = self.X_ 261 262 if isinstance(X, pd.DataFrame): 263 X = copy.deepcopy(X.values.astype(float)) 264 265 if len(X.shape) == 1: 266 X = X.reshape(1, -1) 267 268 if predict is False: # encode training set 269 # scale input data before clustering 270 self.clustering_scaler_, scaled_X = mo.scale_covariates( 271 X, choice=self.type_scaling[2], scaler=self.clustering_scaler_ 272 ) 273 274 self.clustering_obj_, X_clustered = mo.cluster_covariates( 275 scaled_X, 276 self.n_clusters, 277 self.seed, 278 type_clust=self.type_clust, 279 **kwargs 280 ) 281 282 if self.cluster_encode: 283 return mo.one_hot_encode(X_clustered, self.n_clusters).astype( 284 np.float16 285 ) 286 287 return X_clustered.astype(np.float16) 288 289 # if predict == True, encode test set 290 X_clustered = self.clustering_obj_.predict( 291 self.clustering_scaler_.transform(X) 292 ) 293 294 if self.cluster_encode == True: 295 return mo.one_hot_encode(X_clustered, self.n_clusters).astype( 296 np.float16 297 ) 298 299 return X_clustered.astype(np.float16) 300 301 def create_layer(self, scaled_X, W=None): 302 """Create hidden layer. 
303 304 Parameters: 305 306 scaled_X: {array-like}, shape = [n_samples, n_features] 307 Training vectors, where n_samples is the number 308 of samples and n_features is the number of features 309 310 W: {array-like}, shape = [n_features, hidden_features] 311 if provided, constructs the hidden layer with W; otherwise computed internally 312 313 Returns: 314 315 Hidden layer matrix: {array-like} 316 317 """ 318 319 n_features = scaled_X.shape[1] 320 321 # hash_sim = { 322 # "sobol": generate_sobol, 323 # "hammersley": generate_hammersley, 324 # "uniform": generate_uniform, 325 # "halton": generate_halton 326 # } 327 328 if self.bias is False: # no bias term in the hidden layer 329 if W is None: 330 if self.nodes_sim == "sobol": 331 self.W_ = generate_sobol( 332 n_dims=n_features, 333 n_points=self.n_hidden_features, 334 seed=self.seed, 335 ) 336 elif self.nodes_sim == "hammersley": 337 self.W_ = generate_hammersley( 338 n_dims=n_features, 339 n_points=self.n_hidden_features, 340 seed=self.seed, 341 ) 342 elif self.nodes_sim == "uniform": 343 self.W_ = generate_uniform( 344 n_dims=n_features, 345 n_points=self.n_hidden_features, 346 seed=self.seed, 347 ) 348 else: 349 self.W_ = generate_halton( 350 n_dims=n_features, 351 n_points=self.n_hidden_features, 352 seed=self.seed, 353 ) 354 355 assert ( 356 scaled_X.shape[1] == self.W_.shape[0] 357 ), "check dimensions of covariates X and matrix W" 358 359 return mo.dropout( 360 x=self.activation_func( 361 mo.safe_sparse_dot( 362 a=scaled_X, b=self.W_, backend=self.backend 363 ) 364 ), 365 drop_prob=self.dropout, 366 seed=self.seed, 367 ) 368 369 # W is not none 370 assert ( 371 scaled_X.shape[1] == W.shape[0] 372 ), "check dimensions of covariates X and matrix W" 373 374 # self.W_ = W 375 return mo.dropout( 376 x=self.activation_func( 377 mo.safe_sparse_dot(a=scaled_X, b=W, backend=self.backend) 378 ), 379 drop_prob=self.dropout, 380 seed=self.seed, 381 ) 382 383 # with bias term in the hidden layer 384 if W is None: 385 
n_features_1 = n_features + 1 386 387 if self.nodes_sim == "sobol": 388 self.W_ = generate_sobol( 389 n_dims=n_features_1, 390 n_points=self.n_hidden_features, 391 seed=self.seed, 392 ) 393 elif self.nodes_sim == "hammersley": 394 self.W_ = generate_hammersley( 395 n_dims=n_features_1, 396 n_points=self.n_hidden_features, 397 seed=self.seed, 398 ) 399 elif self.nodes_sim == "uniform": 400 self.W_ = generate_uniform( 401 n_dims=n_features_1, 402 n_points=self.n_hidden_features, 403 seed=self.seed, 404 ) 405 else: 406 self.W_ = generate_halton( 407 n_dims=n_features_1, 408 n_points=self.n_hidden_features, 409 seed=self.seed, 410 ) 411 412 # self.W_ = hash_sim[self.nodes_sim]( 413 # n_dims=n_features_1, 414 # n_points=self.n_hidden_features, 415 # seed=self.seed, 416 # ) 417 418 return mo.dropout( 419 x=self.activation_func( 420 mo.safe_sparse_dot( 421 a=mo.cbind( 422 np.ones(scaled_X.shape[0]), 423 scaled_X, 424 backend=self.backend, 425 ), 426 b=self.W_, 427 backend=self.backend, 428 ) 429 ), 430 drop_prob=self.dropout, 431 seed=self.seed, 432 ) 433 434 # W is not None 435 # self.W_ = W 436 return mo.dropout( 437 x=self.activation_func( 438 mo.safe_sparse_dot( 439 a=mo.cbind( 440 np.ones(scaled_X.shape[0]), 441 scaled_X, 442 backend=self.backend, 443 ), 444 b=W, 445 backend=self.backend, 446 ) 447 ), 448 drop_prob=self.dropout, 449 seed=self.seed, 450 ) 451 452 def _jax_create_layer(self, scaled_X, W=None): 453 """JAX-compatible version of create_layer that exactly matches the original functionality.""" 454 key = jax.random.PRNGKey(self.seed) 455 n_features = scaled_X.shape[1] 456 457 # Generate weights if not provided 458 if W is None: 459 if self.bias: 460 n_features_1 = n_features + 1 461 shape = (n_features_1, self.n_hidden_features) 462 else: 463 shape = (n_features, self.n_hidden_features) 464 465 # JAX-compatible weight generation matching original behavior 466 if self.nodes_sim == "sobol": 467 W_np = generate_sobol( 468 n_dims=n_features_1, 469 
n_points=self.n_hidden_features, 470 seed=self.seed, 471 ) 472 W = jnp.asarray(W_np) 473 elif self.nodes_sim == "hammersley": 474 W_np = generate_hammersley( 475 n_dims=n_features_1, 476 n_points=self.n_hidden_features, 477 seed=self.seed, 478 ) 479 W = jnp.asarray(W_np) 480 elif self.nodes_sim == "uniform": 481 key, subkey = jax.random.split(key) 482 W = jax.random.uniform( 483 subkey, shape=shape, minval=-1.0, maxval=1.0 484 ) 485 else: # halton 486 W_np = generate_halton( 487 n_dims=n_features_1, 488 n_points=self.n_hidden_features, 489 seed=self.seed, 490 ) 491 W = jnp.asarray(W_np) 492 493 self.W_ = np.array(W) # Store as numpy for original methods 494 495 # Prepare input with bias if needed 496 if self.bias: 497 X_with_bias = jnp.hstack( 498 [jnp.ones((scaled_X.shape[0], 1)), scaled_X] 499 ) 500 print("X_with_bias shape:", X_with_bias.shape) 501 print("W shape:", W.shape) 502 linear_output = jnp.dot(X_with_bias, W) 503 else: 504 linear_output = jnp.dot(scaled_X, W) 505 506 # Apply activation function 507 if self.activation_name == "relu": 508 activated = jax.nn.relu(linear_output) 509 elif self.activation_name == "tanh": 510 activated = jnp.tanh(linear_output) 511 elif self.activation_name == "sigmoid": 512 activated = jax.nn.sigmoid(linear_output) 513 else: # leaky relu 514 activated = jax.nn.leaky_relu(linear_output, negative_slope=self.a) 515 516 # Apply dropout 517 if self.dropout > 0: 518 key, subkey = jax.random.split(key) 519 mask = jax.random.bernoulli( 520 subkey, p=1 - self.dropout, shape=activated.shape 521 ) 522 activated = jnp.where(mask, activated / (1 - self.dropout), 0) 523 524 return activated 525 526 def cook_training_set(self, y=None, X=None, W=None, **kwargs): 527 """Create new hidden features for training set, with hidden layer, center the response. 
528 529 Parameters: 530 531 y: array-like, shape = [n_samples] 532 Target values 533 534 X: {array-like}, shape = [n_samples, n_features] 535 Training vectors, where n_samples is the number 536 of samples and n_features is the number of features 537 538 W: {array-like}, shape = [n_features, hidden_features] 539 if provided, constructs the hidden layer via W 540 541 Returns: 542 543 (centered response, direct link + hidden layer matrix): {tuple} 544 545 """ 546 547 # either X and y are stored or not 548 # assert ((y is None) & (X is None)) | ((y is not None) & (X is not None)) 549 if self.n_hidden_features > 0: # has a hidden layer 550 assert ( 551 len(self.type_scaling) >= 2 552 ), "must have len(self.type_scaling) >= 2 when self.n_hidden_features > 0" 553 554 if X is None: 555 if self.col_sample == 1: 556 input_X = self.X_ 557 else: 558 n_features = self.X_.shape[1] 559 new_n_features = int(np.ceil(n_features * self.col_sample)) 560 assert ( 561 new_n_features >= 1 562 ), "check class attribute 'col_sample' and the number of covariates provided for X" 563 np.random.seed(self.seed) 564 index_col = np.random.choice( 565 range(n_features), size=new_n_features, replace=False 566 ) 567 self.index_col_ = index_col 568 input_X = self.X_[:, self.index_col_] 569 570 else: # X is not None # keep X vs self.X_ 571 if isinstance(X, pd.DataFrame): 572 X = copy.deepcopy(X.values.astype(float)) 573 574 if self.col_sample == 1: 575 input_X = X 576 else: 577 n_features = X.shape[1] 578 new_n_features = int(np.ceil(n_features * self.col_sample)) 579 assert ( 580 new_n_features >= 1 581 ), "check class attribute 'col_sample' and the number of covariates provided for X" 582 np.random.seed(self.seed) 583 index_col = np.random.choice( 584 range(n_features), size=new_n_features, replace=False 585 ) 586 self.index_col_ = index_col 587 input_X = X[:, self.index_col_] 588 589 if self.n_clusters <= 0: 590 # data without any clustering: self.n_clusters is None ----- 591 592 if 
self.n_hidden_features > 0: # with hidden layer 593 self.nn_scaler_, scaled_X = mo.scale_covariates( 594 input_X, choice=self.type_scaling[1], scaler=self.nn_scaler_ 595 ) 596 Phi_X = ( 597 self.create_layer(scaled_X) 598 if W is None 599 else self.create_layer(scaled_X, W=W) 600 ) 601 Z = ( 602 mo.cbind(input_X, Phi_X, backend=self.backend) 603 if self.direct_link is True 604 else Phi_X 605 ) 606 self.scaler_, scaled_Z = mo.scale_covariates( 607 Z, choice=self.type_scaling[0], scaler=self.scaler_ 608 ) 609 else: # no hidden layer 610 Z = input_X 611 self.scaler_, scaled_Z = mo.scale_covariates( 612 Z, choice=self.type_scaling[0], scaler=self.scaler_ 613 ) 614 615 else: 616 # data with clustering: self.n_clusters is not None ----- # keep 617 618 augmented_X = mo.cbind( 619 input_X, 620 self.encode_clusters(input_X, **kwargs), 621 backend=self.backend, 622 ) 623 624 if self.n_hidden_features > 0: # with hidden layer 625 self.nn_scaler_, scaled_X = mo.scale_covariates( 626 augmented_X, 627 choice=self.type_scaling[1], 628 scaler=self.nn_scaler_, 629 ) 630 Phi_X = ( 631 self.create_layer(scaled_X) 632 if W is None 633 else self.create_layer(scaled_X, W=W) 634 ) 635 Z = ( 636 mo.cbind(augmented_X, Phi_X, backend=self.backend) 637 if self.direct_link is True 638 else Phi_X 639 ) 640 self.scaler_, scaled_Z = mo.scale_covariates( 641 Z, choice=self.type_scaling[0], scaler=self.scaler_ 642 ) 643 else: # no hidden layer 644 Z = augmented_X 645 self.scaler_, scaled_Z = mo.scale_covariates( 646 Z, choice=self.type_scaling[0], scaler=self.scaler_ 647 ) 648 649 # Returning model inputs ----- 650 if mx.is_factor(y) is False: # regression 651 # center y 652 if y is None: 653 self.y_mean_, centered_y = mo.center_response(self.y_) 654 else: 655 self.y_mean_, centered_y = mo.center_response(y) 656 657 # y is subsampled 658 if self.row_sample < 1: 659 n, p = Z.shape 660 661 self.subsampler_ = ( 662 SubSampler( 663 y=self.y_, row_sample=self.row_sample, seed=self.seed 664 ) 665 if y 
is None 666 else SubSampler( 667 y=y, row_sample=self.row_sample, seed=self.seed 668 ) 669 ) 670 671 self.index_row_ = self.subsampler_.subsample() 672 673 n_row_sample = len(self.index_row_) 674 # regression 675 return ( 676 centered_y[self.index_row_].reshape(n_row_sample), 677 self.scaler_.transform( 678 Z[self.index_row_, :].reshape(n_row_sample, p) 679 ), 680 ) 681 # y is not subsampled 682 # regression 683 return (centered_y, self.scaler_.transform(Z)) 684 685 # classification 686 # y is subsampled 687 if self.row_sample < 1: 688 n, p = Z.shape 689 690 self.subsampler_ = ( 691 SubSampler( 692 y=self.y_, row_sample=self.row_sample, seed=self.seed 693 ) 694 if y is None 695 else SubSampler(y=y, row_sample=self.row_sample, seed=self.seed) 696 ) 697 698 self.index_row_ = self.subsampler_.subsample() 699 700 n_row_sample = len(self.index_row_) 701 # classification 702 return ( 703 y[self.index_row_].reshape(n_row_sample), 704 self.scaler_.transform( 705 Z[self.index_row_, :].reshape(n_row_sample, p) 706 ), 707 ) 708 # y is not subsampled 709 # classification 710 return (y, self.scaler_.transform(Z)) 711 712 def cook_test_set(self, X, **kwargs): 713 """Transform data from test set, with hidden layer. 
714 715 Parameters: 716 717 X: {array-like}, shape = [n_samples, n_features] 718 Training vectors, where n_samples is the number 719 of samples and n_features is the number of features 720 721 **kwargs: additional parameters to be passed to self.encode_cluster 722 723 Returns: 724 725 Transformed test set : {array-like} 726 """ 727 728 if isinstance(X, pd.DataFrame): 729 X = copy.deepcopy(X.values.astype(float)) 730 731 if len(X.shape) == 1: 732 X = X.reshape(1, -1) 733 734 if ( 735 self.n_clusters == 0 736 ): # data without clustering: self.n_clusters is None ----- 737 if self.n_hidden_features > 0: 738 # if hidden layer 739 scaled_X = ( 740 self.nn_scaler_.transform(X) 741 if (self.col_sample == 1) 742 else self.nn_scaler_.transform(X[:, self.index_col_]) 743 ) 744 Phi_X = self.create_layer(scaled_X, self.W_) 745 if self.direct_link: 746 return self.scaler_.transform( 747 mo.cbind(scaled_X, Phi_X, backend=self.backend) 748 ) 749 # when self.direct_link == False 750 return self.scaler_.transform(Phi_X) 751 # if no hidden layer # self.n_hidden_features == 0 752 return self.scaler_.transform(X) 753 754 # data with clustering: self.n_clusters > 0 ----- 755 if self.col_sample == 1: 756 predicted_clusters = self.encode_clusters( 757 X=X, predict=True, **kwargs 758 ) 759 augmented_X = mo.cbind(X, predicted_clusters, backend=self.backend) 760 else: 761 predicted_clusters = self.encode_clusters( 762 X=X[:, self.index_col_], predict=True, **kwargs 763 ) 764 augmented_X = mo.cbind( 765 X[:, self.index_col_], predicted_clusters, backend=self.backend 766 ) 767 768 if self.n_hidden_features > 0: # if hidden layer 769 scaled_X = self.nn_scaler_.transform(augmented_X) 770 Phi_X = self.create_layer(scaled_X, self.W_) 771 if self.direct_link: 772 return self.scaler_.transform( 773 mo.cbind(augmented_X, Phi_X, backend=self.backend) 774 ) 775 return self.scaler_.transform(Phi_X) 776 777 # if no hidden layer 778 return self.scaler_.transform(augmented_X) 779 780 def 
cook_training_set_jax(self, y=None, X=None, W=None, **kwargs): 781 """JAX-compatible version of cook_training_set that maintains side effects.""" 782 # Initialize random key 783 key = jax.random.PRNGKey(self.seed) 784 785 # Convert inputs to JAX arrays 786 X = jnp.asarray(X) if X is not None else jnp.asarray(self.X_) 787 y = jnp.asarray(y) if y is not None else jnp.asarray(self.y_) 788 789 # Handle column sampling 790 if self.col_sample < 1: 791 n_features = X.shape[1] 792 new_n_features = int(jnp.ceil(n_features * self.col_sample)) 793 assert new_n_features >= 1, "Invalid col_sample" 794 795 key, subkey = jax.random.split(key) 796 index_col = jax.random.choice( 797 subkey, n_features, shape=(new_n_features,), replace=False 798 ) 799 self.index_col_ = np.array( 800 index_col 801 ) # Store as numpy for original methods 802 input_X = X[:, index_col] 803 n_features = ( 804 new_n_features # Update n_features after column sampling 805 ) 806 else: 807 input_X = X 808 n_features = X.shape[1] 809 810 augmented_X = input_X 811 812 # JAX-compatible scaling 813 def jax_scale(data, mean=None, std=None): 814 if mean is None: 815 mean = jnp.mean(data, axis=0) 816 if std is None: 817 std = jnp.std(data, axis=0) 818 return (data - mean) / (std + 1e-10), mean, std 819 820 # Hidden layer processing 821 if self.n_hidden_features > 0: 822 # Initialize weights if not provided 823 if W is None: 824 shape = (n_features, self.n_hidden_features) 825 826 # JAX-compatible weight generation 827 if self.nodes_sim == "uniform": 828 key, subkey = jax.random.split(key) 829 W = jax.random.uniform( 830 subkey, shape=shape, minval=-1.0, maxval=1.0 831 ) * (1 / jnp.sqrt(n_features)) 832 else: 833 # For other sequences, use numpy generation then convert to JAX 834 if self.nodes_sim == "sobol": 835 W_np = generate_sobol( 836 n_dims=shape[0], 837 n_points=shape[1], 838 seed=self.seed, 839 ) 840 elif self.nodes_sim == "hammersley": 841 W_np = generate_hammersley( 842 n_dims=shape[0], 843 
n_points=shape[1], 844 seed=self.seed, 845 ) 846 elif self.nodes_sim == "halton": 847 W_np = generate_halton( 848 n_dims=shape[0], 849 n_points=shape[1], 850 seed=self.seed, 851 ) 852 else: # default to uniform 853 key, subkey = jax.random.split(key) 854 W = jax.random.uniform( 855 subkey, shape=shape, minval=-1.0, maxval=1.0 856 ) * (1 / jnp.sqrt(n_features)) 857 858 if self.nodes_sim in ["sobol", "hammersley", "halton"]: 859 W = jnp.asarray(W_np) * (1 / jnp.sqrt(n_features)) 860 861 self.W_ = np.array(W) # Store as numpy for original methods 862 863 # Scale features 864 scaled_X, self.nn_mean_, self.nn_std_ = jax_scale( 865 augmented_X, 866 getattr(self, "nn_mean_", None), 867 getattr(self, "nn_std_", None), 868 ) 869 870 # Create hidden layer with proper bias handling 871 linear_output = jnp.dot(scaled_X, W) 872 873 # Apply activation 874 if self.activation_name == "relu": 875 Phi_X = jax.nn.relu(linear_output) 876 elif self.activation_name == "tanh": 877 Phi_X = jnp.tanh(linear_output) 878 elif self.activation_name == "sigmoid": 879 Phi_X = jax.nn.sigmoid(linear_output) 880 else: # leaky relu 881 Phi_X = jax.nn.leaky_relu(linear_output, negative_slope=self.a) 882 883 # Apply dropout 884 if self.dropout > 0: 885 key, subkey = jax.random.split(key) 886 mask = jax.random.bernoulli( 887 subkey, p=1 - self.dropout, shape=Phi_X.shape 888 ) 889 Phi_X = jnp.where(mask, Phi_X / (1 - self.dropout), 0) 890 891 Z = jnp.hstack([scaled_X, Phi_X]) if self.direct_link else Phi_X 892 else: 893 Z = augmented_X 894 895 # Final scaling 896 scaled_Z, self.scale_mean_, self.scale_std_ = jax_scale( 897 Z, 898 getattr(self, "scale_mean_", None), 899 getattr(self, "scale_std_", None), 900 ) 901 902 # Center response for regression 903 if not hasattr(mx, "is_factor") or not mx.is_factor( 904 y 905 ): # regression case 906 self.y_mean_ = float( 907 jnp.mean(y) 908 ) # Convert to Python float for compatibility 909 centered_y = y - self.y_mean_ 910 else: 911 centered_y = y 912 913 # Handle 
row sampling 914 if self.row_sample < 1: 915 key, subkey = jax.random.split(key) 916 n_samples = Z.shape[0] 917 n_row_sample = int(jnp.ceil(n_samples * self.row_sample)) 918 index_row = jax.random.choice( 919 subkey, n_samples, shape=(n_row_sample,), replace=False 920 ) 921 self.index_row_ = np.array( 922 index_row 923 ) # Store as numpy for original methods 924 return (centered_y[index_row], scaled_Z[index_row]) 925 926 return (centered_y, scaled_Z) 927 928 def cook_test_set_jax(self, X, **kwargs): 929 """JAX-compatible test set processing with matching dimension handling.""" 930 X = jnp.asarray(X) 931 932 if len(X.shape) == 1: 933 X = X.reshape(1, -1) 934 935 # Handle column sampling 936 input_X = ( 937 X if self.col_sample == 1 else X[:, jnp.asarray(self.index_col_)] 938 ) 939 940 augmented_X = input_X 941 942 # JAX-compatible scaling 943 scaled_X = (augmented_X - self.nn_mean_) / (self.nn_std_ + 1e-10) 944 945 # Process hidden layer if needed 946 if self.n_hidden_features > 0: 947 Phi_X = self._jax_create_layer(scaled_X, jnp.asarray(self.W_)) 948 Z = jnp.hstack([scaled_X, Phi_X]) if self.direct_link else Phi_X 949 else: 950 Z = augmented_X 951 952 # Final scaling 953 scaled_Z = (Z - self.scale_mean_) / (self.scale_std_ + 1e-10) 954 955 return scaled_Z 956 957 def _jax_create_layer(self, X, W): 958 """JAX-compatible hidden layer creation.""" 959 # print("X", X.shape) 960 # print("W", W.shape) 961 # print("self.W_", self.W_.shape) 962 linear_output = jnp.dot(X, W) 963 964 if self.activation_name == "relu": 965 return jax.nn.relu(linear_output) 966 elif self.activation_name == "tanh": 967 return jnp.tanh(linear_output) 968 elif self.activation_name == "sigmoid": 969 return jax.nn.sigmoid(linear_output) 970 else: # leaky relu 971 return jax.nn.leaky_relu(linear_output, negative_slope=self.a) 972 973 def cross_val_score( 974 self, 975 X, 976 y, 977 cv=5, 978 scoring="accuracy", 979 random_state=42, 980 n_jobs=-1, 981 epsilon=0.5, 982 penalized=True, 983 
objective="abs", 984 **kwargs 985 ): 986 """ 987 Penalized Cross-validation score for a model. 988 989 Parameters: 990 991 X: {array-like}, shape = [n_samples, n_features] 992 Training vectors, where n_samples is the number 993 of samples and n_features is the number of features 994 995 y: array-like, shape = [n_samples] 996 Target values 997 998 X_test: {array-like}, shape = [n_samples, n_features] 999 Test vectors, where n_samples is the number 1000 of samples and n_features is the number of features 1001 1002 y_test: array-like, shape = [n_samples] 1003 Target values 1004 1005 cv: int 1006 Number of folds 1007 1008 scoring: str 1009 Scoring metric 1010 1011 random_state: int 1012 Random state 1013 1014 n_jobs: int 1015 Number of jobs to run in parallel 1016 1017 epsilon: float 1018 Penalty parameter 1019 1020 penalized: bool 1021 Whether to obtain penalized cross-validation score or not 1022 1023 objective: str 1024 'abs': Minimize the absolute difference between cross-validation score and validation score 1025 'relative': Minimize the relative difference between cross-validation score and validation score 1026 Returns: 1027 1028 A namedtuple with the following fields: 1029 - cv_score: float 1030 cross-validation score 1031 - val_score: float 1032 validation score 1033 - penalized_score: float 1034 penalized cross-validation score: cv_score / val_score + epsilon*(1/val_score + 1/cv_score) 1035 If higher scoring metric is better, minimize the function result. 1036 If lower scoring metric is better, maximize the function result. 
1037 """ 1038 if scoring == "accuracy": 1039 scoring_func = accuracy_score 1040 elif scoring == "balanced_accuracy": 1041 scoring_func = balanced_accuracy_score 1042 elif scoring == "f1": 1043 scoring_func = f1_score 1044 elif scoring == "roc_auc": 1045 scoring_func = roc_auc_score 1046 elif scoring == "r2": 1047 scoring_func = r2_score 1048 elif scoring == "mse": 1049 scoring_func = mean_squared_error 1050 elif scoring == "mae": 1051 scoring_func = mean_absolute_error 1052 elif scoring == "mape": 1053 scoring_func = mean_absolute_percentage_error 1054 elif scoring == "rmse": 1055 1056 def scoring_func(y_true, y_pred): 1057 return np.sqrt(mean_squared_error(y_true, y_pred)) 1058 1059 X_train, X_val, y_train, y_val = train_test_split( 1060 X, y, test_size=0.2, random_state=random_state 1061 ) 1062 1063 res = cross_val_score( 1064 self, X_train, y_train, cv=cv, scoring=scoring, n_jobs=n_jobs 1065 ) # cross-validation error 1066 1067 if penalized == False: 1068 return res 1069 1070 DescribeResult = namedtuple( 1071 "DescribeResult", ["cv_score", "val_score", "penalized_score"] 1072 ) 1073 1074 numerator = res.mean() 1075 1076 # Evaluate on the (cv+1)-th fold 1077 preds_val = self.fit(X_train, y_train).predict(X_val) 1078 try: 1079 denominator = scoring(y_val, preds_val) # validation error 1080 except Exception as e: 1081 denominator = scoring_func(y_val, preds_val) 1082 1083 # if higher is better 1084 if objective == "abs": 1085 penalized_score = np.abs(numerator - denominator) + epsilon * ( 1086 1 / denominator + 1 / numerator 1087 ) 1088 elif objective == "relative": 1089 ratio = numerator / denominator 1090 penalized_score = np.abs(ratio - 1) + epsilon * ( 1091 1 / denominator + 1 / numerator 1092 ) 1093 1094 return DescribeResult( 1095 cv_score=numerator, 1096 val_score=denominator, 1097 penalized_score=penalized_score, 1098 )
Base model from which all the other classes inherit.
This class contains the most important data preprocessing/feature engineering methods.
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or
not (False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original features are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for type_clust='kmeans' or type_clust='gmm'
clustering (could be 0: no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot);
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax') or robust scaling ('robust') or max absolute scaling ('maxabs')
col_sample: float
fraction of features (between 0 and 1, both included) randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform', clustering and dropout
backend: str
"cpu" or "gpu" or "tpu"
def encode_clusters(self, X=None, predict=False, scaler=None, **kwargs):
    """Create new covariates from cluster memberships (k-means or GMM).

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Input vectors, where n_samples is the number of samples and
            n_features is the number of features. `self.X_` is used when
            X is None; a DataFrame is converted to a float array and a 1-D
            input is reshaped to a single row.

        predict: boolean
            is False on training set and True on test set. On the training
            set, `self.clustering_scaler_` and `self.clustering_obj_` are
            replaced by the objects returned by `mo.scale_covariates` and
            `mo.cluster_covariates`; on the test set the stored objects are
            only applied (`transform`, then `predict`).

        scaler: {object} of class StandardScaler, MinMaxScaler, RobustScaler or MaxAbsScaler
            kept in the signature for backward compatibility; this method
            does not read it (the stored `self.clustering_scaler_` is the
            scaler handed to `mo.scale_covariates`).

        **kwargs:
            additional parameters passed to `mo.cluster_covariates`
            (training set only)

    Returns:

        {array-like} of dtype float16: the one-hot encoded clusters' matrix
        when `self.cluster_encode` is truthy, the cluster labels otherwise.

    """

    # NOTE: reseeds NumPy's *global* random state on every call.
    np.random.seed(self.seed)

    if X is None:
        X = self.X_

    if isinstance(X, pd.DataFrame):
        X = copy.deepcopy(X.values.astype(float))

    if len(X.shape) == 1:
        X = X.reshape(1, -1)

    if predict is False:  # encode training set
        # scale input data before clustering
        self.clustering_scaler_, scaled_X = mo.scale_covariates(
            X, choice=self.type_scaling[2], scaler=self.clustering_scaler_
        )
        self.clustering_obj_, X_clustered = mo.cluster_covariates(
            scaled_X,
            self.n_clusters,
            self.seed,
            type_clust=self.type_clust,
            **kwargs
        )
    else:  # encode test set with the objects stored at training time
        X_clustered = self.clustering_obj_.predict(
            self.clustering_scaler_.transform(X)
        )

    # One shared truthiness test: the training and test encodings must agree,
    # otherwise the two sets end up with a different number of columns.
    if self.cluster_encode:
        X_clustered = mo.one_hot_encode(X_clustered, self.n_clusters)

    return X_clustered.astype(np.float16)
Create new covariates with kmeans or GMM clustering
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
predict: boolean
is False on training set and True on test set
scaler: {object} of class StandardScaler, MinMaxScaler, RobustScaler or MaxAbsScaler
if scaler has already been fitted on training data (online training), it can be passed here
**kwargs:
additional parameters to be passed to the
clustering method
Returns:
Clusters' matrix: {array-like}, one-hot encoded when `cluster_encode` is True; otherwise the cluster labels
def create_layer(self, scaled_X, W=None):
    """Create hidden layer.

    The layer is `activation_func(A @ W)` followed by `mo.dropout`, where A
    is `scaled_X`, preceded by a column of ones when the layer has a bias
    term.

    Parameters:

        scaled_X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features

        W: {array-like}, shape = [n_features, hidden_features]
            ([n_features + 1, hidden_features] with a bias term)
            if provided, constructs the hidden layer with W and leaves
            `self.W_` untouched; otherwise W is simulated according to
            `self.nodes_sim` ('sobol', 'hammersley', 'uniform'; anything
            else falls back to Halton) and stored in `self.W_`

    Returns:

        Hidden layer matrix: {array-like}

    Raises:

        AssertionError: if the number of rows of W does not match the
        number of columns of the (bias-augmented) covariates

    """

    n_features = scaled_X.shape[1]

    # Identity test kept on purpose: only the literal `False` disables the
    # bias term, any other value selects the bias branch.
    with_bias = self.bias is not False
    n_rows = n_features + 1 if with_bias else n_features

    if W is None:
        # Generators are looked up only here, so a caller-supplied W never
        # touches them.
        simulators = {
            "sobol": generate_sobol,
            "hammersley": generate_hammersley,
            "uniform": generate_uniform,
        }
        simulate = simulators.get(self.nodes_sim, generate_halton)
        self.W_ = simulate(
            n_dims=n_rows,
            n_points=self.n_hidden_features,
            seed=self.seed,
        )
        weights = self.W_
    else:
        weights = W

    # Same check for every branch (it used to be skipped with a bias term).
    assert (
        weights.shape[0] == n_rows
    ), "check dimensions of covariates X and matrix W"

    layer_input = scaled_X
    if with_bias:
        layer_input = mo.cbind(
            np.ones(scaled_X.shape[0]),
            scaled_X,
            backend=self.backend,
        )

    return mo.dropout(
        x=self.activation_func(
            mo.safe_sparse_dot(
                a=layer_input, b=weights, backend=self.backend
            )
        ),
        drop_prob=self.dropout,
        seed=self.seed,
    )
Create hidden layer.
Parameters:
scaled_X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features
W: {array-like}, shape = [n_features, hidden_features] (n_features + 1 rows when `bias` is True)
if provided, constructs the hidden layer with W; otherwise computed internally
Returns:
Hidden layer matrix: {array-like}
def cook_training_set(self, y=None, X=None, W=None, **kwargs):
    """Create new hidden features for training set, with hidden layer, center the response.

    When `X` (resp. `y`) is None, the stored `self.X_` (resp. `self.y_`)
    is used instead. As side effects, this method (re)fits and stores
    `self.nn_scaler_` and `self.scaler_`, and sets `self.index_col_`
    (when `col_sample != 1`), `self.y_mean_` (regression only),
    `self.subsampler_` and `self.index_row_` (when `row_sample < 1`).

    Parameters:

        y: array-like, shape = [n_samples]
            Target values

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features

        W: {array-like}, shape = [n_features, hidden_features]
            if provided, constructs the hidden layer via W

        **kwargs: additional parameters to be passed to self.encode_clusters

    Returns:

        (response, direct link + hidden layer matrix): {tuple}
            the response is centered for regression and left untouched
            for classification

    """

    if self.n_hidden_features > 0:  # has a hidden layer
        assert (
            len(self.type_scaling) >= 2
        ), "must have len(self.type_scaling) >= 2 when self.n_hidden_features > 0"

    # Covariates: stored ones when X is not provided, otherwise X itself
    # (data frames are converted to a float array first).
    if X is None:
        features = self.X_
    elif isinstance(X, pd.DataFrame):
        features = copy.deepcopy(X.values.astype(float))
    else:
        features = X

    # Optional random selection of columns; the indices are kept in
    # self.index_col_ so that cook_test_set can select the same columns.
    if self.col_sample == 1:
        input_X = features
    else:
        n_features = features.shape[1]
        new_n_features = int(np.ceil(n_features * self.col_sample))
        assert (
            new_n_features >= 1
        ), "check class attribute 'col_sample' and the number of covariates provided for X"
        np.random.seed(self.seed)
        self.index_col_ = np.random.choice(
            range(n_features), size=new_n_features, replace=False
        )
        input_X = features[:, self.index_col_]

    # Optional augmentation of the covariates with encoded clusters.
    if self.n_clusters <= 0:
        base_X = input_X
    else:
        base_X = mo.cbind(
            input_X,
            self.encode_clusters(input_X, **kwargs),
            backend=self.backend,
        )

    # Optional hidden layer, built on the scaled (augmented) covariates.
    if self.n_hidden_features > 0:
        self.nn_scaler_, scaled_X = mo.scale_covariates(
            base_X, choice=self.type_scaling[1], scaler=self.nn_scaler_
        )
        Phi_X = (
            self.create_layer(scaled_X)
            if W is None
            else self.create_layer(scaled_X, W=W)
        )
        Z = (
            mo.cbind(base_X, Phi_X, backend=self.backend)
            if self.direct_link is True
            else Phi_X
        )
    else:
        Z = base_X

    # Called for the scaler it stores; the scaled matrix itself is
    # recomputed below through self.scaler_.transform.
    self.scaler_, _ = mo.scale_covariates(
        Z, choice=self.type_scaling[0], scaler=self.scaler_
    )

    # Response: stored one when y is not provided. The regression test is
    # still made on the `y` argument, as before.
    response = self.y_ if y is None else y
    if mx.is_factor(y) is False:  # regression: center the response
        self.y_mean_, target = mo.center_response(response)
    else:  # classification: the response is used as is
        target = response

    if self.row_sample < 1:  # rows are subsampled
        p = Z.shape[1]
        self.subsampler_ = SubSampler(
            y=response, row_sample=self.row_sample, seed=self.seed
        )
        self.index_row_ = self.subsampler_.subsample()
        n_row_sample = len(self.index_row_)
        return (
            target[self.index_row_].reshape(n_row_sample),
            self.scaler_.transform(
                Z[self.index_row_, :].reshape(n_row_sample, p)
            ),
        )

    # rows are not subsampled
    return (target, self.scaler_.transform(Z))
Create new hidden features for training set, with hidden layer, center the response.
Parameters:
y: array-like, shape = [n_samples]
Target values
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features
W: {array-like}, shape = [n_features, hidden_features]
if provided, constructs the hidden layer via W
Returns:
(centered response, direct link + hidden layer matrix): {tuple}
def cook_test_set(self, X, **kwargs):
    """Transform data from test set, with hidden layer.

    Applies to `X` the same steps as `cook_training_set`: column
    selection (when `col_sample != 1`), cluster encoding (when
    `n_clusters > 0`), hidden layer construction with the stored `self.W_`
    (when `n_hidden_features > 0`), then scaling with the stored scalers.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Test vectors, where n_samples is the number
            of samples and n_features is the number of features

        **kwargs: additional parameters to be passed to self.encode_clusters

    Returns:

        Transformed test set : {array-like}
    """

    if isinstance(X, pd.DataFrame):
        X = copy.deepcopy(X.values.astype(float))

    if len(X.shape) == 1:  # one observation only
        X = X.reshape(1, -1)

    # Same columns as those selected on the training set. This also applies
    # when there is no hidden layer, since self.scaler_ was fitted on the
    # selected columns only.
    input_X = X if self.col_sample == 1 else X[:, self.index_col_]

    # `<= 0` matches the no-clustering test made in cook_training_set.
    if self.n_clusters <= 0:
        base_X = input_X
    else:
        predicted_clusters = self.encode_clusters(
            X=input_X, predict=True, **kwargs
        )
        base_X = mo.cbind(input_X, predicted_clusters, backend=self.backend)

    if self.n_hidden_features > 0:  # if hidden layer
        scaled_X = self.nn_scaler_.transform(base_X)
        Phi_X = self.create_layer(scaled_X, self.W_)
        if self.direct_link:
            # The direct link carries the unscaled covariates, as in
            # cook_training_set where self.scaler_ is fitted on
            # cbind(unscaled covariates, hidden layer).
            return self.scaler_.transform(
                mo.cbind(base_X, Phi_X, backend=self.backend)
            )
        # when self.direct_link == False
        return self.scaler_.transform(Phi_X)

    # if no hidden layer
    return self.scaler_.transform(base_X)
Transform data from test set, with hidden layer.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features
**kwargs: additional parameters to be passed to self.encode_cluster
Returns:
Transformed test set : {array-like}
class BaseRegressor(Base, RegressorMixin):
    """Random Vector Functional Link Network regression without shrinkage

    Parameters:

        n_hidden_features: int
            number of nodes in the hidden layer

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
            'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or
            not (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original features are included (True) in model's
            fitting or not (False)

        n_clusters: int
            number of clusters for type_clust='kmeans' or type_clust='gmm'
            clustering (could be 0: no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot);
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        col_sample: float
            percentage of features randomly chosen for training

        row_sample: float
            percentage of rows chosen for training, by stratified bootstrapping

        seed: int
            reproducibility seed for nodes_sim=='uniform', clustering and dropout

        backend: str
            "cpu" or "gpu" or "tpu"

    Attributes:

        beta_: vector
            regression coefficients

        GCV_: float
            Generalized Cross-Validation error

    """

    # construct the object -----

    def __init__(
        self,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        seed=123,
        backend="cpu",
    ):
        super().__init__(
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )

    def fit(self, X, y, **kwargs):
        """Fit BaseRegressor to training data (X, y)

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features

            y: array-like, shape = [n_samples]
                Target values

            **kwargs: additional parameters to be passed to self.cook_training_set

        Returns:

            self: object
        """

        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

        fit_obj = lmf.beta_Sigma_hat(
            X=scaled_Z, y=centered_y, backend=self.backend
        )

        self.beta_ = fit_obj["beta_hat"]

        self.GCV_ = fit_obj["GCV"]

        return self

    def predict(self, X, **kwargs):
        """Predict test data X.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features;
                a 1-dimensional array is treated as a single observation

            **kwargs: additional parameters to be passed to self.cook_test_set

        Returns:

            model predictions: {array-like}
        """

        single_obs = len(X.shape) == 1

        if single_obs:
            # A row of ones is stacked under the observation, and only the
            # first prediction is kept. The backend is passed along, as in
            # the other regressors' calls to mo.rbind.
            n_features = X.shape[0]
            inputs = mo.rbind(
                X.reshape(1, n_features),
                np.ones(n_features).reshape(1, n_features),
                backend=self.backend,
            )
        else:
            inputs = X

        preds = self.y_mean_ + mo.safe_sparse_dot(
            a=self.cook_test_set(inputs, **kwargs),
            b=self.beta_,
            backend=self.backend,
        )

        return preds[0] if single_obs else preds
Random Vector Functional Link Network regression without shrinkage
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or
not (False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original features are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for type_clust='kmeans' or type_clust='gmm'
clustering (could be 0: no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot);
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of features randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform', clustering and dropout
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
beta_: vector
regression coefficients
GCV_: float
Generalized Cross-Validation error
def fit(self, X, y, **kwargs):
    """Fit BaseRegressor to training data (X, y)

    The training set is first transformed by self.cook_training_set; the
    coefficients and the GCV error returned by lmf.beta_Sigma_hat are then
    stored in `beta_` and `GCV_`.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features

        y: array-like, shape = [n_samples]
            Target values

        **kwargs: additional parameters to be passed to self.cook_training_set

    Returns:

        self: object
    """

    response, design_matrix = self.cook_training_set(y=y, X=X, **kwargs)

    estimates = lmf.beta_Sigma_hat(
        X=design_matrix,
        y=response,
        backend=self.backend,
    )

    self.beta_ = estimates["beta_hat"]
    self.GCV_ = estimates["GCV"]

    return self
Fit BaseRegressor to training data (X, y)
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features
y: array-like, shape = [n_samples]
Target values
**kwargs: additional parameters to be passed to self.cook_training_set
Returns:
self: object
def predict(self, X, **kwargs):
    """Predict test data X.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Test vectors, where n_samples is the number
            of samples and n_features is the number of features;
            a 1-dimensional array is treated as a single observation

        **kwargs: additional parameters to be passed to self.cook_test_set

    Returns:

        model predictions: {array-like}
    """

    single_obs = len(X.shape) == 1

    if single_obs:
        # A row of ones is stacked under the observation, and only the
        # first prediction is kept. The backend is passed along, as in
        # the other regressors' calls to mo.rbind.
        n_features = X.shape[0]
        inputs = mo.rbind(
            X.reshape(1, n_features),
            np.ones(n_features).reshape(1, n_features),
            backend=self.backend,
        )
    else:
        inputs = X

    preds = self.y_mean_ + mo.safe_sparse_dot(
        a=self.cook_test_set(inputs, **kwargs),
        b=self.beta_,
        backend=self.backend,
    )

    return preds[0] if single_obs else preds
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features
**kwargs: additional parameters to be passed to self.cook_test_set
Returns:
model predictions: {array-like}
class BayesianRVFLRegressor(Base, RegressorMixin):
    """Bayesian Random Vector Functional Link Network regression with one prior

    Parameters:

        n_hidden_features: int
            number of nodes in the hidden layer

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original features are included (True) in model's fitting or not (False)

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        seed: int
            reproducibility seed for nodes_sim=='uniform'

        s: float
            std. dev. of regression parameters in Bayesian Ridge Regression

        sigma: float
            std. dev. of residuals in Bayesian Ridge Regression

        return_std: boolean
            if True, uncertainty around predictions is evaluated

        backend: str
            "cpu" or "gpu" or "tpu"

    Attributes:

        beta_: array-like
            regression's coefficients

        Sigma_: array-like
            covariance of the distribution of fitted parameters

        GCV_: float
            Generalized cross-validation error

        y_mean_: float
            average response

    Examples:

    ```python
    TBD
    ```

    """

    # construct the object -----

    def __init__(
        self,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        seed=123,
        s=0.1,
        sigma=0.05,
        return_std=True,
        backend="cpu",
    ):
        super().__init__(
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            seed=seed,
            backend=backend,
        )
        self.s = s
        self.sigma = sigma
        self.beta_ = None
        self.Sigma_ = None
        self.GCV_ = None
        self.return_std = return_std

    def fit(self, X, y, **kwargs):
        """Fit BayesianRVFLRegressor to training data (X, y).

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            **kwargs: additional parameters to be passed to
                    self.cook_training_set

        Returns:

            self: object

        """

        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

        fit_obj = lmf.beta_Sigma_hat_rvfl(
            X=scaled_Z,
            y=centered_y,
            s=self.s,
            sigma=self.sigma,
            fit_intercept=False,
            return_cov=self.return_std,
            backend=self.backend,
        )

        self.beta_ = fit_obj["beta_hat"]

        # the covariance is only stored when it was requested
        if self.return_std:
            self.Sigma_ = fit_obj["Sigma_hat"]

        self.GCV_ = fit_obj["GCV"]

        return self

    def predict(self, X, return_std=False, **kwargs):
        """Predict test data X.

        The `return_std` argument only controls what this call returns; the
        `return_std` attribute set at construction (which decides whether
        `fit` stores `Sigma_`) is left untouched, so that refitting the
        model after a prediction still computes the covariance.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features;
                a 1-dimensional array is treated as a single observation.

            return_std: {boolean}, standard dev. is returned or not

            **kwargs: additional parameters to be passed to
                    self.cook_test_set

        Returns:

            model predictions: {array-like}, or the tuple
            (predictions, standard deviations) when `return_std` is True

        """

        single_obs = len(X.shape) == 1

        if single_obs:  # one observation in the test set only
            # a row of ones is stacked under the observation, and only the
            # first prediction is kept
            n_features = X.shape[0]
            inputs = mo.rbind(
                x=X.reshape(1, n_features),
                y=np.ones(n_features).reshape(1, n_features),
                backend=self.backend,
            )
        else:
            inputs = X

        Z = self.cook_test_set(inputs, **kwargs)

        if not return_std:
            preds = self.y_mean_ + mo.safe_sparse_dot(
                a=Z,
                b=self.beta_,
                backend=self.backend,
            )
            return preds[0] if single_obs else preds

        # standard deviation required along with the predictions
        pred_obj = lmf.beta_Sigma_hat_rvfl(
            s=self.s,
            sigma=self.sigma,
            X_star=Z,
            return_cov=True,
            beta_hat_=self.beta_,
            Sigma_hat_=self.Sigma_,
            backend=self.backend,
        )

        if single_obs:
            return (
                self.y_mean_ + pred_obj["preds"][0],
                pred_obj["preds_std"][0],
            )

        return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])
Bayesian Random Vector Functional Link Network regression with one prior
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not (False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original features are included (True) in model's fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
seed: int
reproducibility seed for nodes_sim=='uniform'
s: float
std. dev. of regression parameters in Bayesian Ridge Regression
sigma: float
std. dev. of residuals in Bayesian Ridge Regression
return_std: boolean
if True, uncertainty around predictions is evaluated
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
beta_: array-like
regression's coefficients
Sigma_: array-like
covariance of the distribution of fitted parameters
GCV_: float
Generalized cross-validation error
y_mean_: float
average response
Examples:
TBD
def fit(self, X, y, **kwargs):
    """Fit BayesianRVFLRegressor to training data (X, y).

    The training set is first transformed by self.cook_training_set; the
    quantities returned by lmf.beta_Sigma_hat_rvfl are then stored in
    `beta_`, `GCV_` and, when `self.return_std` is True, `Sigma_`.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        **kwargs: additional parameters to be passed to
                self.cook_training_set

    Returns:

        self: object

    """

    response, design_matrix = self.cook_training_set(y=y, X=X, **kwargs)

    solver_args = dict(
        X=design_matrix,
        y=response,
        s=self.s,
        sigma=self.sigma,
        fit_intercept=False,
        return_cov=self.return_std,
        backend=self.backend,
    )
    estimates = lmf.beta_Sigma_hat_rvfl(**solver_args)

    self.beta_ = estimates["beta_hat"]

    # the covariance is only read when it was requested
    if self.return_std == True:  # noqa: E712
        self.Sigma_ = estimates["Sigma_hat"]

    self.GCV_ = estimates["GCV"]

    return self
Fit BayesianRVFLRegressor to training data (X, y).
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set
Returns:
self: object
def predict(self, X, return_std=False, **kwargs):
    """Predict test data X.

    The `return_std` argument only controls what this call returns; the
    `return_std` attribute set at construction is left untouched, so that
    refitting the model after a prediction is not affected by this call.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Test vectors, where n_samples is the number
            of samples and n_features is the number of features;
            a 1-dimensional array is treated as a single observation.

        return_std: {boolean}, standard dev. is returned or not

        **kwargs: additional parameters to be passed to
                self.cook_test_set

    Returns:

        model predictions: {array-like}, or the tuple
        (predictions, standard deviations) when `return_std` is True

    """

    single_obs = len(X.shape) == 1

    if single_obs:  # one observation in the test set only
        # a row of ones is stacked under the observation, and only the
        # first prediction is kept
        n_features = X.shape[0]
        inputs = mo.rbind(
            x=X.reshape(1, n_features),
            y=np.ones(n_features).reshape(1, n_features),
            backend=self.backend,
        )
    else:
        inputs = X

    Z = self.cook_test_set(inputs, **kwargs)

    if not return_std:
        preds = self.y_mean_ + mo.safe_sparse_dot(
            a=Z,
            b=self.beta_,
            backend=self.backend,
        )
        return preds[0] if single_obs else preds

    # standard deviation required along with the predictions
    pred_obj = lmf.beta_Sigma_hat_rvfl(
        s=self.s,
        sigma=self.sigma,
        X_star=Z,
        return_cov=True,
        beta_hat_=self.beta_,
        Sigma_hat_=self.Sigma_,
        backend=self.backend,
    )

    if single_obs:
        return (
            self.y_mean_ + pred_obj["preds"][0],
            pred_obj["preds_std"][0],
        )

    return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
return_std: {boolean}, standard dev. is returned or not
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
class BayesianRVFL2Regressor(Base, RegressorMixin):
    """Bayesian Random Vector Functional Link Network regression with two priors

    Parameters:

        n_hidden_features: int
            number of nodes in the hidden layer

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original features are included (True) in model's fitting or not (False)

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        seed: int
            reproducibility seed for nodes_sim=='uniform'

        s1: float
            std. dev. of init. regression parameters in Bayesian Ridge Regression

        s2: float
            std. dev. of augmented regression parameters in Bayesian Ridge Regression

        sigma: float
            std. dev. of residuals in Bayesian Ridge Regression

        return_std: boolean
            if True, uncertainty around predictions is evaluated

        backend: str
            "cpu" or "gpu" or "tpu"

    Attributes:

        beta_: array-like
            regression's coefficients

        Sigma_: array-like
            covariance of the distribution of fitted parameters

        GCV_: float
            Generalized cross-validation error

        y_mean_: float
            average response

    Examples:

    ```python
    TBD
    ```

    """

    # construct the object -----

    def __init__(
        self,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=0,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        seed=123,
        s1=0.1,
        s2=0.1,
        sigma=0.05,
        return_std=True,
        backend="cpu",
    ):
        super().__init__(
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            seed=seed,
            backend=backend,
        )

        self.s1 = s1
        self.s2 = s2
        self.sigma = sigma
        self.beta_ = None
        self.Sigma_ = None
        self.GCV_ = None
        self.return_std = return_std
        self.coef_ = None

    def fit(self, X, y, **kwargs):
        """Fit BayesianRVFL2Regressor to training data (X, y)

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features

            y: array-like, shape = [n_samples]
                Target values

            **kwargs: additional parameters to be passed to
                    self.cook_training_set

        Returns:

            self: object

        """

        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

        q = self.n_hidden_features

        if self.direct_link:
            # Number of direct-link columns (original features plus encoded
            # clusters), read off the design matrix so that the prior always
            # has one row/column per column of scaled_Z.
            # NOTE(review): assumes the hidden layer contributes exactly
            # n_hidden_features columns to scaled_Z -- confirm against
            # create_layer.
            r = scaled_Z.shape[1] - q

            # block-diagonal prior covariance: s1^2 on the direct link,
            # s2^2 on the hidden layer
            block11 = (self.s1**2) * np.eye(r)
            block12 = np.zeros((r, q))
            block21 = np.zeros((q, r))
            block22 = (self.s2**2) * np.eye(q)

            Sigma_prior = mo.rbind(
                x=mo.cbind(x=block11, y=block12, backend=self.backend),
                y=mo.cbind(x=block21, y=block22, backend=self.backend),
                backend=self.backend,
            )

        else:
            Sigma_prior = (self.s2**2) * np.eye(q)

        fit_obj = lmf.beta_Sigma_hat_rvfl2(
            X=scaled_Z,
            y=centered_y,
            Sigma=Sigma_prior,
            sigma=self.sigma,
            fit_intercept=False,
            return_cov=self.return_std,
            backend=self.backend,
        )

        self.beta_ = fit_obj["beta_hat"]

        self.coef_ = self.beta_

        # the covariance is only stored when it was requested
        if self.return_std:
            self.Sigma_ = fit_obj["Sigma_hat"]

        self.GCV_ = fit_obj["GCV"]

        return self

    def predict(self, X, return_std=False, **kwargs):
        """Predict test data X.

        The `return_std` argument only controls what this call returns; the
        `return_std` attribute set at construction (which decides whether
        `fit` stores `Sigma_`) is left untouched, so that refitting the
        model after a prediction still computes the covariance.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features;
                a 1-dimensional array is treated as a single observation.

            return_std: {boolean}, standard dev. is returned or not

            **kwargs: additional parameters to be passed to
                    self.cook_test_set

        Returns:

            model predictions: {array-like}, or the tuple
            (predictions, standard deviations) when `return_std` is True

        """

        single_obs = len(X.shape) == 1

        if single_obs:  # one observation in the test set only
            # a row of ones is stacked under the observation, and only the
            # first prediction is kept
            n_features = X.shape[0]
            inputs = mo.rbind(
                x=X.reshape(1, n_features),
                y=np.ones(n_features).reshape(1, n_features),
                backend=self.backend,
            )
        else:
            inputs = X

        Z = self.cook_test_set(inputs, **kwargs)

        if not return_std:
            preds = self.y_mean_ + mo.safe_sparse_dot(
                Z,
                self.beta_,
                backend=self.backend,
            )
            return preds[0] if single_obs else preds

        # standard deviation required along with the predictions
        pred_obj = lmf.beta_Sigma_hat_rvfl2(
            X_star=Z,
            return_cov=True,
            beta_hat_=self.beta_,
            Sigma_hat_=self.Sigma_,
            backend=self.backend,
        )

        if single_obs:
            return (
                self.y_mean_ + pred_obj["preds"][0],
                pred_obj["preds_std"][0],
            )

        return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])
Bayesian Random Vector Functional Link Network regression with two priors
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not (False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original features are included (True) in the model's fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
seed: int
reproducibility seed for nodes_sim=='uniform'
s1: float
std. dev. of init. regression parameters in Bayesian Ridge Regression
s2: float
std. dev. of augmented regression parameters in Bayesian Ridge Regression
sigma: float
std. dev. of residuals in Bayesian Ridge Regression
return_std: boolean
if True, uncertainty around predictions is evaluated
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
beta_: array-like
regression coefficients
Sigma_: array-like
covariance of the distribution of fitted parameters
GCV_: float
Generalized cross-validation error
y_mean_: float
average response
Examples:
TBD
def fit(self, X, y, **kwargs):
    """Fit BayesianRVFL2Regressor to training data (X, y)

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features

        y: array-like, shape = [n_samples]
            Target values

        **kwargs: additional parameters to be passed to
            self.cook_training_set

    Returns:

        self: object

    """

    centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

    q = self.n_hidden_features

    if self.direct_link:
        # Size the "direct link" block from the cooked design matrix rather
        # than from `p + n_clusters`: the two agree when clusters are one-hot
        # encoded, but the prior must match scaled_Z's column count in every
        # configuration, otherwise the solver receives a mis-shaped Sigma.
        r = scaled_Z.shape[1] - q

        # Block-diagonal prior covariance: variance s1^2 on the direct-link
        # coefficients, s2^2 on the hidden-layer coefficients, no cross terms.
        block11 = (self.s1**2) * np.eye(r)
        block12 = np.zeros((r, q))
        block21 = np.zeros((q, r))
        block22 = (self.s2**2) * np.eye(q)

        Sigma_prior = mo.rbind(
            x=mo.cbind(x=block11, y=block12, backend=self.backend),
            y=mo.cbind(x=block21, y=block22, backend=self.backend),
            backend=self.backend,
        )

    else:
        # only hidden-layer features enter the regression
        Sigma_prior = (self.s2**2) * np.eye(q)

    fit_obj = lmf.beta_Sigma_hat_rvfl2(
        X=scaled_Z,
        y=centered_y,
        Sigma=Sigma_prior,
        sigma=self.sigma,
        fit_intercept=False,
        return_cov=self.return_std,
        backend=self.backend,
    )

    self.beta_ = fit_obj["beta_hat"]

    self.coef_ = self.beta_

    # the posterior covariance is only read when it was requested
    if self.return_std:
        self.Sigma_ = fit_obj["Sigma_hat"]

    self.GCV_ = fit_obj["GCV"]

    return self
Fit BayesianRVFL2Regressor to training data (X, y)
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features
y: array-like, shape = [n_samples]
Target values
**kwargs: additional parameters to be passed to
self.cook_training_set
Returns:
self: object
def predict(self, X, return_std=False, **kwargs):
    """Predict test data X.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Test vectors, where n_samples is the number
            of samples and n_features is the number of features.
            A 1-D array is treated as a single observation.

        return_std: {boolean}, standard dev. is returned or not

        **kwargs: additional parameters to be passed to
            self.cook_test_set

    Returns:

        model predictions: {array-like}
            if `return_std` is True, a tuple (predictions, standard deviations)

    """

    single_observation = len(X.shape) == 1

    if single_observation:
        # cook_test_set is fed a 2-row matrix: the observation followed by a
        # dummy row of ones; only the first output row is returned below.
        n_features = X.shape[0]
        X = mo.rbind(
            x=X.reshape(1, n_features),
            y=np.ones(n_features).reshape(1, n_features),
            backend=self.backend,
        )

    Z = self.cook_test_set(X, **kwargs)

    # NOTE: the request is kept in a local flag. Overwriting self.return_std
    # here would change a constructor hyperparameter, and a later call to
    # `fit` would then skip recomputing Sigma_ and leave a stale covariance.
    if not return_std:
        preds = self.y_mean_ + mo.safe_sparse_dot(
            Z,
            self.beta_,
            backend=self.backend,
        )
        return preds[0] if single_observation else preds

    if not hasattr(self, "Sigma_"):
        raise AttributeError(
            "Sigma_ is not available: fit the model with return_std=True "
            "before calling predict(..., return_std=True)"
        )

    pred_obj = lmf.beta_Sigma_hat_rvfl2(
        X_star=Z,
        return_cov=return_std,
        beta_hat_=self.beta_,
        Sigma_hat_=self.Sigma_,
        backend=self.backend,
    )

    if single_observation:
        return (
            self.y_mean_ + pred_obj["preds"][0],
            pred_obj["preds_std"][0],
        )

    return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
return_std: {boolean}, standard dev. is returned or not
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
class ClassicalMTS(MTS):
    """Time series with statistical models (statsmodels), mostly for benchmarks

    Parameters:

        model: type of model: str.
            currently, 'VAR', 'VECM', 'ARIMA', 'ETS', 'Theta'
            Default is 'VAR'. Set to None when `obj` is provided.

        obj: object
            A time series model from statsmodels; it is called as
            `obj(X, **kwargs).fit()` in `fit`

    Attributes:

        df_: data frame
            the input data frame, in case a data.frame is provided to `fit`

        level_: int
            level of confidence for prediction intervals (default is 95)

    Examples:
        See examples/classical_mts_timeseries.py
    """

    # construct the object -----

    def __init__(self, model="VAR", obj=None):
        if obj is not None:
            # user-supplied model: no name-based dispatch is possible
            self.model = None
            self.obj = obj
        else:
            known_models = {
                "VAR": VAR,
                "VECM": VECM,
                "ARIMA": ARIMA,
                "ETS": ExponentialSmoothing,
                "Theta": ThetaModel,
            }
            if model not in known_models:
                raise ValueError("model not recognized")
            self.model = model
            self.obj = known_models[model]
        self.n_series = None
        self.series_names = None
        self.input_dates = None
        self.replications = None
        self.mean_ = None
        self.upper_ = None
        self.lower_ = None
        self.output_dates_ = None
        self.alpha_ = None
        self.df_ = None
        self.residuals_ = []
        self.sims_ = None
        self.level_ = None

    def fit(self, X, **kwargs):
        """Fit ClassicalMTS model to training data X

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training time series, where n_samples is the number
                of samples and n_features is the number of features;
                X must be in increasing order (most recent observations last)

            **kwargs: additional parameters passed to the constructor of the
                underlying model (`self.obj`)

        Returns:

            self: object
        """

        try:
            self.n_series = X.shape[1]
        except (AttributeError, IndexError, TypeError):
            # no second dimension: treat the input as a single series
            self.n_series = 1

        if not isinstance(X, (pd.DataFrame, pd.Series)):
            # input data set is a numpy array
            X = pd.DataFrame(X)
            if self.n_series > 1:
                self.series_names = [
                    "series" + str(i) for i in range(X.shape[1])
                ]
            else:
                # kept as a plain string: `predict` relies on it to fall back
                # to a pd.Series output for univariate input
                self.series_names = "series0"

        else:  # input data set is a DataFrame or Series with column names
            X_index = None
            if X.index is not None and len(X.shape) > 1:
                X_index = X.index
            X = copy.deepcopy(mo.convert_df_to_numeric(X))
            if X_index is not None:
                try:
                    X.index = X_index
                except Exception:
                    # best effort: keep the converted index if the original
                    # one cannot be restored
                    pass
            if isinstance(X, pd.DataFrame):
                self.series_names = X.columns.tolist()
            else:
                self.series_names = X.name

        # `DataFrame.columns` needs a collection; a bare string would raise
        if isinstance(self.series_names, str):
            column_labels = [self.series_names]
        else:
            column_labels = self.series_names

        if isinstance(X, (pd.DataFrame, pd.Series)):
            self.df_ = X
            X = X.values
            if isinstance(self.df_, pd.DataFrame):
                self.df_.columns = column_labels
            self.input_dates = ts.compute_input_dates(self.df_)
        else:
            self.df_ = pd.DataFrame(X, columns=column_labels)

        # `self.obj` is replaced by the fitted results below, so the model
        # class is remembered separately; this makes repeated calls to `fit`
        # (e.g. from `cross_val_score`) possible.
        if callable(self.obj):
            self._model_factory = self.obj
        factory = getattr(self, "_model_factory", None)
        if factory is None:
            raise TypeError("'obj' must be a callable time series model")

        if self.model == "Theta":
            try:
                self.obj = factory(self.df_, **kwargs).fit()
            except Exception:
                # retry on raw values when the pandas input is rejected
                self.obj = factory(self.df_.values, **kwargs).fit()
            self.residuals_ = None
        else:
            self.obj = factory(X, **kwargs).fit()
            try:
                self.residuals_ = self.obj.resid
            except Exception:  # fitted object exposes no residuals
                self.residuals_ = None

        return self

    def predict(self, h=5, level=95, **kwargs):
        """Forecast all the time series, h steps ahead

        Parameters:

            h: {integer}
                Forecasting horizon

            level: {integer}
                Level of confidence for prediction intervals (default is 95)

            **kwargs: additional parameters to be passed to the fitted
                model's interval method (`forecast_interval` or
                `prediction_intervals`)

        Returns:

            A namedtuple DescribeResult(mean, lower, upper) for horizon = h

        """

        self.output_dates_, _ = ts.compute_output_dates(self.df_, h)
        self.lower_ = None  # do not remove (/!\)
        self.upper_ = None  # do not remove (/!\)
        self.sims_ = None  # do not remove (/!\)
        self.level_ = level
        self.alpha_ = 100 - level

        alpha = self.alpha_ / 100
        pi_multiplier = norm.ppf(1 - alpha / 2)

        # Named tuple for forecast results
        DescribeResult = namedtuple(
            "DescribeResult", ("mean", "lower", "upper")
        )

        fitted = self.obj

        # Each strategy returns (mean, lower, upper) for one results API.

        def _var_style():
            return fitted.forecast_interval(
                fitted.endog, steps=h, alpha=alpha, **kwargs
            )

        def _vecm_style():
            point = fitted.predict(steps=h)
            # the helper only accepts (forecast_result, alpha)
            lower, upper = self._compute_confidence_intervals(
                point, alpha=alpha
            )
            return point, lower, upper

        def _arima_style():
            result = fitted.get_forecast(steps=h)
            # honour the requested level, and accept array or DataFrame bounds
            bounds = np.asarray(result.conf_int(alpha=alpha))
            return result.predicted_mean, bounds[:, 0], bounds[:, 1]

        def _ets_style():
            point = fitted.forecast(steps=h)
            spread = pi_multiplier * np.std(fitted.resid)
            return point, point - spread, point + spread

        def _theta_style():
            point = fitted.forecast(steps=h)
            bands = fitted.prediction_intervals(
                steps=h, alpha=alpha, **kwargs
            )
            try:
                return (
                    point.values,
                    bands["lower"].values,
                    bands["upper"].values,
                )
            except AttributeError:  # plain arrays, no `.values`
                return point, bands["lower"], bands["upper"]

        strategies = (
            ("VAR", _var_style),
            ("VECM", _vecm_style),
            ("ARIMA", _arima_style),
            ("ETS", _ets_style),
            ("Theta", _theta_style),
        )

        if self.model is None:
            # user-supplied model: its API is unknown, so try every known
            # forecasting API in turn and keep the first one that works
            failure = None
            for _, strategy in strategies:
                try:
                    mean_forecast, lower_bound, upper_bound = strategy()
                except Exception as err:
                    failure = err
                else:
                    break
            else:
                raise failure
        else:
            by_name = dict(strategies)
            if self.model not in by_name:
                raise ValueError("model not recognized")
            mean_forecast, lower_bound, upper_bound = by_name[self.model]()

        try:
            self.mean_ = pd.DataFrame(
                mean_forecast,
                columns=self.series_names,
                index=self.output_dates_,
            )
            self.lower_ = pd.DataFrame(
                lower_bound, columns=self.series_names, index=self.output_dates_
            )
            self.upper_ = pd.DataFrame(
                upper_bound, columns=self.series_names, index=self.output_dates_
            )
        except Exception:
            # univariate case: `series_names` is a single label
            self.mean_ = pd.Series(
                mean_forecast, name=self.series_names, index=self.output_dates_
            )
            self.lower_ = pd.Series(
                lower_bound, name=self.series_names, index=self.output_dates_
            )
            self.upper_ = pd.Series(
                upper_bound, name=self.series_names, index=self.output_dates_
            )

        return DescribeResult(
            mean=self.mean_, lower=self.lower_, upper=self.upper_
        )

    def _compute_confidence_intervals(self, forecast_result, alpha):
        """
        Compute confidence intervals for VECM forecasts.
        Uses the covariance of residuals to approximate the confidence intervals.
        """
        residuals = self.obj.resid
        cov_matrix = np.cov(residuals.T)  # Covariance matrix of residuals
        std_errors = np.sqrt(np.diag(cov_matrix))  # Standard errors

        z_value = norm.ppf(1 - alpha / 2)  # Z-score for the given alpha level
        lower_bound = forecast_result - z_value * std_errors
        upper_bound = forecast_result + z_value * std_errors

        return lower_bound, upper_bound

    def score(self, X, training_index, testing_index, scoring=None, **kwargs):
        """Train on training_index, score on testing_index.

        Parameters:

            X: {array-like}
                Time series, indexed positionally with the two index sets

            training_index, testing_index: array-like of int
                Non-overlapping row positions for training and testing

            scoring: str
                One of 'explained_variance', 'neg_mean_absolute_error',
                'neg_mean_squared_error', 'neg_root_mean_squared_error'
                (default), 'neg_mean_squared_log_error',
                'neg_median_absolute_error', 'r2'.
                The errors are returned as computed (not negated).

            **kwargs: additional parameters passed to both `fit` and `predict`

        Returns:

            the value of the chosen metric between held-out values and
            mean forecasts
        """

        assert (
            bool(set(training_index).intersection(set(testing_index))) == False
        ), "Non-overlapping 'training_index' and 'testing_index' required"

        # Dimensions
        try:
            # multivariate time series
            n, p = X.shape
        except ValueError:
            # univariate time series
            n = X.shape[0]
            p = 1

        # Training and testing sets
        if p > 1:
            X_train = X[training_index, :]
            X_test = X[testing_index, :]
        else:
            X_train = X[training_index]
            X_test = X[testing_index]

        # Horizon
        h = len(testing_index)
        assert (
            len(training_index) + h
        ) <= n, "Please check lengths of training and testing windows"

        # Fit and predict
        self.fit(X_train, **kwargs)
        preds = self.predict(h=h, **kwargs)

        # `predict` returns (mean, lower, upper); the metric compares the
        # held-out values with the mean forecast, aligned to X_test's shape
        y_pred = np.asarray(preds.mean).reshape(np.shape(X_test))

        if scoring is None:
            scoring = "neg_root_mean_squared_error"

        # check inputs
        assert scoring in (
            "explained_variance",
            "neg_mean_absolute_error",
            "neg_mean_squared_error",
            "neg_root_mean_squared_error",
            "neg_mean_squared_log_error",
            "neg_median_absolute_error",
            "r2",
        ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \
                               'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \
                               'neg_median_absolute_error', 'r2')"

        scoring_options = {
            "explained_variance": skm2.explained_variance_score,
            "neg_mean_absolute_error": skm2.mean_absolute_error,
            "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2),
            "neg_root_mean_squared_error": lambda x, y: np.sqrt(
                np.mean((x - y) ** 2)
            ),
            "neg_mean_squared_log_error": skm2.mean_squared_log_error,
            "neg_median_absolute_error": skm2.median_absolute_error,
            "r2": skm2.r2_score,
        }

        return scoring_options[scoring](X_test, y_pred)

    def plot(self, series=None, type_axis="dates", type_plot="pi"):
        """Plot time series forecast

        Parameters:

            series: {integer} or {string}
                series index or name

            type_axis: {string}
                'dates' (default) or 'numeric' x axis

            type_plot: {string}
                'pi' (prediction intervals, default) or 'spaghetti'
                (simulated paths)

        """

        assert all(
            [
                self.mean_ is not None,
                self.lower_ is not None,
                self.upper_ is not None,
                self.output_dates_ is not None,
            ]
        ), "model forecasting must be obtained first (with predict)"

        if series is None:
            assert (
                self.n_series == 1
            ), "please specify series index or name (n_series > 1)"
            series = 0

        if isinstance(series, str):
            assert (
                series in self.series_names
            ), f"series {series} doesn't exist in the input dataset"
            series_idx = self.df_.columns.get_loc(series)
        else:
            assert isinstance(series, int) and (
                0 <= series < self.n_series
            ), f"check series index (< {self.n_series})"
            series_idx = series

        def _column(data):
            # `df_`, `mean_`, `lower_` and `upper_` may each be a DataFrame
            # or, for univariate input, a Series
            if isinstance(data, pd.DataFrame):
                return data.iloc[:, series_idx]
            return data.values

        y_test = list(_column(self.mean_))
        y_all = list(_column(self.df_)) + y_test
        n_points_all = len(y_all)
        n_points_train = self.df_.shape[0]

        if type_axis == "numeric":
            x_all = list(range(n_points_all))
            x_test = list(range(n_points_train, n_points_all))
        elif type_axis == "dates":  # use dates
            x_all = np.concatenate(
                (self.input_dates.values, self.output_dates_.values), axis=None
            )
            x_test = self.output_dates_.values
        else:
            raise ValueError("type_axis must be in ('dates', 'numeric')")

        target = f"{series}" if self.n_series > 1 else "input time series"
        title_style = dict(
            loc="left", fontsize=12, fontweight=0, color="black"
        )

        if type_plot == "pi":
            fig, ax = plt.subplots()
            ax.plot(x_all, y_all, "-")
            ax.plot(x_test, y_test, "-", color="orange")
            ax.fill_between(
                x_test,
                _column(self.lower_),
                _column(self.upper_),
                alpha=0.2,
                color="orange",
            )
            if self.replications is None:
                plt.title(f"prediction intervals for {target}", **title_style)
            else:
                plt.title(
                    f"prediction intervals for {self.replications} simulations of {target}",
                    **title_style,
                )
            plt.show()

        if type_plot == "spaghetti":
            palette = plt.get_cmap("Set1")
            sims_ix = getsims(self.sims_, series_idx)
            plt.plot(x_all, y_all, "-")
            for col_ix in range(
                sims_ix.shape[1]
            ):  # avoid this when there are thousands of simulations
                plt.plot(
                    x_test,
                    sims_ix[:, col_ix],
                    "-",
                    color=palette(col_ix),
                    linewidth=1,
                    alpha=0.9,
                )
            plt.plot(x_all, y_all, "-", color="black")
            plt.plot(x_test, y_test, "-", color="blue")
            # Add titles
            plt.title(
                f"{self.replications} simulations of {target}", **title_style
            )
            plt.xlabel("Time")
            plt.ylabel("Values")
            # Show the graph
            plt.show()

    def cross_val_score(
        self,
        X,
        scoring="root_mean_squared_error",
        n_jobs=None,
        verbose=0,
        xreg=None,
        initial_window=5,
        horizon=3,
        fixed_window=False,
        show_progress=True,
        level=95,
        **kwargs,
    ):
        """Evaluate a score by time series cross-validation.

        Parameters:

            X: {array-like, sparse matrix} of shape (n_samples, n_features)
                The data to fit.

            scoring: str or a function
                A str in ('root_mean_squared_error', 'mean_squared_error', 'mean_error',
                'mean_absolute_error', 'mean_percentage_error',
                'mean_absolute_percentage_error', 'winkler_score', 'coverage')
                Or a function called as `scoring(X_test, X_pred, scoring)`,
                where X_pred is the result of `predict`

            n_jobs: int, default=None
                Currently unused.

            verbose: int, default=0
                The verbosity level.

            xreg: array-like, optional (default=None)
                Not supported by this class (`fit` has no such parameter);
                a warning is emitted and the value is ignored

            initial_window: int
                initial number of consecutive values in each training set sample

            horizon: int
                number of consecutive values in test set sample

            fixed_window: boolean
                if False, all training samples start at index 0, and the training
                window's size is increasing.
                if True, the training window's size is fixed, and the window is
                rolling forward

            show_progress: boolean
                if True, a progress bar is printed

            level: int
                level of confidence for prediction intervals (default is 95)

            **kwargs: dict
                additional parameters to be passed to `fit` and `predict`

        Returns:

            A tuple: raw errors and their descriptive statistics

        """
        if xreg is not None:
            import warnings

            # forwarding `xreg` to `fit` would pass it to the model
            # constructor as an unknown keyword argument
            warnings.warn(
                "'xreg' is not supported by ClassicalMTS and is ignored"
            )

        tscv = TimeSeriesSplit()

        tscv_obj = tscv.split(
            X,
            initial_window=initial_window,
            horizon=horizon,
            fixed_window=fixed_window,
        )

        if isinstance(scoring, str):
            assert scoring in (
                "root_mean_squared_error",
                "mean_squared_error",
                "mean_error",
                "mean_absolute_error",
                "mean_percentage_error",
                "mean_absolute_percentage_error",
                "winkler_score",
                "coverage",
            ), "must have scoring in ('root_mean_squared_error', 'mean_squared_error', 'mean_error', 'mean_absolute_error', 'mean_error', 'mean_percentage_error', 'mean_absolute_percentage_error', 'winkler_score', 'coverage')"

            def err_func(X_test, X_pred, scoring):
                # `predict` always returns (mean, lower, upper), so interval
                # scores and point errors are both read from that result
                if scoring == "winkler_score":
                    return winkler_score(X_pred, X_test, level=level)
                if scoring == "coverage":
                    return coverage(X_pred, X_test, level=level)
                return mean_errors(
                    pred=X_pred.mean, actual=X_test, scoring=scoring
                )

        else:  # isinstance(scoring, str) = False
            err_func = scoring

        # materialise the splits so the progress bar knows its length
        splits = list(tscv_obj)

        if show_progress is True:
            iterator = tqdm(splits, total=len(splits))
        else:
            iterator = splits

        errors = []

        for train_index, test_index in iterator:
            if verbose == 1:
                print(f"TRAIN: {train_index}")
                print(f"TEST: {test_index}")

            # row selection that works for both 1-D and 2-D inputs
            if isinstance(X, (pd.DataFrame, pd.Series)):
                X_train = X.iloc[train_index]
                X_test = X.iloc[test_index]
            else:
                X_train = X[train_index]
                X_test = X[test_index]

            self.fit(X_train, **kwargs)
            X_pred = self.predict(h=int(len(test_index)), level=level, **kwargs)

            errors.append(err_func(X_test, X_pred, scoring))

        res = np.asarray(errors)

        return res, describe(res)
Time series with statistical models (statsmodels), mostly for benchmarks
Parameters:
model: type of model: str.
currently, 'VAR', 'VECM', 'ARIMA', 'ETS', 'Theta'
Default is 'VAR'
obj: object
A time series model from statsmodels
Attributes:
df_: data frame
the input data frame, in case a data.frame is provided to `fit`
level_: int
level of confidence for prediction intervals (default is 95)
Examples: See examples/classical_mts_timeseries.py
def fit(self, X, **kwargs):
    """Fit ClassicalMTS model to training data X

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training time series, where n_samples is the number
            of samples and n_features is the number of features;
            X must be in increasing order (most recent observations last)

        **kwargs: additional parameters passed to the constructor of the
            underlying model (`self.obj`)

    Returns:

        self: object
    """

    try:
        self.n_series = X.shape[1]
    except (AttributeError, IndexError, TypeError):
        # no second dimension: treat the input as a single series
        self.n_series = 1

    if not isinstance(X, (pd.DataFrame, pd.Series)):
        # input data set is a numpy array
        X = pd.DataFrame(X)
        if self.n_series > 1:
            self.series_names = [
                "series" + str(i) for i in range(X.shape[1])
            ]
        else:
            # kept as a plain string: `predict` relies on it to fall back
            # to a pd.Series output for univariate input
            self.series_names = "series0"

    else:  # input data set is a DataFrame or Series with column names
        X_index = None
        if X.index is not None and len(X.shape) > 1:
            X_index = X.index
        X = copy.deepcopy(mo.convert_df_to_numeric(X))
        if X_index is not None:
            try:
                X.index = X_index
            except Exception:
                # best effort: keep the converted index if the original
                # one cannot be restored
                pass
        if isinstance(X, pd.DataFrame):
            self.series_names = X.columns.tolist()
        else:
            self.series_names = X.name

    # `DataFrame.columns` needs a collection; a bare string would raise
    if isinstance(self.series_names, str):
        column_labels = [self.series_names]
    else:
        column_labels = self.series_names

    if isinstance(X, (pd.DataFrame, pd.Series)):
        self.df_ = X
        X = X.values
        if isinstance(self.df_, pd.DataFrame):
            self.df_.columns = column_labels
        self.input_dates = ts.compute_input_dates(self.df_)
    else:
        self.df_ = pd.DataFrame(X, columns=column_labels)

    # `self.obj` is replaced by the fitted results below, so the model
    # class is remembered separately; this makes repeated calls to `fit`
    # (e.g. from `cross_val_score`) possible.
    if callable(self.obj):
        self._model_factory = self.obj
    factory = getattr(self, "_model_factory", None)
    if factory is None:
        raise TypeError("'obj' must be a callable time series model")

    if self.model == "Theta":
        try:
            self.obj = factory(self.df_, **kwargs).fit()
        except Exception:
            # retry on raw values when the pandas input is rejected
            self.obj = factory(self.df_.values, **kwargs).fit()
        self.residuals_ = None
    else:
        self.obj = factory(X, **kwargs).fit()
        try:
            self.residuals_ = self.obj.resid
        except Exception:  # fitted object exposes no residuals
            self.residuals_ = None

    return self
Fit ClassicalMTS model to training data X
Parameters:
X: {array-like}, shape = [n_samples, n_features] Training time series, where n_samples is the number of samples and n_features is the number of features; X must be in increasing order (most recent observations last)
**kwargs: additional parameters passed to the constructor of the underlying time series model (`self.obj`)
Returns:
self: object
def predict(self, h=5, level=95, **kwargs):
    """Forecast all the time series, h steps ahead

    Parameters:

        h: {integer}
            Forecasting horizon

        level: {integer}
            Level of confidence for prediction intervals (default is 95)

        **kwargs: additional parameters to be passed to the fitted
            model's interval method (`forecast_interval` or
            `prediction_intervals`)

    Returns:

        A namedtuple DescribeResult(mean, lower, upper) for horizon = h

    """

    self.output_dates_, _ = ts.compute_output_dates(self.df_, h)
    self.lower_ = None  # do not remove (/!\)
    self.upper_ = None  # do not remove (/!\)
    self.sims_ = None  # do not remove (/!\)
    self.level_ = level
    self.alpha_ = 100 - level

    alpha = self.alpha_ / 100
    pi_multiplier = norm.ppf(1 - alpha / 2)

    # Named tuple for forecast results
    DescribeResult = namedtuple(
        "DescribeResult", ("mean", "lower", "upper")
    )

    fitted = self.obj

    # Each strategy returns (mean, lower, upper) for one results API.

    def _var_style():
        return fitted.forecast_interval(
            fitted.endog, steps=h, alpha=alpha, **kwargs
        )

    def _vecm_style():
        point = fitted.predict(steps=h)
        # the helper only accepts (forecast_result, alpha)
        lower, upper = self._compute_confidence_intervals(
            point, alpha=alpha
        )
        return point, lower, upper

    def _arima_style():
        result = fitted.get_forecast(steps=h)
        # honour the requested level, and accept array or DataFrame bounds
        bounds = np.asarray(result.conf_int(alpha=alpha))
        return result.predicted_mean, bounds[:, 0], bounds[:, 1]

    def _ets_style():
        point = fitted.forecast(steps=h)
        spread = pi_multiplier * np.std(fitted.resid)
        return point, point - spread, point + spread

    def _theta_style():
        point = fitted.forecast(steps=h)
        bands = fitted.prediction_intervals(
            steps=h, alpha=alpha, **kwargs
        )
        try:
            return (
                point.values,
                bands["lower"].values,
                bands["upper"].values,
            )
        except AttributeError:  # plain arrays, no `.values`
            return point, bands["lower"], bands["upper"]

    strategies = (
        ("VAR", _var_style),
        ("VECM", _vecm_style),
        ("ARIMA", _arima_style),
        ("ETS", _ets_style),
        ("Theta", _theta_style),
    )

    if self.model is None:
        # user-supplied model: its API is unknown, so try every known
        # forecasting API in turn and keep the first one that works
        failure = None
        for _, strategy in strategies:
            try:
                mean_forecast, lower_bound, upper_bound = strategy()
            except Exception as err:
                failure = err
            else:
                break
        else:
            raise failure
    else:
        by_name = dict(strategies)
        if self.model not in by_name:
            raise ValueError("model not recognized")
        mean_forecast, lower_bound, upper_bound = by_name[self.model]()

    try:
        self.mean_ = pd.DataFrame(
            mean_forecast,
            columns=self.series_names,
            index=self.output_dates_,
        )
        self.lower_ = pd.DataFrame(
            lower_bound, columns=self.series_names, index=self.output_dates_
        )
        self.upper_ = pd.DataFrame(
            upper_bound, columns=self.series_names, index=self.output_dates_
        )
    except Exception:
        # univariate case: `series_names` is a single label
        self.mean_ = pd.Series(
            mean_forecast, name=self.series_names, index=self.output_dates_
        )
        self.lower_ = pd.Series(
            lower_bound, name=self.series_names, index=self.output_dates_
        )
        self.upper_ = pd.Series(
            upper_bound, name=self.series_names, index=self.output_dates_
        )

    return DescribeResult(
        mean=self.mean_, lower=self.lower_, upper=self.upper_
    )
Forecast all the time series, h steps ahead
Parameters:
h: {integer} Forecasting horizon
**kwargs: additional parameters to be passed to self.cook_test_set
Returns:
a `DescribeResult` named tuple with fields `mean`, `lower` and `upper` (point forecasts and prediction-interval bounds for horizon = h)
def score(self, X, training_index, testing_index, scoring=None, **kwargs):
    """Train on training_index, score on testing_index.

    Parameters:

        X: {array-like}, shape = [n_samples] or [n_samples, n_series]
            Observed series, indexed positionally with `training_index`
            and `testing_index` (NumPy-style indexing is assumed).

        training_index: array-like of int
            Positions of the observations used for fitting.

        testing_index: array-like of int
            Positions of the held-out observations; its length is used
            as the forecasting horizon.

        scoring: str
            One of 'explained_variance', 'neg_mean_absolute_error',
            'neg_mean_squared_error', 'neg_root_mean_squared_error'
            (default), 'neg_mean_squared_log_error',
            'neg_median_absolute_error' or 'r2'. No sign flip is applied
            here: the 'neg_*' entries are mapped to the plain metric.

        **kwargs: additional parameters forwarded to both `self.fit`
            and `self.predict`

    Returns:

        score: value returned by the selected metric, computed between
        the held-out observations and the point forecasts

    Raises:

        AssertionError: if the index sets overlap, the windows do not fit
            in `X`, or `scoring` is not one of the supported names.
    """

    assert set(training_index).isdisjoint(
        testing_index
    ), "Non-overlapping 'training_index' and 'testing_index' required"

    if scoring is None:
        scoring = "neg_root_mean_squared_error"

    scoring_options = {
        "explained_variance": skm2.explained_variance_score,
        "neg_mean_absolute_error": skm2.mean_absolute_error,
        "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2),
        "neg_root_mean_squared_error": lambda x, y: np.sqrt(
            np.mean((x - y) ** 2)
        ),
        "neg_mean_squared_log_error": skm2.mean_squared_log_error,
        "neg_median_absolute_error": skm2.median_absolute_error,
        "r2": skm2.r2_score,
    }

    # Validate the metric name before the (potentially costly) fit.
    assert scoring in scoring_options, (
        "'scoring' should be in ('explained_variance', "
        "'neg_mean_absolute_error', 'neg_mean_squared_error', "
        "'neg_root_mean_squared_error', 'neg_mean_squared_log_error', "
        "'neg_median_absolute_error', 'r2')"
    )

    # Dimensions
    try:
        # multivariate time series
        n, p = X.shape
    except ValueError:
        # univariate time series: shape does not unpack into two values
        n = X.shape[0]
        p = 1

    # Training and testing sets
    if p > 1:
        X_train = X[training_index, :]
        X_test = X[testing_index, :]
    else:
        X_train = X[training_index]
        X_test = X[testing_index]

    # Horizon
    h = len(testing_index)
    assert (
        len(training_index) + h
    ) <= n, "Please check lengths of training and testing windows"

    # Fit and predict
    self.fit(X_train, **kwargs)
    forecast = self.predict(h=h, **kwargs)

    # `predict` returns a named tuple (mean, lower, upper); only the point
    # forecast must be scored, otherwise the interval bounds leak into the
    # metric through broadcasting.
    if isinstance(forecast, tuple) and hasattr(forecast, "_fields"):
        forecast = forecast.mean

    # Work on plain arrays so that the metric is a scalar regardless of
    # whether the forecast is a DataFrame or a Series.
    preds = np.asarray(forecast)
    target_shape = np.shape(X_test)
    if preds.shape != target_shape and preds.size == np.size(X_test):
        # e.g. a (h,) forecast against a (h, 1) hold-out window
        preds = preds.reshape(target_shape)

    return scoring_options[scoring](X_test, preds)
Train on training_index, score on testing_index.
class CustomClassifier(Custom, ClassifierMixin):
    """Custom Classification model

    Attributes:

        obj: object
            any object containing a method fit (obj.fit()) and a method predict
            (obj.predict())

        n_hidden_features: int
            number of nodes in the hidden layer

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
            'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not
            (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original predictors are included (True) in model's
            fitting or not (False)

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
                no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        col_sample: float
            percentage of covariates randomly chosen for training

        row_sample: float
            percentage of rows chosen for training, by stratified bootstrapping

        cv_calibration: int, cross-validation generator, or iterable, default=2
            Determines the cross-validation splitting strategy. Same as
            `sklearn.calibration.CalibratedClassifierCV`

        calibration_method: str
            {'sigmoid', 'isotonic'}, default='sigmoid'
            The method to use for calibration. Same as
            `sklearn.calibration.CalibratedClassifierCV`

        seed: int
            reproducibility seed for nodes_sim=='uniform'

        backend: str
            "cpu" or "gpu" or "tpu"

    Examples:

    Note: it's better to use the `DeepClassifier` or `LazyDeepClassifier` classes directly

    ```python
    import nnetsauce as ns
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import train_test_split
    from sklearn.datasets import load_digits
    from time import time

    digits = load_digits()
    X = digits.data
    y = digits.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                        random_state=123)

    # layer 1 (base layer) ----
    layer1_regr = RandomForestClassifier(n_estimators=10, random_state=123)

    start = time()

    layer1_regr.fit(X_train, y_train)

    # Accuracy in layer 1
    print(layer1_regr.score(X_test, y_test))

    # layer 2 using layer 1 ----
    layer2_regr = ns.CustomClassifier(obj = layer1_regr, n_hidden_features=5,
                            direct_link=True, bias=True,
                            nodes_sim='uniform', activation_name='relu',
                            n_clusters=2, seed=123)
    layer2_regr.fit(X_train, y_train)

    # Accuracy in layer 2
    print(layer2_regr.score(X_test, y_test))

    # layer 3 using layer 2 ----
    layer3_regr = ns.CustomClassifier(obj = layer2_regr, n_hidden_features=10,
                            direct_link=True, bias=True, dropout=0.7,
                            nodes_sim='uniform', activation_name='relu',
                            n_clusters=2, seed=123)
    layer3_regr.fit(X_train, y_train)

    # Accuracy in layer 3
    print(layer3_regr.score(X_test, y_test))

    print(f"Elapsed {time() - start}")
    ```

    """

    # Scoring names accepted by `score`.
    _SCORING_NAMES = (
        "accuracy",
        "f1",
        "precision",
        "recall",
        "roc_auc",
        "log_loss",
        "balanced_accuracy",
        "average_precision",
        "neg_brier_score",
        "neg_log_loss",
    )

    # construct the object -----

    def __init__(
        self,
        obj,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        cv_calibration=2,
        calibration_method="sigmoid",
        seed=123,
        backend="cpu",
    ):
        super().__init__(
            obj=obj,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )
        self.coef_ = None
        self.intercept_ = None
        self.type_fit = "classification"
        self.cv_calibration = cv_calibration
        self.calibration_method = calibration_method

    def __sklearn_clone__(self):
        """Create a clone of the estimator.

        This is required for scikit-learn's calibration system to work properly.
        """
        # Create a new instance with the same parameters
        clone = CustomClassifier(
            obj=self.obj,
            n_hidden_features=self.n_hidden_features,
            activation_name=self.activation_name,
            a=self.a,
            nodes_sim=self.nodes_sim,
            bias=self.bias,
            dropout=self.dropout,
            direct_link=self.direct_link,
            n_clusters=self.n_clusters,
            cluster_encode=self.cluster_encode,
            type_clust=self.type_clust,
            type_scaling=self.type_scaling,
            col_sample=self.col_sample,
            row_sample=self.row_sample,
            cv_calibration=self.cv_calibration,
            calibration_method=self.calibration_method,
            seed=self.seed,
            backend=self.backend,
        )
        return clone

    @staticmethod
    def _pad_single_sample(X):
        """Stack a 1-D sample on top of a row of ones (2-row matrix)."""
        # NOTE(review): the extra row of ones is presumably there so that
        # the downstream preprocessing receives a 2-D batch -- confirm.
        n_features = X.shape[0]
        return mo.rbind(X.reshape(1, n_features), np.ones((1, n_features)))

    def fit(self, X, y, sample_weight=None, **kwargs):
        """Fit custom model to training data (X, y).

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            sample_weight: array-like, shape = [n_samples]
                Sample weights.

            **kwargs: additional parameters to be passed to
                        self.cook_training_set or self.obj.fit

        Returns:

            self: object
        """

        if len(X.shape) == 1:
            if isinstance(X, pd.DataFrame):
                X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns)
            else:
                X = X.reshape(1, -1)

        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
        self.classes_ = np.unique(y)  # for compatibility with sklearn
        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn

        # Wrap in CalibratedClassifierCV if needed. The wrapper replaces
        # `self.obj`, so skip it when a previous `fit` already wrapped the
        # model: otherwise every refit would nest one more calibrator.
        if self.cv_calibration is not None and not isinstance(
            self.obj, CalibratedClassifierCV
        ):
            self.obj = CalibratedClassifierCV(
                self.obj, cv=self.cv_calibration, method=self.calibration_method
            )

        if sample_weight is not None:
            # weights must follow the rows selected by cook_training_set
            self.obj.fit(
                scaled_Z,
                output_y,
                sample_weight=sample_weight[self.index_row_].ravel(),
                **kwargs
            )
        else:
            self.obj.fit(scaled_Z, output_y, **kwargs)

        # Expose linear-model attributes on both the weighted and the
        # unweighted path.
        if hasattr(self.obj, "coef_"):
            self.coef_ = self.obj.coef_

        if hasattr(self.obj, "intercept_"):
            self.intercept_ = self.obj.intercept_

        return self

    def partial_fit(self, X, y, sample_weight=None, **kwargs):
        """Partial fit custom model to training data (X, y).

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Subset of training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Subset of target values.

            sample_weight: array-like, shape = [n_samples]
                Sample weights.

            **kwargs: additional parameters to be passed to
                        self.cook_training_set or self.obj.fit

        Returns:

            self: object

        Raises:

            NotImplementedError: if the weighted partial fit fails on the
                underlying model (the original error is chained as cause).
        """

        if len(X.shape) == 1:
            if isinstance(X, pd.DataFrame):
                X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns)
            else:
                X = X.reshape(1, -1)
            y = np.array([y], dtype=int)

        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

        if sample_weight is not None:
            try:
                self.obj.partial_fit(
                    scaled_Z,
                    output_y,
                    sample_weight=sample_weight[self.index_row_].ravel(),
                )
            except Exception as err:
                # The original `except: NotImplementedError` was a bare
                # expression that silently discarded the failure.
                raise NotImplementedError(
                    "partial_fit with sample_weight failed on the underlying model"
                ) from err
        else:
            self.obj.partial_fit(scaled_Z, output_y)

        self.classes_ = np.unique(y)  # for compatibility with sklearn
        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn

        return self

    def predict(self, X, **kwargs):
        """Predict test data X.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Samples to predict; a 1-D array is treated as one sample.

            **kwargs: additional parameters to be passed to
                    self.cook_test_set

        Returns:

            model predictions: {array-like}
        """

        if len(X.shape) == 1:
            padded = self._pad_single_sample(X)
            return (
                self.obj.predict(self.cook_test_set(padded, **kwargs), **kwargs)
            )[0]

        return self.obj.predict(self.cook_test_set(X, **kwargs), **kwargs)

    def predict_proba(self, X, **kwargs):
        """Predict probabilities for test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Samples to predict; a 1-D array is treated as one sample.

            **kwargs: additional parameters to be passed to
                    self.cook_test_set

        Returns:

            probability estimates for test data: {array-like}
        """

        if len(X.shape) == 1:
            padded = self._pad_single_sample(X)
            return (
                self.obj.predict_proba(
                    self.cook_test_set(padded, **kwargs), **kwargs
                )
            )[0]

        return self.obj.predict_proba(self.cook_test_set(X, **kwargs), **kwargs)

    def decision_function(self, X, **kwargs):
        """Compute the decision function of X.

        Parameters:
            X: {array-like}, shape = [n_samples, n_features]
                Samples to compute decision function for.

            **kwargs: additional parameters to be passed to
                    self.cook_test_set

        Returns:
            array-like of shape (n_samples,) or (n_samples, n_classes)
            Decision function of the input samples. The order of outputs is the same
            as that of the classes passed to fit.
        """
        if not hasattr(self.obj, "decision_function"):
            # If base classifier doesn't have decision_function, use predict_proba
            proba = np.asarray(self.predict_proba(X, **kwargs))
            if proba.ndim == 1:
                # single sample: predict_proba returned one row of probabilities
                return proba[1] if proba.shape[0] == 2 else proba
            if proba.shape[1] == 2:
                return proba[:, 1]  # For binary classification
            return proba  # For multiclass

        if len(X.shape) == 1:
            padded = self._pad_single_sample(X)
            return (
                self.obj.decision_function(
                    self.cook_test_set(padded, **kwargs), **kwargs
                )
            )[0]

        return self.obj.decision_function(
            self.cook_test_set(X, **kwargs), **kwargs
        )

    def score(self, X, y, scoring=None):
        """Scoring function for classification.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Samples to score.

            y: array-like, shape = [n_samples]
                Target values.

            scoring: str
                scoring method (default is accuracy); one of 'accuracy',
                'f1', 'precision', 'recall', 'roc_auc', 'log_loss',
                'balanced_accuracy', 'average_precision',
                'neg_brier_score', 'neg_log_loss'

        Returns:

            score: float

        Raises:

            ValueError: if `scoring` is not one of the supported names.
        """

        if scoring is None:
            scoring = "accuracy"

        if scoring not in self._SCORING_NAMES:
            raise ValueError(
                f"'scoring' should be in {self._SCORING_NAMES}, got {scoring!r}"
            )

        # Metrics fed with predicted probabilities.
        if scoring == "log_loss":
            return skm2.log_loss(y, self.predict_proba(X))
        if scoring == "neg_log_loss":
            return -skm2.log_loss(y, self.predict_proba(X))
        if scoring == "neg_brier_score":
            return -skm2.brier_score_loss(y, self.predict_proba(X))

        # Metrics fed with hard class predictions.
        # NOTE(review): 'roc_auc' and 'average_precision' are computed from
        # predicted labels rather than scores, as in the original code.
        label_metrics = {
            "accuracy": skm2.accuracy_score,
            "f1": skm2.f1_score,
            "precision": skm2.precision_score,
            "recall": skm2.recall_score,
            "roc_auc": skm2.roc_auc_score,
            "balanced_accuracy": skm2.balanced_accuracy_score,
            "average_precision": skm2.average_precision_score,
        }
        return label_metrics[scoring](y, self.predict(X))

    @property
    def _estimator_type(self):
        # Read-only marker looked up by scikit-learn utilities.
        return "classifier"
Custom Classification model
Attributes:
obj: object
any object containing a method fit (obj.fit()) and a method predict
(obj.predict())
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
cv_calibration: int, cross-validation generator, or iterable, default=2
Determines the cross-validation splitting strategy. Same as
`sklearn.calibration.CalibratedClassifierCV`
calibration_method: str
{'sigmoid', 'isotonic'}, default='sigmoid'
The method to use for calibration. Same as
`sklearn.calibration.CalibratedClassifierCV`
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu" or "gpu" or "tpu"
Examples:
Note: it's better to use the DeepClassifier or LazyDeepClassifier classes directly
import nnetsauce as ns
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_digits
from time import time
digits = load_digits()
X = digits.data
y = digits.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
random_state=123)
# layer 1 (base layer) ----
layer1_regr = RandomForestClassifier(n_estimators=10, random_state=123)
start = time()
layer1_regr.fit(X_train, y_train)
# Accuracy in layer 1
print(layer1_regr.score(X_test, y_test))
# layer 2 using layer 1 ----
layer2_regr = ns.CustomClassifier(obj = layer1_regr, n_hidden_features=5,
direct_link=True, bias=True,
nodes_sim='uniform', activation_name='relu',
n_clusters=2, seed=123)
layer2_regr.fit(X_train, y_train)
# Accuracy in layer 2
print(layer2_regr.score(X_test, y_test))
# layer 3 using layer 2 ----
layer3_regr = ns.CustomClassifier(obj = layer2_regr, n_hidden_features=10,
direct_link=True, bias=True, dropout=0.7,
nodes_sim='uniform', activation_name='relu',
n_clusters=2, seed=123)
layer3_regr.fit(X_train, y_train)
# Accuracy in layer 3
print(layer3_regr.score(X_test, y_test))
print(f"Elapsed {time() - start}")
def fit(self, X, y, sample_weight=None, **kwargs):
    """Fit custom model to training data (X, y).

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        sample_weight: array-like, shape = [n_samples]
            Sample weights.

        **kwargs: additional parameters to be passed to
                    self.cook_training_set or self.obj.fit

    Returns:

        self: object
    """

    if len(X.shape) == 1:
        if isinstance(X, pd.DataFrame):
            X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns)
        else:
            X = X.reshape(1, -1)

    output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
    self.classes_ = np.unique(y)  # for compatibility with sklearn
    self.n_classes_ = len(self.classes_)  # for compatibility with sklearn

    # Wrap in CalibratedClassifierCV if needed. The wrapper replaces
    # `self.obj`, so skip it when a previous `fit` already wrapped the
    # model: otherwise every refit would nest one more calibrator.
    if self.cv_calibration is not None and not isinstance(
        self.obj, CalibratedClassifierCV
    ):
        self.obj = CalibratedClassifierCV(
            self.obj, cv=self.cv_calibration, method=self.calibration_method
        )

    if sample_weight is not None:
        # weights must follow the rows selected by cook_training_set
        self.obj.fit(
            scaled_Z,
            output_y,
            sample_weight=sample_weight[self.index_row_].ravel(),
            **kwargs
        )
    else:
        self.obj.fit(scaled_Z, output_y, **kwargs)

    # Expose linear-model attributes on both the weighted and the
    # unweighted path (the weighted path used to return before this).
    if hasattr(self.obj, "coef_"):
        self.coef_ = self.obj.coef_

    if hasattr(self.obj, "intercept_"):
        self.intercept_ = self.obj.intercept_

    return self
Fit custom model to training data (X, y).
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
sample_weight: array-like, shape = [n_samples]
Sample weights.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
def predict(self, X, **kwargs):
    """Return the wrapped model's predictions for X.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Samples to predict. A 1-D array is handled as a single
            sample: it is stacked on a row of ones before preprocessing
            and only the prediction for the first row is returned.

        **kwargs: additional parameters, passed to both
                self.cook_test_set and self.obj.predict

    Returns:

        model predictions: {array-like}
    """

    # Batch input: preprocess and delegate directly.
    if len(X.shape) != 1:
        cooked = self.cook_test_set(X, **kwargs)
        return self.obj.predict(cooked, **kwargs)

    # Single sample: build a 2-row matrix (sample, ones), keep row 0.
    width = X.shape[0]
    two_rows = mo.rbind(X.reshape(1, width), np.ones((1, width)))
    cooked = self.cook_test_set(two_rows, **kwargs)
    return self.obj.predict(cooked, **kwargs)[0]
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Samples to predict, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
def predict_proba(self, X, **kwargs):
    """Return the wrapped model's class probabilities for X.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Samples to predict. A 1-D array is handled as a single
            sample: it is stacked on a row of ones before preprocessing
            and only the probabilities for the first row are returned.

        **kwargs: additional parameters, passed to both
                self.cook_test_set and self.obj.predict_proba

    Returns:

        probability estimates for test data: {array-like}
    """

    # Batch input: preprocess and delegate directly.
    if len(X.shape) != 1:
        cooked = self.cook_test_set(X, **kwargs)
        return self.obj.predict_proba(cooked, **kwargs)

    # Single sample: build a 2-row matrix (sample, ones), keep row 0.
    width = X.shape[0]
    two_rows = mo.rbind(X.reshape(1, width), np.ones((1, width)))
    cooked = self.cook_test_set(two_rows, **kwargs)
    return self.obj.predict_proba(cooked, **kwargs)[0]
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Samples to predict, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
def score(self, X, y, scoring=None):
    """Scoring function for classification.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Samples to score.

        y: array-like, shape = [n_samples]
            Target values.

        scoring: str
            scoring method (default is accuracy); one of 'accuracy',
            'f1', 'precision', 'recall', 'roc_auc', 'log_loss',
            'balanced_accuracy', 'average_precision',
            'neg_brier_score', 'neg_log_loss'

    Returns:

        score: float

    Raises:

        ValueError: if `scoring` is not one of the supported names
            (previously the method silently returned None).
    """

    if scoring is None:
        scoring = "accuracy"

    supported = (
        "accuracy",
        "f1",
        "precision",
        "recall",
        "roc_auc",
        "log_loss",
        "balanced_accuracy",
        "average_precision",
        "neg_brier_score",
        "neg_log_loss",
    )
    if scoring not in supported:
        raise ValueError(
            f"'scoring' should be in {supported}, got {scoring!r}"
        )

    # Metrics fed with predicted probabilities.
    if scoring == "log_loss":
        return skm2.log_loss(y, self.predict_proba(X))
    if scoring == "neg_log_loss":
        return -skm2.log_loss(y, self.predict_proba(X))
    if scoring == "neg_brier_score":
        return -skm2.brier_score_loss(y, self.predict_proba(X))

    # Metrics fed with hard class predictions.
    # NOTE(review): 'roc_auc' and 'average_precision' are computed from
    # predicted labels rather than scores, as in the original code.
    label_metrics = {
        "accuracy": skm2.accuracy_score,
        "f1": skm2.f1_score,
        "precision": skm2.precision_score,
        "recall": skm2.recall_score,
        "roc_auc": skm2.roc_auc_score,
        "balanced_accuracy": skm2.balanced_accuracy_score,
        "average_precision": skm2.average_precision_score,
    }
    return label_metrics[scoring](y, self.predict(X))
Scoring function for classification.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method (default is accuracy)
Returns:
score: float
class CustomRegressor(Custom, RegressorMixin):
    """Custom Regression model

    This class is used to 'augment' any regression model with transformed features.

    Parameters:

        obj: object
            any object containing a method fit (obj.fit()) and a method predict
            (obj.predict())

        n_hidden_features: int
            number of nodes in the hidden layer

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
            'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not
            (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original predictors are included (True) in model's
            fitting or not (False)

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
                no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        type_pi: str.
            type of prediction interval; currently `None` (split or local
            conformal without simulation), "kde" or "bootstrap" (simulated split
            conformal).

        replications: int.
            number of replications (if needed) for predictive simulation.
            Used only in `self.predict`, for `self.kernel` in ('gaussian',
            'tophat') and `self.type_pi = 'kde'`. Default is `None`.

        kernel: str.
            the kernel to use for kernel density estimation (used for predictive
            simulation in `self.predict`, with `method='splitconformal'` and
            `type_pi = 'kde'`). Currently, either 'gaussian' or 'tophat'.

        type_split: str.
            Type of splitting for conformal prediction. None (default), or
            "random" (random split of data) or "sequential" (sequential split of data)

        col_sample: float
            percentage of covariates randomly chosen for training

        row_sample: float
            percentage of rows chosen for training, by stratified bootstrapping

        level: float
            confidence level for prediction intervals

        pi_method: str
            method for prediction intervals: 'splitconformal' or 'localconformal'

        seed: int
            reproducibility seed for nodes_sim=='uniform'

        type_fit: str
            'regression'

        backend: str
            "cpu" or "gpu" or "tpu"

    Examples:

    See [https://thierrymoudiki.github.io/blog/2024/03/18/python/conformal-and-bayesian-regression](https://thierrymoudiki.github.io/blog/2024/03/18/python/conformal-and-bayesian-regression)

    """

    # construct the object -----

    def __init__(
        self,
        obj,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        type_pi=None,
        replications=None,
        kernel=None,
        type_split=None,
        col_sample=1,
        row_sample=1,
        level=None,
        pi_method=None,
        seed=123,
        backend="cpu",
    ):
        super().__init__(
            obj=obj,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )

        self.type_fit = "regression"
        self.type_pi = type_pi
        self.replications = replications
        self.kernel = kernel
        self.type_split = type_split
        self.level = level
        self.pi_method = pi_method
        self.coef_ = None
        self.intercept_ = None
        self.X_ = None
        self.y_ = None
        self.aic_ = None
        self.aicc_ = None
        self.bic_ = None

    def fit(self, X, y, sample_weight=None, **kwargs):
        """Fit custom model to training data (X, y).

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            sample_weight: array-like, shape = [n_samples]
                Sample weights.

            **kwargs: additional parameters to be passed to
                self.cook_training_set or self.obj.fit

        Returns:

            self: object

        """

        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

        # The wrapper replaces `self.obj`; skip it when a previous `fit`
        # already wrapped the model, otherwise refits nest wrappers.
        if self.level is not None and not isinstance(
            self.obj, PredictionInterval
        ):
            self.obj = PredictionInterval(
                obj=self.obj, method=self.pi_method, level=self.level
            )

        if sample_weight is not None:
            # weights must follow the rows selected by cook_training_set
            self.obj.fit(
                scaled_Z,
                centered_y,
                sample_weight=sample_weight[self.index_row_].ravel(),
                **kwargs
            )
        else:
            self.obj.fit(scaled_Z, centered_y, **kwargs)

        # Kept for conformal prediction in `predict(..., return_pi=True)`.
        # Stored on the weighted path too: it used to return early, leaving
        # X_ / y_ as None and breaking later interval predictions.
        self.X_ = X

        self.y_ = y

        # Compute SSE
        centered_y_pred = self.obj.predict(scaled_Z)
        self.sse_ = np.sum((centered_y - centered_y_pred) ** 2)

        # Get number of parameters
        n_params = (
            self.n_hidden_features + X.shape[1]
        )  # hidden features + original features
        if self.n_clusters > 0:
            n_params += self.n_clusters  # add clusters if used

        # Compute information criteria
        n_samples = X.shape[0]
        temp = n_samples * np.log(self.sse_ / n_samples)
        self.aic_ = temp + 2 * n_params
        self.bic_ = temp + np.log(n_samples) * n_params

        if hasattr(self.obj, "coef_"):
            self.coef_ = self.obj.coef_

        if hasattr(self.obj, "intercept_"):
            self.intercept_ = self.obj.intercept_

        return self

    def partial_fit(self, X, y, **kwargs):
        """Partial fit custom model to training data (X, y).

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Subset of training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Subset of target values.

            **kwargs: additional parameters to be passed to
                self.cook_training_set or self.obj.fit

        Returns:

            self: object

        """

        if len(X.shape) == 1:
            if isinstance(X, pd.DataFrame):
                X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns)
            else:
                X = X.reshape(1, -1)
            y = np.array([y])

        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

        self.obj.partial_fit(scaled_Z, centered_y, **kwargs)

        self.X_ = X

        self.y_ = y

        return self

    def predict(self, X, level=95, method="splitconformal", **kwargs):
        """Predict test data X.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Samples to predict; a 1-D array is treated as one sample.

            level: int
                Level of confidence (default = 95)

            method: str
                'splitconformal', 'localconformal'
                prediction (if you specify `return_pi = True`)

            **kwargs: additional parameters
                    `return_pi = True` for conformal prediction,
                    with `method` in ('splitconformal', 'localconformal')
                    or `return_std = True` for `self.obj` in
                    (`sklearn.linear_model.BayesianRidge`,
                    `sklearn.linear_model.ARDRegressor`,
                    `sklearn.gaussian_process.GaussianProcessRegressor`)`

        Returns:

            model predictions:
                an array if uncertainty quantification is not requested,
                a named tuple (mean, std, lower, upper) if `return_std` is
                passed, or the output of `PredictionInterval.predict` if
                `return_pi` is passed

        """

        # NOTE: both switches are detected by key presence, not truthiness.
        if "return_std" in kwargs:
            alpha = 100 - level
            pi_multiplier = norm.ppf(1 - alpha / 200)
            single_sample = len(X.shape) == 1

            if single_sample:
                n_features = X.shape[0]
                new_X = mo.rbind(
                    X.reshape(1, n_features),
                    np.ones(n_features).reshape(1, n_features),
                )
            else:
                new_X = X

            mean_, std_ = self.obj.predict(
                self.cook_test_set(new_X, **kwargs), return_std=True
            )

            if single_sample:
                # Keep the first row of BOTH arrays. Indexing the returned
                # (mean, std) tuple with [0] would unpack the mean array
                # alone, making `std_` the prediction for the dummy row.
                mean_, std_ = mean_[0], std_[0]

            preds = self.y_mean_ + mean_
            lower = self.y_mean_ + (mean_ - pi_multiplier * std_)
            upper = self.y_mean_ + (mean_ + pi_multiplier * std_)

            DescribeResults = namedtuple(
                "DescribeResults", ["mean", "std", "lower", "upper"]
            )

            return DescribeResults(preds, std_, lower, upper)

        if "return_pi" in kwargs:
            assert method in (
                "splitconformal",
                "localconformal",
            ), "method must be in ('splitconformal', 'localconformal')"
            self.pi = PredictionInterval(
                obj=self,
                method=method,
                level=level,
                type_pi=self.type_pi,
                replications=self.replications,
                kernel=self.kernel,
            )

            if len(self.X_.shape) == 1:
                # A single stored training sample: promote it to one row.
                # The check concerns the stored data, not the test input.
                self.X_ = np.asarray(self.X_).reshape(1, -1)
                self.y_ = np.array([self.y_])

            self.pi.fit(self.X_, self.y_)
            # self.X_ / self.y_ are kept: memory-hungry, but deleting them
            # would be a dangerous side effect
            preds = self.pi.predict(X, return_pi=True)
            return preds

        # plain point predictions
        if len(X.shape) == 1:
            n_features = X.shape[0]
            new_X = mo.rbind(
                X.reshape(1, n_features),
                np.ones(n_features).reshape(1, n_features),
            )

            return (
                self.y_mean_
                + self.obj.predict(
                    self.cook_test_set(new_X, **kwargs), **kwargs
                )
            )[0]

        # len(X.shape) > 1
        return self.y_mean_ + self.obj.predict(
            self.cook_test_set(X, **kwargs), **kwargs
        )

    def score(self, X, y, scoring=None):
        """Compute the score of the model.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Samples to score.

            y: array-like, shape = [n_samples]
                Target values.

            scoring: str
                scoring method; when None, the root mean squared error
                is returned, otherwise the name is resolved with
                `skm2.get_scorer`

        Returns:

            score: float

        """

        if scoring is None:
            return np.sqrt(np.mean((self.predict(X) - y) ** 2))

        return skm2.get_scorer(scoring)(self, X, y)
Custom Regression model
This class is used to 'augment' any regression model with transformed features.
Parameters:
obj: object
any object containing a method fit (obj.fit()) and a method predict
(obj.predict())
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
type_pi: str.
type of prediction interval; currently `None` (split or local
conformal without simulation), "kde" or "bootstrap" (simulated split
conformal).
replications: int.
number of replications (if needed) for predictive simulation.
Used only in `self.predict`, for `self.kernel` in ('gaussian',
'tophat') and `self.type_pi = 'kde'`. Default is `None`.
kernel: str.
the kernel to use for kernel density estimation (used for predictive
simulation in `self.predict`, with `method='splitconformal'` and
`type_pi = 'kde'`). Currently, either 'gaussian' or 'tophat'.
type_split: str.
Type of splitting for conformal prediction. None (default), or
"random" (random split of data) or "sequential" (sequential split of data)
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
level: float
confidence level for prediction intervals
pi_method: str
method for prediction intervals: 'splitconformal' or 'localconformal'
seed: int
reproducibility seed for nodes_sim=='uniform'
type_fit: str
'regression'
backend: str
"cpu" or "gpu" or "tpu"
Examples:
See https://thierrymoudiki.github.io/blog/2024/03/18/python/conformal-and-bayesian-regression
def fit(self, X, y, sample_weight=None, **kwargs):
    """Fit custom model to training data (X, y).

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        sample_weight: array-like, shape = [n_samples]
            Sample weights. They are subset with `self.index_row_`
            before being handed to `self.obj.fit`.

        **kwargs: additional parameters to be passed to
            self.cook_training_set or self.obj.fit

    Returns:

        self: object

    """

    centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

    # Wrap the base learner only once: calling fit again must not nest
    # a PredictionInterval inside another PredictionInterval.
    if self.level is not None and not isinstance(
        self.obj, PredictionInterval
    ):
        self.obj = PredictionInterval(
            obj=self.obj, method=self.pi_method, level=self.level
        )

    if sample_weight is not None:
        # weights must follow the rows kept by cook_training_set
        row_weights = np.asarray(sample_weight)[self.index_row_].ravel()
        self.obj.fit(
            scaled_Z, centered_y, sample_weight=row_weights, **kwargs
        )
    else:
        self.obj.fit(scaled_Z, centered_y, **kwargs)

    # The bookkeeping below runs for weighted fits too (the weighted
    # branch used to return early and leave these attributes unset).
    self.X_ = X
    self.y_ = y

    # Sum of squared errors on the (centered) training response
    centered_y_pred = self.obj.predict(scaled_Z)
    self.sse_ = np.sum((centered_y - centered_y_pred) ** 2)

    # Number of parameters: hidden features + original features
    # (+ clusters when clustering is used)
    n_params = self.n_hidden_features + X.shape[1]
    if self.n_clusters > 0:
        n_params += self.n_clusters

    # Information criteria
    n_samples = X.shape[0]
    temp = n_samples * np.log(self.sse_ / n_samples)
    self.aic_ = temp + 2 * n_params
    self.bic_ = temp + np.log(n_samples) * n_params

    if hasattr(self.obj, "coef_"):
        self.coef_ = self.obj.coef_

    if hasattr(self.obj, "intercept_"):
        self.intercept_ = self.obj.intercept_

    return self
Fit custom model to training data (X, y).
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
sample_weight: array-like, shape = [n_samples]
Sample weights.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
def predict(self, X, level=95, method="splitconformal", **kwargs):
    """Predict test data X.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Test vectors, where n_samples is the number
            of samples and n_features is the number of features.
            A 1-D input is treated as a single observation.

        level: int
            Level of confidence (default = 95)

        method: str
            'splitconformal', 'localconformal'
            prediction (if you specify `return_pi = True`)

        **kwargs: additional parameters
            `return_pi = True` for conformal prediction,
            with `method` in ('splitconformal', 'localconformal')
            or `return_std = True` for `self.obj` in
            (`sklearn.linear_model.BayesianRidge`,
            `sklearn.linear_model.ARDRegressor`,
            `sklearn.gaussian_process.GaussianProcessRegressor`)

    Returns:

        model predictions:
            an array if uncertainty quantification is not requested,
            a named tuple (mean, std, lower, upper) if `return_std`
            is passed, or the output of
            `PredictionInterval.predict(X, return_pi=True)` if
            `return_pi` is passed.

    """

    if "return_std" in kwargs:
        alpha = 100 - level
        pi_multiplier = norm.ppf(1 - alpha / 200)
        DescribeResults = namedtuple(
            "DescribeResults", ["mean", "std", "lower", "upper"]
        )

        if len(X.shape) == 1:
            n_features = X.shape[0]
            # a dummy row of ones is stacked under the observation;
            # only row 0 of the output is kept
            new_X = mo.rbind(
                X.reshape(1, n_features),
                np.ones(n_features).reshape(1, n_features),
            )
            mean_all, std_all = self.obj.predict(
                self.cook_test_set(new_X, **kwargs), return_std=True
            )
            # index each array separately: `(mean, std)[0]` followed by
            # unpacking would return the dummy row's mean as the std
            mean_, std_ = mean_all[0], std_all[0]
        else:
            mean_, std_ = self.obj.predict(
                self.cook_test_set(X, **kwargs), return_std=True
            )

        preds = self.y_mean_ + mean_
        lower = self.y_mean_ + (mean_ - pi_multiplier * std_)
        upper = self.y_mean_ + (mean_ + pi_multiplier * std_)

        return DescribeResults(preds, std_, lower, upper)

    if "return_pi" in kwargs:
        assert method in (
            "splitconformal",
            "localconformal",
        ), "method must be in ('splitconformal', 'localconformal')"
        self.pi = PredictionInterval(
            obj=self,
            method=method,
            level=level,
            type_pi=self.type_pi,
            replications=self.replications,
            kernel=self.kernel,
        )

        if len(self.X_.shape) == 1:
            # the reshape acts on the stored training data, so its type
            # (not the type of the test input X) selects the branch
            if isinstance(self.X_, pd.DataFrame):
                self.X_ = pd.DataFrame(
                    self.X_.values.reshape(1, -1), columns=self.X_.columns
                )
            else:
                self.X_ = self.X_.reshape(1, -1)
            self.y_ = np.array([self.y_])

        # X_ and y_ are kept on purpose: memory-hungry, but deleting
        # them would be a surprising side effect of predict
        self.pi.fit(self.X_, self.y_)
        return self.pi.predict(X, return_pi=True)

    # plain point predictions
    if len(X.shape) == 1:
        n_features = X.shape[0]
        new_X = mo.rbind(
            X.reshape(1, n_features),
            np.ones(n_features).reshape(1, n_features),
        )

        return (
            self.y_mean_
            + self.obj.predict(self.cook_test_set(new_X, **kwargs), **kwargs)
        )[0]

    return self.y_mean_ + self.obj.predict(
        self.cook_test_set(X, **kwargs), **kwargs
    )
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
level: int
Level of confidence (default = 95)
method: str
'splitconformal', 'localconformal'
prediction (if you specify `return_pi = True`)
**kwargs: additional parameters
`return_pi = True` for conformal prediction,
with `method` in ('splitconformal', 'localconformal')
or `return_std = True` for `self.obj` in
(`sklearn.linear_model.BayesianRidge`,
`sklearn.linear_model.ARDRegressor`,
`sklearn.gaussian_process.GaussianProcessRegressor`)
Returns:
model predictions:
an array if uncertainty quantification is not requested,
or a tuple with prediction intervals (and simulations, when requested):
if `return_std = True`, the tuple is (mean, standard deviation,
lower and upper prediction interval); if `return_pi = True`,
it is the output of the fitted `PredictionInterval` object.
def score(self, X, y, scoring=None):
    """Compute the score of the model.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Vectors to score, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        scoring: str
            scoring method; when `None`, the root mean squared error
            of `self.predict(X)` against `y` is returned

    Returns:

        score: float

    """

    if scoring is not None:
        # delegate to the named scorer
        scorer = skm2.get_scorer(scoring)
        return scorer(self, X, y)

    # default: root mean squared error
    residuals = self.predict(X) - y
    return np.sqrt(np.mean(residuals**2))
Compute the score of the model.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method
Returns:
score: float
class CustomBackPropRegressor(Custom, RegressorMixin):
    """
    Finite difference trainer for nnetsauce models.

    Parameters
    ----------

    base_model : str
        The name of the base model (e.g., 'RidgeCV').

    type_grad : {'finitediff', 'autodiff'}, optional
        Type of gradient computation to use (default='finitediff').

    lr : float, optional
        Learning rate for optimization (default=1e-4).

    optimizer : {'gd', 'sgd', 'adam', 'cd'}, optional
        Optimization algorithm: gradient descent ('gd'), stochastic gradient descent ('sgd'),
        Adam ('adam'), or coordinate descent ('cd'). Default is 'gd'.

    eps : float, optional
        Scaling factor for adaptive finite difference step size (default=1e-3).

    batch_size : int, optional
        Batch size for 'sgd' optimizer (default=32).

    alpha : float, optional
        Elastic net penalty strength (default=0.0).

    l1_ratio : float, optional
        Elastic net mixing parameter (0 = Ridge, 1 = Lasso, default=0.0).

    type_loss : {'mse', 'quantile'}, optional
        Type of loss function to use (default='mse').

    q : float, optional
        Quantile for quantile loss (default=0.5).

    backend : str, optional
        Backend forwarded to `ns.CustomRegressor` (default='cpu').

    **kwargs
        Additional parameters forwarded to `ns.CustomRegressor`.

    """

    def __init__(
        self,
        base_model,
        type_grad="finitediff",
        lr=1e-4,
        optimizer="gd",
        eps=1e-3,
        batch_size=32,
        alpha=0.0,
        l1_ratio=0.0,
        type_loss="mse",
        q=0.5,
        backend="cpu",
        **kwargs,
    ):
        super().__init__(base_model, True, **kwargs)
        self.base_model = base_model
        self.custom_kwargs = kwargs
        self.backend = backend
        # the wrapped model whose hidden-layer weights W_ are optimized
        self.model = ns.CustomRegressor(
            self.base_model, backend=self.backend, **self.custom_kwargs
        )
        self.type_grad = type_grad
        self.lr = lr
        self.optimizer = optimizer
        self.eps = eps
        self.opt_state = None  # Adam moments, created lazily in fit
        self.batch_size = batch_size  # for SGD
        self.loss_history_ = []
        self._cd_index = 0  # for coordinate descent
        self.alpha = alpha
        self.l1_ratio = l1_ratio
        self.type_loss = type_loss
        self.q = q

    def _loss(self, X, y, **kwargs):
        """
        Compute the loss (with elastic net penalty) for the current model.

        Parameters
        ----------

        X : array-like of shape (n_samples, n_features)
            Input data.

        y : array-like of shape (n_samples,)
            Target values.

        **kwargs
            Additional keyword arguments passed to `mean_pinball_loss`
            (quantile loss only).

        Returns
        -------
        float
            The computed loss value.

        Raises
        ------
        ValueError
            If `self.type_loss` is neither 'mse' nor 'quantile'.
        """
        y_pred = self.model.predict(X)
        if self.type_loss == "mse":
            loss = np.mean((y - y_pred) ** 2)
        elif self.type_loss == "quantile":
            loss = mean_pinball_loss(y, y_pred, alpha=self.q, **kwargs)
        else:
            raise ValueError(f"Unsupported type_loss: {self.type_loss}")
        W = self.model.W_
        l1 = np.sum(np.abs(W))
        l2 = np.sum(W**2)
        return loss + self.alpha * (
            self.l1_ratio * l1 + 0.5 * (1 - self.l1_ratio) * l2
        )

    def _compute_grad(self, X, y):
        """
        Compute the gradient of the loss with respect to W_ using
        central finite differences.

        The differenced loss (`self._loss`) already contains the elastic
        net penalty, so no analytic penalty term is added on top.

        Parameters
        ----------

        X : array-like of shape (n_samples, n_features)
            Input data.

        y : array-like of shape (n_samples,)
            Target values.

        Returns
        -------

        ndarray
            Gradient array with the same shape as W_.
        """
        W = deepcopy(self.model.W_)
        shape = W.shape
        W_flat = W.flatten()
        n_params = W_flat.size

        # Adaptive finite difference step
        h_vec = self.eps * np.maximum(1.0, np.abs(W_flat))

        loss_plus = np.zeros(n_params)
        loss_minus = np.zeros(n_params)

        try:
            for i in range(n_params):
                h_i = h_vec[i]
                Wp = W_flat.copy()
                Wp[i] += h_i
                Wm = W_flat.copy()
                Wm[i] -= h_i

                self.model.W_ = Wp.reshape(shape)
                loss_plus[i] = self._loss(X, y)

                self.model.W_ = Wm.reshape(shape)
                loss_minus[i] = self._loss(X, y)
        finally:
            # always restore the weights, even if a loss evaluation fails
            self.model.W_ = W

        return ((loss_plus - loss_minus) / (2 * h_vec)).reshape(shape)

    def fit(
        self,
        X,
        y,
        epochs=10,
        verbose=True,
        show_progress=True,
        sample_weight=None,
        **kwargs,
    ):
        """
        Fit the model using finite difference optimization.

        Parameters
        ----------

        X : array-like of shape (n_samples, n_features)
            Training data.

        y : array-like of shape (n_samples,)
            Target values.

        epochs : int, optional
            Number of optimization steps (default=10).

        verbose : bool, optional
            Whether to print progress messages (default=True).

        show_progress : bool, optional
            Whether to show tqdm progress bar (default=True).

        sample_weight : array-like, optional
            Sample weights, forwarded to the initial fit of the wrapped model.

        **kwargs
            Additional keyword arguments, forwarded to the wrapped model's
            `fit` together with `sample_weight`.

        Returns
        -------

        self : object
            Returns self.
        """

        # Initial fit of the wrapped model. Sample weights are applied
        # here so that the optimized W_ is not overwritten by a refit.
        if sample_weight is not None:
            self.model.fit(X, y, sample_weight=sample_weight, **kwargs)
        else:
            self.model.fit(X, y)

        iterator = tqdm(range(epochs)) if show_progress else range(epochs)

        for epoch in iterator:
            if self.optimizer == "sgd":
                # Mini-batch gradient; the batch cannot exceed the data
                n_samples = X.shape[0]
                idxs = np.random.choice(
                    n_samples, min(self.batch_size, n_samples), replace=False
                )
                if isinstance(X, pd.DataFrame):
                    X_batch = X.iloc[idxs, :]
                else:
                    X_batch = X[idxs, :]
                # positional indexing for pandas targets as well
                y_batch = y.iloc[idxs] if hasattr(y, "iloc") else y[idxs]
                grad = self._compute_grad(X_batch, y_batch)
            else:
                grad = self._compute_grad(X, y)

            if self.optimizer in ("gd", "sgd"):
                self.model.W_ -= self.lr * grad
                self.model.W_ = np.clip(self.model.W_, 0, 1)

            elif self.optimizer == "adam":
                if self.opt_state is None:
                    self.opt_state = {
                        "m": np.zeros_like(grad),
                        "v": np.zeros_like(grad),
                        "t": 0,
                    }
                beta1, beta2, eps = 0.9, 0.999, 1e-8
                self.opt_state["t"] += 1
                self.opt_state["m"] = (
                    beta1 * self.opt_state["m"] + (1 - beta1) * grad
                )
                self.opt_state["v"] = beta2 * self.opt_state["v"] + (
                    1 - beta2
                ) * (grad**2)
                m_hat = self.opt_state["m"] / (1 - beta1 ** self.opt_state["t"])
                v_hat = self.opt_state["v"] / (1 - beta2 ** self.opt_state["t"])

                self.model.W_ -= self.lr * m_hat / (np.sqrt(v_hat) + eps)
                self.model.W_ = np.clip(self.model.W_, 0, 1)

            elif self.optimizer == "cd":  # coordinate descent
                W_shape = self.model.W_.shape
                W_flat = self.model.W_.flatten()
                grad_flat = grad.flatten()

                # Update only one coordinate per epoch (cyclic)
                idx = self._cd_index % W_flat.size
                W_flat[idx] = np.clip(
                    W_flat[idx] - self.lr * grad_flat[idx], 0, 1
                )

                self.model.W_ = W_flat.reshape(W_shape)
                self._cd_index += 1

            else:
                raise ValueError(f"Unsupported optimizer: {self.optimizer}")

            loss = self._loss(X, y)
            self.loss_history_.append(loss)

            if verbose:
                print(f"Epoch {epoch+1}: Loss = {loss:.6f}")

        return self

    def predict(self, X, level=95, method="splitconformal", **kwargs):
        """
        Predict using the trained model.

        Parameters
        ----------

        X : array-like of shape (n_samples, n_features)
            Input data. A 1-D input is treated as a single observation.

        level : int, optional
            Level of confidence for prediction intervals (default=95).

        method : {'splitconformal', 'localconformal'}, optional
            Method for conformal prediction (default='splitconformal').

        **kwargs
            Additional keyword arguments. Use `return_pi=True` for prediction intervals,
            or `return_std=True` for standard deviation estimates.

        Returns
        -------

        array or tuple
            Model predictions, or a tuple with prediction intervals or standard deviations if requested.
        """
        if "return_std" in kwargs:
            alpha = 100 - level
            pi_multiplier = norm.ppf(1 - alpha / 200)
            DescribeResults = namedtuple(
                "DescribeResults", ["mean", "std", "lower", "upper"]
            )

            if len(X.shape) == 1:
                n_features = X.shape[0]
                # dummy row of ones stacked below; only row 0 is kept
                new_X = mo.rbind(
                    X.reshape(1, n_features),
                    np.ones(n_features).reshape(1, n_features),
                )
                res = self.model.predict(new_X, return_std=True)
                mean_, std_ = res[0][0], res[1][0]
            else:
                # the wrapped model may return more than (mean, std);
                # only the first two entries are used here
                res = self.model.predict(X, return_std=True)
                mean_, std_ = res[0], res[1]

            lower = mean_ - pi_multiplier * std_
            upper = mean_ + pi_multiplier * std_

            return DescribeResults(mean_, std_, lower, upper)

        if "return_pi" in kwargs:
            assert method in (
                "splitconformal",
                "localconformal",
            ), "method must be in ('splitconformal', 'localconformal')"
            self.pi = ns.PredictionInterval(
                obj=self,
                method=method,
                level=level,
                type_pi=self.type_pi,
                replications=self.replications,
                kernel=self.kernel,
            )

            if len(self.X_.shape) == 1:
                # branch on the stored training data, which is what gets
                # reshaped (not on the test input X)
                if isinstance(self.X_, pd.DataFrame):
                    self.X_ = pd.DataFrame(
                        self.X_.values.reshape(1, -1), columns=self.X_.columns
                    )
                else:
                    self.X_ = self.X_.reshape(1, -1)
                self.y_ = np.array([self.y_])

            # X_ and y_ are kept on purpose (deleting them in predict
            # would be a surprising side effect)
            self.pi.fit(self.X_, self.y_)
            return self.pi.predict(X, return_pi=True)

        # plain point predictions
        if len(X.shape) == 1:
            n_features = X.shape[0]
            new_X = mo.rbind(
                X.reshape(1, n_features),
                np.ones(n_features).reshape(1, n_features),
            )

            return self.model.predict(new_X, **kwargs)[0]

        return self.model.predict(X, **kwargs)
Finite difference trainer for nnetsauce models.
Parameters
base_model : str The name of the base model (e.g., 'RidgeCV').
type_grad : {'finitediff', 'autodiff'}, optional Type of gradient computation to use (default='finitediff').
lr : float, optional Learning rate for optimization (default=1e-4).
optimizer : {'gd', 'sgd', 'adam', 'cd'}, optional Optimization algorithm: gradient descent ('gd'), stochastic gradient descent ('sgd'), Adam ('adam'), or coordinate descent ('cd'). Default is 'gd'.
eps : float, optional Scaling factor for adaptive finite difference step size (default=1e-3).
batch_size : int, optional Batch size for 'sgd' optimizer (default=32).
alpha : float, optional Elastic net penalty strength (default=0.0).
l1_ratio : float, optional Elastic net mixing parameter (0 = Ridge, 1 = Lasso, default=0.0).
type_loss : {'mse', 'quantile'}, optional Type of loss function to use (default='mse').
q : float, optional Quantile for quantile loss (default=0.5).
**kwargs Additional parameters to pass to the scikit-learn model.
def fit(
    self,
    X,
    y,
    epochs=10,
    verbose=True,
    show_progress=True,
    sample_weight=None,
    **kwargs,
):
    """
    Fit the model using finite difference optimization.

    Parameters
    ----------

    X : array-like of shape (n_samples, n_features)
        Training data.

    y : array-like of shape (n_samples,)
        Target values.

    epochs : int, optional
        Number of optimization steps (default=10).

    verbose : bool, optional
        Whether to print progress messages (default=True).

    show_progress : bool, optional
        Whether to show tqdm progress bar (default=True).

    sample_weight : array-like, optional
        Sample weights, forwarded to the initial fit of the wrapped model.

    **kwargs
        Additional keyword arguments, forwarded to the wrapped model's
        `fit` together with `sample_weight`.

    Returns
    -------

    self : object
        Returns self.
    """

    # Initial fit of the wrapped model. Sample weights are applied here
    # so that the optimized W_ is not overwritten by a refit afterwards.
    if sample_weight is not None:
        self.model.fit(X, y, sample_weight=sample_weight, **kwargs)
    else:
        self.model.fit(X, y)

    iterator = tqdm(range(epochs)) if show_progress else range(epochs)

    for epoch in iterator:
        if self.optimizer == "sgd":
            # Mini-batch gradient; the batch cannot exceed the data
            n_samples = X.shape[0]
            idxs = np.random.choice(
                n_samples, min(self.batch_size, n_samples), replace=False
            )
            if isinstance(X, pd.DataFrame):
                X_batch = X.iloc[idxs, :]
            else:
                X_batch = X[idxs, :]
            # positional indexing for pandas targets as well
            y_batch = y.iloc[idxs] if hasattr(y, "iloc") else y[idxs]
            grad = self._compute_grad(X_batch, y_batch)
        else:
            grad = self._compute_grad(X, y)

        if self.optimizer in ("gd", "sgd"):
            self.model.W_ -= self.lr * grad
            self.model.W_ = np.clip(self.model.W_, 0, 1)

        elif self.optimizer == "adam":
            if self.opt_state is None:
                self.opt_state = {
                    "m": np.zeros_like(grad),
                    "v": np.zeros_like(grad),
                    "t": 0,
                }
            beta1, beta2, eps = 0.9, 0.999, 1e-8
            self.opt_state["t"] += 1
            self.opt_state["m"] = (
                beta1 * self.opt_state["m"] + (1 - beta1) * grad
            )
            self.opt_state["v"] = beta2 * self.opt_state["v"] + (
                1 - beta2
            ) * (grad**2)
            m_hat = self.opt_state["m"] / (1 - beta1 ** self.opt_state["t"])
            v_hat = self.opt_state["v"] / (1 - beta2 ** self.opt_state["t"])

            self.model.W_ -= self.lr * m_hat / (np.sqrt(v_hat) + eps)
            self.model.W_ = np.clip(self.model.W_, 0, 1)

        elif self.optimizer == "cd":  # coordinate descent
            W_shape = self.model.W_.shape
            W_flat = self.model.W_.flatten()
            grad_flat = grad.flatten()

            # Update only one coordinate per epoch (cyclic)
            idx = self._cd_index % W_flat.size
            W_flat[idx] = np.clip(W_flat[idx] - self.lr * grad_flat[idx], 0, 1)

            self.model.W_ = W_flat.reshape(W_shape)
            self._cd_index += 1

        else:
            raise ValueError(f"Unsupported optimizer: {self.optimizer}")

        loss = self._loss(X, y)
        self.loss_history_.append(loss)

        if verbose:
            print(f"Epoch {epoch+1}: Loss = {loss:.6f}")

    return self
Fit the model using finite difference optimization.
Parameters
X : array-like of shape (n_samples, n_features) Training data.
y : array-like of shape (n_samples,) Target values.
epochs : int, optional Number of optimization steps (default=10).
verbose : bool, optional Whether to print progress messages (default=True).
show_progress : bool, optional Whether to show tqdm progress bar (default=True).
sample_weight : array-like, optional Sample weights.
**kwargs Additional keyword arguments.
Returns
self : object Returns self.
def predict(self, X, level=95, method="splitconformal", **kwargs):
    """
    Predict using the trained model.

    Parameters
    ----------

    X : array-like of shape (n_samples, n_features)
        Input data. A 1-D input is treated as a single observation.

    level : int, optional
        Level of confidence for prediction intervals (default=95).

    method : {'splitconformal', 'localconformal'}, optional
        Method for conformal prediction (default='splitconformal').

    **kwargs
        Additional keyword arguments. Use `return_pi=True` for prediction intervals,
        or `return_std=True` for standard deviation estimates.

    Returns
    -------

    array or tuple
        Model predictions, or a tuple with prediction intervals or standard deviations if requested.
    """
    if "return_std" in kwargs:
        alpha = 100 - level
        pi_multiplier = norm.ppf(1 - alpha / 200)
        DescribeResults = namedtuple(
            "DescribeResults", ["mean", "std", "lower", "upper"]
        )

        if len(X.shape) == 1:
            n_features = X.shape[0]
            # dummy row of ones stacked below; only row 0 is kept
            new_X = mo.rbind(
                X.reshape(1, n_features),
                np.ones(n_features).reshape(1, n_features),
            )
            res = self.model.predict(new_X, return_std=True)
            mean_, std_ = res[0][0], res[1][0]
        else:
            # the wrapped model may return more than (mean, std);
            # only the first two entries are used here
            res = self.model.predict(X, return_std=True)
            mean_, std_ = res[0], res[1]

        lower = mean_ - pi_multiplier * std_
        upper = mean_ + pi_multiplier * std_

        return DescribeResults(mean_, std_, lower, upper)

    if "return_pi" in kwargs:
        assert method in (
            "splitconformal",
            "localconformal",
        ), "method must be in ('splitconformal', 'localconformal')"
        self.pi = ns.PredictionInterval(
            obj=self,
            method=method,
            level=level,
            type_pi=self.type_pi,
            replications=self.replications,
            kernel=self.kernel,
        )

        if len(self.X_.shape) == 1:
            # branch on the stored training data, which is what gets
            # reshaped (not on the test input X)
            if isinstance(self.X_, pd.DataFrame):
                self.X_ = pd.DataFrame(
                    self.X_.values.reshape(1, -1), columns=self.X_.columns
                )
            else:
                self.X_ = self.X_.reshape(1, -1)
            self.y_ = np.array([self.y_])

        # X_ and y_ are kept on purpose (deleting them in predict would
        # be a surprising side effect)
        self.pi.fit(self.X_, self.y_)
        return self.pi.predict(X, return_pi=True)

    # plain point predictions
    if len(X.shape) == 1:
        n_features = X.shape[0]
        new_X = mo.rbind(
            X.reshape(1, n_features),
            np.ones(n_features).reshape(1, n_features),
        )

        return self.model.predict(new_X, **kwargs)[0]

    return self.model.predict(X, **kwargs)
Predict using the trained model.
Parameters
X : array-like of shape (n_samples, n_features) Input data.
level : int, optional Level of confidence for prediction intervals (default=95).
method : {'splitconformal', 'localconformal'}, optional Method for conformal prediction (default='splitconformal').
**kwargs
Additional keyword arguments. Use return_pi=True for prediction intervals,
or return_std=True for standard deviation estimates.
Returns
array or tuple Model predictions, or a tuple with prediction intervals or standard deviations if requested.
class DeepClassifier(CustomClassifier, ClassifierMixin):
    """
    Deep Classifier

    Stacks `n_layers` nested `CustomClassifier` layers on top of a base
    learner and delegates prediction and scoring to the resulting stack.

    Parameters:

        obj: an object
            A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification

        n_layers: int (default=3)
            Number of layers. `n_layers = 1` is a simple `CustomClassifier`

        verbose : int, optional (default=0)
            Monitor progress when fitting.

        All the other parameters are nnetsauce `CustomClassifier`'s

    Examples:

        ```python
        import nnetsauce as ns
        from sklearn.datasets import load_breast_cancer
        from sklearn.model_selection import train_test_split
        from sklearn.linear_model import LogisticRegressionCV
        data = load_breast_cancer()
        X = data.data
        y= data.target
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
        obj = LogisticRegressionCV()
        clf = ns.DeepClassifier(obj)
        clf.fit(X_train, y_train)
        print(clf.score(X_test, y_test))
        ```
    """

    def __init__(
        self,
        obj,
        # Defining depth
        n_layers=3,
        verbose=0,
        # CustomClassifier attributes
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        cv_calibration=2,
        calibration_method="sigmoid",
        seed=123,
        backend="cpu",
    ):
        super().__init__(
            obj=obj,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )
        self.coef_ = None
        self.intercept_ = None
        self.type_fit = "classification"
        self.cv_calibration = cv_calibration
        self.calibration_method = calibration_method
        # Keep the raw base learner: it is deliberately not wrapped in a
        # calibrated classifier here.
        self.obj = obj

        assert n_layers >= 1, "must have n_layers >= 1"
        self.stacked_obj = obj
        self.verbose = verbose
        self.n_layers = n_layers
        self.classes_ = None
        self.n_classes_ = None

    def fit(self, X, y, **kwargs):
        """Fit Classification algorithms to X and y.

        Parameters
        ----------
        X : array-like,
            Training vectors, where rows is the number of samples
            and columns is the number of features.
        y : array-like,
            Target values (class labels), one per training sample.
        **kwargs: dict
            Additional parameters to be passed to the fit method
            of the base learner. For example, `sample_weight`.

        Returns
        -------
        A fitted object
        """

        self.classes_ = np.unique(y)
        # for compatibility with scikit-learn
        self.n_classes_ = len(self.classes_)

        if isinstance(X, np.ndarray):
            X = pd.DataFrame(X)

        # Hyperparameters shared by every layer of the stack.
        layer_params = dict(
            n_hidden_features=self.n_hidden_features,
            activation_name=self.activation_name,
            a=self.a,
            nodes_sim=self.nodes_sim,
            bias=self.bias,
            dropout=self.dropout,
            direct_link=self.direct_link,
            n_clusters=self.n_clusters,
            cluster_encode=self.cluster_encode,
            type_clust=self.type_clust,
            type_scaling=self.type_scaling,
            col_sample=self.col_sample,
            row_sample=self.row_sample,
            cv_calibration=None,
            calibration_method=None,
            seed=self.seed,
            backend=self.backend,
        )

        # init layer: always rebuild from the base learner, so that calling
        # `fit` again (or after `set_params(obj=...)`) does not pile new
        # layers on top of a previously fitted stack.
        self.stacked_obj = CustomClassifier(obj=self.obj, **layer_params)

        extra_layers = range(self.n_layers - 1)
        iterator = tqdm(extra_layers) if self.verbose > 0 else extra_layers

        for _ in iterator:
            self.stacked_obj = deepcopy(
                CustomClassifier(obj=self.stacked_obj, **layer_params)
            )

        self.stacked_obj.fit(X, y, **kwargs)

        return self

    def partial_fit(self, X, y, **kwargs):
        """Incrementally update the layers of the stack with X and y.

        Parameters
        ----------
        X : array-like,
            Training vectors, where rows is the number of samples
            and columns is the number of features.
        y : array-like,
            Target values (class labels), one per training sample.
        **kwargs: dict
            Additional parameters to be passed to the `partial_fit` method
            of each inner layer. For example, `sample_weight`.

        Returns
        -------
        A fitted object
        """
        assert hasattr(self, "stacked_obj"), "model must be fitted first"
        current_obj = self.stacked_obj
        # NOTE(review): walks down the nested `.obj` chain once per layer;
        # the innermost base learner presumably has no `cook_test_set`,
        # in which case the last iteration raises AttributeError -- confirm.
        for _ in range(self.n_layers):
            try:
                input_X = current_obj.obj.cook_test_set(X)
                current_obj.obj.partial_fit(input_X, y, **kwargs)
                try:
                    current_obj = current_obj.obj
                except AttributeError:
                    pass
            except ValueError:
                pass
        return self

    def predict(self, X):
        """Predict class labels for X with the stacked model."""
        return self.stacked_obj.predict(X)

    def predict_proba(self, X):
        """Predict class probabilities for X with the stacked model."""
        return self.stacked_obj.predict_proba(X)

    def score(self, X, y, scoring=None):
        """Score the stacked model on (X, y) using `scoring`."""
        return self.stacked_obj.score(X, y, scoring)

    def cross_val_optim(
        self,
        X_train,
        y_train,
        X_test=None,
        y_test=None,
        scoring="accuracy",
        surrogate_obj=None,
        cv=5,
        n_jobs=None,
        n_init=10,
        n_iter=190,
        abs_tol=1e-3,
        verbose=2,
        seed=123,
        **kwargs,
    ):
        """Cross-validation function and hyperparameters' search

        Parameters:

            X_train: array-like,
                Training vectors, where rows is the number of samples
                and columns is the number of features.

            y_train: array-like,
                Training target values, one per row of `X_train`.

            X_test: array-like,
                Testing vectors, where rows is the number of samples
                and columns is the number of features.

            y_test: array-like,
                Testing target values, one per row of `X_test`.

            scoring: str
                scoring metric; see https://scikit-learn.org/stable/modules/model_evaluation.html#the-scoring-parameter-defining-model-evaluation-rules

            surrogate_obj: an object;
                An ML model for estimating the uncertainty around the objective function

            cv: int;
                number of cross-validation folds

            n_jobs: int;
                number of jobs for parallel execution

            n_init: an integer;
                number of points in the initial setting, when `x_init` and `y_init` are not provided

            n_iter: an integer;
                number of iterations of the minimization algorithm

            abs_tol: a float;
                tolerance for convergence of the optimizer (early stopping based on acquisition function)

            verbose: int
                controls verbosity

            seed: int
                reproducibility seed

            **kwargs: dict
                additional parameters to be passed to the estimator

        Returns:

            the optimizer's result; when both `X_test` and `y_test` are
            given, the same fields plus a `test_<scoring>` out-of-sample score
        """

        num_to_activation_name = {1: "relu", 2: "sigmoid", 3: "tanh"}
        num_to_nodes_sim = {1: "sobol", 2: "uniform", 3: "hammersley"}
        num_to_type_clust = {1: "kmeans", 2: "gmm"}

        def pick(choices, raw_value):
            # Round up to a 1-based key and clamp it into the valid range:
            # the search box starts at 0, and ceil(0) == 0 is not a key.
            key = int(np.ceil(raw_value))
            return choices[min(max(key, 1), len(choices))]

        def decode_n_layers(raw_value):
            # At least one layer, in line with the constructor's invariant.
            return max(int(np.ceil(raw_value)), 1)

        # objective function for hyperparams tuning
        def crossval_objective(xx):
            self.set_params(
                **{
                    # Defining depth
                    "n_layers": decode_n_layers(xx[0]),
                    # CustomClassifier attributes
                    "n_hidden_features": int(np.ceil(xx[1])),
                    "activation_name": pick(num_to_activation_name, xx[2]),
                    "nodes_sim": pick(num_to_nodes_sim, xx[3]),
                    "dropout": xx[4],
                    "n_clusters": int(np.ceil(xx[5])),
                    "type_clust": pick(num_to_type_clust, xx[6]),
                    **kwargs,
                }
            )
            # The optimizer minimizes, hence the negated mean CV score.
            return -cross_val_score(
                estimator=self,
                X=X_train,
                y=y_train,
                scoring=scoring,
                cv=cv,
                n_jobs=n_jobs,
                verbose=0,
            ).mean()

        # Settings shared by both optimizer flavours.
        common_settings = dict(
            objective_func=crossval_objective,
            lower_bound=np.array([0, 3, 0, 0, 0.0, 0, 0]),
            upper_bound=np.array([5, 100, 3, 3, 0.4, 5, 2]),
            params_names=[
                "n_layers",
                # CustomClassifier attributes
                "n_hidden_features",
                "activation_name",
                "nodes_sim",
                "dropout",
                "n_clusters",
                "type_clust",
            ],
            n_init=n_init,
            n_iter=n_iter,
            seed=seed,
        )

        if surrogate_obj is None:
            gp_opt = gp.GPOpt(method="bayesian", **common_settings)
        else:
            gp_opt = gp.GPOpt(
                acquisition="ucb",
                method="splitconformal",
                surrogate_obj=ns.PredictionInterval(
                    obj=surrogate_obj, method="splitconformal"
                ),
                **common_settings,
            )

        res = gp_opt.optimize(verbose=verbose, abs_tol=abs_tol)

        # Translate the raw (continuous) optimum back into usable values;
        # `dropout` is already on its natural scale.
        best = res.best_params
        best["n_layers"] = decode_n_layers(best["n_layers"])
        best["n_hidden_features"] = int(np.ceil(best["n_hidden_features"]))
        best["activation_name"] = pick(
            num_to_activation_name, best["activation_name"]
        )
        best["nodes_sim"] = pick(num_to_nodes_sim, best["nodes_sim"])
        best["n_clusters"] = int(np.ceil(best["n_clusters"]))
        best["type_clust"] = pick(num_to_type_clust, best["type_clust"])

        # out-of-sample error
        if X_test is not None and y_test is not None:
            self.set_params(**res.best_params, verbose=0, seed=seed)
            preds = self.fit(X_train, y_train).predict(X_test)
            # check error on y_test
            oos_err = getattr(metrics, scoring + "_score")(
                y_true=y_test, y_pred=preds
            )
            result = namedtuple("result", res._fields + ("test_" + scoring,))
            return result(*res, oos_err)

        return res

    def lazy_cross_val_optim(
        self,
        X_train,
        y_train,
        X_test=None,
        y_test=None,
        scoring="accuracy",
        surrogate_objs=None,
        customize=False,
        cv=5,
        n_jobs=None,
        n_init=10,
        n_iter=190,
        abs_tol=1e-3,
        verbose=1,
        seed=123,
    ):
        """Automated Cross-validation function and hyperparameters' search using multiple surrogates

        Parameters:

            X_train: array-like,
                Training vectors, where rows is the number of samples
                and columns is the number of features.

            y_train: array-like,
                Training target values, one per row of `X_train`.

            X_test: array-like,
                Testing vectors, where rows is the number of samples
                and columns is the number of features.

            y_test: array-like,
                Testing target values, one per row of `X_test`.

            scoring: str
                scoring metric; see https://scikit-learn.org/stable/modules/model_evaluation.html#the-scoring-parameter-defining-model-evaluation-rules

            surrogate_objs: object names as a list of strings;
                ML models for estimating the uncertainty around the objective function

            customize: boolean
                if True, the surrogate is transformed into a quasi-randomized network (default is False)

            cv: int;
                number of cross-validation folds

            n_jobs: int;
                number of jobs for parallel execution

            n_init: an integer;
                number of points in the initial setting, when `x_init` and `y_init` are not provided

            n_iter: an integer;
                number of iterations of the minimization algorithm

            abs_tol: a float;
                tolerance for convergence of the optimizer (early stopping based on acquisition function)

            verbose: int
                controls verbosity

            seed: int
                reproducibility seed

        Returns:

            a list of `(surrogate name, result of cross_val_optim)` tuples,
            one per surrogate for which the search completed
        """

        removed_regressors = [
            "TheilSenRegressor",
            "ARDRegression",
            "CCA",
            "GaussianProcessRegressor",
            "GradientBoostingRegressor",
            "HistGradientBoostingRegressor",
            "IsotonicRegression",
            "MultiOutputRegressor",
            "MultiTaskElasticNet",
            "MultiTaskElasticNetCV",
            "MultiTaskLasso",
            "MultiTaskLassoCV",
            "OrthogonalMatchingPursuit",
            "OrthogonalMatchingPursuitCV",
            "PLSCanonical",
            "PLSRegression",
            "RadiusNeighborsRegressor",
            "RegressorChain",
            "StackingRegressor",
            "VotingRegressor",
        ]

        results = []

        for est_name, est_class in all_estimators():
            if not issubclass(est_class, RegressorMixin):
                continue
            if est_name in removed_regressors:
                continue
            if surrogate_objs is not None and est_name not in surrogate_objs:
                continue

            try:
                if customize:
                    # NOTE(review): the surrogate is a regressor but is
                    # wrapped in a CustomClassifier here -- confirm this is
                    # intended (CustomRegressor looks like the natural fit).
                    surr_obj = ns.CustomClassifier(obj=est_class())
                    label = f"CustomClassifier({est_name})"
                else:
                    surr_obj = est_class()
                    label = est_name
                res = self.cross_val_optim(
                    X_train=X_train,
                    y_train=y_train,
                    X_test=X_test,
                    y_test=y_test,
                    surrogate_obj=surr_obj,
                    cv=cv,
                    n_jobs=n_jobs,
                    scoring=scoring,
                    n_init=n_init,
                    n_iter=n_iter,
                    abs_tol=abs_tol,
                    verbose=verbose,
                    seed=seed,
                )
            except Exception:
                # Best effort: a surrogate that cannot be built or fails
                # during the search is skipped. KeyboardInterrupt and
                # SystemExit are deliberately no longer swallowed.
                continue

            results.append((label, res))

        return results

    @property
    def _estimator_type(self):
        # Single source of truth for the estimator kind (a class attribute
        # of the same name would be shadowed by this property anyway).
        return "classifier"
Deep Classifier
Parameters:
obj: an object
A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification
n_layers: int (default=3)
Number of layers. `n_layers = 1` is a simple `CustomClassifier`
verbose : int, optional (default=0)
Monitor progress when fitting.
All the other parameters are nnetsauce `CustomClassifier`'s
Examples:
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegressionCV
data = load_breast_cancer()
X = data.data
y= data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
obj = LogisticRegressionCV()
clf = ns.DeepClassifier(obj)
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))
def fit(self, X, y, **kwargs):
    """Fit Classification algorithms to X and y.

    Parameters
    ----------
    X : array-like,
        Training vectors, where rows is the number of samples
        and columns is the number of features.
    y : array-like,
        Target values (class labels), one per training sample.
    **kwargs: dict
        Additional parameters to be passed to the fit method
        of the base learner. For example, `sample_weight`.

    Returns
    -------
    A fitted object
    """

    self.classes_ = np.unique(y)
    # for compatibility with scikit-learn
    self.n_classes_ = len(self.classes_)

    if isinstance(X, np.ndarray):
        X = pd.DataFrame(X)

    # Hyperparameters shared by every layer of the stack.
    layer_params = dict(
        n_hidden_features=self.n_hidden_features,
        activation_name=self.activation_name,
        a=self.a,
        nodes_sim=self.nodes_sim,
        bias=self.bias,
        dropout=self.dropout,
        direct_link=self.direct_link,
        n_clusters=self.n_clusters,
        cluster_encode=self.cluster_encode,
        type_clust=self.type_clust,
        type_scaling=self.type_scaling,
        col_sample=self.col_sample,
        row_sample=self.row_sample,
        cv_calibration=None,
        calibration_method=None,
        seed=self.seed,
        backend=self.backend,
    )

    # init layer: always rebuild from the base learner, so that calling
    # `fit` again (or after `set_params(obj=...)`) does not pile new
    # layers on top of a previously fitted stack.
    self.stacked_obj = CustomClassifier(obj=self.obj, **layer_params)

    extra_layers = range(self.n_layers - 1)
    iterator = tqdm(extra_layers) if self.verbose > 0 else extra_layers

    for _ in iterator:
        self.stacked_obj = deepcopy(
            CustomClassifier(obj=self.stacked_obj, **layer_params)
        )

    self.stacked_obj.fit(X, y, **kwargs)

    return self
Fit Classification algorithms to X and y.
Parameters
X : array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
y : array-like,
Target values (class labels), with one entry
per training sample (row of X).
**kwargs: dict
Additional parameters to be passed to the fit method
of the base learner. For example, sample_weight.
Returns
A fitted object
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
Scoring function for classification.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method (default is accuracy)
Returns:
score: float
class DeepRegressor(CustomRegressor, RegressorMixin):
    """
    Deep Regressor

    Stacks `n_layers` nested `CustomRegressor` layers on top of a base
    learner; when `level` is set, the stack is wrapped in a
    `PredictionInterval` so that `predict` returns prediction intervals.

    Parameters:

        obj: an object
            A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification

        verbose : int, optional (default=0)
            Monitor progress when fitting.

        n_layers: int (default=2)
            Number of layers. `n_layers = 1` is a simple `CustomRegressor`

        All the other parameters are nnetsauce `CustomRegressor`'s

    Examples:

        ```python
        import nnetsauce as ns
        from sklearn.datasets import load_diabetes
        from sklearn.model_selection import train_test_split
        from sklearn.linear_model import RidgeCV
        data = load_diabetes()
        X = data.data
        y= data.target
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
        obj = RidgeCV()
        clf = ns.DeepRegressor(obj)
        clf.fit(X_train, y_train)
        print(clf.score(X_test, y_test))
        ```

    """

    def __init__(
        self,
        obj,
        # Defining depth
        n_layers=2,
        verbose=0,
        # CustomRegressor attributes
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        level=None,
        pi_method="splitconformal",
        seed=123,
        backend="cpu",
    ):
        super().__init__(
            obj=obj,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            level=level,
            pi_method=pi_method,
            seed=seed,
            backend=backend,
        )

        assert n_layers >= 1, "must have n_layers >= 1"

        self.stacked_obj = deepcopy(obj)
        self.verbose = verbose
        self.n_layers = n_layers
        self.level = level
        self.pi_method = pi_method
        self.coef_ = None

    def fit(self, X, y, **kwargs):
        """Fit Regression algorithms to X and y.

        Parameters
        ----------
        X : array-like,
            Training vectors, where rows is the number of samples
            and columns is the number of features.
        y : array-like,
            Target values, one per training sample.
        **kwargs: dict
            Additional parameters to be passed to the fit method
            of the base learner. For example, `sample_weight`.

        Returns
        -------
        A fitted object
        """

        if isinstance(X, np.ndarray):
            X = pd.DataFrame(X)

        # Hyperparameters shared by every layer of the stack.
        layer_params = dict(
            n_hidden_features=self.n_hidden_features,
            activation_name=self.activation_name,
            a=self.a,
            nodes_sim=self.nodes_sim,
            bias=self.bias,
            dropout=self.dropout,
            direct_link=self.direct_link,
            n_clusters=self.n_clusters,
            cluster_encode=self.cluster_encode,
            type_clust=self.type_clust,
            type_scaling=self.type_scaling,
            col_sample=self.col_sample,
            row_sample=self.row_sample,
            seed=self.seed,
            backend=self.backend,
        )

        # init layer: always rebuild from a fresh copy of the base learner,
        # so that a second `fit` does not wrap the previously fitted stack
        # (or a previous PredictionInterval wrapper) in new layers.
        # Assumes the parent constructor stored the base learner as
        # `self.obj` -- TODO confirm.
        self.stacked_obj = CustomRegressor(
            obj=deepcopy(self.obj), **layer_params
        )

        extra_layers = range(self.n_layers - 1)
        iterator = tqdm(extra_layers) if self.verbose > 0 else extra_layers

        for _ in iterator:
            self.stacked_obj = deepcopy(
                CustomRegressor(obj=self.stacked_obj, **layer_params)
            )

        # Wrap BEFORE fitting: the interval wrapper itself has to be fitted,
        # otherwise `predict(..., return_pi=True)` runs on an unfitted
        # PredictionInterval.
        if self.level is not None:
            self.stacked_obj = PredictionInterval(
                obj=self.stacked_obj, method=self.pi_method, level=self.level
            )

        self.stacked_obj.fit(X, y, **kwargs)

        # Mirror the fitted attributes the outermost object exposes.
        if hasattr(self.stacked_obj, "clustering_obj_"):
            self.clustering_obj_ = self.stacked_obj.clustering_obj_

        if hasattr(self.stacked_obj, "coef_"):
            self.coef_ = self.stacked_obj.coef_

        if hasattr(self.stacked_obj, "scaler_"):
            self.scaler_ = self.stacked_obj.scaler_

        if hasattr(self.stacked_obj, "nn_scaler_"):
            self.nn_scaler_ = self.stacked_obj.nn_scaler_

        if hasattr(self.stacked_obj, "clustering_scaler_"):
            self.clustering_scaler_ = self.stacked_obj.clustering_scaler_

        return self

    def partial_fit(self, X, y, **kwargs):
        """Incrementally update the layers of the stack with X and y.

        Parameters
        ----------
        X : array-like,
            Training vectors, where rows is the number of samples
            and columns is the number of features.
        y : array-like,
            Target values, one per training sample.
        **kwargs: dict
            Additional parameters to be passed to the `partial_fit` method
            of each inner layer. For example, `sample_weight`.

        Returns
        -------
        A fitted object
        """
        assert hasattr(self, "stacked_obj"), "model must be fitted first"
        current_obj = self.stacked_obj
        # NOTE(review): walks down the nested `.obj` chain once per layer;
        # the innermost base learner presumably has no `cook_test_set`,
        # in which case the last iteration raises AttributeError -- confirm.
        for _ in range(self.n_layers):
            try:
                input_X = current_obj.obj.cook_test_set(X)
                current_obj.obj.partial_fit(input_X, y, **kwargs)
                try:
                    current_obj = current_obj.obj
                except AttributeError:
                    pass
            except ValueError as e:
                # Best effort: report the problem and move on.
                print(e)
        return self

    def predict(self, X, **kwargs):
        """Predict with the stacked model.

        When `level` is set, prediction intervals are requested from the
        wrapped stack (`return_pi=True`) and `**kwargs` are not forwarded;
        otherwise `**kwargs` go straight to the stack's `predict`.
        """
        if self.level is not None:
            return self.stacked_obj.predict(X, return_pi=True)
        return self.stacked_obj.predict(X, **kwargs)

    def score(self, X, y, scoring=None):
        """Score the stacked model on (X, y) using `scoring`."""
        return self.stacked_obj.score(X, y, scoring)
Deep Regressor
Parameters:
obj: an object
A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification
verbose : int, optional (default=0)
Monitor progress when fitting.
n_layers: int (default=2)
Number of layers. `n_layers = 1` is a simple `CustomRegressor`
All the other parameters are nnetsauce `CustomRegressor`'s
Examples:
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.linear_model import RidgeCV
data = load_diabetes()
X = data.data
y= data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
obj = RidgeCV()
clf = ns.DeepRegressor(obj)
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))
def fit(self, X, y, **kwargs):
    """Fit Regression algorithms to X and y.

    Parameters
    ----------
    X : array-like,
        Training vectors, where rows is the number of samples
        and columns is the number of features.
    y : array-like,
        Target values, one per training sample.
    **kwargs: dict
        Additional parameters to be passed to the fit method
        of the base learner. For example, `sample_weight`.

    Returns
    -------
    A fitted object
    """

    if isinstance(X, np.ndarray):
        X = pd.DataFrame(X)

    # Hyperparameters shared by every layer of the stack.
    layer_params = dict(
        n_hidden_features=self.n_hidden_features,
        activation_name=self.activation_name,
        a=self.a,
        nodes_sim=self.nodes_sim,
        bias=self.bias,
        dropout=self.dropout,
        direct_link=self.direct_link,
        n_clusters=self.n_clusters,
        cluster_encode=self.cluster_encode,
        type_clust=self.type_clust,
        type_scaling=self.type_scaling,
        col_sample=self.col_sample,
        row_sample=self.row_sample,
        seed=self.seed,
        backend=self.backend,
    )

    # init layer: always rebuild from a fresh copy of the base learner,
    # so that a second `fit` does not wrap the previously fitted stack
    # (or a previous PredictionInterval wrapper) in new layers.
    # Assumes the constructor stored the base learner as `self.obj`
    # -- TODO confirm.
    self.stacked_obj = CustomRegressor(obj=deepcopy(self.obj), **layer_params)

    extra_layers = range(self.n_layers - 1)
    iterator = tqdm(extra_layers) if self.verbose > 0 else extra_layers

    for _ in iterator:
        self.stacked_obj = deepcopy(
            CustomRegressor(obj=self.stacked_obj, **layer_params)
        )

    # Wrap BEFORE fitting: the interval wrapper itself has to be fitted,
    # otherwise `predict(..., return_pi=True)` runs on an unfitted
    # PredictionInterval.
    if self.level is not None:
        self.stacked_obj = PredictionInterval(
            obj=self.stacked_obj, method=self.pi_method, level=self.level
        )

    self.stacked_obj.fit(X, y, **kwargs)

    # Mirror the fitted attributes the outermost object exposes.
    if hasattr(self.stacked_obj, "clustering_obj_"):
        self.clustering_obj_ = self.stacked_obj.clustering_obj_

    if hasattr(self.stacked_obj, "coef_"):
        self.coef_ = self.stacked_obj.coef_

    if hasattr(self.stacked_obj, "scaler_"):
        self.scaler_ = self.stacked_obj.scaler_

    if hasattr(self.stacked_obj, "nn_scaler_"):
        self.nn_scaler_ = self.stacked_obj.nn_scaler_

    if hasattr(self.stacked_obj, "clustering_scaler_"):
        self.clustering_scaler_ = self.stacked_obj.clustering_scaler_

    return self
Fit Regression algorithms to X and y.
Parameters
X : array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
y : array-like,
Target values, with one entry
per training sample (row of X).
**kwargs: dict
Additional parameters to be passed to the fit method
of the base learner. For example, sample_weight.
Returns
A fitted object
def predict(self, X, **kwargs):
    """Predict with the stacked model.

    Without a confidence `level`, `**kwargs` are forwarded to the stack's
    `predict`. With a `level`, prediction intervals are requested instead
    (`return_pi=True`) and `**kwargs` are not forwarded.
    """
    model = self.stacked_obj
    if self.level is None:
        return model.predict(X, **kwargs)
    return model.predict(X, return_pi=True)
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
level: int
Level of confidence (default = 95)
method: str
'splitconformal', 'localconformal'
prediction (if you specify `return_pi = True`)
**kwargs: additional parameters
`return_pi = True` for conformal prediction,
with `method` in ('splitconformal', 'localconformal')
or `return_std = True` for `self.obj` in
(`sklearn.linear_model.BayesianRidge`,
`sklearn.linear_model.ARDRegression`,
`sklearn.gaussian_process.GaussianProcessRegressor`)
Returns:
model predictions:
an array if uncertainty quantification is not requested,
or a tuple if with prediction intervals and simulations
if `return_std = True` (mean, standard deviation,
lower and upper prediction interval) or `return_pi = True`
()
Compute the score of the model.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method
Returns:
score: float
class DeepMTS(MTS):
    """Univariate and multivariate time series (DeepMTS) forecasting with Quasi-Randomized networks (Work in progress)

    The base learner `obj` is wrapped `n_layers - 1` times in a
    `CustomRegressor`; the resulting stacked learner is then handed to the
    `MTS` constructor together with the same hidden-layer hyperparameters.

    Parameters:

        obj: object.
            any object containing a method fit (obj.fit()) and a method predict
            (obj.predict()).

        n_layers: int.
            number of layers in the neural network.

        n_hidden_features: int.
            number of nodes in the hidden layer.

        activation_name: str.
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'.

        a: float.
            hyperparameter for 'prelu' or 'elu' activation function.

        nodes_sim: str.
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
            'uniform'.

        bias: boolean.
            indicates if the hidden layer contains a bias term (True) or not
            (False).

        dropout: float.
            regularization parameter; (random) percentage of nodes dropped out
            of the training.

        direct_link: boolean.
            indicates if the original predictors are included (True) in model's fitting or not (False).

        n_clusters: int.
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering).

        cluster_encode: bool.
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding.

        type_clust: str.
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm').

        type_scaling: a tuple of 3 strings.
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax').

        lags: int.
            number of lags used for each time series.

        type_pi: str.
            type of prediction interval; currently:
            - "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case
            - "kde": based on Kernel Density Estimation of in-sample residuals
            - "bootstrap": based on independent bootstrap of in-sample residuals
            - "block-bootstrap": based on basic block bootstrap of in-sample residuals
            - "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals
            - "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals
            - "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals
            - "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals
            - "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals
            - "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals

        block_size: int.
            size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap").
            Default is round(3.15*(n_residuals^(1/3)))

        replications: int.
            number of replications (if needed, for predictive simulation). Default is 'None'.

        kernel: str.
            the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'.

        agg: str.
            either "mean" or "median" for simulation of bootstrap aggregating

        seed: int.
            reproducibility seed for nodes_sim=='uniform' or predictive simulation.

        backend: str.
            "cpu" or "gpu" or "tpu".

        verbose: int.
            0: not printing; 1: printing

        show_progress: bool.
            True: progress bar when fitting each series; False: no progress bar when fitting each series

    Attributes:

        fit_objs_: dict
            objects adjusted to each individual time series

        y_: {array-like}
            DeepMTS responses (most recent observations first)

        X_: {array-like}
            DeepMTS lags

        xreg_: {array-like}
            external regressors

        y_means_: dict
            a dictionary of each series mean values

        preds_: {array-like}
            successive model predictions

        preds_std_: {array-like}
            standard deviation around the predictions

        return_std_: boolean
            return uncertainty or not (set in predict)

        df_: data frame
            the input data frame, in case a data.frame is provided to `fit`

    Examples:

    Example 1:

    ```python
    import nnetsauce as ns
    import numpy as np
    from sklearn import linear_model
    np.random.seed(123)

    M = np.random.rand(10, 3)
    M[:,0] = 10*M[:,0]
    M[:,2] = 25*M[:,2]
    print(M)

    # Adjust Bayesian Ridge
    regr4 = linear_model.BayesianRidge()
    obj_DeepMTS = ns.DeepMTS(regr4, lags = 1, n_hidden_features=5)
    obj_DeepMTS.fit(M)
    print(obj_DeepMTS.predict())

    # with credible intervals
    print(obj_DeepMTS.predict(return_std=True, level=80))

    print(obj_DeepMTS.predict(return_std=True, level=95))
    ```

    Example 2:

    ```python
    import nnetsauce as ns
    import numpy as np
    import pandas as pd
    from sklearn import linear_model

    dataset = {
    'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
    'series1' : [34, 30, 35.6, 33.3, 38.1],
    'series2' : [4, 5.5, 5.6, 6.3, 5.1],
    'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
    df = pd.DataFrame(dataset).set_index('date')
    print(df)

    # Adjust Bayesian Ridge
    regr5 = linear_model.BayesianRidge()
    obj_DeepMTS = ns.DeepMTS(regr5, lags = 1, n_hidden_features=5)
    obj_DeepMTS.fit(df)
    print(obj_DeepMTS.predict())

    # with credible intervals
    print(obj_DeepMTS.predict(return_std=True, level=80))

    print(obj_DeepMTS.predict(return_std=True, level=95))
    ```

    """

    # construct the object -----

    def __init__(
        self,
        obj,
        n_layers=3,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        lags=1,
        type_pi="kde",
        block_size=None,
        replications=None,
        kernel=None,
        agg="mean",
        seed=123,
        backend="cpu",
        verbose=0,
        show_progress=True,
    ):
        assert int(lags) == lags, "parameter 'lags' should be an integer"
        assert n_layers >= 1, "must have n_layers >= 1"
        self.n_layers = int(n_layers)

        # Hyperparameters that every stacked CustomRegressor layer and the
        # MTS base class receive unchanged.
        layer_settings = dict(
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            seed=seed,
            backend=backend,
        )

        # Wrap the base learner n_layers - 1 times (zero times when
        # n_layers == 1); each layer works on a private copy of the
        # learner built so far.
        stacked = obj
        for _ in range(self.n_layers - 1):
            stacked = CustomRegressor(obj=deepcopy(stacked), **layer_settings)

        self.obj = deepcopy(stacked)
        super().__init__(
            obj=self.obj,
            lags=lags,
            type_pi=type_pi,
            block_size=block_size,
            replications=replications,
            kernel=kernel,
            agg=agg,
            verbose=verbose,
            show_progress=show_progress,
            **layer_settings,
        )
Univariate and multivariate time series (DeepMTS) forecasting with Quasi-Randomized networks (Work in progress)
Parameters:
obj: object.
any object containing a method fit (obj.fit()) and a method predict
(obj.predict()).
n_layers: int.
number of layers in the neural network.
n_hidden_features: int.
number of nodes in the hidden layer.
activation_name: str.
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'.
a: float.
hyperparameter for 'prelu' or 'elu' activation function.
nodes_sim: str.
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'.
bias: boolean.
indicates if the hidden layer contains a bias term (True) or not
(False).
dropout: float.
regularization parameter; (random) percentage of nodes dropped out
of the training.
direct_link: boolean.
indicates if the original predictors are included (True) in model's fitting or not (False).
n_clusters: int.
number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering).
cluster_encode: bool.
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding.
type_clust: str.
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm').
type_scaling: a tuple of 3 strings.
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax').
lags: int.
number of lags used for each time series.
type_pi: str.
type of prediction interval; currently:
- "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case
- "kde": based on Kernel Density Estimation of in-sample residuals
- "bootstrap": based on independent bootstrap of in-sample residuals
- "block-bootstrap": based on basic block bootstrap of in-sample residuals
- "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals
- "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals
- "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals
- "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals
- "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals
- "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals
block_size: int.
size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap").
Default is round(3.15*(n_residuals^(1/3)))
replications: int.
number of replications (if needed, for predictive simulation). Default is 'None'.
kernel: str.
the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'.
agg: str.
either "mean" or "median" for simulation of bootstrap aggregating
seed: int.
reproducibility seed for nodes_sim=='uniform' or predictive simulation.
backend: str.
"cpu" or "gpu" or "tpu".
verbose: int.
0: not printing; 1: printing
show_progress: bool.
True: progress bar when fitting each series; False: no progress bar when fitting each series
Attributes:
fit_objs_: dict
objects adjusted to each individual time series
y_: {array-like}
DeepMTS responses (most recent observations first)
X_: {array-like}
DeepMTS lags
xreg_: {array-like}
external regressors
y_means_: dict
a dictionary of each series mean values
preds_: {array-like}
successive model predictions
preds_std_: {array-like}
standard deviation around the predictions
return_std_: boolean
return uncertainty or not (set in predict)
df_: data frame
the input data frame, in case a data.frame is provided to `fit`
Examples:
Example 1:
import nnetsauce as ns
import numpy as np
from sklearn import linear_model
np.random.seed(123)
M = np.random.rand(10, 3)
M[:,0] = 10*M[:,0]
M[:,2] = 25*M[:,2]
print(M)
# Adjust Bayesian Ridge
regr4 = linear_model.BayesianRidge()
obj_DeepMTS = ns.DeepMTS(regr4, lags = 1, n_hidden_features=5)
obj_DeepMTS.fit(M)
print(obj_DeepMTS.predict())
# with credible intervals
print(obj_DeepMTS.predict(return_std=True, level=80))
print(obj_DeepMTS.predict(return_std=True, level=95))
Example 2:
import nnetsauce as ns
import numpy as np
import pandas as pd
from sklearn import linear_model
dataset = {
'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
'series1' : [34, 30, 35.6, 33.3, 38.1],
'series2' : [4, 5.5, 5.6, 6.3, 5.1],
'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
df = pd.DataFrame(dataset).set_index('date')
print(df)
# Adjust Bayesian Ridge
regr5 = linear_model.BayesianRidge()
obj_DeepMTS = ns.DeepMTS(regr5, lags = 1, n_hidden_features=5)
obj_DeepMTS.fit(df)
print(obj_DeepMTS.predict())
# with credible intervals
print(obj_DeepMTS.predict(return_std=True, level=80))
print(obj_DeepMTS.predict(return_std=True, level=95))
class DiscreteTokenMTS(MTS):
    """
    MTS for discrete token forecasting via nearest-neighbor in embedding space.

    Maps continuous predictions to discrete tokens using nearest-neighbor lookup
    in a vocabulary (embedding space). Supports probabilistic decoding with
    temperature-controlled softmax and uncertainty quantification in token space.

    Parameters
    ----------
    obj : object
        Base learner with fit() and predict() methods

    vocab : np.ndarray of shape (vocab_size, n_series)
        Token vocabulary - each row is a token embedding vector

    metric : {'euclidean', 'cosine'}, default='euclidean'
        Distance metric for nearest-neighbor lookup

    return_mode : {'token_id', 'token_vector', 'both', 'probs'}, default='token_id'
        Output format:
        - 'token_id': integer token indices
        - 'token_vector': token embedding vectors
        - 'both': single DataFrame with token_id + dimensions
        - 'probs': probability distribution over all tokens

    softmax_temperature : float, default=1.0
        Temperature for softmax when return_mode='probs'
        Lower values (0.1-0.5) → sharper distributions (more deterministic)
        Higher values (2.0-10.0) → smoother distributions (more exploratory)

    normalize_vocab : bool, default=False
        Whether to center and scale vocabulary to zero mean, unit variance

    **mts_kwargs : dict
        Additional parameters passed to MTS base class

    Attributes
    ----------
    vocab_original : np.ndarray
        Vocabulary as supplied by the caller (float64 copy)

    vocab : np.ndarray
        Working vocabulary: normalized if normalize_vocab=True, otherwise a
        copy of vocab_original

    vocab_mean_ : np.ndarray or None
        Mean used for normalization (if normalize_vocab=True)

    vocab_std_ : np.ndarray or None
        Std used for normalization (if normalize_vocab=True)

    discretization_errors_ : pd.DataFrame or None
        Distances from predictions to nearest tokens

    Warnings
    --------
    - Prediction intervals (lower/upper) are NOT discretized - only the mean
    - For uncertainty in token space, use predict_token_distribution()
    - Vocabulary quality strongly affects results - use diagnose_vocabulary()

    Examples
    --------
    >>> # Basic token prediction
    >>> vocab = np.random.randn(100, 10)  # 100 tokens, 10 dimensions
    >>> model = DiscreteTokenMTS(
    ...     obj=Ridge(),
    ...     vocab=vocab,
    ...     lags=5,
    ...     return_mode='token_id'
    ... )
    >>> model.fit(X_train)
    >>> tokens = model.predict(h=10)

    >>> # Probabilistic with temperature control
    >>> model = DiscreteTokenMTS(
    ...     obj=Ridge(),
    ...     vocab=vocab,
    ...     lags=5,
    ...     return_mode='probs',
    ...     softmax_temperature=1.5
    ... )
    >>> probs = model.predict(h=10)  # Returns probability distributions

    >>> # Uncertainty-aware token distributions
    >>> freqs, entropy, mode = model.predict_token_distribution(
    ...     h=10,
    ...     replications=100
    ... )
    """

    def __init__(
        self,
        obj,
        vocab,
        metric="euclidean",
        return_mode="token_id",
        softmax_temperature=1.0,
        normalize_vocab=False,
        **mts_kwargs,
    ):
        super().__init__(obj, **mts_kwargs)

        # Convert and validate vocabulary
        self.vocab_original = np.asarray(vocab, dtype=np.float64)
        self._validate_vocabulary()

        self.vocab_size = self.vocab_original.shape[0]
        self.vocab_mean_ = None
        self.vocab_std_ = None
        self.normalize_vocab = normalize_vocab

        # Normalize if requested
        if normalize_vocab:
            self._normalize_vocabulary()
        else:
            self.vocab = self.vocab_original.copy()

        # Validate and set metric
        assert metric in [
            "euclidean",
            "cosine",
        ], "metric must be 'euclidean' or 'cosine'"
        self.metric = metric
        self.distance_func = (
            euclidean_distances if metric == "euclidean" else cosine_distances
        )

        # Validate and set return mode
        assert return_mode in [
            "token_id",
            "token_vector",
            "both",
            "probs",
        ], "return_mode must be 'token_id', 'token_vector', 'both', or 'probs'"
        self.return_mode = return_mode

        # Validate temperature
        assert softmax_temperature > 0, "softmax_temperature must be positive"
        self.softmax_temperature = softmax_temperature

        # Initialize error tracking
        self.discretization_errors_ = None

    def _validate_vocabulary(self):
        """Check shape and finiteness of the vocabulary; warn on (near-)duplicates.

        Raises
        ------
        AssertionError
            If the vocabulary is not 2D or has no rows
        ValueError
            If the vocabulary contains NaN or Inf values
        """
        # Check shape
        assert (
            self.vocab_original.ndim == 2
        ), "vocab must be 2D array (vocab_size, n_series)"
        assert (
            self.vocab_original.shape[0] > 0
        ), "vocab must have at least one token"

        # Check for NaN/Inf
        if np.any(np.isnan(self.vocab_original)) or np.any(
            np.isinf(self.vocab_original)
        ):
            raise ValueError("Vocabulary contains NaN or Inf values")

        # Check for duplicates
        unique_rows = np.unique(self.vocab_original, axis=0)
        if len(unique_rows) < len(self.vocab_original):
            n_duplicates = len(self.vocab_original) - len(unique_rows)
            warnings.warn(
                f"Vocabulary contains {n_duplicates} duplicate vectors. "
                "This reduces effective vocabulary size.",
                UserWarning,
            )

        # Check for near-duplicates
        if len(self.vocab_original) > 1:
            dists = euclidean_distances(self.vocab_original)
            # Mask the zero self-distances so they do not win the minimum
            np.fill_diagonal(dists, np.inf)
            min_dist = dists.min()

            if min_dist < 1e-6:
                warnings.warn(
                    f"Vocabulary contains very close vectors (min distance: {min_dist:.2e}). "
                    "Consider increasing token diversity.",
                    UserWarning,
                )

    def _normalize_vocabulary(self):
        """Center and scale vocabulary column-wise, storing mean and std."""
        self.vocab_mean_ = self.vocab_original.mean(axis=0)
        # Small offset keeps constant columns from dividing by zero
        self.vocab_std_ = self.vocab_original.std(axis=0) + 1e-8
        self.vocab = (self.vocab_original - self.vocab_mean_) / self.vocab_std_

    def fit(self, X, **kwargs):
        """
        Validate that the vocabulary matches the data, then fit the model.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_series)
            Training data

        **kwargs : dict
            Additional parameters passed to parent fit

        Returns
        -------
        self : object
            Fitted estimator

        Raises
        ------
        ValueError
            If the vocabulary width differs from the number of series, or if
            metric='cosine' and the vocabulary holds zero/near-zero vectors
        """
        # Validate vocabulary dimensions before the (potentially expensive)
        # parent fit; np.shape also handles inputs without .shape/.ndim.
        shape = np.shape(X)
        n_series = shape[1] if len(shape) > 1 else 1
        if self.vocab.shape[1] != n_series:
            raise ValueError(
                f"Vocabulary dimension ({self.vocab.shape[1]}) must match "
                f"number of series ({n_series})"
            )

        # Additional check for cosine distance
        if self.metric == "cosine":
            norms = np.linalg.norm(self.vocab, axis=1)
            zero_vectors = norms < 1e-10
            if np.any(zero_vectors):
                raise ValueError(
                    f"Vocabulary contains {zero_vectors.sum()} zero/near-zero vectors. "
                    "Cosine distance requires non-zero vectors."
                )

        # Call parent fit
        super().fit(X, **kwargs)

        return self

    def _token_distances(self, continuous_preds):
        """Return the (n_rows, vocab_size) distances from predictions to tokens.

        Predictions are first mapped into the vocabulary's working scale, so
        the result is comparable whether or not the vocabulary is normalized.
        """
        if self.normalize_vocab:
            continuous_preds = (
                continuous_preds - self.vocab_mean_
            ) / self.vocab_std_
        return self.distance_func(continuous_preds, self.vocab)

    def _vectorized_map_to_tokens(self, continuous_preds):
        """
        Vectorized token mapping for efficiency.

        Parameters
        ----------
        continuous_preds : np.ndarray of shape (h, n_series)
            Continuous predictions

        Returns
        -------
        result : depends on return_mode
        errors : np.ndarray
            Distances to nearest tokens
        """
        # Compute all distances at once (predictions normalized if needed)
        dists = self._token_distances(continuous_preds)

        # Find nearest tokens
        nearest_indices = np.argmin(dists, axis=1)
        min_dists = dists[np.arange(len(dists)), nearest_indices]

        if self.return_mode == "token_id":
            return nearest_indices, min_dists

        if self.return_mode in ("token_vector", "both"):
            token_vecs = self.vocab[nearest_indices]
            # Denormalize if vocabulary was normalized
            if self.normalize_vocab:
                token_vecs = token_vecs * self.vocab_std_ + self.vocab_mean_
            if self.return_mode == "token_vector":
                return token_vecs, min_dists
            # Return combined array: [token_id, dim_0, dim_1, ...]
            token_ids = nearest_indices.reshape(-1, 1)
            return np.column_stack([token_ids, token_vecs]), min_dists

        if self.return_mode == "probs":
            # Softmax of negative distances
            probs = softmax(-dists / self.softmax_temperature, axis=1)
            return probs, min_dists

        # return_mode is validated in __init__ but may have been reassigned
        raise ValueError(f"Unknown return_mode: {self.return_mode!r}")

    def predict(
        self,
        h=5,
        level=95,
        quantiles=None,
        return_discretization_error=False,
        **kwargs,
    ):
        """
        Generate discrete token predictions.

        Parameters
        ----------
        h : int, default=5
            Forecast horizon

        level : int, default=95
            Confidence level (only affects continuous forecasts)

        quantiles : list of float, optional
            Quantile levels

        return_discretization_error : bool, default=False
            If True, return (predictions, errors) tuple

        **kwargs : dict
            Additional parameters for parent predict

        Returns
        -------
        predictions : pd.DataFrame
            Discrete predictions. Format depends on return_mode:
            - 'token_id': single column 'token_id'
            - 'token_vector': columns 'dim_0', 'dim_1', ...
            - 'both': columns 'token_id', 'dim_0', 'dim_1', ...
            - 'probs': columns 'token_0_prob', 'token_1_prob', ...
            When the parent returns simulations, a tuple with one such
            DataFrame per simulated path is returned instead.

        errors : pd.DataFrame (if return_discretization_error=True)
            Discretization errors (distances to nearest tokens)

        Raises
        ------
        NotImplementedError
            If the parent's predict output has an unrecognized structure

        Warnings
        --------
        When prediction intervals are requested but only mean is discretized,
        a warning is issued. Use predict_token_distribution() for uncertainty
        in token space.
        """
        # Get continuous predictions from parent
        continuous_result = super().predict(
            h=h, level=level, quantiles=quantiles, **kwargs
        )

        # Namedtuples are detected by `_fields` first: a DataFrame also has
        # a `mean` attribute (the method), so `mean` alone is ambiguous.
        if hasattr(continuous_result, "_fields"):
            if getattr(continuous_result, "sims", None) is not None:
                # Simulation-based forecast
                return self._discretize_simulations(
                    continuous_result.sims, return_discretization_error
                )
            if hasattr(continuous_result, "mean"):
                # Interval-based forecast - warn about information loss
                warnings.warn(
                    "Prediction intervals cannot be meaningfully discretized. "
                    "Only mean predictions are converted to tokens. "
                    "Use predict_token_distribution(replications=N) for "
                    "uncertainty in token space.",
                    UserWarning,
                )
                return self._discretize_dataframe(
                    continuous_result.mean, return_discretization_error
                )
        elif isinstance(continuous_result, pd.DataFrame):
            # Deterministic forecast
            return self._discretize_dataframe(
                continuous_result, return_discretization_error
            )

        # Reached for unknown types and for namedtuples with neither
        # `sims` nor `mean` (previously those silently returned None).
        raise NotImplementedError(
            f"Unhandled predict output type: {type(continuous_result)}"
        )

    def _format_discrete_result(self, result, index):
        """Wrap a raw `_vectorized_map_to_tokens` result in a labelled DataFrame."""
        if self.return_mode == "probs":
            return pd.DataFrame(
                result,
                index=index,
                columns=[f"token_{i}_prob" for i in range(self.vocab_size)],
            )

        dim_columns = [f"dim_{i}" for i in range(self.vocab.shape[1])]

        if self.return_mode == "both":
            # Combined format: token_id + dimensions
            frame = pd.DataFrame(
                result, index=index, columns=["token_id"] + dim_columns
            )
            # column_stack promoted the ids to float; restore integers
            frame["token_id"] = frame["token_id"].astype(int)
            return frame

        if self.return_mode == "token_id":
            return pd.DataFrame(
                result.reshape(-1, 1), index=index, columns=["token_id"]
            )

        # 'token_vector'
        return pd.DataFrame(result, index=index, columns=dim_columns)

    def _discretize_dataframe(self, df, return_error=False):
        """Discretize a continuous prediction DataFrame.

        When `return_error` is True the error frame is also stored in
        `discretization_errors_` and returned alongside the predictions.
        """
        result, errors = self._vectorized_map_to_tokens(df.values)
        result_df = self._format_discrete_result(result, df.index)

        if return_error:
            error_df = pd.DataFrame(
                errors.reshape(-1, 1),
                index=df.index,
                columns=["discretization_error"],
            )
            self.discretization_errors_ = error_df
            return result_df, error_df

        return result_df

    def _discretize_simulations(self, sims, return_error=False):
        """Discretize simulation paths, returning one DataFrame per path."""
        discrete_sims = []
        all_errors = []

        for sim_df in sims:
            result, errors = self._vectorized_map_to_tokens(sim_df.values)
            discrete_sims.append(
                self._format_discrete_result(result, sim_df.index)
            )

            if return_error:
                all_errors.append(
                    pd.DataFrame(
                        errors.reshape(-1, 1),
                        index=sim_df.index,
                        columns=["discretization_error"],
                    )
                )

        if return_error:
            return tuple(discrete_sims), tuple(all_errors)
        return tuple(discrete_sims)

    # ========== Uncertainty Quantification in Token Space ==========

    def predict_top_k(self, h=5, k=5, **kwargs):
        """
        Predict top-k most probable tokens per timestep.

        Parameters
        ----------
        h : int
            Forecast horizon
        k : int
            Number of top tokens to return; values larger than the
            vocabulary size are clipped to the vocabulary size
        **kwargs : dict
            Additional parameters for parent predict

        Returns
        -------
        predictions : pd.DataFrame
            Columns: token_1, prob_1, token_2, prob_2, ..., token_k, prob_k

        Raises
        ------
        ValueError
            If k < 1, or if continuous predictions cannot be extracted from
            the parent's output
        """
        k = int(k)
        if k < 1:
            raise ValueError("k must be a positive integer")
        # Cannot rank more tokens than the vocabulary holds
        k = min(k, self.vocab_size)

        continuous_result = super().predict(h=h, **kwargs)

        # Test for DataFrame first: every DataFrame has a `.mean` method, so
        # checking `hasattr(..., "mean")` first would misroute plain frames.
        if isinstance(continuous_result, pd.DataFrame):
            mean_df = continuous_result
        elif hasattr(continuous_result, "mean"):
            mean_df = continuous_result.mean
        else:
            raise ValueError("Cannot extract continuous predictions")

        preds = mean_df.values
        index = mean_df.index

        # Compute probabilities in the vocabulary's working scale
        dists = self._token_distances(preds)
        probs = softmax(-dists / self.softmax_temperature, axis=1)

        # Get top-k, most probable first
        top_k_indices = np.argsort(probs, axis=1)[:, -k:][:, ::-1]
        top_k_probs = np.take_along_axis(probs, top_k_indices, axis=1)

        # Format as DataFrame with interleaved token/prob columns
        columns = []
        data = []
        for i in range(k):
            columns.extend([f"token_{i+1}", f"prob_{i+1}"])
            data.append(top_k_indices[:, i])
            data.append(top_k_probs[:, i])

        return pd.DataFrame(np.column_stack(data), index=index, columns=columns)

    def predict_token_distribution(self, h=5, replications=100, **kwargs):
        """
        Generate token probability distribution from simulation ensemble.

        This method provides meaningful uncertainty quantification in token space
        by discretizing multiple simulation paths and computing token frequencies.
        The result does not depend on return_mode.

        Parameters
        ----------
        h : int
            Forecast horizon
        replications : int
            Number of simulation paths
        **kwargs : dict
            Additional parameters for parent predict

        Returns
        -------
        frequencies : pd.DataFrame
            Token frequencies across simulations
            Columns: token_0_freq, token_1_freq, ..., token_V_freq

        entropy : pd.Series
            Shannon entropy per timestep (uncertainty measure)

        mode_tokens : pd.DataFrame
            Most frequent token per timestep

        Raises
        ------
        ValueError
            If the parent's predict output carries no simulations

        Examples
        --------
        >>> freqs, entropy, mode = model.predict_token_distribution(h=10, replications=100)
        >>> # High entropy → uncertain prediction
        >>> uncertain_steps = entropy[entropy > 2.0]
        >>> # Use mode tokens for point predictions
        >>> predictions = mode['mode_token'].values
        """
        # Force simulation mode
        kwargs["replications"] = replications
        continuous_result = super().predict(h=h, **kwargs)

        # Extract simulations
        sims = getattr(continuous_result, "sims", None)
        if sims is None or len(sims) == 0:
            raise ValueError(
                "predict_token_distribution requires simulation-based forecasting. "
                "Ensure replications > 0 and type_pi supports simulations."
            )
        index = continuous_result.mean.index

        # Discretize all paths straight to token ids. Going through
        # return_mode-specific output (e.g. de-normalized token vectors) and
        # mapping back would compare vectors in mismatched scales.
        all_tokens = np.array(
            [
                np.argmin(self._token_distances(sim.values), axis=1)
                for sim in sims
            ]
        )  # (n_paths, h)

        # Compute frequency distribution, normalizing by the number of paths
        # actually simulated so each row sums to one.
        n_paths, h_actual = all_tokens.shape
        token_freqs = np.zeros((h_actual, self.vocab_size))
        for t in range(h_actual):
            counts = np.bincount(all_tokens[:, t], minlength=self.vocab_size)
            token_freqs[t] = counts / n_paths

        # Compute entropy; epsilon avoids log(0) for unseen tokens
        epsilon = 1e-10
        entropy = -np.sum(token_freqs * np.log(token_freqs + epsilon), axis=1)

        # Get mode
        mode_tokens = np.argmax(token_freqs, axis=1)

        # Package results
        freq_df = pd.DataFrame(
            token_freqs,
            index=index,
            columns=[f"token_{i}_freq" for i in range(self.vocab_size)],
        )
        entropy_series = pd.Series(entropy, index=index, name="entropy")
        mode_df = pd.DataFrame(mode_tokens, index=index, columns=["mode_token"])

        return freq_df, entropy_series, mode_df

    # ========== Utility Methods ==========

    def tokens_to_vectors(self, token_ids):
        """Convert token IDs to embedding vectors (in original scale)"""
        token_ids = np.asarray(token_ids).astype(int)
        assert np.all(
            (token_ids >= 0) & (token_ids < self.vocab_size)
        ), f"Token IDs must be in range [0, {self.vocab_size-1}]"
        vectors = self.vocab[token_ids]
        # Denormalize if vocabulary was normalized
        if self.normalize_vocab:
            vectors = vectors * self.vocab_std_ + self.vocab_mean_
        return vectors

    def get_token_neighbors(self, token_id, k=5):
        """Find k nearest neighbors of a token (the token itself is excluded).

        Returns a DataFrame with columns 'neighbor_id' and 'distance',
        ordered by increasing distance in the working vocabulary.
        """
        assert (
            0 <= token_id < self.vocab_size
        ), f"token_id must be in range [0, {self.vocab_size-1}]"

        token_vec = self.vocab[token_id].reshape(1, -1)
        dists = self.distance_func(token_vec, self.vocab).flatten()

        sorted_indices = np.argsort(dists)
        sorted_indices = sorted_indices[sorted_indices != token_id][:k]

        return pd.DataFrame(
            {"neighbor_id": sorted_indices, "distance": dists[sorted_indices]}
        )

    def compute_vocab_coverage(self, predictions):
        """Compute vocabulary usage statistics.

        Parameters
        ----------
        predictions : pd.DataFrame
            Must contain a 'token_id' column

        Returns
        -------
        stats : dict
            Keys: 'unique_tokens', 'coverage_pct', 'token_frequencies',
            'most_common_token', 'least_common_token'

        Raises
        ------
        ValueError
            If 'token_id' is not among the columns
        """
        if "token_id" not in predictions.columns:
            raise ValueError("predictions must have 'token_id' column")

        token_ids = predictions["token_id"].values
        unique_tokens = np.unique(token_ids)
        freq = pd.Series(token_ids).value_counts().sort_index()

        return {
            "unique_tokens": len(unique_tokens),
            "coverage_pct": 100 * len(unique_tokens) / self.vocab_size,
            "token_frequencies": freq,
            "most_common_token": freq.idxmax() if len(freq) > 0 else None,
            "least_common_token": freq.idxmin() if len(freq) > 0 else None,
        }

    def diagnose_vocabulary(self):
        """
        Comprehensive vocabulary quality diagnostics.

        Returns
        -------
        report : dict
            Keys: 'vocab_size', 'embedding_dim', 'normalized',
            'min_pairwise_distance', 'max_pairwise_distance',
            'mean_pairwise_distance', 'condition_number',
            'coverage_volume', 'duplicate_count'. The pairwise statistics
            are NaN when the vocabulary holds a single token.
        """
        # Use original vocabulary for diagnostics to get meaningful statistics
        vocab_to_diagnose = self.vocab_original

        report = {
            "vocab_size": self.vocab_size,
            "embedding_dim": vocab_to_diagnose.shape[1],
            "normalized": self.normalize_vocab,
        }

        # Pairwise distances, excluding each token's zero distance to itself.
        # Selecting off-diagonal entries (rather than overwriting the diagonal
        # with inf) keeps the maximum finite.
        dists = euclidean_distances(vocab_to_diagnose)
        off_diagonal = ~np.eye(len(vocab_to_diagnose), dtype=bool)
        pairwise = dists[off_diagonal]

        if pairwise.size > 0:
            report["min_pairwise_distance"] = pairwise.min()
            report["max_pairwise_distance"] = pairwise.max()
            report["mean_pairwise_distance"] = pairwise.mean()
        else:
            # A single token has no pairs to measure
            report["min_pairwise_distance"] = np.nan
            report["max_pairwise_distance"] = np.nan
            report["mean_pairwise_distance"] = np.nan

        # Condition number from singular values (offset guards against /0)
        s = np.linalg.svd(vocab_to_diagnose, compute_uv=False)
        report["condition_number"] = s.max() / (s.min() + 1e-10)

        # Coverage volume: product of per-dimension ranges
        ranges = vocab_to_diagnose.max(axis=0) - vocab_to_diagnose.min(axis=0)
        report["coverage_volume"] = np.prod(ranges)

        # Duplicates
        unique_rows = np.unique(vocab_to_diagnose, axis=0)
        report["duplicate_count"] = len(vocab_to_diagnose) - len(unique_rows)

        return report

    def print_vocabulary_report(self):
        """Print human-readable vocabulary diagnostics"""
        report = self.diagnose_vocabulary()

        print("=" * 60)
        print("VOCABULARY QUALITY REPORT")
        print("=" * 60)
        print(f"Vocabulary size: {report['vocab_size']} tokens")
        print(f"Embedding dimension: {report['embedding_dim']}")
        print("\nPairwise Distances:")
        print(f" Min: {report['min_pairwise_distance']:.6f}")
        print(f" Mean: {report['mean_pairwise_distance']:.6f}")
        print(f" Max: {report['max_pairwise_distance']:.6f}")
        print("\nVocabulary Health:")
        print(f" Condition number: {report['condition_number']:.2f}")
        if report["condition_number"] > 1000:
            print(
                " ⚠️ WARNING: High condition number may indicate redundant tokens"
            )
        print(f" Duplicate tokens: {report['duplicate_count']}")
        if report["duplicate_count"] > 0:
            print(" ⚠️ WARNING: Duplicates reduce effective vocabulary size")
        print(f" Coverage volume: {report['coverage_volume']:.2e}")
        print("=" * 60)
MTS for discrete token forecasting via nearest-neighbor in embedding space.
Maps continuous predictions to discrete tokens using nearest-neighbor lookup in a vocabulary (embedding space). Supports probabilistic decoding with temperature-controlled softmax and uncertainty quantification in token space.
Parameters
obj : object Base learner with fit() and predict() methods
vocab : np.ndarray of shape (vocab_size, n_series) Token vocabulary - each row is a token embedding vector
metric : {'euclidean', 'cosine'}, default='euclidean' Distance metric for nearest-neighbor lookup
return_mode : {'token_id', 'token_vector', 'both', 'probs'}, default='token_id' Output format: - 'token_id': integer token indices - 'token_vector': token embedding vectors - 'both': single DataFrame with token_id + dimensions - 'probs': probability distribution over all tokens
softmax_temperature : float, default=1.0 Temperature for softmax when return_mode='probs' Lower values (0.1-0.5) → sharper distributions (more deterministic) Higher values (2.0-10.0) → smoother distributions (more exploratory)
normalize_vocab : bool, default=False Whether to center and scale vocabulary to zero mean, unit variance
**mts_kwargs : dict Additional parameters passed to MTS base class
Attributes
vocab : np.ndarray Normalized vocabulary (if normalize_vocab=True)
vocab_mean_ : np.ndarray Mean used for normalization (if normalize_vocab=True)
vocab_std_ : np.ndarray Std used for normalization (if normalize_vocab=True)
discretization_errors_ : pd.DataFrame or None Distances from predictions to nearest tokens
Warnings
- Prediction intervals (lower/upper) are NOT discretized - only the mean
- For uncertainty in token space, use predict_token_distribution()
- Vocabulary quality strongly affects results - use diagnose_vocabulary()
Examples
>>> # Basic token prediction
>>> vocab = np.random.randn(100, 10) # 100 tokens, 10 dimensions
>>> model = DiscreteTokenMTS(
... obj=Ridge(),
... vocab=vocab,
... lags=5,
... return_mode='token_id'
... )
>>> model.fit(X_train)
>>> tokens = model.predict(h=10)
>>> # Probabilistic with temperature control
>>> model = DiscreteTokenMTS(
... obj=Ridge(),
... vocab=vocab,
... lags=5,
... return_mode='probs',
... softmax_temperature=1.5
... )
>>> probs = model.predict(h=10) # Returns probability distributions
>>> # Uncertainty-aware token distributions
>>> freqs, entropy, mode = model.predict_token_distribution(
... h=10,
... replications=100
... )
def fit(self, X, **kwargs):
    """
    Validate the vocabulary against the data, then fit the model.

    Validation runs before the parent fit so that an incompatible
    vocabulary is rejected without paying for the fit and without
    leaving the estimator fitted on data it cannot decode.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_series)
        Training data

    **kwargs : dict
        Additional parameters passed to parent fit

    Returns
    -------
    self : object
        Fitted estimator

    Raises
    ------
    ValueError
        If the vocabulary's second dimension differs from the number of
        series in X, or if metric == "cosine" and the vocabulary contains
        zero/near-zero vectors.
    """
    # np.shape also accepts array-likes that have no .ndim attribute
    data_shape = np.shape(X)
    n_series = data_shape[1] if len(data_shape) > 1 else 1
    if self.vocab.shape[1] != n_series:
        raise ValueError(
            f"Vocabulary dimension ({self.vocab.shape[1]}) must match "
            f"number of series ({n_series})"
        )

    # Cosine distance is undefined for (near-)zero vectors
    if self.metric == "cosine":
        norms = np.linalg.norm(self.vocab, axis=1)
        zero_vectors = norms < 1e-10
        if np.any(zero_vectors):
            raise ValueError(
                f"Vocabulary contains {zero_vectors.sum()} zero/near-zero vectors. "
                "Cosine distance requires non-zero vectors."
            )

    # Inputs are consistent: delegate the actual fitting to the parent
    super().fit(X, **kwargs)

    return self
Fit model and validate vocabulary dimensions match data.
Parameters
X : array-like of shape (n_samples, n_series) Training data
**kwargs : dict Additional parameters passed to parent fit
Returns
self : object Fitted estimator
def predict(
    self,
    h=5,
    level=95,
    quantiles=None,
    return_discretization_error=False,
    **kwargs,
):
    """
    Generate discrete token predictions.

    Parameters
    ----------
    h : int, default=5
        Forecast horizon

    level : int, default=95
        Confidence level (only affects continuous forecasts)

    quantiles : list of float, optional
        Quantile levels

    return_discretization_error : bool, default=False
        If True, return (predictions, errors) tuple

    **kwargs : dict
        Additional parameters for parent predict

    Returns
    -------
    predictions : pd.DataFrame
        Discrete predictions. Format depends on return_mode:
        - 'token_id': single column 'token_id'
        - 'token_vector': columns 'dim_0', 'dim_1', ...
        - 'both': columns 'token_id', 'dim_0', 'dim_1', ...
        - 'probs': columns 'token_0_prob', 'token_1_prob', ...

    errors : pd.DataFrame (if return_discretization_error=True)
        Discretization errors (distances to nearest tokens)

    Raises
    ------
    NotImplementedError
        If the parent's output is neither a DataFrame nor a namedtuple
        exposing a non-None ``sims`` or a ``mean`` field.

    Warnings
    --------
    When prediction intervals are requested but only mean is discretized,
    a warning is issued. Use predict_token_distribution() for uncertainty
    in token space.
    """
    # Get continuous predictions from parent
    continuous_result = super().predict(
        h=h, level=level, quantiles=quantiles, **kwargs
    )

    # Duck-typed dispatch: namedtuples expose `_fields`
    if hasattr(continuous_result, "_fields"):
        sims = getattr(continuous_result, "sims", None)
        if sims is not None:
            # Simulation-based forecast
            return self._discretize_simulations(
                sims, return_discretization_error
            )
        if hasattr(continuous_result, "mean"):
            # Interval-based forecast - warn about information loss.
            # stacklevel=2 attributes the warning to the caller's line.
            warnings.warn(
                "Prediction intervals cannot be meaningfully discretized. "
                "Only mean predictions are converted to tokens. "
                "Use predict_token_distribution(replications=N) for "
                "uncertainty in token space.",
                UserWarning,
                stacklevel=2,
            )
            return self._discretize_dataframe(
                continuous_result.mean, return_discretization_error
            )
        # A namedtuple with neither field used to fall through and return
        # None; it now reaches the explicit error below.
    elif isinstance(continuous_result, pd.DataFrame):
        # Deterministic forecast
        return self._discretize_dataframe(
            continuous_result, return_discretization_error
        )

    raise NotImplementedError(
        f"Unhandled predict output type: {type(continuous_result)}"
    )
Generate discrete token predictions.
Parameters
h : int, default=5 Forecast horizon
level : int, default=95 Confidence level (only affects continuous forecasts)
quantiles : list of float, optional Quantile levels
return_discretization_error : bool, default=False If True, return (predictions, errors) tuple
**kwargs : dict Additional parameters for parent predict
Returns
predictions : pd.DataFrame Discrete predictions. Format depends on return_mode: - 'token_id': single column 'token_id' - 'token_vector': columns 'dim_0', 'dim_1', ... - 'both': columns 'token_id', 'dim_0', 'dim_1', ... - 'probs': columns 'token_0_prob', 'token_1_prob', ...
errors : pd.DataFrame (if return_discretization_error=True) Discretization errors (distances to nearest tokens)
Warnings
When prediction intervals are requested but only mean is discretized, a warning is issued. Use predict_token_distribution() for uncertainty in token space.
class Downloader:
    """Download datasets from data sources (R-universe for now)"""

    def __init__(self):
        # All fields are filled in by the most recent call to download()
        self.pkgname = None
        self.dataset = None
        self.source = None
        self.url = None
        self.request = None

    def download(
        self,
        pkgname="MASS",
        dataset="Boston",
        source="https://cran.r-universe.dev/",
        *,
        timeout=30,
        **kwargs
    ):
        """Download datasets from data sources (R-universe for now)

        Parameters:

            pkgname: str
                name of the package that ships the dataset

            dataset: str
                name of the dataset inside the package

            source: str
                base URL of the data source, with or without a trailing '/'

            timeout: float or tuple
                timeout (in seconds) passed to `requests.get`, so that an
                unresponsive server cannot block the call forever

            **kwargs: additional parameters to be passed to `pd.DataFrame`

        Returns:

            a `pd.DataFrame` built from the JSON payload

        Raises:

            requests.HTTPError: if the server answers with an error status

        Examples:

        ```python
        import nnetsauce as ns

        downloader = ns.Downloader()
        df = downloader.download(pkgname="MASS", dataset="Boston")
        ```

        """
        self.pkgname = pkgname
        self.dataset = dataset
        self.source = source
        # Normalise the base so a source without trailing '/' still works
        self.url = (
            source.rstrip("/") + "/" + pkgname + "/data/" + dataset + "/json"
        )
        self.request = requests.get(self.url, timeout=timeout)
        # Fail with a clear HTTP error instead of trying to parse an error page
        self.request.raise_for_status()
        return pd.DataFrame(self.request.json(), **kwargs)
Download datasets from data sources (R-universe for now)
def download(
    self,
    pkgname="MASS",
    dataset="Boston",
    source="https://cran.r-universe.dev/",
    *,
    timeout=30,
    **kwargs
):
    """Download datasets from data sources (R-universe for now)

    Parameters:

        pkgname: str
            name of the package that ships the dataset

        dataset: str
            name of the dataset inside the package

        source: str
            base URL of the data source, with or without a trailing '/'

        timeout: float or tuple
            timeout (in seconds) passed to `requests.get`, so that an
            unresponsive server cannot block the call forever

        **kwargs: additional parameters to be passed to `pd.DataFrame`

    Returns:

        a `pd.DataFrame` built from the JSON payload

    Raises:

        requests.HTTPError: if the server answers with an error status

    Examples:

    ```python
    import nnetsauce as ns

    downloader = ns.Downloader()
    df = downloader.download(pkgname="MASS", dataset="Boston")
    ```

    """
    self.pkgname = pkgname
    self.dataset = dataset
    self.source = source
    # Normalise the base so a source without trailing '/' still works
    self.url = (
        source.rstrip("/") + "/" + pkgname + "/data/" + dataset + "/json"
    )
    self.request = requests.get(self.url, timeout=timeout)
    # Fail with a clear HTTP error instead of trying to parse an error page
    self.request.raise_for_status()
    return pd.DataFrame(self.request.json(), **kwargs)
Download datasets from data sources (R-universe for now)
Examples:
import nnetsauce as ns
downloader = ns.Downloader()
df = downloader.download(pkgname="MASS", dataset="Boston")
class ElasticNet2Regressor(BaseEstimator, RegressorMixin):
    """Regression on random hidden features with an elastic-net style fit.

    Inputs are standardized, mapped through a random hidden layer
    (`W_in_`, `b_in_`) and an activation, and the output weights `beta_`
    are found by gradient steps followed by soft-thresholding (L1) and a
    projection onto the L2 ball of radius `lambd`.
    """

    def __init__(
        self,
        n_hidden_features=100,
        alpha=1.0,
        l1_ratio=0.5,
        lambd=0.1,
        activation_name="tanh",
        a=0.01,
        max_iter=1000,
        tol=1e-4,
        random_state=None,
    ):
        self.n_hidden_features = n_hidden_features
        self.alpha = alpha
        self.l1_ratio = l1_ratio
        self.lambd = lambd
        self.activation_name = activation_name
        self.a = a
        self.max_iter = max_iter
        self.tol = tol
        self.random_state = random_state

    def _activation(self, Z):
        """Apply the activation selected by `activation_name` to Z."""
        if self.activation_name not in (
            "relu",
            "tanh",
            "sigmoid",
            "prelu",
            "elu",
        ):
            raise ValueError(f"Unknown activation: {self.activation_name}")

        # Lambdas keep evaluation lazy: only the selected branch is computed
        transforms = {
            "relu": lambda v: np.maximum(0, v),
            "tanh": lambda v: np.tanh(v),
            "sigmoid": lambda v: 1 / (1 + np.exp(-v)),
            "prelu": lambda v: np.where(v > 0, v, self.a * v),
            "elu": lambda v: np.where(v > 0, v, self.a * (np.exp(v) - 1)),
        }
        return transforms[self.activation_name](Z)

    def fit(self, X, y):
        """Fit the random-feature model to (X, y) and return self."""
        X, y = check_X_y(X, y)
        rng = np.random.RandomState(self.random_state)
        n_samples = len(y)

        # Standardize inputs (epsilon guards against constant columns)
        self.X_mean_ = X.mean(axis=0)
        self.X_std_ = X.std(axis=0) + 1e-8
        X_scaled = (X - self.X_mean_) / self.X_std_

        # Center response
        self.y_mean_ = y.mean()
        y_centered = y - self.y_mean_

        # Random feature mapping (draw order matters for reproducibility)
        self.W_in_ = rng.randn(X.shape[1], self.n_hidden_features)
        self.b_in_ = rng.randn(self.n_hidden_features)
        H = self._activation(X_scaled @ self.W_in_ + self.b_in_)

        # Step size and L1 threshold do not change across iterations
        step = 0.01 / (1 + self.alpha * (1 - self.l1_ratio))
        threshold = step * self.alpha * self.l1_ratio

        beta = np.zeros(self.n_hidden_features)

        for _ in range(self.max_iter):
            # Gradient step on the least-squares term
            grad = H.T @ (H @ beta - y_centered) / n_samples
            candidate = beta - step * grad

            # Soft thresholding (L1)
            candidate = np.sign(candidate) * np.maximum(
                np.abs(candidate) - threshold, 0
            )

            # Projection onto the L2 ball of radius lambd
            norm = np.linalg.norm(candidate)
            if norm > self.lambd:
                candidate = candidate * (self.lambd / norm)

            shift = np.linalg.norm(candidate - beta)
            beta = candidate
            if shift < self.tol:
                break

        self.beta_ = beta
        return self

    def predict(self, X):
        """Predict targets for X using the fitted scaling and weights."""
        X = check_array(X)
        X_scaled = (X - self.X_mean_) / self.X_std_
        hidden = self._activation(X_scaled @ self.W_in_ + self.b_in_)
        return hidden @ self.beta_ + self.y_mean_
Base class for all estimators in scikit-learn.
Inheriting from this class provides default implementations of:
- setting and getting parameters used by `GridSearchCV` and friends;
- textual and HTML representation displayed in terminals and IDEs;
- estimator serialization;
- parameters validation;
- data validation;
- feature names validation.
Read more in the :ref:`User Guide <rolling_your_own_estimator>`.
Notes
All estimators should specify all the parameters that can be set
at the class level in their __init__ as explicit keyword
arguments (no *args or **kwargs).
Examples
>>> import numpy as np
>>> from sklearn.base import BaseEstimator
>>> class MyEstimator(BaseEstimator):
... def __init__(self, *, param=1):
... self.param = param
... def fit(self, X, y=None):
... self.is_fitted_ = True
... return self
... def predict(self, X):
... return np.full(shape=X.shape[0], fill_value=self.param)
>>> estimator = MyEstimator(param=2)
>>> estimator.get_params()
{'param': 2}
>>> X = np.array([[1, 2], [2, 3], [3, 4]])
>>> y = np.array([1, 0, 1])
>>> estimator.fit(X, y).predict(X)
array([2, 2, 2])
>>> estimator.set_params(param=3).fit(X, y).predict(X)
array([3, 3, 3])
def fit(self, X, y):
    """Fit the random-feature model to (X, y) and return self.

    Inputs are standardized, the response is centered, random hidden
    features are drawn, and the output weights are obtained by gradient
    steps followed by soft-thresholding (L1) and a projection onto the
    L2 ball of radius `lambd`.
    """
    X, y = check_X_y(X, y)
    rng = np.random.RandomState(self.random_state)
    n_samples = len(y)

    # Standardize inputs (epsilon guards against constant columns)
    self.X_mean_ = X.mean(axis=0)
    self.X_std_ = X.std(axis=0) + 1e-8
    X_scaled = (X - self.X_mean_) / self.X_std_

    # Center response
    self.y_mean_ = y.mean()
    y_centered = y - self.y_mean_

    # Random feature mapping (draw order matters for reproducibility)
    self.W_in_ = rng.randn(X.shape[1], self.n_hidden_features)
    self.b_in_ = rng.randn(self.n_hidden_features)
    H = self._activation(X_scaled @ self.W_in_ + self.b_in_)

    # Step size and L1 threshold do not change across iterations
    step = 0.01 / (1 + self.alpha * (1 - self.l1_ratio))
    threshold = step * self.alpha * self.l1_ratio

    beta = np.zeros(self.n_hidden_features)

    for _ in range(self.max_iter):
        # Gradient step on the least-squares term
        grad = H.T @ (H @ beta - y_centered) / n_samples
        candidate = beta - step * grad

        # Soft thresholding (L1)
        candidate = np.sign(candidate) * np.maximum(
            np.abs(candidate) - threshold, 0
        )

        # Projection onto the L2 ball of radius lambd
        norm = np.linalg.norm(candidate)
        if norm > self.lambd:
            candidate = candidate * (self.lambd / norm)

        shift = np.linalg.norm(candidate - beta)
        beta = candidate
        if shift < self.tol:
            break

    self.beta_ = beta
    return self
class GLMClassifier(GLM, ClassifierMixin):
    """Generalized 'linear' models using quasi-randomized networks (classification)

    Parameters:

        n_hidden_features: int
            number of nodes in the hidden layer

        lambda1: float
            regularization parameter for GLM coefficients on original features

        alpha1: float
            controls compromise between l1 and l2 norm of GLM coefficients on original features

        lambda2: float
            regularization parameter for GLM coefficients on nonlinear features

        alpha2: float
            controls compromise between l1 and l2 norm of GLM coefficients on nonlinear features

        family: str
            loss used for fitting and transform used in `predict_proba`:
            "logit", "expit" or "erf"

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
            'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not
            (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original predictors are included (True) in model's
            fitting or not (False)

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
                no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        optimizer: object
            optimizer, from class nnetsauce.Optimizer

        backend: str.
            "cpu" or "gpu" or "tpu".

        seed: int
            reproducibility seed for nodes_sim=='uniform'

    Attributes:

        beta_: vector
            regression coefficients

    Examples:

    See [https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_classification.py)

    """

    # construct the object -----

    # NOTE(review): `optimizer=Optimizer()` is evaluated once at definition
    # time, so instances built with the default share one Optimizer object.
    # Kept as-is to preserve the public signature.
    def __init__(
        self,
        n_hidden_features=5,
        lambda1=0.01,
        alpha1=0.5,
        lambda2=0.01,
        alpha2=0.5,
        family="expit",
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        optimizer=Optimizer(),
        backend="cpu",
        seed=123,
    ):
        super().__init__(
            n_hidden_features=n_hidden_features,
            lambda1=lambda1,
            alpha1=alpha1,
            lambda2=lambda2,
            alpha2=alpha2,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            optimizer=optimizer,
            backend=backend,
            seed=seed,
        )

        self.family = family

    def logit_loss(self, Y, row_index, XB):
        """Loss for family "logit"; clips XB in place at 709.0 first.

        Y is the one-hot response, XB the linear predictor for the rows in
        `row_index` (all rows when `row_index` is None).
        """
        self.n_classes = Y.shape[1]

        # Cap the linear predictor (modifies the caller's array);
        # exp() of values much above 709 overflows float64.
        XB[XB > 709.0] = 709.0

        targets = Y if row_index is None else Y[row_index, :]

        # NOTE(review): logsumexp is called without an axis; if this is
        # scipy.special.logsumexp it reduces over every entry of XB rather
        # than per row — confirm this is intended.
        return -np.mean(np.sum(targets * XB, axis=1) - logsumexp(XB))

    def expit_erf_loss(self, Y, row_index, XB):
        """Loss for families "expit" and "erf" (same formula, no clipping)."""
        self.n_classes = Y.shape[1]

        targets = Y if row_index is None else Y[row_index, :]

        return -np.mean(np.sum(targets * XB, axis=1) - logsumexp(XB))

    def loss_func(
        self,
        beta,
        group_index,
        X,
        Y,
        y,
        row_index=None,
        type_loss="logit",
        **kwargs
    ):
        """Penalized loss evaluated at the flat coefficient vector `beta`.

        `beta` is reshaped column-major to (n_columns_of_X, n_classes);
        `type_loss` selects the loss ("logit", "expit" or "erf"), and the
        penalty from `self.compute_penalty` is added to it.
        """
        losses = {
            "logit": self.logit_loss,
            "expit": self.expit_erf_loss,
            "erf": self.expit_erf_loss,
        }

        coefs = np.reshape(beta, (X.shape[1], self.n_classes), order="F")

        if row_index is None:
            # Full-batch evaluation: use every row
            row_index = range(len(y))
            XB = self.compute_XB(X, beta=coefs)
        else:
            XB = self.compute_XB(X, beta=coefs, row_index=row_index)

        return losses[type_loss](Y, row_index, XB) + self.compute_penalty(
            group_index=group_index, beta=beta
        )

    def fit(self, X, y, **kwargs):
        """Fit GLM model to training data (X, y).

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            **kwargs: additional parameters to be passed to
                    self.cook_training_set or self.obj.fit

        Returns:

            self: object

        """

        # NOTE(review): `assert` is stripped under `python -O`; kept so the
        # exception type seen by callers does not change.
        assert mx.is_factor(
            y
        ), "y must contain only integers"  # change is_factor and subsampling everywhere

        self.classes_ = np.unique(y)  # for compatibility with sklearn
        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn

        self.beta_ = None

        n_samples, n_features = X.shape

        # NOTE(review): n_samples * n_features looks unusual for an index
        # into the coefficient vector — confirm against compute_penalty.
        self.group_index = n_samples * n_features

        self.n_classes = len(np.unique(y))

        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

        Y = self.optimizer.one_hot_encode(output_y, self.n_classes)

        # initialization: least-squares solution as starting point
        linalg = np.linalg if self.backend == "cpu" else jnp.linalg
        beta_ = linalg.lstsq(scaled_Z, Y, rcond=None)[0]

        # optimization; extra keywords are forwarded to loss_func
        self.optimizer.fit(
            self.loss_func,
            response=y,
            x0=beta_.flatten(order="F"),
            group_index=self.group_index,
            X=scaled_Z,
            Y=Y,
            y=y,
            type_loss=self.family,
        )

        self.beta_ = self.optimizer.results[0]
        self.classes_ = np.unique(y)

        return self

    def predict(self, X, **kwargs):
        """Predict test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters to be passed to
                    self.cook_test_set

        Returns:

            model predictions: {array-like}
                index of the most probable class for each row

        """

        return np.argmax(self.predict_proba(X, **kwargs), axis=1)

    def predict_proba(self, X, **kwargs):
        """Predict probabilities for test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features.
                A 1-D array is treated as a single observation.

            **kwargs: additional parameters to be passed to
                    self.cook_test_set

        Returns:

            probability estimates for test data: {array-like}
                shape = [n_samples, n_classes] (one row for 1-D input)

        Raises:

            ValueError: if `self.family` is not "logit", "expit" or "erf"

        """
        if self.family not in ("logit", "expit", "erf"):
            raise ValueError(
                f"Unknown family: {self.family!r}; "
                "expected 'logit', 'expit' or 'erf'"
            )

        single_observation = len(X.shape) == 1

        if single_observation:
            n_features = X.shape[0]
            # Pad with a row of ones so cook_test_set receives a 2-D input
            new_X = mo.rbind(
                X.reshape(1, n_features),
                np.ones(n_features).reshape(1, n_features),
            )
            Z = self.cook_test_set(new_X, **kwargs)
        else:
            Z = self.cook_test_set(X, **kwargs)

        # Use the design matrix's own width: it is defined for 1-D input
        # too (X.shape[1] is not) and must equal the number of
        # coefficients per class for the product below to be valid.
        ZB = mo.safe_sparse_dot(
            Z,
            self.beta_.reshape(self.n_classes, Z.shape[1]).T,
        )

        if self.family == "logit":
            scores = np.exp(ZB)
        elif self.family == "expit":
            scores = expit(ZB)
        else:  # "erf"
            scores = 0.5 * (1 + erf(ZB))

        probs = scores / scores.sum(axis=1)[:, None]

        # Drop the padding row, keeping a 2-D result so predict() still works
        return probs[:1] if single_observation else probs

    def score(self, X, y, scoring=None):
        """Scoring function for classification.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            scoring: str
                scoring method (default is accuracy)

        Returns:

            score: float

        Raises:

            ValueError: if `scoring` is not one of the supported names

        """

        if scoring is None:
            scoring = "accuracy"

        # scoring name -> (metric attribute on skm2, uses probabilities, negate)
        metrics = {
            "accuracy": ("accuracy_score", False, False),
            "f1": ("f1_score", False, False),
            "precision": ("precision_score", False, False),
            "recall": ("recall_score", False, False),
            "roc_auc": ("roc_auc_score", False, False),
            "log_loss": ("log_loss", True, False),
            "balanced_accuracy": ("balanced_accuracy_score", False, False),
            "average_precision": ("average_precision_score", False, False),
            "neg_brier_score": ("brier_score_loss", True, True),
            "neg_log_loss": ("log_loss", True, True),
        }

        if scoring not in metrics:
            # Previously an unknown name silently returned None
            raise ValueError(
                f"Unknown scoring: {scoring!r}; "
                f"expected one of {sorted(metrics)}"
            )

        metric_name, use_proba, negate = metrics[scoring]
        y_hat = self.predict_proba(X) if use_proba else self.predict(X)
        value = getattr(skm2, metric_name)(y, y_hat)

        return -value if negate else value

    @property
    def _estimator_type(self):
        # Read by scikit-learn-style helpers to identify classifiers
        return "classifier"
Generalized 'linear' models using quasi-randomized networks (classification)
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
lambda1: float
regularization parameter for GLM coefficients on original features
alpha1: float
controls the compromise between the l1 and l2 norms of the GLM coefficients on original features
lambda2: float
regularization parameter for GLM coefficients on nonlinear features
alpha2: float
controls the compromise between the l1 and l2 norms of the GLM coefficients on nonlinear features
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
optimizer: object
optimizer, from class Optimizer
backend: str.
"cpu" or "gpu" or "tpu".
seed: int
reproducibility seed for nodes_sim=='uniform'
Attributes:
beta_: vector
regression coefficients
Examples:
See https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_classification.py
def fit(self, X, y, **kwargs):
    """Fit GLM model to training data (X, y).

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        **kwargs: additional parameters to be passed to
                self.cook_training_set or self.obj.fit

    Returns:

        self: object

    """

    assert mx.is_factor(
        y
    ), "y must contain only integers"  # change is_factor and subsampling everywhere

    # sklearn-compatible attributes
    labels = np.unique(y)
    self.classes_ = labels
    self.n_classes_ = len(labels)

    self.beta_ = None

    n_samples, n_features = X.shape
    self.group_index = n_samples * n_features

    self.n_classes = len(np.unique(y))

    output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

    Y = self.optimizer.one_hot_encode(output_y, self.n_classes)

    # initialization: least-squares solution as starting point
    linalg = np.linalg if self.backend == "cpu" else jnp.linalg
    start = linalg.lstsq(scaled_Z, Y, rcond=None)[0]

    # optimization; extra keywords are forwarded to loss_func
    self.optimizer.fit(
        self.loss_func,
        response=y,
        x0=start.flatten(order="F"),
        group_index=self.group_index,
        X=scaled_Z,
        Y=Y,
        y=y,
        type_loss=self.family,
    )

    self.beta_ = self.optimizer.results[0]
    self.classes_ = np.unique(y)

    return self
Fit GLM model to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
def predict(self, X, **kwargs):
    """Predict the class index of each row of X.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Vectors to classify, where n_samples is the number
            of samples and n_features is the number of features.

        **kwargs: additional parameters forwarded to
                self.predict_proba

    Returns:

        model predictions: {array-like}
            position of the largest probability in each row

    """
    class_probabilities = self.predict_proba(X, **kwargs)
    return np.argmax(class_probabilities, axis=1)
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
def predict_proba(self, X, **kwargs):
    """Predict probabilities for test data X.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Test vectors, where n_samples is the number
            of samples and n_features is the number of features.
            A 1-D array is treated as a single observation.

        **kwargs: additional parameters to be passed to
                self.cook_test_set

    Returns:

        probability estimates for test data: {array-like}
            shape = [n_samples, n_classes] (one row for 1-D input)

    Raises:

        ValueError: if `self.family` is not "logit", "expit" or "erf"

    """
    if self.family not in ("logit", "expit", "erf"):
        # Previously an unknown family silently returned None
        raise ValueError(
            f"Unknown family: {self.family!r}; "
            "expected 'logit', 'expit' or 'erf'"
        )

    single_observation = len(X.shape) == 1

    if single_observation:
        n_features = X.shape[0]
        # Pad with a row of ones so cook_test_set receives a 2-D input
        new_X = mo.rbind(
            X.reshape(1, n_features),
            np.ones(n_features).reshape(1, n_features),
        )
        Z = self.cook_test_set(new_X, **kwargs)
    else:
        Z = self.cook_test_set(X, **kwargs)

    # Use the design matrix's own width: it is defined for 1-D input too
    # (X.shape[1] raised IndexError there) and must equal the number of
    # coefficients per class for the product below to be valid.
    ZB = mo.safe_sparse_dot(
        Z,
        self.beta_.reshape(self.n_classes, Z.shape[1]).T,
    )

    if self.family == "logit":
        scores = np.exp(ZB)
    elif self.family == "expit":
        scores = expit(ZB)
    else:  # "erf"
        scores = 0.5 * (1 + erf(ZB))

    probs = scores / scores.sum(axis=1)[:, None]

    # Drop the padding row, keeping a 2-D result so argmax(axis=1) works
    return probs[:1] if single_observation else probs
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
def score(self, X, y, scoring=None):
    """Scoring function for classification.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Test vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        scoring: str
            scoring method (default is accuracy); one of "accuracy",
            "f1", "precision", "recall", "roc_auc", "log_loss",
            "balanced_accuracy", "average_precision",
            "neg_brier_score", "neg_log_loss"

    Returns:

        score: float

    Raises:

        ValueError: if `scoring` is not one of the supported names

    """

    if scoring is None:
        scoring = "accuracy"

    # scoring name -> (metric attribute on skm2, uses probabilities, negate)
    metrics = {
        "accuracy": ("accuracy_score", False, False),
        "f1": ("f1_score", False, False),
        "precision": ("precision_score", False, False),
        "recall": ("recall_score", False, False),
        "roc_auc": ("roc_auc_score", False, False),
        "log_loss": ("log_loss", True, False),
        "balanced_accuracy": ("balanced_accuracy_score", False, False),
        "average_precision": ("average_precision_score", False, False),
        "neg_brier_score": ("brier_score_loss", True, True),
        "neg_log_loss": ("log_loss", True, True),
    }

    if scoring not in metrics:
        # Previously an unknown name silently returned None
        raise ValueError(
            f"Unknown scoring: {scoring!r}; "
            f"expected one of {sorted(metrics)}"
        )

    metric_name, use_proba, negate = metrics[scoring]
    # Probability-based metrics receive predict_proba, the rest hard labels
    y_hat = self.predict_proba(X) if use_proba else self.predict(X)
    value = getattr(skm2, metric_name)(y, y_hat)

    return -value if negate else value
Scoring function for classification.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method (default is accuracy)
Returns:
score: float
class GLMRegressor(GLM, RegressorMixin):
    """Generalized 'linear' models using quasi-randomized networks (regression)

    Parameters:

        n_hidden_features: int
            number of nodes in the hidden layer

        lambda1: float
            regularization parameter for GLM coefficients on original features

        alpha1: float
            controls compromise between l1 and l2 norm of GLM coefficients on original features

        lambda2: float
            regularization parameter for GLM coefficients on nonlinear features

        alpha2: float
            controls compromise between l1 and l2 norm of GLM coefficients on nonlinear features

        family: str
            "gaussian", "laplace", "poisson", or "quantile" (for now)

        level: int, default=50
            The level of the quantiles to compute for family = "quantile".
            Default is the median.

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
            'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not
            (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original predictors are included (True) in model's
            fitting or not (False)

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
                no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        optimizer: object
            optimizer, from class nnetsauce.Optimizer

        backend: str
            "cpu" or "gpu" or "tpu".

        seed: int
            reproducibility seed for nodes_sim=='uniform'

    Attributes:

        beta_: vector
            regression coefficients

    Examples:

    See [https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py)

    """

    # construct the object -----

    # NOTE(review): the default `Optimizer()` is evaluated once, when the class
    # is defined, so every instance built without an explicit `optimizer`
    # shares that same object. Kept as-is to preserve the public default.
    def __init__(
        self,
        n_hidden_features=5,
        lambda1=0.01,
        alpha1=0.5,
        lambda2=0.01,
        alpha2=0.5,
        family="gaussian",
        level=50,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        optimizer=Optimizer(),
        backend="cpu",
        seed=123,
    ):
        super().__init__(
            n_hidden_features=n_hidden_features,
            lambda1=lambda1,
            alpha1=alpha1,
            lambda2=lambda2,
            alpha2=alpha2,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            optimizer=optimizer,
            backend=backend,
            seed=seed,
        )

        self.family = family
        self.level = level
        # quantile level expressed as a probability (50 -> 0.5)
        self.q = self.level / 100

    def gaussian_loss(self, y, row_index, XB):
        """Half mean squared error between y[row_index] and XB."""
        return 0.5 * np.mean(np.square(y[row_index] - XB))

    def laplace_loss(self, y, row_index, XB):
        """Half mean absolute error between y[row_index] and XB."""
        return 0.5 * np.mean(np.abs(y[row_index] - XB))

    def poisson_loss(self, y, row_index, XB):
        """Negative mean of y[row_index] * XB - exp(XB)."""
        return -np.mean(y[row_index] * XB - np.exp(XB))

    def pinball_loss(self, y, row_index, XB, tau=0.5):
        """Mean pinball (quantile) loss at level tau, via mean_pinball_loss."""
        y = np.array(y[row_index])
        y_pred = np.array(XB)
        return mean_pinball_loss(y, y_pred, alpha=tau)

    def loss_func(
        self,
        beta,
        group_index,
        X,
        y,
        row_index=None,
        type_loss="gaussian",
        **kwargs
    ):
        """Objective function handed to the optimizer.

        Args:

            beta: vector
                current coefficients

            group_index: int
                forwarded to self.compute_penalty

            X: {array-like}
                design matrix, forwarded to self.compute_XB

            y: array-like
                response

            row_index: indices, optional
                rows of y (and X) to use; all rows when None

            type_loss: str
                "gaussian", "laplace", "poisson" or "quantile"

            **kwargs: accepted for compatibility with the optimizer, unused

        Returns:

            float: data loss, plus the penalty for every family except
            "quantile" (the quantile objective is the pinball loss alone)

        Raises:

            ValueError: if type_loss is "quantile" and self.q is not
            strictly between 0 and 1

        """
        res = {
            "gaussian": self.gaussian_loss,
            "laplace": self.laplace_loss,
            "poisson": self.poisson_loss,
            "quantile": self.pinball_loss,
        }
        is_quantile = type_loss == "quantile"

        # Explicit raise rather than `assert`: assertions vanish under `python -O`.
        if is_quantile and not 0 < self.q < 1:
            raise ValueError("'tau' must be comprised 0 < tau < 1")

        if row_index is None:
            row_index = range(len(y))
            XB = self.compute_XB(X, beta=beta)
        else:
            XB = self.compute_XB(X, beta=beta, row_index=row_index)

        if is_quantile:
            return res[type_loss](y, row_index, XB, self.q)

        return res[type_loss](y, row_index, XB) + self.compute_penalty(
            group_index=group_index, beta=beta
        )

    def fit(self, X, y, **kwargs):
        """Fit GLM model to training data (X, y).

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            **kwargs: additional parameters to be passed to
                    self.cook_training_set and self.optimizer.fit

        Returns:

            self: object

        """
        self.beta_ = None
        self.n_iter = 0

        # `level` may have been changed after construction; keep the quantile
        # probability used by loss_func in sync with it.
        self.q = self.level / 100

        # number of original features, handed to the penalty as group_index
        _, self.group_index = X.shape

        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

        # initialization: least-squares solution as the optimizer's start point
        if self.backend == "cpu":
            beta_ = np.linalg.lstsq(scaled_Z, centered_y, rcond=None)[0]
        else:
            beta_ = jnp.linalg.lstsq(scaled_Z, centered_y, rcond=None)[0]

        # optimization: the keyword arguments after x0 are forwarded by the
        # optimizer to self.loss_func
        self.optimizer.fit(
            self.loss_func,
            response=centered_y,
            x0=beta_,
            group_index=self.group_index,
            X=scaled_Z,
            y=centered_y,
            type_loss=self.family,
            **kwargs
        )

        self.beta_ = self.optimizer.results[0]

        return self

    def predict(self, X, **kwargs):
        """Predict test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features.
                A 1-D array is treated as a single observation.

            **kwargs: additional parameters to be passed to
                    self.cook_test_set

        Returns:

            model predictions: {array-like}

        """

        if len(X.shape) == 1:
            n_features = X.shape[0]
            # pad the single observation with a row of ones so that
            # cook_test_set receives a 2-row matrix; only row 0 is returned
            new_X = mo.rbind(
                X.reshape(1, n_features),
                np.ones(n_features).reshape(1, n_features),
            )

            return (
                self.y_mean_
                + np.dot(self.cook_test_set(new_X, **kwargs), self.beta_)
            )[0]

        return self.y_mean_ + np.dot(
            self.cook_test_set(X, **kwargs), self.beta_
        )

    def score(self, X, y, scoring=None):
        """Compute the score of the model.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Input vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            scoring: str
                scoring method, passed to get_scorer; when None (default),
                the root mean squared error is returned

        Returns:

            score: float

        """

        if scoring is None:
            return np.sqrt(np.mean((self.predict(X) - y) ** 2))

        return skm2.get_scorer(scoring)(self, X, y)
Generalized 'linear' models using quasi-randomized networks (regression)
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
lambda1: float
regularization parameter for GLM coefficients on original features
alpha1: float
controls the compromise between the l1 and l2 norms of GLM coefficients on original features
lambda2: float
regularization parameter for GLM coefficients on nonlinear features
alpha2: float
controls the compromise between the l1 and l2 norms of GLM coefficients on nonlinear features
family: str
"gaussian", "laplace", "poisson", or "quantile" (for now)
level: int, default=50
The level of the quantiles to compute for family = "quantile".
Default is the median.
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
optimizer: object
optimizer, from class Optimizer
backend: str
"cpu" or "gpu" or "tpu".
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu", "gpu", "tpu"
Attributes:
beta_: vector
regression coefficients
Examples:
See [https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py)
def fit(self, X, y, **kwargs):
    """Fit GLM model to training data (X, y).

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        **kwargs: additional parameters to be passed to
                self.cook_training_set and self.optimizer.fit

    Returns:

        self: object

    """
    self.beta_ = None
    self.n_iter = 0

    # `level` may have been changed after construction; keep the quantile
    # probability read by loss_func in sync with it.
    self.q = self.level / 100

    # number of original features, handed to the penalty as group_index
    _, self.group_index = X.shape

    centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

    # initialization: least-squares solution as the optimizer's start point
    if self.backend == "cpu":
        beta_ = np.linalg.lstsq(scaled_Z, centered_y, rcond=None)[0]
    else:
        beta_ = jnp.linalg.lstsq(scaled_Z, centered_y, rcond=None)[0]

    # optimization: the keyword arguments after x0 are forwarded by the
    # optimizer to self.loss_func
    self.optimizer.fit(
        self.loss_func,
        response=centered_y,
        x0=beta_,
        group_index=self.group_index,
        X=scaled_Z,
        y=centered_y,
        type_loss=self.family,
        **kwargs
    )

    self.beta_ = self.optimizer.results[0]

    return self
Fit GLM model to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set and self.optimizer.fit
Returns:
self: object
def predict(self, X, **kwargs):
    """Return model predictions for X.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Vectors to predict on; a 1-D array is handled as a
            single observation.

        **kwargs: additional parameters to be passed to
                self.cook_test_set

    Returns:

        model predictions: {array-like}

    """
    is_single_observation = len(X.shape) == 1

    if not is_single_observation:
        linear_part = np.dot(self.cook_test_set(X, **kwargs), self.beta_)
        return self.y_mean_ + linear_part

    # One observation: stack it on top of a row of ones so the test-set
    # preparation sees a 2-row matrix, then keep the first prediction only.
    n_features = X.shape[0]
    observation_row = X.reshape(1, n_features)
    padding_row = np.ones(n_features).reshape(1, n_features)
    stacked = mo.rbind(observation_row, padding_row)

    linear_part = np.dot(self.cook_test_set(stacked, **kwargs), self.beta_)
    return (self.y_mean_ + linear_part)[0]
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
def score(self, X, y, scoring=None):
    """Score the model on (X, y).

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Input vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        scoring: str
            scoring method, passed to get_scorer; when None (default),
            the root mean squared error is returned

    Returns:

        score: float

    """
    if scoring is not None:
        scorer = skm2.get_scorer(scoring)
        return scorer(self, X, y)

    # default: root mean squared error of the predictions
    errors = self.predict(X) - y
    return np.sqrt(np.mean(errors**2))
Compute the score of the model.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method, passed to get_scorer; when None (default), the root mean squared error is returned
Returns:
score: float
class KernelRidge(BaseEstimator, RegressorMixin):
    """
    Kernel Ridge Regression with optional GPU support, Matérn kernels, and automatic input standardization.

    Parameters:
    - alpha: float, or list/array of floats
        Regularization parameter. When several values are given, `fit`
        keeps the one with the smallest leave-one-out error.
    - kernel: str
        Kernel type ("linear", "rbf", or "matern").
    - gamma: float
        Kernel coefficient for "rbf". Ignored for other kernels.
        When None, 1 / n_features is used.
    - nu: float
        Smoothness parameter for the Matérn kernel. Default is 1.5.
    - length_scale: float
        Length scale parameter for the Matérn kernel. Default is 1.0.
    - backend: str
        "cpu" or "gpu" (uses JAX if "gpu").
    """

    def __init__(
        self,
        alpha=1.0,
        kernel="rbf",
        gamma=None,
        nu=1.5,
        length_scale=1.0,
        backend="cpu",
    ):
        # Any non-"cpu" backend needs JAX. This single check also covers
        # backend == "gpu", so no second availability test is required.
        if not JAX_AVAILABLE and backend != "cpu":
            raise RuntimeError(
                "JAX is required for this feature. Install with: pip install yourpackage[jax]"
            )
        self.alpha = alpha
        self.alpha_ = alpha
        self.kernel = kernel
        self.gamma = gamma
        self.nu = nu
        self.length_scale = length_scale
        self.backend = backend
        self.scaler = StandardScaler()

    def _linear_kernel(self, X, Y):
        """Linear kernel: X @ Y.T on the selected backend."""
        return jnp.dot(X, Y.T) if self.backend == "gpu" else np.dot(X, Y.T)

    def _rbf_kernel(self, X, Y):
        """RBF kernel exp(-gamma * ||x - y||^2).

        The effective gamma is resolved locally (1 / n_features when
        `self.gamma` is None) instead of overwriting the `gamma`
        hyperparameter, so a later fit on data with a different number of
        features does not reuse a stale value. The resolved value is exposed
        as `gamma_`.
        """
        xp = jnp if self.backend == "gpu" else np
        gamma = self.gamma if self.gamma is not None else 1.0 / X.shape[1]
        self.gamma_ = gamma
        sq_dists = (
            xp.sum(X**2, axis=1)[:, None]
            + xp.sum(Y**2, axis=1)
            - 2 * xp.dot(X, Y.T)
        )
        return xp.exp(-gamma * sq_dists)

    def _matern_kernel(self, X, Y):
        """
        Compute the Matérn kernel using JAX for GPU or NumPy for CPU.

        Parameters:
        - X: array-like, shape (n_samples_X, n_features)
        - Y: array-like, shape (n_samples_Y, n_features)

        Returns:
        - Kernel matrix, shape (n_samples_X, n_samples_Y)
        """
        # Imported at the top of the function on purpose: an import placed in
        # only one branch makes `gammaln`/`kv` function-local names, and the
        # other branch then fails with UnboundLocalError.
        from scipy.special import gammaln, kv

        xp = jnp if self.backend == "gpu" else np

        # Pairwise Euclidean distances
        dists = xp.sqrt(xp.sum((X[:, None, :] - Y[None, :, :]) ** 2, axis=2))
        scaled_dists = xp.sqrt(2 * self.nu) * dists / self.length_scale

        # Matérn kernel formula; the Bessel term is evaluated by SciPy
        coeff = (2 ** (1 - self.nu)) / xp.exp(gammaln(self.nu))
        matern_kernel = (
            coeff * (scaled_dists**self.nu) * kv(self.nu, scaled_dists)
        )
        # At distance 0 the formula is 0 * inf; the kernel value there is 1
        return xp.where(dists == 0, 1.0, matern_kernel)

    def _get_kernel(self, X, Y):
        """Dispatch to the kernel named by `self.kernel`.

        Raises:
        - ValueError if `self.kernel` is not "linear", "rbf" or "matern".
        """
        if self.kernel == "linear":
            return self._linear_kernel(X, Y)
        elif self.kernel == "rbf":
            return self._rbf_kernel(X, Y)
        elif self.kernel == "matern":
            return self._matern_kernel(X, Y)
        else:
            raise ValueError(f"Unsupported kernel: {self.kernel}")

    def fit(self, X, y):
        """
        Fit the Kernel Ridge Regression model.

        Parameters:
        - X: array-like, shape (n_samples, n_features)
            Training data.
        - y: array-like, shape (n_samples,)
            Target values.

        Returns:
        - self: object
        """
        # Standardize the inputs
        X = self.scaler.fit_transform(X)
        self.X_fit_ = X

        # Center the response
        self.y_mean_ = np.mean(y)
        y_centered = y - self.y_mean_

        n_samples = X.shape[0]

        # Compute the kernel matrix
        K = self._get_kernel(X, X)
        self.K_ = K
        self.y_fit_ = y_centered

        if isinstance(self.alpha, (list, np.ndarray)):
            # Several candidate alphas: solve for each and keep the one with
            # the smallest leave-one-out error (LOOE)
            self.alphas_ = self.alpha
            self.dual_coefs_ = []
            self.looe_ = []

            for alpha in self.alpha:
                G = K + alpha * np.eye(n_samples)
                G_inv = np.linalg.inv(G)
                diag_G_inv = np.diag(G_inv)
                dual_coef = np.linalg.solve(G, y_centered)
                # leave-one-out residual i is dual_coef[i] / G_inv[i, i]
                looe = np.sum((dual_coef / diag_G_inv) ** 2)
                self.dual_coefs_.append(dual_coef)
                self.looe_.append(looe)

            best_index = np.argmin(self.looe_)
            self.alpha_ = self.alpha[best_index]
            self.dual_coef_ = self.dual_coefs_[best_index]
        else:
            # Single alpha: one regularized linear solve
            if self.backend == "gpu":
                self.dual_coef_ = jnp.linalg.solve(
                    K + self.alpha * jnp.eye(n_samples), y_centered
                )
            else:
                self.dual_coef_ = np.linalg.solve(
                    K + self.alpha * np.eye(n_samples), y_centered
                )

        return self

    def predict(self, X, probs=False):
        """
        Predict using the Kernel Ridge Regression model.

        Parameters:
        - X: array-like, shape (n_samples, n_features)
            Test data.
        - probs: bool
            When True, return a softmax over the dot products between the
            predictions and the stored training inputs instead of the
            predictions themselves.

        Returns:
        - Predicted values, shape (n_samples,) (or the softmax output when
          probs is True).
        """
        # Standardize the inputs
        X = self.scaler.transform(X)
        K = self._get_kernel(X, self.X_fit_)
        xp = jnp if self.backend == "gpu" else np

        preds = xp.dot(K, self.dual_coef_) + self.y_mean_
        if not probs:
            return preds

        # NOTE(review): this product only has matching shapes when the
        # predictions have as many columns as X_fit_ has features; with a 1-D
        # target it does not look well-defined -- confirm the intended use.
        similarities = xp.dot(preds, self.X_fit_.T)
        if self.backend == "gpu":
            return jaxsoftmax(similarities, axis=1)
        return softmax(similarities, axis=1)

    def partial_fit(self, X, y):
        """
        Incrementally fit the Kernel Ridge Regression model with new data using a recursive approach.

        The first call only stores the batch and initializes the dual
        coefficients to zero; later calls append one coefficient per new
        observation.

        Parameters:
        - X: array-like, shape (n_samples, n_features)
            New training data.
        - y: array-like, shape (n_samples,)
            New target values.

        Returns:
        - self: object
            The updated model.
        """
        first_batch = not hasattr(self, "X_fit_")

        # The scaler is fitted on the first batch only and reused afterwards
        X = (
            self.scaler.fit_transform(X)
            if first_batch
            else self.scaler.transform(X)
        )

        if first_batch:
            # Initialize with the first batch of data
            self.X_fit_ = X

            # Center the response; this mean is reused for every later batch
            self.y_mean_ = np.mean(y)
            y_centered = y - self.y_mean_
            self.y_fit_ = y_centered

            n_samples = X.shape[0]

            # Compute the kernel matrix for the initial data
            self.K_ = self._get_kernel(X, X)

            # Initialize dual coefficients for each alpha
            if isinstance(self.alpha, (list, np.ndarray)):
                self.dual_coefs_ = [np.zeros(n_samples) for _ in self.alpha]
            else:
                self.dual_coef_ = np.zeros(n_samples)
        else:
            # Incrementally update with new data, one observation at a time
            y_centered = y - self.y_mean_
            for x_new, y_new in zip(X, y_centered):
                x_new = x_new.reshape(1, -1)  # Ensure x_new is 2D
                # kernel between the stored points and the new point
                k_new = self._get_kernel(self.X_fit_, x_new).flatten()

                # kernel of the new point with itself
                k_self = self._get_kernel(x_new, x_new).item()

                if isinstance(self.alpha, (list, np.ndarray)):
                    # Update dual coefficients for each alpha
                    for idx, alpha in enumerate(self.alpha):
                        gamma_new = 1 / (k_self + alpha)
                        residual = y_new - np.dot(self.dual_coefs_[idx], k_new)
                        self.dual_coefs_[idx] = np.append(
                            self.dual_coefs_[idx], gamma_new * residual
                        )
                else:
                    # Update dual coefficients for a single alpha
                    gamma_new = 1 / (k_self + self.alpha)
                    residual = y_new - np.dot(self.dual_coef_, k_new)
                    self.dual_coef_ = np.append(
                        self.dual_coef_, gamma_new * residual
                    )

                # Grow the kernel matrix by one row and one column
                self.K_ = np.block(
                    [
                        [self.K_, k_new[:, None]],
                        [k_new[None, :], np.array([[k_self]])],
                    ]
                )

                # Update the stored data
                self.X_fit_ = np.vstack([self.X_fit_, x_new])
                self.y_fit_ = np.append(self.y_fit_, y_new)

        # Select the best alpha based on LOOE after the batch
        if isinstance(self.alpha, (list, np.ndarray)):
            self.looe_ = []
            for idx, alpha in enumerate(self.alpha):
                G = self.K_ + alpha * np.eye(self.K_.shape[0])
                G_inv = np.linalg.inv(G)
                diag_G_inv = np.diag(G_inv)
                looe = np.sum((self.dual_coefs_[idx] / diag_G_inv) ** 2)
                self.looe_.append(looe)

            best_index = np.argmin(self.looe_)
            self.alpha_ = self.alpha[best_index]
            self.dual_coef_ = self.dual_coefs_[best_index]

        return self
Kernel Ridge Regression with optional GPU support, Matérn kernels, and automatic input standardization.
Parameters:
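- alpha: float, or list/array of floats Regularization parameter. When several values are given, fit keeps the one with the smallest leave-one-out error.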
- kernel: str Kernel type ("linear", "rbf", or "matern").
- gamma: float Kernel coefficient for "rbf". Ignored for other kernels.
- nu: float Smoothness parameter for the Matérn kernel. Default is 1.5.
- length_scale: float Length scale parameter for the Matérn kernel. Default is 1.0.
- backend: str "cpu" or "gpu" (uses JAX if "gpu").
def fit(self, X, y):
    """
    Fit the Kernel Ridge Regression model.

    Parameters:
    - X: array-like, shape (n_samples, n_features)
        Training data.
    - y: array-like, shape (n_samples,)
        Target values.

    Returns:
    - self: object
    """
    # Standardized inputs are what the model stores and builds kernels from
    X = self.scaler.fit_transform(X)
    self.X_fit_ = X

    # The response is centered; its mean is kept for later use
    self.y_mean_ = np.mean(y)
    y_centered = y - self.y_mean_

    n_samples = X.shape[0]

    K = self._get_kernel(X, X)
    self.K_ = K
    self.y_fit_ = y_centered

    has_alpha_grid = isinstance(self.alpha, (list, np.ndarray))

    if not has_alpha_grid:
        # Single regularization value: one linear solve on the chosen backend
        if self.backend == "gpu":
            self.dual_coef_ = jnp.linalg.solve(
                K + self.alpha * jnp.eye(n_samples), y_centered
            )
        else:
            self.dual_coef_ = np.linalg.solve(
                K + self.alpha * np.eye(n_samples), y_centered
            )
        return self

    # Several candidates: solve for each and record its leave-one-out error
    self.alphas_ = self.alpha
    self.dual_coefs_ = []
    self.looe_ = []

    for candidate in self.alpha:
        G = K + candidate * np.eye(n_samples)
        diag_G_inv = np.diag(np.linalg.inv(G))
        coef = np.linalg.solve(G, y_centered)
        self.dual_coefs_.append(coef)
        self.looe_.append(np.sum((coef / diag_G_inv) ** 2))

    # Keep the candidate with the smallest leave-one-out error
    winner = np.argmin(self.looe_)
    self.alpha_ = self.alpha[winner]
    self.dual_coef_ = self.dual_coefs_[winner]

    return self
Fit the Kernel Ridge Regression model.
Parameters:
- X: array-like, shape (n_samples, n_features) Training data.
- y: array-like, shape (n_samples,) Target values.
def predict(self, X, probs=False):
    """
    Predict using the Kernel Ridge Regression model.

    Parameters:
    - X: array-like, shape (n_samples, n_features)
        Test data.
    - probs: bool
        When True, return a softmax over the dot products between the
        predictions and the stored training inputs instead of the
        predictions themselves.

    Returns:
    - Predicted values, shape (n_samples,) (or the softmax output when
      probs is True).
    """
    on_gpu = self.backend == "gpu"
    xp = jnp if on_gpu else np

    # Same standardization as the one learned on the training data
    X_scaled = self.scaler.transform(X)
    gram = self._get_kernel(X_scaled, self.X_fit_)

    fitted = xp.dot(gram, self.dual_coef_) + self.y_mean_
    if not probs:
        return fitted

    # Dot products with the training inputs, normalized row-wise by softmax
    scores = xp.dot(fitted, self.X_fit_.T)
    if on_gpu:
        return jaxsoftmax(scores, axis=1)
    return softmax(scores, axis=1)
Predict using the Kernel Ridge Regression model.
Parameters:
- X: array-like, shape (n_samples, n_features) Test data.
Returns:
- Predicted values, shape (n_samples,).
class LazyClassifier(LazyDeepClassifier):
    """
    Fitting -- almost -- all the classification algorithms with
    nnetsauce's CustomClassifier and returning their scores (no layers).

    This is LazyDeepClassifier with the number of layers fixed to 1.

    Parameters:

        verbose: int, optional (default=0)
            Any positive number for verbosity.

        ignore_warnings: bool, optional (default=True)
            When set to True, the warnings related to algorithms that are not able to run are ignored.

        custom_metric: function, optional (default=None)
            When function is provided, models are evaluated based on the custom evaluation metric provided.

        predictions: bool, optional (default=False)
            When set to True, the predictions of all the models are returned as a dataframe.

        sort_by: string, optional (default='Accuracy')
            Sort models by a metric. Available options are 'Accuracy', 'Balanced Accuracy', 'ROC AUC', 'F1 Score'
            or a custom metric identified by its name and provided by custom_metric.

        random_state: int, optional (default=42)
            Reproducibility seed.

        estimators: list, optional (default='all')
            list of Estimators names or just 'all' (default='all')

        preprocess: bool
            preprocessing is done when set to True

        n_jobs : int, when possible, run in parallel
            For now, only used by individual models that support it.

        All the other parameters are the same as CustomClassifier's.

    Attributes:

        models_: dict-object
            Returns a dictionary with each model pipeline as value
            with key as name of models.

        best_model_: object
            Returns the best model pipeline based on the sort_by metric.

    Examples:

        import nnetsauce as ns
        import numpy as np
        from sklearn import datasets
        from sklearn.utils import shuffle

        dataset = datasets.load_iris()
        X = dataset.data
        y = dataset.target
        X, y = shuffle(X, y, random_state=123)
        X = X.astype(np.float32)
        y = y.astype(np.float32)
        X_train, X_test = X[:100], X[100:]
        y_train, y_test = y[:100], y[100:]

        clf = ns.LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
        models, predictions = clf.fit(X_train, X_test, y_train, y_test)
        model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test)
        print(models)

    """

    def __init__(
        self,
        verbose=0,
        ignore_warnings=True,
        custom_metric=None,
        predictions=False,
        sort_by="Accuracy",
        random_state=42,
        estimators="all",
        preprocess=False,
        n_jobs=None,
        # CustomClassifier attributes
        obj=None,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        seed=123,
        backend="cpu",
    ):
        # options that drive the model comparison itself
        comparison_options = dict(
            verbose=verbose,
            ignore_warnings=ignore_warnings,
            custom_metric=custom_metric,
            predictions=predictions,
            sort_by=sort_by,
            random_state=random_state,
            estimators=estimators,
            preprocess=preprocess,
            n_jobs=n_jobs,
        )
        # options listed under "CustomClassifier attributes" in the signature
        custom_classifier_options = dict(
            obj=obj,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )
        # the "lazy" (non-deep) variant always uses a single layer
        super().__init__(
            n_layers=1,
            **comparison_options,
            **custom_classifier_options,
        )
Fitting -- almost -- all the classification algorithms with nnetsauce's CustomClassifier and returning their scores (no layers).
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, the warnings related to algorithms that are not able to run are ignored.
custom_metric: function, optional (default=None)
When function is provided, models are evaluated based on the custom evaluation metric provided.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models are returned as a dataframe.
sort_by: string, optional (default='Accuracy')
Sort models by a metric. Available options are 'Accuracy', 'Balanced Accuracy', 'ROC AUC', 'F1 Score'
or a custom metric identified by its name and provided by custom_metric.
random_state: int, optional (default=42)
Reproducibility seed.
estimators: list, optional (default='all')
list of Estimators names or just 'all' (default='all')
preprocess: bool
preprocessing is done when set to True
n_jobs : int, when possible, run in parallel
For now, only used by individual models that support it.
All the other parameters are the same as CustomClassifier's.
Attributes:
models_: dict-object
Returns a dictionary with each model pipeline as value
with key as name of models.
best_model_: object
Returns the best model pipeline based on the sort_by metric.
Examples:
import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle
dataset = datasets.load_iris()
X = dataset.data
y = dataset.target
X, y = shuffle(X, y, random_state=123)
X = X.astype(np.float32)
y = y.astype(np.float32)
X_train, X_test = X[:100], X[100:]
y_train, y_test = y[:100], y[100:]
clf = ns.LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)
model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test)
print(models)
class LazyRegressor(LazyDeepRegressor):
    """
    Fit -- almost -- all the regression algorithms wrapped in nnetsauce's
    CustomRegressor and return their scores.

    This is LazyDeepRegressor with the depth fixed to a single layer
    (``n_layers=1`` is always forwarded to the parent constructor).

    Parameters:

        verbose: int, optional (default=0)
            Any positive number for verbosity.

        ignore_warnings: bool, optional (default=True)
            When set to True, warnings related to algorithms that are not
            able to run are ignored.

        custom_metric: function, optional (default=None)
            When a function is provided, models are also evaluated with
            this custom evaluation metric.

        predictions: bool, optional (default=False)
            When set to True, the predictions of all the models are
            returned as well.

        sort_by: string, optional (default='RMSE')
            Metric used to sort the models. Available options are
            'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken' and
            'Custom Metric', or a custom metric identified by its name and
            provided by custom_metric.

        random_state: int, optional (default=42)
            Reproducibility seed.

        estimators: list, optional (default='all')
            List of estimator names, or just 'all'.

        preprocess: bool
            Preprocessing is done when set to True.

        n_jobs: int, when possible, run in parallel
            For now, only used by individual models that support it.

        All the other parameters are the same as CustomRegressor's.

    Attributes:

        models_: dict-object
            Dictionary with the model names as keys and the model
            pipelines as values.

        best_model_: object
            The best model pipeline according to the sort_by metric.

    Examples:

        import nnetsauce as ns
        import numpy as np
        from sklearn import datasets
        from sklearn.utils import shuffle

        diabetes = datasets.load_diabetes()
        X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
        X = X.astype(np.float32)

        offset = int(X.shape[0] * 0.9)
        X_train, y_train = X[:offset], y[:offset]
        X_test, y_test = X[offset:], y[offset:]

        reg = ns.LazyRegressor(verbose=0, ignore_warnings=False,
                               custom_metric=None)
        models, predictions = reg.fit(X_train, X_test, y_train, y_test)
        print(models)

    """

    def __init__(
        self,
        verbose=0,
        ignore_warnings=True,
        custom_metric=None,
        predictions=False,
        sort_by="RMSE",
        random_state=42,
        estimators="all",
        preprocess=False,
        n_jobs=None,
        # CustomRegressor attributes
        obj=None,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        seed=123,
        backend="cpu",
    ):
        # Settings that drive the benchmarking loop itself.
        benchmark_options = dict(
            verbose=verbose,
            ignore_warnings=ignore_warnings,
            custom_metric=custom_metric,
            predictions=predictions,
            sort_by=sort_by,
            random_state=random_state,
            estimators=estimators,
            preprocess=preprocess,
            n_jobs=n_jobs,
        )
        # Settings handed through for the CustomRegressor layer.
        layer_options = dict(
            obj=obj,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )
        # The depth is pinned to one layer; everything else is forwarded as is.
        super().__init__(n_layers=1, **benchmark_options, **layer_options)
Fitting -- almost -- all the regression algorithms with nnetsauce's CustomRegressor and returning their scores.
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, warnings related to algorithms that are not able to run are ignored.
custom_metric: function, optional (default=None)
When function is provided, models are evaluated based on the custom evaluation metric provided.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models are returned as a dataframe.
sort_by: string, optional (default='RMSE')
Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken' and 'Custom Metric',
or a custom metric identified by its name and provided by custom_metric.
random_state: int, optional (default=42)
Reproducibility seed.
estimators: list, optional (default='all')
list of Estimators names or just 'all' (default='all')
preprocess: bool
preprocessing is done when set to True
n_jobs : int, when possible, run in parallel
For now, only used by individual models that support it.
All the other parameters are the same as CustomRegressor's.
Attributes:
models_: dict-object
Returns a dictionary with each model pipeline as value
with key as name of models.
best_model_: object
Returns the best model pipeline based on the sort_by metric.
Examples:
import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle
diabetes = datasets.load_diabetes()
X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
X = X.astype(np.float32)
offset = int(X.shape[0] * 0.9)
X_train, y_train = X[:offset], y[:offset]
X_test, y_test = X[offset:], y[offset:]
reg = ns.LazyRegressor(verbose=0, ignore_warnings=False,
custom_metric=None)
models, predictions = reg.fit(X_train, X_test, y_train, y_test)
print(models)
class LazyDeepClassifier(Custom, ClassifierMixin):
    """

    Fit -- almost -- all the classification algorithms with layers of
    nnetsauce's CustomClassifier and return their scores.

    Parameters:

        verbose: int, optional (default=0)
            Any positive number for verbosity.

        ignore_warnings: bool, optional (default=True)
            When set to True, warnings related to algorithms that are not
            able to run are ignored.

        custom_metric: function, optional (default=None)
            When a function is provided, models are also evaluated with it;
            it is called as ``custom_metric(y_test, y_pred)``.

        predictions: bool, optional (default=False)
            When set to True, ``fit`` also returns the predictions of all
            the models.

        sort_by: string, optional (default='Accuracy')
            Sort models by a metric. Available options are 'Accuracy',
            'Balanced Accuracy', 'ROC AUC', 'F1 Score', 'Time Taken',
            'Custom metric', or the name (``__name__``) of the function
            provided through custom_metric.

        random_state: int, optional (default=42)
            Reproducibility seed.

        estimators: list, optional (default='all')
            List of estimator names, or just 'all' (default='all').

        preprocess: bool, preprocessing is done when set to True

        n_jobs: int
            Stored on the instance. NOTE(review): this class does not
            currently forward it to the underlying models.

        n_layers: int, optional (default=3)
            Number of layers of CustomClassifiers to be used. It is stored
            as ``n_layers - 1``, i.e. the number of wrapping layers added
            on top of the first CustomClassifier.

        All the other parameters are the same as CustomClassifier's.

    Attributes:

        models_: dict-object
            Dictionary with the model names as keys and the fitted models
            (pipelines when preprocess is True) as values.

        best_model_: object
            The fitted model ranked first by the sort_by metric, or None
            when no model could be scored.

    Examples

        ```python
        import nnetsauce as ns
        from sklearn.datasets import load_breast_cancer
        from sklearn.model_selection import train_test_split
        data = load_breast_cancer()
        X = data.data
        y = data.target
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2,
            random_state=123)
        clf = ns.LazyDeepClassifier(verbose=0, ignore_warnings=True,
                                    custom_metric=None, predictions=True)
        models, predictions = clf.fit(X_train, X_test, y_train, y_test)
        model_dictionary = clf.provide_models(X_train, X_test, y_train, y_test)
        print(models)
        ```

    """

    def __init__(
        self,
        verbose=0,
        ignore_warnings=True,
        custom_metric=None,
        predictions=False,
        sort_by="Accuracy",
        random_state=42,
        estimators="all",
        preprocess=False,
        n_jobs=None,
        # Defining depth
        n_layers=3,
        # CustomClassifier attributes
        obj=None,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        seed=123,
        backend="cpu",
    ):
        self.verbose = verbose
        self.ignore_warnings = ignore_warnings
        self.custom_metric = custom_metric
        self.predictions = predictions
        self.sort_by = sort_by
        self.models_ = {}
        self.best_model_ = None
        self.random_state = random_state
        self.estimators = estimators
        self.preprocess = preprocess
        # The first CustomClassifier counts as one layer, so only the number
        # of *additional* wrapping layers is kept.
        self.n_layers = n_layers - 1
        self.n_jobs = n_jobs
        super().__init__(
            obj=obj,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )

    def _layer_params(self):
        """Return the CustomClassifier keyword arguments shared by every layer."""
        return dict(
            n_hidden_features=self.n_hidden_features,
            activation_name=self.activation_name,
            a=self.a,
            nodes_sim=self.nodes_sim,
            bias=self.bias,
            dropout=self.dropout,
            direct_link=self.direct_link,
            n_clusters=self.n_clusters,
            cluster_encode=self.cluster_encode,
            type_clust=self.type_clust,
            type_scaling=self.type_scaling,
            col_sample=self.col_sample,
            row_sample=self.row_sample,
            seed=self.seed,
            backend=self.backend,
            cv_calibration=None,
        )

    def _build_layered_classifier(self, model, X_train, y_train):
        """Wrap `model` in `self.n_layers + 1` nested CustomClassifier layers."""
        params = self._layer_params()

        # Seed the base learner whenever it exposes a `random_state` parameter.
        if "random_state" in model().get_params().keys():
            base_learner = model(random_state=self.random_state)
        else:
            base_learner = model()

        layer_clf = CustomClassifier(obj=base_learner, **params)

        # NOTE(review): the caller fits the complete stack again, so this
        # initial fit looks redundant; it is kept to preserve the existing
        # behaviour -- confirm before removing.
        layer_clf.fit(X_train, y_train)

        for _ in range(self.n_layers):
            layer_clf = deepcopy(CustomClassifier(obj=layer_clf, **params))

        return layer_clf

    def _score_and_record(
        self, name, y_test, y_pred, start, results, predictions
    ):
        """Score one model's predictions and append the metrics to `results`."""
        accuracy = accuracy_score(y_test, y_pred, normalize=True)
        b_accuracy = balanced_accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred, average="weighted")
        try:
            roc_auc = roc_auc_score(y_test, y_pred)
        except Exception as exception:
            # ROC AUC is best effort: the other metrics are still reported.
            roc_auc = None
            if self.ignore_warnings is False:
                print("ROC AUC couldn't be calculated for " + name)
                print(exception)
        elapsed = time.time() - start

        # Every metric is computed *before* anything is recorded: if the
        # custom metric raises, no column is left longer than the others
        # (which would make the final DataFrame construction fail).
        custom_value = None
        if self.custom_metric is not None:
            custom_value = self.custom_metric(y_test, y_pred)

        results["Model"].append(name)
        results["Accuracy"].append(accuracy)
        results["Balanced Accuracy"].append(b_accuracy)
        results["ROC AUC"].append(roc_auc)
        results["F1 Score"].append(f1)
        results["Time Taken"].append(elapsed)
        if self.custom_metric is not None:
            results["Custom metric"].append(custom_value)

        if self.verbose > 0:
            report = {
                "Model": name,
                "Accuracy": accuracy,
                "Balanced Accuracy": b_accuracy,
                "ROC AUC": roc_auc,
                "F1 Score": f1,
            }
            if self.custom_metric is not None:
                report[self.custom_metric.__name__] = custom_value
            report["Time taken"] = time.time() - start
            print(report)

        if self.predictions:
            predictions[name] = y_pred

    def _report_failure(self, name, exception):
        """Print why a model was skipped, unless warnings are ignored."""
        if self.ignore_warnings is False:
            print(name + " model failed to execute")
            print(exception)

    def _select_classifiers(self):
        """Return the (name, constructor) pairs to benchmark."""
        if self.estimators == "all":
            return [
                item
                for sublist in [
                    DEEPCLASSIFIERS,
                    DEEPMULTITASKCLASSIFIERS,
                    DEEPSIMPLEMULTITASKCLASSIFIERS,
                ]
                for item in sublist
            ]

        # Classifiers are wrapped directly; regressors are turned into
        # classifiers through the two multitask wrappers.
        return (
            [
                ("DeepCustomClassifier(" + est[0] + ")", est[1])
                for est in all_estimators()
                if (
                    issubclass(est[1], ClassifierMixin)
                    and (est[0] in self.estimators)
                )
            ]
            + [
                (
                    "DeepMultitaskClassifier(" + est[0] + ")",
                    partial(MultitaskClassifier, obj=est[1]()),
                )
                for est in all_estimators()
                if (
                    issubclass(est[1], RegressorMixin)
                    and (est[0] in self.estimators)
                )
            ]
            + [
                (
                    "DeepSimpleMultitaskClassifier(" + est[0] + ")",
                    partial(SimpleMultitaskClassifier, obj=est[1]()),
                )
                for est in all_estimators()
                if (
                    issubclass(est[1], RegressorMixin)
                    and (est[0] in self.estimators)
                )
            ]
        )

    def fit(self, X_train, X_test, y_train, y_test):
        """Fit classifiers to X_train and y_train, predict and score on X_test,
        y_test.

        Parameters:

            X_train: array-like,
                Training vectors, where rows is the number of samples
                and columns is the number of features.

            X_test: array-like,
                Testing vectors, where rows is the number of samples
                and columns is the number of features.

            y_train: array-like,
                Training target values, one per training sample.

            y_test: array-like,
                Testing target values, one per testing sample.

        Returns:

            scores: Pandas DataFrame
                Metrics of all the models, indexed by model name and sorted
                in descending order of the sort_by metric.

            predictions: dict
                Only returned (as the second element of a tuple) when
                `self.predictions` is True: maps each model name to its
                predictions on X_test.
        """
        # Column order of the resulting DataFrame follows insertion order.
        results = {
            "Model": [],
            "Accuracy": [],
            "Balanced Accuracy": [],
            "ROC AUC": [],
            "F1 Score": [],
        }
        if self.custom_metric is not None:
            results["Custom metric"] = []
        results["Time Taken"] = []
        predictions = {}

        if isinstance(X_train, np.ndarray):
            X_train = pd.DataFrame(X_train)
            X_test = pd.DataFrame(X_test)

        numeric_features = X_train.select_dtypes(include=[np.number]).columns
        categorical_features = X_train.select_dtypes(
            include=["object"]
        ).columns

        categorical_low, categorical_high = get_card_split(
            X_train, categorical_features
        )

        preprocessor = None
        if self.preprocess is True:
            preprocessor = ColumnTransformer(
                transformers=[
                    ("numeric", numeric_transformer, numeric_features),
                    (
                        "categorical_low",
                        categorical_transformer_low,
                        categorical_low,
                    ),
                    (
                        "categorical_high",
                        categorical_transformer_high,
                        categorical_high,
                    ),
                ]
            )

        # baseline models (fall back to the random forest alone when the
        # XGBoost classifier cannot be constructed)
        try:
            baseline_names = ["RandomForestClassifier", "XGBClassifier"]
            baseline_models = [RandomForestClassifier(), xgb.XGBClassifier()]
        except Exception:
            baseline_names = ["RandomForestClassifier"]
            baseline_models = [RandomForestClassifier()]

        for name, model in zip(baseline_names, baseline_models):
            start = time.time()
            try:
                model.fit(X_train, y_train)
                self.models_[name] = model
                y_pred = model.predict(X_test)
                self._score_and_record(
                    name, y_test, y_pred, start, results, predictions
                )
            except Exception as exception:
                self._report_failure(name, exception)

        self.classifiers = self._select_classifiers()

        for name, model in tqdm(self.classifiers):  # do parallel exec
            start = time.time()
            try:
                layer_clf = self._build_layered_classifier(
                    model, X_train, y_train
                )

                if preprocessor is not None:
                    fitted = Pipeline(
                        [
                            ("preprocessor", preprocessor),
                            ("classifier", layer_clf),
                        ]
                    )
                else:
                    fitted = layer_clf

                fitted.fit(X_train, y_train)
                self.models_[name] = fitted
                y_pred = fitted.predict(X_test)
                self._score_and_record(
                    name, y_test, y_pred, start, results, predictions
                )
            except Exception as exception:
                self._report_failure(name, exception)

        scores = pd.DataFrame(results)

        # The custom metric is stored under the fixed "Custom metric" column;
        # also accept the metric function's own name as the sort key.
        sort_key = self.sort_by
        if (
            self.custom_metric is not None
            and sort_key not in scores.columns
            and sort_key == getattr(self.custom_metric, "__name__", None)
        ):
            sort_key = "Custom metric"

        scores = scores.sort_values(by=sort_key, ascending=False).set_index(
            "Model"
        )

        # No model may have been scored (every fit failed): leave no best
        # model instead of raising on the empty index.
        if len(scores.index) > 0:
            self.best_model_ = self.models_[scores.index[0]]
        else:
            self.best_model_ = None

        if self.predictions is True:
            return scores, predictions

        return scores

    def get_best_model(self):
        """
        This function returns the best model pipeline based on the sort_by metric.

        Returns:

            best_model: object,
                The model ranked first by the sort_by metric during the
                last call to `fit` (None before any fit).

        """
        return self.best_model_

    def provide_models(self, X_train, X_test, y_train, y_test):
        """Returns all the model objects trained. If fit hasn't been called yet,
        then it's called to return the models.

        Parameters:

            X_train: array-like,
                Training vectors, where rows is the number of samples
                and columns is the number of features.

            X_test: array-like,
                Testing vectors, where rows is the number of samples
                and columns is the number of features.

            y_train: array-like,
                Training target values, one per training sample.

            y_test: array-like,
                Testing target values, one per testing sample.

        Returns:

            models: dict-object,
                Returns a dictionary with each model's pipeline as value
                and key = name of the model.
        """
        if not self.models_:
            self.fit(X_train, X_test, y_train, y_test)

        return self.models_
Fitting -- almost -- all the classification algorithms with layers of nnetsauce's CustomClassifier and returning their scores.
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, warnings related to algorithms that are not
able to run are ignored.
custom_metric: function, optional (default=None)
When function is provided, models are evaluated based on the custom
evaluation metric provided.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models are
returned as data frame.
sort_by: string, optional (default='Accuracy')
Sort models by a metric. Available options are 'Accuracy',
'Balanced Accuracy', 'ROC AUC', 'F1 Score' or a custom metric
identified by its name and provided by custom_metric.
random_state: int, optional (default=42)
Reproducibility seed.
estimators: list, optional (default='all')
list of Estimators names or just 'all' for > 90 classifiers
(default='all')
preprocess: bool, preprocessing is done when set to True
n_jobs: int, when possible, run in parallel
For now, only used by individual models that support it.
n_layers: int, optional (default=3)
Number of layers of CustomClassifiers to be used.
All the other parameters are the same as CustomClassifier's.
Attributes:
models_: dict-object
Returns a dictionary with each model pipeline as value
with key as name of models.
best_model_: object
Returns the best model pipeline.
Examples
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
data = load_breast_cancer()
X = data.data
y= data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2,
random_state=123)
clf = ns.LazyDeepClassifier(verbose=0, ignore_warnings=True, custom_metric=None, predictions=True)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)
model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test)
print(models)
def fit(self, X_train, X_test, y_train, y_test):
    """Fit classifiers to X_train and y_train, predict and score on X_test,
    y_test.

    Two baseline models (random forest and, when available, XGBoost) are
    fitted first, then every selected estimator is wrapped in stacked
    CustomClassifier layers, fitted and scored.

    Parameters:

        X_train: array-like,
            Training vectors, where rows is the number of samples
            and columns is the number of features.

        X_test: array-like,
            Testing vectors, where rows is the number of samples
            and columns is the number of features.

        y_train: array-like,
            Training target labels, one per row of X_train.

        y_test: array-like,
            Testing target labels, one per row of X_test.

    Returns:

        scores: Pandas DataFrame
            Metrics of every model that ran successfully, indexed by model
            name and sorted by `sort_by` in descending order.

        predictions: dict
            Only returned (as the second element of a tuple) when
            `self.predictions` is True. Maps each model name to its
            predictions on X_test.
    """
    names = []
    accuracies = []
    balanced_accuracies = []
    roc_aucs = []
    f1_scores = []
    custom_scores = []
    timings = []
    predictions = {}

    has_custom_metric = self.custom_metric is not None
    use_pipeline = self.preprocess is True

    if isinstance(X_train, np.ndarray):
        X_train = pd.DataFrame(X_train)
        X_test = pd.DataFrame(X_test)

    numeric_features = X_train.select_dtypes(include=[np.number]).columns
    categorical_features = X_train.select_dtypes(include=["object"]).columns

    categorical_low, categorical_high = get_card_split(
        X_train, categorical_features
    )

    if use_pipeline:
        preprocessor = ColumnTransformer(
            transformers=[
                ("numeric", numeric_transformer, numeric_features),
                (
                    "categorical_low",
                    categorical_transformer_low,
                    categorical_low,
                ),
                (
                    "categorical_high",
                    categorical_transformer_high,
                    categorical_high,
                ),
            ]
        )

    def report_failure(name, exception):
        # Failures are deliberately non-fatal: one broken estimator must
        # not abort the whole comparison.
        if self.ignore_warnings is False:
            print(name + " model failed to execute")
            print(exception)

    def make_layer(obj):
        # One CustomClassifier layer configured from this object's settings.
        return CustomClassifier(
            obj=obj,
            n_hidden_features=self.n_hidden_features,
            activation_name=self.activation_name,
            a=self.a,
            nodes_sim=self.nodes_sim,
            bias=self.bias,
            dropout=self.dropout,
            direct_link=self.direct_link,
            n_clusters=self.n_clusters,
            cluster_encode=self.cluster_encode,
            type_clust=self.type_clust,
            type_scaling=self.type_scaling,
            col_sample=self.col_sample,
            row_sample=self.row_sample,
            seed=self.seed,
            backend=self.backend,
            cv_calibration=None,
        )

    def make_base(name, model):
        # Instantiate the base estimator, forwarding `random_state` and
        # `n_jobs` only when the estimator exposes them.
        params = model().get_params()
        kwargs = {}
        if "random_state" in params:
            kwargs["random_state"] = self.random_state
        # LogisticRegression-based models are excluded from n_jobs, as in
        # the original probe (the reason is not visible in this file).
        if "n_jobs" in params and "LogisticRegression" not in name:
            kwargs["n_jobs"] = self.n_jobs
        return model(**kwargs)

    def evaluate(name, fitted, start):
        # Compute every metric BEFORE touching the result lists, so that a
        # failure (e.g. in the user's custom metric) cannot leave the
        # columns with different lengths.
        y_pred = fitted.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred, normalize=True)
        b_accuracy = balanced_accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred, average="weighted")
        try:
            roc_auc = roc_auc_score(y_test, y_pred)
        except Exception as exception:
            roc_auc = None
            if self.ignore_warnings is False:
                print("ROC AUC couldn't be calculated for " + name)
                print(exception)
        elapsed = time.time() - start
        custom_metric = None
        if has_custom_metric:
            custom_metric = self.custom_metric(y_test, y_pred)

        names.append(name)
        accuracies.append(accuracy)
        balanced_accuracies.append(b_accuracy)
        roc_aucs.append(roc_auc)
        f1_scores.append(f1)
        timings.append(elapsed)
        if has_custom_metric:
            custom_scores.append(custom_metric)

        if self.verbose > 0:
            report = {
                "Model": name,
                "Accuracy": accuracy,
                "Balanced Accuracy": b_accuracy,
                "ROC AUC": roc_auc,
                "F1 Score": f1,
            }
            if has_custom_metric:
                report[self.custom_metric.__name__] = custom_metric
            report["Time taken"] = time.time() - start
            print(report)

        if self.predictions:
            predictions[name] = y_pred

    # baseline models
    try:
        baseline_names = ["RandomForestClassifier", "XGBClassifier"]
        baseline_models = [RandomForestClassifier(), xgb.XGBClassifier()]
    except Exception:
        # xgboost is unavailable or failed to initialise
        baseline_names = ["RandomForestClassifier"]
        baseline_models = [RandomForestClassifier()]

    for name, model in zip(baseline_names, baseline_models):
        start = time.time()
        try:
            model.fit(X_train, y_train)
            self.models_[name] = model
            evaluate(name, model, start)
        except Exception as exception:
            report_failure(name, exception)

    if self.estimators == "all":
        self.classifiers = [
            *DEEPCLASSIFIERS,
            *DEEPMULTITASKCLASSIFIERS,
            *DEEPSIMPLEMULTITASKCLASSIFIERS,
        ]
    else:
        selected = [
            est for est in all_estimators() if est[0] in self.estimators
        ]
        self.classifiers = (
            [
                ("DeepCustomClassifier(" + est[0] + ")", est[1])
                for est in selected
                if issubclass(est[1], ClassifierMixin)
            ]
            + [
                (
                    "DeepMultitaskClassifier(" + est[0] + ")",
                    partial(MultitaskClassifier, obj=est[1]()),
                )
                for est in selected
                if issubclass(est[1], RegressorMixin)
            ]
            + [
                (
                    "DeepSimpleMultitaskClassifier(" + est[0] + ")",
                    partial(SimpleMultitaskClassifier, obj=est[1]()),
                )
                for est in selected
                if issubclass(est[1], RegressorMixin)
            ]
        )

    for name, model in tqdm(self.classifiers):  # do parallel exec
        start = time.time()
        try:
            layer_clf = make_layer(make_base(name, model))
            # NOTE(review): the first layer is fitted on the raw inputs
            # before stacking, exactly as before; confirm whether this
            # preliminary fit is still needed.
            layer_clf.fit(X_train, y_train)

            for _ in range(self.n_layers):
                layer_clf = deepcopy(make_layer(layer_clf))

            if use_pipeline:
                fitted = Pipeline(
                    [
                        ("preprocessor", preprocessor),
                        ("classifier", layer_clf),
                    ]
                )
            else:
                fitted = layer_clf

            fitted.fit(X_train, y_train)
            self.models_[name] = fitted
            evaluate(name, fitted, start)
        except Exception as exception:
            report_failure(name, exception)

    columns = {
        "Model": names,
        "Accuracy": accuracies,
        "Balanced Accuracy": balanced_accuracies,
        "ROC AUC": roc_aucs,
        "F1 Score": f1_scores,
    }
    if has_custom_metric:
        columns["Custom metric"] = custom_scores
    columns["Time Taken"] = timings

    scores = pd.DataFrame(columns)
    scores = scores.sort_values(by=self.sort_by, ascending=False).set_index(
        "Model"
    )

    # No model may have succeeded; avoid indexing into an empty table.
    if scores.empty:
        self.best_model_ = None
    else:
        self.best_model_ = self.models_[scores.index[0]]

    if self.predictions is True:
        return scores, predictions

    return scores
Fit classifiers to X_train and y_train, predict and score on X_test, y_test.
Parameters:
X_train: array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
X_test: array-like,
Testing vectors, where rows is the number of samples
and columns is the number of features.
y_train: array-like,
Training target labels, one per row of X_train.
y_test: array-like,
Testing target labels, one per row of X_test.
Returns:
scores: Pandas DataFrame
Returns metrics of all the models in a Pandas DataFrame.
predictions: dict
Returned only when predictions=True; maps each model name to its predictions on X_test.
def provide_models(self, X_train, X_test, y_train, y_test):
    """Returns all the model objects trained. If fit hasn't been called yet,
    then it's called to return the models.

    Parameters:

        X_train: array-like,
            Training vectors, where rows is the number of samples
            and columns is the number of features.

        X_test: array-like,
            Testing vectors, where rows is the number of samples
            and columns is the number of features.

        y_train: array-like,
            Training targets, one per row of X_train.

        y_test: array-like,
            Testing targets, one per row of X_test.

    Returns:

        models: dict-object,
            Returns a dictionary with each model's pipeline as value
            and key = name of the model.
    """
    # An empty registry means nothing has been trained yet: train now.
    if not self.models_:
        self.fit(X_train, X_test, y_train, y_test)

    return self.models_
Returns all the model objects trained. If fit hasn't been called yet, then it's called to return the models.
Parameters:
X_train: array-like, Training vectors, where rows is the number of samples and columns is the number of features.
X_test: array-like, Testing vectors, where rows is the number of samples and columns is the number of features.
y_train: array-like, Training targets, one per row of X_train.
y_test: array-like, Testing targets, one per row of X_test.
Returns:
models: dict-object,
Returns a dictionary with each model's pipeline as value
and key = name of the model.
class LazyDeepRegressor(Custom, RegressorMixin):
    """
    Fitting -- almost -- all the regression algorithms with layers of
    nnetsauce's CustomRegressor and returning their scores.

    Parameters:

        verbose: int, optional (default=0)
            Any positive number for verbosity.

        ignore_warnings: bool, optional (default=True)
            When set to True, the warnings related to algorithms that are
            not able to run are ignored.

        custom_metric: function, optional (default=None)
            When function is provided, models are evaluated based on the
            custom evaluation metric provided.

        predictions: bool, optional (default=False)
            When set to True, the predictions of all the models are
            returned alongside the scores.

        sort_by: string, optional (default='RMSE')
            Sort models by a metric. Available options are 'R-Squared',
            'Adjusted R-Squared', 'RMSE', 'Time Taken' and 'Custom metric'
            (the column holding the values returned by custom_metric).

        random_state: int, optional (default=42)
            Reproducibility seed.

        estimators: list, optional (default='all')
            list of Estimators names or just 'all' (default='all')

        preprocess: bool
            preprocessing is done when set to True

        n_jobs : int, when possible, run in parallel
            For now, only used by individual models that support it.

        n_layers: int, optional (default=3)
            Number of layers of CustomRegressors to be used.

        All the other parameters are the same as CustomRegressor's.

    Attributes:

        models_: dict-object
            Returns a dictionary with each model pipeline as value
            with key as name of models.

        best_model_: object
            Returns the best model pipeline based on the sort_by metric.

    Examples:

        import nnetsauce as ns
        import numpy as np
        from sklearn import datasets
        from sklearn.utils import shuffle

        diabetes = datasets.load_diabetes()
        X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
        X = X.astype(np.float32)

        offset = int(X.shape[0] * 0.9)
        X_train, y_train = X[:offset], y[:offset]
        X_test, y_test = X[offset:], y[offset:]

        reg = ns.LazyDeepRegressor(verbose=0, ignore_warnings=False,
                                   custom_metric=None, predictions=True)
        models, predictions = reg.fit(X_train, X_test, y_train, y_test)
        print(models)

    """

    def __init__(
        self,
        verbose=0,
        ignore_warnings=True,
        custom_metric=None,
        predictions=False,
        sort_by="RMSE",
        random_state=42,
        estimators="all",
        preprocess=False,
        n_jobs=None,
        # Defining depth
        n_layers=3,
        # CustomRegressor attributes
        obj=None,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        seed=123,
        backend="cpu",
    ):
        self.verbose = verbose
        self.ignore_warnings = ignore_warnings
        self.custom_metric = custom_metric
        self.predictions = predictions
        self.sort_by = sort_by
        self.models_ = {}
        self.best_model_ = None
        self.random_state = random_state
        self.estimators = estimators
        self.preprocess = preprocess
        # The first layer is built separately in `fit`; the stacking loop
        # there only adds the remaining n_layers - 1 wrappers.
        self.n_layers = n_layers - 1
        self.n_jobs = n_jobs
        super().__init__(
            obj=obj,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )

    def fit(self, X_train, X_test, y_train, y_test):
        """Fit Regression algorithms to X_train and y_train, predict and score on X_test, y_test.

        Parameters:

            X_train : array-like,
                Training vectors, where rows is the number of samples
                and columns is the number of features.

            X_test : array-like,
                Testing vectors, where rows is the number of samples
                and columns is the number of features.

            y_train : array-like,
                Training target values, one per row of X_train.

            y_test : array-like,
                Testing target values, one per row of X_test.

        Returns:

            scores: Pandas DataFrame
                Metrics of every model that ran successfully, indexed by
                model name and sorted by `sort_by`, best model first for
                the built-in metrics.

            predictions : dict
                Only returned (as the second element of a tuple) when
                `self.predictions` is True. Maps each model name to its
                predictions on X_test.

        """
        names = []
        r2_values = []
        adjusted_r2_values = []
        rmse_values = []
        custom_scores = []
        timings = []
        predictions = {}

        use_pipeline = self.preprocess is True

        if isinstance(X_train, np.ndarray):
            X_train = pd.DataFrame(X_train)
            X_test = pd.DataFrame(X_test)

        numeric_features = X_train.select_dtypes(include=[np.number]).columns
        categorical_features = X_train.select_dtypes(include=["object"]).columns

        categorical_low, categorical_high = get_card_split(
            X_train, categorical_features
        )

        if use_pipeline:
            preprocessor = ColumnTransformer(
                transformers=[
                    ("numeric", numeric_transformer, numeric_features),
                    (
                        "categorical_low",
                        categorical_transformer_low,
                        categorical_low,
                    ),
                    (
                        "categorical_high",
                        categorical_transformer_high,
                        categorical_high,
                    ),
                ]
            )

        def report_failure(name, exception):
            # Failures are deliberately non-fatal: one broken estimator
            # must not abort the whole comparison.
            if self.ignore_warnings is False:
                print(name + " model failed to execute")
                print(exception)

        def make_layer(obj):
            # One CustomRegressor layer configured from this object's settings.
            return CustomRegressor(
                obj=obj,
                n_hidden_features=self.n_hidden_features,
                activation_name=self.activation_name,
                a=self.a,
                nodes_sim=self.nodes_sim,
                bias=self.bias,
                dropout=self.dropout,
                direct_link=self.direct_link,
                n_clusters=self.n_clusters,
                cluster_encode=self.cluster_encode,
                type_clust=self.type_clust,
                type_scaling=self.type_scaling,
                col_sample=self.col_sample,
                row_sample=self.row_sample,
                seed=self.seed,
                backend=self.backend,
            )

        def make_base(model):
            # Seed the base estimator only when it exposes `random_state`.
            if "random_state" in model().get_params():
                return model(random_state=self.random_state)
            return model()

        def evaluate(name, fitted, start):
            # Compute every metric BEFORE touching the result lists, so
            # that a failure (e.g. in the user's custom metric) cannot
            # leave the columns with different lengths.
            y_pred = fitted.predict(X_test)
            r_squared = r2_score(y_test, y_pred)
            adj_rsquared = adjusted_rsquared(
                r_squared, X_test.shape[0], X_test.shape[1]
            )
            rmse = np.sqrt(np.mean((y_test - y_pred) ** 2))
            elapsed = time.time() - start
            custom_metric = None
            if self.custom_metric:
                custom_metric = self.custom_metric(y_test, y_pred)

            names.append(name)
            r2_values.append(r_squared)
            adjusted_r2_values.append(adj_rsquared)
            rmse_values.append(rmse)
            timings.append(elapsed)
            if self.custom_metric:
                custom_scores.append(custom_metric)

            if self.verbose > 0:
                scores_verbose = {
                    "Model": name,
                    "R-Squared": r_squared,
                    "Adjusted R-Squared": adj_rsquared,
                    "RMSE": rmse,
                    "Time taken": time.time() - start,
                }
                if self.custom_metric:
                    scores_verbose[self.custom_metric.__name__] = custom_metric
                print(scores_verbose)

            if self.predictions:
                predictions[name] = y_pred

        # base models
        try:
            baseline_names = ["RandomForestRegressor", "XGBRegressor"]
            baseline_models = [RandomForestRegressor(), xgb.XGBRegressor()]
        except Exception:
            # xgboost is unavailable or failed to initialise
            baseline_names = ["RandomForestRegressor"]
            baseline_models = [RandomForestRegressor()]

        for name, model in zip(baseline_names, baseline_models):
            start = time.time()
            try:
                model.fit(X_train, y_train)
                self.models_[name] = model
                evaluate(name, model, start)
            except Exception as exception:
                report_failure(name, exception)

        if self.estimators == "all":
            self.regressors = DEEPREGRESSORS
        else:
            self.regressors = [
                ("DeepCustomRegressor(" + est[0] + ")", est[1])
                for est in all_estimators()
                if (
                    issubclass(est[1], RegressorMixin)
                    and (est[0] in self.estimators)
                )
            ]

        for name, model in tqdm(self.regressors):  # do parallel exec
            start = time.time()
            try:
                layer_regr = make_layer(make_base(model))

                # Without preprocessing the first layer is fitted before
                # stacking, as in the original implementation.
                if not use_pipeline:
                    layer_regr.fit(X_train, y_train)

                for _ in range(self.n_layers):
                    layer_regr = deepcopy(make_layer(layer_regr))

                layer_regr.fit(X_train, y_train)

                if use_pipeline:
                    fitted = Pipeline(
                        steps=[
                            ("preprocessor", preprocessor),
                            ("regressor", layer_regr),
                        ]
                    )
                    fitted.fit(X_train, y_train)
                else:
                    fitted = layer_regr

                self.models_[name] = fitted
                evaluate(name, fitted, start)
            except Exception as exception:
                report_failure(name, exception)

        scores = {
            "Model": names,
            "Adjusted R-Squared": adjusted_r2_values,
            "R-Squared": r2_values,
            "RMSE": rmse_values,
            "Time Taken": timings,
        }

        if self.custom_metric:
            scores["Custom metric"] = custom_scores

        scores = pd.DataFrame(scores)
        # R-squared style metrics are "higher is better"; every other
        # column (errors, time, custom metric) keeps the ascending order.
        higher_is_better = self.sort_by in ("R-Squared", "Adjusted R-Squared")
        scores = scores.sort_values(
            by=self.sort_by, ascending=not higher_is_better
        ).set_index("Model")

        # No model may have succeeded; avoid indexing into an empty table.
        if scores.empty:
            self.best_model_ = None
        else:
            self.best_model_ = self.models_[scores.index[0]]

        if self.predictions is True:
            return scores, predictions

        return scores

    def get_best_model(self):
        """
        This function returns the best model pipeline based on the sort_by metric.

        Returns:

            best_model: object,
                Returns the best model pipeline based on the sort_by metric
                (None when no model has been fitted successfully).

        """
        return self.best_model_

    def provide_models(self, X_train, X_test, y_train, y_test):
        """
        This function returns all the model objects trained in fit function.
        If fit is not called already, then we call fit and then return the models.

        Parameters:

            X_train : array-like,
                Training vectors, where rows is the number of samples
                and columns is the number of features.

            X_test : array-like,
                Testing vectors, where rows is the number of samples
                and columns is the number of features.

            y_train : array-like,
                Training target values, one per row of X_train.

            y_test : array-like,
                Testing target values, one per row of X_test.

        Returns:

            models: dict-object,
                Returns a dictionary with each model pipeline as value
                with key as name of models.

        """
        # An empty registry means nothing has been trained yet: train now.
        if not self.models_:
            self.fit(X_train, X_test, y_train, y_test)

        return self.models_
Fitting -- almost -- all the regression algorithms with layers of nnetsauce's CustomRegressor and returning their scores.
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, the warnings related to algorithms that are not able to run are ignored.
custom_metric: function, optional (default=None)
When function is provided, models are evaluated based on the custom evaluation metric provided.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models are returned alongside the scores.
sort_by: string, optional (default='RMSE')
Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken'
and 'Custom metric' (the column holding the values returned by custom_metric).
random_state: int, optional (default=42)
Reproducibility seed.
estimators: list, optional (default='all')
list of Estimators names or just 'all' (default='all')
preprocess: bool
preprocessing is done when set to True
n_jobs : int, when possible, run in parallel
For now, only used by individual models that support it.
n_layers: int, optional (default=3)
Number of layers of CustomRegressors to be used.
All the other parameters are the same as CustomRegressor's.
Attributes:
models_: dict-object
Returns a dictionary with each model pipeline as value
with key as name of models.
best_model_: object
Returns the best model pipeline based on the sort_by metric.
Examples:
import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle
diabetes = datasets.load_diabetes()
X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
X = X.astype(np.float32)
offset = int(X.shape[0] * 0.9)
X_train, y_train = X[:offset], y[:offset]
X_test, y_test = X[offset:], y[offset:]
reg = ns.LazyDeepRegressor(verbose=0, ignore_warnings=False, custom_metric=None, predictions=True)
models, predictions = reg.fit(X_train, X_test, y_train, y_test)
print(models)
def fit(self, X_train, X_test, y_train, y_test):
    """Fit regression algorithms to X_train and y_train, predict and score on X_test, y_test.

    Two baseline models (random forest and, when it can be instantiated,
    XGBoost) are fitted first on the raw inputs. Then every estimator in
    ``self.regressors`` is wrapped in ``1 + self.n_layers`` nested
    ``CustomRegressor`` objects, fitted (behind the preprocessing pipeline
    when ``self.preprocess is True``) and scored on the test set.

    Parameters:

        X_train : array-like,
            Training vectors, where rows is the number of samples
            and columns is the number of features.

        X_test : array-like,
            Testing vectors, where rows is the number of samples
            and columns is the number of features.

        y_train : array-like,
            Target values for the training samples.

        y_test : array-like,
            Target values for the testing samples.

    Returns:
    -------
    scores: Pandas DataFrame
        Metrics of all the models that could be fitted and scored,
        indexed by model name and sorted by ``self.sort_by`` (best first).

    predictions : dict
        Test-set predictions keyed by model name. Only returned
        (as the second element of a tuple) when ``self.predictions is True``.

    """
    score_rows = []  # one dict per model that was fitted AND fully scored
    predictions = {}

    if isinstance(X_train, np.ndarray):
        X_train = pd.DataFrame(X_train)
        X_test = pd.DataFrame(X_test)

    numeric_features = X_train.select_dtypes(include=[np.number]).columns
    categorical_features = X_train.select_dtypes(include=["object"]).columns

    categorical_low, categorical_high = get_card_split(
        X_train, categorical_features
    )

    if self.preprocess is True:
        preprocessor = ColumnTransformer(
            transformers=[
                ("numeric", numeric_transformer, numeric_features),
                (
                    "categorical_low",
                    categorical_transformer_low,
                    categorical_low,
                ),
                (
                    "categorical_high",
                    categorical_transformer_high,
                    categorical_high,
                ),
            ]
        )

    # Hyperparameters shared by every CustomRegressor layer.
    layer_params = dict(
        n_hidden_features=self.n_hidden_features,
        activation_name=self.activation_name,
        a=self.a,
        nodes_sim=self.nodes_sim,
        bias=self.bias,
        dropout=self.dropout,
        direct_link=self.direct_link,
        n_clusters=self.n_clusters,
        cluster_encode=self.cluster_encode,
        type_clust=self.type_clust,
        type_scaling=self.type_scaling,
        col_sample=self.col_sample,
        row_sample=self.row_sample,
        seed=self.seed,
        backend=self.backend,
    )

    def _stack_layers(model):
        # Build the (unfitted) tower: one CustomRegressor around the base
        # estimator, then self.n_layers further CustomRegressor wrappers.
        if "random_state" in model().get_params():
            base = model(random_state=self.random_state)
        else:
            base = model()
        stacked = CustomRegressor(obj=base, **layer_params)
        for _ in range(self.n_layers):
            stacked = deepcopy(CustomRegressor(obj=stacked, **layer_params))
        return stacked

    def _score(name, fitted, start):
        # Compute every metric BEFORE recording anything, so that a failure
        # (e.g. in the user-supplied custom metric) cannot leave the score
        # table with columns of different lengths.
        y_pred = fitted.predict(X_test)
        r_squared = r2_score(y_test, y_pred)
        adj_rsquared = adjusted_rsquared(
            r_squared, X_test.shape[0], X_test.shape[1]
        )
        rmse = np.sqrt(np.mean((y_test - y_pred) ** 2))
        if self.custom_metric:
            custom_metric = self.custom_metric(y_test, y_pred)
        elapsed = time.time() - start

        row = {
            "Model": name,
            "Adjusted R-Squared": adj_rsquared,
            "R-Squared": r_squared,
            "RMSE": rmse,
            "Time Taken": elapsed,
        }
        if self.custom_metric:
            row["Custom metric"] = custom_metric
        score_rows.append(row)

        if self.verbose > 0:
            scores_verbose = {
                "Model": name,
                "R-Squared": r_squared,
                "Adjusted R-Squared": adj_rsquared,
                "RMSE": rmse,
                "Time taken": time.time() - start,
            }
            if self.custom_metric:
                scores_verbose[self.custom_metric.__name__] = custom_metric
            print(scores_verbose)

        if self.predictions:
            predictions[name] = y_pred

    def _report_failure(name, exception):
        # Failures are deliberately best-effort: the model is skipped.
        if self.ignore_warnings is False:
            print(name + " model failed to execute")
            print(exception)

    # base models
    try:
        baselines = [
            ("RandomForestRegressor", RandomForestRegressor()),
            ("XGBRegressor", xgb.XGBRegressor()),
        ]
    except Exception:
        # presumably xgboost is an optional dependency -- fall back to the
        # random forest alone when it cannot be instantiated
        baselines = [("RandomForestRegressor", RandomForestRegressor())]

    for name, model in baselines:
        start = time.time()
        try:
            model.fit(X_train, y_train)
            self.models_[name] = model
            _score(name, model, start)
        except Exception as exception:
            _report_failure(name, exception)

    if self.estimators == "all":
        self.regressors = DEEPREGRESSORS
    else:
        self.regressors = [
            ("DeepCustomRegressor(" + est[0] + ")", est[1])
            for est in all_estimators()
            if (
                issubclass(est[1], RegressorMixin)
                and (est[0] in self.estimators)
            )
        ]

    for name, model in tqdm(self.regressors):  # do parallel exec
        start = time.time()
        try:
            regr = _stack_layers(model)
            if self.preprocess is True:
                # The tower is fitted only through the pipeline: fitting it
                # on the raw frame first would bypass the preprocessor (and
                # fail on categorical columns) besides doubling the cost.
                regr = Pipeline(
                    steps=[
                        ("preprocessor", preprocessor),
                        ("regressor", regr),
                    ]
                )
            regr.fit(X_train, y_train)
            self.models_[name] = regr
            _score(name, regr, start)
        except Exception as exception:
            _report_failure(name, exception)

    columns = [
        "Model",
        "Adjusted R-Squared",
        "R-Squared",
        "RMSE",
        "Time Taken",
    ]
    if self.custom_metric:
        columns.append("Custom metric")

    scores = pd.DataFrame(score_rows, columns=columns)

    # R-squared metrics are "higher is better"; every other built-in column
    # is an error or a duration, so lower is better.
    higher_is_better = self.sort_by in ("R-Squared", "Adjusted R-Squared")
    scores = scores.sort_values(
        by=self.sort_by, ascending=not higher_is_better
    ).set_index("Model")

    # No best model when nothing could be fitted and scored.
    self.best_model_ = (
        self.models_[scores.index[0]] if len(scores) > 0 else None
    )

    if self.predictions is True:
        return scores, predictions

    return scores
Fit Regression algorithms to X_train and y_train, predict and score on X_test, y_test.
Parameters:
X_train : array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
X_test : array-like,
Testing vectors, where rows is the number of samples
and columns is the number of features.
y_train : array-like,
Target values for the training samples.
y_test : array-like,
Target values for the testing samples.
Returns:
scores: Pandas DataFrame Returns metrics of all the models in a Pandas DataFrame.
predictions : Pandas DataFrame Returns predictions of all the models in a Pandas DataFrame.
def provide_models(self, X_train, X_test, y_train, y_test):
    """
    Return every model object stored by `fit`, fitting first if needed.

    When `self.models_` is still empty, `fit` is invoked with the
    arguments given here before the dictionary is returned.

    Parameters:

        X_train : array-like,
            Training vectors, forwarded to `fit`.

        X_test : array-like,
            Testing vectors, forwarded to `fit`.

        y_train : array-like,
            Third positional argument forwarded to `fit`.

        y_test : array-like,
            Fourth positional argument forwarded to `fit`.

    Returns:

        models: dict-object,
            `self.models_`, the dictionary of fitted models keyed by
            model name.

    """
    nothing_fitted_yet = not self.models_
    if nothing_fitted_yet:
        self.fit(X_train, X_test, y_train, y_test)

    return self.models_
This function returns all the model objects trained in the fit function. If fit has not been called yet, it is called first and the models are then returned.
Parameters:
X_train : array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
X_test : array-like,
Testing vectors, where rows is the number of samples
and columns is the number of features.
y_train : array-like,
Target values for the training samples.
y_test : array-like,
Target values for the testing samples.
Returns:
models: dict-object,
Returns a dictionary with each model pipeline as value
with key as name of models.
class LazyMTS(LazyDeepMTS):
    """
    Fitting -- almost -- all the regression algorithms to multivariate time series
    and returning their scores (no layers).

    This class only forwards its arguments to LazyDeepMTS, with `n_layers`
    fixed to 1.

    Parameters:

        verbose: int, optional (default=0)
            Any positive number for verbosity.

        ignore_warnings: bool, optional (default=True)
            When set to True, warnings related to algorithms that are not
            able to run are ignored.

        custom_metric: function, optional (default=None)
            When function is provided, models are evaluated based on the custom
            evaluation metric provided.

        predictions: bool, optional (default=False)
            When set to True, the predictions of all the models are returned as a dataframe.

        sort_by: string, optional (default=None)
            Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE',
            'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and
            provided by custom_metric. When left to None, LazyDeepMTS.__init__ resolves it to
            'WINKLERSCORE' if `replications` is not None or `type_pi` is 'gaussian',
            and to 'RMSE' otherwise.

        random_state: int, optional (default=42)
            Reproducibility seed.

        estimators: list, optional (default='all')
            list of Estimators (regression algorithms) names or just 'all' (default='all')

        preprocess: bool, preprocessing is done when set to True

        h: int, optional (default=None)
            Number of steps ahead to predict (when used, must be > 0 and <= X_test.shape[0]).

        All the other parameters are the same as MTS's.

    Attributes:

        models_: dict-object
            Returns a dictionary with each model pipeline as value
            with key as name of models.

        best_model_: object
            Returns the best model pipeline based on the sort_by metric.

    Examples:

        See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict

    """

    def __init__(
        self,
        verbose=0,
        ignore_warnings=True,
        custom_metric=None,
        predictions=False,
        sort_by=None,  # leave it as is: None lets LazyDeepMTS choose the metric
        random_state=42,
        estimators="all",
        preprocess=False,
        h=None,
        # MTS attributes
        obj=None,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        lags=15,
        type_pi="scp2-kde",
        block_size=None,
        replications=None,
        kernel=None,
        agg="mean",
        seed=123,
        backend="cpu",
        show_progress=False,
    ):
        # Plain pass-through to LazyDeepMTS; the only value not taken from
        # the caller is n_layers, pinned to 1 ("no layers").
        super().__init__(
            verbose=verbose,
            ignore_warnings=ignore_warnings,
            custom_metric=custom_metric,
            predictions=predictions,
            sort_by=sort_by,
            random_state=random_state,
            estimators=estimators,
            preprocess=preprocess,
            n_layers=1,
            h=h,
            obj=obj,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            lags=lags,
            type_pi=type_pi,
            block_size=block_size,
            replications=replications,
            kernel=kernel,
            agg=agg,
            seed=seed,
            backend=backend,
            show_progress=show_progress,
        )
Fitting -- almost -- all the regression algorithms to multivariate time series and returning their scores (no layers).
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, warnings related to algorithms that are not
able to run are ignored.
custom_metric: function, optional (default=None)
When function is provided, models are evaluated based on the custom
evaluation metric provided.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models are returned as a dataframe.
sort_by: string, optional (default='RMSE')
Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE',
'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and
provided by custom_metric.
random_state: int, optional (default=42)
Reproducibility seed.
estimators: list, optional (default='all')
list of Estimators (regression algorithms) names or just 'all' (default='all')
preprocess: bool, preprocessing is done when set to True
h: int, optional (default=None)
Number of steps ahead to predict (when used, must be > 0 and <= X_test.shape[0]).
All the other parameters are the same as MTS's.
Attributes:
models_: dict-object
Returns a dictionary with each model pipeline as value
with key as name of models.
best_model_: object
Returns the best model pipeline based on the sort_by metric.
Examples:
See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict
104class LazyDeepMTS(MTS): 105 """ 106 107 Fitting -- almost -- all the regression algorithms with layers of 108 nnetsauce's CustomRegressor to multivariate time series 109 and returning their scores. 110 111 Parameters: 112 113 verbose: int, optional (default=0) 114 Any positive number for verbosity. 115 116 ignore_warnings: bool, optional (default=True) 117 When set to True, the warning related to algorigms that are not 118 able to run are ignored. 119 120 custom_metric: function, optional (default=None) 121 When function is provided, models are evaluated based on the custom 122 evaluation metric provided. 123 124 predictions: bool, optional (default=False) 125 When set to True, the predictions of all the models models are returned as dataframe. 126 127 sort_by: string, optional (default='RMSE') 128 Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE', 129 'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and 130 provided by custom_metric. 131 132 random_state: int, optional (default=42) 133 Reproducibiility seed. 134 135 estimators: list, optional (default='all') 136 list of Estimators (regression algorithms) names or just 'all' (default='all') 137 138 preprocess: bool, preprocessing is done when set to True 139 140 n_layers: int, optional (default=1) 141 Number of layers in the network. When set to 1, the model is equivalent to a MTS. 142 143 h: int, optional (default=None) 144 Number of steps ahead to predict (when used, must be > 0 and < X_test.shape[0]). 145 146 All the other parameters are the same as MTS's. 147 148 Attributes: 149 150 models_: dict-object 151 Returns a dictionary with each model pipeline as value 152 with key as name of models. 153 154 best_model_: object 155 Returns the best model pipeline based on the sort_by metric. 
156 157 Examples: 158 159 See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict 160 161 """ 162 163 def __init__( 164 self, 165 verbose=0, 166 ignore_warnings=True, 167 custom_metric=None, 168 predictions=False, 169 sort_by=None, # leave it as is 170 random_state=42, 171 estimators="all", 172 preprocess=False, 173 n_layers=1, 174 h=None, 175 # MTS attributes 176 obj=None, 177 n_hidden_features=5, 178 activation_name="relu", 179 a=0.01, 180 nodes_sim="sobol", 181 bias=True, 182 dropout=0, 183 direct_link=True, 184 n_clusters=2, 185 cluster_encode=True, 186 type_clust="kmeans", 187 type_scaling=("std", "std", "std"), 188 lags=15, 189 type_pi="scp2-kde", 190 block_size=None, 191 replications=None, 192 kernel=None, 193 agg="mean", 194 seed=123, 195 backend="cpu", 196 show_progress=False, 197 ): 198 self.verbose = verbose 199 self.ignore_warnings = ignore_warnings 200 self.custom_metric = custom_metric 201 self.predictions = predictions 202 self.sort_by = sort_by 203 self.models_ = {} 204 self.best_model_ = None 205 self.random_state = random_state 206 self.estimators = estimators 207 self.preprocess = preprocess 208 self.n_layers = n_layers 209 self.h = h 210 super().__init__( 211 obj=obj, 212 n_hidden_features=n_hidden_features, 213 activation_name=activation_name, 214 a=a, 215 nodes_sim=nodes_sim, 216 bias=bias, 217 dropout=dropout, 218 direct_link=direct_link, 219 n_clusters=n_clusters, 220 cluster_encode=cluster_encode, 221 type_clust=type_clust, 222 type_scaling=type_scaling, 223 seed=seed, 224 backend=backend, 225 lags=lags, 226 type_pi=type_pi, 227 block_size=block_size, 228 replications=replications, 229 kernel=kernel, 230 agg=agg, 231 verbose=verbose, 232 show_progress=show_progress, 233 ) 234 if self.replications is not None or self.type_pi == "gaussian": 235 if self.sort_by is None: 236 self.sort_by = "WINKLERSCORE" 237 else: 238 if self.sort_by is None: 239 self.sort_by = "RMSE" 240 241 def fit(self, X_train, X_test, 
xreg=None, per_series=False, **kwargs): 242 """Fit Regression algorithms to X_train, predict and score on X_test. 243 244 Parameters: 245 246 X_train: array-like or data frame, 247 Training vectors, where rows is the number of samples 248 and columns is the number of features. 249 250 X_test: array-like or data frame, 251 Testing vectors, where rows is the number of samples 252 and columns is the number of features. 253 254 xreg: array-like, optional (default=None) 255 Additional (external) regressors to be passed to self.obj 256 xreg must be in 'increasing' order (most recent observations last) 257 258 per_series: bool, optional (default=False) 259 When set to True, the metrics are computed series by series. 260 261 **kwargs: dict, optional (default=None) 262 Additional parameters to be passed to `fit` method of `obj`. 263 264 Returns: 265 266 scores: Pandas DataFrame 267 Returns metrics of all the models in a Pandas DataFrame. 268 269 predictions: Pandas DataFrame 270 Returns predictions of all the models in a Pandas DataFrame. 271 272 """ 273 R2 = [] 274 ADJR2 = [] 275 ME = [] 276 MPL = [] 277 RMSE = [] 278 MAE = [] 279 MPE = [] 280 MAPE = [] 281 WINKLERSCORE = [] 282 COVERAGE = [] 283 284 # WIN = [] 285 names = [] 286 TIME = [] 287 predictions = {} 288 289 if self.custom_metric is not None: 290 CUSTOM_METRIC = [] 291 292 if self.h is None: 293 assert X_test is not None, "If h is None, X_test must be provided." 
294 295 if isinstance(X_train, np.ndarray): 296 X_train = pd.DataFrame(X_train) 297 X_test = pd.DataFrame(X_test) 298 299 self.series_names = X_train.columns.tolist() 300 301 X_train = convert_df_to_numeric(X_train) 302 X_test = convert_df_to_numeric(X_test) 303 304 numeric_features = X_train.select_dtypes(include=[np.number]).columns 305 categorical_features = X_train.select_dtypes(include=["object"]).columns 306 307 categorical_low, categorical_high = get_card_split( 308 X_train, categorical_features 309 ) 310 311 if self.preprocess: 312 preprocessor = ColumnTransformer( 313 transformers=[ 314 ("numeric", numeric_transformer, numeric_features), 315 ( 316 "categorical_low", 317 categorical_transformer_low, 318 categorical_low, 319 ), 320 ( 321 "categorical_high", 322 categorical_transformer_high, 323 categorical_high, 324 ), 325 ] 326 ) 327 328 # baselines (Classical MTS) ---- 329 for i, name in enumerate(["ARIMA", "ETS", "Theta", "VAR", "VECM"]): 330 try: 331 start = time.time() 332 regr = ClassicalMTS(model=name) 333 regr.fit(X_train, **kwargs) 334 self.models_[name] = regr 335 if self.h is None: 336 X_pred = regr.predict(h=X_test.shape[0], **kwargs) 337 else: 338 assert self.h > 0, "h must be > 0" 339 X_pred = regr.predict(h=self.h, **kwargs) 340 try: 341 X_test = X_test[0: self.h, :] 342 except Exception as e: 343 X_test = X_test.iloc[0: self.h, :] 344 345 rmse = mean_errors( 346 actual=X_test, 347 pred=X_pred, 348 scoring="root_mean_squared_error", 349 per_series=per_series, 350 ) 351 mae = mean_errors( 352 actual=X_test, 353 pred=X_pred, 354 scoring="mean_absolute_error", 355 per_series=per_series, 356 ) 357 mpl = mean_errors( 358 actual=X_test, 359 pred=X_pred, 360 scoring="mean_pinball_loss", 361 per_series=per_series, 362 ) 363 except Exception: 364 continue 365 366 names.append(name) 367 RMSE.append(rmse) 368 MAE.append(mae) 369 MPL.append(mpl) 370 371 if self.custom_metric is not None: 372 try: 373 if self.h is None: 374 custom_metric = 
self.custom_metric(X_test, X_pred) 375 else: 376 custom_metric = self.custom_metric(X_test_h, X_pred) 377 CUSTOM_METRIC.append(custom_metric) 378 except Exception as e: 379 custom_metric = np.iinfo(np.float32).max 380 CUSTOM_METRIC.append(np.iinfo(np.float32).max) 381 382 if (self.replications is not None) or (self.type_pi == "gaussian"): 383 if per_series == False: 384 winklerscore = winkler_score( 385 obj=X_pred, actual=X_test, level=95 386 ) 387 coveragecalc = coverage(X_pred, X_test, level=95) 388 else: 389 winklerscore = winkler_score( 390 obj=X_pred, actual=X_test, level=95, per_series=True 391 ) 392 coveragecalc = coverage( 393 X_pred, X_test, level=95, per_series=True 394 ) 395 WINKLERSCORE.append(winklerscore) 396 COVERAGE.append(coveragecalc) 397 TIME.append(time.time() - start) 398 399 if self.estimators == "all": 400 if self.n_layers <= 1: 401 self.regressors = REGRESSORSMTS 402 else: 403 self.regressors = DEEPREGRESSORSMTS 404 else: 405 if self.n_layers <= 1: 406 self.regressors = [ 407 ("MTS(" + est[0] + ")", est[1]) 408 for est in all_estimators() 409 if ( 410 issubclass(est[1], RegressorMixin) 411 and (est[0] in self.estimators) 412 ) 413 ] 414 else: # self.n_layers > 1 415 self.regressors = [ 416 ("DeepMTS(" + est[0] + ")", est[1]) 417 for est in all_estimators() 418 if ( 419 issubclass(est[1], RegressorMixin) 420 and (est[0] in self.estimators) 421 ) 422 ] 423 424 if self.preprocess is True: 425 for name, model in tqdm(self.regressors): # do parallel exec 426 start = time.time() 427 try: 428 if "random_state" in model().get_params().keys(): 429 pipe = Pipeline( 430 steps=[ 431 ("preprocessor", preprocessor), 432 ( 433 "regressor", 434 DeepMTS( 435 obj=model( 436 random_state=self.random_state, 437 **kwargs, 438 ), 439 n_layers=self.n_layers, 440 n_hidden_features=self.n_hidden_features, 441 activation_name=self.activation_name, 442 a=self.a, 443 nodes_sim=self.nodes_sim, 444 bias=self.bias, 445 dropout=self.dropout, 446 
direct_link=self.direct_link, 447 n_clusters=self.n_clusters, 448 cluster_encode=self.cluster_encode, 449 type_clust=self.type_clust, 450 type_scaling=self.type_scaling, 451 lags=self.lags, 452 type_pi=self.type_pi, 453 block_size=self.block_size, 454 replications=self.replications, 455 kernel=self.kernel, 456 agg=self.agg, 457 seed=self.seed, 458 backend=self.backend, 459 show_progress=self.show_progress, 460 ), 461 ), 462 ] 463 ) 464 else: # "random_state" in model().get_params().keys() 465 pipe = Pipeline( 466 steps=[ 467 ("preprocessor", preprocessor), 468 ( 469 "regressor", 470 DeepMTS( 471 obj=model(**kwargs), 472 n_layers=self.n_layers, 473 n_hidden_features=self.n_hidden_features, 474 activation_name=self.activation_name, 475 a=self.a, 476 nodes_sim=self.nodes_sim, 477 bias=self.bias, 478 dropout=self.dropout, 479 direct_link=self.direct_link, 480 n_clusters=self.n_clusters, 481 cluster_encode=self.cluster_encode, 482 type_clust=self.type_clust, 483 type_scaling=self.type_scaling, 484 lags=self.lags, 485 type_pi=self.type_pi, 486 block_size=self.block_size, 487 replications=self.replications, 488 kernel=self.kernel, 489 agg=self.agg, 490 seed=self.seed, 491 backend=self.backend, 492 show_progress=self.show_progress, 493 ), 494 ), 495 ] 496 ) 497 498 pipe.fit(X_train, **kwargs) 499 # pipe.fit(X_train, xreg=xreg) 500 501 self.models_[name] = pipe 502 503 if self.h is None: 504 X_pred = pipe["regressor"].predict(h=self.h, **kwargs) 505 else: 506 assert self.h > 0, "h must be > 0" 507 X_pred = pipe["regressor"].predict(h=self.h, **kwargs) 508 509 if (self.replications is not None) or ( 510 self.type_pi == "gaussian" 511 ): 512 rmse = mean_errors( 513 actual=X_test, 514 pred=X_pred, 515 scoring="root_mean_squared_error", 516 per_series=per_series, 517 ) 518 mae = mean_errors( 519 actual=X_test, 520 pred=X_pred, 521 scoring="mean_absolute_error", 522 per_series=per_series, 523 ) 524 mpl = mean_errors( 525 actual=X_test, 526 pred=X_pred, 527 
scoring="mean_pinball_loss", 528 per_series=per_series, 529 ) 530 winklerscore = winkler_score( 531 obj=X_pred, 532 actual=X_test, 533 level=95, 534 per_series=per_series, 535 ) 536 coveragecalc = coverage( 537 X_pred, X_test, level=95, per_series=per_series 538 ) 539 else: 540 rmse = mean_errors( 541 actual=X_test, 542 pred=X_pred, 543 scoring="root_mean_squared_error", 544 per_series=per_series, 545 ) 546 mae = mean_errors( 547 actual=X_test, 548 pred=X_pred, 549 scoring="mean_absolute_error", 550 per_series=per_series, 551 ) 552 mpl = mean_errors( 553 actual=X_test, 554 pred=X_pred, 555 scoring="mean_pinball_loss", 556 per_series=per_series, 557 ) 558 559 names.append(name) 560 RMSE.append(rmse) 561 MAE.append(mae) 562 MPL.append(mpl) 563 564 if (self.replications is not None) or ( 565 self.type_pi == "gaussian" 566 ): 567 WINKLERSCORE.append(winklerscore) 568 COVERAGE.append(coveragecalc) 569 TIME.append(time.time() - start) 570 571 if self.custom_metric is not None: 572 try: 573 custom_metric = self.custom_metric(X_test, X_pred) 574 CUSTOM_METRIC.append(custom_metric) 575 except Exception as e: 576 custom_metric = np.iinfo(np.float32).max 577 CUSTOM_METRIC.append(custom_metric) 578 579 if self.verbose > 0: 580 if (self.replications is not None) or ( 581 self.type_pi == "gaussian" 582 ): 583 scores_verbose = { 584 "Model": name, 585 "RMSE": rmse, 586 "MAE": mae, 587 "MPL": mpl, 588 "WINKLERSCORE": winklerscore, 589 "COVERAGE": coveragecalc, 590 "Time taken": time.time() - start, 591 } 592 else: 593 scores_verbose = { 594 "Model": name, 595 "RMSE": rmse, 596 "MAE": mae, 597 "MPL": mpl, 598 "Time taken": time.time() - start, 599 } 600 601 if self.custom_metric is not None: 602 scores_verbose["Custom metric"] = custom_metric 603 604 if self.predictions: 605 predictions[name] = X_pred 606 except Exception as exception: 607 if self.ignore_warnings is False: 608 print(name + " model failed to execute") 609 print(exception) 610 611 else: # no preprocessing 612 for 
name, model in tqdm(self.regressors): # do parallel exec 613 start = time.time() 614 try: 615 if "random_state" in model().get_params().keys(): 616 pipe = DeepMTS( 617 obj=model(random_state=self.random_state, **kwargs), 618 n_layers=self.n_layers, 619 n_hidden_features=self.n_hidden_features, 620 activation_name=self.activation_name, 621 a=self.a, 622 nodes_sim=self.nodes_sim, 623 bias=self.bias, 624 dropout=self.dropout, 625 direct_link=self.direct_link, 626 n_clusters=self.n_clusters, 627 cluster_encode=self.cluster_encode, 628 type_clust=self.type_clust, 629 type_scaling=self.type_scaling, 630 lags=self.lags, 631 type_pi=self.type_pi, 632 block_size=self.block_size, 633 replications=self.replications, 634 kernel=self.kernel, 635 agg=self.agg, 636 seed=self.seed, 637 backend=self.backend, 638 show_progress=self.show_progress, 639 ) 640 else: 641 pipe = DeepMTS( 642 obj=model(**kwargs), 643 n_layers=self.n_layers, 644 n_hidden_features=self.n_hidden_features, 645 activation_name=self.activation_name, 646 a=self.a, 647 nodes_sim=self.nodes_sim, 648 bias=self.bias, 649 dropout=self.dropout, 650 direct_link=self.direct_link, 651 n_clusters=self.n_clusters, 652 cluster_encode=self.cluster_encode, 653 type_clust=self.type_clust, 654 type_scaling=self.type_scaling, 655 lags=self.lags, 656 type_pi=self.type_pi, 657 block_size=self.block_size, 658 replications=self.replications, 659 kernel=self.kernel, 660 agg=self.agg, 661 seed=self.seed, 662 backend=self.backend, 663 show_progress=self.show_progress, 664 ) 665 666 pipe.fit(X_train, xreg, **kwargs) 667 # pipe.fit(X_train, xreg=xreg) # DO xreg like in `ahead` 668 669 self.models_[name] = pipe 670 671 if self.preprocess is True: 672 if self.h is None: 673 X_pred = pipe["regressor"].predict( 674 h=X_test.shape[0], **kwargs 675 ) 676 else: 677 assert ( 678 self.h > 0 and self.h <= X_test.shape[0] 679 ), "h must be > 0 and < X_test.shape[0]" 680 X_pred = pipe["regressor"].predict( 681 h=self.h, **kwargs 682 ) 683 684 else: 
685 if self.h is None: 686 X_pred = pipe.predict( 687 h=X_test.shape[0], 688 **kwargs, 689 # X_pred = pipe.predict(h=X_test.shape[0], new_xreg=new_xreg) ## DO xreg like in `ahead` 690 ) 691 else: 692 assert ( 693 self.h > 0 and self.h <= X_test.shape[0] 694 ), "h must be > 0 and < X_test.shape[0]" 695 X_pred = pipe.predict(h=self.h, **kwargs) 696 697 if self.h is None: 698 if (self.replications is not None) or ( 699 self.type_pi == "gaussian" 700 ): 701 rmse = mean_errors( 702 actual=X_test, 703 pred=X_pred.mean, 704 scoring="root_mean_squared_error", 705 per_series=per_series, 706 ) 707 mae = mean_errors( 708 actual=X_test, 709 pred=X_pred.mean, 710 scoring="mean_absolute_error", 711 per_series=per_series, 712 ) 713 mpl = mean_errors( 714 actual=X_test, 715 pred=X_pred.mean, 716 scoring="mean_pinball_loss", 717 per_series=per_series, 718 ) 719 winklerscore = winkler_score( 720 obj=X_pred, 721 actual=X_test, 722 level=95, 723 per_series=per_series, 724 ) 725 coveragecalc = coverage( 726 X_pred, X_test, level=95, per_series=per_series 727 ) 728 else: # no prediction interval 729 rmse = mean_errors( 730 actual=X_test, 731 pred=X_pred, 732 scoring="root_mean_squared_error", 733 per_series=per_series, 734 ) 735 mae = mean_errors( 736 actual=X_test, 737 pred=X_pred, 738 scoring="mean_absolute_error", 739 per_series=per_series, 740 ) 741 mpl = mean_errors( 742 actual=X_test, 743 pred=X_pred, 744 scoring="mean_pinball_loss", 745 per_series=per_series, 746 ) 747 else: # self.h is not None 748 if (self.replications is not None) or ( 749 self.type_pi == "gaussian" 750 ): 751 if isinstance(X_test, pd.DataFrame): 752 X_test_h = X_test.iloc[0: self.h, :] 753 rmse = mean_errors( 754 actual=X_test_h, 755 pred=X_pred, 756 scoring="root_mean_squared_error", 757 per_series=per_series, 758 ) 759 mae = mean_errors( 760 actual=X_test_h, 761 pred=X_pred, 762 scoring="mean_absolute_error", 763 per_series=per_series, 764 ) 765 mpl = mean_errors( 766 actual=X_test_h, 767 pred=X_pred, 768 
scoring="mean_pinball_loss", 769 per_series=per_series, 770 ) 771 winklerscore = winkler_score( 772 obj=X_pred, 773 actual=X_test_h, 774 level=95, 775 per_series=per_series, 776 ) 777 coveragecalc = coverage( 778 X_pred, 779 X_test_h, 780 level=95, 781 per_series=per_series, 782 ) 783 else: 784 X_test_h = X_test[0: self.h, :] 785 rmse = mean_errors( 786 actual=X_test_h, 787 pred=X_pred, 788 scoring="root_mean_squared_error", 789 per_series=per_series, 790 ) 791 mae = mean_errors( 792 actual=X_test_h, 793 pred=X_pred, 794 scoring="mean_absolute_error", 795 per_series=per_series, 796 ) 797 mpl = mean_errors( 798 actual=X_test_h, 799 pred=X_pred, 800 scoring="mean_pinball_loss", 801 per_series=per_series, 802 ) 803 winklerscore = winkler_score( 804 obj=X_pred, 805 actual=X_test_h, 806 level=95, 807 per_series=per_series, 808 ) 809 coveragecalc = coverage( 810 X_pred, 811 X_test_h, 812 level=95, 813 per_series=per_series, 814 ) 815 else: # no prediction interval 816 if isinstance(X_test, pd.DataFrame): 817 X_test_h = X_test.iloc[0: self.h, :] 818 rmse = mean_errors( 819 actual=X_test_h, 820 pred=X_pred, 821 scoring="root_mean_squared_error", 822 per_series=per_series, 823 ) 824 mae = mean_errors( 825 actual=X_test_h, 826 pred=X_pred, 827 scoring="mean_absolute_error", 828 per_series=per_series, 829 ) 830 mpl = mean_errors( 831 actual=X_test_h, 832 pred=X_pred, 833 scoring="mean_pinball_loss", 834 per_series=per_series, 835 ) 836 else: 837 X_test_h = X_test[0: self.h, :] 838 rmse = mean_errors( 839 actual=X_test_h, 840 pred=X_pred, 841 scoring="root_mean_squared_error", 842 per_series=per_series, 843 ) 844 mae = mean_errors( 845 actual=X_test_h, 846 pred=X_pred, 847 scoring="mean_absolute_error", 848 per_series=per_series, 849 ) 850 851 names.append(name) 852 RMSE.append(rmse) 853 MAE.append(mae) 854 MPL.append(mpl) 855 if (self.replications is not None) or ( 856 self.type_pi == "gaussian" 857 ): 858 WINKLERSCORE.append(winklerscore) 859 COVERAGE.append(coveragecalc) 
860 TIME.append(time.time() - start) 861 862 if self.custom_metric is not None: 863 try: 864 if self.h is None: 865 custom_metric = self.custom_metric( 866 X_test, X_pred 867 ) 868 else: 869 custom_metric = self.custom_metric( 870 X_test_h, X_pred 871 ) 872 CUSTOM_METRIC.append(custom_metric) 873 except Exception as e: 874 custom_metric = np.iinfo(np.float32).max 875 CUSTOM_METRIC.append(np.iinfo(np.float32).max) 876 877 if self.verbose > 0: 878 if (self.replications is not None) or ( 879 self.type_pi == "gaussian" 880 ): 881 scores_verbose = { 882 "Model": name, 883 "RMSE": rmse, 884 "MAE": mae, 885 "MPL": mpl, 886 "WINKLERSCORE": winklerscore, 887 "COVERAGE": coveragecalc, 888 "Time taken": time.time() - start, 889 } 890 else: 891 scores_verbose = { 892 "Model": name, 893 "RMSE": rmse, 894 "MAE": mae, 895 "MPL": mpl, 896 "Time taken": time.time() - start, 897 } 898 899 if self.custom_metric is not None: 900 scores_verbose["Custom metric"] = custom_metric 901 902 if self.predictions: 903 predictions[name] = X_pred 904 905 except Exception as exception: 906 if self.ignore_warnings is False: 907 print(name + " model failed to execute") 908 print(exception) 909 910 if (self.replications is not None) or (self.type_pi == "gaussian"): 911 scores = { 912 "Model": names, 913 "RMSE": RMSE, 914 "MAE": MAE, 915 "MPL": MPL, 916 "WINKLERSCORE": WINKLERSCORE, 917 "COVERAGE": COVERAGE, 918 "Time Taken": TIME, 919 } 920 else: 921 scores = { 922 "Model": names, 923 "RMSE": RMSE, 924 "MAE": MAE, 925 "MPL": MPL, 926 "Time Taken": TIME, 927 } 928 929 if self.custom_metric is not None: 930 scores["Custom metric"] = CUSTOM_METRIC 931 932 if per_series: 933 scores = dict_to_dataframe_series(scores, self.series_names) 934 else: 935 scores = pd.DataFrame(scores) 936 937 try: # case per_series, can't be sorted 938 scores = scores.sort_values( 939 by=self.sort_by, ascending=True 940 ).set_index("Model") 941 942 self.best_model_ = self.models_[scores.index[0]] 943 except Exception as e: 944 
pass 945 946 if self.predictions is True: 947 return scores, predictions 948 949 return scores 950 951 def get_best_model(self): 952 """ 953 This function returns the best model pipeline based on the sort_by metric. 954 955 Returns: 956 957 best_model: object, 958 Returns the best model pipeline based on the sort_by metric. 959 960 """ 961 return self.best_model_ 962 963 def provide_models(self, X_train, X_test): 964 """ 965 This function returns all the model objects trained in fit function. 966 If fit is not called already, then we call fit and then return the models. 967 968 Parameters: 969 970 X_train : array-like, 971 Training vectors, where rows is the number of samples 972 and columns is the number of features. 973 974 X_test : array-like, 975 Testing vectors, where rows is the number of samples 976 and columns is the number of features. 977 978 Returns: 979 980 models: dict-object, 981 Returns a dictionary with each model pipeline as value 982 with key as name of models. 983 984 """ 985 if self.h is None: 986 if len(self.models_.keys()) == 0: 987 self.fit(X_train, X_test) 988 else: 989 if len(self.models_.keys()) == 0: 990 if isinstance(X_test, pd.DataFrame): 991 self.fit(X_train, X_test.iloc[0: self.h, :]) 992 else: 993 self.fit(X_train, X_test[0: self.h, :]) 994 995 return self.models_
Fitting -- almost -- all the regression algorithms with layers of nnetsauce's CustomRegressor to multivariate time series and returning their scores.
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, the warnings related to algorithms that are not
able to run are ignored.
custom_metric: function, optional (default=None)
When function is provided, models are evaluated based on the custom
evaluation metric provided.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models are returned as a dictionary keyed by model name.
sort_by: string, optional (default='RMSE')
Sort models by a metric, i.e. by a column of the scores table. Available options are 'RMSE', 'MAE', 'MPL',
'WINKLERSCORE' and 'COVERAGE' (the last two only when prediction intervals are computed), or
'Custom metric' when custom_metric is provided.
random_state: int, optional (default=42)
Reproducibility seed.
estimators: list, optional (default='all')
list of Estimators (regression algorithms) names or just 'all' (default='all')
preprocess: bool, preprocessing is done when set to True
n_layers: int, optional (default=1)
Number of layers in the network. When set to 1, the model is equivalent to a MTS.
h: int, optional (default=None)
Number of steps ahead to predict (when used, must be > 0 and <= X_test.shape[0]).
All the other parameters are the same as MTS's.
Attributes:
models_: dict-object
Returns a dictionary with each model pipeline as value
with key as name of models.
best_model_: object
Returns the best model pipeline based on the sort_by metric.
Examples:
See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict
def fit(self, X_train, X_test, xreg=None, per_series=False, **kwargs):
    """Fit Regression algorithms to X_train, predict and score on X_test.

    Parameters:

        X_train: array-like or data frame,
            Training vectors, where rows is the number of samples
            and columns is the number of features.

        X_test: array-like or data frame,
            Testing vectors, where rows is the number of samples
            and columns is the number of features.

        xreg: array-like, optional (default=None)
            Additional (external) regressors, forwarded to the `fit`
            of each model when no preprocessing pipeline is used.
            xreg must be in 'increasing' order (most recent observations last)

        per_series: bool, optional (default=False)
            When set to True, the metrics are computed series by series.

        **kwargs: dict, optional (default=None)
            Additional parameters, forwarded to the constructor of each
            base estimator and to the `fit` and `predict` calls.

    Returns:

        scores: Pandas DataFrame
            Returns metrics of all the models in a Pandas DataFrame.

        predictions: dict
            Returned (after scores) only when self.predictions is True;
            maps each model name to its forecast.

    Raises:

        ValueError: when self.h is set but is not in (0, X_test.shape[0]].

    """
    if self.h is None:
        assert X_test is not None, "If h is None, X_test must be provided."

    if isinstance(X_train, np.ndarray):
        X_train = pd.DataFrame(X_train)
        X_test = pd.DataFrame(X_test)

    self.series_names = X_train.columns.tolist()

    X_train = convert_df_to_numeric(X_train)
    X_test = convert_df_to_numeric(X_test)

    # Forecast horizon and the matching slice of the test set are fixed
    # once, so that every model is scored against the same observations.
    if self.h is None:
        horizon = X_test.shape[0]
        actual = X_test
    else:
        if not 0 < self.h <= X_test.shape[0]:
            raise ValueError("h must be > 0 and <= X_test.shape[0]")
        horizon = self.h
        if isinstance(X_test, pd.DataFrame):
            actual = X_test.iloc[0:horizon, :]
        else:
            actual = X_test[0:horizon, :]

    with_pi = (self.replications is not None) or (
        self.type_pi == "gaussian"
    )

    numeric_features = X_train.select_dtypes(include=[np.number]).columns
    categorical_features = X_train.select_dtypes(include=["object"]).columns

    categorical_low, categorical_high = get_card_split(
        X_train, categorical_features
    )

    if self.preprocess:
        preprocessor = ColumnTransformer(
            transformers=[
                ("numeric", numeric_transformer, numeric_features),
                (
                    "categorical_low",
                    categorical_transformer_low,
                    categorical_low,
                ),
                (
                    "categorical_high",
                    categorical_transformer_high,
                    categorical_high,
                ),
            ]
        )

    # One dict per successfully scored model; a row is only stored once
    # all of its metrics exist, which keeps the score columns aligned.
    rows = []
    predictions = {}

    # baselines (Classical MTS) ----
    for name in ("ARIMA", "ETS", "Theta", "VAR", "VECM"):
        start = time.time()
        try:
            regr = ClassicalMTS(model=name)
            regr.fit(X_train, **kwargs)
            self.models_[name] = regr
            X_pred = regr.predict(h=horizon, **kwargs)
            row = self._score_forecast(
                name, X_pred, actual, per_series, with_pi, start
            )
        except Exception:
            # best effort: a failing baseline is simply left out
            continue
        rows.append(row)

    if self.estimators == "all":
        if self.n_layers <= 1:
            self.regressors = REGRESSORSMTS
        else:
            self.regressors = DEEPREGRESSORSMTS
    else:
        prefix = "MTS(" if self.n_layers <= 1 else "DeepMTS("
        self.regressors = [
            (prefix + est[0] + ")", est[1])
            for est in all_estimators()
            if (
                issubclass(est[1], RegressorMixin)
                and (est[0] in self.estimators)
            )
        ]

    use_pipeline = self.preprocess is True

    for name, model in tqdm(self.regressors):  # do parallel exec
        start = time.time()
        try:
            deep_mts = self._build_deep_mts(model, **kwargs)
            if use_pipeline:
                pipe = Pipeline(
                    steps=[
                        ("preprocessor", preprocessor),
                        ("regressor", deep_mts),
                    ]
                )
                pipe.fit(X_train, **kwargs)
                forecaster = pipe["regressor"]
            else:
                pipe = deep_mts
                pipe.fit(X_train, xreg, **kwargs)
                forecaster = pipe

            self.models_[name] = pipe

            X_pred = forecaster.predict(h=horizon, **kwargs)
            row = self._score_forecast(
                name, X_pred, actual, per_series, with_pi, start
            )
        except Exception as exception:
            if self.ignore_warnings is False:
                print(name + " model failed to execute")
                print(exception)
            continue

        rows.append(row)

        if self.verbose > 0:
            print(row)

        if self.predictions:
            predictions[name] = X_pred

    columns = ["Model", "RMSE", "MAE", "MPL"]
    if with_pi:
        columns += ["WINKLERSCORE", "COVERAGE"]
    columns.append("Time Taken")
    if self.custom_metric is not None:
        columns.append("Custom metric")

    scores = {col: [row[col] for row in rows] for col in columns}

    if per_series:
        scores = dict_to_dataframe_series(scores, self.series_names)
    else:
        scores = pd.DataFrame(scores)

    try:  # case per_series, can't be sorted
        scores = scores.sort_values(
            by=self.sort_by, ascending=True
        ).set_index("Model")

        self.best_model_ = self.models_[scores.index[0]]
    except Exception:
        # deliberate best effort: the table is returned unsorted and
        # best_model_ is left untouched when sorting is not possible
        pass

    if self.predictions is True:
        return scores, predictions

    return scores

def _build_deep_mts(self, model, **kwargs):
    """Wrap an estimator class into a DeepMTS configured from self."""
    # Seed the base estimator only when it exposes `random_state`.
    if "random_state" in model().get_params().keys():
        base_estimator = model(random_state=self.random_state, **kwargs)
    else:
        base_estimator = model(**kwargs)

    return DeepMTS(
        obj=base_estimator,
        n_layers=self.n_layers,
        n_hidden_features=self.n_hidden_features,
        activation_name=self.activation_name,
        a=self.a,
        nodes_sim=self.nodes_sim,
        bias=self.bias,
        dropout=self.dropout,
        direct_link=self.direct_link,
        n_clusters=self.n_clusters,
        cluster_encode=self.cluster_encode,
        type_clust=self.type_clust,
        type_scaling=self.type_scaling,
        lags=self.lags,
        type_pi=self.type_pi,
        block_size=self.block_size,
        replications=self.replications,
        kernel=self.kernel,
        agg=self.agg,
        seed=self.seed,
        backend=self.backend,
        show_progress=self.show_progress,
    )

def _score_forecast(self, name, X_pred, actual, per_series, with_pi, start):
    """Compute every metric of one model and return them as a dict row."""
    # Point metrics need the point forecast: results that are not plain
    # arrays/data frames are expected to carry it in a `mean` attribute.
    if isinstance(X_pred, (pd.DataFrame, pd.Series, np.ndarray)):
        point_forecast = X_pred
    else:
        point_forecast = getattr(X_pred, "mean", X_pred)

    row = {"Model": name}
    for label, scoring in (
        ("RMSE", "root_mean_squared_error"),
        ("MAE", "mean_absolute_error"),
        ("MPL", "mean_pinball_loss"),
    ):
        row[label] = mean_errors(
            actual=actual,
            pred=point_forecast,
            scoring=scoring,
            per_series=per_series,
        )

    if with_pi:
        row["WINKLERSCORE"] = winkler_score(
            obj=X_pred,
            actual=actual,
            level=95,
            per_series=per_series,
        )
        row["COVERAGE"] = coverage(
            X_pred, actual, level=95, per_series=per_series
        )

    row["Time Taken"] = time.time() - start

    if self.custom_metric is not None:
        try:
            row["Custom metric"] = self.custom_metric(actual, X_pred)
        except Exception:
            # np.finfo, not np.iinfo: float32 is not an integer dtype
            row["Custom metric"] = np.finfo(np.float32).max

    return row
Fit Regression algorithms to X_train, predict and score on X_test.
Parameters:
X_train: array-like or data frame,
Training vectors, where rows is the number of samples
and columns is the number of features.
X_test: array-like or data frame,
Testing vectors, where rows is the number of samples
and columns is the number of features.
xreg: array-like, optional (default=None)
Additional (external) regressors to be passed to self.obj
xreg must be in 'increasing' order (most recent observations last)
per_series: bool, optional (default=False)
When set to True, the metrics are computed series by series.
**kwargs: dict, optional (default=None)
Additional parameters to be passed to `fit` method of `obj`.
Returns:
scores: Pandas DataFrame
Returns metrics of all the models in a Pandas DataFrame.
predictions: dict
Returned in addition to scores only when self.predictions is True; maps each model name to its predictions.
def provide_models(self, X_train, X_test):
    """
    Return every model object trained by `fit`.

    When no model has been stored yet, `fit` is run first; with a
    forecast horizon `self.h` set, only the first `self.h` rows of
    X_test are handed to `fit`.

    Parameters:

        X_train : array-like,
            Training vectors, where rows is the number of samples
            and columns is the number of features.

        X_test : array-like,
            Testing vectors, where rows is the number of samples
            and columns is the number of features.

    Returns:

        models: dict-object,
            Returns a dictionary with each model pipeline as value
            with key as name of models.

    """
    horizon = self.h
    already_fitted = len(self.models_.keys()) > 0

    # Nothing to train: hand back what is already stored.
    if already_fitted:
        return self.models_

    if horizon is None:
        self.fit(X_train, X_test)
    elif isinstance(X_test, pd.DataFrame):
        self.fit(X_train, X_test.iloc[0:horizon, :])
    else:
        self.fit(X_train, X_test[0:horizon, :])

    return self.models_
This function returns all the model objects trained in fit function. If fit is not called already, then we call fit and then return the models.
Parameters:
X_train : array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
X_test : array-like,
Testing vectors, where rows is the number of samples
and columns is the number of features.
Returns:
models: dict-object,
Returns a dictionary with each model pipeline as value
with key as name of models.
class MLARCH:
    """Machine Learning-agnostic ARCH for nearly-stationary time series (e.g., returns)

    Parameters
    ----------
    model_mean : object
        Model for mean component
    model_sigma : object
        Model for volatility component (sklearn regressor)
    model_residuals : object
        Model for standardized residuals
    lags_vol : int, default=10
        Number of lags for squared residuals in volatility model
    """

    def __init__(self, model_mean, model_sigma, model_residuals, lags_vol=10):
        self.model_mean = model_mean
        self.model_sigma = model_sigma
        self.model_residuals = model_residuals
        self.lags_vol = lags_vol

    def _create_lags(self, y, lags):
        """Create lagged feature matrix

        Row t holds y[t], ..., y[t + lags - 1], i.e. the `lags` values
        that precede y[t + lags].
        """
        n = len(y)
        if n <= lags:
            raise ValueError(f"Series length {n} must be > lags {lags}")
        X = np.zeros((n - lags, lags))
        for i in range(lags):
            X[:, i] = y[i: (n - lags + i)]
        return X

    @staticmethod
    def _point_values(pred):
        """Flatten a forecast into a 1-D array of point values.

        Accepts anything exposing `.values` (e.g. a data frame), a plain
        array, or a namedtuple-like result carrying the point forecast in
        its `mean` field.
        """
        if isinstance(pred, tuple) and hasattr(pred, "mean"):
            pred = pred.mean
        return np.asarray(getattr(pred, "values", pred)).ravel()

    def fit(self, y, **kwargs):
        """Fit the MLARCH model

        Parameters
        ----------
        y : array-like
            Target time series (should be stationary, e.g., returns)
        **kwargs
            Accepted for interface compatibility; not used.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If fewer than ``lags_vol + 20`` observations are provided.
        """
        # Format input (np.asarray also covers plain lists and tuples)
        if isinstance(y, (pd.Series, pd.DataFrame)):
            y = y.values
        y = np.asarray(y).ravel()

        if len(y) < self.lags_vol + 20:
            raise ValueError(f"Need at least {self.lags_vol + 20} observations")

        # Step 1: Fit mean model
        self.model_mean.fit(y.reshape(-1, 1))
        mean_residuals = self.model_mean.residuals_.ravel()

        # Step 2: Fit ARCH volatility model on lagged squared residuals
        resid_squared = mean_residuals**2
        X_vol = self._create_lags(resid_squared, self.lags_vol)
        # small offset keeps the log finite for zero residuals
        y_vol = np.log(resid_squared[self.lags_vol:] + 1e-8)

        self.model_sigma.fit(X_vol, y_vol)

        # Get fitted volatility (the model works on the log scale)
        fitted_sigma = np.exp(self.model_sigma.predict(X_vol))
        fitted_volatility = np.sqrt(fitted_sigma)

        # Step 3: Compute standardized residuals with proper scaling
        standardized_residuals = (
            mean_residuals[self.lags_vol:] / fitted_volatility
        )

        # Enforce zero mean and unit variance
        self.z_mean_ = np.mean(standardized_residuals)
        self.z_std_ = np.std(standardized_residuals)
        if self.z_std_ == 0:
            # constant residuals: avoid 0/0 and leave them unscaled
            self.z_std_ = 1.0
        standardized_residuals = (
            standardized_residuals - self.z_mean_
        ) / self.z_std_

        # Step 4: Fit residuals model
        self.model_residuals.fit(standardized_residuals.reshape(-1, 1))

        # Store for prediction
        self.last_residuals_squared_ = resid_squared[-self.lags_vol:]

        # Store diagnostics
        self.fitted_volatility_mean_ = np.mean(fitted_volatility)
        self.fitted_volatility_std_ = np.std(fitted_volatility)

        return self

    def predict(self, h=5, level=95, return_sims=False):
        """Predict future values

        Parameters
        ----------
        h : int
            Forecast horizon
        level : int
            Confidence level for prediction intervals
        return_sims : bool
            If True, return full simulation paths (when the residuals
            model provides them as a data frame in a `sims` attribute;
            otherwise Gaussian intervals are used and `sims` is None)

        Returns
        -------
        DescribeResult
            Named tuple with mean, sims, lower, upper
        """
        DescribeResult = namedtuple(
            "DescribeResult", ("mean", "sims", "lower", "upper")
        )

        # Get mean forecast
        mean_forecast = self._point_values(self.model_mean.predict(h=h))

        # Recursive ARCH volatility forecasting
        sigma_forecast = np.zeros(h)
        current_lags = self.last_residuals_squared_.copy()

        for i in range(h):
            log_sigma_t = self.model_sigma.predict(
                current_lags.reshape(1, -1)
            )[0]
            sigma_forecast[i] = np.exp(log_sigma_t)
            # Update lags with predicted variance
            current_lags = np.append(current_lags[1:], sigma_forecast[i])

        volatility = np.sqrt(sigma_forecast)

        # Predict standardized residuals once and rescale
        preds_z = self.model_residuals.predict(h=h)
        z_forecast = self._point_values(preds_z) * self.z_std_ + self.z_mean_

        # Combine: mu + z * sigma
        point_forecast = mean_forecast + z_forecast * volatility

        # Generate prediction intervals
        sims = None
        sims_z_normalized = (
            getattr(preds_z, "sims", None) if return_sims else None
        )

        if isinstance(sims_z_normalized, pd.DataFrame):
            # Rescale all simulated paths at once: (h, n_sims)
            z_sims = sims_z_normalized.values * self.z_std_ + self.z_mean_
            sims = mean_forecast[:, None] + z_sims * volatility[:, None]

            alpha = 1 - level / 100
            lower_bound = np.quantile(sims, alpha / 2, axis=1)
            upper_bound = np.quantile(sims, 1 - alpha / 2, axis=1)
        else:
            # Gaussian intervals with proper scaling
            z_score = norm.ppf(1 - (1 - level / 100) / 2)
            margin = z_score * volatility * self.z_std_
            lower_bound = point_forecast - margin
            upper_bound = point_forecast + margin

        return DescribeResult(point_forecast, sims, lower_bound, upper_bound)
Machine Learning-agnostic ARCH for nearly-stationary time series (e.g., returns)
Parameters
model_mean : object Model for mean component model_sigma : object Model for volatility component (sklearn regressor) model_residuals : object Model for standardized residuals lags_vol : int, default=10 Number of lags for squared residuals in volatility model
def fit(self, y, **kwargs):
    """Fit the MLARCH model

    Parameters
    ----------
    y : array-like
        Target time series (should be stationary, e.g., returns).
        Lists, tuples, numpy arrays, pandas Series and DataFrames are
        accepted; the input is flattened to one dimension.
    **kwargs
        Accepted for interface compatibility; currently unused.

    Returns
    -------
    self

    Raises
    ------
    ValueError
        If fewer than ``lags_vol + 20`` observations are supplied.
    """
    # np.asarray handles pandas objects as well as plain sequences, which
    # have no .ravel() of their own.
    y = np.asarray(y).ravel()

    if len(y) < self.lags_vol + 20:
        raise ValueError(f"Need at least {self.lags_vol + 20} observations")

    # Step 1: Fit mean model
    self.model_mean.fit(y.reshape(-1, 1))
    mean_residuals = self.model_mean.residuals_.ravel()

    # Step 2: Fit ARCH volatility model on lagged squared residuals.
    # The target is on the log scale (offset avoids log(0)), so that the
    # back-transformed prediction is always positive.
    resid_squared = mean_residuals**2
    X_vol = self._create_lags(resid_squared, self.lags_vol)
    y_vol = np.log(resid_squared[self.lags_vol :] + 1e-8)

    self.model_sigma.fit(X_vol, y_vol)

    # Fitted conditional variance (exp of the predicted log-variance)
    fitted_sigma = np.exp(self.model_sigma.predict(X_vol))
    fitted_vol = np.sqrt(fitted_sigma)

    # Step 3: Compute standardized residuals
    standardized_residuals = mean_residuals[self.lags_vol :] / fitted_vol

    # Enforce zero mean and unit variance. A (numerically) zero spread
    # would turn the rescaling into 0/0 and feed NaNs to the residuals
    # model, so a unit scale is used in that degenerate case.
    self.z_mean_ = np.mean(standardized_residuals)
    z_std = np.std(standardized_residuals)
    self.z_std_ = 1.0 if np.isclose(z_std, 0.0) else z_std
    standardized_residuals = (
        standardized_residuals - self.z_mean_
    ) / self.z_std_

    # Step 4: Fit residuals model
    self.model_residuals.fit(standardized_residuals.reshape(-1, 1))

    # Store the most recent squared residuals for use at prediction time
    self.last_residuals_squared_ = resid_squared[-self.lags_vol :]

    # Store diagnostics
    self.fitted_volatility_mean_ = np.mean(fitted_vol)
    self.fitted_volatility_std_ = np.std(fitted_vol)

    return self
Fit the MLARCH model
Parameters
y : array-like Target time series (should be stationary, e.g., returns)
Returns
self
def predict(self, h=5, level=95, return_sims=False):
    """Predict future values

    Parameters
    ----------
    h : int
        Forecast horizon
    level : int
        Confidence level for prediction intervals
    return_sims : bool
        If True, return full simulation paths (when the residuals model
        provides them as a DataFrame in a ``sims`` attribute); the
        interval bounds are then empirical quantiles of those paths.
        Otherwise Gaussian intervals are returned and ``sims`` is None.

    Returns
    -------
    DescribeResult
        Named tuple with mean, sims, lower, upper
    """
    DescribeResult = namedtuple(
        "DescribeResult", ("mean", "sims", "lower", "upper")
    )

    # Get mean forecast
    mean_forecast = self.model_mean.predict(h=h).values.ravel()

    # Recursive ARCH volatility forecasting: each predicted variance is
    # fed back as the newest lag for the next step.
    sigma_forecast = np.zeros(h)
    current_lags = self.last_residuals_squared_.copy()
    for i in range(h):
        log_sigma_t = self.model_sigma.predict(current_lags.reshape(1, -1))[0]
        sigma_forecast[i] = np.exp(log_sigma_t)
        current_lags = np.append(current_lags[1:], sigma_forecast[i])
    vol_forecast = np.sqrt(sigma_forecast)

    # Query the residuals model once, so that the point forecast and the
    # simulated paths come from the same prediction.
    preds_z = self.model_residuals.predict(h=h)
    z_forecast = preds_z.values.ravel() * self.z_std_ + self.z_mean_

    # Combine: mu + z * sigma
    point_forecast = mean_forecast + z_forecast * vol_forecast

    alpha = 1 - level / 100
    sims = None
    sims_z_normalized = getattr(preds_z, "sims", None) if return_sims else None

    if isinstance(sims_z_normalized, pd.DataFrame):
        # Undo the standardization, then rescale every path (one per
        # column) by the forecast volatility.
        z_sims = (
            np.asarray(sims_z_normalized.values, dtype=float) * self.z_std_
            + self.z_mean_
        )
        sims = mean_forecast[:, np.newaxis] + z_sims * vol_forecast[:, np.newaxis]
        lower_bound = np.quantile(sims, alpha / 2, axis=1)
        upper_bound = np.quantile(sims, 1 - alpha / 2, axis=1)
    else:
        # Gaussian intervals (also the fallback when no simulations are
        # available)
        z_score = norm.ppf(1 - alpha / 2)
        margin = z_score * vol_forecast * self.z_std_
        lower_bound = point_forecast - margin
        upper_bound = point_forecast + margin

    return DescribeResult(point_forecast, sims, lower_bound, upper_bound)
Predict future values
Parameters
h : int Forecast horizon level : int Confidence level for prediction intervals return_sims : bool If True, return full simulation paths
Returns
DescribeResult Named tuple with mean, sims, lower, upper
class MedianVotingRegressor(VotingRegressor):
    """Voting regressor whose prediction is the median of its base regressors."""

    def predict(self, X):
        """
        Predict using the median of the base regressors' predictions.

        Parameters:
            X (array-like): Feature matrix for predictions.

        Returns:
            y_pred (array): Median of predictions from the base regressors.
        """
        # One prediction vector per fitted estimator, stacked along axis 0
        per_estimator = []
        for fitted in self.estimators_:
            per_estimator.append(fitted.predict(X))
        stacked = np.asarray(per_estimator)
        # Collapse the estimator axis, keeping one value per sample
        return np.median(stacked, axis=0)
Prediction voting regressor for unfitted estimators.
A voting regressor is an ensemble meta-estimator that fits several base regressors, each on the whole dataset. Then it averages the individual predictions to form a final prediction.
For a detailed example, refer to
:ref:`sphx_glr_auto_examples_ensemble_plot_voting_regressor.py`.
Read more in the :ref:`User Guide <voting_regressor>`.
New in version 0.21.
Parameters
estimators : list of (str, estimator) tuples
Invoking the fit method on the VotingRegressor will fit clones
of those original estimators that will be stored in the class attribute
self.estimators_. An estimator can be set to 'drop' using
set_params().
*Changed in version 0.21:*
``'drop'`` is accepted. Using None was deprecated in 0.22 and
support was removed in 0.24.
weights : array-like of shape (n_regressors,), default=None
Sequence of weights (float or int) to weight the occurrences of
predicted values before averaging. Uses uniform weights if None.
n_jobs : int, default=None
The number of jobs to run in parallel for fit.
None means 1 unless in a joblib.parallel_backend context.
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
for more details.
verbose : bool, default=False If True, the time elapsed while fitting will be printed as it is completed.
*New in version 0.23.*
Attributes
estimators_ : list of regressors
The collection of fitted sub-estimators as defined in estimators
that are not 'drop'.
named_estimators_ : ~sklearn.utils.Bunch
Attribute to access any fitted sub-estimators by name.
*New in version 0.20.*
n_features_in_ : int
Number of features seen during :term:fit. Only defined if the
underlying regressor exposes such an attribute when fit.
*New in version 0.24.*
feature_names_in_ : ndarray of shape (n_features_in_,)
Names of features seen during :term:fit. Only defined if the
underlying estimators expose such an attribute when fit.
*New in version 1.0.*
See Also
VotingClassifier : Soft Voting/Majority Rule classifier.
Examples
>>> import numpy as np
>>> from sklearn.linear_model import LinearRegression
>>> from sklearn.ensemble import RandomForestRegressor
>>> from sklearn.ensemble import VotingRegressor
>>> from sklearn.neighbors import KNeighborsRegressor
>>> r1 = LinearRegression()
>>> r2 = RandomForestRegressor(n_estimators=10, random_state=1)
>>> r3 = KNeighborsRegressor()
>>> X = np.array([[1, 1], [2, 4], [3, 9], [4, 16], [5, 25], [6, 36]])
>>> y = np.array([2, 6, 12, 20, 30, 42])
>>> er = VotingRegressor([('lr', r1), ('rf', r2), ('r3', r3)])
>>> print(er.fit(X, y).predict(X))
[ 6.8  8.4 12.5 17.8 26.  34. ]
In the following example, we drop the 'lr' estimator with
~VotingRegressor.set_params() and fit the remaining two estimators:
>>> er = er.set_params(lr='drop')
>>> er = er.fit(X, y)
>>> len(er.estimators_)
2
def predict(self, X):
    """
    Predict using the median of the base regressors' predictions.

    Parameters:
        X (array-like): Feature matrix for predictions.

    Returns:
        y_pred (array): Median of predictions from the base regressors.
    """
    # One prediction vector per fitted estimator, stacked along axis 0
    per_estimator = []
    for fitted in self.estimators_:
        per_estimator.append(fitted.predict(X))
    stacked = np.asarray(per_estimator)
    # Collapse the estimator axis, keeping one value per sample
    return np.median(stacked, axis=0)
Predict using the median of the base regressors' predictions.
Parameters: X (array-like): Feature matrix for predictions.
Returns: y_pred (array): Median of predictions from the base regressors.
31class MTS(Base): 32 """Univariate and multivariate time series (MTS) forecasting with Quasi-Randomized networks 33 34 Parameters: 35 36 obj: object. 37 any object containing a method fit (obj.fit()) and a method predict 38 (obj.predict()). 39 40 n_hidden_features: int. 41 number of nodes in the hidden layer. 42 43 activation_name: str. 44 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'. 45 46 a: float. 47 hyperparameter for 'prelu' or 'elu' activation function. 48 49 nodes_sim: str. 50 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 51 'uniform'. 52 53 bias: boolean. 54 indicates if the hidden layer contains a bias term (True) or not 55 (False). 56 57 dropout: float. 58 regularization parameter; (random) percentage of nodes dropped out 59 of the training. 60 61 direct_link: boolean. 62 indicates if the original predictors are included (True) in model's fitting or not (False). 63 64 n_clusters: int. 65 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering). 66 67 cluster_encode: bool. 68 defines how the variable containing clusters is treated (default is one-hot) 69 if `False`, then labels are used, without one-hot encoding. 70 71 type_clust: str. 72 type of clustering method: currently k-means ('kmeans') or Gaussian 73 Mixture Model ('gmm'). 74 75 type_scaling: a tuple of 3 strings. 76 scaling methods for inputs, hidden layer, and clustering respectively 77 (and when relevant). 78 Currently available: standardization ('std') or MinMax scaling ('minmax'). 79 80 lags: int. 81 number of lags used for each time series. 82 If string, lags must be one of 'AIC', 'AICc', or 'BIC'. 83 84 type_pi: str. 
85 type of prediction interval; currently: 86 - "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case 87 - "quantile": use model-agnostic quantile regression under the hood 88 - "kde": based on Kernel Density Estimation of in-sample residuals 89 - "bootstrap": based on independent bootstrap of in-sample residuals 90 - "block-bootstrap": based on basic block bootstrap of in-sample residuals 91 - "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals 92 - "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals 93 - "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals 94 - "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals 95 - "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals 96 - "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals 97 - based on copulas of in-sample residuals: 'vine-tll', 'vine-bb1', 'vine-bb6', 'vine-bb7', 'vine-bb8', 'vine-clayton', 98 'vine-frank', 'vine-gaussian', 'vine-gumbel', 'vine-indep', 'vine-joe', 'vine-student' 99 - 'scp-vine-tll', 'scp-vine-bb1', 'scp-vine-bb6', 'scp-vine-bb7', 'scp-vine-bb8', 'scp-vine-clayton', 100 'scp-vine-frank', 'scp-vine-gaussian', 'scp-vine-gumbel', 'scp-vine-indep', 'scp-vine-joe', 'scp-vine-student' 101 - 'scp2-vine-tll', 'scp2-vine-bb1', 'scp2-vine-bb6', 'scp2-vine-bb7', 'scp2-vine-bb8', 'scp2-vine-clayton', 102 'scp2-vine-frank', 'scp2-vine-gaussian', 'scp2-vine-gumbel', 'scp2-vine-indep', 'scp2-vine-joe', 'scp2-vine-student' 103 104 level: int. 105 level of confidence for `type_pi == 'quantile'` (default is `95`) 106 107 block_size: int. 
108 size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap"). 109 Default is round(3.15*(n_residuals^1/3)) 110 111 replications: int. 112 number of replications (if needed, for predictive simulation). Default is 'None'. 113 114 kernel: str. 115 the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'. 116 117 agg: str. 118 either "mean" or "median" for simulation of bootstrap aggregating 119 120 seed: int. 121 reproducibility seed for nodes_sim=='uniform' or predictive simulation. 122 123 backend: str. 124 "cpu" or "gpu" or "tpu". 125 126 verbose: int. 127 0: not printing; 1: printing 128 129 show_progress: bool. 130 True: progress bar when fitting each series; False: no progress bar when fitting each series 131 132 Attributes: 133 134 fit_objs_: dict 135 objects adjusted to each individual time series 136 137 y_: {array-like} 138 MTS responses (most recent observations first) 139 140 X_: {array-like} 141 MTS lags 142 143 xreg_: {array-like} 144 external regressors 145 146 y_means_: dict 147 a dictionary of each series mean values 148 149 preds_: {array-like} 150 successive model predictions 151 152 preds_std_: {array-like} 153 standard deviation around the predictions for Bayesian base learners (`obj`) 154 155 gaussian_preds_std_: {array-like} 156 standard deviation around the predictions for `type_pi='gaussian'` 157 158 return_std_: boolean 159 return uncertainty or not (set in predict) 160 161 df_: data frame 162 the input data frame, in case a data.frame is provided to `fit` 163 164 n_obs_: int 165 number of time series observations (number of rows for multivariate) 166 167 level_: int 168 level of confidence for prediction intervals (default is 95) 169 170 residuals_: {array-like} 171 in-sample residuals (for `type_pi` not conformal prediction) or calibrated residuals 172 (for `type_pi` in conformal prediction) 173 174 residuals_sims_: tuple of 
{array-like} 175 simulations of in-sample residuals (for `type_pi` not conformal prediction) or 176 calibrated residuals (for `type_pi` in conformal prediction) 177 178 kde_: A scikit-learn object, see https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KernelDensity.html 179 180 residuals_std_dev_: residuals standard deviation 181 182 Examples: 183 184 Example 1: 185 186 ```python 187 import nnetsauce as ns 188 import numpy as np 189 from sklearn import linear_model 190 np.random.seed(123) 191 192 M = np.random.rand(10, 3) 193 M[:,0] = 10*M[:,0] 194 M[:,2] = 25*M[:,2] 195 print(M) 196 197 # Adjust Bayesian Ridge 198 regr4 = linear_model.BayesianRidge() 199 obj_MTS = ns.MTS(regr4, lags = 1, n_hidden_features=5) 200 obj_MTS.fit(M) 201 print(obj_MTS.predict()) 202 203 # with credible intervals 204 print(obj_MTS.predict(return_std=True, level=80)) 205 206 print(obj_MTS.predict(return_std=True, level=95)) 207 ``` 208 209 Example 2: 210 211 ```python 212 import nnetsauce as ns 213 import numpy as np 214 from sklearn import linear_model 215 216 dataset = { 217 'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'], 218 'series1' : [34, 30, 35.6, 33.3, 38.1], 219 'series2' : [4, 5.5, 5.6, 6.3, 5.1], 220 'series3' : [100, 100.5, 100.6, 100.2, 100.1]} 221 df = pd.DataFrame(dataset).set_index('date') 222 print(df) 223 224 # Adjust Bayesian Ridge 225 regr5 = linear_model.BayesianRidge() 226 obj_MTS = ns.MTS(regr5, lags = 1, n_hidden_features=5) 227 obj_MTS.fit(df) 228 print(obj_MTS.predict()) 229 230 # with credible intervals 231 print(obj_MTS.predict(return_std=True, level=80)) 232 233 print(obj_MTS.predict(return_std=True, level=95)) 234 ``` 235 """ 236 237 # construct the object ----- 238 239 def __init__( 240 self, 241 obj, 242 n_hidden_features=5, 243 activation_name="relu", 244 a=0.01, 245 nodes_sim="sobol", 246 bias=True, 247 dropout=0, 248 direct_link=True, 249 n_clusters=2, 250 cluster_encode=True, 251 type_clust="kmeans", 252 
type_scaling=("std", "std", "std"), 253 lags=1, 254 type_pi="kde", 255 level=95, 256 block_size=None, 257 replications=None, 258 kernel="gaussian", 259 agg="mean", 260 seed=123, 261 backend="cpu", 262 verbose=0, 263 show_progress=True, 264 ): 265 super().__init__( 266 n_hidden_features=n_hidden_features, 267 activation_name=activation_name, 268 a=a, 269 nodes_sim=nodes_sim, 270 bias=bias, 271 dropout=dropout, 272 direct_link=direct_link, 273 n_clusters=n_clusters, 274 cluster_encode=cluster_encode, 275 type_clust=type_clust, 276 type_scaling=type_scaling, 277 seed=seed, 278 backend=backend, 279 ) 280 281 # Add validation for lags parameter 282 if isinstance(lags, str): 283 assert lags in ( 284 "AIC", 285 "AICc", 286 "BIC", 287 ), "if string, lags must be one of 'AIC', 'AICc', or 'BIC'" 288 else: 289 assert ( 290 int(lags) == lags 291 ), "if numeric, lags parameter should be an integer" 292 293 self.obj = obj 294 self.n_series = None 295 self.lags = lags 296 self.type_pi = type_pi 297 self.level = level 298 if self.type_pi == "quantile": 299 self.obj = QuantileRegressor( 300 self.obj, level=self.level, scoring="conformal" 301 ) 302 self.block_size = block_size 303 self.replications = replications 304 self.kernel = kernel 305 self.agg = agg 306 self.verbose = verbose 307 self.show_progress = show_progress 308 self.series_names = ["series0"] 309 self.input_dates = None 310 self.quantiles = None 311 self.fit_objs_ = {} 312 self.y_ = None # MTS responses (most recent observations first) 313 self.X_ = None # MTS lags 314 self.xreg_ = None 315 self.y_means_ = {} 316 self.mean_ = None 317 self.median_ = None 318 self.upper_ = None 319 self.lower_ = None 320 self.output_dates_ = None 321 self.preds_std_ = [] 322 self.gaussian_preds_std_ = None 323 self.alpha_ = None 324 self.return_std_ = None 325 self.df_ = None 326 self.residuals_ = [] 327 self.abs_calib_residuals_ = None 328 self.calib_residuals_quantile_ = None 329 self.residuals_sims_ = None 330 self.kde_ = None 331 
self.sims_ = None 332 self.residuals_std_dev_ = None 333 self.n_obs_ = None 334 self.level_ = None 335 self.init_n_series_ = None 336 337 def fit(self, X, xreg=None, **kwargs): 338 """Fit MTS model to training data X, with optional regressors xreg 339 340 Parameters: 341 342 X: {array-like}, shape = [n_samples, n_features] 343 Training time series, where n_samples is the number 344 of samples and n_features is the number of features; 345 X must be in increasing order (most recent observations last) 346 347 xreg: {array-like}, shape = [n_samples, n_features_xreg] 348 Additional (external) regressors to be passed to self.obj 349 xreg must be in 'increasing' order (most recent observations last) 350 351 **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity) 352 353 Returns: 354 355 self: object 356 """ 357 try: 358 self.init_n_series_ = X.shape[1] 359 except IndexError as e: 360 self.init_n_series_ = 1 361 362 # Automatic lag selection if requested 363 if isinstance(self.lags, str): 364 max_lags = min(25, X.shape[0] // 4) 365 best_ic = float("inf") 366 best_lags = 1 367 368 if self.verbose: 369 print( 370 f"\nSelecting optimal number of lags using {self.lags}..." 
371 ) 372 iterator = tqdm(range(1, max_lags + 1)) 373 else: 374 iterator = range(1, max_lags + 1) 375 376 for lag in iterator: 377 # Convert DataFrame to numpy array before reversing 378 if isinstance(X, pd.DataFrame): 379 X_values = X.values[::-1] 380 else: 381 X_values = X[::-1] 382 383 # Try current lag value 384 if self.init_n_series_ > 1: 385 mts_input = ts.create_train_inputs(X_values, lag) 386 else: 387 mts_input = ts.create_train_inputs( 388 X_values.reshape(-1, 1), lag 389 ) 390 391 # Cook training set and fit model 392 dummy_y, scaled_Z = self.cook_training_set( 393 y=np.ones(mts_input[0].shape[0]), X=mts_input[1] 394 ) 395 residuals_ = [] 396 397 for i in range(self.init_n_series_): 398 y_mean = np.mean(mts_input[0][:, i]) 399 centered_y_i = mts_input[0][:, i] - y_mean 400 self.obj.fit(X=scaled_Z, y=centered_y_i) 401 residuals_.append( 402 (centered_y_i - self.obj.predict(scaled_Z)).tolist() 403 ) 404 405 self.residuals_ = np.asarray(residuals_).T 406 ic = self._compute_information_criterion( 407 curr_lags=lag, criterion=self.lags 408 ) 409 410 if self.verbose: 411 print(f"Trying lags={lag}, {self.lags}={ic:.2f}") 412 413 if ic < best_ic: 414 best_ic = ic 415 best_lags = lag 416 417 if self.verbose: 418 print( 419 f"\nSelected {best_lags} lags with {self.lags}={best_ic:.2f}" 420 ) 421 422 self.lags = best_lags 423 424 self.input_dates = None 425 self.df_ = None 426 427 if isinstance(X, pd.DataFrame) is False: 428 # input data set is a numpy array 429 if xreg is None: 430 X = pd.DataFrame(X) 431 self.series_names = [ 432 "series" + str(i) for i in range(X.shape[1]) 433 ] 434 else: 435 # xreg is not None 436 X = mo.cbind(X, xreg) 437 self.xreg_ = xreg 438 439 else: # input data set is a DataFrame with column names 440 X_index = None 441 if X.index is not None: 442 X_index = X.index 443 if xreg is None: 444 X = copy.deepcopy(mo.convert_df_to_numeric(X)) 445 else: 446 X = copy.deepcopy(mo.cbind(mo.convert_df_to_numeric(X), xreg)) 447 self.xreg_ = xreg 448 if 
X_index is not None: 449 X.index = X_index 450 self.series_names = X.columns.tolist() 451 452 if isinstance(X, pd.DataFrame): 453 if self.df_ is None: 454 self.df_ = X 455 X = X.values 456 else: 457 input_dates_prev = pd.DatetimeIndex(self.df_.index.values) 458 frequency = pd.infer_freq(input_dates_prev) 459 self.df_ = pd.concat([self.df_, X], axis=0) 460 self.input_dates = pd.date_range( 461 start=input_dates_prev[0], 462 periods=len(input_dates_prev) + X.shape[0], 463 freq=frequency, 464 ).values.tolist() 465 self.df_.index = self.input_dates 466 X = self.df_.values 467 self.df_.columns = self.series_names 468 else: 469 if self.df_ is None: 470 self.df_ = pd.DataFrame(X, columns=self.series_names) 471 else: 472 self.df_ = pd.concat( 473 [self.df_, pd.DataFrame(X, columns=self.series_names)], 474 axis=0, 475 ) 476 477 self.input_dates = ts.compute_input_dates(self.df_) 478 479 try: 480 # multivariate time series 481 n, p = X.shape 482 except: 483 # univariate time series 484 n = X.shape[0] 485 p = 1 486 self.n_obs_ = n 487 488 rep_1_n = np.repeat(1, n) 489 490 self.y_ = None 491 self.X_ = None 492 self.n_series = p 493 self.fit_objs_.clear() 494 self.y_means_.clear() 495 residuals_ = [] 496 self.residuals_ = None 497 self.residuals_sims_ = None 498 self.kde_ = None 499 self.sims_ = None 500 self.scaled_Z_ = None 501 self.centered_y_is_ = [] 502 503 if self.init_n_series_ > 1: 504 # multivariate time series 505 mts_input = ts.create_train_inputs(X[::-1], self.lags) 506 else: 507 # univariate time series 508 mts_input = ts.create_train_inputs( 509 X.reshape(-1, 1)[::-1], self.lags 510 ) 511 512 self.y_ = mts_input[0] 513 514 self.X_ = mts_input[1] 515 516 dummy_y, scaled_Z = self.cook_training_set(y=rep_1_n, X=self.X_) 517 518 self.scaled_Z_ = scaled_Z 519 520 # loop on all the time series and adjust self.obj.fit 521 if self.verbose > 0: 522 print( 523 f"\n Adjusting {type(self.obj).__name__} to multivariate time series... 
\n" 524 ) 525 526 if self.show_progress is True: 527 iterator = tqdm(range(self.init_n_series_)) 528 else: 529 iterator = range(self.init_n_series_) 530 531 if self.type_pi in ( 532 "gaussian", 533 "kde", 534 "bootstrap", 535 "block-bootstrap", 536 ) or self.type_pi.startswith("vine"): 537 for i in iterator: 538 y_mean = np.mean(self.y_[:, i]) 539 self.y_means_[i] = y_mean 540 centered_y_i = self.y_[:, i] - y_mean 541 self.centered_y_is_.append(centered_y_i) 542 self.obj.fit(X=scaled_Z, y=centered_y_i) 543 self.fit_objs_[i] = deepcopy(self.obj) 544 residuals_.append( 545 ( 546 centered_y_i - self.fit_objs_[i].predict(scaled_Z) 547 ).tolist() 548 ) 549 550 if self.type_pi == "quantile": 551 for i in iterator: 552 y_mean = np.mean(self.y_[:, i]) 553 self.y_means_[i] = y_mean 554 centered_y_i = self.y_[:, i] - y_mean 555 self.centered_y_is_.append(centered_y_i) 556 self.obj.fit(X=scaled_Z, y=centered_y_i) 557 self.fit_objs_[i] = deepcopy(self.obj) 558 559 if self.type_pi.startswith("scp"): 560 # split conformal prediction 561 for i in iterator: 562 n_y = self.y_.shape[0] 563 n_y_half = n_y // 2 564 first_half_idx = range(0, n_y_half) 565 second_half_idx = range(n_y_half, n_y) 566 y_mean_temp = np.mean(self.y_[first_half_idx, i]) 567 centered_y_i_temp = self.y_[first_half_idx, i] - y_mean_temp 568 self.obj.fit(X=scaled_Z[first_half_idx, :], y=centered_y_i_temp) 569 # calibrated residuals actually 570 residuals_.append( 571 ( 572 self.y_[second_half_idx, i] 573 - ( 574 y_mean_temp 575 + self.obj.predict(scaled_Z[second_half_idx, :]) 576 ) 577 ).tolist() 578 ) 579 # fit on the second half 580 y_mean = np.mean(self.y_[second_half_idx, i]) 581 self.y_means_[i] = y_mean 582 centered_y_i = self.y_[second_half_idx, i] - y_mean 583 self.obj.fit(X=scaled_Z[second_half_idx, :], y=centered_y_i) 584 self.fit_objs_[i] = deepcopy(self.obj) 585 586 self.residuals_ = np.asarray(residuals_).T 587 588 if self.type_pi == "gaussian": 589 self.gaussian_preds_std_ = np.std(self.residuals_, 
axis=0) 590 591 if self.type_pi.startswith("scp2"): 592 # Calculate mean and standard deviation for each column 593 data_mean = np.mean(self.residuals_, axis=0) 594 self.residuals_std_dev_ = np.std(self.residuals_, axis=0) 595 # Center and scale the array using broadcasting 596 self.residuals_ = ( 597 self.residuals_ - data_mean[np.newaxis, :] 598 ) / self.residuals_std_dev_[np.newaxis, :] 599 600 if self.replications != None and "kde" in self.type_pi: 601 if self.verbose > 0: 602 print(f"\n Simulate residuals using {self.kernel} kernel... \n") 603 assert self.kernel in ( 604 "gaussian", 605 "tophat", 606 ), "currently, 'kernel' must be either 'gaussian' or 'tophat'" 607 kernel_bandwidths = {"bandwidth": np.logspace(-6, 6, 150)} 608 grid = GridSearchCV( 609 KernelDensity(kernel=self.kernel, **kwargs), 610 param_grid=kernel_bandwidths, 611 ) 612 grid.fit(self.residuals_) 613 614 if self.verbose > 0: 615 print( 616 f"\n Best parameters for {self.kernel} kernel: {grid.best_params_} \n" 617 ) 618 619 self.kde_ = grid.best_estimator_ 620 621 return self 622 623 def partial_fit(self, X, xreg=None, **kwargs): 624 """partial_fit MTS model to training data X, with optional regressors xreg 625 626 Parameters: 627 628 X: {array-like}, shape = [n_samples, n_features] 629 Training time series, where n_samples is the number 630 of samples and n_features is the number of features; 631 X must be in increasing order (most recent observations last) 632 633 xreg: {array-like}, shape = [n_samples, n_features_xreg] 634 Additional (external) regressors to be passed to self.obj 635 xreg must be in 'increasing' order (most recent observations last) 636 637 **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity) 638 639 Returns: 640 641 self: object 642 """ 643 try: 644 self.init_n_series_ = X.shape[1] 645 except IndexError as e: 646 self.init_n_series_ = 1 647 648 # Automatic lag selection if requested 649 
if isinstance(self.lags, str): 650 max_lags = min(25, X.shape[0] // 4) 651 best_ic = float("inf") 652 best_lags = 1 653 654 if self.verbose: 655 print( 656 f"\nSelecting optimal number of lags using {self.lags}..." 657 ) 658 iterator = tqdm(range(1, max_lags + 1)) 659 else: 660 iterator = range(1, max_lags + 1) 661 662 for lag in iterator: 663 # Convert DataFrame to numpy array before reversing 664 if isinstance(X, pd.DataFrame): 665 X_values = X.values[::-1] 666 else: 667 X_values = X[::-1] 668 669 # Try current lag value 670 if self.init_n_series_ > 1: 671 mts_input = ts.create_train_inputs(X_values, lag) 672 else: 673 mts_input = ts.create_train_inputs( 674 X_values.reshape(-1, 1), lag 675 ) 676 677 # Cook training set and partial_fit model 678 dummy_y, scaled_Z = self.cook_training_set( 679 y=np.ones(mts_input[0].shape[0]), X=mts_input[1] 680 ) 681 residuals_ = [] 682 683 for i in range(self.init_n_series_): 684 y_mean = np.mean(mts_input[0][:, i]) 685 centered_y_i = mts_input[0][:, i] - y_mean 686 self.obj.partial_fit(X=scaled_Z, y=centered_y_i) 687 residuals_.append( 688 (centered_y_i - self.obj.predict(scaled_Z)).tolist() 689 ) 690 691 self.residuals_ = np.asarray(residuals_).T 692 ic = self._compute_information_criterion( 693 curr_lags=lag, criterion=self.lags 694 ) 695 696 if self.verbose: 697 print(f"Trying lags={lag}, {self.lags}={ic:.2f}") 698 699 if ic < best_ic: 700 best_ic = ic 701 best_lags = lag 702 703 if self.verbose: 704 print( 705 f"\nSelected {best_lags} lags with {self.lags}={best_ic:.2f}" 706 ) 707 708 self.lags = best_lags 709 710 self.input_dates = None 711 self.df_ = None 712 713 if isinstance(X, pd.DataFrame) is False: 714 # input data set is a numpy array 715 if xreg is None: 716 X = pd.DataFrame(X) 717 if len(X.shape) > 1: 718 self.series_names = [ 719 "series" + str(i) for i in range(X.shape[1]) 720 ] 721 else: 722 self.series_names = ["series0"] 723 else: 724 # xreg is not None 725 X = mo.cbind(X, xreg) 726 self.xreg_ = xreg 727 728 
else: # input data set is a DataFrame with column names 729 X_index = None 730 if X.index is not None: 731 X_index = X.index 732 if xreg is None: 733 X = copy.deepcopy(mo.convert_df_to_numeric(X)) 734 else: 735 X = copy.deepcopy(mo.cbind(mo.convert_df_to_numeric(X), xreg)) 736 self.xreg_ = xreg 737 if X_index is not None: 738 X.index = X_index 739 self.series_names = X.columns.tolist() 740 741 if isinstance(X, pd.DataFrame): 742 if self.df_ is None: 743 self.df_ = X 744 X = X.values 745 else: 746 input_dates_prev = pd.DatetimeIndex(self.df_.index.values) 747 frequency = pd.infer_freq(input_dates_prev) 748 self.df_ = pd.concat([self.df_, X], axis=0) 749 self.input_dates = pd.date_range( 750 start=input_dates_prev[0], 751 periods=len(input_dates_prev) + X.shape[0], 752 freq=frequency, 753 ).values.tolist() 754 self.df_.index = self.input_dates 755 X = self.df_.values 756 self.df_.columns = self.series_names 757 else: 758 if self.df_ is None: 759 self.df_ = pd.DataFrame(X, columns=self.series_names) 760 else: 761 self.df_ = pd.concat( 762 [self.df_, pd.DataFrame(X, columns=self.series_names)], 763 axis=0, 764 ) 765 766 self.input_dates = ts.compute_input_dates(self.df_) 767 768 try: 769 # multivariate time series 770 n, p = X.shape 771 except: 772 # univariate time series 773 n = X.shape[0] 774 p = 1 775 self.n_obs_ = n 776 777 rep_1_n = np.repeat(1, n) 778 779 self.y_ = None 780 self.X_ = None 781 self.n_series = p 782 self.fit_objs_.clear() 783 self.y_means_.clear() 784 residuals_ = [] 785 self.residuals_ = None 786 self.residuals_sims_ = None 787 self.kde_ = None 788 self.sims_ = None 789 self.scaled_Z_ = None 790 self.centered_y_is_ = [] 791 792 if self.init_n_series_ > 1: 793 # multivariate time series 794 mts_input = ts.create_train_inputs(X[::-1], self.lags) 795 else: 796 # univariate time series 797 mts_input = ts.create_train_inputs( 798 X.reshape(-1, 1)[::-1], self.lags 799 ) 800 801 self.y_ = mts_input[0] 802 803 self.X_ = mts_input[1] 804 805 dummy_y, 
scaled_Z = self.cook_training_set(y=rep_1_n, X=self.X_) 806 807 self.scaled_Z_ = scaled_Z 808 809 # loop on all the time series and adjust self.obj.partial_fit 810 if self.verbose > 0: 811 print( 812 f"\n Adjusting {type(self.obj).__name__} to multivariate time series... \n" 813 ) 814 815 if self.show_progress is True: 816 iterator = tqdm(range(self.init_n_series_)) 817 else: 818 iterator = range(self.init_n_series_) 819 820 if self.type_pi in ( 821 "gaussian", 822 "kde", 823 "bootstrap", 824 "block-bootstrap", 825 ) or self.type_pi.startswith("vine"): 826 for i in iterator: 827 y_mean = np.mean(self.y_[:, i]) 828 self.y_means_[i] = y_mean 829 centered_y_i = self.y_[:, i] - y_mean 830 self.centered_y_is_.append(centered_y_i) 831 self.obj.partial_fit(X=scaled_Z, y=centered_y_i) 832 self.fit_objs_[i] = deepcopy(self.obj) 833 residuals_.append( 834 ( 835 centered_y_i - self.fit_objs_[i].predict(scaled_Z) 836 ).tolist() 837 ) 838 839 if self.type_pi == "quantile": 840 for i in iterator: 841 y_mean = np.mean(self.y_[:, i]) 842 self.y_means_[i] = y_mean 843 centered_y_i = self.y_[:, i] - y_mean 844 self.centered_y_is_.append(centered_y_i) 845 self.obj.partial_fit(X=scaled_Z, y=centered_y_i) 846 self.fit_objs_[i] = deepcopy(self.obj) 847 848 if self.type_pi.startswith("scp"): 849 # split conformal prediction 850 for i in iterator: 851 n_y = self.y_.shape[0] 852 n_y_half = n_y // 2 853 first_half_idx = range(0, n_y_half) 854 second_half_idx = range(n_y_half, n_y) 855 y_mean_temp = np.mean(self.y_[first_half_idx, i]) 856 centered_y_i_temp = self.y_[first_half_idx, i] - y_mean_temp 857 self.obj.partial_fit( 858 X=scaled_Z[first_half_idx, :], y=centered_y_i_temp 859 ) 860 # calibrated residuals actually 861 residuals_.append( 862 ( 863 self.y_[second_half_idx, i] 864 - ( 865 y_mean_temp 866 + self.obj.predict(scaled_Z[second_half_idx, :]) 867 ) 868 ).tolist() 869 ) 870 # partial_fit on the second half 871 y_mean = np.mean(self.y_[second_half_idx, i]) 872 self.y_means_[i] = 
def _predict_quantiles(self, h, quantiles, **kwargs):
    """Predict arbitrary quantiles from simulated paths.

    Parameters:

        h: int
            forecast horizon

        quantiles: array-like of floats
            quantile levels; each level yields one output column per series

        **kwargs: dict
            forwarded to `predict` when simulations must be (re)generated

    Returns:

        A pandas DataFrame indexed by the forecast dates, with one column
        named `quantile_{label}_{series_name}` per (quantile, series) pair

    Raises:

        ValueError: if `predict` did not produce any simulation

    """
    # Ensure output dates are set
    self.output_dates_, _ = ts.compute_output_dates(self.df_, h)

    # Simulations cached by a previous call are only reusable when they
    # cover exactly `h` steps; otherwise their length would not match the
    # forecast index built above.
    sims = getattr(self, "sims_", None)
    if sims is None or len(sims) == 0 or len(sims[0]) != h:
        self.predict(h=h, level=95, **kwargs)  # any level triggers sims
        sims = self.sims_

    if sims is None:
        raise ValueError(
            "no simulations available to compute quantiles from; "
            "'replications' must be set with a simulation-based 'type_pi'"
        )

    # Stack simulations: (R, h, n_series)
    sims_array = np.stack([sim.values for sim in sims], axis=0)

    # Quantiles over the replication axis: (n_q, h, n_series)
    q_values = np.quantile(sims_array, quantiles, axis=0)

    result_dict = {}
    for i, q in enumerate(quantiles):
        # Clean label: 0.05 -> "05", 0.1 -> "10", 0.95 -> "95";
        # levels that are not whole percents fall back to e.g. "0_125"
        q_label = (
            f"{int(q * 100):02d}"
            if (q * 100).is_integer()
            else f"{q:.3f}".replace(".", "_")
        )
        for series_id in range(self.init_n_series_):
            series_name = self.series_names[series_id]
            col_name = f"quantile_{q_label}_{series_name}"
            result_dict[col_name] = q_values[i, :, series_id]

    return pd.DataFrame(result_dict, index=self.output_dates_)
def predict(self, h=5, level=95, quantiles=None, **kwargs):
    """Forecast all the time series, h steps ahead.

    Parameters:

        h: int
            forecast horizon (number of steps ahead)

        level: int, float, list or numpy array
            confidence level(s) of the prediction intervals; a list or
            array produces one pair of bounds per level

        quantiles: array-like of floats, optional
            quantile levels strictly between 0 and 1; when given, the
            call is delegated to `_predict_quantiles`

        **kwargs: dict
            forwarded to `cook_test_set`; the keys `return_std`,
            `return_pi` and `xreg` are also inspected here

    Returns:

        - `quantiles` given: the DataFrame built by `_predict_quantiles`
        - `level` is a list/array: a DataFrame with columns
          `lower_{level}_{series}` and `upper_{level}_{series}`
        - simulation-based intervals with `replications` set: a namedtuple
          (mean, sims, lower, upper)
        - no `replications`, no `return_std`/`return_pi`, and `type_pi`
          neither 'gaussian' nor 'scp': a DataFrame of point forecasts
        - `return_std`/`return_pi`, or `type_pi == 'gaussian'`: a
          namedtuple (mean, lower, upper)

    Raises:

        ValueError: if a quantile level is not strictly between 0 and 1

    """

    if quantiles is not None:
        quantiles = np.asarray(quantiles)
        if not ((quantiles > 0) & (quantiles < 1)).all():
            raise ValueError("quantiles must be between 0 and 1.")
        # Delegate to dedicated method
        return self._predict_quantiles(h=h, quantiles=quantiles, **kwargs)

    if isinstance(level, (list, np.ndarray)):
        # One forecast per level, flattened into a single wide DataFrame
        result_dict = {}
        for lev in level:
            res = self.predict(h=h, level=lev, **kwargs)
            res.lower.index = pd.to_datetime(res.lower.index)
            res.upper.index = pd.to_datetime(res.upper.index)
            if isinstance(res.lower, pd.DataFrame):
                for series in res.lower.columns:
                    result_dict[f"lower_{lev}_{series}"] = (
                        res.lower[series].to_numpy().flatten()
                    )
                    result_dict[f"upper_{lev}_{series}"] = (
                        res.upper[series].to_numpy().flatten()
                    )
            else:
                for series_id in range(self.n_series):
                    result_dict[f"lower_{lev}_{series_id}"] = (
                        res.lower[series_id, :].to_numpy().flatten()
                    )
                    result_dict[f"upper_{lev}_{series_id}"] = (
                        res.upper[series_id, :].to_numpy().flatten()
                    )
        return pd.DataFrame(result_dict, index=self.output_dates_)

    def replication_ids():
        # Progress bar only at verbose == 1. Every other verbosity iterates
        # silently, so simulations are always produced.
        ids = range(self.replications)
        return tqdm(ids) if self.verbose == 1 else ids

    def strip_xreg(res):
        # Drop the trailing external-regressor columns from each field.
        n_xreg = self.xreg_.shape[1] if len(self.xreg_.shape) > 1 else 1
        return mx.tuple_map(
            res, lambda x: mo.delete_last_columns(x, num_columns=n_xreg)
        )

    # only one prediction interval
    self.output_dates_, _ = ts.compute_output_dates(self.df_, h)

    self.level_ = level

    self.return_std_ = False  # do not remove (/!\)

    self.mean_ = None  # do not remove (/!\)

    self.mean_ = deepcopy(self.y_)  # do not remove (/!\)

    self.lower_ = None  # do not remove (/!\)

    self.upper_ = None  # do not remove (/!\)

    self.sims_ = None  # do not remove (/!\)

    y_means_ = np.asarray(
        [self.y_means_[i] for i in range(self.init_n_series_)]
    )

    self.alpha_ = 100 - level

    pi_multiplier = norm.ppf(1 - self.alpha_ / 200)

    if "return_std" in kwargs:  # bayesian forecasting
        self.return_std_ = True
        self.preds_std_ = []

    if "return_pi" in kwargs:  # split conformal, without simulation
        mean_pi_ = []
        lower_pi_ = []
        upper_pi_ = []

    # ---- simulate future residuals (one draw of h rows per replication)
    if self.kde_ is not None and "kde" in self.type_pi:  # kde
        target_cols = self.df_.columns[: self.init_n_series_]
        raw_draws = tuple(
            self.kde_.sample(n_samples=h, random_state=self.seed + 100 * i)
            for i in replication_ids()
        )
        # Convert to DataFrames after sampling
        self.residuals_sims_ = tuple(
            pd.DataFrame(
                sim,
                columns=target_cols,
                index=self.output_dates_,
            )
            for sim in raw_draws
        )

    if self.type_pi in ("bootstrap", "scp-bootstrap", "scp2-bootstrap"):
        assert self.replications is not None and isinstance(
            self.replications, int
        ), "'replications' must be provided and be an integer"
        self.residuals_sims_ = tuple(
            ts.bootstrap(
                self.residuals_,
                h=h,
                block_size=None,
                seed=self.seed + 100 * i,
            )
            for i in replication_ids()
        )

    if self.type_pi in (
        "block-bootstrap",
        "scp-block-bootstrap",
        "scp2-block-bootstrap",
    ):
        if self.block_size is None:
            self.block_size = int(
                np.ceil(3.15 * (self.residuals_.shape[0] ** (1 / 3)))
            )

        assert self.replications is not None and isinstance(
            self.replications, int
        ), "'replications' must be provided and be an integer"
        self.residuals_sims_ = tuple(
            ts.bootstrap(
                self.residuals_,
                h=h,
                block_size=self.block_size,
                seed=self.seed + 100 * i,
            )
            for i in replication_ids()
        )

    if "vine" in self.type_pi:
        self.residuals_sims_ = tuple(
            vinecopula_sample(
                x=self.residuals_,
                n_samples=h,
                method=self.type_pi,
                random_state=self.seed + 100 * i,
            )
            for i in replication_ids()
        )

    # ---- recursive point forecast: newest observation is kept in row 0
    mean_ = deepcopy(self.mean_)

    for step in range(h):
        new_obs = ts.reformat_response(mean_, self.lags)
        new_X = new_obs.reshape(1, -1)
        cooked_new_X = self.cook_test_set(new_X, **kwargs)

        if "return_std" in kwargs:
            self.preds_std_.append(
                [
                    np.asarray(
                        self.fit_objs_[series_ix].predict(
                            cooked_new_X, return_std=True
                        )[1]
                    ).item()
                    for series_ix in range(self.n_series)
                ]
            )

        if "return_pi" in kwargs:
            # The inner index must not reuse the step counter: the xreg
            # row below is selected with `step`.
            for series_ix in range(self.n_series):
                preds_pi = self.fit_objs_[series_ix].predict(
                    cooked_new_X, **kwargs
                )
                mean_pi_.append(preds_pi.mean[0])
                lower_pi_.append(preds_pi.lower[0])
                upper_pi_.append(preds_pi.upper[0])

        if self.type_pi != "quantile":
            predicted_cooked_new_X = np.asarray(
                [
                    np.asarray(
                        self.fit_objs_[series_ix].predict(cooked_new_X)
                    ).item()
                    for series_ix in range(self.init_n_series_)
                ]
            )
        else:
            predicted_cooked_new_X = np.asarray(
                [
                    np.asarray(
                        self.fit_objs_[series_ix]
                        .predict(cooked_new_X, return_pi=True)
                        .upper
                    ).item()
                    for series_ix in range(self.init_n_series_)
                ]
            )

        # models were fitted on centered responses: add the means back
        preds = np.asarray(y_means_ + predicted_cooked_new_X)

        # Create full row with both predictions and external regressors
        if self.xreg_ is not None and "xreg" in kwargs:
            next_xreg = kwargs["xreg"].iloc[step : step + 1].values.flatten()
            full_row = np.concatenate([preds, next_xreg])
        else:
            full_row = preds

        # Create a new row with same number of columns as mean_
        new_row = np.zeros((1, mean_.shape[1]))
        new_row[0, : full_row.shape[0]] = full_row

        # Push the new row on top and drop the oldest one
        mean_ = np.vstack([new_row, mean_[:-1]])

    # Final output should only include the target columns
    self.mean_ = pd.DataFrame(
        mean_[0 : min(h, self.n_obs_ - self.lags), : self.init_n_series_][
            ::-1
        ],
        columns=self.df_.columns[: self.init_n_series_],
        index=self.output_dates_,
    )

    # function's return ------------------------------------------------
    if (
        (("return_std" not in kwargs) and ("return_pi" not in kwargs))
        and (self.type_pi not in ("gaussian", "scp"))
    ) or ("vine" in self.type_pi):
        if self.replications is None:
            return self.mean_.iloc[:, : self.init_n_series_]

        # "return_std" not in kwargs and self.replications is not None
        meanf = []
        medianf = []
        lower = []
        upper = []

        if "scp2" in self.type_pi:
            # simulated residuals are rescaled by the residuals' std dev
            self.sims_ = tuple(
                self.mean_
                + self.residuals_sims_[i]
                * self.residuals_std_dev_[np.newaxis, :]
                for i in replication_ids()
            )
        else:
            self.sims_ = tuple(
                self.mean_ + self.residuals_sims_[i]
                for i in replication_ids()
            )

        DescribeResult = namedtuple(
            "DescribeResult", ("mean", "sims", "lower", "upper")
        )
        for ix in range(self.init_n_series_):
            sims_ix = getsims(self.sims_, ix)
            if self.agg == "mean":
                meanf.append(np.mean(sims_ix, axis=1))
            else:
                medianf.append(np.median(sims_ix, axis=1))
            lower.append(np.quantile(sims_ix, q=self.alpha_ / 200, axis=1))
            upper.append(
                np.quantile(sims_ix, q=1 - self.alpha_ / 200, axis=1)
            )

        target_names = self.series_names[: self.init_n_series_]

        # The point forecast follows `agg`. Only one of the two lists is
        # filled, and building a frame from the empty one would fail.
        centre = meanf if self.agg == "mean" else medianf
        self.mean_ = pd.DataFrame(
            np.asarray(centre).T,
            columns=target_names,
            index=self.output_dates_,
        )

        self.lower_ = pd.DataFrame(
            np.asarray(lower).T,
            columns=target_names,
            index=self.output_dates_,
        )

        self.upper_ = pd.DataFrame(
            np.asarray(upper).T,
            columns=target_names,
            index=self.output_dates_,
        )

        if medianf:
            self.median_ = pd.DataFrame(
                np.asarray(medianf).T,
                columns=target_names,
                index=self.output_dates_,
            )

        return DescribeResult(
            self.mean_, self.sims_, self.lower_, self.upper_
        )

    if (
        (("return_std" in kwargs) or ("return_pi" in kwargs))
        and (self.type_pi not in ("gaussian", "scp"))
    ) or "vine" in self.type_pi:
        DescribeResult = namedtuple(
            "DescribeResult", ("mean", "lower", "upper")
        )

        self.mean_ = pd.DataFrame(
            np.asarray(self.mean_),
            columns=self.series_names,
            index=self.output_dates_,
        )

        if "return_std" in kwargs:
            self.preds_std_ = np.asarray(self.preds_std_)

            self.lower_ = pd.DataFrame(
                self.mean_.values - pi_multiplier * self.preds_std_,
                columns=self.series_names,
                index=self.output_dates_,
            )

            self.upper_ = pd.DataFrame(
                self.mean_.values + pi_multiplier * self.preds_std_,
                columns=self.series_names,
                index=self.output_dates_,
            )

        if "return_pi" in kwargs:
            self.lower_ = pd.DataFrame(
                np.asarray(lower_pi_).reshape(h, self.n_series)
                + y_means_[np.newaxis, :],
                columns=self.series_names,
                index=self.output_dates_,
            )

            self.upper_ = pd.DataFrame(
                np.asarray(upper_pi_).reshape(h, self.n_series)
                + y_means_[np.newaxis, :],
                columns=self.series_names,
                index=self.output_dates_,
            )

        res = DescribeResult(self.mean_, self.lower_, self.upper_)

        if self.xreg_ is not None:
            res2 = strip_xreg(res)
            return DescribeResult(res2[0], res2[1], res2[2])

        return res

    if self.type_pi == "gaussian":
        DescribeResult = namedtuple(
            "DescribeResult", ("mean", "lower", "upper")
        )

        self.mean_ = pd.DataFrame(
            np.asarray(self.mean_),
            columns=self.series_names,
            index=self.output_dates_,
        )

        # Use Bayesian std if available, otherwise use gaussian residual std
        if "return_std" in kwargs and len(self.preds_std_) > 0:
            preds_std_to_use = np.asarray(self.preds_std_)
        else:
            preds_std_to_use = self.gaussian_preds_std_

        self.lower_ = pd.DataFrame(
            self.mean_.values - pi_multiplier * preds_std_to_use,
            columns=self.series_names,
            index=self.output_dates_,
        )

        self.upper_ = pd.DataFrame(
            self.mean_.values + pi_multiplier * preds_std_to_use,
            columns=self.series_names,
            index=self.output_dates_,
        )

        res = DescribeResult(self.mean_, self.lower_, self.upper_)

        if self.xreg_ is not None:
            res2 = strip_xreg(res)
            return DescribeResult(res2[0], res2[1], res2[2])

        return res

    if self.type_pi == "quantile":
        DescribeResult = namedtuple("DescribeResult", ("mean"))

        self.mean_ = pd.DataFrame(
            np.asarray(self.mean_),
            columns=self.series_names,
            index=self.output_dates_,
        )

        res = DescribeResult(self.mean_)

        if self.xreg_ is not None:
            res2 = strip_xreg(res)
            return DescribeResult(res2[0])

        return res

    # NOTE(review): every simulation-based configuration returns above, so
    # this tail appears to be reached only with type_pi == "scp", where
    # sims_ is still None and the method returns None. Confirm whether an
    # explicit return value is expected here.
    if self.sims_ is not None:
        # keep the first h rows of every simulation
        self.sims_ = tuple(
            sim[:h,] for sim in (
                tqdm(self.sims_) if self.verbose == 1 else self.sims_
            )
        )

        # Convert numpy arrays to DataFrames with proper columns
        self.sims_ = tuple(
            pd.DataFrame(
                sim,
                columns=self.df_.columns[: self.init_n_series_],
                index=self.output_dates_,
            )
            for sim in self.sims_
        )

        if self.type_pi in (
            "kde",
            "bootstrap",
            "block-bootstrap",
            "vine-copula",
        ):
            if self.xreg_ is not None:
                # Use getsimsxreg when external regressors are present
                target_cols = self.df_.columns[: self.init_n_series_]
                self.sims_ = getsimsxreg(
                    self.sims_, self.output_dates_, target_cols
                )
            else:
                # Use original getsims for backward compatibility
                self.sims_ = getsims(self.sims_)
target_cols 1496 ) 1497 else: 1498 # Use original getsims for backward compatibility 1499 self.sims_ = getsims(self.sims_) 1500 1501 def _crps_ensemble(self, y_true, simulations, axis=0): 1502 """ 1503 Compute the Continuous Ranked Probability Score (CRPS) for an ensemble of simulations. 1504 1505 The CRPS is a measure of the distance between the cumulative distribution 1506 function (CDF) of a forecast and the CDF of the observed value. This method 1507 computes the CRPS in a vectorized form for an ensemble of simulations, efficiently 1508 handling the case where there is only one simulation. 1509 1510 Parameters 1511 ---------- 1512 y_true : array_like, shape (n,) 1513 A 1D array of true values (observations). 1514 Each element represents the true value for a given sample. 1515 1516 simulations : array_like, shape (n, R) 1517 A 2D array of simulated values. Each row corresponds to a different sample 1518 and each column corresponds to a different simulation of that sample. 1519 1520 axis : int, optional, default=0 1521 Axis along which to transpose the simulations if needed. 1522 If axis=0, the simulations are transposed to shape (R, n). 1523 1524 Returns 1525 ------- 1526 crps : ndarray, shape (n,) 1527 A 1D array of CRPS scores, one for each sample. 1528 1529 Notes 1530 ----- 1531 The CRPS score is computed as: 1532 1533 CRPS(y_true, simulations) = E[|X - y|] - 0.5 * E[|X - X'|] 1534 1535 Where: 1536 - `X` is the ensemble of simulations. 1537 - `y` is the true value. 1538 - `X'` is a second independent sample from the ensemble. 1539 1540 The calculation is vectorized to optimize performance for large datasets. 1541 1542 The edge case where `R=1` (only one simulation) is handled by returning 1543 only `term1` (i.e., no ensemble spread). 
def score(
    self,
    X,
    training_index,
    testing_index,
    scoring=None,
    alpha=0.5,
    **kwargs,
):
    """Train on training_index, score on testing_index.

    Parameters:

        X: numpy array of shape (n_samples,) or (n_samples, n_series)
            the time series; rows are selected by position

        training_index: list-like of int
            row positions used for fitting

        testing_index: list-like of int
            row positions used for scoring; its length is the horizon

        scoring: str, optional
            'pinball', 'crps', or one of 'explained_variance',
            'neg_mean_absolute_error', 'neg_mean_squared_error',
            'neg_root_mean_squared_error', 'neg_mean_squared_log_error',
            'neg_median_absolute_error', 'r2'
            (default: 'neg_root_mean_squared_error')

        alpha: float
            quantile level used when scoring='pinball'

        **kwargs: dict
            additional parameters passed to `fit` and `predict`

    Returns:

        The score on the testing window; 'pinball' and 'crps' are averaged
        over the series

    """

    assert set(training_index).isdisjoint(
        testing_index
    ), "Non-overlapping 'training_index' and 'testing_index' required"

    # Dimensions
    try:
        # multivariate time series
        n, p = X.shape
    except ValueError:
        # univariate time series: shape does not unpack into (n, p)
        n = X.shape[0]
        p = 1

    # Training and testing sets
    if p > 1:
        X_train = X[training_index, :]
        X_test = X[testing_index, :]
    else:
        X_train = X[training_index]
        X_test = X[testing_index]

    # Horizon
    h = len(testing_index)
    assert (
        len(training_index) + h
    ) <= n, "Please check lengths of training and testing windows"

    self.fit(X_train, **kwargs)

    if scoring is None:
        scoring = "neg_root_mean_squared_error"

    if scoring in ("pinball", "crps"):
        # One column per series, also for 1-D (univariate) input
        y_true = np.asarray(X_test).reshape(h, -1)

    if scoring == "pinball":
        # Predict requested quantile
        q_pred = self.predict(h=h, quantiles=[alpha], **kwargs)
        q_label = (
            f"{int(alpha * 100):02d}"
            if (alpha * 100).is_integer()
            else f"{alpha:.3f}".replace(".", "_")
        )
        scores = []
        for j in range(p):
            series_name = getattr(self, "series_names", [f"Series_{j}"])[j]
            col = f"quantile_{q_label}_{series_name}"
            if col not in q_pred.columns:
                raise ValueError(
                    f"Column '{col}' not found in quantile forecast output."
                )
            scores.append(
                mean_pinball_loss(
                    y_true[:, j], q_pred[col].values, alpha=alpha
                )
            )
        # Return average over series
        return np.mean(scores)

    if scoring == "crps":
        self.predict(h=h, **kwargs)  # generates self.sims_
        # tuple of DataFrames -> (R, h, p)
        sims_vals = np.stack([sim.values for sim in self.sims_], axis=0)
        crps_scores = [
            # average over horizon
            np.mean(self._crps_ensemble(y_true[:, j], sims_vals[:, :, j]))
            for j in range(p)
        ]
        return np.mean(crps_scores)  # average over series

    preds = self.predict(h=h, **kwargs)

    # check inputs
    assert scoring in (
        "explained_variance",
        "neg_mean_absolute_error",
        "neg_mean_squared_error",
        "neg_root_mean_squared_error",
        "neg_mean_squared_log_error",
        "neg_median_absolute_error",
        "r2",
    ), (
        "'scoring' should be in ('explained_variance', "
        "'neg_mean_absolute_error', 'neg_mean_squared_error', "
        "'neg_root_mean_squared_error', 'neg_mean_squared_log_error', "
        "'neg_median_absolute_error', 'r2')"
    )

    scoring_options = {
        "explained_variance": skm2.explained_variance_score,
        "neg_mean_absolute_error": skm2.mean_absolute_error,
        "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2),
        "neg_root_mean_squared_error": lambda x, y: np.sqrt(
            np.mean((x - y) ** 2)
        ),
        "neg_mean_squared_log_error": skm2.mean_squared_log_error,
        "neg_median_absolute_error": skm2.median_absolute_error,
        "r2": skm2.r2_score,
    }

    return scoring_options[scoring](X_test, preds)
def plot(self, series=None, type_axis="dates", type_plot="pi"):
    """Plot time series forecast

    Parameters:

        series: {integer} or {string}
            series index or name (the first series when omitted)

        type_axis: {string}
            "dates" puts input and forecast dates on the x-axis,
            "numeric" uses observation positions

        type_plot: {string}
            "pi" draws the forecast with its prediction interval,
            "spaghetti" draws every simulated path

    """

    forecast_available = (
        self.mean_ is not None,
        self.lower_ is not None,
        self.upper_ is not None,
        self.output_dates_ is not None,
    )
    assert all(
        forecast_available
    ), "model forecasting must be obtained first (with predict)"

    # no series requested: fall back to the first one
    if series is None:
        series = 0

    if isinstance(series, str):
        assert (
            series in self.series_names
        ), f"series {series} doesn't exist in the input dataset"
        series_idx = self.df_.columns.get_loc(series)
    else:
        assert isinstance(series, int) and (
            0 <= series < self.n_series
        ), f"check series index (< {self.n_series})"
        series_idx = series

    observed = list(self.df_.iloc[:, series_idx])
    y_test = list(self.mean_.iloc[:, series_idx])
    y_all = observed + y_test
    n_points_all = len(y_all)
    n_points_train = self.df_.shape[0]

    if type_axis == "numeric":
        x_all = list(range(n_points_all))
        x_test = list(range(n_points_train, n_points_all))

    if type_axis == "dates":  # use dates
        x_all = np.concatenate(
            (self.input_dates.values, self.output_dates_.values), axis=None
        )
        x_test = self.output_dates_.values

    # every title in this method shares the same layout
    title_style = dict(loc="left", fontsize=12, fontweight=0, color="black")

    if type_plot == "pi":
        _fig, axes = plt.subplots()
        axes.plot(x_all, y_all, "-")
        axes.plot(x_test, y_test, "-", color="orange")
        axes.fill_between(
            x_test,
            self.lower_.iloc[:, series_idx],
            self.upper_.iloc[:, series_idx],
            alpha=0.2,
            color="orange",
        )
        if self.replications is None:
            if self.n_series > 1:
                heading = f"prediction intervals for {series}"
            else:
                heading = "prediction intervals for input time series"
        else:  # self.replications is not None
            if self.n_series > 1:
                heading = f"prediction intervals for {self.replications} simulations of {series}"
            else:
                heading = f"prediction intervals for {self.replications} simulations of input time series"
        plt.title(heading, **title_style)
        plt.show()

    if type_plot == "spaghetti":
        palette = plt.get_cmap("Set1")
        sims_ix = getsims(self.sims_, series_idx)
        plt.plot(x_all, y_all, "-")
        # one line per simulation: avoid with thousands of simulations
        n_paths = sims_ix.shape[1]
        for col_ix in range(n_paths):
            plt.plot(
                x_test,
                sims_ix[:, col_ix],
                "-",
                color=palette(col_ix),
                linewidth=1,
                alpha=0.9,
            )
        plt.plot(x_all, y_all, "-", color="black")
        plt.plot(x_test, y_test, "-", color="blue")
        if self.n_series > 1:
            heading = f"{self.replications} simulations of {series}"
        else:
            heading = f"{self.replications} simulations of input time series"
        plt.title(heading, **title_style)
        plt.xlabel("Time")
        plt.ylabel("Values")
        plt.show()
def cross_val_score(
    self,
    X,
    scoring="root_mean_squared_error",
    n_jobs=None,
    verbose=0,
    xreg=None,
    initial_window=5,
    horizon=3,
    fixed_window=False,
    show_progress=True,
    level=95,
    alpha=0.5,
    **kwargs,
):
    """Evaluate a score by time series cross-validation.

    Parameters:

        X: {array-like, sparse matrix} of shape (n_samples, n_features)
            The data to fit.

        scoring: str or a function
            A str in ('pinball', 'crps', 'root_mean_squared_error',
            'mean_squared_error', 'mean_error', 'mean_absolute_error',
            'mean_percentage_error', 'mean_absolute_percentage_error',
            'winkler_score', 'coverage')
            Or a function called as
            `scoring(X_test, X_pred, scoring, alpha=alpha)`

        n_jobs: int, default=None
            Number of jobs to run in parallel (not used by this method:
            the folds are evaluated sequentially).

        verbose: int, default=0
            The verbosity level; 1 prints the train/test indices.

        xreg: array-like, optional (default=None)
            Additional (external) regressors to be passed to `fit`
            xreg must be in 'increasing' order (most recent observations last)

        initial_window: int
            initial number of consecutive values in each training set sample

        horizon: int
            number of consecutive values in test set sample

        fixed_window: boolean
            if False, all training samples start at index 0, and the training
            window's size is increasing.
            if True, the training window's size is fixed, and the window is
            rolling forward

        show_progress: boolean
            if True, a progress bar is printed

        level: int
            confidence level for prediction intervals

        alpha: float
            quantile level for pinball loss if scoring='pinball'
            0 < alpha < 1

        **kwargs: dict
            additional parameters to be passed to `fit` and `predict`

    Returns:

        A tuple: raw errors (one per fold) and their descriptive statistics

    """
    tscv = TimeSeriesSplit()

    tscv_obj = tscv.split(
        X,
        initial_window=initial_window,
        horizon=horizon,
        fixed_window=fixed_window,
    )

    if isinstance(scoring, str):
        assert scoring in (
            "pinball",
            "crps",
            "root_mean_squared_error",
            "mean_squared_error",
            "mean_error",
            "mean_absolute_error",
            "mean_percentage_error",
            "mean_absolute_percentage_error",
            "winkler_score",
            "coverage",
        ), (
            "must have scoring in ('pinball', 'crps', "
            "'root_mean_squared_error', 'mean_squared_error', "
            "'mean_error', 'mean_absolute_error', 'mean_error', "
            "'mean_percentage_error', 'mean_absolute_percentage_error', "
            "'winkler_score', 'coverage')"
        )

        def err_func(X_test, X_pred, scoring, alpha=0.5):
            probabilistic = (self.replications is not None) or (
                self.type_pi == "gaussian"
            )
            if not probabilistic:
                return mean_errors(
                    pred=X_pred, actual=X_test, scoring=scoring
                )

            if scoring in ("pinball", "crps"):
                # Plain (h, p) array for DataFrame and ndarray windows, so
                # each series is scored against a 1-D column.
                y_true = np.asarray(X_test)
                if y_true.ndim == 1:
                    y_true = y_true.reshape(-1, 1)
                p = y_true.shape[1]

            if scoring == "pinball":
                # Predict requested quantile
                q_pred = self.predict(
                    h=len(X_test), quantiles=[alpha], **kwargs
                )
                q_label = (
                    f"{int(alpha * 100):02d}"
                    if (alpha * 100).is_integer()
                    else f"{alpha:.3f}".replace(".", "_")
                )
                scores = []
                for j in range(p):
                    series_name = getattr(
                        self, "series_names", [f"Series_{j}"]
                    )[j]
                    col = f"quantile_{q_label}_{series_name}"
                    if col not in q_pred.columns:
                        raise ValueError(
                            f"Column '{col}' not found in quantile forecast output."
                        )
                    scores.append(
                        mean_pinball_loss(
                            y_true[:, j], q_pred[col].values, alpha=alpha
                        )
                    )
                # Return average over series
                return np.mean(scores)

            if scoring == "crps":
                # The fold's forecast normally leaves simulations in
                # self.sims_; only regenerate them when they are missing.
                if self.sims_ is None:
                    self.predict(h=len(X_test), **kwargs)
                # tuple of DataFrames -> (R, h, p)
                sims_vals = np.stack(
                    [sim.values for sim in self.sims_], axis=0
                )
                crps_scores = [
                    # average over horizon
                    np.mean(
                        self._crps_ensemble(y_true[:, j], sims_vals[:, :, j])
                    )
                    for j in range(p)
                ]
                return np.mean(crps_scores)  # average over series

            if scoring == "winkler_score":
                return winkler_score(X_pred, X_test, level=level)
            if scoring == "coverage":
                return coverage(X_pred, X_test, level=level)
            return mean_errors(
                pred=X_pred.mean, actual=X_test, scoring=scoring
            )

    else:  # isinstance(scoring, str) = False
        err_func = scoring

    # Materialise the splits so the progress bar knows the fold count
    train_indices = []
    test_indices = []
    for train_index, test_index in tscv_obj:
        train_indices.append(train_index)
        test_indices.append(test_index)

    if show_progress is True:
        iterator = tqdm(
            zip(train_indices, test_indices), total=len(train_indices)
        )
    else:
        iterator = zip(train_indices, test_indices)

    errors = []
    for train_index, test_index in iterator:
        if verbose == 1:
            print(f"TRAIN: {train_index}")
            print(f"TEST: {test_index}")

        if isinstance(X, pd.DataFrame):
            self.fit(X.iloc[train_index, :], xreg=xreg, **kwargs)
            X_test = X.iloc[test_index, :]
        else:
            self.fit(X[train_index, :], xreg=xreg, **kwargs)
            X_test = X[test_index, :]
        X_pred = self.predict(h=int(len(test_index)), level=level, **kwargs)

        errors.append(err_func(X_test, X_pred, scoring, alpha=alpha))

    res = np.asarray(errors)

    return res, describe(res)
2007 zip(train_indices, test_indices), total=len(train_indices) 2008 ) 2009 else: 2010 iterator = zip(train_indices, test_indices) 2011 2012 for train_index, test_index in iterator: 2013 if verbose == 1: 2014 print(f"TRAIN: {train_index}") 2015 print(f"TEST: {test_index}") 2016 2017 if isinstance(X, pd.DataFrame): 2018 self.fit(X.iloc[train_index, :], xreg=xreg, **kwargs) 2019 X_test = X.iloc[test_index, :] 2020 else: 2021 self.fit(X[train_index, :], xreg=xreg, **kwargs) 2022 X_test = X[test_index, :] 2023 X_pred = self.predict(h=int(len(test_index)), level=level, **kwargs) 2024 2025 errors.append(err_func(X_test, X_pred, scoring, alpha=alpha)) 2026 2027 res = np.asarray(errors) 2028 2029 return res, describe(res) 2030 2031 def _compute_information_criterion(self, curr_lags, criterion="AIC"): 2032 """Compute information criterion using existing residuals 2033 2034 Parameters 2035 ---------- 2036 curr_lags : int 2037 Current number of lags being evaluated 2038 criterion : str 2039 One of 'AIC', 'AICc', or 'BIC' 2040 2041 Returns 2042 ------- 2043 float 2044 Information criterion value or inf if parameters exceed observations 2045 """ 2046 # Get dimensions 2047 n_obs = self.residuals_.shape[0] 2048 n_features = int(self.init_n_series_ * curr_lags) 2049 n_hidden = int(self.n_hidden_features) 2050 # Calculate number of parameters 2051 term1 = int(n_features * n_hidden) 2052 term2 = int(n_hidden * self.init_n_series_) 2053 n_params = term1 + term2 2054 # Check if we have enough observations for the number of parameters 2055 if n_obs <= n_params + 1: 2056 return float("inf") # Return infinity if too many parameters 2057 # Compute RSS using existing residuals 2058 rss = np.sum(self.residuals_**2) 2059 # Compute criterion 2060 if criterion == "AIC": 2061 ic = n_obs * np.log(rss / n_obs) + 2 * n_params 2062 elif criterion == "AICc": 2063 ic = n_obs * np.log(rss / n_obs) + 2 * n_params * ( 2064 n_obs / (n_obs - n_params - 1) 2065 ) 2066 else: # BIC 2067 ic = n_obs * 
np.log(rss / n_obs) + n_params * np.log(n_obs) 2068 2069 return ic
Univariate and multivariate time series (MTS) forecasting with Quasi-Randomized networks
Parameters:
obj: object.
any object containing a method fit (obj.fit()) and a method predict
(obj.predict()).
n_hidden_features: int.
number of nodes in the hidden layer.
activation_name: str.
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'.
a: float.
hyperparameter for 'prelu' or 'elu' activation function.
nodes_sim: str.
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'.
bias: boolean.
indicates if the hidden layer contains a bias term (True) or not
(False).
dropout: float.
regularization parameter; (random) percentage of nodes dropped out
of the training.
direct_link: boolean.
indicates if the original predictors are included (True) in model's fitting or not (False).
n_clusters: int.
number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering).
cluster_encode: bool.
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding.
type_clust: str.
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm').
type_scaling: a tuple of 3 strings.
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax').
lags: int.
number of lags used for each time series.
If string, lags must be one of 'AIC', 'AICc', or 'BIC'.
type_pi: str.
type of prediction interval; currently:
- "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case
- "quantile": use model-agnostic quantile regression under the hood
- "kde": based on Kernel Density Estimation of in-sample residuals
- "bootstrap": based on independent bootstrap of in-sample residuals
- "block-bootstrap": based on basic block bootstrap of in-sample residuals
- "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals
- "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals
- "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals
- "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals
- "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals
- "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals
- based on copulas of in-sample residuals: 'vine-tll', 'vine-bb1', 'vine-bb6', 'vine-bb7', 'vine-bb8', 'vine-clayton',
'vine-frank', 'vine-gaussian', 'vine-gumbel', 'vine-indep', 'vine-joe', 'vine-student'
- 'scp-vine-tll', 'scp-vine-bb1', 'scp-vine-bb6', 'scp-vine-bb7', 'scp-vine-bb8', 'scp-vine-clayton',
'scp-vine-frank', 'scp-vine-gaussian', 'scp-vine-gumbel', 'scp-vine-indep', 'scp-vine-joe', 'scp-vine-student'
- 'scp2-vine-tll', 'scp2-vine-bb1', 'scp2-vine-bb6', 'scp2-vine-bb7', 'scp2-vine-bb8', 'scp2-vine-clayton',
'scp2-vine-frank', 'scp2-vine-gaussian', 'scp2-vine-gumbel', 'scp2-vine-indep', 'scp2-vine-joe', 'scp2-vine-student'
level: int.
level of confidence for `type_pi == 'quantile'` (default is `95`)
block_size: int.
size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap").
Default is ceil(3.15 * n_residuals^(1/3)), where n_residuals is the number of rows of the residuals.
replications: int.
number of replications (if needed, for predictive simulation). Default is 'None'.
kernel: str.
the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'.
agg: str.
either "mean" or "median" for simulation of bootstrap aggregating
seed: int.
reproducibility seed for nodes_sim=='uniform' or predictive simulation.
backend: str.
"cpu" or "gpu" or "tpu".
verbose: int.
0: not printing; 1: printing
show_progress: bool.
True: progress bar when fitting each series; False: no progress bar when fitting each series
Attributes:
fit_objs_: dict
objects adjusted to each individual time series
y_: {array-like}
MTS responses (most recent observations first)
X_: {array-like}
MTS lags
xreg_: {array-like}
external regressors
y_means_: dict
a dictionary of each series mean values
preds_: {array-like}
successive model predictions
preds_std_: {array-like}
standard deviation around the predictions for Bayesian base learners (`obj`)
gaussian_preds_std_: {array-like}
standard deviation around the predictions for `type_pi='gaussian'`
return_std_: boolean
return uncertainty or not (set in predict)
df_: data frame
the input data frame, in case a data.frame is provided to `fit`
n_obs_: int
number of time series observations (number of rows for multivariate)
level_: int
level of confidence for prediction intervals (default is 95)
residuals_: {array-like}
in-sample residuals (for `type_pi` not conformal prediction) or calibrated residuals
(for `type_pi` in conformal prediction)
residuals_sims_: tuple of {array-like}
simulations of in-sample residuals (for `type_pi` not conformal prediction) or
calibrated residuals (for `type_pi` in conformal prediction)
kde_: A scikit-learn object, see https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KernelDensity.html
residuals_std_dev_: residuals standard deviation
Examples:
Example 1:
import nnetsauce as ns
import numpy as np
from sklearn import linear_model
np.random.seed(123)
M = np.random.rand(10, 3)
M[:,0] = 10*M[:,0]
M[:,2] = 25*M[:,2]
print(M)
# Adjust Bayesian Ridge
regr4 = linear_model.BayesianRidge()
obj_MTS = ns.MTS(regr4, lags = 1, n_hidden_features=5)
obj_MTS.fit(M)
print(obj_MTS.predict())
# with credible intervals
print(obj_MTS.predict(return_std=True, level=80))
print(obj_MTS.predict(return_std=True, level=95))
Example 2:
import nnetsauce as ns
import numpy as np
import pandas as pd
from sklearn import linear_model
dataset = {
'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
'series1' : [34, 30, 35.6, 33.3, 38.1],
'series2' : [4, 5.5, 5.6, 6.3, 5.1],
'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
df = pd.DataFrame(dataset).set_index('date')
print(df)
# Adjust Bayesian Ridge
regr5 = linear_model.BayesianRidge()
obj_MTS = ns.MTS(regr5, lags = 1, n_hidden_features=5)
obj_MTS.fit(df)
print(obj_MTS.predict())
# with credible intervals
print(obj_MTS.predict(return_std=True, level=80))
print(obj_MTS.predict(return_std=True, level=95))
def fit(self, X, xreg=None, **kwargs):
    """Fit MTS model to training data X, with optional regressors xreg

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training time series, where n_samples is the number
            of samples and n_features is the number of features;
            X must be in increasing order (most recent observations last)

        xreg: {array-like}, shape = [n_samples, n_features_xreg]
            Additional (external) regressors to be passed to self.obj
            xreg must be in 'increasing' order (most recent observations last)

        **kwargs: for now, additional parameters to be passed to the
            kernel density estimator, when needed
            (see sklearn.neighbors.KernelDensity)

    Returns:

        self: object

    Notes:

        When `self.lags` is a string ('AIC', 'AICc' or 'BIC'), the number of
        lags is selected first by minimising that criterion over
        1..min(25, n_samples // 4), and `self.lags` is then overwritten with
        the selected integer.
    """
    # A 1-D input has no second axis: treat it as a single series.
    try:
        self.init_n_series_ = X.shape[1]
    except IndexError:
        self.init_n_series_ = 1

    # Automatic lag selection if requested
    if isinstance(self.lags, str):
        max_lags = min(25, X.shape[0] // 4)
        best_ic = float("inf")
        best_lags = 1

        if self.verbose:
            print(
                f"\nSelecting optimal number of lags using {self.lags}..."
            )
            iterator = tqdm(range(1, max_lags + 1))
        else:
            iterator = range(1, max_lags + 1)

        # The reversed data (most recent first) does not depend on the
        # candidate lag, so it is computed once instead of per iteration.
        if isinstance(X, pd.DataFrame):
            X_values = X.values[::-1]
        else:
            X_values = X[::-1]

        for lag in iterator:
            # Try current lag value
            if self.init_n_series_ > 1:
                mts_input = ts.create_train_inputs(X_values, lag)
            else:
                mts_input = ts.create_train_inputs(
                    X_values.reshape(-1, 1), lag
                )

            # Cook training set and fit model
            _, scaled_Z = self.cook_training_set(
                y=np.ones(mts_input[0].shape[0]), X=mts_input[1]
            )
            residuals_ = []

            for i in range(self.init_n_series_):
                y_mean = np.mean(mts_input[0][:, i])
                centered_y_i = mts_input[0][:, i] - y_mean
                self.obj.fit(X=scaled_Z, y=centered_y_i)
                residuals_.append(
                    (centered_y_i - self.obj.predict(scaled_Z)).tolist()
                )

            # _compute_information_criterion reads self.residuals_
            self.residuals_ = np.asarray(residuals_).T
            ic = self._compute_information_criterion(
                curr_lags=lag, criterion=self.lags
            )

            if self.verbose:
                print(f"Trying lags={lag}, {self.lags}={ic:.2f}")

            if ic < best_ic:
                best_ic = ic
                best_lags = lag

        if self.verbose:
            print(
                f"\nSelected {best_lags} lags with {self.lags}={best_ic:.2f}"
            )

        self.lags = best_lags

    self.input_dates = None
    self.df_ = None

    if not isinstance(X, pd.DataFrame):
        # input data set is a numpy array
        if xreg is None:
            X = pd.DataFrame(X)
            self.series_names = [
                "series" + str(i) for i in range(X.shape[1])
            ]
        else:
            # xreg is not None
            X = mo.cbind(X, xreg)
            self.xreg_ = xreg

    else:  # input data set is a DataFrame with column names
        X_index = None
        if X.index is not None:
            X_index = X.index
        if xreg is None:
            X = copy.deepcopy(mo.convert_df_to_numeric(X))
        else:
            X = copy.deepcopy(mo.cbind(mo.convert_df_to_numeric(X), xreg))
            self.xreg_ = xreg
        if X_index is not None:
            X.index = X_index
        self.series_names = X.columns.tolist()

    # self.df_ was reset to None just above, so the training frame is always
    # rebuilt from scratch here (the former "append to an existing df_"
    # branches could never execute and were removed).
    if isinstance(X, pd.DataFrame):
        self.df_ = X
        X = X.values
        self.df_.columns = self.series_names
    else:
        self.df_ = pd.DataFrame(X, columns=self.series_names)

    self.input_dates = ts.compute_input_dates(self.df_)

    try:
        # multivariate time series
        n, p = X.shape
    except ValueError:
        # univariate time series: a 1-D shape cannot be unpacked in two
        n = X.shape[0]
        p = 1
    self.n_obs_ = n

    rep_1_n = np.repeat(1, n)

    # Reset every fitted attribute before refitting
    self.y_ = None
    self.X_ = None
    self.n_series = p
    self.fit_objs_.clear()
    self.y_means_.clear()
    residuals_ = []
    self.residuals_ = None
    self.residuals_sims_ = None
    self.kde_ = None
    self.sims_ = None
    self.scaled_Z_ = None
    self.centered_y_is_ = []

    if self.init_n_series_ > 1:
        # multivariate time series
        mts_input = ts.create_train_inputs(X[::-1], self.lags)
    else:
        # univariate time series
        mts_input = ts.create_train_inputs(
            X.reshape(-1, 1)[::-1], self.lags
        )

    self.y_ = mts_input[0]

    self.X_ = mts_input[1]

    _, scaled_Z = self.cook_training_set(y=rep_1_n, X=self.X_)

    self.scaled_Z_ = scaled_Z

    # loop on all the time series and adjust self.obj.fit
    if self.verbose > 0:
        print(
            f"\n Adjusting {type(self.obj).__name__} to multivariate time series... \n"
        )

    if self.show_progress is True:
        iterator = tqdm(range(self.init_n_series_))
    else:
        iterator = range(self.init_n_series_)

    # In-sample residuals are only collected for these interval types;
    # 'quantile' fits the same per-series models but stores no residuals.
    keeps_residuals = self.type_pi in (
        "gaussian",
        "kde",
        "bootstrap",
        "block-bootstrap",
    ) or self.type_pi.startswith("vine")

    if keeps_residuals or self.type_pi == "quantile":
        for i in iterator:
            y_mean = np.mean(self.y_[:, i])
            self.y_means_[i] = y_mean
            centered_y_i = self.y_[:, i] - y_mean
            self.centered_y_is_.append(centered_y_i)
            self.obj.fit(X=scaled_Z, y=centered_y_i)
            self.fit_objs_[i] = deepcopy(self.obj)
            if keeps_residuals:
                residuals_.append(
                    (
                        centered_y_i - self.fit_objs_[i].predict(scaled_Z)
                    ).tolist()
                )

    if self.type_pi.startswith("scp"):
        # split conformal prediction: the split does not depend on the
        # series, so the index ranges are built once
        n_y = self.y_.shape[0]
        n_y_half = n_y // 2
        first_half_idx = range(0, n_y_half)
        second_half_idx = range(n_y_half, n_y)
        for i in iterator:
            y_mean_temp = np.mean(self.y_[first_half_idx, i])
            centered_y_i_temp = self.y_[first_half_idx, i] - y_mean_temp
            self.obj.fit(X=scaled_Z[first_half_idx, :], y=centered_y_i_temp)
            # calibrated residuals actually
            residuals_.append(
                (
                    self.y_[second_half_idx, i]
                    - (
                        y_mean_temp
                        + self.obj.predict(scaled_Z[second_half_idx, :])
                    )
                ).tolist()
            )
            # fit on the second half
            y_mean = np.mean(self.y_[second_half_idx, i])
            self.y_means_[i] = y_mean
            centered_y_i = self.y_[second_half_idx, i] - y_mean
            self.obj.fit(X=scaled_Z[second_half_idx, :], y=centered_y_i)
            self.fit_objs_[i] = deepcopy(self.obj)

    self.residuals_ = np.asarray(residuals_).T

    if self.type_pi == "gaussian":
        self.gaussian_preds_std_ = np.std(self.residuals_, axis=0)

    if self.type_pi.startswith("scp2"):
        # Calculate mean and standard deviation for each column
        data_mean = np.mean(self.residuals_, axis=0)
        self.residuals_std_dev_ = np.std(self.residuals_, axis=0)
        # Center and scale the array using broadcasting
        self.residuals_ = (
            self.residuals_ - data_mean[np.newaxis, :]
        ) / self.residuals_std_dev_[np.newaxis, :]

    if self.replications is not None and "kde" in self.type_pi:
        if self.verbose > 0:
            print(f"\n Simulate residuals using {self.kernel} kernel... \n")
        assert self.kernel in (
            "gaussian",
            "tophat",
        ), "currently, 'kernel' must be either 'gaussian' or 'tophat'"
        # Bandwidth is chosen by cross-validated grid search
        kernel_bandwidths = {"bandwidth": np.logspace(-6, 6, 150)}
        grid = GridSearchCV(
            KernelDensity(kernel=self.kernel, **kwargs),
            param_grid=kernel_bandwidths,
        )
        grid.fit(self.residuals_)

        if self.verbose > 0:
            print(
                f"\n Best parameters for {self.kernel} kernel: {grid.best_params_} \n"
            )

        self.kde_ = grid.best_estimator_

    return self
Fit MTS model to training data X, with optional regressors xreg
Parameters:
X: {array-like}, shape = [n_samples, n_features] Training time series, where n_samples is the number of samples and n_features is the number of features; X must be in increasing order (most recent observations last)
xreg: {array-like}, shape = [n_samples, n_features_xreg] Additional (external) regressors to be passed to self.obj xreg must be in 'increasing' order (most recent observations last)
**kwargs: for now, additional parameters to be passed to the kernel density estimator, when needed (see sklearn.neighbors.KernelDensity)
Returns:
self: object
def predict(self, h=5, level=95, quantiles=None, **kwargs):
    """Forecast all the time series, h steps ahead

    Parameters:

        h: int
            Forecasting horizon (number of steps ahead)

        level: int, or list / numpy array of int
            Level of confidence for the prediction intervals. When a list
            or array is given, `predict` is called once per level and the
            lower/upper bounds are gathered in a single data frame.

        quantiles: list of floats, optional
            Quantile levels, each strictly between 0 and 1. When given,
            the call is delegated to `self._predict_quantiles`.

        **kwargs: additional parameters; the keys read here are
            `return_std` (Bayesian base learners), `return_pi`
            (base learners returning prediction intervals) and `xreg`
            (future values of the external regressors). They are also
            forwarded to `self.cook_test_set`.

    Returns:

        - the output of `self._predict_quantiles` if `quantiles` is given;
        - a data frame with columns `lower_<level>_<series>` and
          `upper_<level>_<series>` if `level` is a list or array;
        - a data frame of point forecasts when no simulation is available
          (`self.replications is None`) and neither `return_std` nor
          `return_pi` is requested, for `type_pi` other than 'gaussian';
        - otherwise a namedtuple `DescribeResult` with fields
          (`mean`, `sims`, `lower`, `upper`) for simulation-based intervals,
          or (`mean`, `lower`, `upper`) for `return_std`, `return_pi` and
          `type_pi='gaussian'`.

    Raises:

        ValueError: if a value of `quantiles` is not strictly in (0, 1).
    """

    if quantiles is not None:
        # Validate
        quantiles = np.asarray(quantiles)
        if not ((quantiles > 0) & (quantiles < 1)).all():
            raise ValueError("quantiles must be between 0 and 1.")
        # Delegate to dedicated method
        return self._predict_quantiles(h=h, quantiles=quantiles, **kwargs)

    if isinstance(level, list) or isinstance(level, np.ndarray):
        # One forecast per level, flattened into lower_/upper_ columns
        result_dict = {}
        for lev in level:
            res = self.predict(h=h, level=lev, **kwargs)
            res.lower.index = pd.to_datetime(res.lower.index)
            res.upper.index = pd.to_datetime(res.upper.index)
            if isinstance(res.lower, pd.DataFrame):
                for series in res.lower.columns:
                    result_dict[f"lower_{lev}_{series}"] = (
                        res.lower[series].to_numpy().flatten()
                    )
                    result_dict[f"upper_{lev}_{series}"] = (
                        res.upper[series].to_numpy().flatten()
                    )
            else:
                for series_id in range(self.n_series):
                    result_dict[f"lower_{lev}_{series_id}"] = (
                        res.lower[series_id, :].to_numpy().flatten()
                    )
                    result_dict[f"upper_{lev}_{series_id}"] = (
                        res.upper[series_id, :].to_numpy().flatten()
                    )
        return pd.DataFrame(result_dict, index=self.output_dates_)

    def _progress(iterable):
        # Progress bar only when verbose == 1; any other value iterates
        # silently (previously, values other than 0/1 skipped the
        # simulation steps altogether).
        return tqdm(iterable) if self.verbose == 1 else iterable

    def _strip_xreg(result):
        # Drop the trailing external-regressor columns from every field
        # of `result`, when external regressors were used in fit.
        if self.xreg_ is None:
            return result
        n_drop = self.xreg_.shape[1] if len(self.xreg_.shape) > 1 else 1
        trimmed = mx.tuple_map(
            result,
            lambda x: mo.delete_last_columns(x, num_columns=n_drop),
        )
        return type(result)(*(trimmed[k] for k in range(len(result))))

    # only one prediction interval
    self.output_dates_, _ = ts.compute_output_dates(self.df_, h)

    self.level_ = level

    self.return_std_ = False  # do not remove (/!\)

    self.mean_ = deepcopy(self.y_)  # do not remove (/!\)

    self.lower_ = None  # do not remove (/!\)

    self.upper_ = None  # do not remove (/!\)

    self.sims_ = None  # do not remove (/!\)

    y_means_ = np.asarray(
        [self.y_means_[i] for i in range(self.init_n_series_)]
    )

    self.alpha_ = 100 - level

    # two-sided Gaussian multiplier for the requested level
    pi_multiplier = norm.ppf(1 - self.alpha_ / 200)

    if "return_std" in kwargs:  # bayesian forecasting
        self.return_std_ = True
        self.preds_std_ = []

    if "return_pi" in kwargs:  # split conformal, without simulation
        lower_pi_ = []
        upper_pi_ = []

    if self.kde_ is not None and "kde" in self.type_pi:  # kde
        target_cols = self.df_.columns[
            : self.init_n_series_
        ]  # Get target column names
        sampled = tuple(
            self.kde_.sample(
                n_samples=h, random_state=self.seed + 100 * i
            )  # Keep full sample
            for i in _progress(range(self.replications))
        )
        # Convert to DataFrames after sampling
        self.residuals_sims_ = tuple(
            pd.DataFrame(
                sim,  # Keep all columns
                columns=target_cols,  # Use original target column names
                index=self.output_dates_,
            )
            for sim in sampled
        )

    if self.type_pi in ("bootstrap", "scp-bootstrap", "scp2-bootstrap"):
        assert self.replications is not None and isinstance(
            self.replications, int
        ), "'replications' must be provided and be an integer"
        self.residuals_sims_ = tuple(
            ts.bootstrap(
                self.residuals_,
                h=h,
                block_size=None,
                seed=self.seed + 100 * i,
            )
            for i in _progress(range(self.replications))
        )

    if self.type_pi in (
        "block-bootstrap",
        "scp-block-bootstrap",
        "scp2-block-bootstrap",
    ):
        if self.block_size is None:
            self.block_size = int(
                np.ceil(3.15 * (self.residuals_.shape[0] ** (1 / 3)))
            )

        assert self.replications is not None and isinstance(
            self.replications, int
        ), "'replications' must be provided and be an integer"
        self.residuals_sims_ = tuple(
            ts.bootstrap(
                self.residuals_,
                h=h,
                block_size=self.block_size,
                seed=self.seed + 100 * i,
            )
            for i in _progress(range(self.replications))
        )

    if "vine" in self.type_pi:
        self.residuals_sims_ = tuple(
            vinecopula_sample(
                x=self.residuals_,
                n_samples=h,
                method=self.type_pi,
                random_state=self.seed + 100 * i,
            )
            for i in _progress(range(self.replications))
        )

    mean_ = deepcopy(self.mean_)

    # Recursive forecasting: each new prediction is pushed on top of
    # `mean_` (most recent first) and feeds the next step's lags.
    for step in range(h):
        new_obs = ts.reformat_response(mean_, self.lags)
        new_X = new_obs.reshape(1, -1)
        cooked_new_X = self.cook_test_set(new_X, **kwargs)

        if "return_std" in kwargs:
            self.preds_std_.append(
                [
                    np.asarray(
                        self.fit_objs_[j].predict(
                            cooked_new_X, return_std=True
                        )[1]
                    ).item()
                    for j in range(self.n_series)
                ]
            )

        if "return_pi" in kwargs:
            # A distinct loop variable is required here: reusing the
            # horizon index would make the xreg lookup below read the
            # wrong row.
            for j in range(self.n_series):
                preds_pi = self.fit_objs_[j].predict(cooked_new_X, **kwargs)
                lower_pi_.append(preds_pi.lower[0])
                upper_pi_.append(preds_pi.upper[0])

        if self.type_pi != "quantile":
            predicted_cooked_new_X = np.asarray(
                [
                    np.asarray(
                        self.fit_objs_[j].predict(cooked_new_X)
                    ).item()
                    for j in range(self.init_n_series_)
                ]
            )
        else:
            predicted_cooked_new_X = np.asarray(
                [
                    np.asarray(
                        self.fit_objs_[j]
                        .predict(cooked_new_X, return_pi=True)
                        .upper
                    ).item()
                    for j in range(self.init_n_series_)
                ]
            )

        # models were fitted on centered responses: add the means back
        preds = np.asarray(y_means_ + predicted_cooked_new_X)

        # Create full row with both predictions and external regressors
        if self.xreg_ is not None and "xreg" in kwargs:
            next_xreg = kwargs["xreg"].iloc[step : step + 1].values.flatten()
            full_row = np.concatenate([preds, next_xreg])
        else:
            full_row = preds

        # Create a new row with same number of columns as mean_
        new_row = np.zeros((1, mean_.shape[1]))
        new_row[0, : full_row.shape[0]] = full_row

        # Maintain the full dimensionality by using vstack instead of rbind
        mean_ = np.vstack([new_row, mean_[:-1]])

    # Final output should only include the target columns
    self.mean_ = pd.DataFrame(
        mean_[0 : min(h, self.n_obs_ - self.lags), : self.init_n_series_][
            ::-1
        ],
        columns=self.df_.columns[: self.init_n_series_],
        index=self.output_dates_,
    )

    # function's return -----------------------------------------------------
    if (
        (("return_std" not in kwargs) and ("return_pi" not in kwargs))
        and (self.type_pi not in ("gaussian", "scp"))
    ) or ("vine" in self.type_pi):
        if self.replications is None:
            return self.mean_.iloc[:, : self.init_n_series_]

        # simulation-based predictive distribution
        if "scp2" in self.type_pi:
            # residuals were standardized in fit: scale them back
            self.sims_ = tuple(
                self.mean_
                + self.residuals_sims_[i]
                * self.residuals_std_dev_[np.newaxis, :]
                for i in _progress(range(self.replications))
            )
        else:
            self.sims_ = tuple(
                self.mean_ + self.residuals_sims_[i]
                for i in _progress(range(self.replications))
            )

        DescribeResult = namedtuple(
            "DescribeResult", ("mean", "sims", "lower", "upper")
        )

        target_names = self.series_names[: self.init_n_series_]

        def _frame(columns_list):
            # one array per series -> (h, n_series) data frame
            return pd.DataFrame(
                np.asarray(columns_list).T,
                columns=target_names,
                index=self.output_dates_,
            )

        meanf = []
        medianf = []
        lower = []
        upper = []
        want_median = self.agg != "mean"
        for ix in range(self.init_n_series_):
            sims_ix = getsims(self.sims_, ix)
            # The mean is always computed: the returned `mean` field is
            # built from it, and leaving it empty for agg != "mean" made
            # the data frame construction below fail.
            meanf.append(np.mean(sims_ix, axis=1))
            if want_median:
                medianf.append(np.median(sims_ix, axis=1))
            lower.append(np.quantile(sims_ix, q=self.alpha_ / 200, axis=1))
            upper.append(
                np.quantile(sims_ix, q=1 - self.alpha_ / 200, axis=1)
            )

        self.mean_ = _frame(meanf)
        self.lower_ = _frame(lower)
        self.upper_ = _frame(upper)
        if want_median:
            # median of the simulations, exposed as an attribute only
            self.median_ = _frame(medianf)

        return DescribeResult(
            self.mean_, self.sims_, self.lower_, self.upper_
        )

    if (
        (("return_std" in kwargs) or ("return_pi" in kwargs))
        and (self.type_pi not in ("gaussian", "scp"))
    ) or "vine" in self.type_pi:
        DescribeResult = namedtuple(
            "DescribeResult", ("mean", "lower", "upper")
        )

        self.mean_ = pd.DataFrame(
            np.asarray(self.mean_),
            columns=self.series_names,  # self.df_.columns,
            index=self.output_dates_,
        )

        if "return_std" in kwargs:
            self.preds_std_ = np.asarray(self.preds_std_)

            self.lower_ = pd.DataFrame(
                self.mean_.values - pi_multiplier * self.preds_std_,
                columns=self.series_names,  # self.df_.columns,
                index=self.output_dates_,
            )

            self.upper_ = pd.DataFrame(
                self.mean_.values + pi_multiplier * self.preds_std_,
                columns=self.series_names,  # self.df_.columns,
                index=self.output_dates_,
            )

        if "return_pi" in kwargs:
            self.lower_ = pd.DataFrame(
                np.asarray(lower_pi_).reshape(h, self.n_series)
                + y_means_[np.newaxis, :],
                columns=self.series_names,  # self.df_.columns,
                index=self.output_dates_,
            )

            self.upper_ = pd.DataFrame(
                np.asarray(upper_pi_).reshape(h, self.n_series)
                + y_means_[np.newaxis, :],
                columns=self.series_names,  # self.df_.columns,
                index=self.output_dates_,
            )

        return _strip_xreg(
            DescribeResult(self.mean_, self.lower_, self.upper_)
        )

    if self.type_pi == "gaussian":
        DescribeResult = namedtuple(
            "DescribeResult", ("mean", "lower", "upper")
        )

        self.mean_ = pd.DataFrame(
            np.asarray(self.mean_),
            columns=self.series_names,  # self.df_.columns,
            index=self.output_dates_,
        )

        # Use Bayesian std if available, otherwise use gaussian residual std
        if "return_std" in kwargs and len(self.preds_std_) > 0:
            preds_std_to_use = np.asarray(self.preds_std_)
        else:
            preds_std_to_use = self.gaussian_preds_std_

        self.lower_ = pd.DataFrame(
            self.mean_.values - pi_multiplier * preds_std_to_use,
            columns=self.series_names,  # self.df_.columns,
            index=self.output_dates_,
        )

        self.upper_ = pd.DataFrame(
            self.mean_.values + pi_multiplier * preds_std_to_use,
            columns=self.series_names,  # self.df_.columns,
            index=self.output_dates_,
        )

        return _strip_xreg(
            DescribeResult(self.mean_, self.lower_, self.upper_)
        )

    if self.type_pi == "quantile":
        # single-field result; ("mean",) is the explicit form of ("mean")
        DescribeResult = namedtuple("DescribeResult", ("mean",))

        self.mean_ = pd.DataFrame(
            np.asarray(self.mean_),
            columns=self.series_names,  # self.df_.columns,
            index=self.output_dates_,
        )

        return _strip_xreg(DescribeResult(self.mean_))

    # NOTE(review): every branch above returns except for type_pi == "scp",
    # so the post-processing below looks effectively unused -- confirm
    # before relying on it. Its nesting was reconstructed from a listing
    # without indentation; verify against the package source.
    # After prediction loop, ensure sims only contain target columns
    if self.sims_ is not None:
        self.sims_ = tuple(
            sim[:h,]  # Only keep target columns and h rows
            for sim in _progress(self.sims_)
        )

        # Convert numpy arrays to DataFrames with proper columns
        self.sims_ = tuple(
            pd.DataFrame(
                sim,
                columns=self.df_.columns[: self.init_n_series_],
                index=self.output_dates_,
            )
            for sim in self.sims_
        )

    if self.type_pi in (
        "kde",
        "bootstrap",
        "block-bootstrap",
        "vine-copula",
    ):
        if self.xreg_ is not None:
            # Use getsimsxreg when external regressors are present
            target_cols = self.df_.columns[: self.init_n_series_]
            self.sims_ = getsimsxreg(
                self.sims_, self.output_dates_, target_cols
            )
        else:
            # Use original getsims for backward compatibility
            self.sims_ = getsims(self.sims_)
Forecast all the time series, h steps ahead
def score(
    self,
    X,
    training_index,
    testing_index,
    scoring=None,
    alpha=0.5,
    **kwargs,
):
    """Train on training_index, score on testing_index.

    Parameters
    ----------
    X : array
        Observed series, one row per time step.  It is indexed positionally
        (``X[training_index, :]`` for 2-D input, ``X[training_index]``
        otherwise), so it must support NumPy-style indexing.
    training_index : sequence of int
        Row positions used to fit the model.
    testing_index : sequence of int
        Row positions used for evaluation; its length is the forecast
        horizon.  Must not overlap ``training_index``.
    scoring : str, optional
        One of 'explained_variance', 'neg_mean_absolute_error',
        'neg_mean_squared_error', 'neg_root_mean_squared_error',
        'neg_mean_squared_log_error', 'neg_median_absolute_error', 'r2',
        'pinball' or 'crps'.  Defaults to 'neg_root_mean_squared_error'.
        Despite the ``neg_`` prefix, the error metrics are returned with
        their natural sign (they are not negated).
    alpha : float
        Quantile level, used only when ``scoring == 'pinball'``.
    **kwargs
        Forwarded to both ``self.fit`` and ``self.predict``.

    Returns
    -------
    The value of the requested metric on the testing window.  For 'pinball'
    and 'crps' this is the average over series.

    Raises
    ------
    AssertionError
        If the two index sets overlap, if the windows are longer than
        ``X``, or if ``scoring`` is not a supported name.
    ValueError
        If the expected quantile column is missing from the forecast
        ('pinball'), or if no simulations are available ('crps').
    """
    # Assertions (rather than ValueError) are kept on purpose so that
    # callers catching AssertionError keep working.
    assert not set(training_index).intersection(
        testing_index
    ), "Non-overlapping 'training_index' and 'testing_index' required"

    if scoring is None:
        scoring = "neg_root_mean_squared_error"

    # Validate the metric name before the (potentially expensive) fit.
    valid_scorings = (
        "explained_variance",
        "neg_mean_absolute_error",
        "neg_mean_squared_error",
        "neg_root_mean_squared_error",
        "neg_mean_squared_log_error",
        "neg_median_absolute_error",
        "r2",
        "pinball",
        "crps",
    )
    assert scoring in valid_scorings, f"'scoring' should be in {valid_scorings}"

    # Dimensions
    try:
        # multivariate time series
        n, p = X.shape
    except ValueError:
        # univariate time series (shape does not unpack into two values)
        n = X.shape[0]
        p = 1

    # Training and testing sets
    if p > 1:
        X_train = X[training_index, :]
        X_test = X[testing_index, :]
    else:
        X_train = X[training_index]
        X_test = X[testing_index]

    # Horizon
    h = len(testing_index)
    assert (
        len(training_index) + h
    ) <= n, "Please check lengths of training and testing windows"

    self.fit(X_train, **kwargs)

    if scoring in ("pinball", "crps"):
        # Always work on an (h, p) array so that univariate (1-D) input can
        # be indexed column-wise like the multivariate case.
        y_true = np.asarray(X_test).reshape(h, -1)

        if scoring == "pinball":
            # Predict requested quantile
            q_pred = self.predict(h=h, quantiles=[alpha], **kwargs)
            names = getattr(self, "series_names", None)
            if names is None:
                names = [f"Series_{j}" for j in range(p)]
            # The label does not depend on the series: build it once.
            pct = float(alpha * 100)
            q_label = (
                f"{int(pct):02d}"
                if pct.is_integer()
                else f"{alpha:.3f}".replace(".", "_")
            )
            losses = []
            for j in range(p):
                col = f"quantile_{q_label}_{names[j]}"
                if col not in q_pred.columns:
                    raise ValueError(
                        f"Column '{col}' not found in quantile forecast output."
                    )
                losses.append(
                    mean_pinball_loss(
                        y_true[:, j], q_pred[col].values, alpha=alpha
                    )
                )
            # Return average over series
            return np.mean(losses)

        # scoring == "crps": predicting is what populates self.sims_
        self.predict(h=h, **kwargs)
        sims = getattr(self, "sims_", None)
        if sims is None:
            raise ValueError(
                "scoring='crps' requires simulations, but predict() did not "
                "produce any (self.sims_ is None)"
            )
        # Tuple of DataFrames -> array of shape (R, h, p)
        sims_vals = np.stack([sim.values for sim in sims], axis=0)
        crps_scores = [
            # average over horizon
            np.mean(self._crps_ensemble(y_true[:, j], sims_vals[:, :, j]))
            for j in range(p)
        ]
        return np.mean(crps_scores)  # average over series

    preds = self.predict(h=h, **kwargs)
    # With prediction intervals enabled, predict() returns a namedtuple
    # (mean, lower, upper[, sims]); only the point forecast is scored.
    if isinstance(preds, tuple) and hasattr(preds, "mean"):
        preds = preds.mean

    # Metrics computed directly with NumPy
    closed_form = {
        "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2),
        "neg_root_mean_squared_error": lambda x, y: np.sqrt(
            np.mean((x - y) ** 2)
        ),
    }
    if scoring in closed_form:
        return closed_form[scoring](X_test, preds)

    # Remaining metrics are delegated to the metrics module
    delegated = {
        "explained_variance": skm2.explained_variance_score,
        "neg_mean_absolute_error": skm2.mean_absolute_error,
        "neg_mean_squared_log_error": skm2.mean_squared_log_error,
        "neg_median_absolute_error": skm2.median_absolute_error,
        "r2": skm2.r2_score,
    }
    return delegated[scoring](X_test, preds)
Train on training_index, score on testing_index.
class MTSStacker(MTS):
    """
    Sequential stacking for time series with unified strategy.

    Core Strategy:
    1. Split data: half1 (base models) | half2 (meta-model)
    2. Train base models on half1, predict half2
    3. Create augmented dataset: [original_series | base_pred_1 | base_pred_2 | ...]
       (base predictions are stacked as additional time series)
    4. Train meta-MTS on half2 with augmented data
    5. Retrain base models on half2 for temporal alignment
    6. At prediction: the meta-model forecasts the augmented system over the
       whole horizon; the leading ``n_series_`` columns (the original
       series) are returned
    """

    def __init__(
        self,
        base_models,
        meta_model,
        split_ratio=0.5,
    ):
        """
        Parameters
        ----------
        base_models : list of sklearn-compatible models
            Base models (e.g., Ridge, Lasso, RandomForest)
        meta_model : nnetsauce.MTS instance
            MTS with type_pi='scp2-kde' or similar
        split_ratio : float
            Proportion for half1 (default: 0.5)

        Notes
        -----
        The parent initializer is not invoked here; only the attributes
        below are set.
        """
        self.base_models = base_models
        self.meta_model = meta_model
        self.split_ratio = split_ratio
        self.fitted_base_models_ = []
        self.split_idx_ = None
        self.mean_ = None
        self.lower_ = None
        self.upper_ = None
        self.sims_ = None
        self.output_dates_ = None

    def fit(self, X, xreg=None, **kwargs):
        """
        Fit MTSStacker using sequential stacking strategy.

        Parameters
        ----------
        X : array-like or DataFrame, shape (n_samples, n_features)
            Training time series (most recent observations last).  A 1-D
            input is treated as a single series.
        xreg : array-like, optional
            External regressors, forwarded to the meta-model.  When it has
            one row per row of ``X`` it is cut down to the half2 rows the
            meta-model is trained on.
        **kwargs : dict
            Additional parameters forwarded to ``meta_model.fit``

        Returns
        -------
        self : object

        Raises
        ------
        ValueError
            If no base model was supplied, or if either half of the split
            has fewer rows than ``meta_model.lags``.
        """
        if not self.base_models:
            raise ValueError("At least one base model is required")

        # 1. Store attributes and convert to DataFrame if needed
        if isinstance(X, pd.DataFrame):
            self.df_ = X.copy()
            X_array = X.values
            self.series_names = X.columns.tolist()
        else:
            X_array = np.asarray(X)
            if X_array.ndim == 1:
                # a single series: make it a one-column matrix
                X_array = X_array.reshape(-1, 1)
            self.df_ = pd.DataFrame(X_array)
            self.series_names = [f"series{i}" for i in range(X_array.shape[1])]

        n_samples, self.n_series_ = X_array.shape

        # 2. Split data into half1 and half2
        split_idx = int(n_samples * self.split_ratio)
        self.split_idx_ = split_idx

        # `lags` may also be an information-criterion name (automatic lag
        # selection); the size checks only make sense for a numeric value.
        lags = self.meta_model.lags
        if isinstance(lags, (int, np.integer)):
            if split_idx < lags:
                raise ValueError(
                    f"Split creates insufficient data: split_idx={split_idx} < "
                    f"lags={lags}. Increase split_ratio or use fewer lags."
                )
            if n_samples - split_idx < lags:
                raise ValueError(
                    f"Split creates insufficient data: only "
                    f"{n_samples - split_idx} rows left for the meta-model < "
                    f"lags={lags}. Decrease split_ratio or use fewer lags."
                )

        half1 = X_array[:split_idx]
        half2 = X_array[split_idx:]

        def _new_base_mts(estimator):
            # Wrap a fresh clone in an MTS configured like the meta-model,
            # without prediction intervals.
            return MTS(
                obj=clone(estimator),
                lags=self.meta_model.lags,
                n_hidden_features=self.meta_model.n_hidden_features,
                replications=self.meta_model.replications,
                kernel=self.meta_model.kernel,
                type_pi=None,
            )

        def _point_forecast(pred):
            # Reduce any supported predict() output to an array of point forecasts.
            if isinstance(pred, pd.DataFrame):
                return pred.values
            if isinstance(pred, np.ndarray):
                return pred
            if hasattr(pred, "mean"):
                # Named tuple with mean attribute
                mean_pred = pred.mean
                return (
                    mean_pred.values
                    if isinstance(mean_pred, pd.DataFrame)
                    else mean_pred
                )
            raise ValueError(f"Unexpected prediction type: {type(pred)}")

        # 3. Train base models on half1 and predict half2
        base_preds = []
        for base_model in self.base_models:
            base_mts = _new_base_mts(base_model)
            base_mts.fit(half1)
            base_preds.append(_point_forecast(base_mts.predict(h=len(half2))))

        # 4. Create augmented dataset: [original | base_pred_1 | base_pred_2 | ...]
        # shape: (len(half2), n_series * n_base_models)
        base_preds_array = np.hstack(base_preds)

        if isinstance(X, pd.DataFrame):
            half2_index = self.df_.index[split_idx:]
            half2_df = pd.DataFrame(
                half2, index=half2_index, columns=self.series_names
            )
            base_preds_df = pd.DataFrame(
                base_preds_array,
                index=half2_index,
                columns=[
                    f"base_{i}_{j}"
                    for i in range(len(self.base_models))
                    for j in range(self.n_series_)
                ],
            )
            augmented = pd.concat([half2_df, base_preds_df], axis=1)
        else:
            augmented = np.hstack([half2, base_preds_array])

        # The meta-model only sees half2, so full-length regressors must be
        # restricted to the same rows.
        if xreg is not None and len(xreg) == n_samples:
            if hasattr(xreg, "iloc"):
                xreg = xreg.iloc[split_idx:]
            else:
                xreg = np.asarray(xreg)[split_idx:]

        # 5. Train meta-model on augmented half2
        self.meta_model.fit(augmented, xreg=xreg, **kwargs)

        # Store meta-model attributes
        self.output_dates_ = self.meta_model.output_dates_
        self.fit_objs_ = self.meta_model.fit_objs_
        self.y_ = self.meta_model.y_
        self.X_ = self.meta_model.X_
        self.xreg_ = self.meta_model.xreg_
        self.y_means_ = self.meta_model.y_means_
        self.residuals_ = self.meta_model.residuals_

        # 6. Retrain base models on half2 for temporal alignment
        self.fitted_base_models_ = []
        for base_model in self.base_models:
            base_mts_final = _new_base_mts(base_model)
            base_mts_final.fit(half2)
            self.fitted_base_models_.append(base_mts_final)

        return self

    def predict(self, h=5, level=95, **kwargs):
        """
        Forecast h steps ahead with the fitted meta-model.

        The meta-model is fitted on the augmented system
        ``[original series | base-model predictions]``, so one
        ``meta_model.predict(h=h, ...)`` call forecasts every augmented
        column over the whole horizon; only the leading ``n_series_``
        columns (the original series) are kept.  A per-step loop over
        ``predict(h=1)`` is deliberately avoided: nothing can be fed back
        into the meta-model between calls, so it would return the
        one-step-ahead forecast ``h`` times.

        Parameters
        ----------
        h : int
            Forecast horizon
        level : int
            Confidence level for prediction intervals; ``None`` disables
            the extraction of intervals
        **kwargs : dict
            Additional parameters forwarded to ``meta_model.predict``

        Returns
        -------
        DescribeResult or DataFrame
            ``(mean, sims, lower, upper)`` or ``(mean, lower, upper)`` when
            the meta-model returns intervals, the mean DataFrame otherwise

        Raises
        ------
        ValueError
            If called before ``fit``, or if the meta-model returns an
            unsupported type.
        """
        if getattr(self, "df_", None) is None:
            raise ValueError("MTSStacker must be fitted before calling predict")

        n_targets = self.n_series_

        def _target_values(block):
            # Keep the first h rows and the original-series columns as an array.
            values = (
                block.values
                if isinstance(block, pd.DataFrame)
                else np.asarray(block)
            )
            return values[:h, :n_targets]

        # Forget the previous call so stale intervals are never reported
        self.lower_ = None
        self.upper_ = None
        self.sims_ = None

        result = self.meta_model.predict(h=h, level=level, **kwargs)

        lower_values = upper_values = sims = None
        if isinstance(result, (pd.DataFrame, np.ndarray)):
            mean_values = _target_values(result)
        elif hasattr(result, "mean"):
            mean_values = _target_values(result.mean)
            if level is not None:
                if hasattr(result, "lower"):
                    lower_values = _target_values(result.lower)
                if hasattr(result, "upper"):
                    upper_values = _target_values(result.upper)
            if (
                getattr(self.meta_model, "type_pi", None)
                and getattr(result, "sims", None) is not None
            ):
                sims = tuple(
                    sim.iloc[:, :n_targets]
                    if isinstance(sim, pd.DataFrame)
                    else sim
                    for sim in result.sims
                )
        else:
            raise ValueError(f"Unexpected forecast type: {type(result)}")

        # Create output dates
        fallback_index = pd.RangeIndex(
            start=len(self.df_), stop=len(self.df_) + h
        )
        if isinstance(self.df_.index, pd.DatetimeIndex):
            freq = pd.infer_freq(self.df_.index)
            if freq:
                # the first generated date is the last observed one: drop it
                output_dates = pd.date_range(
                    start=self.df_.index[-1], periods=h + 1, freq=freq
                )[1:]
            else:
                output_dates = fallback_index
        else:
            output_dates = fallback_index

        self.output_dates_ = output_dates
        target_names = self.series_names[:n_targets]

        mean_df = pd.DataFrame(
            mean_values, index=output_dates, columns=target_names
        )
        self.mean_ = mean_df

        if lower_values is None or upper_values is None:
            return mean_df

        lower_df = pd.DataFrame(
            lower_values, index=output_dates, columns=target_names
        )
        upper_df = pd.DataFrame(
            upper_values, index=output_dates, columns=target_names
        )
        self.lower_ = lower_df
        self.upper_ = upper_df

        if sims:
            self.sims_ = sims
            DescribeResult = namedtuple(
                "DescribeResult", ("mean", "sims", "lower", "upper")
            )
            return DescribeResult(mean_df, self.sims_, lower_df, upper_df)

        DescribeResult = namedtuple(
            "DescribeResult", ("mean", "lower", "upper")
        )
        return DescribeResult(mean_df, lower_df, upper_df)

    def plot(self, series=None, **kwargs):
        """
        Plot the time series with forecasts and prediction intervals.

        Parameters
        ----------
        series : str or int, optional
            Name or index of the series to plot (default: 0)
        **kwargs : dict
            Accepted for interface compatibility; currently unused

        Raises
        ------
        ValueError
            If ``predict`` has not been called, if the series name is
            unknown, or if the series index is out of bounds.
        """
        # Ensure we have predictions
        if self.mean_ is None:
            raise ValueError(
                "Model forecasting must be obtained first (call predict)"
            )

        # Convert series name to index if needed
        if isinstance(series, str):
            if series not in self.series_names:
                raise ValueError(
                    f"Series '{series}' doesn't exist in the input dataset"
                )
            series_idx = self.series_names.index(series)
        else:
            series_idx = 0 if series is None else series

        # Check bounds
        if series_idx < 0 or series_idx >= self.n_series_:
            raise ValueError(
                f"Series index {series_idx} is out of bounds (0 to {self.n_series_ - 1})"
            )

        # Imported lazily so matplotlib is only needed for plotting
        import matplotlib.pyplot as plt
        import matplotlib.dates as mdates

        historical_data = self.df_.iloc[:, series_idx]
        forecast_data = self.mean_.iloc[:, series_idx]

        # Get prediction intervals if available
        has_intervals = self.lower_ is not None and self.upper_ is not None
        if has_intervals:
            lower_data = self.lower_.iloc[:, series_idx]
            upper_data = self.upper_.iloc[:, series_idx]

        # x coordinates: real dates when available, row positions otherwise
        dated = isinstance(self.df_.index, pd.DatetimeIndex)
        if dated:
            x_hist = self.df_.index
            x_forecast = self.mean_.index
        else:
            n_points_train = len(self.df_)
            x_hist = np.arange(n_points_train)
            x_forecast = np.arange(
                n_points_train, n_points_train + len(self.mean_)
            )

        fig, ax = plt.subplots(figsize=(12, 6))

        ax.plot(
            x_hist,
            historical_data,
            "-",
            label="Historical",
            color="blue",
            linewidth=1.5,
        )
        ax.plot(
            x_forecast,
            forecast_data,
            "-",
            label="Forecast",
            color="red",
            linewidth=1.5,
        )

        if has_intervals:
            ax.fill_between(
                x_forecast,
                lower_data,
                upper_data,
                alpha=0.3,
                color="red",
                label="Prediction Interval",
            )

        # Vertical line at the half1 / half2 split point
        if self.split_idx_ is not None:
            ax.axvline(
                x=x_hist[self.split_idx_] if dated else self.split_idx_,
                color="gray",
                linestyle="--",
                alpha=0.5,
                label="Train Split",
            )

        if dated:
            # Format x-axis for dates
            ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m-%d"))
            fig.autofmt_xdate()

        # Set title and labels
        series_name = (
            self.series_names[series_idx]
            if series_idx < len(self.series_names)
            else f"Series {series_idx}"
        )
        plt.title(f"Forecast for {series_name}", fontsize=14, fontweight="bold")
        plt.xlabel("Time")
        plt.ylabel("Value")
        plt.legend()
        plt.grid(True, alpha=0.3)
        plt.tight_layout()
        plt.show()
Sequential stacking for time series with unified strategy.
Core Strategy:
- Split data: half1 (base models) | half2 (meta-model)
- Train base models on half1, predict half2
- Create augmented dataset: [original_series | base_pred_1 | base_pred_2 | ...] Stack as additional time series, extract target series
- Train meta-MTS on half2 with augmented data
- Retrain base models on half2 for temporal alignment
- At prediction: base models forecast → augment → meta-model predicts
def fit(self, X, xreg=None, **kwargs):
    """
    Fit MTSStacker using sequential stacking strategy.

    Parameters
    ----------
    X : array-like or DataFrame, shape (n_samples, n_features)
        Training time series (most recent observations last).  A 1-D input
        is treated as a single series.
    xreg : array-like, optional
        External regressors, forwarded to the meta-model.  When it has one
        row per row of ``X`` it is cut down to the half2 rows the
        meta-model is trained on.
    **kwargs : dict
        Additional parameters forwarded to ``meta_model.fit``

    Returns
    -------
    self : object

    Raises
    ------
    ValueError
        If no base model was supplied, or if either half of the split has
        fewer rows than ``meta_model.lags``.
    """
    if not self.base_models:
        raise ValueError("At least one base model is required")

    # 1. Store attributes and convert to DataFrame if needed
    if isinstance(X, pd.DataFrame):
        self.df_ = X.copy()
        X_array = X.values
        self.series_names = X.columns.tolist()
    else:
        X_array = np.asarray(X)
        if X_array.ndim == 1:
            # a single series: make it a one-column matrix
            X_array = X_array.reshape(-1, 1)
        self.df_ = pd.DataFrame(X_array)
        self.series_names = [f"series{i}" for i in range(X_array.shape[1])]

    n_samples, self.n_series_ = X_array.shape

    # 2. Split data into half1 and half2
    split_idx = int(n_samples * self.split_ratio)
    self.split_idx_ = split_idx

    # `lags` may also be an information-criterion name (automatic lag
    # selection); the size checks only make sense for a numeric value.
    lags = self.meta_model.lags
    if isinstance(lags, (int, np.integer)):
        if split_idx < lags:
            raise ValueError(
                f"Split creates insufficient data: split_idx={split_idx} < "
                f"lags={lags}. Increase split_ratio or use fewer lags."
            )
        if n_samples - split_idx < lags:
            raise ValueError(
                f"Split creates insufficient data: only "
                f"{n_samples - split_idx} rows left for the meta-model < "
                f"lags={lags}. Decrease split_ratio or use fewer lags."
            )

    half1 = X_array[:split_idx]
    half2 = X_array[split_idx:]

    def _new_base_mts(estimator):
        # Wrap a fresh clone in an MTS configured like the meta-model,
        # without prediction intervals.
        return MTS(
            obj=clone(estimator),
            lags=self.meta_model.lags,
            n_hidden_features=self.meta_model.n_hidden_features,
            replications=self.meta_model.replications,
            kernel=self.meta_model.kernel,
            type_pi=None,
        )

    def _point_forecast(pred):
        # Reduce any supported predict() output to an array of point forecasts.
        if isinstance(pred, pd.DataFrame):
            return pred.values
        if isinstance(pred, np.ndarray):
            return pred
        if hasattr(pred, "mean"):
            # Named tuple with mean attribute
            mean_pred = pred.mean
            return (
                mean_pred.values
                if isinstance(mean_pred, pd.DataFrame)
                else mean_pred
            )
        raise ValueError(f"Unexpected prediction type: {type(pred)}")

    # 3. Train base models on half1 and predict half2
    base_preds = []
    for base_model in self.base_models:
        base_mts = _new_base_mts(base_model)
        base_mts.fit(half1)
        base_preds.append(_point_forecast(base_mts.predict(h=len(half2))))

    # 4. Create augmented dataset: [original | base_pred_1 | base_pred_2 | ...]
    # shape: (len(half2), n_series * n_base_models)
    base_preds_array = np.hstack(base_preds)

    if isinstance(X, pd.DataFrame):
        half2_index = self.df_.index[split_idx:]
        half2_df = pd.DataFrame(
            half2, index=half2_index, columns=self.series_names
        )
        base_preds_df = pd.DataFrame(
            base_preds_array,
            index=half2_index,
            columns=[
                f"base_{i}_{j}"
                for i in range(len(self.base_models))
                for j in range(self.n_series_)
            ],
        )
        augmented = pd.concat([half2_df, base_preds_df], axis=1)
    else:
        augmented = np.hstack([half2, base_preds_array])

    # The meta-model only sees half2, so full-length regressors must be
    # restricted to the same rows.
    if xreg is not None and len(xreg) == n_samples:
        if hasattr(xreg, "iloc"):
            xreg = xreg.iloc[split_idx:]
        else:
            xreg = np.asarray(xreg)[split_idx:]

    # 5. Train meta-model on augmented half2
    self.meta_model.fit(augmented, xreg=xreg, **kwargs)

    # Store meta-model attributes
    self.output_dates_ = self.meta_model.output_dates_
    self.fit_objs_ = self.meta_model.fit_objs_
    self.y_ = self.meta_model.y_
    self.X_ = self.meta_model.X_
    self.xreg_ = self.meta_model.xreg_
    self.y_means_ = self.meta_model.y_means_
    self.residuals_ = self.meta_model.residuals_

    # 6. Retrain base models on half2 for temporal alignment
    self.fitted_base_models_ = []
    for base_model in self.base_models:
        base_mts_final = _new_base_mts(base_model)
        base_mts_final.fit(half2)
        self.fitted_base_models_.append(base_mts_final)

    return self
Fit MTSStacker using sequential stacking strategy.
Parameters
X : array-like or DataFrame, shape (n_samples, n_features) — training time series (most recent observations last); xreg : array-like, optional — external regressors; **kwargs : dict — additional parameters for base and meta models
Returns
self : object
def predict(self, h=5, level=95, **kwargs):
    """
    Forecast h steps ahead with the fitted meta-model.

    The meta-model is fitted on the augmented system
    ``[original series | base-model predictions]``, so one
    ``meta_model.predict(h=h, ...)`` call forecasts every augmented column
    over the whole horizon; only the leading ``n_series_`` columns (the
    original series) are kept.  A per-step loop over ``predict(h=1)`` is
    deliberately avoided: nothing can be fed back into the meta-model
    between calls, so it would return the one-step-ahead forecast ``h``
    times.

    Parameters
    ----------
    h : int
        Forecast horizon
    level : int
        Confidence level for prediction intervals; ``None`` disables the
        extraction of intervals
    **kwargs : dict
        Additional parameters forwarded to ``meta_model.predict``

    Returns
    -------
    DescribeResult or DataFrame
        ``(mean, sims, lower, upper)`` or ``(mean, lower, upper)`` when the
        meta-model returns intervals, the mean DataFrame otherwise

    Raises
    ------
    ValueError
        If called before ``fit``, or if the meta-model returns an
        unsupported type.
    """
    if getattr(self, "df_", None) is None:
        raise ValueError("MTSStacker must be fitted before calling predict")

    n_targets = self.n_series_

    def _target_values(block):
        # Keep the first h rows and the original-series columns as an array.
        values = (
            block.values
            if isinstance(block, pd.DataFrame)
            else np.asarray(block)
        )
        return values[:h, :n_targets]

    # Forget the previous call so stale intervals are never reported
    self.lower_ = None
    self.upper_ = None
    self.sims_ = None

    result = self.meta_model.predict(h=h, level=level, **kwargs)

    lower_values = upper_values = sims = None
    if isinstance(result, (pd.DataFrame, np.ndarray)):
        mean_values = _target_values(result)
    elif hasattr(result, "mean"):
        mean_values = _target_values(result.mean)
        if level is not None:
            if hasattr(result, "lower"):
                lower_values = _target_values(result.lower)
            if hasattr(result, "upper"):
                upper_values = _target_values(result.upper)
        if (
            getattr(self.meta_model, "type_pi", None)
            and getattr(result, "sims", None) is not None
        ):
            sims = tuple(
                sim.iloc[:, :n_targets]
                if isinstance(sim, pd.DataFrame)
                else sim
                for sim in result.sims
            )
    else:
        raise ValueError(f"Unexpected forecast type: {type(result)}")

    # Create output dates
    fallback_index = pd.RangeIndex(
        start=len(self.df_), stop=len(self.df_) + h
    )
    if isinstance(self.df_.index, pd.DatetimeIndex):
        freq = pd.infer_freq(self.df_.index)
        if freq:
            # the first generated date is the last observed one: drop it
            output_dates = pd.date_range(
                start=self.df_.index[-1], periods=h + 1, freq=freq
            )[1:]
        else:
            output_dates = fallback_index
    else:
        output_dates = fallback_index

    self.output_dates_ = output_dates
    target_names = self.series_names[:n_targets]

    mean_df = pd.DataFrame(
        mean_values, index=output_dates, columns=target_names
    )
    self.mean_ = mean_df

    if lower_values is None or upper_values is None:
        return mean_df

    lower_df = pd.DataFrame(
        lower_values, index=output_dates, columns=target_names
    )
    upper_df = pd.DataFrame(
        upper_values, index=output_dates, columns=target_names
    )
    self.lower_ = lower_df
    self.upper_ = upper_df

    if sims:
        self.sims_ = sims
        DescribeResult = namedtuple(
            "DescribeResult", ("mean", "sims", "lower", "upper")
        )
        return DescribeResult(mean_df, self.sims_, lower_df, upper_df)

    DescribeResult = namedtuple("DescribeResult", ("mean", "lower", "upper"))
    return DescribeResult(mean_df, lower_df, upper_df)
Forecast h steps ahead using stacked predictions.
FIXED: Now properly generates base model forecasts and uses them to create augmented features for the meta-model.
Parameters
h : int — forecast horizon; level : int — confidence level for prediction intervals; **kwargs : dict — additional parameters for prediction
Returns
DescribeResult or DataFrame Predictions with optional intervals/simulations
class MultiOutputMTS(MTS):
    """MTS subclass optimized for multivariate time series with vectorized models

    Enforces n_series >= 2 and uses a single vectorized fit call instead of a
    per-series loop. Works with sklearn models supporting multi-output
    (Ridge, Lasso, LinearRegression, etc.)
    """

    def fit(self, X, xreg=None, **kwargs):
        """Fit with vectorized multi-output model - requires n_series >= 2

        Parameters:

            X: {array-like or DataFrame}, shape = [n_samples, n_series]
                Multivariate time series, one column per series (at least 2).

            xreg: {array-like}, optional
                External regressors, column-bound to X before the lagged
                training inputs are built.

            **kwargs: additional parameters forwarded to KernelDensity when
                residuals are simulated with a 'kde' type_pi

        Returns:

            self: object

        Raises:

            ValueError: if X has no second dimension or fewer than 2 series
        """

        # Validate multivariate input
        try:
            self.init_n_series_ = X.shape[1]
        except IndexError as err:
            raise ValueError(
                "MultiOutputMTS requires multivariate input (n_samples, n_series)"
            ) from err

        if self.init_n_series_ < 2:
            raise ValueError(
                f"MultiOutputMTS requires at least 2 series, got {self.init_n_series_}"
            )

        # Automatic lag selection if requested (copied from parent)
        if isinstance(self.lags, str):
            max_lags = min(25, X.shape[0] // 4)
            best_ic = float("inf")
            best_lags = 1

            if self.verbose:
                print(
                    f"\nSelecting optimal number of lags using {self.lags}..."
                )
                iterator = tqdm(range(1, max_lags + 1))
            else:
                iterator = range(1, max_lags + 1)

            # The reversed data does not depend on the candidate lag:
            # compute it once instead of once per iteration.
            if isinstance(X, pd.DataFrame):
                X_values = X.values[::-1]
            else:
                X_values = X[::-1]

            for lag in iterator:
                mts_input = ts.create_train_inputs(X_values, lag)
                dummy_y, scaled_Z = self.cook_training_set(
                    y=np.ones(mts_input[0].shape[0]), X=mts_input[1]
                )

                # Vectorized fit for lag selection
                y_means = np.mean(mts_input[0], axis=0)
                centered_y = mts_input[0] - y_means[np.newaxis, :]
                self.obj.fit(X=scaled_Z, y=centered_y)
                residuals = centered_y - self.obj.predict(scaled_Z)
                # _compute_information_criterion is called right after this
                # assignment; presumably it reads self.residuals_ -- TODO confirm
                self.residuals_ = residuals  # Keep (n_obs, n_series) shape

                ic = self._compute_information_criterion(
                    curr_lags=lag, criterion=self.lags
                )

                if self.verbose:
                    print(f"Trying lags={lag}, {self.lags}={ic:.2f}")

                if ic < best_ic:
                    best_ic = ic
                    best_lags = lag

            if self.verbose:
                print(
                    f"\nSelected {best_lags} lags with {self.lags}={best_ic:.2f}"
                )

            self.lags = best_lags

        # Data preprocessing (from parent)
        self.input_dates = None
        self.df_ = None

        if isinstance(X, pd.DataFrame) is False:
            if xreg is None:
                X = pd.DataFrame(X)
                self.series_names = [
                    "series" + str(i) for i in range(X.shape[1])
                ]
            else:
                X = mo.cbind(X, xreg)
                self.xreg_ = xreg
        else:
            X_index = None
            if X.index is not None:
                X_index = X.index
            if xreg is None:
                X = copy.deepcopy(mo.convert_df_to_numeric(X))
            else:
                X = copy.deepcopy(mo.cbind(mo.convert_df_to_numeric(X), xreg))
                self.xreg_ = xreg
            if X_index is not None:
                X.index = X_index
            self.series_names = X.columns.tolist()

        # self.df_ was reset to None just above, so every fit starts from a
        # fresh frame (the former "append to existing df_" branches could
        # never execute and were removed).
        if isinstance(X, pd.DataFrame):
            self.df_ = X
            X = X.values
            self.df_.columns = self.series_names
        else:
            self.df_ = pd.DataFrame(X, columns=self.series_names)

        self.input_dates = ts.compute_input_dates(self.df_)

        n, p = X.shape
        self.n_obs_ = n
        rep_1_n = np.repeat(1, n)

        # Reset every fitted attribute before refitting
        self.y_ = None
        self.X_ = None
        self.n_series = p
        self.fit_objs_.clear()
        self.y_means_.clear()
        self.residuals_ = None
        self.residuals_sims_ = None
        self.kde_ = None
        self.sims_ = None
        self.scaled_Z_ = None
        self.centered_y_is_ = []

        # Create training inputs (data is reversed before lagging)
        mts_input = ts.create_train_inputs(X[::-1], self.lags)
        self.y_ = mts_input[0]
        self.X_ = mts_input[1]

        dummy_y, scaled_Z = self.cook_training_set(y=rep_1_n, X=self.X_)
        self.scaled_Z_ = scaled_Z

        if self.verbose > 0:
            print(
                f"\n Adjusting {type(self.obj).__name__} to multivariate time series (vectorized)... \n"
            )

        # VECTORIZED FITTING - NO LOOP
        # Only the first init_n_series_ columns are targets. When xreg is
        # given, self.y_ presumably also carries the regressor columns
        # (TODO confirm against ts.create_train_inputs); without this slice
        # the centering below would fail to broadcast. The slice is a no-op
        # when self.y_ already has exactly init_n_series_ columns.
        targets = self.y_[:, : self.init_n_series_]
        y_means_array = np.array(
            [np.mean(targets[:, i]) for i in range(self.init_n_series_)]
        )
        for i in range(self.init_n_series_):
            self.y_means_[i] = y_means_array[i]

        centered_y_all = targets - y_means_array[np.newaxis, :]
        self.centered_y_is_ = [
            centered_y_all[:, i] for i in range(self.init_n_series_)
        ]

        # Single vectorized fit for all series
        self.obj.fit(scaled_Z, centered_y_all)

        # All series share the same model
        for i in range(self.init_n_series_):
            self.fit_objs_[i] = self.obj

        # Vectorized residuals - ONLY target columns (n_obs, n_series)
        preds_all = self.obj.predict(scaled_Z)
        residuals_raw = centered_y_all - preds_all

        # Defensive slice: keep exactly one residual column per series
        self.residuals_ = residuals_raw[:, : self.init_n_series_]

        # Handle type_pi
        if self.type_pi == "gaussian":
            self.gaussian_preds_std_ = np.std(self.residuals_, axis=0)

        if self.type_pi.startswith("scp2"):
            # Standardize residuals column-wise
            data_mean = np.mean(self.residuals_, axis=0)
            self.residuals_std_dev_ = np.std(self.residuals_, axis=0)
            self.residuals_ = (
                self.residuals_ - data_mean[np.newaxis, :]
            ) / self.residuals_std_dev_[np.newaxis, :]

        if self.replications is not None and "kde" in self.type_pi:
            if self.verbose > 0:
                print(f"\n Simulate residuals using {self.kernel} kernel... \n")
            assert self.kernel in (
                "gaussian",
                "tophat",
            ), "currently, 'kernel' must be either 'gaussian' or 'tophat'"
            # Bandwidth chosen by cross-validated grid search
            kernel_bandwidths = {"bandwidth": np.logspace(-6, 6, 150)}
            grid = GridSearchCV(
                KernelDensity(kernel=self.kernel, **kwargs),
                param_grid=kernel_bandwidths,
            )
            grid.fit(self.residuals_)
            if self.verbose > 0:
                print(
                    f"\n Best parameters for {self.kernel} kernel: {grid.best_params_} \n"
                )
            self.kde_ = grid.best_estimator_

        return self

    def predict(self, h=5, level=95, quantiles=None, **kwargs):
        """Override predict to handle vectorized model predictions

        Parameters:

            h: int
                forecast horizon, forwarded to the parent predict

            level: int, list or ndarray
                confidence level(s), forwarded to the parent predict

            quantiles: optional
                forwarded to the parent predict

            **kwargs: additional parameters forwarded to the parent predict

        Returns:

            whatever the parent class' predict returns
        """

        # Delegate to parent for quantiles and multiple levels
        if quantiles is not None or isinstance(level, (list, np.ndarray)):
            return super().predict(
                h=h, level=level, quantiles=quantiles, **kwargs
            )

        # Store original obj temporarily
        original_obj = self.obj

        # Create wrapper that extracts the i-th output for each series
        class VectorizedWrapper:
            def __init__(self, model, series_idx):
                self.model = model
                self.series_idx = series_idx

            def predict(self, X, **kw):
                """Predict and return only the output for this series index"""
                preds = self.model.predict(X, **kw)
                # preds shape: (n_samples, n_series) or (n_series,)
                if len(preds.shape) == 1:
                    # Single prediction: (n_series,)
                    return preds[self.series_idx: self.series_idx + 1]
                else:
                    # Multiple predictions: (n_samples, n_series)
                    return preds[
                        :, self.series_idx: self.series_idx + 1
                    ].flatten()

        # Wrap each series with its own index, so the parent sees one
        # single-output predictor per entry of fit_objs_
        for i in range(self.init_n_series_):
            self.fit_objs_[i] = VectorizedWrapper(original_obj, i)

        try:
            result = super().predict(
                h=h, level=level, quantiles=quantiles, **kwargs
            )
        finally:
            # Restore original, even if the parent predict raised
            for i in range(self.init_n_series_):
                self.fit_objs_[i] = original_obj

        return result
MTS subclass optimized for multivariate time series with vectorized models
Enforces n_series >= 2 and uses single vectorized fit call instead of per-series loop. Works with sklearn models supporting multi-output (Ridge, Lasso, LinearRegression, etc.)
def fit(self, X, xreg=None, **kwargs):
    """Fit with vectorized multi-output model - requires n_series >= 2

    Parameters:

        X: {array-like or DataFrame}, shape = [n_samples, n_series]
            Multivariate time series, one column per series (at least 2).

        xreg: {array-like}, optional
            External regressors, column-bound to X before the lagged
            training inputs are built.

        **kwargs: additional parameters forwarded to KernelDensity when
            residuals are simulated with a 'kde' type_pi

    Returns:

        self: object

    Raises:

        ValueError: if X has no second dimension or fewer than 2 series
    """

    # Validate multivariate input
    try:
        self.init_n_series_ = X.shape[1]
    except IndexError as err:
        raise ValueError(
            "MultiOutputMTS requires multivariate input (n_samples, n_series)"
        ) from err

    if self.init_n_series_ < 2:
        raise ValueError(
            f"MultiOutputMTS requires at least 2 series, got {self.init_n_series_}"
        )

    # Automatic lag selection if requested (copied from parent)
    if isinstance(self.lags, str):
        max_lags = min(25, X.shape[0] // 4)
        best_ic = float("inf")
        best_lags = 1

        if self.verbose:
            print(
                f"\nSelecting optimal number of lags using {self.lags}..."
            )
            iterator = tqdm(range(1, max_lags + 1))
        else:
            iterator = range(1, max_lags + 1)

        # The reversed data does not depend on the candidate lag:
        # compute it once instead of once per iteration.
        if isinstance(X, pd.DataFrame):
            X_values = X.values[::-1]
        else:
            X_values = X[::-1]

        for lag in iterator:
            mts_input = ts.create_train_inputs(X_values, lag)
            dummy_y, scaled_Z = self.cook_training_set(
                y=np.ones(mts_input[0].shape[0]), X=mts_input[1]
            )

            # Vectorized fit for lag selection
            y_means = np.mean(mts_input[0], axis=0)
            centered_y = mts_input[0] - y_means[np.newaxis, :]
            self.obj.fit(X=scaled_Z, y=centered_y)
            residuals = centered_y - self.obj.predict(scaled_Z)
            # _compute_information_criterion is called right after this
            # assignment; presumably it reads self.residuals_ -- TODO confirm
            self.residuals_ = residuals  # Keep (n_obs, n_series) shape

            ic = self._compute_information_criterion(
                curr_lags=lag, criterion=self.lags
            )

            if self.verbose:
                print(f"Trying lags={lag}, {self.lags}={ic:.2f}")

            if ic < best_ic:
                best_ic = ic
                best_lags = lag

        if self.verbose:
            print(
                f"\nSelected {best_lags} lags with {self.lags}={best_ic:.2f}"
            )

        self.lags = best_lags

    # Data preprocessing (from parent)
    self.input_dates = None
    self.df_ = None

    if isinstance(X, pd.DataFrame) is False:
        if xreg is None:
            X = pd.DataFrame(X)
            self.series_names = [
                "series" + str(i) for i in range(X.shape[1])
            ]
        else:
            X = mo.cbind(X, xreg)
            self.xreg_ = xreg
    else:
        X_index = None
        if X.index is not None:
            X_index = X.index
        if xreg is None:
            X = copy.deepcopy(mo.convert_df_to_numeric(X))
        else:
            X = copy.deepcopy(mo.cbind(mo.convert_df_to_numeric(X), xreg))
            self.xreg_ = xreg
        if X_index is not None:
            X.index = X_index
        self.series_names = X.columns.tolist()

    # self.df_ was reset to None just above, so every fit starts from a
    # fresh frame (the former "append to existing df_" branches could
    # never execute and were removed).
    if isinstance(X, pd.DataFrame):
        self.df_ = X
        X = X.values
        self.df_.columns = self.series_names
    else:
        self.df_ = pd.DataFrame(X, columns=self.series_names)

    self.input_dates = ts.compute_input_dates(self.df_)

    n, p = X.shape
    self.n_obs_ = n
    rep_1_n = np.repeat(1, n)

    # Reset every fitted attribute before refitting
    self.y_ = None
    self.X_ = None
    self.n_series = p
    self.fit_objs_.clear()
    self.y_means_.clear()
    self.residuals_ = None
    self.residuals_sims_ = None
    self.kde_ = None
    self.sims_ = None
    self.scaled_Z_ = None
    self.centered_y_is_ = []

    # Create training inputs (data is reversed before lagging)
    mts_input = ts.create_train_inputs(X[::-1], self.lags)
    self.y_ = mts_input[0]
    self.X_ = mts_input[1]

    dummy_y, scaled_Z = self.cook_training_set(y=rep_1_n, X=self.X_)
    self.scaled_Z_ = scaled_Z

    if self.verbose > 0:
        print(
            f"\n Adjusting {type(self.obj).__name__} to multivariate time series (vectorized)... \n"
        )

    # VECTORIZED FITTING - NO LOOP
    # Only the first init_n_series_ columns are targets. When xreg is
    # given, self.y_ presumably also carries the regressor columns
    # (TODO confirm against ts.create_train_inputs); without this slice
    # the centering below would fail to broadcast. The slice is a no-op
    # when self.y_ already has exactly init_n_series_ columns.
    targets = self.y_[:, : self.init_n_series_]
    y_means_array = np.array(
        [np.mean(targets[:, i]) for i in range(self.init_n_series_)]
    )
    for i in range(self.init_n_series_):
        self.y_means_[i] = y_means_array[i]

    centered_y_all = targets - y_means_array[np.newaxis, :]
    self.centered_y_is_ = [
        centered_y_all[:, i] for i in range(self.init_n_series_)
    ]

    # Single vectorized fit for all series
    self.obj.fit(scaled_Z, centered_y_all)

    # All series share the same model
    for i in range(self.init_n_series_):
        self.fit_objs_[i] = self.obj

    # Vectorized residuals - ONLY target columns (n_obs, n_series)
    preds_all = self.obj.predict(scaled_Z)
    residuals_raw = centered_y_all - preds_all

    # Defensive slice: keep exactly one residual column per series
    self.residuals_ = residuals_raw[:, : self.init_n_series_]

    # Handle type_pi
    if self.type_pi == "gaussian":
        self.gaussian_preds_std_ = np.std(self.residuals_, axis=0)

    if self.type_pi.startswith("scp2"):
        # Standardize residuals column-wise
        data_mean = np.mean(self.residuals_, axis=0)
        self.residuals_std_dev_ = np.std(self.residuals_, axis=0)
        self.residuals_ = (
            self.residuals_ - data_mean[np.newaxis, :]
        ) / self.residuals_std_dev_[np.newaxis, :]

    if self.replications is not None and "kde" in self.type_pi:
        if self.verbose > 0:
            print(f"\n Simulate residuals using {self.kernel} kernel... \n")
        assert self.kernel in (
            "gaussian",
            "tophat",
        ), "currently, 'kernel' must be either 'gaussian' or 'tophat'"
        # Bandwidth chosen by cross-validated grid search
        kernel_bandwidths = {"bandwidth": np.logspace(-6, 6, 150)}
        grid = GridSearchCV(
            KernelDensity(kernel=self.kernel, **kwargs),
            param_grid=kernel_bandwidths,
        )
        grid.fit(self.residuals_)
        if self.verbose > 0:
            print(
                f"\n Best parameters for {self.kernel} kernel: {grid.best_params_} \n"
            )
        self.kde_ = grid.best_estimator_

    return self
Fit with vectorized multi-output model - requires n_series >= 2
def predict(self, h=5, level=95, quantiles=None, **kwargs):
    """Forecast through the parent class using the shared multi-output model.

    Quantile requests and list/array levels go straight to the parent
    predict. Otherwise each entry of fit_objs_ is temporarily replaced by an
    adapter exposing only that series' output column, the parent predict is
    run, and the shared model is put back afterwards.
    """

    class VectorizedWrapper:
        """Adapter returning one output column of a multi-output model."""

        def __init__(self, model, series_idx):
            self.model = model
            self.series_idx = series_idx

        def predict(self, X, **kw):
            """Predict and return only the output for this series index"""
            column = slice(self.series_idx, self.series_idx + 1)
            outputs = self.model.predict(X, **kw)
            if len(outputs.shape) != 1:
                # (n_samples, n_series): keep this series' column, flattened
                return outputs[:, column].flatten()
            # (n_series,): keep a length-1 slice
            return outputs[column]

    needs_parent_only = quantiles is not None or isinstance(
        level, (list, np.ndarray)
    )
    if needs_parent_only:
        return super().predict(
            h=h, level=level, quantiles=quantiles, **kwargs
        )

    shared_model = self.obj
    series_ids = range(self.init_n_series_)

    for idx in series_ids:
        self.fit_objs_[idx] = VectorizedWrapper(shared_model, idx)

    try:
        forecast = super().predict(
            h=h, level=level, quantiles=quantiles, **kwargs
        )
    finally:
        # Put the shared model back whether or not the parent call succeeded
        for idx in series_ids:
            self.fit_objs_[idx] = shared_model

    return forecast
Override predict to handle vectorized model predictions
class MultitaskClassifier(Base, ClassifierMixin):
    """Multitask Classification model based on regression models, with shared covariates

    Parameters:

        obj: object
            any object (must be a regression model) containing a method fit (obj.fit())
            and a method predict (obj.predict())

        n_hidden_features: int
            number of nodes in the hidden layer

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
            'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not
            (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original predictors are included (True) in model's
            fitting or not (False)

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
                no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        col_sample: float
            percentage of covariates randomly chosen for training

        row_sample: float
            percentage of rows chosen for training, by stratified bootstrapping

        seed: int
            reproducibility seed for nodes_sim=='uniform'

        backend: str
            "cpu" or "gpu" or "tpu"

    Attributes:

        fit_objs_: dict
            objects adjusted to each individual class (one regression
            model per class, keyed by class index)

        n_classes_: int
            number of classes for the classifier

    Examples:

    See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/mtask_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/mtask_classification.py)

    ```python
    import nnetsauce as ns
    import numpy as np
    from sklearn.datasets import load_breast_cancer
    from sklearn.linear_model import LinearRegression
    from sklearn.model_selection import train_test_split
    from sklearn import metrics
    from time import time

    breast_cancer = load_breast_cancer()
    Z = breast_cancer.data
    t = breast_cancer.target

    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
                                                        random_state=123+2*10)

    # Linear Regression is used
    regr = LinearRegression()
    fit_obj = ns.MultitaskClassifier(regr, n_hidden_features=5,
                                  n_clusters=2, type_clust="gmm")

    start = time()
    fit_obj.fit(X_train, y_train)
    print(f"Elapsed {time() - start}")

    print(fit_obj.score(X_test, y_test))
    print(fit_obj.score(X_test, y_test, scoring="roc_auc"))

    start = time()
    preds = fit_obj.predict(X_test)
    print(f"Elapsed {time() - start}")
    print(metrics.classification_report(preds, y_test))
    ```

    """

    # construct the object -----
    # NOTE: `_estimator_type` is exposed by the property at the end of the
    # class; a plain class attribute of the same name used to sit here but
    # was shadowed by that property, so it has been removed.

    def __init__(
        self,
        obj,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        seed=123,
        backend="cpu",
    ):
        super().__init__(
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )

        self.type_fit = "classification"
        self.obj = obj
        self.fit_objs_ = {}

    def fit(self, X, y, sample_weight=None, **kwargs):
        """Fit MultitaskClassifier to training data (X, y).

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            sample_weight: accepted for API compatibility; it is not used
                by this method.

            **kwargs: additional parameters to be passed to
                    self.cook_training_set or self.obj.fit

        Returns:

            self: object

        """

        assert mx.is_factor(y), "y must contain only integers"

        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

        self.classes_ = np.unique(y)  # for compatibility with sklearn
        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn

        # multitask response: one indicator column per class
        Y = mo.one_hot_encode2(output_y, self.n_classes_)

        # One regression fit per class indicator; the fitted model is
        # deep-copied because self.obj is refitted at every iteration.
        for i in range(self.n_classes_):
            self.fit_objs_[i] = deepcopy(
                self.obj.fit(scaled_Z, Y[:, i], **kwargs)
            )

        return self

    def predict(self, X, **kwargs):
        """Predict test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters to be passed to
                    self.cook_test_set

        Returns:

            model predictions: {array-like}
                index of the most probable class for each sample

        """
        return np.argmax(self.predict_proba(X, **kwargs), axis=1)

    def predict_proba(self, X, **kwargs):
        """Predict probabilities for test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features.
                A 1-D array is treated as a single sample.

            **kwargs: additional parameters to be passed to
                    self.cook_test_set

        Returns:

            probability estimates for test data: {array-like},
            shape = [n_samples, n_classes] (one row for a 1-D input)

        """

        shape_X = X.shape

        if len(shape_X) == 1:
            n_features = shape_X[0]

            # A 1-D input is ONE sample: allocate a single row. (Using
            # shape_X[0] here would allocate n_features rows.)
            probs = np.zeros((1, self.n_classes_))

            # The sample is stacked on a row of ones before cook_test_set;
            # presumably cook_test_set needs more than one row -- TODO
            # confirm. Only the first prediction is kept.
            new_X = mo.rbind(
                X.reshape(1, n_features),
                np.ones(n_features).reshape(1, n_features),
            )

            Z = self.cook_test_set(new_X, **kwargs)

            # loop on all the classes
            for i in range(self.n_classes_):
                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0]

        else:
            probs = np.zeros((shape_X[0], self.n_classes_))

            Z = self.cook_test_set(X, **kwargs)

            # loop on all the classes
            for i in range(self.n_classes_):
                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)

        # Squash the raw regression outputs, then normalize each row to 1
        expit_raw_probs = expit(probs)

        return expit_raw_probs / expit_raw_probs.sum(axis=1)[:, None]

    def decision_function(self, X, **kwargs):
        """Compute the decision function of X.

        Parameters:
            X: {array-like}, shape = [n_samples, n_features]
                Samples to compute decision function for.

            **kwargs: additional parameters to be passed to
                    self.cook_test_set

        Returns:
            array-like of shape (n_samples,) or (n_samples, n_classes)
            Decision function of the input samples. The order of outputs is the same
            as that of the classes passed to fit.
        """
        if not hasattr(self.obj, "decision_function"):
            # If base classifier doesn't have decision_function, use predict_proba
            proba = self.predict_proba(X, **kwargs)
            if proba.shape[1] == 2:
                return proba[:, 1]  # For binary classification
            return proba  # For multiclass

        if len(X.shape) == 1:
            n_features = X.shape[0]
            new_X = mo.rbind(
                X.reshape(1, n_features),
                np.ones(n_features).reshape(1, n_features),
            )

            return (
                self.obj.decision_function(
                    self.cook_test_set(new_X, **kwargs), **kwargs
                )
            )[0]

        return self.obj.decision_function(
            self.cook_test_set(X, **kwargs), **kwargs
        )

    @property
    def _estimator_type(self):
        return "classifier"
Multitask Classification model based on regression models, with shared covariates
Parameters:
obj: object
any object (must be a regression model) containing a method fit (obj.fit())
and a method predict (obj.predict())
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
fit_objs_: dict
objects adjusted to each individual class (one regression model per class)
n_classes_: int
number of classes for the classifier
Examples:
See also https://github.com/Techtonique/nnetsauce/blob/master/examples/mtask_classification.py
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time
breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
random_state=123+2*10)
# Linear Regression is used
regr = LinearRegression()
fit_obj = ns.MultitaskClassifier(regr, n_hidden_features=5,
n_clusters=2, type_clust="gmm")
start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")
print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
def fit(self, X, y, sample_weight=None, **kwargs):
    """Train one regression model per class on one-hot encoded targets.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        sample_weight: accepted but not used by this method.

        **kwargs: additional parameters to be passed to
                self.cook_training_set or self.obj.fit

    Returns:

        self: object

    """

    assert mx.is_factor(y), "y must contain only integers"

    encoded_y, features = self.cook_training_set(y=y, X=X, **kwargs)

    # sklearn-compatible class bookkeeping
    labels = np.unique(y)
    self.classes_ = labels
    self.n_classes_ = len(labels)

    # Multitask response: one indicator column per class
    indicators = mo.one_hot_encode2(encoded_y, self.n_classes_)

    for class_idx in range(self.n_classes_):
        fitted = self.obj.fit(features, indicators[:, class_idx], **kwargs)
        # Snapshot the model: self.obj is refitted on the next iteration
        self.fit_objs_[class_idx] = deepcopy(fitted)

    return self
Fit MultitaskClassifier to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
def predict(self, X, **kwargs):
    """Return, for each sample of X, the index of its most probable class.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Test vectors, where n_samples is the number
            of samples and n_features is the number of features.

        **kwargs: additional parameters to be passed to
                self.cook_test_set

    Returns:

        model predictions: {array-like}

    """
    class_probs = self.predict_proba(X, **kwargs)
    return np.argmax(class_probs, axis=1)
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
def predict_proba(self, X, **kwargs):
    """Predict probabilities for test data X.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Test vectors, where n_samples is the number
            of samples and n_features is the number of features.
            A 1-D array is treated as a single sample.

        **kwargs: additional parameters to be passed to
                self.cook_test_set

    Returns:

        probability estimates for test data: {array-like},
        shape = [n_samples, n_classes] (one row for a 1-D input)

    """

    shape_X = X.shape

    if len(shape_X) == 1:
        n_features = shape_X[0]

        # A 1-D input is ONE sample: allocate a single row. (Using
        # shape_X[0] here would allocate n_features rows.)
        probs = np.zeros((1, self.n_classes_))

        # The sample is stacked on a row of ones before cook_test_set;
        # presumably cook_test_set needs more than one row -- TODO
        # confirm. Only the first prediction is kept.
        new_X = mo.rbind(
            X.reshape(1, n_features),
            np.ones(n_features).reshape(1, n_features),
        )

        Z = self.cook_test_set(new_X, **kwargs)

        # loop on all the classes
        for i in range(self.n_classes_):
            probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0]

    else:
        probs = np.zeros((shape_X[0], self.n_classes_))

        Z = self.cook_test_set(X, **kwargs)

        # loop on all the classes
        for i in range(self.n_classes_):
            probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)

    # Squash the raw regression outputs, then normalize each row to 1
    expit_raw_probs = expit(probs)

    return expit_raw_probs / expit_raw_probs.sum(axis=1)[:, None]
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
class NeuralNetRegressor(BaseEstimator, RegressorMixin):
    """
    (Pretrained) Neural Network Regressor.

    Parameters:

        hidden_layer_sizes : tuple, default=(100,)
            The number of neurons in each hidden layer.
        max_iter : int, default=100
            The maximum number of iterations to train the model.
        learning_rate : float, default=0.01
            The learning rate for the optimizer.
        l1_ratio : float, default=0.5
            The ratio of L1 regularization.
        alpha : float, default=1e-6
            The regularization parameter.
        activation_name : str, default="relu"
            The activation function to use.
        dropout : float, default=0.0
            The dropout rate.
        random_state : int, default=None
            The random state for the random number generator.
        weights : list, default=None
            The weights to initialize the model with.

    Attributes:

        weights : list
            The weights of the model.
        params : list
            The parameters of the model.
        scaler_ : sklearn.preprocessing.StandardScaler
            The scaler used to standardize the input features.
        y_mean_ : float
            The mean of the target variable.

    Methods:

        fit(X, y)
            Fit the model to the data.
        predict(X)
            Predict the target variable.
        get_weights()
            Get the weights of the model.
        set_weights(weights)
            Set the weights of the model.
    """

    def __init__(
        self,
        hidden_layer_sizes=None,
        max_iter=100,
        learning_rate=0.01,
        l1_ratio=0.5,
        alpha=1e-6,
        activation_name="relu",
        dropout=0,
        weights=None,
        random_state=None,
    ):
        if not JAX_AVAILABLE:
            raise RuntimeError(
                "JAX is required for this feature. Install with: pip install yourpackage[jax]"
            )

        if weights is None and hidden_layer_sizes is None:
            hidden_layer_sizes = (100,)  # default value if neither is provided
        self.hidden_layer_sizes = hidden_layer_sizes
        self.max_iter = max_iter
        self.learning_rate = learning_rate
        self.l1_ratio = l1_ratio
        self.alpha = alpha
        self.activation_name = activation_name
        self.dropout = dropout
        self.weights = weights
        self.random_state = random_state
        self.params = None
        self.scaler_ = StandardScaler()
        self.y_mean_ = None

    def _validate_weights(self, input_dim):
        """Validate that weights dimensions are coherent.

        Returns False when no weights are set, True when every layer's
        (W, b) pair chains correctly from `input_dim` down to a single
        output; raises ValueError otherwise.
        """
        if not self.weights:
            return False

        try:
            # Check each layer's weights and biases
            prev_dim = input_dim
            for W, b in self.weights:
                # Check weight matrix dimensions
                if W.shape[0] != prev_dim:
                    raise ValueError(
                        f"Weight matrix input dimension {W.shape[0]} does not match, previous layer output dimension {prev_dim}"
                    )
                # Check bias dimension matches weight matrix output
                if W.shape[1] != b.shape[0]:
                    raise ValueError(
                        f"Bias dimension {b.shape[0]} does not match weight matrix, output dimension {W.shape[1]}"
                    )
                prev_dim = W.shape[1]

            # Check final output dimension is 1 for regression
            if prev_dim != 1:
                raise ValueError(
                    f"Final layer output dimension {prev_dim} must be 1 for regression"
                )

            return True
        except (AttributeError, IndexError) as err:
            raise ValueError(
                "Weights format is invalid. Expected list of (weight, bias) tuples"
            ) from err

    def fit(self, X, y):
        """Fit the network by full-batch gradient descent.

        Parameters:

            X : array-like, shape = [n_samples, n_features]
                Training inputs; standardized with `scaler_` before training.
            y : array-like, shape = [n_samples]
                Targets; centered on their mean before training.

        Returns:

            self : object
        """
        # Standardize the input features
        X = self.scaler_.fit_transform(X)
        # Ensure y is a 2D array; jnp.asarray also accepts lists and
        # pandas Series, which have no usable `.reshape`
        y = jnp.asarray(y).reshape(-1, 1)
        self.y_mean_ = jnp.mean(y)
        y = y - self.y_mean_
        # Use the supplied weights when they validate; otherwise (no weights,
        # or an empty container) initialize from hidden_layer_sizes
        if self.weights is not None and self._validate_weights(X.shape[1]):
            self.params = self.weights
        else:
            if self.hidden_layer_sizes is None:
                raise ValueError(
                    "Either weights or hidden_layer_sizes must be provided"
                )
            self.params = initialize_params(
                X.shape[1], self.hidden_layer_sizes, self.random_state
            )
        loss_fn = partial(loss, l1_ratio=self.l1_ratio, alpha=self.alpha)
        grad_loss = jit(grad(loss_fn))  # compiled gradient evaluation function
        perex_grads = jit(
            vmap(grad_loss, in_axes=(None, 0, 0))
        )  # fast per-example grads
        # Training loop
        for _ in range(self.max_iter):
            grads = perex_grads(self.params, X, y)
            # Average gradients across examples. `jax.tree_map` was
            # deprecated and later removed; `jax.tree_util.tree_map` is the
            # supported spelling.
            grads = jax.tree_util.tree_map(
                lambda g: jnp.mean(g, axis=0), grads
            )
            # Update parameters
            self.params = [
                (W - self.learning_rate * dW, b - self.learning_rate * db)
                for (W, b), (dW, db) in zip(self.params, grads)
            ]
        # Store final weights
        self.weights = self.params
        return self

    def get_weights(self):
        """Return the current weights of the model."""
        if self.weights is None:
            raise ValueError(
                "No weights available. Model has not been fitted yet."
            )
        return self.weights

    def set_weights(self, weights):
        """Set the weights of the model manually."""
        self.weights = weights
        self.params = weights

    def predict(self, X):
        """Predict targets for X.

        Parameters:

            X : array-like, shape = [n_samples, n_features]

        Returns:

            1-D array of predictions, shifted back by the training mean

        Raises:

            ValueError: if the model has no parameters yet
        """
        # Check for parameters first, so an unfitted model reports this
        # message rather than an error from the scaler
        if self.params is None:
            raise ValueError("Model has not been fitted yet.")
        X = self.scaler_.transform(X)
        predictions = predict_internal(
            self.params,
            X,
            activation_func=self.activation_name,
            dropout=self.dropout,
            seed=self.random_state,
        )
        return predictions.reshape(-1) + self.y_mean_
(Pretrained) Neural Network Regressor.
Parameters:
hidden_layer_sizes : tuple, default=(100,)
The number of neurons in each hidden layer.
max_iter : int, default=100
The maximum number of iterations to train the model.
learning_rate : float, default=0.01
The learning rate for the optimizer.
l1_ratio : float, default=0.5
The ratio of L1 regularization.
alpha : float, default=1e-6
The regularization parameter.
activation_name : str, default="relu"
The activation function to use.
dropout : float, default=0.0
The dropout rate.
random_state : int, default=None
The random state for the random number generator.
weights : list, default=None
The weights to initialize the model with.
Attributes:
weights : list
The weights of the model.
params : list
The parameters of the model.
scaler_ : sklearn.preprocessing.StandardScaler
The scaler used to standardize the input features.
y_mean_ : float
The mean of the target variable.
Methods:
fit(X, y)
Fit the model to the data.
predict(X)
Predict the target variable.
get_weights()
Get the weights of the model.
set_weights(weights)
Set the weights of the model.
def fit(self, X, y):
    """Fit the model to the data.

    Parameters:

        X : {array-like}, shape = [n_samples, n_features]
            Training vectors; standardized with `scaler_` before training.
        y : array-like, shape = [n_samples]
            Target values; centered on their mean before training.

    Returns:

        self

    Raises:

        ValueError
            If neither usable weights nor `hidden_layer_sizes` are available.
    """
    # Standardize the input features
    X = self.scaler_.fit_transform(X)
    # Ensure y is 2D for consistency (asarray so lists are accepted too)
    y = jnp.asarray(y).reshape(-1, 1)
    self.y_mean_ = jnp.mean(y)
    y = y - self.y_mean_

    # Validate or initialize weights. An empty `weights` list is treated as
    # "not provided" instead of leaving `params` unset.
    if self.weights is not None and self._validate_weights(X.shape[1]):
        self.params = self.weights
    else:
        if self.hidden_layer_sizes is None:
            raise ValueError(
                "Either weights or hidden_layer_sizes must be provided"
            )
        self.params = initialize_params(
            X.shape[1], self.hidden_layer_sizes, self.random_state
        )

    loss_fn = partial(loss, l1_ratio=self.l1_ratio, alpha=self.alpha)
    grad_loss = jit(grad(loss_fn))  # compiled gradient evaluation function
    perex_grads = jit(
        vmap(grad_loss, in_axes=(None, 0, 0))
    )  # fast per-example grads

    # Training loop: plain gradient descent on the averaged gradients
    for _ in range(self.max_iter):
        grads = perex_grads(self.params, X, y)
        # `jax.tree_map` was removed from recent JAX releases;
        # `jax.tree_util.tree_map` is the supported spelling.
        grads = jax.tree_util.tree_map(lambda g: jnp.mean(g, axis=0), grads)
        # Update parameters
        self.params = [
            (W - self.learning_rate * dW, b - self.learning_rate * db)
            for (W, b), (dW, db) in zip(self.params, grads)
        ]

    # Store final weights
    self.weights = self.params
    return self
def predict(self, X):
    """Predict the target variable.

    Parameters:

        X : {array-like}, shape = [n_samples, n_features]
            Input vectors; transformed with the scaler fitted in `fit`.

    Returns:

        1-D array of predictions with the training target mean added back.

    Raises:

        ValueError
            If the model has no parameters yet.
    """
    # Check for parameters first so the documented error is raised before
    # the scaler is touched.
    if self.params is None:
        raise ValueError("Model has not been fitted yet.")
    X = self.scaler_.transform(X)
    predictions = predict_internal(
        self.params,
        X,
        activation_func=self.activation_name,
        dropout=self.dropout,
        seed=self.random_state,
    )
    return predictions.reshape(-1) + self.y_mean_
class NeuralNetClassifier(BaseEstimator, ClassifierMixin):
    """
    (Pretrained) Neural Network Classifier.

    Parameters:

        hidden_layer_sizes : tuple, default=(100,)
            The number of neurons in each hidden layer.
        max_iter : int, default=100
            The maximum number of iterations to train the model.
        learning_rate : float, default=0.01
            The learning rate for the optimizer.
        l1_ratio : float, default=0.5
            The ratio of L1 regularization.
        alpha : float, default=1e-6
            The regularization parameter.
        activation_name : str, default="relu"
            The activation function to use.
        dropout : float, default=0.0
            The dropout rate.
        random_state : int, default=None
            The random state for the random number generator.
        weights : list, default=None
            The weights to initialize the model with.

    Attributes:

        regr : SimpleMultitaskClassifier
            Wrapper around a NeuralNetRegressor, created in `fit`
            (None before fitting).
        classes_ : array
            The unique values found in the training target.
        n_classes_ : int
            The number of unique classes.
        n_features_in_ : int
            The number of columns of the training data.
        n_samples_fit_ : int
            The number of rows of the training data.

    Methods:

        fit(X, y)
            Fit the model to the data.
        predict(X)
            Predict the target variable.
        predict_proba(X)
            Predict the probability of the target variable.
    """

    # A single class-level declaration; the former trailing property of the
    # same name shadowed this attribute and made class-level access return
    # a property object instead of the string.
    _estimator_type = "classifier"

    def __init__(
        self,
        hidden_layer_sizes=(100,),
        max_iter=100,
        learning_rate=0.01,
        weights=None,
        l1_ratio=0.5,
        alpha=1e-6,
        activation_name="relu",
        dropout=0.0,
        random_state=None,
    ):
        self.hidden_layer_sizes = hidden_layer_sizes
        self.max_iter = max_iter
        self.learning_rate = learning_rate
        self.weights = weights
        self.l1_ratio = l1_ratio
        self.alpha = alpha
        self.activation_name = activation_name
        self.dropout = dropout
        self.random_state = random_state
        self.regr = None

    def fit(self, X, y):
        """Fit the model to the data.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number of samples and
                n_features is the number of features.
            y: array-like, shape = [n_samples]
                Target values.

        Returns:

            self
        """
        regressor = NeuralNetRegressor(
            hidden_layer_sizes=self.hidden_layer_sizes,
            max_iter=self.max_iter,
            learning_rate=self.learning_rate,
            weights=self.weights,
            l1_ratio=self.l1_ratio,
            alpha=self.alpha,
            activation_name=self.activation_name,
            dropout=self.dropout,
            random_state=self.random_state,
        )
        self.regr = SimpleMultitaskClassifier(regressor)
        self.regr.fit(X, y)

        # Fitted metadata; each attribute is assigned exactly once.
        self.classes_ = np.unique(y)
        self.n_classes_ = len(self.classes_)
        self.n_tasks_ = 1
        self.n_features_in_ = X.shape[1]
        self.n_features_out_ = 1
        self.n_outputs_ = 1
        self.n_samples_fit_ = X.shape[0]
        self.n_samples_test_ = X.shape[0]
        return self

    def predict_proba(self, X):
        """Predict the probability of the target variable.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Input vectors, where n_samples is the number of samples and
                n_features is the number of features.

        Returns:

            Whatever the wrapped multitask classifier's `predict_proba`
            returns for X.
        """
        return self.regr.predict_proba(X)

    def predict(self, X):
        """Predict the target variable.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Input vectors, where n_samples is the number of samples and
                n_features is the number of features.

        Returns:

            Whatever the wrapped multitask classifier's `predict`
            returns for X.
        """
        return self.regr.predict(X)
(Pretrained) Neural Network Classifier.
Parameters:
hidden_layer_sizes : tuple, default=(100,)
The number of neurons in each hidden layer.
max_iter : int, default=100
The maximum number of iterations to train the model.
learning_rate : float, default=0.01
The learning rate for the optimizer.
l1_ratio : float, default=0.5
The ratio of L1 regularization.
alpha : float, default=1e-6
The regularization parameter.
activation_name : str, default="relu"
The activation function to use.
dropout : float, default=0.0
The dropout rate.
random_state : int, default=None
The random state for the random number generator.
weights : list, default=None
The weights to initialize the model with.
Attributes:
weights : list
The weights of the model.
params : list
The parameters of the model.
scaler_ : sklearn.preprocessing.StandardScaler
The scaler used to standardize the input features.
y_mean_ : float
The mean of the target variable.
Methods:
fit(X, y)
Fit the model to the data.
predict(X)
Predict the target variable.
predict_proba(X)
Predict the probability of the target variable.
get_weights()
Get the weights of the model.
set_weights(weights)
Set the weights of the model.
def fit(self, X, y):
    """Fit the model to the data.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number of samples and
            n_features is the number of features.
        y: array-like, shape = [n_samples]
            Target values.

    Returns:

        self
    """
    regressor = NeuralNetRegressor(
        hidden_layer_sizes=self.hidden_layer_sizes,
        max_iter=self.max_iter,
        learning_rate=self.learning_rate,
        weights=self.weights,
        l1_ratio=self.l1_ratio,
        alpha=self.alpha,
        activation_name=self.activation_name,
        dropout=self.dropout,
        random_state=self.random_state,
    )
    self.regr = SimpleMultitaskClassifier(regressor)
    self.regr.fit(X, y)

    # Fitted metadata; each attribute is assigned exactly once (the former
    # repeated assignments were redundant).
    self.classes_ = np.unique(y)
    self.n_classes_ = len(self.classes_)
    self.n_tasks_ = 1
    self.n_features_in_ = X.shape[1]
    self.n_features_out_ = 1
    self.n_outputs_ = 1
    self.n_samples_fit_ = X.shape[0]
    self.n_samples_test_ = X.shape[0]
    return self
Fit the model to the data.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
def predict_proba(self, X):
    """Return class probabilities for X from the wrapped classifier.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Input vectors, where n_samples is the number of samples and
            n_features is the number of features.

    Returns:

        The result of `self.regr.predict_proba(X)`, unchanged.
    """
    wrapped = self.regr
    probabilities = wrapped.predict_proba(X)
    return probabilities
Predict the probability of the target variable.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
def predict(self, X):
    """Return predicted labels for X from the wrapped classifier.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Input vectors, where n_samples is the number of samples and
            n_features is the number of features.

    Returns:

        The result of `self.regr.predict(X)`, unchanged.
    """
    wrapped = self.regr
    labels = wrapped.predict(X)
    return labels
Predict the target variable.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
class PredictionInterval(BaseEstimator, RegressorMixin):
    """Class PredictionInterval: Obtain prediction intervals.

    Attributes:

        obj: an object;
            estimator providing methods `fit` and `predict`
            (it is fitted inside `fit`)

        method: a string;
            method for constructing the prediction intervals.
            Currently "splitconformal" (default) and "localconformal"

        level: a float;
            Confidence level for prediction intervals. Default is 95,
            equivalent to a miscoverage error of 5 (%)

        replications: an integer;
            Number of replications for simulated conformal (default is `None`)

        type_pi: a string;
            type of prediction interval: `None` (default, split conformal
            without simulation), or a simulation method applied to the
            calibrated residuals:
            - 'bootstrap': Bootstrap resampling.
            - 'kde': Kernel Density Estimation.
            - 'normal', 'ecdf', 'permutation', 'smooth-bootstrap':
              delegated to `simulate_replications`.

        type_split: a string;
            "random" (random split of data) or "sequential" (sequential split of data)

        seed: an integer;
            Reproducibility of fit (there's a random split between fitting and calibration data)
    """

    def __init__(
        self,
        obj,
        method="splitconformal",
        level=95,
        type_pi=None,
        type_split="random",
        replications=None,
        kernel=None,
        agg="mean",
        seed=123,
    ):
        self.obj = obj
        self.method = method
        self.level = level
        self.type_pi = type_pi
        self.type_split = type_split
        self.replications = replications
        self.kernel = kernel
        self.agg = agg
        self.seed = seed
        # Miscoverage as a fraction (0.05 for level=95).
        self.alpha_ = 1 - self.level / 100
        self.quantile_ = None
        self.icp_ = None
        self.calibrated_residuals_ = None
        self.scaled_calibrated_residuals_ = None
        self.calibrated_residuals_scaler_ = None
        self.kde_ = None
        self.aic_ = None
        self.aicc_ = None
        self.bic_ = None
        self.sse_ = None

    def fit(self, X, y, sample_weight=None, **kwargs):
        """Fit the `method` to training data (X, y).

        The data is split in two halves: one to fit `obj`, the other to
        calibrate the residuals.

        Args:

            X: array-like, shape = [n_samples, n_features];
                Training set vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples, ]; Target values.

            sample_weight: array-like, shape = [n_samples]
                Sample weights (accepted but not used here).

        Returns:

            self

        Raises:

            ValueError: if `method` or `type_split` is not a supported value.
        """
        if self.method not in ("splitconformal", "localconformal"):
            raise ValueError(
                "`method` must be in ('splitconformal', 'localconformal')"
            )

        if self.type_split == "random":
            X_train, X_calibration, y_train, y_calibration = train_test_split(
                X, y, test_size=0.5, random_state=self.seed
            )
        elif self.type_split == "sequential":
            n_x_half = X.shape[0] // 2
            X_train, X_calibration = X[:n_x_half, :], X[n_x_half:, :]
            y_train, y_calibration = y[:n_x_half], y[n_x_half:]
        else:
            # Previously an unknown value surfaced later as a NameError.
            raise ValueError(
                "`type_split` must be in ('random', 'sequential')"
            )

        if self.method == "splitconformal":
            self.obj.fit(X_train, y_train)
            preds_calibration = self.obj.predict(X_calibration)
            self.calibrated_residuals_ = y_calibration - preds_calibration
            absolute_residuals = np.abs(self.calibrated_residuals_)
            try:
                # numpy version >= 1.22
                self.quantile_ = np.quantile(
                    a=absolute_residuals, q=self.level / 100, method="higher"
                )
            except TypeError:
                # numpy version < 1.22 does not know the `method` keyword
                self.quantile_ = np.quantile(
                    a=absolute_residuals,
                    q=self.level / 100,
                    interpolation="higher",
                )
        else:  # localconformal
            mad_estimator = ExtraTreesRegressor()
            normalizer = RegressorNormalizer(
                self.obj, mad_estimator, AbsErrorErrFunc()
            )
            nc = RegressorNc(self.obj, AbsErrorErrFunc(), normalizer)
            self.icp_ = IcpRegressor(nc)
            self.icp_.fit(X_train, y_train)
            self.icp_.calibrate(X_calibration, y_calibration)

            # Store calibration residuals from the ICP scorer so that
            # simulation-based prediction intervals are available in predict().
            # NOTE(review): assumes `nc_function.predict` accepts X alone and
            # returns point predictions -- confirm against the conformal
            # library in use.
            self.calibrated_residuals_ = self.icp_.nc_function.err_func.apply(
                self.icp_.nc_function.predict(X_calibration), y_calibration
            )

        # Standardize the residuals; simulations are drawn on this scale and
        # rescaled in _simulate_from_residuals().
        self.calibrated_residuals_scaler_ = StandardScaler(
            with_mean=True, with_std=True
        )
        self.scaled_calibrated_residuals_ = (
            self.calibrated_residuals_scaler_.fit_transform(
                self.calibrated_residuals_.reshape(-1, 1)
            ).ravel()
        )

        # Information criteria computed on the calibration half
        preds = self.obj.predict(X_calibration)
        self.sse_ = np.sum((y_calibration - preds) ** 2)

        # Number of parameters: hidden features of the base model (0 when the
        # attribute is absent) plus the number of input columns
        n_params = (
            getattr(self.obj, "n_hidden_features", 0) + X_calibration.shape[1]
        )

        n_samples = len(y_calibration)
        temp = n_samples * np.log(self.sse_ / n_samples)
        self.aic_ = temp + 2 * n_params
        self.bic_ = temp + np.log(n_samples) * n_params

        return self

    def _simulate_from_residuals(self, pred, n_obs):
        """Shared helper: draw `self.replications` simulations from calibrated
        residuals and return (sims, mean, lower, upper).

        Args:
            pred: 1-D array of point predictions, shape [n_obs].
            n_obs: int, number of test observations.

        Returns:
            sims_  : 2-D array, shape [n_obs, replications]
            mean_  : 1-D array, shape [n_obs]
            lower_ : 1-D array, shape [n_obs]
            upper_ : 1-D array, shape [n_obs]
        """
        type_pi = self.type_pi if self.type_pi is not None else "kde"
        replications = (
            self.replications if self.replications is not None else 100
        )

        assert type_pi in (
            "bootstrap",
            "kde",
            "normal",
            "ecdf",
            "permutation",
            "smooth-bootstrap",
        ), (
            "`type_pi` must be in ('bootstrap', 'kde', 'normal', 'ecdf', "
            "'permutation', 'smooth-bootstrap')"
        )

        scale = self.calibrated_residuals_scaler_.scale_[0]

        if type_pi == "bootstrap":
            np.random.seed(self.seed)
            residuals_sims = np.asarray(
                [
                    np.random.choice(
                        a=self.scaled_calibrated_residuals_,
                        size=n_obs,
                    )
                    for _ in range(replications)
                ]
            ).T  # shape [n_obs, replications]

        elif type_pi == "kde":
            kde = gaussian_kde(dataset=self.scaled_calibrated_residuals_)
            residuals_sims = np.asarray(
                [
                    kde.resample(size=n_obs, seed=self.seed + i).ravel()
                    for i in range(replications)
                ]
            ).T  # shape [n_obs, replications]

        else:  # normal / ecdf / permutation / smooth-bootstrap
            residuals_sims = np.asarray(
                simulate_replications(
                    data=self.scaled_calibrated_residuals_,
                    method=type_pi,
                    num_replications=replications,
                    n_obs=n_obs,
                    seed=self.seed,
                )
            ).T  # shape [n_obs, replications]

        # Add the rescaled residual draws to the point predictions, one
        # column per replication.
        sims = np.asarray(pred).ravel()[:, None] + scale * residuals_sims

        # Two-sided interval at the requested level: for level=95 the bounds
        # are the 2.5% and 97.5% quantiles. The miscoverage is a fraction, so
        # it is halved (dividing by 200 would only be right for a percentage).
        alpha = 1 - self.level / 100
        mean_ = np.mean(sims, axis=1)
        lower_ = np.quantile(sims, q=alpha / 2, axis=1)
        upper_ = np.quantile(sims, q=1 - alpha / 2, axis=1)

        return sims, mean_, lower_, upper_

    def predict(self, X, return_pi=False):
        """Obtain predictions and prediction intervals

        Args:

            X: array-like, shape = [n_samples, n_features];
                Testing set vectors, where n_samples is the number
                of samples and n_features is the number of features.

            return_pi: boolean
                Whether the prediction interval is returned or not.
                Default is False, for compatibility with other _estimators_.
                If True, a tuple containing the predictions + lower and upper
                bounds is returned. When simulation is requested
                (`replications` / `type_pi`), a namedtuple
                (mean, sims, lower, upper) is returned regardless.

        Raises:

            ValueError: if `method` is not a supported value.
        """
        if self.method == "splitconformal":
            pred = self.obj.predict(X)
            simulate = not (self.replications is None and self.type_pi is None)
        elif self.method == "localconformal":
            pred = self.icp_.predict(X)
            simulate = self.replications is not None
        else:
            raise ValueError(
                "`method` must be in ('splitconformal', 'localconformal')"
            )

        if simulate:
            if self.type_pi is None:
                warnings.warn(
                    "type_pi must be set when replications is not None; "
                    "defaulting to 'kde'."
                )
            if self.method == "splitconformal" and self.replications is None:
                warnings.warn(
                    "replications must be set when type_pi is not None; "
                    "defaulting to 100."
                )

            (
                self.sims_,
                self.mean_,
                self.lower_,
                self.upper_,
            ) = self._simulate_from_residuals(pred, X.shape[0])

            DescribeResult = namedtuple(
                "DescribeResult", ("mean", "sims", "lower", "upper")
            )
            return DescribeResult(
                self.mean_, self.sims_, self.lower_, self.upper_
            )

        if not return_pi:
            return pred

        DescribeResult = namedtuple(
            "DescribeResult", ("mean", "lower", "upper")
        )
        if self.method == "splitconformal":
            # Plain split-conformal: symmetric quantile band
            return DescribeResult(
                pred, pred - self.quantile_, pred + self.quantile_
            )

        # localconformal: significance is the miscoverage as a fraction
        # (0.05 for level=95); `1 - level` would be -94.
        predictions_bounds = self.icp_.predict(
            X, significance=1 - self.level / 100
        )
        return DescribeResult(
            pred,
            predictions_bounds[:, 0],
            predictions_bounds[:, 1],
        )
Class PredictionInterval: Obtain prediction intervals.
Attributes:
obj: an object;
fitted object containing methods `fit` and `predict`
method: a string;
method for constructing the prediction intervals.
Currently "splitconformal" (default) and "localconformal"
level: a float;
Confidence level for prediction intervals. Default is 95,
equivalent to a miscoverage error of 5 (%)
replications: an integer;
Number of replications for simulated conformal (default is `None`)
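type_pi: a string;
type of prediction interval: `None` (default; split conformal without simulation),
or a simulation method applied to the calibrated residuals:
- 'bootstrap': Bootstrap resampling.
- 'kde': Kernel Density Estimation.
- 'normal', 'ecdf', 'permutation', 'smooth-bootstrap': also accepted by the code.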
type_split: a string;
"random" (random split of data) or "sequential" (sequential split of data)
seed: an integer;
Reproducibility of fit (there's a random split between fitting and calibration data)
def fit(self, X, y, sample_weight=None, **kwargs):
    """Fit the `method` to training data (X, y).

    The data is split in two halves: one to fit `obj`, the other to
    calibrate the residuals.

    Args:

        X: array-like, shape = [n_samples, n_features];
            Training set vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples, ]; Target values.

        sample_weight: array-like, shape = [n_samples]
            Sample weights (accepted but not used here).

    Returns:

        self

    Raises:

        ValueError: if `method` or `type_split` is not a supported value.
    """
    if self.method not in ("splitconformal", "localconformal"):
        raise ValueError(
            "`method` must be in ('splitconformal', 'localconformal')"
        )

    if self.type_split == "random":
        X_train, X_calibration, y_train, y_calibration = train_test_split(
            X, y, test_size=0.5, random_state=self.seed
        )
    elif self.type_split == "sequential":
        n_x_half = X.shape[0] // 2
        X_train, X_calibration = X[:n_x_half, :], X[n_x_half:, :]
        y_train, y_calibration = y[:n_x_half], y[n_x_half:]
    else:
        # Previously an unknown value surfaced later as a NameError.
        raise ValueError("`type_split` must be in ('random', 'sequential')")

    if self.method == "splitconformal":
        self.obj.fit(X_train, y_train)
        preds_calibration = self.obj.predict(X_calibration)
        self.calibrated_residuals_ = y_calibration - preds_calibration
        absolute_residuals = np.abs(self.calibrated_residuals_)
        try:
            # numpy version >= 1.22
            self.quantile_ = np.quantile(
                a=absolute_residuals, q=self.level / 100, method="higher"
            )
        except TypeError:
            # numpy version < 1.22 does not know the `method` keyword
            self.quantile_ = np.quantile(
                a=absolute_residuals,
                q=self.level / 100,
                interpolation="higher",
            )
    else:  # localconformal
        mad_estimator = ExtraTreesRegressor()
        normalizer = RegressorNormalizer(
            self.obj, mad_estimator, AbsErrorErrFunc()
        )
        nc = RegressorNc(self.obj, AbsErrorErrFunc(), normalizer)
        self.icp_ = IcpRegressor(nc)
        self.icp_.fit(X_train, y_train)
        self.icp_.calibrate(X_calibration, y_calibration)

        # Store calibration residuals from the ICP scorer so that
        # simulation-based prediction intervals are available in predict().
        # NOTE(review): assumes `nc_function.predict` accepts X alone and
        # returns point predictions -- confirm against the conformal
        # library in use.
        self.calibrated_residuals_ = self.icp_.nc_function.err_func.apply(
            self.icp_.nc_function.predict(X_calibration), y_calibration
        )

    # Standardize the residuals; simulations are drawn on this scale.
    self.calibrated_residuals_scaler_ = StandardScaler(
        with_mean=True, with_std=True
    )
    self.scaled_calibrated_residuals_ = (
        self.calibrated_residuals_scaler_.fit_transform(
            self.calibrated_residuals_.reshape(-1, 1)
        ).ravel()
    )

    # Information criteria computed on the calibration half
    preds = self.obj.predict(X_calibration)
    self.sse_ = np.sum((y_calibration - preds) ** 2)

    # Number of parameters: hidden features of the base model (0 when the
    # attribute is absent) plus the number of input columns
    n_params = (
        getattr(self.obj, "n_hidden_features", 0) + X_calibration.shape[1]
    )

    n_samples = len(y_calibration)
    temp = n_samples * np.log(self.sse_ / n_samples)
    self.aic_ = temp + 2 * n_params
    self.bic_ = temp + np.log(n_samples) * n_params

    return self
Fit the method to training data (X, y).
Args:
X: array-like, shape = [n_samples, n_features];
Training set vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples, ]; Target values.
sample_weight: array-like, shape = [n_samples]
Sample weights.
def predict(self, X, return_pi=False):
    """Obtain predictions and prediction intervals

    Args:

        X: array-like, shape = [n_samples, n_features];
            Testing set vectors, where n_samples is the number
            of samples and n_features is the number of features.

        return_pi: boolean
            Whether the prediction interval is returned or not.
            Default is False, for compatibility with other _estimators_.
            If True, a tuple containing the predictions + lower and upper
            bounds is returned. When simulation is requested
            (`replications` / `type_pi`), a namedtuple
            (mean, sims, lower, upper) is returned regardless.

    Raises:

        ValueError: if `method` is not a supported value.
    """
    if self.method == "splitconformal":
        pred = self.obj.predict(X)
        simulate = not (self.replications is None and self.type_pi is None)
    elif self.method == "localconformal":
        pred = self.icp_.predict(X)
        simulate = self.replications is not None
    else:
        raise ValueError(
            "`method` must be in ('splitconformal', 'localconformal')"
        )

    if simulate:
        if self.type_pi is None:
            warnings.warn(
                "type_pi must be set when replications is not None; "
                "defaulting to 'kde'."
            )
        if self.method == "splitconformal" and self.replications is None:
            warnings.warn(
                "replications must be set when type_pi is not None; "
                "defaulting to 100."
            )

        (
            self.sims_,
            self.mean_,
            self.lower_,
            self.upper_,
        ) = self._simulate_from_residuals(pred, X.shape[0])

        DescribeResult = namedtuple(
            "DescribeResult", ("mean", "sims", "lower", "upper")
        )
        return DescribeResult(self.mean_, self.sims_, self.lower_, self.upper_)

    if not return_pi:
        return pred

    DescribeResult = namedtuple("DescribeResult", ("mean", "lower", "upper"))
    if self.method == "splitconformal":
        # Plain split-conformal: symmetric quantile band
        return DescribeResult(
            pred, pred - self.quantile_, pred + self.quantile_
        )

    # localconformal: significance is the miscoverage as a fraction
    # (0.05 for level=95); `1 - level` would be -94.
    predictions_bounds = self.icp_.predict(
        X, significance=1 - self.level / 100
    )
    return DescribeResult(
        pred,
        predictions_bounds[:, 0],
        predictions_bounds[:, 1],
    )
Obtain predictions and prediction intervals
Args:
X: array-like, shape = [n_samples, n_features];
Testing set vectors, where n_samples is the number
of samples and n_features is the number of features.
return_pi: boolean
Whether the prediction interval is returned or not.
Default is False, for compatibility with other _estimators_.
If True, a tuple containing the predictions + lower and upper
bounds is returned.
class PredictionSet(BaseEstimator, ClassifierMixin):
    """Class PredictionSet: Obtain prediction sets.

    Attributes:

        obj: an object;
            estimator providing methods `fit` and `predict`
            (it is fitted inside `fit`)

        method: a string;
            method for constructing the prediction sets.
            Currently "icp" (default, inductive conformal) and "tcp" (transductive conformal)

        level: a float;
            Confidence level for prediction sets. Default is None;
            95 is equivalent to a miscoverage error of 5 (%)

        seed: an integer;
            Reproducibility of fit (there's a random split between fitting and calibration data)
    """

    def __init__(
        self,
        obj,
        method="icp",
        level=None,
        seed=123,
    ):
        self.obj = obj
        self.method = method
        self.level = level
        self.seed = seed
        # Always define alpha_: it used to be missing when `level` was left
        # at its default (None), which made predict() fail with an
        # AttributeError.
        self.alpha_ = None if self.level is None else 1 - self.level / 100
        self.quantile_ = None
        self.icp_ = None
        self.tcp_ = None

        if self.method == "icp":
            self.icp_ = IcpClassifier(
                ClassifierNc(ClassifierAdapter(self.obj), MarginErrFunc()),
            )
        elif self.method == "tcp":
            self.tcp_ = TcpClassifier(
                ClassifierNc(ClassifierAdapter(self.obj), MarginErrFunc()),
            )
        else:
            raise ValueError("`self.method` must be in ('icp', 'tcp')")

    def fit(self, X, y, sample_weight=None, **kwargs):
        """Fit the `method` to training data (X, y).

        Args:

            X: array-like, shape = [n_samples, n_features];
                Training set vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples, ]; Target values.

            sample_weight: array-like, shape = [n_samples]
                Sample weights (accepted but not used here).

        Returns:

            self
        """
        if self.method == "icp":
            # Half of the data fits the model, the other half calibrates it.
            X_train, X_calibration, y_train, y_calibration = train_test_split(
                X, y, test_size=0.5, random_state=self.seed
            )
            self.icp_.fit(X_train, y_train)
            self.icp_.calibrate(X_calibration, y_calibration)

        elif self.method == "tcp":
            self.tcp_.fit(X, y)

        return self

    def predict(self, X, **kwargs):
        """Obtain predictions and prediction sets

        Args:

            X: array-like, shape = [n_samples, n_features];
                Testing set vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: forwarded to the conformal predictor's `predict`.

        Returns:

            Whatever the underlying conformal predictor returns for the
            significance `1 - level / 100`.

        Raises:

            ValueError: if `method` is not 'icp' or 'tcp'.
        """
        # Derived from `level` at call time so a later change of `level` is
        # honoured. NOTE(review): with level=None the significance passed is
        # None; presumably the conformal predictor then returns p-values
        # instead of prediction sets -- confirm against the library.
        significance = None if self.level is None else 1 - self.level / 100

        if self.method == "icp":
            return self.icp_.predict(X, significance=significance, **kwargs)

        elif self.method == "tcp":
            return self.tcp_.predict(X, significance=significance, **kwargs)

        else:
            raise ValueError("`self.method` must be in ('icp', 'tcp')")

    def predict_proba(self, X):
        """One-hot encode the output of `predict`.

        NOTE(review): this indexes an identity matrix with the raw output of
        `predict`, which only works if that output consists of integer class
        indices -- confirm against the conformal predictor's return type.
        """
        predictions = self.predict(X)
        return np.eye(len(np.unique(predictions)))[predictions]
Class PredictionSet: Obtain prediction sets.
Attributes:
obj: an object;
fitted object containing methods `fit` and `predict`
method: a string;
method for constructing the prediction sets.
Currently "icp" (default, inductive conformal) and "tcp" (transductive conformal)
level: a float;
Confidence level for prediction sets, in percent. Default is None;
a level of 95 is equivalent to a miscoverage error of 5 (%)
seed: an integer;
Reproducibility of fit (there's a random split between fitting and calibration data)
def fit(self, X, y, sample_weight=None, **kwargs):
    """Fit the `method` to training data (X, y).

    Args:

        X: array-like, shape = [n_samples, n_features];
            Training set vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples, ]; Target values.

        sample_weight: array-like, shape = [n_samples]
            Sample weights. Accepted for signature compatibility;
            not used by this method.

        **kwargs: accepted for signature compatibility; not used.

    Returns:

        self: object

    Raises:

        ValueError: if `self.method` is neither "icp" nor "tcp".

    """
    if self.method == "icp":
        # Inductive conformal: half of the data fits the model, the other
        # half calibrates it. The split is seeded for reproducibility.
        X_train, X_calibration, y_train, y_calibration = train_test_split(
            X, y, test_size=0.5, random_state=self.seed
        )
        self.icp_.fit(X_train, y_train)
        self.icp_.calibrate(X_calibration, y_calibration)

    elif self.method == "tcp":
        self.tcp_.fit(X, y)

    else:
        # Mirrors predict(): fail loudly instead of returning an
        # estimator that was never fitted.
        raise ValueError("`self.method` must be in ('icp', 'tcp')")

    return self
Fit the method to training data (X, y).
Args:
X: array-like, shape = [n_samples, n_features];
Training set vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples, ]; Target values.
sample_weight: array-like, shape = [n_samples]
Sample weights.
def predict(self, X, **kwargs):
    """Obtain predictions and prediction sets

    Args:

        X: array-like, shape = [n_samples, n_features];
            Testing set vectors, where n_samples is the number
            of samples and n_features is the number of features.

        **kwargs: forwarded to the underlying conformal predictor's
            `predict`.

    Returns:

        Whatever the underlying conformal predictor's `predict` returns
        for the given significance level.
        NOTE(review): with no significance level (level=None) the
        predictor presumably returns p-values instead of sets; confirm.

    Raises:

        ValueError: if `self.method` is neither "icp" nor "tcp".

    """
    # `alpha_` is only assigned in __init__ when `level` is not None; fall
    # back to None so the default configuration does not raise AttributeError.
    significance = getattr(self, "alpha_", None)

    if self.method == "icp":
        return self.icp_.predict(X, significance=significance, **kwargs)

    if self.method == "tcp":
        return self.tcp_.predict(X, significance=significance, **kwargs)

    raise ValueError("`self.method` must be in ('icp', 'tcp')")
Obtain predictions and prediction sets
Args:
X: array-like, shape = [n_samples, n_features];
Testing set vectors, where n_samples is the number
of samples and n_features is the number of features.
class SimpleMultitaskClassifier(Base, ClassifierMixin):
    """Multitask Classification model based on regression models, with shared covariates

    Parameters:

        obj: object
            any object (must be a regression model) containing a method fit (obj.fit())
            and a method predict (obj.predict())

    Attributes:

        fit_objs_: dict
            objects adjusted to each individual class (one regressor per
            one-hot encoded column); only filled when the multi-output
            fit fails and the per-class fallback is used

        multioutput_model_: object or None
            multi-output wrapper fitted on all one-hot columns at once;
            None when the per-class fallback is used

        n_classes_: int
            number of classes for the classifier

    Examples:

    ```python
    import nnetsauce as ns
    import numpy as np
    from sklearn.datasets import load_breast_cancer
    from sklearn.linear_model import LinearRegression
    from sklearn.model_selection import train_test_split
    from sklearn import metrics
    from time import time

    breast_cancer = load_breast_cancer()
    Z = breast_cancer.data
    t = breast_cancer.target

    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
                                                        random_state=123+2*10)

    # Linear Regression is used
    regr = LinearRegression()
    fit_obj = ns.SimpleMultitaskClassifier(regr)

    start = time()
    fit_obj.fit(X_train, y_train)
    print(f"Elapsed {time() - start}")

    print(fit_obj.score(X_test, y_test))
    print(fit_obj.score(X_test, y_test, scoring="roc_auc"))

    start = time()
    preds = fit_obj.predict(X_test)
    print(f"Elapsed {time() - start}")
    print(metrics.classification_report(preds, y_test))
    ```

    """

    # construct the object -----

    def __init__(
        self,
        obj,
    ):
        self.type_fit = "classification"
        self.obj = obj
        self.fit_objs_ = {}
        self.multioutput_model_ = None
        self.X_scaler_ = StandardScaler()
        self.scaled_X_ = None

    def fit(self, X, y, sample_weight=None, **kwargs):
        """Fit SimpleMultitaskClassifier to training data (X, y).

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            sample_weight: array-like, shape = [n_samples]
                Sample weights; dropped if the underlying `fit` raises a
                TypeError when given them.

            **kwargs: additional parameters to be passed to
                the underlying model's `fit`

        Returns:

            self: object

        """

        assert mx.is_factor(y), "y must contain only integers"

        self.classes_ = np.unique(y)  # for compatibility with sklearn
        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn

        self.scaled_X_ = self.X_scaler_.fit_transform(X)

        # multitask response
        Y = mo.one_hot_encode2(y, self.n_classes_)

        # Try MultiOutputRegressor first (more efficient)
        try:
            self.multioutput_model_ = MultiOutputRegressor(deepcopy(self.obj))
            try:
                self.multioutput_model_.fit(
                    self.scaled_X_, Y, sample_weight=sample_weight, **kwargs
                )
            except TypeError:
                # If sample_weight not supported, try without it
                self.multioutput_model_.fit(self.scaled_X_, Y, **kwargs)
        except Exception:
            # Fallback: fit separate models for each class
            self.multioutput_model_ = None
            try:
                for i in range(self.n_classes_):
                    self.fit_objs_[i] = deepcopy(
                        self.obj.fit(
                            self.scaled_X_,
                            Y[:, i],
                            sample_weight=sample_weight,
                            **kwargs
                        )
                    )
            except TypeError:
                for i in range(self.n_classes_):
                    self.fit_objs_[i] = deepcopy(
                        self.obj.fit(self.scaled_X_, Y[:, i], **kwargs)
                    )
        return self

    def predict(self, X, **kwargs):
        """Predict test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters

        Returns:

            model predictions: {array-like}
                column index of the largest probability for each row

        """
        return np.argmax(self.predict_proba(X, **kwargs), axis=1)

    def predict_proba(self, X, **kwargs):
        """Predict probabilities for test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features.
                A 1-D array is treated as a single example.

            **kwargs: additional parameters, forwarded both to the scaler's
                `transform` and to the underlying `predict`

        Returns:

            probability estimates for test data: {array-like}
                one row per example, one column per class; each row is the
                logistic transform of the raw outputs, rescaled to sum to 1

        """

        single_example = len(X.shape) == 1

        if single_example:
            n_features = X.shape[0]
            # The example is stacked with a dummy row of ones so a 2-row
            # matrix goes downstream; only the first row's output is kept.
            new_X = mo.rbind(
                X.reshape(1, n_features),
                np.ones(n_features).reshape(1, n_features),
            )
            Z = self.X_scaler_.transform(new_X, **kwargs)
        else:  # multiple rows
            Z = self.X_scaler_.transform(X, **kwargs)

        if self.multioutput_model_ is not None:
            # Use MultiOutputRegressor for prediction
            probs = self.multioutput_model_.predict(Z, **kwargs)
            if single_example:
                probs = probs[:1, :]
        else:
            # Use separate models for each class. A single example gets
            # exactly one output row: X.shape[0] is n_features for 1-D
            # input and must not be used as the row count.
            n_rows = 1 if single_example else X.shape[0]
            probs = np.zeros((n_rows, self.n_classes_))

            for i in range(self.n_classes_):
                class_preds = self.fit_objs_[i].predict(Z, **kwargs)
                probs[:, i] = class_preds[0] if single_example else class_preds

        expit_raw_probs = expit(probs)

        # Add small epsilon to avoid division by zero
        row_sums = expit_raw_probs.sum(axis=1)[:, None]
        row_sums[row_sums < 1e-10] = 1e-10

        return expit_raw_probs / row_sums

    def decision_function(self, X, **kwargs):
        """Compute the decision function of X.

        Parameters:
            X: {array-like}, shape = [n_samples, n_features]
                Samples to compute decision function for.

            **kwargs: additional parameters to be passed to
                self.cook_test_set

        Returns:
            array-like of shape (n_samples,) or (n_samples, n_classes)
            Decision function of the input samples. The order of outputs is the same
            as that of the classes passed to fit.
        """
        if not hasattr(self.obj, "decision_function"):
            # If base classifier doesn't have decision_function, use predict_proba
            proba = self.predict_proba(X, **kwargs)
            if proba.shape[1] == 2:
                return proba[:, 1]  # For binary classification
            return proba  # For multiclass

        # NOTE(review): `cook_test_set` is not defined in this class
        # (presumably inherited from Base), and `self.obj` itself is only
        # fitted in the per-class fallback path of `fit`; confirm this
        # branch works as intended.
        if len(X.shape) == 1:
            n_features = X.shape[0]
            new_X = mo.rbind(
                X.reshape(1, n_features),
                np.ones(n_features).reshape(1, n_features),
            )

            return (
                self.obj.decision_function(
                    self.cook_test_set(new_X, **kwargs), **kwargs
                )
            )[0]

        return self.obj.decision_function(
            self.cook_test_set(X, **kwargs), **kwargs
        )

    # Defined once, as a property; a same-named class attribute earlier in
    # the body would simply be shadowed by this definition.
    @property
    def _estimator_type(self):
        return "classifier"
Multitask Classification model based on regression models, with shared covariates
Parameters:
obj: object
any object (must be a regression model) containing a method fit (obj.fit())
and a method predict (obj.predict())
seed: int
reproducibility seed (note: the constructor shown in the source listing only accepts `obj`)
Attributes:
fit_objs_: dict
objects adjusted to each individual class (one regressor per one-hot encoded class column)
n_classes_: int
number of classes for the classifier
Examples:
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time
breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
random_state=123+2*10)
# Linear Regression is used
regr = LinearRegression()
fit_obj = ns.SimpleMultitaskClassifier(regr)
start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")
print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
def fit(self, X, y, sample_weight=None, **kwargs):
    """Train the classifier on (X, y) via regression on one-hot targets.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        sample_weight: array-like, shape = [n_samples]
            Sample weights; dropped if the underlying `fit` raises a
            TypeError when given them.

        **kwargs: additional parameters forwarded to the underlying
            model's `fit`

    Returns:

        self: object

    """

    assert mx.is_factor(y), "y must contain only integers"

    # kept as attributes for compatibility with sklearn
    labels = np.unique(y)
    self.classes_ = labels
    self.n_classes_ = len(labels)

    self.scaled_X_ = self.X_scaler_.fit_transform(X)

    # one response column per class
    targets = mo.one_hot_encode2(y, self.n_classes_)

    def _fit_each_class(**fit_args):
        # Fit the base model on every class column in turn and store a copy.
        for k in range(self.n_classes_):
            fitted = self.obj.fit(self.scaled_X_, targets[:, k], **fit_args)
            self.fit_objs_[k] = deepcopy(fitted)

    try:
        # Preferred path: a single multi-output wrapper around a copy of obj.
        self.multioutput_model_ = MultiOutputRegressor(deepcopy(self.obj))
        try:
            self.multioutput_model_.fit(
                self.scaled_X_, targets, sample_weight=sample_weight, **kwargs
            )
        except TypeError:
            # sample_weight not supported: retry without it
            self.multioutput_model_.fit(self.scaled_X_, targets, **kwargs)
    except Exception:
        # Multi-output path failed: fall back to one model per class.
        self.multioutput_model_ = None
        try:
            _fit_each_class(sample_weight=sample_weight, **kwargs)
        except TypeError:
            _fit_each_class(**kwargs)

    return self
Fit SimpleMultitaskClassifier to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
the `fit` method of the underlying regression model
Returns:
self: object
def predict(self, X, **kwargs):
    """Return, for each row of X, the index of the most probable class.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Test vectors, where n_samples is the number
            of samples and n_features is the number of features.

        **kwargs: additional parameters, forwarded to `predict_proba`

    Returns:

        model predictions: {array-like}

    """
    class_probabilities = self.predict_proba(X, **kwargs)
    # position of the largest probability along the class axis
    return np.argmax(class_probabilities, axis=1)
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters
Returns:
model predictions: {array-like}
169 def predict_proba(self, X, **kwargs): 170 """Predict probabilities for test data X. 171 172 Args: 173 174 X: {array-like}, shape = [n_samples, n_features] 175 Training vectors, where n_samples is the number 176 of samples and n_features is the number of features. 177 178 **kwargs: additional parameters 179 180 Returns: 181 182 probability estimates for test data: {array-like} 183 184 """ 185 186 shape_X = X.shape 187 188 if self.multioutput_model_ is not None: 189 # Use MultiOutputRegressor for prediction 190 if len(shape_X) == 1: # one example 191 n_features = shape_X[0] 192 new_X = mo.rbind( 193 X.reshape(1, n_features), 194 np.ones(n_features).reshape(1, n_features), 195 ) 196 Z = self.X_scaler_.transform(new_X, **kwargs) 197