nnetsauce
1from .base.base import Base 2from .base.baseRegressor import BaseRegressor 3from .boosting.adaBoostClassifier import AdaBoostClassifier 4from .custom.customClassifier import CustomClassifier 5from .custom.customRegressor import CustomRegressor 6from .custom.customBackpropRegressor import CustomBackPropRegressor 7from .datasets import Downloader 8from .deep.deepClassifier import DeepClassifier 9from .deep.deepRegressor import DeepRegressor 10from .deep.deepMTS import DeepMTS 11from .glm.glmClassifier import GLMClassifier 12from .glm.glmRegressor import GLMRegressor 13from .kernel.kernel import KernelRidge 14from .lazypredict.lazydeepClassifier import LazyDeepClassifier, LazyClassifier 15from .lazypredict.lazydeepRegressor import LazyDeepRegressor, LazyRegressor 16from .lazypredict.lazydeepClassifier import LazyDeepClassifier 17from .lazypredict.lazydeepRegressor import LazyDeepRegressor 18from .lazypredict.lazydeepMTS import LazyDeepMTS, LazyMTS 19from .mts.mts import MTS 20from .mts.mlarch import MLARCH 21from .mts.classical import ClassicalMTS 22from .multitask.multitaskClassifier import MultitaskClassifier 23from .multitask.simplemultitaskClassifier import SimpleMultitaskClassifier 24from .neuralnet.neuralnetregression import NeuralNetRegressor 25from .neuralnet.neuralnetclassification import NeuralNetClassifier 26from .optimizers.optimizer import Optimizer 27from .predictioninterval import PredictionInterval 28from .quantile.quantileregression import QuantileRegressor 29from .quantile.quantileclassification import QuantileClassifier 30from .randombag.randomBagClassifier import RandomBagClassifier 31from .randombag.randomBagRegressor import RandomBagRegressor 32from .ridge.ridge import RidgeRegressor 33from .ridge2.ridge2Classifier import Ridge2Classifier 34from .ridge2.ridge2Regressor import Ridge2Regressor 35from .ridge2.ridge2MultitaskClassifier import Ridge2MultitaskClassifier 36from .rvfl.bayesianrvflRegressor import BayesianRVFLRegressor 37from .rvfl.bayesianrvfl2Regressor import BayesianRVFL2Regressor 38from .sampling import SubSampler 39from .updater import RegressorUpdater, ClassifierUpdater 40from .votingregressor import MedianVotingRegressor 41 42__all__ = [ 43 "AdaBoostClassifier", 44 "Base", 45 "BaseRegressor", 46 "BayesianRVFLRegressor", 47 "BayesianRVFL2Regressor", 48 "ClassicalMTS", 49 "CustomClassifier", 50 "CustomRegressor", 51 "CustomBackPropRegressor", 52 "DeepClassifier", 53 "DeepRegressor", 54 "DeepMTS", 55 "Downloader", 56 "GLMClassifier", 57 "GLMRegressor", 58 "KernelRidge", 59 "LazyClassifier", 60 "LazyRegressor", 61 "LazyDeepClassifier", 62 "LazyDeepRegressor", 63 "LazyMTS", 64 "LazyDeepMTS", 65 "MLARCH", 66 "MedianVotingRegressor", 67 "MTS", 68 "MultitaskClassifier", 69 "NeuralNetRegressor", 70 "NeuralNetClassifier", 71 "PredictionInterval", 72 "SimpleMultitaskClassifier", 73 "Optimizer", 74 "QuantileRegressor", 75 "QuantileClassifier", 76 "RandomBagRegressor", 77 "RandomBagClassifier", 78 "RegressorUpdater", 79 "ClassifierUpdater", 80 "RidgeRegressor", 81 "Ridge2Regressor", 82 "Ridge2Classifier", 83 "Ridge2MultitaskClassifier", 84 "SubSampler", 85]
21class AdaBoostClassifier(Boosting, ClassifierMixin): 22 """AdaBoost Classification (SAMME) model class derived from class Boosting 23 24 Parameters: 25 26 obj: object 27 any object containing a method fit (obj.fit()) and a method predict 28 (obj.predict()) 29 30 n_estimators: int 31 number of boosting iterations 32 33 learning_rate: float 34 learning rate of the boosting procedure 35 36 n_hidden_features: int 37 number of nodes in the hidden layer 38 39 reg_lambda: float 40 regularization parameter for weights 41 42 reg_alpha: float 43 controls compromize between l1 and l2 norm of weights 44 45 activation_name: str 46 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 47 48 a: float 49 hyperparameter for 'prelu' or 'elu' activation function 50 51 nodes_sim: str 52 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 53 'uniform' 54 55 bias: boolean 56 indicates if the hidden layer contains a bias term (True) or not 57 (False) 58 59 dropout: float 60 regularization parameter; (random) percentage of nodes dropped out 61 of the training 62 63 direct_link: boolean 64 indicates if the original predictors are included (True) in model's 65 fitting or not (False) 66 67 n_clusters: int 68 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: 69 no clustering) 70 71 cluster_encode: bool 72 defines how the variable containing clusters is treated (default is one-hot) 73 if `False`, then labels are used, without one-hot encoding 74 75 type_clust: str 76 type of clustering method: currently k-means ('kmeans') or Gaussian 77 Mixture Model ('gmm') 78 79 type_scaling: a tuple of 3 strings 80 scaling methods for inputs, hidden layer, and clustering respectively 81 (and when relevant). 82 Currently available: standardization ('std') or MinMax scaling ('minmax') 83 84 col_sample: float 85 percentage of covariates randomly chosen for training 86 87 row_sample: float 88 percentage of rows chosen for training, by stratified bootstrapping 89 90 seed: int 91 reproducibility seed for nodes_sim=='uniform' 92 93 verbose: int 94 0 for no output, 1 for a progress bar (default is 1) 95 96 method: str 97 type of Adaboost method, 'SAMME' (discrete) or 'SAMME.R' (real) 98 99 backend: str 100 "cpu" or "gpu" or "tpu" 101 102 Attributes: 103 104 alpha_: list 105 AdaBoost coefficients alpha_m 106 107 base_learners_: dict 108 a dictionary containing the base learners 109 110 Examples: 111 112 See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/adaboost_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/adaboost_classification.py) 113 114 ```python 115 import nnetsauce as ns 116 import numpy as np 117 from sklearn.datasets import load_breast_cancer 118 from sklearn.linear_model import LogisticRegression 119 from sklearn.model_selection import train_test_split 120 from sklearn import metrics 121 from time import time 122 123 breast_cancer = load_breast_cancer() 124 Z = breast_cancer.data 125 t = breast_cancer.target 126 np.random.seed(123) 127 X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2) 128 129 # SAMME.R 130 clf = LogisticRegression(solver='liblinear', multi_class = 'ovr', 131 random_state=123) 132 fit_obj = ns.AdaBoostClassifier(clf, 133 n_hidden_features=int(11.22338867), 134 direct_link=True, 135 n_estimators=250, learning_rate=0.01126343, 136 col_sample=0.72684326, row_sample=0.86429443, 137 dropout=0.63078613, n_clusters=2, 138 type_clust="gmm", 139 verbose=1, seed = 123, 140 method="SAMME.R") 141 142 start = time() 143 fit_obj.fit(X_train, y_train) 144 print(f"Elapsed {time() - start}") 145 146 start = time() 147 print(fit_obj.score(X_test, y_test)) 148 print(f"Elapsed {time() - start}") 149 150 preds = fit_obj.predict(X_test) 151 152 print(metrics.classification_report(preds, y_test)) 153 154 ``` 155 156 """ 157 158 # construct the object ----- 159 _estimator_type = "classifier" 160 161 def __init__( 162 self, 163 obj, 164 n_estimators=10, 165 learning_rate=0.1, 166 n_hidden_features=1, 167 reg_lambda=0, 168 reg_alpha=0.5, 169 activation_name="relu", 170 a=0.01, 171 nodes_sim="sobol", 172 bias=True, 173 dropout=0, 174 direct_link=False, 175 n_clusters=2, 176 cluster_encode=True, 177 type_clust="kmeans", 178 type_scaling=("std", "std", "std"), 179 col_sample=1, 180 row_sample=1, 181 seed=123, 182 verbose=1, 183 method="SAMME", 184 backend="cpu", 185 ): 186 self.type_fit = "classification" 187 self.verbose = verbose 188 self.method = method 189 self.reg_lambda = reg_lambda 190 self.reg_alpha = reg_alpha 191 192 super().__init__( 193 obj=obj, 194 n_estimators=n_estimators, 195 learning_rate=learning_rate, 196 n_hidden_features=n_hidden_features, 197 activation_name=activation_name, 198 a=a, 199 nodes_sim=nodes_sim, 200 bias=bias, 201 dropout=dropout, 202 direct_link=direct_link, 203 n_clusters=n_clusters, 204 cluster_encode=cluster_encode, 205 type_clust=type_clust, 206 type_scaling=type_scaling, 207 col_sample=col_sample, 208 row_sample=row_sample, 209 seed=seed, 210 backend=backend, 211 ) 212 213 self.alpha_ = [] 214 self.base_learners_ = dict.fromkeys(range(n_estimators)) 215 216 def fit(self, X, y, sample_weight=None, **kwargs): 217 """Fit Boosting model to training data (X, y). 218 219 Parameters: 220 221 X: {array-like}, shape = [n_samples, n_features] 222 Training vectors, where n_samples is the number 223 of samples and n_features is the number of features. 224 225 y: array-like, shape = [n_samples] 226 Target values. 227 228 **kwargs: additional parameters to be passed to 229 self.cook_training_set or self.obj.fit 230 231 Returns: 232 233 self: object 234 """ 235 236 assert mx.is_factor(y), "y must contain only integers" 237 238 assert self.method in ( 239 "SAMME", 240 "SAMME.R", 241 ), "`method` must be either 'SAMME' or 'SAMME.R'" 242 243 assert (self.reg_lambda <= 1) & ( 244 self.reg_lambda >= 0 245 ), "must have self.reg_lambda <= 1 & self.reg_lambda >= 0" 246 247 assert (self.reg_alpha <= 1) & ( 248 self.reg_alpha >= 0 249 ), "must have self.reg_alpha <= 1 & self.reg_alpha >= 0" 250 251 # training 252 n, p = X.shape 253 self.n_classes = len(np.unique(y)) 254 self.classes_ = np.unique(y) # for compatibility with sklearn 255 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 256 257 if sample_weight is None: 258 w_m = np.repeat(1.0 / n, n) 259 else: 260 w_m = np.asarray(sample_weight) 261 262 base_learner = CustomClassifier( 263 self.obj, 264 n_hidden_features=self.n_hidden_features, 265 activation_name=self.activation_name, 266 a=self.a, 267 nodes_sim=self.nodes_sim, 268 bias=self.bias, 269 dropout=self.dropout, 270 direct_link=self.direct_link, 271 n_clusters=self.n_clusters, 272 type_clust=self.type_clust, 273 type_scaling=self.type_scaling, 274 col_sample=self.col_sample, 275 row_sample=self.row_sample, 276 seed=self.seed, 277 ) 278 279 if self.verbose == 1: 280 pbar = Progbar(self.n_estimators) 281 282 if self.method == "SAMME": 283 err_m = 1e6 284 err_bound = 1 - 1 / self.n_classes 285 self.alpha_.append(1.0) 286 x_range_n = range(n) 287 288 for m in range(self.n_estimators): 289 preds = base_learner.fit( 290 X, y, sample_weight=w_m.ravel(), **kwargs 291 ).predict(X) 292 293 self.base_learners_.update({m: deepcopy(base_learner)}) 294 295 cond = [y[i] != preds[i] for i in x_range_n] 296 297 err_m = max( 298 sum([elt[0] * elt[1] for elt in zip(cond, w_m)]), 299 2.220446049250313e-16, 300 ) # sum(w_m) == 1 301 302 if self.reg_lambda > 0: 303 err_m += self.reg_lambda * ( 304 (1 - self.reg_alpha) * 0.5 * sum([x**2 for x in w_m]) 305 + self.reg_alpha * sum([abs(x) for x in w_m]) 306 ) 307 308 err_m = min(err_m, err_bound) 309 310 alpha_m = self.learning_rate * log( 311 (self.n_classes - 1) * (1 - err_m) / err_m 312 ) 313 314 self.alpha_.append(alpha_m) 315 316 w_m_temp = [exp(alpha_m * cond[i]) for i in x_range_n] 317 318 sum_w_m = sum(w_m_temp) 319 320 w_m = np.asarray([w_m_temp[i] / sum_w_m for i in x_range_n]) 321 322 base_learner.set_params(seed=self.seed + (m + 1) * 1000) 323 324 if self.verbose == 1: 325 pbar.update(m) 326 327 if self.verbose == 1: 328 pbar.update(self.n_estimators) 329 330 self.n_estimators = len(self.base_learners_) 331 self.classes_ = np.unique(y) 332 333 return self 334 335 if self.method == "SAMME.R": 336 Y = mo.one_hot_encode2(y, self.n_classes) 337 338 if sample_weight is None: 339 w_m = np.repeat(1.0 / n, n) # (N, 1) 340 341 else: 342 w_m = np.asarray(sample_weight) 343 344 for m in range(self.n_estimators): 345 probs = base_learner.fit( 346 X, y, sample_weight=w_m.ravel(), **kwargs 347 ).predict_proba(X) 348 349 np.clip( 350 a=probs, a_min=2.220446049250313e-16, a_max=1.0, out=probs 351 ) 352 353 self.base_learners_.update({m: deepcopy(base_learner)}) 354 355 w_m *= np.exp( 356 -1.0 357 * self.learning_rate 358 * (1.0 - 1.0 / self.n_classes) 359 * xlogy(Y, probs).sum(axis=1) 360 ) 361 362 w_m /= np.sum(w_m) 363 364 base_learner.set_params(seed=self.seed + (m + 1) * 1000) 365 366 if self.verbose == 1: 367 pbar.update(m) 368 369 if self.verbose == 1: 370 pbar.update(self.n_estimators) 371 372 self.n_estimators = len(self.base_learners_) 373 self.classes_ = np.unique(y) 374 375 return self 376 377 def predict(self, X, **kwargs): 378 """Predict test data X. 379 380 Parameters: 381 382 X: {array-like}, shape = [n_samples, n_features] 383 Training vectors, where n_samples is the number 384 of samples and n_features is the number of features. 385 386 **kwargs: additional parameters to be passed to 387 self.cook_test_set 388 389 Returns: 390 391 model predictions: {array-like} 392 """ 393 return self.predict_proba(X, **kwargs).argmax(axis=1) 394 395 def predict_proba(self, X, **kwargs): 396 """Predict probabilities for test data X. 397 398 Parameters: 399 400 X: {array-like}, shape = [n_samples, n_features] 401 Training vectors, where n_samples is the number 402 of samples and n_features is the number of features. 403 404 **kwargs: additional parameters to be passed to 405 self.cook_test_set 406 407 Returns: 408 409 probability estimates for test data: {array-like} 410 411 """ 412 413 n_iter = len(self.base_learners_) 414 415 if self.method == "SAMME": 416 ensemble_learner = np.zeros((X.shape[0], self.n_classes)) 417 418 # if self.verbose == 1: 419 # pbar = Progbar(n_iter) 420 421 for idx, base_learner in self.base_learners_.items(): 422 preds = base_learner.predict(X, **kwargs) 423 424 ensemble_learner += self.alpha_[idx] * mo.one_hot_encode2( 425 preds, self.n_classes 426 ) 427 428 # if self.verbose == 1: 429 # pbar.update(idx) 430 431 # if self.verbose == 1: 432 # pbar.update(n_iter) 433 434 expit_ensemble_learner = expit(ensemble_learner) 435 436 sum_ensemble = expit_ensemble_learner.sum(axis=1) 437 438 return expit_ensemble_learner / sum_ensemble[:, None] 439 440 # if self.method == "SAMME.R": 441 ensemble_learner = 0 442 443 # if self.verbose == 1: 444 # pbar = Progbar(n_iter) 445 446 for idx, base_learner in self.base_learners_.items(): 447 probs = base_learner.predict_proba(X, **kwargs) 448 449 np.clip(a=probs, a_min=2.220446049250313e-16, a_max=1.0, out=probs) 450 451 log_preds_proba = np.log(probs) 452 453 ensemble_learner += ( 454 log_preds_proba - log_preds_proba.mean(axis=1)[:, None] 455 ) 456 457 # if self.verbose == 1: 458 # pbar.update(idx) 459 460 ensemble_learner *= self.n_classes - 1 461 462 # if self.verbose == 1: 463 # pbar.update(n_iter) 464 465 expit_ensemble_learner = expit(ensemble_learner) 466 467 sum_ensemble = expit_ensemble_learner.sum(axis=1) 468 469 return expit_ensemble_learner / sum_ensemble[:, None] 470 471 @property 472 def _estimator_type(self): 473 return "classifier"
AdaBoost Classification (SAMME) model class derived from class Boosting
Parameters:
obj: object
any object containing a method fit (obj.fit()) and a method predict
(obj.predict())
n_estimators: int
number of boosting iterations
learning_rate: float
learning rate of the boosting procedure
n_hidden_features: int
number of nodes in the hidden layer
reg_lambda: float
regularization parameter for weights
reg_alpha: float
controls compromize between l1 and l2 norm of weights
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform'
verbose: int
0 for no output, 1 for a progress bar (default is 1)
method: str
type of Adaboost method, 'SAMME' (discrete) or 'SAMME.R' (real)
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
alpha_: list
AdaBoost coefficients alpha_m
base_learners_: dict
a dictionary containing the base learners
Examples:
See also https://github.com/Techtonique/nnetsauce/blob/master/examples/adaboost_classification.py
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time
breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)
# SAMME.R
clf = LogisticRegression(solver='liblinear', multi_class = 'ovr',
random_state=123)
fit_obj = ns.AdaBoostClassifier(clf,
n_hidden_features=int(11.22338867),
direct_link=True,
n_estimators=250, learning_rate=0.01126343,
col_sample=0.72684326, row_sample=0.86429443,
dropout=0.63078613, n_clusters=2,
type_clust="gmm",
verbose=1, seed = 123,
method="SAMME.R")
start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")
start = time()
print(fit_obj.score(X_test, y_test))
print(f"Elapsed {time() - start}")
preds = fit_obj.predict(X_test)
print(metrics.classification_report(preds, y_test))
216 def fit(self, X, y, sample_weight=None, **kwargs): 217 """Fit Boosting model to training data (X, y). 218 219 Parameters: 220 221 X: {array-like}, shape = [n_samples, n_features] 222 Training vectors, where n_samples is the number 223 of samples and n_features is the number of features. 224 225 y: array-like, shape = [n_samples] 226 Target values. 227 228 **kwargs: additional parameters to be passed to 229 self.cook_training_set or self.obj.fit 230 231 Returns: 232 233 self: object 234 """ 235 236 assert mx.is_factor(y), "y must contain only integers" 237 238 assert self.method in ( 239 "SAMME", 240 "SAMME.R", 241 ), "`method` must be either 'SAMME' or 'SAMME.R'" 242 243 assert (self.reg_lambda <= 1) & ( 244 self.reg_lambda >= 0 245 ), "must have self.reg_lambda <= 1 & self.reg_lambda >= 0" 246 247 assert (self.reg_alpha <= 1) & ( 248 self.reg_alpha >= 0 249 ), "must have self.reg_alpha <= 1 & self.reg_alpha >= 0" 250 251 # training 252 n, p = X.shape 253 self.n_classes = len(np.unique(y)) 254 self.classes_ = np.unique(y) # for compatibility with sklearn 255 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 256 257 if sample_weight is None: 258 w_m = np.repeat(1.0 / n, n) 259 else: 260 w_m = np.asarray(sample_weight) 261 262 base_learner = CustomClassifier( 263 self.obj, 264 n_hidden_features=self.n_hidden_features, 265 activation_name=self.activation_name, 266 a=self.a, 267 nodes_sim=self.nodes_sim, 268 bias=self.bias, 269 dropout=self.dropout, 270 direct_link=self.direct_link, 271 n_clusters=self.n_clusters, 272 type_clust=self.type_clust, 273 type_scaling=self.type_scaling, 274 col_sample=self.col_sample, 275 row_sample=self.row_sample, 276 seed=self.seed, 277 ) 278 279 if self.verbose == 1: 280 pbar = Progbar(self.n_estimators) 281 282 if self.method == "SAMME": 283 err_m = 1e6 284 err_bound = 1 - 1 / self.n_classes 285 self.alpha_.append(1.0) 286 x_range_n = range(n) 287 288 for m in range(self.n_estimators): 289 preds = base_learner.fit( 290 X, y, sample_weight=w_m.ravel(), **kwargs 291 ).predict(X) 292 293 self.base_learners_.update({m: deepcopy(base_learner)}) 294 295 cond = [y[i] != preds[i] for i in x_range_n] 296 297 err_m = max( 298 sum([elt[0] * elt[1] for elt in zip(cond, w_m)]), 299 2.220446049250313e-16, 300 ) # sum(w_m) == 1 301 302 if self.reg_lambda > 0: 303 err_m += self.reg_lambda * ( 304 (1 - self.reg_alpha) * 0.5 * sum([x**2 for x in w_m]) 305 + self.reg_alpha * sum([abs(x) for x in w_m]) 306 ) 307 308 err_m = min(err_m, err_bound) 309 310 alpha_m = self.learning_rate * log( 311 (self.n_classes - 1) * (1 - err_m) / err_m 312 ) 313 314 self.alpha_.append(alpha_m) 315 316 w_m_temp = [exp(alpha_m * cond[i]) for i in x_range_n] 317 318 sum_w_m = sum(w_m_temp) 319 320 w_m = np.asarray([w_m_temp[i] / sum_w_m for i in x_range_n]) 321 322 base_learner.set_params(seed=self.seed + (m + 1) * 1000) 323 324 if self.verbose == 1: 325 pbar.update(m) 326 327 if self.verbose == 1: 328 pbar.update(self.n_estimators) 329 330 self.n_estimators = len(self.base_learners_) 331 self.classes_ = np.unique(y) 332 333 return self 334 335 if self.method == "SAMME.R": 336 Y = mo.one_hot_encode2(y, self.n_classes) 337 338 if sample_weight is None: 339 w_m = np.repeat(1.0 / n, n) # (N, 1) 340 341 else: 342 w_m = np.asarray(sample_weight) 343 344 for m in range(self.n_estimators): 345 probs = base_learner.fit( 346 X, y, sample_weight=w_m.ravel(), **kwargs 347 ).predict_proba(X) 348 349 np.clip( 350 a=probs, a_min=2.220446049250313e-16, a_max=1.0, out=probs 351 ) 352 353 self.base_learners_.update({m: deepcopy(base_learner)}) 354 355 w_m *= np.exp( 356 -1.0 357 * self.learning_rate 358 * (1.0 - 1.0 / self.n_classes) 359 * xlogy(Y, probs).sum(axis=1) 360 ) 361 362 w_m /= np.sum(w_m) 363 364 base_learner.set_params(seed=self.seed + (m + 1) * 1000) 365 366 if self.verbose == 1: 367 pbar.update(m) 368 369 if self.verbose == 1: 370 pbar.update(self.n_estimators) 371 372 self.n_estimators = len(self.base_learners_) 373 self.classes_ = np.unique(y) 374 375 return self
Fit Boosting model to training data (X, y).
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
377 def predict(self, X, **kwargs): 378 """Predict test data X. 379 380 Parameters: 381 382 X: {array-like}, shape = [n_samples, n_features] 383 Training vectors, where n_samples is the number 384 of samples and n_features is the number of features. 385 386 **kwargs: additional parameters to be passed to 387 self.cook_test_set 388 389 Returns: 390 391 model predictions: {array-like} 392 """ 393 return self.predict_proba(X, **kwargs).argmax(axis=1)
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
395 def predict_proba(self, X, **kwargs): 396 """Predict probabilities for test data X. 397 398 Parameters: 399 400 X: {array-like}, shape = [n_samples, n_features] 401 Training vectors, where n_samples is the number 402 of samples and n_features is the number of features. 403 404 **kwargs: additional parameters to be passed to 405 self.cook_test_set 406 407 Returns: 408 409 probability estimates for test data: {array-like} 410 411 """ 412 413 n_iter = len(self.base_learners_) 414 415 if self.method == "SAMME": 416 ensemble_learner = np.zeros((X.shape[0], self.n_classes)) 417 418 # if self.verbose == 1: 419 # pbar = Progbar(n_iter) 420 421 for idx, base_learner in self.base_learners_.items(): 422 preds = base_learner.predict(X, **kwargs) 423 424 ensemble_learner += self.alpha_[idx] * mo.one_hot_encode2( 425 preds, self.n_classes 426 ) 427 428 # if self.verbose == 1: 429 # pbar.update(idx) 430 431 # if self.verbose == 1: 432 # pbar.update(n_iter) 433 434 expit_ensemble_learner = expit(ensemble_learner) 435 436 sum_ensemble = expit_ensemble_learner.sum(axis=1) 437 438 return expit_ensemble_learner / sum_ensemble[:, None] 439 440 # if self.method == "SAMME.R": 441 ensemble_learner = 0 442 443 # if self.verbose == 1: 444 # pbar = Progbar(n_iter) 445 446 for idx, base_learner in self.base_learners_.items(): 447 probs = base_learner.predict_proba(X, **kwargs) 448 449 np.clip(a=probs, a_min=2.220446049250313e-16, a_max=1.0, out=probs) 450 451 log_preds_proba = np.log(probs) 452 453 ensemble_learner += ( 454 log_preds_proba - log_preds_proba.mean(axis=1)[:, None] 455 ) 456 457 # if self.verbose == 1: 458 # pbar.update(idx) 459 460 ensemble_learner *= self.n_classes - 1 461 462 # if self.verbose == 1: 463 # pbar.update(n_iter) 464 465 expit_ensemble_learner = expit(ensemble_learner) 466 467 sum_ensemble = expit_ensemble_learner.sum(axis=1) 468 469 return expit_ensemble_learner / sum_ensemble[:, None]
Predict probabilities for test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
48class Base(BaseEstimator): 49 """Base model from which all the other classes inherit. 50 51 This class contains the most important data preprocessing/feature engineering methods. 52 53 Parameters: 54 55 n_hidden_features: int 56 number of nodes in the hidden layer 57 58 activation_name: str 59 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 60 61 a: float 62 hyperparameter for 'prelu' or 'elu' activation function 63 64 nodes_sim: str 65 type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton', 66 'uniform' 67 68 bias: boolean 69 indicates if the hidden layer contains a bias term (True) or 70 not (False) 71 72 dropout: float 73 regularization parameter; (random) percentage of nodes dropped out 74 of the training 75 76 direct_link: boolean 77 indicates if the original features are included (True) in model's 78 fitting or not (False) 79 80 n_clusters: int 81 number of clusters for type_clust='kmeans' or type_clust='gmm' 82 clustering (could be 0: no clustering) 83 84 cluster_encode: bool 85 defines how the variable containing clusters is treated (default is one-hot); 86 if `False`, then labels are used, without one-hot encoding 87 88 type_clust: str 89 type of clustering method: currently k-means ('kmeans') or Gaussian 90 Mixture Model ('gmm') 91 92 type_scaling: a tuple of 3 strings 93 scaling methods for inputs, hidden layer, and clustering respectively 94 (and when relevant). 95 Currently available: standardization ('std') or MinMax scaling ('minmax') or robust scaling ('robust') or max absolute scaling ('maxabs') 96 97 col_sample: float 98 percentage of features randomly chosen for training 99 100 row_sample: float 101 percentage of rows chosen for training, by stratified bootstrapping 102 103 seed: int 104 reproducibility seed for nodes_sim=='uniform', clustering and dropout 105 106 backend: str 107 "cpu" or "gpu" or "tpu" 108 109 """ 110 111 # construct the object ----- 112 113 def __init__( 114 self, 115 n_hidden_features=5, 116 activation_name="relu", 117 a=0.01, 118 nodes_sim="sobol", 119 bias=True, 120 dropout=0, 121 direct_link=True, 122 n_clusters=2, 123 cluster_encode=True, 124 type_clust="kmeans", 125 type_scaling=("std", "std", "std"), 126 col_sample=1, 127 row_sample=1, 128 seed=123, 129 backend="cpu", 130 ): 131 # input checks ----- 132 133 sys_platform = platform.system() 134 135 if (sys_platform == "Windows") and (backend in ("gpu", "tpu")): 136 warnings.warn( 137 "No GPU/TPU computing on Windows yet, backend set to 'cpu'" 138 ) 139 backend = "cpu" 140 141 assert activation_name in ( 142 "relu", 143 "tanh", 144 "sigmoid", 145 "prelu", 146 "elu", 147 ), "'activation_name' must be in ('relu', 'tanh', 'sigmoid','prelu', 'elu')" 148 149 assert nodes_sim in ( 150 "sobol", 151 "hammersley", 152 "uniform", 153 "halton", 154 ), "'nodes_sim' must be in ('sobol', 'hammersley', 'uniform', 'halton')" 155 156 assert type_clust in ( 157 "kmeans", 158 "gmm", 159 ), "'type_clust' must be in ('kmeans', 'gmm')" 160 161 assert (len(type_scaling) == 3) & all( 162 type_scaling[i] in ("minmax", "std", "robust", "maxabs") 163 for i in range(len(type_scaling)) 164 ), "'type_scaling' must have length 3, and available scaling methods are 'minmax' scaling, standardization ('std'), robust scaling ('robust') and max absolute ('maxabs')" 165 166 assert (col_sample >= 0) & ( 167 col_sample <= 1 168 ), "'col_sample' must be comprised between 0 and 1 (both included)" 169 170 assert backend in ( 171 "cpu", 172 "gpu", 173 "tpu", 174 ), "must have 'backend' in ('cpu', 'gpu', 'tpu')" 175 176 self.n_hidden_features = n_hidden_features 177 self.activation_name = activation_name 178 self.a = a 179 self.nodes_sim = nodes_sim 180 self.bias = bias 181 self.seed = seed 182 self.backend = backend 183 self.dropout = dropout 184 self.direct_link = direct_link 185 self.cluster_encode = cluster_encode 186 self.type_clust = type_clust 187 self.type_scaling = type_scaling 188 self.col_sample = col_sample 189 self.row_sample = row_sample 190 self.n_clusters = n_clusters 191 if isinstance(self, RegressorMixin): 192 self.type_fit = "regression" 193 elif isinstance(self, ClassifierMixin): 194 self.type_fit = "classification" 195 self.subsampler_ = None 196 self.index_col_ = None 197 self.index_row_ = True 198 self.clustering_obj_ = None 199 self.clustering_scaler_ = None 200 self.nn_scaler_ = None 201 self.scaler_ = None 202 self.encoder_ = None 203 self.W_ = None 204 self.X_ = None 205 self.y_ = None 206 self.y_mean_ = None 207 self.beta_ = None 208 209 # activation function ----- 210 if sys_platform in ("Linux", "Darwin"): 211 activation_options = { 212 "relu": ac.relu if (self.backend == "cpu") else jnn.relu, 213 "tanh": np.tanh if (self.backend == "cpu") else jnp.tanh, 214 "sigmoid": ( 215 ac.sigmoid if (self.backend == "cpu") else jnn.sigmoid 216 ), 217 "prelu": partial(ac.prelu, a=a), 218 "elu": ( 219 partial(ac.elu, a=a) 220 if (self.backend == "cpu") 221 else partial(jnn.elu, a=a) 222 ), 223 } 224 else: # on Windows currently, no JAX 225 activation_options = { 226 "relu": ( 227 ac.relu if (self.backend == "cpu") else NotImplementedError 228 ), 229 "tanh": ( 230 np.tanh if (self.backend == "cpu") else NotImplementedError 231 ), 232 "sigmoid": ( 233 ac.sigmoid 234 if (self.backend == "cpu") 235 else NotImplementedError 236 ), 237 "prelu": partial(ac.prelu, a=a), 238 "elu": ( 239 partial(ac.elu, a=a) 240 if (self.backend == "cpu") 241 else NotImplementedError 242 ), 243 } 244 self.activation_func = activation_options[activation_name] 245 246 # "preprocessing" methods to be inherited ----- 247 248 def encode_clusters(self, X=None, predict=False, scaler=None, **kwargs): # 249 """Create new covariates with kmeans or GMM clustering 250 251 Parameters: 252 253 X: {array-like}, shape = [n_samples, n_features] 254 Training vectors, where n_samples is the number 255 of samples and n_features is the number of features. 256 257 predict: boolean 258 is False on training set and True on test set 259 260 scaler: {object} of class StandardScaler, MinMaxScaler, RobustScaler or MaxAbsScaler 261 if scaler has already been fitted on training data (online training), it can be passed here 262 263 **kwargs: 264 additional parameters to be passed to the 265 clustering method 266 267 Returns: 268 269 Clusters' matrix, one-hot encoded: {array-like} 270 271 """ 272 273 np.random.seed(self.seed) 274 275 if X is None: 276 X = self.X_ 277 278 if isinstance(X, pd.DataFrame): 279 X = copy.deepcopy(X.values.astype(float)) 280 281 if len(X.shape) == 1: 282 X = X.reshape(1, -1) 283 284 if predict is False: # encode training set 285 286 # scale input data before clustering 287 self.clustering_scaler_, scaled_X = mo.scale_covariates( 288 X, choice=self.type_scaling[2], scaler=self.clustering_scaler_ 289 ) 290 291 self.clustering_obj_, X_clustered = mo.cluster_covariates( 292 scaled_X, 293 self.n_clusters, 294 self.seed, 295 type_clust=self.type_clust, 296 **kwargs 297 ) 298 299 if self.cluster_encode: 300 return mo.one_hot_encode(X_clustered, self.n_clusters).astype( 301 np.float16 302 ) 303 304 return X_clustered.astype(np.float16) 305 306 # if predict == True, encode test set 307 X_clustered = self.clustering_obj_.predict( 308 self.clustering_scaler_.transform(X) 309 ) 310 311 if self.cluster_encode == True: 312 return mo.one_hot_encode(X_clustered, self.n_clusters).astype( 313 np.float16 314 ) 315 316 return X_clustered.astype(np.float16) 317 318 def create_layer(self, scaled_X, W=None): 319 """Create hidden layer. 320 321 Parameters: 322 323 scaled_X: {array-like}, shape = [n_samples, n_features] 324 Training vectors, where n_samples is the number 325 of samples and n_features is the number of features 326 327 W: {array-like}, shape = [n_features, hidden_features] 328 if provided, constructs the hidden layer with W; otherwise computed internally 329 330 Returns: 331 332 Hidden layer matrix: {array-like} 333 334 """ 335 336 n_features = scaled_X.shape[1] 337 338 # hash_sim = { 339 # "sobol": generate_sobol, 340 # "hammersley": generate_hammersley, 341 # "uniform": generate_uniform, 342 # "halton": generate_halton 343 # } 344 345 if self.bias is False: # no bias term in the hidden layer 346 if W is None: 347 if self.nodes_sim == "sobol": 348 self.W_ = generate_sobol( 349 n_dims=n_features, 350 n_points=self.n_hidden_features, 351 seed=self.seed, 352 ) 353 elif self.nodes_sim == "hammersley": 354 self.W_ = generate_hammersley( 355 n_dims=n_features, 356 n_points=self.n_hidden_features, 357 seed=self.seed, 358 ) 359 elif self.nodes_sim == "uniform": 360 self.W_ = generate_uniform( 361 n_dims=n_features, 362 n_points=self.n_hidden_features, 363 seed=self.seed, 364 ) 365 else: 366 self.W_ = generate_halton( 367 n_dims=n_features, 368 n_points=self.n_hidden_features, 369 seed=self.seed, 370 ) 371 372 assert ( 373 scaled_X.shape[1] == self.W_.shape[0] 374 ), "check dimensions of covariates X and matrix W" 375 376 return mo.dropout( 377 x=self.activation_func( 378 mo.safe_sparse_dot( 379 a=scaled_X, b=self.W_, backend=self.backend 380 ) 381 ), 382 drop_prob=self.dropout, 383 seed=self.seed, 384 ) 385 386 # W is not none 387 assert ( 388 scaled_X.shape[1] == W.shape[0] 389 ), "check dimensions of covariates X and matrix W" 390 391 # self.W_ = W 392 return mo.dropout( 393 x=self.activation_func( 394 mo.safe_sparse_dot(a=scaled_X, b=W, backend=self.backend) 395 ), 396 drop_prob=self.dropout, 397 seed=self.seed, 398 ) 399 400 # with bias term in the hidden layer 401 if W is None: 402 n_features_1 = n_features + 1 403 404 if self.nodes_sim == "sobol": 405 self.W_ = generate_sobol( 406 n_dims=n_features_1, 407 n_points=self.n_hidden_features, 408 seed=self.seed, 409 ) 410 elif self.nodes_sim == "hammersley": 411 self.W_ = generate_hammersley( 412 n_dims=n_features_1, 413 n_points=self.n_hidden_features, 414 seed=self.seed, 415 ) 416 elif self.nodes_sim == "uniform": 417 self.W_ = generate_uniform( 418 n_dims=n_features_1, 419 n_points=self.n_hidden_features, 420 seed=self.seed, 421 ) 422 else: 423 self.W_ = generate_halton( 424 n_dims=n_features_1, 425 n_points=self.n_hidden_features, 426 seed=self.seed, 427 ) 428 429 # self.W_ = hash_sim[self.nodes_sim]( 430 # n_dims=n_features_1, 431 # n_points=self.n_hidden_features, 432 # seed=self.seed, 433 # ) 434 435 return mo.dropout( 436 x=self.activation_func( 437 mo.safe_sparse_dot( 438 a=mo.cbind( 439 np.ones(scaled_X.shape[0]), 440 scaled_X, 441 backend=self.backend, 442 ), 443 b=self.W_, 444 backend=self.backend, 445 ) 446 ), 447 drop_prob=self.dropout, 448 seed=self.seed, 449 ) 450 451 # W is not None 452 # self.W_ = W 453 return mo.dropout( 454 x=self.activation_func( 455 mo.safe_sparse_dot( 456 a=mo.cbind( 457 np.ones(scaled_X.shape[0]), 458 scaled_X, 459 backend=self.backend, 460 ), 461 b=W, 462 backend=self.backend, 463 ) 464 ), 465 drop_prob=self.dropout, 466 seed=self.seed, 467 ) 468 469 def _jax_create_layer( 470 self, scaled_X: jnp.ndarray, W: Optional[jnp.ndarray] = None 471 ) -> jnp.ndarray: 472 """JAX-compatible version of create_layer that exactly matches the original functionality.""" 473 key = jax.random.PRNGKey(self.seed) 474 n_features = scaled_X.shape[1] 475 476 # Generate weights if not provided 477 if W is None: 478 if self.bias: 479 n_features_1 = n_features + 1 480 shape = (n_features_1, self.n_hidden_features) 481 else: 482 shape = (n_features, self.n_hidden_features) 483 484 # JAX-compatible weight generation matching original behavior 485 if self.nodes_sim == "sobol": 486 W_np = generate_sobol( 487 n_dims=n_features_1, 488 n_points=self.n_hidden_features, 489 seed=self.seed, 490 ) 491 W = jnp.asarray(W_np) 492 elif self.nodes_sim == "hammersley": 493 W_np = generate_hammersley( 494 n_dims=n_features_1, 495 n_points=self.n_hidden_features, 496 seed=self.seed, 497 ) 498 W = jnp.asarray(W_np) 499 elif self.nodes_sim == "uniform": 500 key, subkey = jax.random.split(key) 501 W = jax.random.uniform( 502 subkey, shape=shape, minval=-1.0, maxval=1.0 503 ) 504 else: # halton 505 W_np = generate_halton( 506 n_dims=n_features_1, 507 n_points=self.n_hidden_features, 508 seed=self.seed, 509 ) 510 W = jnp.asarray(W_np) 511 512 self.W_ = np.array(W) # Store as numpy for original methods 513 514 # Prepare input with bias if needed 515 if self.bias: 516 X_with_bias = jnp.hstack( 517 [jnp.ones((scaled_X.shape[0], 1)), scaled_X] 518 ) 519 print("X_with_bias shape:", X_with_bias.shape) 520 print("W shape:", W.shape) 521 linear_output = jnp.dot(X_with_bias, W) 522 else: 523 linear_output = jnp.dot(scaled_X, W) 524 525 # Apply activation function 526 if self.activation_name == "relu": 527 activated = jax.nn.relu(linear_output) 528 elif self.activation_name == "tanh": 529 activated = jnp.tanh(linear_output) 530 elif self.activation_name == "sigmoid": 531 activated = jax.nn.sigmoid(linear_output) 532 else: # leaky relu 533 activated = jax.nn.leaky_relu(linear_output, negative_slope=self.a) 534 535 # Apply dropout 536 if self.dropout > 0: 537 key, subkey = jax.random.split(key) 538 mask = jax.random.bernoulli( 539 subkey, p=1 - self.dropout, shape=activated.shape 540 ) 541 activated = jnp.where(mask, activated / (1 - self.dropout), 0) 542 543 return activated 544 545 def cook_training_set(self, y=None, X=None, W=None, **kwargs): 546 """Create new hidden features for training set, with hidden layer, center the response. 547 548 Parameters: 549 550 y: array-like, shape = [n_samples] 551 Target values 552 553 X: {array-like}, shape = [n_samples, n_features] 554 Training vectors, where n_samples is the number 555 of samples and n_features is the number of features 556 557 W: {array-like}, shape = [n_features, hidden_features] 558 if provided, constructs the hidden layer via W 559 560 Returns: 561 562 (centered response, direct link + hidden layer matrix): {tuple} 563 564 """ 565 566 # either X and y are stored or not 567 # assert ((y is None) & (X is None)) | ((y is not None) & (X is not None)) 568 if self.n_hidden_features > 0: # has a hidden layer 569 assert ( 570 len(self.type_scaling) >= 2 571 ), "must have len(self.type_scaling) >= 2 when self.n_hidden_features > 0" 572 573 if X is None: 574 575 if self.col_sample == 1: 576 input_X = self.X_ 577 else: 578 n_features = self.X_.shape[1] 579 new_n_features = int(np.ceil(n_features * self.col_sample)) 580 assert ( 581 new_n_features >= 1 582 ), "check class attribute 'col_sample' and the number of covariates provided for X" 583 np.random.seed(self.seed) 584 index_col = np.random.choice( 585 range(n_features), size=new_n_features, replace=False 586 ) 587 self.index_col_ = index_col 588 input_X = self.X_[:, self.index_col_] 589 590 else: # X is not None # keep X vs self.X_ 591 592 if isinstance(X, pd.DataFrame): 593 X = copy.deepcopy(X.values.astype(float)) 594 595 if self.col_sample == 1: 596 input_X = X 597 else: 598 n_features = X.shape[1] 599 new_n_features = int(np.ceil(n_features * self.col_sample)) 600 assert ( 601 new_n_features >= 1 602 ), "check class attribute 'col_sample' and the number of covariates provided for X" 603 np.random.seed(self.seed) 604 index_col = np.random.choice( 605 range(n_features), size=new_n_features, replace=False 606 ) 607 self.index_col_ = index_col 608 input_X = X[:, self.index_col_] 609 610 if self.n_clusters <= 0: 611 # data without any clustering: self.n_clusters is None ----- 612 613 if self.n_hidden_features > 0: # with hidden layer 614 615 self.nn_scaler_, scaled_X = mo.scale_covariates( 616 input_X, choice=self.type_scaling[1], scaler=self.nn_scaler_ 617 ) 618 Phi_X = ( 619 self.create_layer(scaled_X) 620 if W is None 621 else self.create_layer(scaled_X, W=W) 622 ) 623 Z = ( 624 mo.cbind(input_X, Phi_X, backend=self.backend) 625 if self.direct_link is True 626 else Phi_X 627 ) 628 self.scaler_, scaled_Z = mo.scale_covariates( 629 Z, choice=self.type_scaling[0], scaler=self.scaler_ 630 ) 631 else: # no hidden layer 632 Z = input_X 633 self.scaler_, scaled_Z = mo.scale_covariates( 634 Z, choice=self.type_scaling[0], scaler=self.scaler_ 635 ) 636 637 else: 638 639 # data with clustering: self.n_clusters is not None ----- # keep 640 641 augmented_X = mo.cbind( 642 input_X, 643 self.encode_clusters(input_X, **kwargs), 644 backend=self.backend, 645 ) 646 647 if self.n_hidden_features > 0: # with hidden layer 648 649 self.nn_scaler_, scaled_X = mo.scale_covariates( 650 augmented_X, 651 choice=self.type_scaling[1], 652 scaler=self.nn_scaler_, 653 ) 654 Phi_X = ( 655 self.create_layer(scaled_X) 656 if W is None 657 else self.create_layer(scaled_X, W=W) 658 ) 659 Z = ( 660 mo.cbind(augmented_X, Phi_X, backend=self.backend) 661 if self.direct_link is True 662 else Phi_X 663 ) 664 self.scaler_, scaled_Z = mo.scale_covariates( 665 Z, choice=self.type_scaling[0], scaler=self.scaler_ 666 ) 667 else: # no hidden layer 668 Z = augmented_X 669 self.scaler_, scaled_Z = mo.scale_covariates( 670 Z, choice=self.type_scaling[0], scaler=self.scaler_ 671 ) 672 673 # Returning model inputs ----- 674 if mx.is_factor(y) is False: # regression 675 # center y 676 if y is None: 677 self.y_mean_, centered_y = mo.center_response(self.y_) 678 else: 679 self.y_mean_, centered_y = mo.center_response(y) 680 681 # y is subsampled 682 if self.row_sample < 1: 683 n, p = Z.shape 684 685 self.subsampler_ = ( 686 SubSampler( 687 y=self.y_, row_sample=self.row_sample, seed=self.seed 688 ) 689 if y is None 690 else SubSampler( 691 y=y, row_sample=self.row_sample, seed=self.seed 692 ) 693 ) 694 695 self.index_row_ = self.subsampler_.subsample() 696 697 n_row_sample = len(self.index_row_) 698 # regression 699 return ( 700 centered_y[self.index_row_].reshape(n_row_sample), 701 self.scaler_.transform( 702 Z[self.index_row_, :].reshape(n_row_sample, p) 703 ), 704 ) 705 # y is not subsampled 706 # regression 707 return (centered_y, self.scaler_.transform(Z)) 708 709 # classification 710 # y is subsampled 711 if self.row_sample < 1: 712 n, p = Z.shape 713 714 self.subsampler_ = ( 715 SubSampler( 716 y=self.y_, row_sample=self.row_sample, seed=self.seed 717 ) 718 if y is None 719 else SubSampler(y=y, row_sample=self.row_sample, seed=self.seed) 720 ) 721 722 self.index_row_ = self.subsampler_.subsample() 723 724 n_row_sample = len(self.index_row_) 725 # classification 726 return ( 727 y[self.index_row_].reshape(n_row_sample), 728 self.scaler_.transform( 729 Z[self.index_row_, :].reshape(n_row_sample, p) 730 ), 731 ) 732 # y is not subsampled 733 # classification 734 return (y, self.scaler_.transform(Z)) 735 736 def cook_test_set(self, X, **kwargs): 737 """Transform data from test set, with hidden layer. 738 739 Parameters: 740 741 X: {array-like}, shape = [n_samples, n_features] 742 Training vectors, where n_samples is the number 743 of samples and n_features is the number of features 744 745 **kwargs: additional parameters to be passed to self.encode_cluster 746 747 Returns: 748 749 Transformed test set : {array-like} 750 """ 751 752 if isinstance(X, pd.DataFrame): 753 X = copy.deepcopy(X.values.astype(float)) 754 755 if len(X.shape) == 1: 756 X = X.reshape(1, -1) 757 758 if ( 759 self.n_clusters == 0 760 ): # data without clustering: self.n_clusters is None ----- 761 if self.n_hidden_features > 0: 762 # if hidden layer 763 scaled_X = ( 764 self.nn_scaler_.transform(X) 765 if (self.col_sample == 1) 766 else self.nn_scaler_.transform(X[:, self.index_col_]) 767 ) 768 Phi_X = self.create_layer(scaled_X, self.W_) 769 if self.direct_link: 770 return self.scaler_.transform( 771 mo.cbind(scaled_X, Phi_X, backend=self.backend) 772 ) 773 # when self.direct_link == False 774 return self.scaler_.transform(Phi_X) 775 # if no hidden layer # self.n_hidden_features == 0 776 return self.scaler_.transform(X) 777 778 # data with clustering: self.n_clusters > 0 ----- 779 if self.col_sample == 1: 780 predicted_clusters = self.encode_clusters( 781 X=X, predict=True, **kwargs 782 ) 783 augmented_X = mo.cbind(X, predicted_clusters, backend=self.backend) 784 else: 785 predicted_clusters = self.encode_clusters( 786 X=X[:, self.index_col_], predict=True, **kwargs 787 ) 788 augmented_X = mo.cbind( 789 X[:, self.index_col_], predicted_clusters, backend=self.backend 790 ) 791 792 if self.n_hidden_features > 0: # if hidden layer 793 scaled_X = self.nn_scaler_.transform(augmented_X) 794 Phi_X = self.create_layer(scaled_X, self.W_) 795 if self.direct_link: 796 return self.scaler_.transform( 797 mo.cbind(augmented_X, Phi_X, backend=self.backend) 798 ) 799 return self.scaler_.transform(Phi_X) 800 801 # if no hidden layer 802 return self.scaler_.transform(augmented_X) 803 804 def cook_training_set_jax(self, y=None, X=None, W=None, **kwargs): 805 """JAX-compatible version of cook_training_set that maintains side effects.""" 806 # Initialize random key 807 key = jax.random.PRNGKey(self.seed) 808 809 # Convert inputs to JAX arrays 810 X = jnp.asarray(X) if X is not None else jnp.asarray(self.X_) 811 y = jnp.asarray(y) if y is not None else jnp.asarray(self.y_) 812 813 # Handle column sampling 814 if self.col_sample < 1: 815 n_features = X.shape[1] 816 new_n_features = int(jnp.ceil(n_features * self.col_sample)) 817 assert new_n_features >= 1, "Invalid col_sample" 818 819 key, subkey = jax.random.split(key) 820 index_col = jax.random.choice( 821 subkey, n_features, shape=(new_n_features,), replace=False 822 ) 823 self.index_col_ = np.array( 824 index_col 825 ) # Store as numpy for original methods 826 input_X = X[:, index_col] 827 n_features = ( 828 new_n_features # Update n_features after column sampling 829 ) 830 else: 831 input_X = X 832 n_features = X.shape[1] 833 834 augmented_X = input_X 835 836 # JAX-compatible scaling 837 def jax_scale(data, mean=None, std=None): 838 if mean is None: 839 mean = jnp.mean(data, axis=0) 840 if std is None: 841 std = jnp.std(data, axis=0) 842 return (data - mean) / (std + 1e-10), mean, std 843 844 # Hidden layer processing 845 if self.n_hidden_features > 0: 846 # Initialize weights if not provided 847 if W is None: 848 shape = (n_features, self.n_hidden_features) 849 850 # JAX-compatible weight generation 851 if self.nodes_sim == "uniform": 852 key, subkey = jax.random.split(key) 853 W = jax.random.uniform( 854 subkey, shape=shape, minval=-1.0, maxval=1.0 855 ) * (1 / jnp.sqrt(n_features)) 856 else: 857 # For other sequences, use numpy generation then convert to JAX 858 if self.nodes_sim == "sobol": 859 W_np = generate_sobol( 860 n_dims=shape[0], 861 n_points=shape[1], 862 seed=self.seed, 863 ) 864 elif self.nodes_sim == "hammersley": 865 W_np = generate_hammersley( 866 n_dims=shape[0], 867 n_points=shape[1], 868 seed=self.seed, 869 ) 870 elif self.nodes_sim == "halton": 871 W_np = generate_halton( 872 n_dims=shape[0], 873 n_points=shape[1], 874 seed=self.seed, 875 ) 876 else: # default to uniform 877 key, subkey = jax.random.split(key) 878 W = jax.random.uniform( 879 subkey, shape=shape, minval=-1.0, maxval=1.0 880 ) * (1 / jnp.sqrt(n_features)) 881 882 if self.nodes_sim in ["sobol", "hammersley", "halton"]: 883 W = jnp.asarray(W_np) * (1 / jnp.sqrt(n_features)) 884 885 self.W_ = np.array(W) # Store as numpy for original methods 886 887 # Scale features 888 scaled_X, self.nn_mean_, self.nn_std_ = jax_scale( 889 augmented_X, 890 getattr(self, "nn_mean_", None), 891 getattr(self, "nn_std_", None), 892 ) 893 894 # Create hidden layer with proper bias handling 895 linear_output = jnp.dot(scaled_X, W) 896 897 # Apply activation 898 if self.activation_name == "relu": 899 Phi_X = jax.nn.relu(linear_output) 900 elif self.activation_name == "tanh": 901 Phi_X = jnp.tanh(linear_output) 902 elif self.activation_name == "sigmoid": 903 Phi_X = jax.nn.sigmoid(linear_output) 904 else: # leaky relu 905 Phi_X = jax.nn.leaky_relu(linear_output, negative_slope=self.a) 906 907 # Apply dropout 908 if self.dropout > 0: 909 key, subkey = jax.random.split(key) 910 mask = jax.random.bernoulli( 911 subkey, p=1 - self.dropout, shape=Phi_X.shape 912 ) 913 Phi_X = jnp.where(mask, Phi_X / (1 - self.dropout), 0) 914 915 Z = jnp.hstack([scaled_X, Phi_X]) if self.direct_link else Phi_X 916 else: 917 Z = augmented_X 918 919 # Final scaling 920 scaled_Z, self.scale_mean_, self.scale_std_ = jax_scale( 921 Z, 922 getattr(self, "scale_mean_", None), 923 getattr(self, "scale_std_", None), 924 ) 925 926 # Center response for regression 927 if not hasattr(mx, "is_factor") or not mx.is_factor( 928 y 929 ): # regression case 930 self.y_mean_ = float( 931 jnp.mean(y) 932 ) # Convert to Python float for compatibility 933 centered_y = y - self.y_mean_ 934 else: 935 centered_y = y 936 937 # Handle row sampling 938 if self.row_sample < 1: 939 key, subkey = jax.random.split(key) 940 n_samples = Z.shape[0] 941 n_row_sample = int(jnp.ceil(n_samples * self.row_sample)) 942 index_row = jax.random.choice( 943 subkey, n_samples, shape=(n_row_sample,), replace=False 944 ) 945 self.index_row_ = np.array( 946 index_row 947 ) # Store as numpy for original methods 948 return (centered_y[index_row], scaled_Z[index_row]) 949 950 return (centered_y, scaled_Z) 951 952 def cook_test_set_jax(self, X, **kwargs): 953 """JAX-compatible test set processing with matching dimension handling.""" 954 X = jnp.asarray(X) 955 956 if len(X.shape) == 1: 957 X = X.reshape(1, -1) 958 959 # Handle column sampling 960 input_X = ( 961 X if self.col_sample == 1 else X[:, jnp.asarray(self.index_col_)] 962 ) 963 964 augmented_X = input_X 965 966 # JAX-compatible scaling 967 scaled_X = (augmented_X - self.nn_mean_) / (self.nn_std_ + 1e-10) 968 969 # Process hidden layer if needed 970 if self.n_hidden_features > 0: 971 Phi_X = self._jax_create_layer(scaled_X, jnp.asarray(self.W_)) 972 Z = jnp.hstack([scaled_X, Phi_X]) if self.direct_link else Phi_X 973 else: 974 Z = augmented_X 975 976 # Final scaling 977 scaled_Z = (Z - self.scale_mean_) / (self.scale_std_ + 1e-10) 978 979 return scaled_Z 980 981 def _jax_create_layer(self, X, W): 982 """JAX-compatible hidden layer creation.""" 983 # print("X", X.shape) 984 # print("W", W.shape) 985 # print("self.W_", self.W_.shape) 986 linear_output = jnp.dot(X, W) 987 988 if self.activation_name == "relu": 989 return jax.nn.relu(linear_output) 990 elif self.activation_name == "tanh": 991 return jnp.tanh(linear_output) 992 elif self.activation_name == "sigmoid": 993 return jax.nn.sigmoid(linear_output) 994 else: # leaky relu 995 return jax.nn.leaky_relu(linear_output, negative_slope=self.a) 996 997 def cross_val_score( 998 self, 999 X, 1000 y, 1001 cv=5, 1002 scoring="accuracy", 1003 random_state=42, 1004 n_jobs=-1, 1005 epsilon=0.5, 1006 penalized=True, 1007 objective="abs", 1008 **kwargs 1009 ): 1010 """ 1011 Penalized Cross-validation score for a model. 1012 1013 Parameters: 1014 1015 X: {array-like}, shape = [n_samples, n_features] 1016 Training vectors, where n_samples is the number 1017 of samples and n_features is the number of features 1018 1019 y: array-like, shape = [n_samples] 1020 Target values 1021 1022 X_test: {array-like}, shape = [n_samples, n_features] 1023 Test vectors, where n_samples is the number 1024 of samples and n_features is the number of features 1025 1026 y_test: array-like, shape = [n_samples] 1027 Target values 1028 1029 cv: int 1030 Number of folds 1031 1032 scoring: str 1033 Scoring metric 1034 1035 random_state: int 1036 Random state 1037 1038 n_jobs: int 1039 Number of jobs to run in parallel 1040 1041 epsilon: float 1042 Penalty parameter 1043 1044 penalized: bool 1045 Whether to obtain penalized cross-validation score or not 1046 1047 objective: str 1048 'abs': Minimize the absolute difference between cross-validation score and validation score 1049 'relative': Minimize the relative difference between cross-validation score and validation score 1050 Returns: 1051 1052 A namedtuple with the following fields: 1053 - cv_score: float 1054 cross-validation score 1055 - val_score: float 1056 validation score 1057 - penalized_score: float 1058 penalized cross-validation score: cv_score / val_score + epsilon*(1/val_score + 1/cv_score) 1059 If higher scoring metric is better, minimize the function result. 1060 If lower scoring metric is better, maximize the function result. 1061 """ 1062 if scoring == "accuracy": 1063 scoring_func = accuracy_score 1064 elif scoring == "balanced_accuracy": 1065 scoring_func = balanced_accuracy_score 1066 elif scoring == "f1": 1067 scoring_func = f1_score 1068 elif scoring == "roc_auc": 1069 scoring_func = roc_auc_score 1070 elif scoring == "r2": 1071 scoring_func = r2_score 1072 elif scoring == "mse": 1073 scoring_func = mean_squared_error 1074 elif scoring == "mae": 1075 scoring_func = mean_absolute_error 1076 elif scoring == "mape": 1077 scoring_func = mean_absolute_percentage_error 1078 elif scoring == "rmse": 1079 1080 def scoring_func(y_true, y_pred): 1081 return np.sqrt(mean_squared_error(y_true, y_pred)) 1082 1083 X_train, X_val, y_train, y_val = train_test_split( 1084 X, y, test_size=0.2, random_state=random_state 1085 ) 1086 1087 res = cross_val_score( 1088 self, X_train, y_train, cv=cv, scoring=scoring, n_jobs=n_jobs 1089 ) # cross-validation error 1090 1091 if penalized == False: 1092 return res 1093 1094 DescribeResult = namedtuple( 1095 "DescribeResult", ["cv_score", "val_score", "penalized_score"] 1096 ) 1097 1098 numerator = res.mean() 1099 1100 # Evaluate on the (cv+1)-th fold 1101 preds_val = self.fit(X_train, y_train).predict(X_val) 1102 try: 1103 denominator = scoring(y_val, preds_val) # validation error 1104 except Exception as e: 1105 denominator = scoring_func(y_val, preds_val) 1106 1107 # if higher is better 1108 if objective == "abs": 1109 penalized_score = np.abs(numerator - denominator) + epsilon * ( 1110 1 / denominator + 1 / numerator 1111 ) 1112 elif objective == "relative": 1113 ratio = numerator / denominator 1114 penalized_score = np.abs(ratio - 1) + epsilon * ( 1115 1 / denominator + 1 / numerator 1116 ) 1117 1118 return DescribeResult( 1119 cv_score=numerator, 1120 val_score=denominator, 1121 penalized_score=penalized_score, 1122 )
Base model from which all the other classes inherit.
This class contains the most important data preprocessing/feature engineering methods.
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or
not (False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original features are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for type_clust='kmeans' or type_clust='gmm'
clustering (could be 0: no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot);
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax') or robust scaling ('robust') or max absolute scaling ('maxabs')
col_sample: float
percentage of features randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform', clustering and dropout
backend: str
"cpu" or "gpu" or "tpu"
248 def encode_clusters(self, X=None, predict=False, scaler=None, **kwargs): # 249 """Create new covariates with kmeans or GMM clustering 250 251 Parameters: 252 253 X: {array-like}, shape = [n_samples, n_features] 254 Training vectors, where n_samples is the number 255 of samples and n_features is the number of features. 256 257 predict: boolean 258 is False on training set and True on test set 259 260 scaler: {object} of class StandardScaler, MinMaxScaler, RobustScaler or MaxAbsScaler 261 if scaler has already been fitted on training data (online training), it can be passed here 262 263 **kwargs: 264 additional parameters to be passed to the 265 clustering method 266 267 Returns: 268 269 Clusters' matrix, one-hot encoded: {array-like} 270 271 """ 272 273 np.random.seed(self.seed) 274 275 if X is None: 276 X = self.X_ 277 278 if isinstance(X, pd.DataFrame): 279 X = copy.deepcopy(X.values.astype(float)) 280 281 if len(X.shape) == 1: 282 X = X.reshape(1, -1) 283 284 if predict is False: # encode training set 285 286 # scale input data before clustering 287 self.clustering_scaler_, scaled_X = mo.scale_covariates( 288 X, choice=self.type_scaling[2], scaler=self.clustering_scaler_ 289 ) 290 291 self.clustering_obj_, X_clustered = mo.cluster_covariates( 292 scaled_X, 293 self.n_clusters, 294 self.seed, 295 type_clust=self.type_clust, 296 **kwargs 297 ) 298 299 if self.cluster_encode: 300 return mo.one_hot_encode(X_clustered, self.n_clusters).astype( 301 np.float16 302 ) 303 304 return X_clustered.astype(np.float16) 305 306 # if predict == True, encode test set 307 X_clustered = self.clustering_obj_.predict( 308 self.clustering_scaler_.transform(X) 309 ) 310 311 if self.cluster_encode == True: 312 return mo.one_hot_encode(X_clustered, self.n_clusters).astype( 313 np.float16 314 ) 315 316 return X_clustered.astype(np.float16)
Create new covariates with kmeans or GMM clustering
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
predict: boolean
is False on training set and True on test set
scaler: {object} of class StandardScaler, MinMaxScaler, RobustScaler or MaxAbsScaler
if scaler has already been fitted on training data (online training), it can be passed here
**kwargs:
additional parameters to be passed to the
clustering method
Returns:
Clusters' matrix, one-hot encoded: {array-like}
318 def create_layer(self, scaled_X, W=None): 319 """Create hidden layer. 320 321 Parameters: 322 323 scaled_X: {array-like}, shape = [n_samples, n_features] 324 Training vectors, where n_samples is the number 325 of samples and n_features is the number of features 326 327 W: {array-like}, shape = [n_features, hidden_features] 328 if provided, constructs the hidden layer with W; otherwise computed internally 329 330 Returns: 331 332 Hidden layer matrix: {array-like} 333 334 """ 335 336 n_features = scaled_X.shape[1] 337 338 # hash_sim = { 339 # "sobol": generate_sobol, 340 # "hammersley": generate_hammersley, 341 # "uniform": generate_uniform, 342 # "halton": generate_halton 343 # } 344 345 if self.bias is False: # no bias term in the hidden layer 346 if W is None: 347 if self.nodes_sim == "sobol": 348 self.W_ = generate_sobol( 349 n_dims=n_features, 350 n_points=self.n_hidden_features, 351 seed=self.seed, 352 ) 353 elif self.nodes_sim == "hammersley": 354 self.W_ = generate_hammersley( 355 n_dims=n_features, 356 n_points=self.n_hidden_features, 357 seed=self.seed, 358 ) 359 elif self.nodes_sim == "uniform": 360 self.W_ = generate_uniform( 361 n_dims=n_features, 362 n_points=self.n_hidden_features, 363 seed=self.seed, 364 ) 365 else: 366 self.W_ = generate_halton( 367 n_dims=n_features, 368 n_points=self.n_hidden_features, 369 seed=self.seed, 370 ) 371 372 assert ( 373 scaled_X.shape[1] == self.W_.shape[0] 374 ), "check dimensions of covariates X and matrix W" 375 376 return mo.dropout( 377 x=self.activation_func( 378 mo.safe_sparse_dot( 379 a=scaled_X, b=self.W_, backend=self.backend 380 ) 381 ), 382 drop_prob=self.dropout, 383 seed=self.seed, 384 ) 385 386 # W is not none 387 assert ( 388 scaled_X.shape[1] == W.shape[0] 389 ), "check dimensions of covariates X and matrix W" 390 391 # self.W_ = W 392 return mo.dropout( 393 x=self.activation_func( 394 mo.safe_sparse_dot(a=scaled_X, b=W, backend=self.backend) 395 ), 396 drop_prob=self.dropout, 397 seed=self.seed, 398 ) 399 400 # with bias term in the hidden layer 401 if W is None: 402 n_features_1 = n_features + 1 403 404 if self.nodes_sim == "sobol": 405 self.W_ = generate_sobol( 406 n_dims=n_features_1, 407 n_points=self.n_hidden_features, 408 seed=self.seed, 409 ) 410 elif self.nodes_sim == "hammersley": 411 self.W_ = generate_hammersley( 412 n_dims=n_features_1, 413 n_points=self.n_hidden_features, 414 seed=self.seed, 415 ) 416 elif self.nodes_sim == "uniform": 417 self.W_ = generate_uniform( 418 n_dims=n_features_1, 419 n_points=self.n_hidden_features, 420 seed=self.seed, 421 ) 422 else: 423 self.W_ = generate_halton( 424 n_dims=n_features_1, 425 n_points=self.n_hidden_features, 426 seed=self.seed, 427 ) 428 429 # self.W_ = hash_sim[self.nodes_sim]( 430 # n_dims=n_features_1, 431 # n_points=self.n_hidden_features, 432 # seed=self.seed, 433 # ) 434 435 return mo.dropout( 436 x=self.activation_func( 437 mo.safe_sparse_dot( 438 a=mo.cbind( 439 np.ones(scaled_X.shape[0]), 440 scaled_X, 441 backend=self.backend, 442 ), 443 b=self.W_, 444 backend=self.backend, 445 ) 446 ), 447 drop_prob=self.dropout, 448 seed=self.seed, 449 ) 450 451 # W is not None 452 # self.W_ = W 453 return mo.dropout( 454 x=self.activation_func( 455 mo.safe_sparse_dot( 456 a=mo.cbind( 457 np.ones(scaled_X.shape[0]), 458 scaled_X, 459 backend=self.backend, 460 ), 461 b=W, 462 backend=self.backend, 463 ) 464 ), 465 drop_prob=self.dropout, 466 seed=self.seed, 467 )
Create hidden layer.
Parameters:
scaled_X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features
W: {array-like}, shape = [n_features, hidden_features]
if provided, constructs the hidden layer with W; otherwise computed internally
Returns:
Hidden layer matrix: {array-like}
545 def cook_training_set(self, y=None, X=None, W=None, **kwargs): 546 """Create new hidden features for training set, with hidden layer, center the response. 547 548 Parameters: 549 550 y: array-like, shape = [n_samples] 551 Target values 552 553 X: {array-like}, shape = [n_samples, n_features] 554 Training vectors, where n_samples is the number 555 of samples and n_features is the number of features 556 557 W: {array-like}, shape = [n_features, hidden_features] 558 if provided, constructs the hidden layer via W 559 560 Returns: 561 562 (centered response, direct link + hidden layer matrix): {tuple} 563 564 """ 565 566 # either X and y are stored or not 567 # assert ((y is None) & (X is None)) | ((y is not None) & (X is not None)) 568 if self.n_hidden_features > 0: # has a hidden layer 569 assert ( 570 len(self.type_scaling) >= 2 571 ), "must have len(self.type_scaling) >= 2 when self.n_hidden_features > 0" 572 573 if X is None: 574 575 if self.col_sample == 1: 576 input_X = self.X_ 577 else: 578 n_features = self.X_.shape[1] 579 new_n_features = int(np.ceil(n_features * self.col_sample)) 580 assert ( 581 new_n_features >= 1 582 ), "check class attribute 'col_sample' and the number of covariates provided for X" 583 np.random.seed(self.seed) 584 index_col = np.random.choice( 585 range(n_features), size=new_n_features, replace=False 586 ) 587 self.index_col_ = index_col 588 input_X = self.X_[:, self.index_col_] 589 590 else: # X is not None # keep X vs self.X_ 591 592 if isinstance(X, pd.DataFrame): 593 X = copy.deepcopy(X.values.astype(float)) 594 595 if self.col_sample == 1: 596 input_X = X 597 else: 598 n_features = X.shape[1] 599 new_n_features = int(np.ceil(n_features * self.col_sample)) 600 assert ( 601 new_n_features >= 1 602 ), "check class attribute 'col_sample' and the number of covariates provided for X" 603 np.random.seed(self.seed) 604 index_col = np.random.choice( 605 range(n_features), size=new_n_features, replace=False 606 ) 607 self.index_col_ = index_col 608 input_X = X[:, self.index_col_] 609 610 if self.n_clusters <= 0: 611 # data without any clustering: self.n_clusters is None ----- 612 613 if self.n_hidden_features > 0: # with hidden layer 614 615 self.nn_scaler_, scaled_X = mo.scale_covariates( 616 input_X, choice=self.type_scaling[1], scaler=self.nn_scaler_ 617 ) 618 Phi_X = ( 619 self.create_layer(scaled_X) 620 if W is None 621 else self.create_layer(scaled_X, W=W) 622 ) 623 Z = ( 624 mo.cbind(input_X, Phi_X, backend=self.backend) 625 if self.direct_link is True 626 else Phi_X 627 ) 628 self.scaler_, scaled_Z = mo.scale_covariates( 629 Z, choice=self.type_scaling[0], scaler=self.scaler_ 630 ) 631 else: # no hidden layer 632 Z = input_X 633 self.scaler_, scaled_Z = mo.scale_covariates( 634 Z, choice=self.type_scaling[0], scaler=self.scaler_ 635 ) 636 637 else: 638 639 # data with clustering: self.n_clusters is not None ----- # keep 640 641 augmented_X = mo.cbind( 642 input_X, 643 self.encode_clusters(input_X, **kwargs), 644 backend=self.backend, 645 ) 646 647 if self.n_hidden_features > 0: # with hidden layer 648 649 self.nn_scaler_, scaled_X = mo.scale_covariates( 650 augmented_X, 651 choice=self.type_scaling[1], 652 scaler=self.nn_scaler_, 653 ) 654 Phi_X = ( 655 self.create_layer(scaled_X) 656 if W is None 657 else self.create_layer(scaled_X, W=W) 658 ) 659 Z = ( 660 mo.cbind(augmented_X, Phi_X, backend=self.backend) 661 if self.direct_link is True 662 else Phi_X 663 ) 664 self.scaler_, scaled_Z = mo.scale_covariates( 665 Z, choice=self.type_scaling[0], scaler=self.scaler_ 666 ) 667 else: # no hidden layer 668 Z = augmented_X 669 self.scaler_, scaled_Z = mo.scale_covariates( 670 Z, choice=self.type_scaling[0], scaler=self.scaler_ 671 ) 672 673 # Returning model inputs ----- 674 if mx.is_factor(y) is False: # regression 675 # center y 676 if y is None: 677 self.y_mean_, centered_y = mo.center_response(self.y_) 678 else: 679 self.y_mean_, centered_y = mo.center_response(y) 680 681 # y is subsampled 682 if self.row_sample < 1: 683 n, p = Z.shape 684 685 self.subsampler_ = ( 686 SubSampler( 687 y=self.y_, row_sample=self.row_sample, seed=self.seed 688 ) 689 if y is None 690 else SubSampler( 691 y=y, row_sample=self.row_sample, seed=self.seed 692 ) 693 ) 694 695 self.index_row_ = self.subsampler_.subsample() 696 697 n_row_sample = len(self.index_row_) 698 # regression 699 return ( 700 centered_y[self.index_row_].reshape(n_row_sample), 701 self.scaler_.transform( 702 Z[self.index_row_, :].reshape(n_row_sample, p) 703 ), 704 ) 705 # y is not subsampled 706 # regression 707 return (centered_y, self.scaler_.transform(Z)) 708 709 # classification 710 # y is subsampled 711 if self.row_sample < 1: 712 n, p = Z.shape 713 714 self.subsampler_ = ( 715 SubSampler( 716 y=self.y_, row_sample=self.row_sample, seed=self.seed 717 ) 718 if y is None 719 else SubSampler(y=y, row_sample=self.row_sample, seed=self.seed) 720 ) 721 722 self.index_row_ = self.subsampler_.subsample() 723 724 n_row_sample = len(self.index_row_) 725 # classification 726 return ( 727 y[self.index_row_].reshape(n_row_sample), 728 self.scaler_.transform( 729 Z[self.index_row_, :].reshape(n_row_sample, p) 730 ), 731 ) 732 # y is not subsampled 733 # classification 734 return (y, self.scaler_.transform(Z))
Create new hidden features for training set, with hidden layer, center the response.
Parameters:
y: array-like, shape = [n_samples]
Target values
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features
W: {array-like}, shape = [n_features, hidden_features]
if provided, constructs the hidden layer via W
Returns:
(centered response, direct link + hidden layer matrix): {tuple}
736 def cook_test_set(self, X, **kwargs): 737 """Transform data from test set, with hidden layer. 738 739 Parameters: 740 741 X: {array-like}, shape = [n_samples, n_features] 742 Training vectors, where n_samples is the number 743 of samples and n_features is the number of features 744 745 **kwargs: additional parameters to be passed to self.encode_cluster 746 747 Returns: 748 749 Transformed test set : {array-like} 750 """ 751 752 if isinstance(X, pd.DataFrame): 753 X = copy.deepcopy(X.values.astype(float)) 754 755 if len(X.shape) == 1: 756 X = X.reshape(1, -1) 757 758 if ( 759 self.n_clusters == 0 760 ): # data without clustering: self.n_clusters is None ----- 761 if self.n_hidden_features > 0: 762 # if hidden layer 763 scaled_X = ( 764 self.nn_scaler_.transform(X) 765 if (self.col_sample == 1) 766 else self.nn_scaler_.transform(X[:, self.index_col_]) 767 ) 768 Phi_X = self.create_layer(scaled_X, self.W_) 769 if self.direct_link: 770 return self.scaler_.transform( 771 mo.cbind(scaled_X, Phi_X, backend=self.backend) 772 ) 773 # when self.direct_link == False 774 return self.scaler_.transform(Phi_X) 775 # if no hidden layer # self.n_hidden_features == 0 776 return self.scaler_.transform(X) 777 778 # data with clustering: self.n_clusters > 0 ----- 779 if self.col_sample == 1: 780 predicted_clusters = self.encode_clusters( 781 X=X, predict=True, **kwargs 782 ) 783 augmented_X = mo.cbind(X, predicted_clusters, backend=self.backend) 784 else: 785 predicted_clusters = self.encode_clusters( 786 X=X[:, self.index_col_], predict=True, **kwargs 787 ) 788 augmented_X = mo.cbind( 789 X[:, self.index_col_], predicted_clusters, backend=self.backend 790 ) 791 792 if self.n_hidden_features > 0: # if hidden layer 793 scaled_X = self.nn_scaler_.transform(augmented_X) 794 Phi_X = self.create_layer(scaled_X, self.W_) 795 if self.direct_link: 796 return self.scaler_.transform( 797 mo.cbind(augmented_X, Phi_X, backend=self.backend) 798 ) 799 return self.scaler_.transform(Phi_X) 800 801 # if no hidden layer 802 return self.scaler_.transform(augmented_X)
Transform data from test set, with hidden layer.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features
**kwargs: additional parameters to be passed to self.encode_cluster
Returns:
Transformed test set : {array-like}
15class BaseRegressor(Base, RegressorMixin): 16 """Random Vector Functional Link Network regression without shrinkage 17 18 Parameters: 19 20 n_hidden_features: int 21 number of nodes in the hidden layer 22 23 activation_name: str 24 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 25 26 a: float 27 hyperparameter for 'prelu' or 'elu' activation function 28 29 nodes_sim: str 30 type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton', 31 'uniform' 32 33 bias: boolean 34 indicates if the hidden layer contains a bias term (True) or 35 not (False) 36 37 dropout: float 38 regularization parameter; (random) percentage of nodes dropped out 39 of the training 40 41 direct_link: boolean 42 indicates if the original features are included (True) in model's 43 fitting or not (False) 44 45 n_clusters: int 46 number of clusters for type_clust='kmeans' or type_clust='gmm' 47 clustering (could be 0: no clustering) 48 49 cluster_encode: bool 50 defines how the variable containing clusters is treated (default is one-hot); 51 if `False`, then labels are used, without one-hot encoding 52 53 type_clust: str 54 type of clustering method: currently k-means ('kmeans') or Gaussian 55 Mixture Model ('gmm') 56 57 type_scaling: a tuple of 3 strings 58 scaling methods for inputs, hidden layer, and clustering respectively 59 (and when relevant). 60 Currently available: standardization ('std') or MinMax scaling ('minmax') 61 62 col_sample: float 63 percentage of features randomly chosen for training 64 65 row_sample: float 66 percentage of rows chosen for training, by stratified bootstrapping 67 68 seed: int 69 reproducibility seed for nodes_sim=='uniform', clustering and dropout 70 71 backend: str 72 "cpu" or "gpu" or "tpu" 73 74 Attributes: 75 76 beta_: vector 77 regression coefficients 78 79 GCV_: float 80 Generalized Cross-Validation error 81 82 """ 83 84 # construct the object ----- 85 86 def __init__( 87 self, 88 n_hidden_features=5, 89 activation_name="relu", 90 a=0.01, 91 nodes_sim="sobol", 92 bias=True, 93 dropout=0, 94 direct_link=True, 95 n_clusters=2, 96 cluster_encode=True, 97 type_clust="kmeans", 98 type_scaling=("std", "std", "std"), 99 col_sample=1, 100 row_sample=1, 101 seed=123, 102 backend="cpu", 103 ): 104 super().__init__( 105 n_hidden_features=n_hidden_features, 106 activation_name=activation_name, 107 a=a, 108 nodes_sim=nodes_sim, 109 bias=bias, 110 dropout=dropout, 111 direct_link=direct_link, 112 n_clusters=n_clusters, 113 cluster_encode=cluster_encode, 114 type_clust=type_clust, 115 type_scaling=type_scaling, 116 col_sample=col_sample, 117 row_sample=row_sample, 118 seed=seed, 119 backend=backend, 120 ) 121 122 def fit(self, X, y, **kwargs): 123 """Fit BaseRegressor to training data (X, y) 124 125 Parameters: 126 127 X: {array-like}, shape = [n_samples, n_features] 128 Training vectors, where n_samples is the number 129 of samples and n_features is the number of features 130 131 y: array-like, shape = [n_samples] 132 Target values 133 134 **kwargs: additional parameters to be passed to self.cook_training_set 135 136 Returns: 137 138 self: object 139 """ 140 141 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 142 143 fit_obj = lmf.beta_Sigma_hat( 144 X=scaled_Z, y=centered_y, backend=self.backend 145 ) 146 147 self.beta_ = fit_obj["beta_hat"] 148 149 self.GCV_ = fit_obj["GCV"] 150 151 return self 152 153 def predict(self, X, **kwargs): 154 """Predict test data X. 155 156 Parameters: 157 158 X: {array-like}, shape = [n_samples, n_features] 159 Training vectors, where n_samples is the number 160 of samples and n_features is the number of features 161 162 **kwargs: additional parameters to be passed to self.cook_test_set 163 164 Returns: 165 166 model predictions: {array-like} 167 """ 168 169 if len(X.shape) == 1: 170 n_features = X.shape[0] 171 new_X = mo.rbind( 172 X.reshape(1, n_features), 173 np.ones(n_features).reshape(1, n_features), 174 ) 175 176 return ( 177 self.y_mean_ 178 + mo.safe_sparse_dot( 179 a=self.cook_test_set(new_X, **kwargs), 180 b=self.beta_, 181 backend=self.backend, 182 ) 183 )[0] 184 185 return self.y_mean_ + mo.safe_sparse_dot( 186 a=self.cook_test_set(X, **kwargs), 187 b=self.beta_, 188 backend=self.backend, 189 )
Random Vector Functional Link Network regression without shrinkage
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or
not (False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original features are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for type_clust='kmeans' or type_clust='gmm'
clustering (could be 0: no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot);
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of features randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform', clustering and dropout
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
beta_: vector
regression coefficients
GCV_: float
Generalized Cross-Validation error
122 def fit(self, X, y, **kwargs): 123 """Fit BaseRegressor to training data (X, y) 124 125 Parameters: 126 127 X: {array-like}, shape = [n_samples, n_features] 128 Training vectors, where n_samples is the number 129 of samples and n_features is the number of features 130 131 y: array-like, shape = [n_samples] 132 Target values 133 134 **kwargs: additional parameters to be passed to self.cook_training_set 135 136 Returns: 137 138 self: object 139 """ 140 141 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 142 143 fit_obj = lmf.beta_Sigma_hat( 144 X=scaled_Z, y=centered_y, backend=self.backend 145 ) 146 147 self.beta_ = fit_obj["beta_hat"] 148 149 self.GCV_ = fit_obj["GCV"] 150 151 return self
Fit BaseRegressor to training data (X, y)
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features
y: array-like, shape = [n_samples]
Target values
**kwargs: additional parameters to be passed to self.cook_training_set
Returns:
self: object
153 def predict(self, X, **kwargs): 154 """Predict test data X. 155 156 Parameters: 157 158 X: {array-like}, shape = [n_samples, n_features] 159 Training vectors, where n_samples is the number 160 of samples and n_features is the number of features 161 162 **kwargs: additional parameters to be passed to self.cook_test_set 163 164 Returns: 165 166 model predictions: {array-like} 167 """ 168 169 if len(X.shape) == 1: 170 n_features = X.shape[0] 171 new_X = mo.rbind( 172 X.reshape(1, n_features), 173 np.ones(n_features).reshape(1, n_features), 174 ) 175 176 return ( 177 self.y_mean_ 178 + mo.safe_sparse_dot( 179 a=self.cook_test_set(new_X, **kwargs), 180 b=self.beta_, 181 backend=self.backend, 182 ) 183 )[0] 184 185 return self.y_mean_ + mo.safe_sparse_dot( 186 a=self.cook_test_set(X, **kwargs), 187 b=self.beta_, 188 backend=self.backend, 189 )
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features
**kwargs: additional parameters to be passed to self.cook_test_set
Returns:
model predictions: {array-like}
15class BayesianRVFLRegressor(Base, RegressorMixin): 16 """Bayesian Random Vector Functional Link Network regression with one prior 17 18 Parameters: 19 20 n_hidden_features: int 21 number of nodes in the hidden layer 22 23 activation_name: str 24 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 25 26 a: float 27 hyperparameter for 'prelu' or 'elu' activation function 28 29 nodes_sim: str 30 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform' 31 32 bias: boolean 33 indicates if the hidden layer contains a bias term (True) or not (False) 34 35 dropout: float 36 regularization parameter; (random) percentage of nodes dropped out 37 of the training 38 39 direct_link: boolean 40 indicates if the original features are included (True) in model''s fitting or not (False) 41 42 n_clusters: int 43 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering) 44 45 cluster_encode: bool 46 defines how the variable containing clusters is treated (default is one-hot) 47 if `False`, then labels are used, without one-hot encoding 48 49 type_clust: str 50 type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm') 51 52 type_scaling: a tuple of 3 strings 53 scaling methods for inputs, hidden layer, and clustering respectively 54 (and when relevant). 55 Currently available: standardization ('std') or MinMax scaling ('minmax') 56 57 seed: int 58 reproducibility seed for nodes_sim=='uniform' 59 60 s: float 61 std. dev. of regression parameters in Bayesian Ridge Regression 62 63 sigma: float 64 std. dev. of residuals in Bayesian Ridge Regression 65 66 return_std: boolean 67 if True, uncertainty around predictions is evaluated 68 69 backend: str 70 "cpu" or "gpu" or "tpu" 71 72 Attributes: 73 74 beta_: array-like 75 regression''s coefficients 76 77 Sigma_: array-like 78 covariance of the distribution of fitted parameters 79 80 GCV_: float 81 Generalized cross-validation error 82 83 y_mean_: float 84 average response 85 86 Examples: 87 88 ```python 89 TBD 90 ``` 91 92 """ 93 94 # construct the object ----- 95 96 def __init__( 97 self, 98 n_hidden_features=5, 99 activation_name="relu", 100 a=0.01, 101 nodes_sim="sobol", 102 bias=True, 103 dropout=0, 104 direct_link=True, 105 n_clusters=2, 106 cluster_encode=True, 107 type_clust="kmeans", 108 type_scaling=("std", "std", "std"), 109 seed=123, 110 s=0.1, 111 sigma=0.05, 112 return_std=True, 113 backend="cpu", 114 ): 115 super().__init__( 116 n_hidden_features=n_hidden_features, 117 activation_name=activation_name, 118 a=a, 119 nodes_sim=nodes_sim, 120 bias=bias, 121 dropout=dropout, 122 direct_link=direct_link, 123 n_clusters=n_clusters, 124 cluster_encode=cluster_encode, 125 type_clust=type_clust, 126 type_scaling=type_scaling, 127 seed=seed, 128 backend=backend, 129 ) 130 self.s = s 131 self.sigma = sigma 132 self.beta_ = None 133 self.Sigma_ = None 134 self.GCV_ = None 135 self.return_std = return_std 136 137 def fit(self, X, y, **kwargs): 138 """Fit BayesianRVFLRegressor to training data (X, y). 139 140 Parameters: 141 142 X: {array-like}, shape = [n_samples, n_features] 143 Training vectors, where n_samples is the number 144 of samples and n_features is the number of features. 145 146 y: array-like, shape = [n_samples] 147 Target values. 148 149 **kwargs: additional parameters to be passed to 150 self.cook_training_set 151 152 Returns: 153 154 self: object 155 156 """ 157 158 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 159 160 fit_obj = lmf.beta_Sigma_hat_rvfl( 161 X=scaled_Z, 162 y=centered_y, 163 s=self.s, 164 sigma=self.sigma, 165 fit_intercept=False, 166 return_cov=self.return_std, 167 backend=self.backend, 168 ) 169 170 self.beta_ = fit_obj["beta_hat"] 171 172 if self.return_std == True: 173 self.Sigma_ = fit_obj["Sigma_hat"] 174 175 self.GCV_ = fit_obj["GCV"] 176 177 return self 178 179 def predict(self, X, return_std=False, **kwargs): 180 """Predict test data X. 181 182 Parameters: 183 184 X: {array-like}, shape = [n_samples, n_features] 185 Training vectors, where n_samples is the number 186 of samples and n_features is the number of features. 187 188 return_std: {boolean}, standard dev. is returned or not 189 190 **kwargs: additional parameters to be passed to 191 self.cook_test_set 192 193 Returns: 194 195 model predictions: {array-like} 196 197 """ 198 199 if len(X.shape) == 1: # one observation in the test set only 200 n_features = X.shape[0] 201 new_X = mo.rbind( 202 x=X.reshape(1, n_features), 203 y=np.ones(n_features).reshape(1, n_features), 204 backend=self.backend, 205 ) 206 207 self.return_std = return_std 208 209 if self.return_std == False: 210 if len(X.shape) == 1: 211 return ( 212 self.y_mean_ 213 + mo.safe_sparse_dot( 214 a=self.cook_test_set(new_X, **kwargs), 215 b=self.beta_, 216 backend=self.backend, 217 ) 218 )[0] 219 220 return self.y_mean_ + mo.safe_sparse_dot( 221 a=self.cook_test_set(X, **kwargs), 222 b=self.beta_, 223 backend=self.backend, 224 ) 225 226 else: # confidence interval required for preds? 227 if len(X.shape) == 1: 228 Z = self.cook_test_set(new_X, **kwargs) 229 230 pred_obj = lmf.beta_Sigma_hat_rvfl( 231 s=self.s, 232 sigma=self.sigma, 233 X_star=Z, 234 return_cov=True, 235 beta_hat_=self.beta_, 236 Sigma_hat_=self.Sigma_, 237 backend=self.backend, 238 ) 239 240 return ( 241 self.y_mean_ + pred_obj["preds"][0], 242 pred_obj["preds_std"][0], 243 ) 244 245 Z = self.cook_test_set(X, **kwargs) 246 247 pred_obj = lmf.beta_Sigma_hat_rvfl( 248 s=self.s, 249 sigma=self.sigma, 250 X_star=Z, 251 return_cov=True, 252 beta_hat_=self.beta_, 253 Sigma_hat_=self.Sigma_, 254 backend=self.backend, 255 ) 256 257 return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])
Bayesian Random Vector Functional Link Network regression with one prior
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not (False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original features are included (True) in model''s fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
seed: int
reproducibility seed for nodes_sim=='uniform'
s: float
std. dev. of regression parameters in Bayesian Ridge Regression
sigma: float
std. dev. of residuals in Bayesian Ridge Regression
return_std: boolean
if True, uncertainty around predictions is evaluated
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
beta_: array-like
regression''s coefficients
Sigma_: array-like
covariance of the distribution of fitted parameters
GCV_: float
Generalized cross-validation error
y_mean_: float
average response
Examples:
TBD
137 def fit(self, X, y, **kwargs): 138 """Fit BayesianRVFLRegressor to training data (X, y). 139 140 Parameters: 141 142 X: {array-like}, shape = [n_samples, n_features] 143 Training vectors, where n_samples is the number 144 of samples and n_features is the number of features. 145 146 y: array-like, shape = [n_samples] 147 Target values. 148 149 **kwargs: additional parameters to be passed to 150 self.cook_training_set 151 152 Returns: 153 154 self: object 155 156 """ 157 158 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 159 160 fit_obj = lmf.beta_Sigma_hat_rvfl( 161 X=scaled_Z, 162 y=centered_y, 163 s=self.s, 164 sigma=self.sigma, 165 fit_intercept=False, 166 return_cov=self.return_std, 167 backend=self.backend, 168 ) 169 170 self.beta_ = fit_obj["beta_hat"] 171 172 if self.return_std == True: 173 self.Sigma_ = fit_obj["Sigma_hat"] 174 175 self.GCV_ = fit_obj["GCV"] 176 177 return self
Fit BayesianRVFLRegressor to training data (X, y).
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set
Returns:
self: object
179 def predict(self, X, return_std=False, **kwargs): 180 """Predict test data X. 181 182 Parameters: 183 184 X: {array-like}, shape = [n_samples, n_features] 185 Training vectors, where n_samples is the number 186 of samples and n_features is the number of features. 187 188 return_std: {boolean}, standard dev. is returned or not 189 190 **kwargs: additional parameters to be passed to 191 self.cook_test_set 192 193 Returns: 194 195 model predictions: {array-like} 196 197 """ 198 199 if len(X.shape) == 1: # one observation in the test set only 200 n_features = X.shape[0] 201 new_X = mo.rbind( 202 x=X.reshape(1, n_features), 203 y=np.ones(n_features).reshape(1, n_features), 204 backend=self.backend, 205 ) 206 207 self.return_std = return_std 208 209 if self.return_std == False: 210 if len(X.shape) == 1: 211 return ( 212 self.y_mean_ 213 + mo.safe_sparse_dot( 214 a=self.cook_test_set(new_X, **kwargs), 215 b=self.beta_, 216 backend=self.backend, 217 ) 218 )[0] 219 220 return self.y_mean_ + mo.safe_sparse_dot( 221 a=self.cook_test_set(X, **kwargs), 222 b=self.beta_, 223 backend=self.backend, 224 ) 225 226 else: # confidence interval required for preds? 227 if len(X.shape) == 1: 228 Z = self.cook_test_set(new_X, **kwargs) 229 230 pred_obj = lmf.beta_Sigma_hat_rvfl( 231 s=self.s, 232 sigma=self.sigma, 233 X_star=Z, 234 return_cov=True, 235 beta_hat_=self.beta_, 236 Sigma_hat_=self.Sigma_, 237 backend=self.backend, 238 ) 239 240 return ( 241 self.y_mean_ + pred_obj["preds"][0], 242 pred_obj["preds_std"][0], 243 ) 244 245 Z = self.cook_test_set(X, **kwargs) 246 247 pred_obj = lmf.beta_Sigma_hat_rvfl( 248 s=self.s, 249 sigma=self.sigma, 250 X_star=Z, 251 return_cov=True, 252 beta_hat_=self.beta_, 253 Sigma_hat_=self.Sigma_, 254 backend=self.backend, 255 ) 256 257 return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
return_std: {boolean}, standard dev. is returned or not
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
15class BayesianRVFL2Regressor(Base, RegressorMixin): 16 """Bayesian Random Vector Functional Link Network regression with two priors 17 18 Parameters: 19 20 n_hidden_features: int 21 number of nodes in the hidden layer 22 23 activation_name: str 24 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 25 26 a: float 27 hyperparameter for 'prelu' or 'elu' activation function 28 29 nodes_sim: str 30 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform' 31 32 bias: boolean 33 indicates if the hidden layer contains a bias term (True) or not (False) 34 35 dropout: float 36 regularization parameter; (random) percentage of nodes dropped out 37 of the training 38 39 direct_link: boolean 40 indicates if the original features are included (True) in model''s fitting or not (False) 41 42 n_clusters: int 43 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering) 44 45 cluster_encode: bool 46 defines how the variable containing clusters is treated (default is one-hot) 47 if `False`, then labels are used, without one-hot encoding 48 49 type_clust: str 50 type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm') 51 52 type_scaling: a tuple of 3 strings 53 scaling methods for inputs, hidden layer, and clustering respectively 54 (and when relevant). 55 Currently available: standardization ('std') or MinMax scaling ('minmax') 56 57 seed: int 58 reproducibility seed for nodes_sim=='uniform' 59 60 s1: float 61 std. dev. of init. regression parameters in Bayesian Ridge Regression 62 63 s2: float 64 std. dev. of augmented regression parameters in Bayesian Ridge Regression 65 66 sigma: float 67 std. dev. of residuals in Bayesian Ridge Regression 68 69 return_std: boolean 70 if True, uncertainty around predictions is evaluated 71 72 backend: str 73 "cpu" or "gpu" or "tpu" 74 75 Attributes: 76 77 beta_: array-like 78 regression''s coefficients 79 80 Sigma_: array-like 81 covariance of the distribution of fitted parameters 82 83 GCV_: float 84 Generalized cross-validation error 85 86 y_mean_: float 87 average response 88 89 Examples: 90 91 ```python 92 TBD 93 ``` 94 95 """ 96 97 # construct the object ----- 98 99 def __init__( 100 self, 101 n_hidden_features=5, 102 activation_name="relu", 103 a=0.01, 104 nodes_sim="sobol", 105 bias=True, 106 dropout=0, 107 direct_link=True, 108 n_clusters=0, 109 cluster_encode=True, 110 type_clust="kmeans", 111 type_scaling=("std", "std", "std"), 112 seed=123, 113 s1=0.1, 114 s2=0.1, 115 sigma=0.05, 116 return_std=True, 117 backend="cpu", 118 ): 119 super().__init__( 120 n_hidden_features=n_hidden_features, 121 activation_name=activation_name, 122 a=a, 123 nodes_sim=nodes_sim, 124 bias=bias, 125 dropout=dropout, 126 direct_link=direct_link, 127 n_clusters=n_clusters, 128 cluster_encode=cluster_encode, 129 type_clust=type_clust, 130 type_scaling=type_scaling, 131 seed=seed, 132 backend=backend, 133 ) 134 135 self.s1 = s1 136 self.s2 = s2 137 self.sigma = sigma 138 self.beta_ = None 139 self.Sigma_ = None 140 self.GCV_ = None 141 self.return_std = return_std 142 143 def fit(self, X, y, **kwargs): 144 """Fit BayesianRVFL2Regressor to training data (X, y) 145 146 Parameters: 147 148 X: {array-like}, shape = [n_samples, n_features] 149 Training vectors, where n_samples is the number 150 of samples and n_features is the number of features 151 152 y: array-like, shape = [n_samples] 153 Target values 154 155 **kwargs: additional parameters to be passed to 156 self.cook_training_set 157 158 Returns: 159 160 self: object 161 162 """ 163 164 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 165 166 n, p = X.shape 167 q = self.n_hidden_features 168 169 if self.direct_link == True: 170 r = p + self.n_clusters 171 172 block11 = (self.s1**2) * np.eye(r) 173 block12 = np.zeros((r, q)) 174 block21 = np.zeros((q, r)) 175 block22 = (self.s2**2) * np.eye(q) 176 177 Sigma_prior = mo.rbind( 178 x=mo.cbind(x=block11, y=block12, backend=self.backend), 179 y=mo.cbind(x=block21, y=block22, backend=self.backend), 180 backend=self.backend, 181 ) 182 183 else: 184 Sigma_prior = (self.s2**2) * np.eye(q) 185 186 fit_obj = lmf.beta_Sigma_hat_rvfl2( 187 X=scaled_Z, 188 y=centered_y, 189 Sigma=Sigma_prior, 190 sigma=self.sigma, 191 fit_intercept=False, 192 return_cov=self.return_std, 193 backend=self.backend, 194 ) 195 196 self.beta_ = fit_obj["beta_hat"] 197 198 if self.return_std == True: 199 self.Sigma_ = fit_obj["Sigma_hat"] 200 201 self.GCV_ = fit_obj["GCV"] 202 203 return self 204 205 def predict(self, X, return_std=False, **kwargs): 206 """Predict test data X. 207 208 Parameters: 209 210 X: {array-like}, shape = [n_samples, n_features] 211 Training vectors, where n_samples is the number 212 of samples and n_features is the number of features. 213 214 return_std: {boolean}, standard dev. is returned or not 215 216 **kwargs: additional parameters to be passed to 217 self.cook_test_set 218 219 Returns: 220 221 model predictions: {array-like} 222 223 """ 224 225 if len(X.shape) == 1: # one observation in the test set only 226 n_features = X.shape[0] 227 new_X = mo.rbind( 228 x=X.reshape(1, n_features), 229 y=np.ones(n_features).reshape(1, n_features), 230 backend=self.backend, 231 ) 232 233 self.return_std = return_std 234 235 if self.return_std == False: 236 if len(X.shape) == 1: 237 return ( 238 self.y_mean_ 239 + mo.safe_sparse_dot( 240 self.cook_test_set(new_X, **kwargs), 241 self.beta_, 242 backend=self.backend, 243 ) 244 )[0] 245 246 return self.y_mean_ + mo.safe_sparse_dot( 247 self.cook_test_set(X, **kwargs), 248 self.beta_, 249 backend=self.backend, 250 ) 251 252 else: # confidence interval required for preds? 253 if len(X.shape) == 1: 254 Z = self.cook_test_set(new_X, **kwargs) 255 256 pred_obj = lmf.beta_Sigma_hat_rvfl2( 257 X_star=Z, 258 return_cov=self.return_std, 259 beta_hat_=self.beta_, 260 Sigma_hat_=self.Sigma_, 261 backend=self.backend, 262 ) 263 264 return ( 265 self.y_mean_ + pred_obj["preds"][0], 266 pred_obj["preds_std"][0], 267 ) 268 269 Z = self.cook_test_set(X, **kwargs) 270 271 pred_obj = lmf.beta_Sigma_hat_rvfl2( 272 X_star=Z, 273 return_cov=self.return_std, 274 beta_hat_=self.beta_, 275 Sigma_hat_=self.Sigma_, 276 backend=self.backend, 277 ) 278 279 return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])
Bayesian Random Vector Functional Link Network regression with two priors
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not (False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original features are included (True) in model''s fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
seed: int
reproducibility seed for nodes_sim=='uniform'
s1: float
std. dev. of init. regression parameters in Bayesian Ridge Regression
s2: float
std. dev. of augmented regression parameters in Bayesian Ridge Regression
sigma: float
std. dev. of residuals in Bayesian Ridge Regression
return_std: boolean
if True, uncertainty around predictions is evaluated
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
beta_: array-like
regression''s coefficients
Sigma_: array-like
covariance of the distribution of fitted parameters
GCV_: float
Generalized cross-validation error
y_mean_: float
average response
Examples:
TBD
143 def fit(self, X, y, **kwargs): 144 """Fit BayesianRVFL2Regressor to training data (X, y) 145 146 Parameters: 147 148 X: {array-like}, shape = [n_samples, n_features] 149 Training vectors, where n_samples is the number 150 of samples and n_features is the number of features 151 152 y: array-like, shape = [n_samples] 153 Target values 154 155 **kwargs: additional parameters to be passed to 156 self.cook_training_set 157 158 Returns: 159 160 self: object 161 162 """ 163 164 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 165 166 n, p = X.shape 167 q = self.n_hidden_features 168 169 if self.direct_link == True: 170 r = p + self.n_clusters 171 172 block11 = (self.s1**2) * np.eye(r) 173 block12 = np.zeros((r, q)) 174 block21 = np.zeros((q, r)) 175 block22 = (self.s2**2) * np.eye(q) 176 177 Sigma_prior = mo.rbind( 178 x=mo.cbind(x=block11, y=block12, backend=self.backend), 179 y=mo.cbind(x=block21, y=block22, backend=self.backend), 180 backend=self.backend, 181 ) 182 183 else: 184 Sigma_prior = (self.s2**2) * np.eye(q) 185 186 fit_obj = lmf.beta_Sigma_hat_rvfl2( 187 X=scaled_Z, 188 y=centered_y, 189 Sigma=Sigma_prior, 190 sigma=self.sigma, 191 fit_intercept=False, 192 return_cov=self.return_std, 193 backend=self.backend, 194 ) 195 196 self.beta_ = fit_obj["beta_hat"] 197 198 if self.return_std == True: 199 self.Sigma_ = fit_obj["Sigma_hat"] 200 201 self.GCV_ = fit_obj["GCV"] 202 203 return self
Fit BayesianRVFL2Regressor to training data (X, y)
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features
y: array-like, shape = [n_samples]
Target values
**kwargs: additional parameters to be passed to
self.cook_training_set
Returns:
self: object
205 def predict(self, X, return_std=False, **kwargs): 206 """Predict test data X. 207 208 Parameters: 209 210 X: {array-like}, shape = [n_samples, n_features] 211 Training vectors, where n_samples is the number 212 of samples and n_features is the number of features. 213 214 return_std: {boolean}, standard dev. is returned or not 215 216 **kwargs: additional parameters to be passed to 217 self.cook_test_set 218 219 Returns: 220 221 model predictions: {array-like} 222 223 """ 224 225 if len(X.shape) == 1: # one observation in the test set only 226 n_features = X.shape[0] 227 new_X = mo.rbind( 228 x=X.reshape(1, n_features), 229 y=np.ones(n_features).reshape(1, n_features), 230 backend=self.backend, 231 ) 232 233 self.return_std = return_std 234 235 if self.return_std == False: 236 if len(X.shape) == 1: 237 return ( 238 self.y_mean_ 239 + mo.safe_sparse_dot( 240 self.cook_test_set(new_X, **kwargs), 241 self.beta_, 242 backend=self.backend, 243 ) 244 )[0] 245 246 return self.y_mean_ + mo.safe_sparse_dot( 247 self.cook_test_set(X, **kwargs), 248 self.beta_, 249 backend=self.backend, 250 ) 251 252 else: # confidence interval required for preds? 253 if len(X.shape) == 1: 254 Z = self.cook_test_set(new_X, **kwargs) 255 256 pred_obj = lmf.beta_Sigma_hat_rvfl2( 257 X_star=Z, 258 return_cov=self.return_std, 259 beta_hat_=self.beta_, 260 Sigma_hat_=self.Sigma_, 261 backend=self.backend, 262 ) 263 264 return ( 265 self.y_mean_ + pred_obj["preds"][0], 266 pred_obj["preds_std"][0], 267 ) 268 269 Z = self.cook_test_set(X, **kwargs) 270 271 pred_obj = lmf.beta_Sigma_hat_rvfl2( 272 X_star=Z, 273 return_cov=self.return_std, 274 beta_hat_=self.beta_, 275 Sigma_hat_=self.Sigma_, 276 backend=self.backend, 277 ) 278 279 return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
return_std: {boolean}, standard dev. is returned or not
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
42class ClassicalMTS(Base): 43 """Multivariate time series (FactorMTS) forecasting with Factor models 44 45 Parameters: 46 47 model: type of model: str. 48 currently, 'VAR', 'VECM', 'ARIMA', 'ETS', 'Theta' 49 50 Attributes: 51 52 df_: data frame 53 the input data frame, in case a data.frame is provided to `fit` 54 55 level_: int 56 level of confidence for prediction intervals (default is 95) 57 58 Examples: 59 See examples/classical_mts_timeseries.py 60 """ 61 62 # construct the object ----- 63 64 def __init__(self, model="VAR"): 65 66 self.model = model 67 if self.model == "VAR": 68 self.obj = VAR 69 elif self.model == "VECM": 70 self.obj = VECM 71 elif self.model == "ARIMA": 72 self.obj = ARIMA 73 elif self.model == "ETS": 74 self.obj = ExponentialSmoothing 75 elif self.model == "Theta": 76 self.obj = ThetaModel 77 else: 78 raise ValueError("model not recognized") 79 self.n_series = None 80 self.replications = None 81 self.mean_ = None 82 self.upper_ = None 83 self.lower_ = None 84 self.output_dates_ = None 85 self.alpha_ = None 86 self.df_ = None 87 self.residuals_ = [] 88 self.sims_ = None 89 self.level_ = None 90 91 def fit(self, X, **kwargs): 92 """Fit FactorMTS model to training data X, with optional regressors xreg 93 94 Parameters: 95 96 X: {array-like}, shape = [n_samples, n_features] 97 Training time series, where n_samples is the number 98 of samples and n_features is the number of features; 99 X must be in increasing order (most recent observations last) 100 101 **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity) 102 103 Returns: 104 105 self: object 106 """ 107 108 try: 109 self.n_series = X.shape[1] 110 except Exception: 111 self.n_series = 1 112 113 if (isinstance(X, pd.DataFrame) is False) and isinstance( 114 X, pd.Series 115 ) is False: # input data set is a numpy array 116 117 X = pd.DataFrame(X) 118 if self.n_series > 1: 119 self.series_names = [ 120 "series" + str(i) for i in range(X.shape[1]) 121 ] 122 else: 123 self.series_names = "series0" 124 125 else: # input data set is a DataFrame or Series with column names 126 127 X_index = None 128 if X.index is not None and len(X.shape) > 1: 129 X_index = X.index 130 X = copy.deepcopy(mo.convert_df_to_numeric(X)) 131 if X_index is not None: 132 try: 133 X.index = X_index 134 except Exception: 135 pass 136 if isinstance(X, pd.DataFrame): 137 self.series_names = X.columns.tolist() 138 else: 139 self.series_names = X.name 140 141 if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series): 142 self.df_ = X 143 X = X.values 144 self.df_.columns = self.series_names 145 self.input_dates = ts.compute_input_dates(self.df_) 146 else: 147 self.df_ = pd.DataFrame(X, columns=self.series_names) 148 149 if self.model == "Theta": 150 self.obj = self.obj(self.df_, **kwargs).fit() 151 else: 152 self.obj = self.obj(X, **kwargs).fit(**kwargs) 153 154 return self 155 156 def predict(self, h=5, level=95, **kwargs): 157 """Forecast all the time series, h steps ahead 158 159 Parameters: 160 161 h: {integer} 162 Forecasting horizon 163 164 **kwargs: additional parameters to be passed to 165 self.cook_test_set 166 167 Returns: 168 169 model predictions for horizon = h: {array-like} 170 171 """ 172 173 self.output_dates_, frequency = ts.compute_output_dates(self.df_, h) 174 175 self.level_ = level 176 177 self.lower_ = None # do not remove (/!\) 178 179 self.upper_ = None # do not remove (/!\) 180 181 self.sims_ = None # do not remove (/!\) 182 183 self.level_ = level 184 185 self.alpha_ = 100 - level 186 187 pi_multiplier = norm.ppf(1 - self.alpha_ / 200) 188 189 # Named tuple for forecast results 190 DescribeResult = namedtuple( 191 "DescribeResult", ("mean", "lower", "upper") 192 ) 193 194 if self.model == "VAR": 195 mean_forecast, lower_bound, upper_bound = ( 196 self.obj.forecast_interval( 197 self.obj.endog, steps=h, alpha=self.alpha_ / 100, **kwargs 198 ) 199 ) 200 201 elif self.model == "VECM": 202 forecast_result = self.obj.predict(steps=h) 203 mean_forecast = forecast_result 204 lower_bound, upper_bound = self._compute_confidence_intervals( 205 forecast_result, alpha=self.alpha_ / 100, **kwargs 206 ) 207 208 elif self.model == "ARIMA": 209 forecast_result = self.obj.get_forecast(steps=h) 210 mean_forecast = forecast_result.predicted_mean 211 lower_bound = forecast_result.conf_int()[:, 0] 212 upper_bound = forecast_result.conf_int()[:, 1] 213 214 elif self.model == "ETS": 215 forecast_result = self.obj.forecast(steps=h) 216 residuals = self.obj.resid 217 std_errors = np.std(residuals) 218 mean_forecast = forecast_result 219 lower_bound = forecast_result - pi_multiplier * std_errors 220 upper_bound = forecast_result + pi_multiplier * std_errors 221 222 elif self.model == "Theta": 223 try: 224 mean_forecast = self.obj.forecast(steps=h).values 225 forecast_result = self.obj.prediction_intervals( 226 steps=h, alpha=self.alpha_ / 100, **kwargs 227 ) 228 lower_bound = forecast_result["lower"].values 229 upper_bound = forecast_result["upper"].values 230 except Exception: 231 mean_forecast = self.obj.forecast(steps=h) 232 forecast_result = self.obj.prediction_intervals( 233 steps=h, alpha=self.alpha_ / 100, **kwargs 234 ) 235 lower_bound = forecast_result["lower"] 236 upper_bound = forecast_result["upper"] 237 238 else: 239 240 raise ValueError("model not recognized") 241 242 try: 243 self.mean_ = pd.DataFrame( 244 mean_forecast, 245 columns=self.series_names, 246 index=self.output_dates_, 247 ) 248 self.lower_ = pd.DataFrame( 249 lower_bound, columns=self.series_names, index=self.output_dates_ 250 ) 251 self.upper_ = pd.DataFrame( 252 upper_bound, columns=self.series_names, index=self.output_dates_ 253 ) 254 except Exception: 255 self.mean_ = pd.Series( 256 mean_forecast, name=self.series_names, index=self.output_dates_ 257 ) 258 self.lower_ = pd.Series( 259 lower_bound, name=self.series_names, index=self.output_dates_ 260 ) 261 self.upper_ = pd.Series( 262 upper_bound, name=self.series_names, index=self.output_dates_ 263 ) 264 265 return DescribeResult( 266 mean=self.mean_, lower=self.lower_, upper=self.upper_ 267 ) 268 269 def _compute_confidence_intervals(self, forecast_result, alpha): 270 """ 271 Compute confidence intervals for VECM forecasts. 272 Uses the covariance of residuals to approximate the confidence intervals. 273 """ 274 residuals = self.obj.resid 275 cov_matrix = np.cov(residuals.T) # Covariance matrix of residuals 276 std_errors = np.sqrt(np.diag(cov_matrix)) # Standard errors 277 278 z_value = norm.ppf(1 - alpha / 2) # Z-score for the given alpha level 279 lower_bound = forecast_result - z_value * std_errors 280 upper_bound = forecast_result + z_value * std_errors 281 282 return lower_bound, upper_bound 283 284 def score(self, X, training_index, testing_index, scoring=None, **kwargs): 285 """Train on training_index, score on testing_index.""" 286 287 assert ( 288 bool(set(training_index).intersection(set(testing_index))) == False 289 ), "Non-overlapping 'training_index' and 'testing_index' required" 290 291 # Dimensions 292 try: 293 # multivariate time series 294 n, p = X.shape 295 except: 296 # univariate time series 297 n = X.shape[0] 298 p = 1 299 300 # Training and testing sets 301 if p > 1: 302 X_train = X[training_index, :] 303 X_test = X[testing_index, :] 304 else: 305 X_train = X[training_index] 306 X_test = X[testing_index] 307 308 # Horizon 309 h = len(testing_index) 310 assert ( 311 len(training_index) + h 312 ) <= n, "Please check lengths of training and testing windows" 313 314 # Fit and predict 315 self.fit(X_train, **kwargs) 316 preds = self.predict(h=h, **kwargs) 317 318 if scoring is None: 319 scoring = "neg_root_mean_squared_error" 320 321 # check inputs 322 assert scoring in ( 323 "explained_variance", 324 "neg_mean_absolute_error", 325 "neg_mean_squared_error", 326 "neg_root_mean_squared_error", 327 "neg_mean_squared_log_error", 328 "neg_median_absolute_error", 329 "r2", 330 ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \ 331 'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \ 332 'neg_median_absolute_error', 'r2')" 333 334 scoring_options = { 335 "explained_variance": skm2.explained_variance_score, 336 "neg_mean_absolute_error": skm2.mean_absolute_error, 337 "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2), 338 "neg_root_mean_squared_error": lambda x, y: np.sqrt( 339 np.mean((x - y) ** 2) 340 ), 341 "neg_mean_squared_log_error": skm2.mean_squared_log_error, 342 "neg_median_absolute_error": skm2.median_absolute_error, 343 "r2": skm2.r2_score, 344 } 345 346 # if p > 1: 347 # return tuple( 348 # [ 349 # scoring_options[scoring]( 350 # X_test[:, i], preds[:, i]#, **kwargs 351 # ) 352 # for i in range(p) 353 # ] 354 # ) 355 # else: 356 return scoring_options[scoring](X_test, preds) 357 358 def plot(self, series=None, type_axis="dates", type_plot="pi"): 359 """Plot time series forecast 360 361 Parameters: 362 363 series: {integer} or {string} 364 series index or name 365 366 """ 367 368 assert all( 369 [ 370 self.mean_ is not None, 371 self.lower_ is not None, 372 self.upper_ is not None, 373 self.output_dates_ is not None, 374 ] 375 ), "model forecasting must be obtained first (with predict)" 376 377 if series is None: 378 assert ( 379 self.n_series == 1 380 ), "please specify series index or name (n_series > 1)" 381 series = 0 382 383 if isinstance(series, str): 384 assert ( 385 series in self.series_names 386 ), f"series {series} doesn't exist in the input dataset" 387 series_idx = self.df_.columns.get_loc(series) 388 else: 389 assert isinstance(series, int) and ( 390 0 <= series < self.n_series 391 ), f"check series index (< {self.n_series})" 392 series_idx = series 393 394 if isinstance(self.df_, pd.DataFrame): 395 y_all = list(self.df_.iloc[:, series_idx]) + list( 396 self.mean_.iloc[:, series_idx] 397 ) 398 y_test = list(self.mean_.iloc[:, series_idx]) 399 else: 400 y_all = list(self.df_.values) + list(self.mean_.values) 401 y_test = list(self.mean_.values) 402 n_points_all = len(y_all) 403 n_points_train = self.df_.shape[0] 404 405 if type_axis == "numeric": 406 x_all = [i for i in range(n_points_all)] 407 x_test = [i for i in range(n_points_train, n_points_all)] 408 409 if type_axis == "dates": # use dates 410 x_all = np.concatenate( 411 (self.input_dates.values, self.output_dates_.values), axis=None 412 ) 413 x_test = self.output_dates_.values 414 415 if type_plot == "pi": 416 fig, ax = plt.subplots() 417 ax.plot(x_all, y_all, "-") 418 ax.plot(x_test, y_test, "-", color="orange") 419 try: 420 ax.fill_between( 421 x_test, 422 self.lower_.iloc[:, series_idx], 423 self.upper_.iloc[:, series_idx], 424 alpha=0.2, 425 color="orange", 426 ) 427 except Exception: 428 ax.fill_between( 429 x_test, 430 self.lower_.values, 431 self.upper_.values, 432 alpha=0.2, 433 color="orange", 434 ) 435 if self.replications is None: 436 if self.n_series > 1: 437 plt.title( 438 f"prediction intervals for {series}", 439 loc="left", 440 fontsize=12, 441 fontweight=0, 442 color="black", 443 ) 444 else: 445 plt.title( 446 f"prediction intervals for input time series", 447 loc="left", 448 fontsize=12, 449 fontweight=0, 450 color="black", 451 ) 452 plt.show() 453 else: # self.replications is not None 454 if self.n_series > 1: 455 plt.title( 456 f"prediction intervals for {self.replications} simulations of {series}", 457 loc="left", 458 fontsize=12, 459 fontweight=0, 460 color="black", 461 ) 462 else: 463 plt.title( 464 f"prediction intervals for {self.replications} simulations of input time series", 465 loc="left", 466 fontsize=12, 467 fontweight=0, 468 color="black", 469 ) 470 plt.show() 471 472 if type_plot == "spaghetti": 473 palette = plt.get_cmap("Set1") 474 sims_ix = getsims(self.sims_, series_idx) 475 plt.plot(x_all, y_all, "-") 476 for col_ix in range( 477 sims_ix.shape[1] 478 ): # avoid this when there are thousands of simulations 479 plt.plot( 480 x_test, 481 sims_ix[:, col_ix], 482 "-", 483 color=palette(col_ix), 484 linewidth=1, 485 alpha=0.9, 486 ) 487 plt.plot(x_all, y_all, "-", color="black") 488 plt.plot(x_test, y_test, "-", color="blue") 489 # Add titles 490 if self.n_series > 1: 491 plt.title( 492 f"{self.replications} simulations of {series}", 493 loc="left", 494 fontsize=12, 495 fontweight=0, 496 color="black", 497 ) 498 else: 499 plt.title( 500 f"{self.replications} simulations of input time series", 501 loc="left", 502 fontsize=12, 503 fontweight=0, 504 color="black", 505 ) 506 plt.xlabel("Time") 507 plt.ylabel("Values") 508 # Show the graph 509 plt.show() 510 511 def cross_val_score( 512 self, 513 X, 514 scoring="root_mean_squared_error", 515 n_jobs=None, 516 verbose=0, 517 xreg=None, 518 initial_window=5, 519 horizon=3, 520 fixed_window=False, 521 show_progress=True, 522 level=95, 523 **kwargs, 524 ): 525 """Evaluate a score by time series cross-validation. 526 527 Parameters: 528 529 X: {array-like, sparse matrix} of shape (n_samples, n_features) 530 The data to fit. 531 532 scoring: str or a function 533 A str in ('root_mean_squared_error', 'mean_squared_error', 'mean_error', 534 'mean_absolute_error', 'mean_error', 'mean_percentage_error', 535 'mean_absolute_percentage_error', 'winkler_score', 'coverage') 536 Or a function defined as 'coverage' and 'winkler_score' in `utils.timeseries` 537 538 n_jobs: int, default=None 539 Number of jobs to run in parallel. 540 541 verbose: int, default=0 542 The verbosity level. 543 544 xreg: array-like, optional (default=None) 545 Additional (external) regressors to be passed to `fit` 546 xreg must be in 'increasing' order (most recent observations last) 547 548 initial_window: int 549 initial number of consecutive values in each training set sample 550 551 horizon: int 552 number of consecutive values in test set sample 553 554 fixed_window: boolean 555 if False, all training samples start at index 0, and the training 556 window's size is increasing. 557 if True, the training window's size is fixed, and the window is 558 rolling forward 559 560 show_progress: boolean 561 if True, a progress bar is printed 562 563 **kwargs: dict 564 additional parameters to be passed to `fit` and `predict` 565 566 Returns: 567 568 A tuple: descriptive statistics or errors and raw errors 569 570 """ 571 tscv = TimeSeriesSplit() 572 573 tscv_obj = tscv.split( 574 X, 575 initial_window=initial_window, 576 horizon=horizon, 577 fixed_window=fixed_window, 578 ) 579 580 if isinstance(scoring, str): 581 582 assert scoring in ( 583 "root_mean_squared_error", 584 "mean_squared_error", 585 "mean_error", 586 "mean_absolute_error", 587 "mean_percentage_error", 588 "mean_absolute_percentage_error", 589 "winkler_score", 590 "coverage", 591 ), "must have scoring in ('root_mean_squared_error', 'mean_squared_error', 'mean_error', 'mean_absolute_error', 'mean_error', 'mean_percentage_error', 'mean_absolute_percentage_error', 'winkler_score', 'coverage')" 592 593 def err_func(X_test, X_pred, scoring): 594 if (self.replications is not None) or ( 595 self.type_pi == "gaussian" 596 ): # probabilistic 597 if scoring == "winkler_score": 598 return winkler_score(X_pred, X_test, level=level) 599 elif scoring == "coverage": 600 return coverage(X_pred, X_test, level=level) 601 else: 602 return mean_errors( 603 pred=X_pred.mean, actual=X_test, scoring=scoring 604 ) 605 else: # not probabilistic 606 return mean_errors( 607 pred=X_pred, actual=X_test, scoring=scoring 608 ) 609 610 else: # isinstance(scoring, str) = False 611 612 err_func = scoring 613 614 errors = [] 615 616 train_indices = [] 617 618 test_indices = [] 619 620 for train_index, test_index in tscv_obj: 621 train_indices.append(train_index) 622 test_indices.append(test_index) 623 624 if show_progress is True: 625 iterator = tqdm( 626 zip(train_indices, test_indices), total=len(train_indices) 627 ) 628 else: 629 iterator = zip(train_indices, test_indices) 630 631 for train_index, test_index in iterator: 632 633 if verbose == 1: 634 print(f"TRAIN: {train_index}") 635 print(f"TEST: {test_index}") 636 637 if isinstance(X, pd.DataFrame): 638 self.fit(X.iloc[train_index, :], xreg=xreg, **kwargs) 639 X_test = X.iloc[test_index, :] 640 else: 641 self.fit(X[train_index, :], xreg=xreg, **kwargs) 642 X_test = X[test_index, :] 643 X_pred = self.predict(h=int(len(test_index)), level=level, **kwargs) 644 645 errors.append(err_func(X_test, X_pred, scoring)) 646 647 res = np.asarray(errors) 648 649 return res, describe(res)
Multivariate time series (FactorMTS) forecasting with Factor models
Parameters:
model: type of model: str.
currently, 'VAR', 'VECM', 'ARIMA', 'ETS', 'Theta'
Attributes:
df_: data frame
the input data frame, in case a data.frame is provided to `fit`
level_: int
level of confidence for prediction intervals (default is 95)
Examples: See examples/classical_mts_timeseries.py
91 def fit(self, X, **kwargs): 92 """Fit FactorMTS model to training data X, with optional regressors xreg 93 94 Parameters: 95 96 X: {array-like}, shape = [n_samples, n_features] 97 Training time series, where n_samples is the number 98 of samples and n_features is the number of features; 99 X must be in increasing order (most recent observations last) 100 101 **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity) 102 103 Returns: 104 105 self: object 106 """ 107 108 try: 109 self.n_series = X.shape[1] 110 except Exception: 111 self.n_series = 1 112 113 if (isinstance(X, pd.DataFrame) is False) and isinstance( 114 X, pd.Series 115 ) is False: # input data set is a numpy array 116 117 X = pd.DataFrame(X) 118 if self.n_series > 1: 119 self.series_names = [ 120 "series" + str(i) for i in range(X.shape[1]) 121 ] 122 else: 123 self.series_names = "series0" 124 125 else: # input data set is a DataFrame or Series with column names 126 127 X_index = None 128 if X.index is not None and len(X.shape) > 1: 129 X_index = X.index 130 X = copy.deepcopy(mo.convert_df_to_numeric(X)) 131 if X_index is not None: 132 try: 133 X.index = X_index 134 except Exception: 135 pass 136 if isinstance(X, pd.DataFrame): 137 self.series_names = X.columns.tolist() 138 else: 139 self.series_names = X.name 140 141 if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series): 142 self.df_ = X 143 X = X.values 144 self.df_.columns = self.series_names 145 self.input_dates = ts.compute_input_dates(self.df_) 146 else: 147 self.df_ = pd.DataFrame(X, columns=self.series_names) 148 149 if self.model == "Theta": 150 self.obj = self.obj(self.df_, **kwargs).fit() 151 else: 152 self.obj = self.obj(X, **kwargs).fit(**kwargs) 153 154 return self
Fit FactorMTS model to training data X, with optional regressors xreg
Parameters:
X: {array-like}, shape = [n_samples, n_features] Training time series, where n_samples is the number of samples and n_features is the number of features; X must be in increasing order (most recent observations last)
**kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)
Returns:
self: object
156 def predict(self, h=5, level=95, **kwargs): 157 """Forecast all the time series, h steps ahead 158 159 Parameters: 160 161 h: {integer} 162 Forecasting horizon 163 164 **kwargs: additional parameters to be passed to 165 self.cook_test_set 166 167 Returns: 168 169 model predictions for horizon = h: {array-like} 170 171 """ 172 173 self.output_dates_, frequency = ts.compute_output_dates(self.df_, h) 174 175 self.level_ = level 176 177 self.lower_ = None # do not remove (/!\) 178 179 self.upper_ = None # do not remove (/!\) 180 181 self.sims_ = None # do not remove (/!\) 182 183 self.level_ = level 184 185 self.alpha_ = 100 - level 186 187 pi_multiplier = norm.ppf(1 - self.alpha_ / 200) 188 189 # Named tuple for forecast results 190 DescribeResult = namedtuple( 191 "DescribeResult", ("mean", "lower", "upper") 192 ) 193 194 if self.model == "VAR": 195 mean_forecast, lower_bound, upper_bound = ( 196 self.obj.forecast_interval( 197 self.obj.endog, steps=h, alpha=self.alpha_ / 100, **kwargs 198 ) 199 ) 200 201 elif self.model == "VECM": 202 forecast_result = self.obj.predict(steps=h) 203 mean_forecast = forecast_result 204 lower_bound, upper_bound = self._compute_confidence_intervals( 205 forecast_result, alpha=self.alpha_ / 100, **kwargs 206 ) 207 208 elif self.model == "ARIMA": 209 forecast_result = self.obj.get_forecast(steps=h) 210 mean_forecast = forecast_result.predicted_mean 211 lower_bound = forecast_result.conf_int()[:, 0] 212 upper_bound = forecast_result.conf_int()[:, 1] 213 214 elif self.model == "ETS": 215 forecast_result = self.obj.forecast(steps=h) 216 residuals = self.obj.resid 217 std_errors = np.std(residuals) 218 mean_forecast = forecast_result 219 lower_bound = forecast_result - pi_multiplier * std_errors 220 upper_bound = forecast_result + pi_multiplier * std_errors 221 222 elif self.model == "Theta": 223 try: 224 mean_forecast = self.obj.forecast(steps=h).values 225 forecast_result = self.obj.prediction_intervals( 226 steps=h, alpha=self.alpha_ / 100, **kwargs 227 ) 228 lower_bound = forecast_result["lower"].values 229 upper_bound = forecast_result["upper"].values 230 except Exception: 231 mean_forecast = self.obj.forecast(steps=h) 232 forecast_result = self.obj.prediction_intervals( 233 steps=h, alpha=self.alpha_ / 100, **kwargs 234 ) 235 lower_bound = forecast_result["lower"] 236 upper_bound = forecast_result["upper"] 237 238 else: 239 240 raise ValueError("model not recognized") 241 242 try: 243 self.mean_ = pd.DataFrame( 244 mean_forecast, 245 columns=self.series_names, 246 index=self.output_dates_, 247 ) 248 self.lower_ = pd.DataFrame( 249 lower_bound, columns=self.series_names, index=self.output_dates_ 250 ) 251 self.upper_ = pd.DataFrame( 252 upper_bound, columns=self.series_names, index=self.output_dates_ 253 ) 254 except Exception: 255 self.mean_ = pd.Series( 256 mean_forecast, name=self.series_names, index=self.output_dates_ 257 ) 258 self.lower_ = pd.Series( 259 lower_bound, name=self.series_names, index=self.output_dates_ 260 ) 261 self.upper_ = pd.Series( 262 upper_bound, name=self.series_names, index=self.output_dates_ 263 ) 264 265 return DescribeResult( 266 mean=self.mean_, lower=self.lower_, upper=self.upper_ 267 )
Forecast all the time series, h steps ahead
Parameters:
h: {integer} Forecasting horizon
**kwargs: additional parameters to be passed to self.cook_test_set
Returns:
model predictions for horizon = h: {array-like}
284 def score(self, X, training_index, testing_index, scoring=None, **kwargs): 285 """Train on training_index, score on testing_index.""" 286 287 assert ( 288 bool(set(training_index).intersection(set(testing_index))) == False 289 ), "Non-overlapping 'training_index' and 'testing_index' required" 290 291 # Dimensions 292 try: 293 # multivariate time series 294 n, p = X.shape 295 except: 296 # univariate time series 297 n = X.shape[0] 298 p = 1 299 300 # Training and testing sets 301 if p > 1: 302 X_train = X[training_index, :] 303 X_test = X[testing_index, :] 304 else: 305 X_train = X[training_index] 306 X_test = X[testing_index] 307 308 # Horizon 309 h = len(testing_index) 310 assert ( 311 len(training_index) + h 312 ) <= n, "Please check lengths of training and testing windows" 313 314 # Fit and predict 315 self.fit(X_train, **kwargs) 316 preds = self.predict(h=h, **kwargs) 317 318 if scoring is None: 319 scoring = "neg_root_mean_squared_error" 320 321 # check inputs 322 assert scoring in ( 323 "explained_variance", 324 "neg_mean_absolute_error", 325 "neg_mean_squared_error", 326 "neg_root_mean_squared_error", 327 "neg_mean_squared_log_error", 328 "neg_median_absolute_error", 329 "r2", 330 ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \ 331 'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \ 332 'neg_median_absolute_error', 'r2')" 333 334 scoring_options = { 335 "explained_variance": skm2.explained_variance_score, 336 "neg_mean_absolute_error": skm2.mean_absolute_error, 337 "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2), 338 "neg_root_mean_squared_error": lambda x, y: np.sqrt( 339 np.mean((x - y) ** 2) 340 ), 341 "neg_mean_squared_log_error": skm2.mean_squared_log_error, 342 "neg_median_absolute_error": skm2.median_absolute_error, 343 "r2": skm2.r2_score, 344 } 345 346 # if p > 1: 347 # return tuple( 348 # [ 349 # scoring_options[scoring]( 350 # X_test[:, i], preds[:, i]#, **kwargs 351 # ) 352 # for i in range(p) 353 # ] 354 # ) 355 # else: 356 return scoring_options[scoring](X_test, preds)
Train on training_index, score on testing_index.
16class CustomClassifier(Custom, ClassifierMixin): 17 """Custom Classification model 18 19 Attributes: 20 21 obj: object 22 any object containing a method fit (obj.fit()) and a method predict 23 (obj.predict()) 24 25 n_hidden_features: int 26 number of nodes in the hidden layer 27 28 activation_name: str 29 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 30 31 a: float 32 hyperparameter for 'prelu' or 'elu' activation function 33 34 nodes_sim: str 35 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 36 'uniform' 37 38 bias: boolean 39 indicates if the hidden layer contains a bias term (True) or not 40 (False) 41 42 dropout: float 43 regularization parameter; (random) percentage of nodes dropped out 44 of the training 45 46 direct_link: boolean 47 indicates if the original predictors are included (True) in model''s 48 fitting or not (False) 49 50 n_clusters: int 51 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: 52 no clustering) 53 54 cluster_encode: bool 55 defines how the variable containing clusters is treated (default is one-hot) 56 if `False`, then labels are used, without one-hot encoding 57 58 type_clust: str 59 type of clustering method: currently k-means ('kmeans') or Gaussian 60 Mixture Model ('gmm') 61 62 type_scaling: a tuple of 3 strings 63 scaling methods for inputs, hidden layer, and clustering respectively 64 (and when relevant). 65 Currently available: standardization ('std') or MinMax scaling ('minmax') 66 67 col_sample: float 68 percentage of covariates randomly chosen for training 69 70 row_sample: float 71 percentage of rows chosen for training, by stratified bootstrapping 72 73 cv_calibration: int, cross-validation generator, or iterable, default=2 74 Determines the cross-validation splitting strategy. Same as 75 `sklearn.calibration.CalibratedClassifierCV` 76 77 calibration_method: str 78 {‘sigmoid’, ‘isotonic’}, default=’sigmoid’ 79 The method to use for calibration. Same as 80 `sklearn.calibration.CalibratedClassifierCV` 81 82 seed: int 83 reproducibility seed for nodes_sim=='uniform' 84 85 backend: str 86 "cpu" or "gpu" or "tpu" 87 88 Examples: 89 90 Note: it's better to use the `DeepClassifier` or `LazyDeepClassifier` classes directly 91 92 ```python 93 import nnetsauce as ns 94 from sklearn.ensemble import RandomForestClassifier 95 from sklearn.model_selection import train_test_split 96 from sklearn.datasets import load_digits 97 from time import time 98 99 digits = load_digits() 100 X = digits.data 101 y = digits.target 102 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, 103 random_state=123) 104 105 # layer 1 (base layer) ---- 106 layer1_regr = RandomForestClassifier(n_estimators=10, random_state=123) 107 108 start = time() 109 110 layer1_regr.fit(X_train, y_train) 111 112 # Accuracy in layer 1 113 print(layer1_regr.score(X_test, y_test)) 114 115 # layer 2 using layer 1 ---- 116 layer2_regr = ns.CustomClassifier(obj = layer1_regr, n_hidden_features=5, 117 direct_link=True, bias=True, 118 nodes_sim='uniform', activation_name='relu', 119 n_clusters=2, seed=123) 120 layer2_regr.fit(X_train, y_train) 121 122 # Accuracy in layer 2 123 print(layer2_regr.score(X_test, y_test)) 124 125 # layer 3 using layer 2 ---- 126 layer3_regr = ns.CustomClassifier(obj = layer2_regr, n_hidden_features=10, 127 direct_link=True, bias=True, dropout=0.7, 128 nodes_sim='uniform', activation_name='relu', 129 n_clusters=2, seed=123) 130 layer3_regr.fit(X_train, y_train) 131 132 # Accuracy in layer 3 133 print(layer3_regr.score(X_test, y_test)) 134 135 print(f"Elapsed {time() - start}") 136 ``` 137 138 """ 139 140 # construct the object ----- 141 _estimator_type = "classifier" 142 143 def __init__( 144 self, 145 obj, 146 n_hidden_features=5, 147 activation_name="relu", 148 a=0.01, 149 nodes_sim="sobol", 150 bias=True, 151 dropout=0, 152 direct_link=True, 153 n_clusters=2, 154 cluster_encode=True, 155 type_clust="kmeans", 156 type_scaling=("std", "std", "std"), 157 col_sample=1, 158 row_sample=1, 159 cv_calibration=2, 160 calibration_method="sigmoid", 161 seed=123, 162 backend="cpu", 163 ): 164 super().__init__( 165 obj=obj, 166 n_hidden_features=n_hidden_features, 167 activation_name=activation_name, 168 a=a, 169 nodes_sim=nodes_sim, 170 bias=bias, 171 dropout=dropout, 172 direct_link=direct_link, 173 n_clusters=n_clusters, 174 cluster_encode=cluster_encode, 175 type_clust=type_clust, 176 type_scaling=type_scaling, 177 col_sample=col_sample, 178 row_sample=row_sample, 179 seed=seed, 180 backend=backend, 181 ) 182 self.coef_ = None 183 self.intercept_ = None 184 self.type_fit = "classification" 185 self.cv_calibration = cv_calibration 186 self.calibration_method = calibration_method 187 188 def __sklearn_clone__(self): 189 """Create a clone of the estimator. 190 191 This is required for scikit-learn's calibration system to work properly. 192 """ 193 # Create a new instance with the same parameters 194 clone = CustomClassifier( 195 obj=self.obj, 196 n_hidden_features=self.n_hidden_features, 197 activation_name=self.activation_name, 198 a=self.a, 199 nodes_sim=self.nodes_sim, 200 bias=self.bias, 201 dropout=self.dropout, 202 direct_link=self.direct_link, 203 n_clusters=self.n_clusters, 204 cluster_encode=self.cluster_encode, 205 type_clust=self.type_clust, 206 type_scaling=self.type_scaling, 207 col_sample=self.col_sample, 208 row_sample=self.row_sample, 209 cv_calibration=self.cv_calibration, 210 calibration_method=self.calibration_method, 211 seed=self.seed, 212 backend=self.backend, 213 ) 214 return clone 215 216 def fit(self, X, y, sample_weight=None, **kwargs): 217 """Fit custom model to training data (X, y). 218 219 Parameters: 220 221 X: {array-like}, shape = [n_samples, n_features] 222 Training vectors, where n_samples is the number 223 of samples and n_features is the number of features. 224 225 y: array-like, shape = [n_samples] 226 Target values. 227 228 sample_weight: array-like, shape = [n_samples] 229 Sample weights. 230 231 **kwargs: additional parameters to be passed to 232 self.cook_training_set or self.obj.fit 233 234 Returns: 235 236 self: object 237 """ 238 239 if len(X.shape) == 1: 240 if isinstance(X, pd.DataFrame): 241 X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns) 242 else: 243 X = X.reshape(1, -1) 244 245 output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 246 self.classes_ = np.unique(y) 247 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 248 249 # Wrap in CalibratedClassifierCV if needed 250 if self.cv_calibration is not None: 251 self.obj = CalibratedClassifierCV( 252 self.obj, cv=self.cv_calibration, method=self.calibration_method 253 ) 254 255 # if sample_weights, else: (must use self.row_index) 256 if sample_weight is not None: 257 self.obj.fit( 258 scaled_Z, 259 output_y, 260 sample_weight=sample_weight[self.index_row_].ravel(), 261 **kwargs 262 ) 263 return self 264 265 # if sample_weight is None: 266 self.obj.fit(scaled_Z, output_y, **kwargs) 267 self.classes_ = np.unique(y) # for compatibility with sklearn 268 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 269 270 if hasattr(self.obj, "coef_"): 271 self.coef_ = self.obj.coef_ 272 273 if hasattr(self.obj, "intercept_"): 274 self.intercept_ = self.obj.intercept_ 275 276 return self 277 278 def partial_fit(self, X, y, sample_weight=None, **kwargs): 279 """Partial fit custom model to training data (X, y). 280 281 Parameters: 282 283 X: {array-like}, shape = [n_samples, n_features] 284 Subset of training vectors, where n_samples is the number 285 of samples and n_features is the number of features. 286 287 y: array-like, shape = [n_samples] 288 Subset of target values. 289 290 sample_weight: array-like, shape = [n_samples] 291 Sample weights. 292 293 **kwargs: additional parameters to be passed to 294 self.cook_training_set or self.obj.fit 295 296 Returns: 297 298 self: object 299 """ 300 301 if len(X.shape) == 1: 302 if isinstance(X, pd.DataFrame): 303 X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns) 304 else: 305 X = X.reshape(1, -1) 306 y = np.array([y], dtype=np.integer) 307 308 output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 309 self.n_classes_ = len(np.unique(y)) # for compatibility with sklearn 310 311 # if sample_weights, else: (must use self.row_index) 312 if sample_weight is not None: 313 try: 314 self.obj.partial_fit( 315 scaled_Z, 316 output_y, 317 sample_weight=sample_weight[self.index_row_].ravel(), 318 # **kwargs 319 ) 320 except: 321 NotImplementedError 322 323 return self 324 325 # if sample_weight is None: 326 # try: 327 self.obj.partial_fit(scaled_Z, output_y) 328 # except: 329 # raise NotImplementedError 330 331 self.classes_ = np.unique(y) # for compatibility with sklearn 332 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 333 334 return self 335 336 def predict(self, X, **kwargs): 337 """Predict test data X. 338 339 Parameters: 340 341 X: {array-like}, shape = [n_samples, n_features] 342 Training vectors, where n_samples is the number 343 of samples and n_features is the number of features. 344 345 **kwargs: additional parameters to be passed to 346 self.cook_test_set 347 348 Returns: 349 350 model predictions: {array-like} 351 """ 352 353 if len(X.shape) == 1: 354 n_features = X.shape[0] 355 new_X = mo.rbind( 356 X.reshape(1, n_features), 357 np.ones(n_features).reshape(1, n_features), 358 ) 359 360 return ( 361 self.obj.predict(self.cook_test_set(new_X, **kwargs), **kwargs) 362 )[0] 363 364 return self.obj.predict(self.cook_test_set(X, **kwargs), **kwargs) 365 366 def predict_proba(self, X, **kwargs): 367 """Predict probabilities for test data X. 368 369 Args: 370 371 X: {array-like}, shape = [n_samples, n_features] 372 Training vectors, where n_samples is the number 373 of samples and n_features is the number of features. 374 375 **kwargs: additional parameters to be passed to 376 self.cook_test_set 377 378 Returns: 379 380 probability estimates for test data: {array-like} 381 """ 382 383 if len(X.shape) == 1: 384 n_features = X.shape[0] 385 new_X = mo.rbind( 386 X.reshape(1, n_features), 387 np.ones(n_features).reshape(1, n_features), 388 ) 389 return ( 390 self.obj.predict_proba( 391 self.cook_test_set(new_X, **kwargs), **kwargs 392 ) 393 )[0] 394 return self.obj.predict_proba(self.cook_test_set(X, **kwargs), **kwargs) 395 396 def decision_function(self, X, **kwargs): 397 """Compute the decision function of X. 398 399 Parameters: 400 X: {array-like}, shape = [n_samples, n_features] 401 Samples to compute decision function for. 402 403 **kwargs: additional parameters to be passed to 404 self.cook_test_set 405 406 Returns: 407 array-like of shape (n_samples,) or (n_samples, n_classes) 408 Decision function of the input samples. The order of outputs is the same 409 as that of the classes passed to fit. 410 """ 411 if not hasattr(self.obj, "decision_function"): 412 # If base classifier doesn't have decision_function, use predict_proba 413 proba = self.predict_proba(X, **kwargs) 414 if proba.shape[1] == 2: 415 return proba[:, 1] # For binary classification 416 return proba # For multiclass 417 418 if len(X.shape) == 1: 419 n_features = X.shape[0] 420 new_X = mo.rbind( 421 X.reshape(1, n_features), 422 np.ones(n_features).reshape(1, n_features), 423 ) 424 425 return ( 426 self.obj.decision_function( 427 self.cook_test_set(new_X, **kwargs), **kwargs 428 ) 429 )[0] 430 431 return self.obj.decision_function( 432 self.cook_test_set(X, **kwargs), **kwargs 433 ) 434 435 def score(self, X, y, scoring=None): 436 """Scoring function for classification. 437 438 Args: 439 440 X: {array-like}, shape = [n_samples, n_features] 441 Training vectors, where n_samples is the number 442 of samples and n_features is the number of features. 443 444 y: array-like, shape = [n_samples] 445 Target values. 446 447 scoring: str 448 scoring method (default is accuracy) 449 450 Returns: 451 452 score: float 453 """ 454 455 if scoring is None: 456 scoring = "accuracy" 457 458 if scoring == "accuracy": 459 return skm2.accuracy_score(y, self.predict(X)) 460 461 if scoring == "f1": 462 return skm2.f1_score(y, self.predict(X)) 463 464 if scoring == "precision": 465 return skm2.precision_score(y, self.predict(X)) 466 467 if scoring == "recall": 468 return skm2.recall_score(y, self.predict(X)) 469 470 if scoring == "roc_auc": 471 return skm2.roc_auc_score(y, self.predict(X)) 472 473 if scoring == "log_loss": 474 return skm2.log_loss(y, self.predict_proba(X)) 475 476 if scoring == "balanced_accuracy": 477 return skm2.balanced_accuracy_score(y, self.predict(X)) 478 479 if scoring == "average_precision": 480 return skm2.average_precision_score(y, self.predict(X)) 481 482 if scoring == "neg_brier_score": 483 return -skm2.brier_score_loss(y, self.predict_proba(X)) 484 485 if scoring == "neg_log_loss": 486 return -skm2.log_loss(y, self.predict_proba(X)) 487 488 @property 489 def _estimator_type(self): 490 return "classifier"
Custom Classification model
Attributes:
obj: object
any object containing a method fit (obj.fit()) and a method predict
(obj.predict())
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model''s
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
cv_calibration: int, cross-validation generator, or iterable, default=2
Determines the cross-validation splitting strategy. Same as
`sklearn.calibration.CalibratedClassifierCV`
calibration_method: str
{‘sigmoid’, ‘isotonic’}, default=’sigmoid’
The method to use for calibration. Same as
`sklearn.calibration.CalibratedClassifierCV`
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu" or "gpu" or "tpu"
Examples:
Note: it's better to use the DeepClassifier
or LazyDeepClassifier
classes directly
import nnetsauce as ns
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_digits
from time import time
digits = load_digits()
X = digits.data
y = digits.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
random_state=123)
# layer 1 (base layer) ----
layer1_regr = RandomForestClassifier(n_estimators=10, random_state=123)
start = time()
layer1_regr.fit(X_train, y_train)
# Accuracy in layer 1
print(layer1_regr.score(X_test, y_test))
# layer 2 using layer 1 ----
layer2_regr = ns.CustomClassifier(obj = layer1_regr, n_hidden_features=5,
direct_link=True, bias=True,
nodes_sim='uniform', activation_name='relu',
n_clusters=2, seed=123)
layer2_regr.fit(X_train, y_train)
# Accuracy in layer 2
print(layer2_regr.score(X_test, y_test))
# layer 3 using layer 2 ----
layer3_regr = ns.CustomClassifier(obj = layer2_regr, n_hidden_features=10,
direct_link=True, bias=True, dropout=0.7,
nodes_sim='uniform', activation_name='relu',
n_clusters=2, seed=123)
layer3_regr.fit(X_train, y_train)
# Accuracy in layer 3
print(layer3_regr.score(X_test, y_test))
print(f"Elapsed {time() - start}")
216 def fit(self, X, y, sample_weight=None, **kwargs): 217 """Fit custom model to training data (X, y). 218 219 Parameters: 220 221 X: {array-like}, shape = [n_samples, n_features] 222 Training vectors, where n_samples is the number 223 of samples and n_features is the number of features. 224 225 y: array-like, shape = [n_samples] 226 Target values. 227 228 sample_weight: array-like, shape = [n_samples] 229 Sample weights. 230 231 **kwargs: additional parameters to be passed to 232 self.cook_training_set or self.obj.fit 233 234 Returns: 235 236 self: object 237 """ 238 239 if len(X.shape) == 1: 240 if isinstance(X, pd.DataFrame): 241 X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns) 242 else: 243 X = X.reshape(1, -1) 244 245 output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 246 self.classes_ = np.unique(y) 247 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 248 249 # Wrap in CalibratedClassifierCV if needed 250 if self.cv_calibration is not None: 251 self.obj = CalibratedClassifierCV( 252 self.obj, cv=self.cv_calibration, method=self.calibration_method 253 ) 254 255 # if sample_weights, else: (must use self.row_index) 256 if sample_weight is not None: 257 self.obj.fit( 258 scaled_Z, 259 output_y, 260 sample_weight=sample_weight[self.index_row_].ravel(), 261 **kwargs 262 ) 263 return self 264 265 # if sample_weight is None: 266 self.obj.fit(scaled_Z, output_y, **kwargs) 267 self.classes_ = np.unique(y) # for compatibility with sklearn 268 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 269 270 if hasattr(self.obj, "coef_"): 271 self.coef_ = self.obj.coef_ 272 273 if hasattr(self.obj, "intercept_"): 274 self.intercept_ = self.obj.intercept_ 275 276 return self
Fit custom model to training data (X, y).
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
sample_weight: array-like, shape = [n_samples]
Sample weights.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
336 def predict(self, X, **kwargs): 337 """Predict test data X. 338 339 Parameters: 340 341 X: {array-like}, shape = [n_samples, n_features] 342 Training vectors, where n_samples is the number 343 of samples and n_features is the number of features. 344 345 **kwargs: additional parameters to be passed to 346 self.cook_test_set 347 348 Returns: 349 350 model predictions: {array-like} 351 """ 352 353 if len(X.shape) == 1: 354 n_features = X.shape[0] 355 new_X = mo.rbind( 356 X.reshape(1, n_features), 357 np.ones(n_features).reshape(1, n_features), 358 ) 359 360 return ( 361 self.obj.predict(self.cook_test_set(new_X, **kwargs), **kwargs) 362 )[0] 363 364 return self.obj.predict(self.cook_test_set(X, **kwargs), **kwargs)
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
366 def predict_proba(self, X, **kwargs): 367 """Predict probabilities for test data X. 368 369 Args: 370 371 X: {array-like}, shape = [n_samples, n_features] 372 Training vectors, where n_samples is the number 373 of samples and n_features is the number of features. 374 375 **kwargs: additional parameters to be passed to 376 self.cook_test_set 377 378 Returns: 379 380 probability estimates for test data: {array-like} 381 """ 382 383 if len(X.shape) == 1: 384 n_features = X.shape[0] 385 new_X = mo.rbind( 386 X.reshape(1, n_features), 387 np.ones(n_features).reshape(1, n_features), 388 ) 389 return ( 390 self.obj.predict_proba( 391 self.cook_test_set(new_X, **kwargs), **kwargs 392 ) 393 )[0] 394 return self.obj.predict_proba(self.cook_test_set(X, **kwargs), **kwargs)
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
435 def score(self, X, y, scoring=None): 436 """Scoring function for classification. 437 438 Args: 439 440 X: {array-like}, shape = [n_samples, n_features] 441 Training vectors, where n_samples is the number 442 of samples and n_features is the number of features. 443 444 y: array-like, shape = [n_samples] 445 Target values. 446 447 scoring: str 448 scoring method (default is accuracy) 449 450 Returns: 451 452 score: float 453 """ 454 455 if scoring is None: 456 scoring = "accuracy" 457 458 if scoring == "accuracy": 459 return skm2.accuracy_score(y, self.predict(X)) 460 461 if scoring == "f1": 462 return skm2.f1_score(y, self.predict(X)) 463 464 if scoring == "precision": 465 return skm2.precision_score(y, self.predict(X)) 466 467 if scoring == "recall": 468 return skm2.recall_score(y, self.predict(X)) 469 470 if scoring == "roc_auc": 471 return skm2.roc_auc_score(y, self.predict(X)) 472 473 if scoring == "log_loss": 474 return skm2.log_loss(y, self.predict_proba(X)) 475 476 if scoring == "balanced_accuracy": 477 return skm2.balanced_accuracy_score(y, self.predict(X)) 478 479 if scoring == "average_precision": 480 return skm2.average_precision_score(y, self.predict(X)) 481 482 if scoring == "neg_brier_score": 483 return -skm2.brier_score_loss(y, self.predict_proba(X)) 484 485 if scoring == "neg_log_loss": 486 return -skm2.log_loss(y, self.predict_proba(X))
Scoring function for classification.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method (default is accuracy)
Returns:
score: float
18class CustomRegressor(Custom, RegressorMixin): 19 """Custom Regression model 20 21 This class is used to 'augment' any regression model with transformed features. 22 23 Parameters: 24 25 obj: object 26 any object containing a method fit (obj.fit()) and a method predict 27 (obj.predict()) 28 29 n_hidden_features: int 30 number of nodes in the hidden layer 31 32 activation_name: str 33 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 34 35 a: float 36 hyperparameter for 'prelu' or 'elu' activation function 37 38 nodes_sim: str 39 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 40 'uniform' 41 42 bias: boolean 43 indicates if the hidden layer contains a bias term (True) or not 44 (False) 45 46 dropout: float 47 regularization parameter; (random) percentage of nodes dropped out 48 of the training 49 50 direct_link: boolean 51 indicates if the original predictors are included (True) in model's 52 fitting or not (False) 53 54 n_clusters: int 55 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: 56 no clustering) 57 58 cluster_encode: bool 59 defines how the variable containing clusters is treated (default is one-hot) 60 if `False`, then labels are used, without one-hot encoding 61 62 type_clust: str 63 type of clustering method: currently k-means ('kmeans') or Gaussian 64 Mixture Model ('gmm') 65 66 type_scaling: a tuple of 3 strings 67 scaling methods for inputs, hidden layer, and clustering respectively 68 (and when relevant). 69 Currently available: standardization ('std') or MinMax scaling ('minmax') 70 71 type_pi: str. 72 type of prediction interval; currently `None` (split or local 73 conformal without simulation), "kde" or "bootstrap" (simulated split 74 conformal). 75 76 replications: int. 77 number of replications (if needed) for predictive simulation. 78 Used only in `self.predict`, for `self.kernel` in ('gaussian', 79 'tophat') and `self.type_pi = 'kde'`. Default is `None`. 80 81 kernel: str. 82 the kernel to use for kernel density estimation (used for predictive 83 simulation in `self.predict`, with `method='splitconformal'` and 84 `type_pi = 'kde'`). Currently, either 'gaussian' or 'tophat'. 85 86 type_split: str. 87 Type of splitting for conformal prediction. None (default), or 88 "random" (random split of data) or "sequential" (sequential split of data) 89 90 col_sample: float 91 percentage of covariates randomly chosen for training 92 93 row_sample: float 94 percentage of rows chosen for training, by stratified bootstrapping 95 96 level: float 97 confidence level for prediction intervals 98 99 pi_method: str 100 method for prediction intervals: 'splitconformal' or 'localconformal' 101 102 seed: int 103 reproducibility seed for nodes_sim=='uniform' 104 105 type_fit: str 106 'regression' 107 108 backend: str 109 "cpu" or "gpu" or "tpu" 110 111 Examples: 112 113 See [https://thierrymoudiki.github.io/blog/2024/03/18/python/conformal-and-bayesian-regression](https://thierrymoudiki.github.io/blog/2024/03/18/python/conformal-and-bayesian-regression) 114 115 """ 116 117 # construct the object ----- 118 119 def __init__( 120 self, 121 obj, 122 n_hidden_features=5, 123 activation_name="relu", 124 a=0.01, 125 nodes_sim="sobol", 126 bias=True, 127 dropout=0, 128 direct_link=True, 129 n_clusters=2, 130 cluster_encode=True, 131 type_clust="kmeans", 132 type_scaling=("std", "std", "std"), 133 type_pi=None, 134 replications=None, 135 kernel=None, 136 type_split=None, 137 col_sample=1, 138 row_sample=1, 139 level=None, 140 pi_method=None, 141 seed=123, 142 backend="cpu", 143 ): 144 super().__init__( 145 obj=obj, 146 n_hidden_features=n_hidden_features, 147 activation_name=activation_name, 148 a=a, 149 nodes_sim=nodes_sim, 150 bias=bias, 151 dropout=dropout, 152 direct_link=direct_link, 153 n_clusters=n_clusters, 154 cluster_encode=cluster_encode, 155 type_clust=type_clust, 156 type_scaling=type_scaling, 157 col_sample=col_sample, 158 row_sample=row_sample, 159 seed=seed, 160 backend=backend, 161 ) 162 163 self.type_fit = "regression" 164 self.type_pi = type_pi 165 self.replications = replications 166 self.kernel = kernel 167 self.type_split = type_split 168 self.level = level 169 self.pi_method = pi_method 170 self.coef_ = None 171 self.intercept_ = None 172 self.X_ = None 173 self.y_ = None 174 self.aic_ = None 175 self.aicc_ = None 176 self.bic_ = None 177 178 def fit(self, X, y, sample_weight=None, **kwargs): 179 """Fit custom model to training data (X, y). 180 181 Parameters: 182 183 X: {array-like}, shape = [n_samples, n_features] 184 Training vectors, where n_samples is the number 185 of samples and n_features is the number of features. 186 187 y: array-like, shape = [n_samples] 188 Target values. 189 190 sample_weight: array-like, shape = [n_samples] 191 Sample weights. 192 193 **kwargs: additional parameters to be passed to 194 self.cook_training_set or self.obj.fit 195 196 Returns: 197 198 self: object 199 200 """ 201 202 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 203 204 if self.level is not None: 205 self.obj = PredictionInterval( 206 obj=self.obj, method=self.pi_method, level=self.level 207 ) 208 209 # if sample_weights, else: (must use self.row_index) 210 if sample_weight is not None: 211 self.obj.fit( 212 scaled_Z, 213 centered_y, 214 sample_weight=sample_weight[self.index_row_].ravel(), 215 **kwargs 216 ) 217 218 return self 219 220 self.obj.fit(scaled_Z, centered_y, **kwargs) 221 222 self.X_ = X 223 224 self.y_ = y 225 226 # Compute SSE 227 centered_y_pred = self.obj.predict(scaled_Z) 228 self.sse_ = np.sum((centered_y - centered_y_pred) ** 2) 229 230 # Get number of parameters 231 n_params = ( 232 self.n_hidden_features + X.shape[1] 233 ) # hidden features + original features 234 if self.n_clusters > 0: 235 n_params += self.n_clusters # add clusters if used 236 237 # Compute information criteria 238 n_samples = X.shape[0] 239 temp = n_samples * np.log(self.sse_ / n_samples) 240 self.aic_ = temp + 2 * n_params 241 self.bic_ = temp + np.log(n_samples) * n_params 242 243 if hasattr(self.obj, "coef_"): 244 self.coef_ = self.obj.coef_ 245 246 if hasattr(self.obj, "intercept_"): 247 self.intercept_ = self.obj.intercept_ 248 249 return self 250 251 def partial_fit(self, X, y, **kwargs): 252 """Partial fit custom model to training data (X, y). 253 254 Parameters: 255 256 X: {array-like}, shape = [n_samples, n_features] 257 Subset of training vectors, where n_samples is the number 258 of samples and n_features is the number of features. 259 260 y: array-like, shape = [n_samples] 261 Subset of target values. 262 263 **kwargs: additional parameters to be passed to 264 self.cook_training_set or self.obj.fit 265 266 Returns: 267 268 self: object 269 270 """ 271 272 if len(X.shape) == 1: 273 if isinstance(X, pd.DataFrame): 274 X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns) 275 else: 276 X = X.reshape(1, -1) 277 y = np.array([y]) 278 279 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 280 281 self.obj.partial_fit(scaled_Z, centered_y, **kwargs) 282 283 self.X_ = X 284 285 self.y_ = y 286 287 return self 288 289 def predict(self, X, level=95, method="splitconformal", **kwargs): 290 """Predict test data X. 291 292 Parameters: 293 294 X: {array-like}, shape = [n_samples, n_features] 295 Training vectors, where n_samples is the number 296 of samples and n_features is the number of features. 297 298 level: int 299 Level of confidence (default = 95) 300 301 method: str 302 'splitconformal', 'localconformal' 303 prediction (if you specify `return_pi = True`) 304 305 **kwargs: additional parameters 306 `return_pi = True` for conformal prediction, 307 with `method` in ('splitconformal', 'localconformal') 308 or `return_std = True` for `self.obj` in 309 (`sklearn.linear_model.BayesianRidge`, 310 `sklearn.linear_model.ARDRegressor`, 311 `sklearn.gaussian_process.GaussianProcessRegressor`)` 312 313 Returns: 314 315 model predictions: 316 an array if uncertainty quantification is not requested, 317 or a tuple if with prediction intervals and simulations 318 if `return_std = True` (mean, standard deviation, 319 lower and upper prediction interval) or `return_pi = True` 320 () 321 322 """ 323 324 if "return_std" in kwargs: 325 326 alpha = 100 - level 327 pi_multiplier = norm.ppf(1 - alpha / 200) 328 329 if len(X.shape) == 1: 330 331 n_features = X.shape[0] 332 new_X = mo.rbind( 333 X.reshape(1, n_features), 334 np.ones(n_features).reshape(1, n_features), 335 ) 336 337 mean_, std_ = self.obj.predict( 338 self.cook_test_set(new_X, **kwargs), return_std=True 339 )[0] 340 341 preds = self.y_mean_ + mean_ 342 lower = self.y_mean_ + (mean_ - pi_multiplier * std_) 343 upper = self.y_mean_ + (mean_ + pi_multiplier * std_) 344 345 DescribeResults = namedtuple( 346 "DescribeResults", ["mean", "std", "lower", "upper"] 347 ) 348 349 return DescribeResults(preds, std_, lower, upper) 350 351 # len(X.shape) > 1 352 mean_, std_ = self.obj.predict( 353 self.cook_test_set(X, **kwargs), return_std=True 354 ) 355 356 preds = self.y_mean_ + mean_ 357 lower = self.y_mean_ + (mean_ - pi_multiplier * std_) 358 upper = self.y_mean_ + (mean_ + pi_multiplier * std_) 359 360 DescribeResults = namedtuple( 361 "DescribeResults", ["mean", "std", "lower", "upper"] 362 ) 363 364 return DescribeResults(preds, std_, lower, upper) 365 366 if "return_pi" in kwargs: 367 assert method in ( 368 "splitconformal", 369 "localconformal", 370 ), "method must be in ('splitconformal', 'localconformal')" 371 self.pi = PredictionInterval( 372 obj=self, 373 method=method, 374 level=level, 375 type_pi=self.type_pi, 376 replications=self.replications, 377 kernel=self.kernel, 378 ) 379 380 if len(self.X_.shape) == 1: 381 if isinstance(X, pd.DataFrame): 382 self.X_ = pd.DataFrame( 383 self.X_.values.reshape(1, -1), columns=self.X_.columns 384 ) 385 else: 386 self.X_ = self.X_.reshape(1, -1) 387 self.y_ = np.array([self.y_]) 388 389 self.pi.fit(self.X_, self.y_) 390 # self.X_ = None # consumes memory to keep, dangerous to delete (side effect) 391 # self.y_ = None # consumes memory to keep, dangerous to delete (side effect) 392 preds = self.pi.predict(X, return_pi=True) 393 return preds 394 395 # "return_std" not in kwargs 396 if len(X.shape) == 1: 397 398 n_features = X.shape[0] 399 new_X = mo.rbind( 400 X.reshape(1, n_features), 401 np.ones(n_features).reshape(1, n_features), 402 ) 403 404 return ( 405 self.y_mean_ 406 + self.obj.predict( 407 self.cook_test_set(new_X, **kwargs), **kwargs 408 ) 409 )[0] 410 411 # len(X.shape) > 1 412 return self.y_mean_ + self.obj.predict( 413 self.cook_test_set(X, **kwargs), **kwargs 414 ) 415 416 def score(self, X, y, scoring=None): 417 """Compute the score of the model. 418 419 Parameters: 420 421 X: {array-like}, shape = [n_samples, n_features] 422 Training vectors, where n_samples is the number 423 of samples and n_features is the number of features. 424 425 y: array-like, shape = [n_samples] 426 Target values. 427 428 scoring: str 429 scoring method 430 431 Returns: 432 433 score: float 434 435 """ 436 437 if scoring is None: 438 return np.sqrt(np.mean((self.predict(X) - y) ** 2)) 439 440 return skm2.get_scorer(scoring)(self, X, y)
Custom Regression model
This class is used to 'augment' any regression model with transformed features.
Parameters:
obj: object
any object containing a method fit (obj.fit()) and a method predict
(obj.predict())
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
type_pi: str.
type of prediction interval; currently `None` (split or local
conformal without simulation), "kde" or "bootstrap" (simulated split
conformal).
replications: int.
number of replications (if needed) for predictive simulation.
Used only in `self.predict`, for `self.kernel` in ('gaussian',
'tophat') and `self.type_pi = 'kde'`. Default is `None`.
kernel: str.
the kernel to use for kernel density estimation (used for predictive
simulation in `self.predict`, with `method='splitconformal'` and
`type_pi = 'kde'`). Currently, either 'gaussian' or 'tophat'.
type_split: str.
Type of splitting for conformal prediction. None (default), or
"random" (random split of data) or "sequential" (sequential split of data)
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
level: float
confidence level for prediction intervals
pi_method: str
method for prediction intervals: 'splitconformal' or 'localconformal'
seed: int
reproducibility seed for nodes_sim=='uniform'
type_fit: str
'regression'
backend: str
"cpu" or "gpu" or "tpu"
Examples:
See https://thierrymoudiki.github.io/blog/2024/03/18/python/conformal-and-bayesian-regression
178 def fit(self, X, y, sample_weight=None, **kwargs): 179 """Fit custom model to training data (X, y). 180 181 Parameters: 182 183 X: {array-like}, shape = [n_samples, n_features] 184 Training vectors, where n_samples is the number 185 of samples and n_features is the number of features. 186 187 y: array-like, shape = [n_samples] 188 Target values. 189 190 sample_weight: array-like, shape = [n_samples] 191 Sample weights. 192 193 **kwargs: additional parameters to be passed to 194 self.cook_training_set or self.obj.fit 195 196 Returns: 197 198 self: object 199 200 """ 201 202 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 203 204 if self.level is not None: 205 self.obj = PredictionInterval( 206 obj=self.obj, method=self.pi_method, level=self.level 207 ) 208 209 # if sample_weights, else: (must use self.row_index) 210 if sample_weight is not None: 211 self.obj.fit( 212 scaled_Z, 213 centered_y, 214 sample_weight=sample_weight[self.index_row_].ravel(), 215 **kwargs 216 ) 217 218 return self 219 220 self.obj.fit(scaled_Z, centered_y, **kwargs) 221 222 self.X_ = X 223 224 self.y_ = y 225 226 # Compute SSE 227 centered_y_pred = self.obj.predict(scaled_Z) 228 self.sse_ = np.sum((centered_y - centered_y_pred) ** 2) 229 230 # Get number of parameters 231 n_params = ( 232 self.n_hidden_features + X.shape[1] 233 ) # hidden features + original features 234 if self.n_clusters > 0: 235 n_params += self.n_clusters # add clusters if used 236 237 # Compute information criteria 238 n_samples = X.shape[0] 239 temp = n_samples * np.log(self.sse_ / n_samples) 240 self.aic_ = temp + 2 * n_params 241 self.bic_ = temp + np.log(n_samples) * n_params 242 243 if hasattr(self.obj, "coef_"): 244 self.coef_ = self.obj.coef_ 245 246 if hasattr(self.obj, "intercept_"): 247 self.intercept_ = self.obj.intercept_ 248 249 return self
Fit custom model to training data (X, y).
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
sample_weight: array-like, shape = [n_samples]
Sample weights.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
289 def predict(self, X, level=95, method="splitconformal", **kwargs): 290 """Predict test data X. 291 292 Parameters: 293 294 X: {array-like}, shape = [n_samples, n_features] 295 Training vectors, where n_samples is the number 296 of samples and n_features is the number of features. 297 298 level: int 299 Level of confidence (default = 95) 300 301 method: str 302 'splitconformal', 'localconformal' 303 prediction (if you specify `return_pi = True`) 304 305 **kwargs: additional parameters 306 `return_pi = True` for conformal prediction, 307 with `method` in ('splitconformal', 'localconformal') 308 or `return_std = True` for `self.obj` in 309 (`sklearn.linear_model.BayesianRidge`, 310 `sklearn.linear_model.ARDRegressor`, 311 `sklearn.gaussian_process.GaussianProcessRegressor`)` 312 313 Returns: 314 315 model predictions: 316 an array if uncertainty quantification is not requested, 317 or a tuple if with prediction intervals and simulations 318 if `return_std = True` (mean, standard deviation, 319 lower and upper prediction interval) or `return_pi = True` 320 () 321 322 """ 323 324 if "return_std" in kwargs: 325 326 alpha = 100 - level 327 pi_multiplier = norm.ppf(1 - alpha / 200) 328 329 if len(X.shape) == 1: 330 331 n_features = X.shape[0] 332 new_X = mo.rbind( 333 X.reshape(1, n_features), 334 np.ones(n_features).reshape(1, n_features), 335 ) 336 337 mean_, std_ = self.obj.predict( 338 self.cook_test_set(new_X, **kwargs), return_std=True 339 )[0] 340 341 preds = self.y_mean_ + mean_ 342 lower = self.y_mean_ + (mean_ - pi_multiplier * std_) 343 upper = self.y_mean_ + (mean_ + pi_multiplier * std_) 344 345 DescribeResults = namedtuple( 346 "DescribeResults", ["mean", "std", "lower", "upper"] 347 ) 348 349 return DescribeResults(preds, std_, lower, upper) 350 351 # len(X.shape) > 1 352 mean_, std_ = self.obj.predict( 353 self.cook_test_set(X, **kwargs), return_std=True 354 ) 355 356 preds = self.y_mean_ + mean_ 357 lower = self.y_mean_ + (mean_ - pi_multiplier * std_) 358 upper = self.y_mean_ + (mean_ + pi_multiplier * std_) 359 360 DescribeResults = namedtuple( 361 "DescribeResults", ["mean", "std", "lower", "upper"] 362 ) 363 364 return DescribeResults(preds, std_, lower, upper) 365 366 if "return_pi" in kwargs: 367 assert method in ( 368 "splitconformal", 369 "localconformal", 370 ), "method must be in ('splitconformal', 'localconformal')" 371 self.pi = PredictionInterval( 372 obj=self, 373 method=method, 374 level=level, 375 type_pi=self.type_pi, 376 replications=self.replications, 377 kernel=self.kernel, 378 ) 379 380 if len(self.X_.shape) == 1: 381 if isinstance(X, pd.DataFrame): 382 self.X_ = pd.DataFrame( 383 self.X_.values.reshape(1, -1), columns=self.X_.columns 384 ) 385 else: 386 self.X_ = self.X_.reshape(1, -1) 387 self.y_ = np.array([self.y_]) 388 389 self.pi.fit(self.X_, self.y_) 390 # self.X_ = None # consumes memory to keep, dangerous to delete (side effect) 391 # self.y_ = None # consumes memory to keep, dangerous to delete (side effect) 392 preds = self.pi.predict(X, return_pi=True) 393 return preds 394 395 # "return_std" not in kwargs 396 if len(X.shape) == 1: 397 398 n_features = X.shape[0] 399 new_X = mo.rbind( 400 X.reshape(1, n_features), 401 np.ones(n_features).reshape(1, n_features), 402 ) 403 404 return ( 405 self.y_mean_ 406 + self.obj.predict( 407 self.cook_test_set(new_X, **kwargs), **kwargs 408 ) 409 )[0] 410 411 # len(X.shape) > 1 412 return self.y_mean_ + self.obj.predict( 413 self.cook_test_set(X, **kwargs), **kwargs 414 )
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
level: int
Level of confidence (default = 95)
method: str
'splitconformal', 'localconformal'
prediction (if you specify `return_pi = True`)
**kwargs: additional parameters
`return_pi = True` for conformal prediction,
with `method` in ('splitconformal', 'localconformal')
or `return_std = True` for `self.obj` in
(`sklearn.linear_model.BayesianRidge`,
`sklearn.linear_model.ARDRegressor`,
`sklearn.gaussian_process.GaussianProcessRegressor`)`
Returns:
model predictions:
an array if uncertainty quantification is not requested,
or a tuple if with prediction intervals and simulations
if `return_std = True` (mean, standard deviation,
lower and upper prediction interval) or `return_pi = True`
()
416 def score(self, X, y, scoring=None): 417 """Compute the score of the model. 418 419 Parameters: 420 421 X: {array-like}, shape = [n_samples, n_features] 422 Training vectors, where n_samples is the number 423 of samples and n_features is the number of features. 424 425 y: array-like, shape = [n_samples] 426 Target values. 427 428 scoring: str 429 scoring method 430 431 Returns: 432 433 score: float 434 435 """ 436 437 if scoring is None: 438 return np.sqrt(np.mean((self.predict(X) - y) ** 2)) 439 440 return skm2.get_scorer(scoring)(self, X, y)
Compute the score of the model.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method
Returns:
score: float
20class CustomBackPropRegressor(Custom, RegressorMixin): 21 """ 22 Finite difference trainer for nnetsauce models. 23 24 Parameters 25 ---------- 26 27 base_model : str 28 The name of the base model (e.g., 'RidgeCV'). 29 30 type_grad : {'finitediff', 'autodiff'}, optional 31 Type of gradient computation to use (default='finitediff'). 32 33 lr : float, optional 34 Learning rate for optimization (default=1e-4). 35 36 optimizer : {'gd', 'sgd', 'adam', 'cd'}, optional 37 Optimization algorithm: gradient descent ('gd'), stochastic gradient descent ('sgd'), 38 Adam ('adam'), or coordinate descent ('cd'). Default is 'gd'. 39 40 eps : float, optional 41 Scaling factor for adaptive finite difference step size (default=1e-3). 42 43 batch_size : int, optional 44 Batch size for 'sgd' optimizer (default=32). 45 46 alpha : float, optional 47 Elastic net penalty strength (default=0.0). 48 49 l1_ratio : float, optional 50 Elastic net mixing parameter (0 = Ridge, 1 = Lasso, default=0.0). 51 52 type_loss : {'mse', 'quantile'}, optional 53 Type of loss function to use (default='mse'). 54 55 q : float, optional 56 Quantile for quantile loss (default=0.5). 57 58 **kwargs 59 Additional parameters to pass to the scikit-learn model. 60 61 """ 62 63 def __init__( 64 self, 65 base_model, 66 type_grad="finitediff", 67 lr=1e-4, 68 optimizer="gd", 69 eps=1e-3, 70 batch_size=32, 71 alpha=0.0, 72 l1_ratio=0.0, 73 type_loss="mse", 74 q=0.5, 75 backend="cpu", 76 **kwargs, 77 ): 78 super().__init__(base_model, True, **kwargs) 79 self.base_model = base_model 80 self.custom_kwargs = kwargs 81 self.backend = backend 82 self.model = ns.CustomRegressor( 83 self.base_model, backend=self.backend, **self.custom_kwargs 84 ) 85 assert isinstance( 86 self.model, ns.CustomRegressor 87 ), "'model' must be of class ns.CustomRegressor" 88 self.type_grad = type_grad 89 self.lr = lr 90 self.optimizer = optimizer 91 self.eps = eps 92 self.loss_history_ = [] 93 self.opt_state = None 94 self.batch_size = batch_size # for SGD 95 self.loss_history_ = [] 96 self._cd_index = 0 # For coordinate descent 97 self.alpha = alpha 98 self.l1_ratio = l1_ratio 99 self.type_loss = type_loss 100 self.q = q 101 102 def _loss(self, X, y, **kwargs): 103 """ 104 Compute the loss (with elastic net penalty) for the current model. 105 106 Parameters 107 ---------- 108 109 X : array-like of shape (n_samples, n_features) 110 Input data. 111 112 y : array-like of shape (n_samples,) 113 Target values. 114 115 **kwargs 116 Additional keyword arguments for loss calculation. 117 118 Returns 119 ------- 120 float 121 The computed loss value. 122 """ 123 y_pred = self.model.predict(X) 124 if self.type_loss == "mse": 125 loss = np.mean((y - y_pred) ** 2) 126 elif self.type_loss == "quantile": 127 loss = mean_pinball_loss(y, y_pred, alpha=self.q, **kwargs) 128 W = self.model.W_ 129 l1 = np.sum(np.abs(W)) 130 l2 = np.sum(W**2) 131 return loss + self.alpha * ( 132 self.l1_ratio * l1 + 0.5 * (1 - self.l1_ratio) * l2 133 ) 134 135 def _compute_grad(self, X, y): 136 """ 137 Compute the gradient of the loss with respect to W_ using finite differences. 138 139 Parameters 140 ---------- 141 142 X : array-like of shape (n_samples, n_features) 143 Input data. 144 145 y : array-like of shape (n_samples,) 146 Target values. 147 148 Returns 149 ------- 150 151 ndarray 152 Gradient array with the same shape as W_. 153 """ 154 if self.type_grad == "autodiff": 155 raise NotImplementedError( 156 "Automatic differentiation is not implemented yet." 157 ) 158 # Use JAX for automatic differentiation 159 W = deepcopy(self.model.W_) 160 W_flat = W.flatten() 161 n_params = W_flat.size 162 163 def loss_fn(W_flat): 164 W_reshaped = W_flat.reshape(W.shape) 165 self.model.W_ = W_reshaped 166 return self._loss(X, y) 167 168 grad_fn = jax.grad(loss_fn) 169 grad_flat = grad_fn(W_flat) 170 grad = grad_flat.reshape(W.shape) 171 172 # Add elastic net gradient 173 l1_grad = self.alpha * self.l1_ratio * np.sign(W) 174 l2_grad = self.alpha * (1 - self.l1_ratio) * W 175 grad += l1_grad + l2_grad 176 177 self.model.W_ = W 178 return grad 179 180 # Finite difference gradient computation 181 W = deepcopy(self.model.W_) 182 shape = W.shape 183 W_flat = W.flatten() 184 n_params = W_flat.size 185 186 # Adaptive finite difference step 187 h_vec = self.eps * np.maximum(1.0, np.abs(W_flat)) 188 eye = np.eye(n_params) 189 190 loss_plus = np.zeros(n_params) 191 loss_minus = np.zeros(n_params) 192 193 for i in range(n_params): 194 h_i = h_vec[i] 195 Wp = W_flat.copy() 196 Wp[i] += h_i 197 Wm = W_flat.copy() 198 Wm[i] -= h_i 199 200 self.model.W_ = Wp.reshape(shape) 201 loss_plus[i] = self._loss(X, y) 202 203 self.model.W_ = Wm.reshape(shape) 204 loss_minus[i] = self._loss(X, y) 205 206 grad = ((loss_plus - loss_minus) / (2 * h_vec)).reshape(shape) 207 208 # Add elastic net gradient 209 l1_grad = self.alpha * self.l1_ratio * np.sign(W) 210 l2_grad = self.alpha * (1 - self.l1_ratio) * W 211 grad += l1_grad + l2_grad 212 213 self.model.W_ = W # restore original 214 return grad 215 216 def fit( 217 self, 218 X, 219 y, 220 epochs=10, 221 verbose=True, 222 show_progress=True, 223 sample_weight=None, 224 **kwargs, 225 ): 226 """ 227 Fit the model using finite difference optimization. 228 229 Parameters 230 ---------- 231 232 X : array-like of shape (n_samples, n_features) 233 Training data. 234 235 y : array-like of shape (n_samples,) 236 Target values. 237 238 epochs : int, optional 239 Number of optimization steps (default=10). 240 241 verbose : bool, optional 242 Whether to print progress messages (default=True). 243 244 show_progress : bool, optional 245 Whether to show tqdm progress bar (default=True). 246 247 sample_weight : array-like, optional 248 Sample weights. 249 250 **kwargs 251 Additional keyword arguments. 252 253 Returns 254 ------- 255 256 self : object 257 Returns self. 258 """ 259 260 self.model.fit(X, y) 261 262 iterator = tqdm(range(epochs)) if show_progress else range(epochs) 263 264 for epoch in iterator: 265 grad = self._compute_grad(X, y) 266 267 if self.optimizer == "gd": 268 self.model.W_ -= self.lr * grad 269 self.model.W_ = np.clip(self.model.W_, 0, 1) 270 # print("self.model.W_", self.model.W_) 271 272 elif self.optimizer == "sgd": 273 # Sample a mini-batch for stochastic gradient 274 n_samples = X.shape[0] 275 idxs = np.random.choice( 276 n_samples, self.batch_size, replace=False 277 ) 278 if isinstance(X, pd.DataFrame): 279 X_batch = X.iloc[idxs, :] 280 else: 281 X_batch = X[idxs, :] 282 y_batch = y[idxs] 283 grad = self._compute_grad(X_batch, y_batch) 284 285 self.model.W_ -= self.lr * grad 286 self.model.W_ = np.clip(self.model.W_, 0, 1) 287 288 elif self.optimizer == "adam": 289 if self.opt_state is None: 290 self.opt_state = { 291 "m": np.zeros_like(grad), 292 "v": np.zeros_like(grad), 293 "t": 0, 294 } 295 beta1, beta2, eps = 0.9, 0.999, 1e-8 296 self.opt_state["t"] += 1 297 self.opt_state["m"] = ( 298 beta1 * self.opt_state["m"] + (1 - beta1) * grad 299 ) 300 self.opt_state["v"] = beta2 * self.opt_state["v"] + ( 301 1 - beta2 302 ) * (grad**2) 303 m_hat = self.opt_state["m"] / (1 - beta1 ** self.opt_state["t"]) 304 v_hat = self.opt_state["v"] / (1 - beta2 ** self.opt_state["t"]) 305 306 self.model.W_ -= self.lr * m_hat / (np.sqrt(v_hat) + eps) 307 self.model.W_ = np.clip(self.model.W_, 0, 1) 308 # print("self.model.W_", self.model.W_) 309 310 elif self.optimizer == "cd": # coordinate descent 311 312 W_shape = self.model.W_.shape 313 W_flat_size = self.model.W_.size 314 W_flat = self.model.W_.flatten() 315 grad_flat = grad.flatten() 316 317 # Update only one coordinate per epoch (cyclic) 318 idx = self._cd_index % W_flat_size 319 W_flat[idx] -= self.lr * grad_flat[idx] 320 # Clip the updated value 321 W_flat[idx] = np.clip(W_flat[idx], 0, 1) 322 323 # Restore W_ 324 self.model.W_ = W_flat.reshape(W_shape) 325 326 self._cd_index += 1 327 328 else: 329 raise ValueError(f"Unsupported optimizer: {self.optimizer}") 330 331 loss = self._loss(X, y) 332 self.loss_history_.append(loss) 333 334 if verbose: 335 print(f"Epoch {epoch+1}: Loss = {loss:.6f}") 336 337 # if sample_weights, else: (must use self.row_index) 338 if sample_weight in kwargs: 339 self.model.fit( 340 X, 341 y, 342 sample_weight=sample_weight[self.index_row_].ravel(), 343 **kwargs, 344 ) 345 346 return self 347 348 return self 349 350 def predict(self, X, level=95, method="splitconformal", **kwargs): 351 """ 352 Predict using the trained model. 353 354 Parameters 355 ---------- 356 357 X : array-like of shape (n_samples, n_features) 358 Input data. 359 360 level : int, optional 361 Level of confidence for prediction intervals (default=95). 362 363 method : {'splitconformal', 'localconformal'}, optional 364 Method for conformal prediction (default='splitconformal'). 365 366 **kwargs 367 Additional keyword arguments. Use `return_pi=True` for prediction intervals, 368 or `return_std=True` for standard deviation estimates. 369 370 Returns 371 ------- 372 373 array or tuple 374 Model predictions, or a tuple with prediction intervals or standard deviations if requested. 375 """ 376 if "return_std" in kwargs: 377 378 alpha = 100 - level 379 pi_multiplier = norm.ppf(1 - alpha / 200) 380 381 if len(X.shape) == 1: 382 383 n_features = X.shape[0] 384 new_X = mo.rbind( 385 X.reshape(1, n_features), 386 np.ones(n_features).reshape(1, n_features), 387 ) 388 389 mean_, std_ = self.model.predict(new_X, return_std=True)[0] 390 391 preds = mean_ 392 lower = mean_ - pi_multiplier * std_ 393 upper = mean_ + pi_multiplier * std_ 394 395 DescribeResults = namedtuple( 396 "DescribeResults", ["mean", "std", "lower", "upper"] 397 ) 398 399 return DescribeResults(preds, std_, lower, upper) 400 401 # len(X.shape) > 1 402 mean_, std_ = self.model.predict(X, return_std=True) 403 404 preds = mean_ 405 lower = mean_ - pi_multiplier * std_ 406 upper = mean_ + pi_multiplier * std_ 407 408 DescribeResults = namedtuple( 409 "DescribeResults", ["mean", "std", "lower", "upper"] 410 ) 411 412 return DescribeResults(preds, std_, lower, upper) 413 414 if "return_pi" in kwargs: 415 assert method in ( 416 "splitconformal", 417 "localconformal", 418 ), "method must be in ('splitconformal', 'localconformal')" 419 self.pi = ns.PredictionInterval( 420 obj=self, 421 method=method, 422 level=level, 423 type_pi=self.type_pi, 424 replications=self.replications, 425 kernel=self.kernel, 426 ) 427 428 if len(self.X_.shape) == 1: 429 if isinstance(X, pd.DataFrame): 430 self.X_ = pd.DataFrame( 431 self.X_.values.reshape(1, -1), columns=self.X_.columns 432 ) 433 else: 434 self.X_ = self.X_.reshape(1, -1) 435 self.y_ = np.array([self.y_]) 436 437 self.pi.fit(self.X_, self.y_) 438 # self.X_ = None # consumes memory to keep, dangerous to delete (side effect) 439 # self.y_ = None # consumes memory to keep, dangerous to delete (side effect) 440 preds = self.pi.predict(X, return_pi=True) 441 return preds 442 443 # "return_std" not in kwargs 444 if len(X.shape) == 1: 445 446 n_features = X.shape[0] 447 new_X = mo.rbind( 448 X.reshape(1, n_features), 449 np.ones(n_features).reshape(1, n_features), 450 ) 451 452 return (0 + self.model.predict(new_X, **kwargs))[0] 453 454 # len(X.shape) > 1 455 return self.model.predict(X, **kwargs)
Finite difference trainer for nnetsauce models.
Parameters
base_model : str The name of the base model (e.g., 'RidgeCV').
type_grad : {'finitediff', 'autodiff'}, optional Type of gradient computation to use (default='finitediff').
lr : float, optional Learning rate for optimization (default=1e-4).
optimizer : {'gd', 'sgd', 'adam', 'cd'}, optional Optimization algorithm: gradient descent ('gd'), stochastic gradient descent ('sgd'), Adam ('adam'), or coordinate descent ('cd'). Default is 'gd'.
eps : float, optional Scaling factor for adaptive finite difference step size (default=1e-3).
batch_size : int, optional Batch size for 'sgd' optimizer (default=32).
alpha : float, optional Elastic net penalty strength (default=0.0).
l1_ratio : float, optional Elastic net mixing parameter (0 = Ridge, 1 = Lasso, default=0.0).
type_loss : {'mse', 'quantile'}, optional Type of loss function to use (default='mse').
q : float, optional Quantile for quantile loss (default=0.5).
**kwargs Additional parameters to pass to the scikit-learn model.
216 def fit( 217 self, 218 X, 219 y, 220 epochs=10, 221 verbose=True, 222 show_progress=True, 223 sample_weight=None, 224 **kwargs, 225 ): 226 """ 227 Fit the model using finite difference optimization. 228 229 Parameters 230 ---------- 231 232 X : array-like of shape (n_samples, n_features) 233 Training data. 234 235 y : array-like of shape (n_samples,) 236 Target values. 237 238 epochs : int, optional 239 Number of optimization steps (default=10). 240 241 verbose : bool, optional 242 Whether to print progress messages (default=True). 243 244 show_progress : bool, optional 245 Whether to show tqdm progress bar (default=True). 246 247 sample_weight : array-like, optional 248 Sample weights. 249 250 **kwargs 251 Additional keyword arguments. 252 253 Returns 254 ------- 255 256 self : object 257 Returns self. 258 """ 259 260 self.model.fit(X, y) 261 262 iterator = tqdm(range(epochs)) if show_progress else range(epochs) 263 264 for epoch in iterator: 265 grad = self._compute_grad(X, y) 266 267 if self.optimizer == "gd": 268 self.model.W_ -= self.lr * grad 269 self.model.W_ = np.clip(self.model.W_, 0, 1) 270 # print("self.model.W_", self.model.W_) 271 272 elif self.optimizer == "sgd": 273 # Sample a mini-batch for stochastic gradient 274 n_samples = X.shape[0] 275 idxs = np.random.choice( 276 n_samples, self.batch_size, replace=False 277 ) 278 if isinstance(X, pd.DataFrame): 279 X_batch = X.iloc[idxs, :] 280 else: 281 X_batch = X[idxs, :] 282 y_batch = y[idxs] 283 grad = self._compute_grad(X_batch, y_batch) 284 285 self.model.W_ -= self.lr * grad 286 self.model.W_ = np.clip(self.model.W_, 0, 1) 287 288 elif self.optimizer == "adam": 289 if self.opt_state is None: 290 self.opt_state = { 291 "m": np.zeros_like(grad), 292 "v": np.zeros_like(grad), 293 "t": 0, 294 } 295 beta1, beta2, eps = 0.9, 0.999, 1e-8 296 self.opt_state["t"] += 1 297 self.opt_state["m"] = ( 298 beta1 * self.opt_state["m"] + (1 - beta1) * grad 299 ) 300 self.opt_state["v"] = beta2 * self.opt_state["v"] + ( 301 1 - beta2 302 ) * (grad**2) 303 m_hat = self.opt_state["m"] / (1 - beta1 ** self.opt_state["t"]) 304 v_hat = self.opt_state["v"] / (1 - beta2 ** self.opt_state["t"]) 305 306 self.model.W_ -= self.lr * m_hat / (np.sqrt(v_hat) + eps) 307 self.model.W_ = np.clip(self.model.W_, 0, 1) 308 # print("self.model.W_", self.model.W_) 309 310 elif self.optimizer == "cd": # coordinate descent 311 312 W_shape = self.model.W_.shape 313 W_flat_size = self.model.W_.size 314 W_flat = self.model.W_.flatten() 315 grad_flat = grad.flatten() 316 317 # Update only one coordinate per epoch (cyclic) 318 idx = self._cd_index % W_flat_size 319 W_flat[idx] -= self.lr * grad_flat[idx] 320 # Clip the updated value 321 W_flat[idx] = np.clip(W_flat[idx], 0, 1) 322 323 # Restore W_ 324 self.model.W_ = W_flat.reshape(W_shape) 325 326 self._cd_index += 1 327 328 else: 329 raise ValueError(f"Unsupported optimizer: {self.optimizer}") 330 331 loss = self._loss(X, y) 332 self.loss_history_.append(loss) 333 334 if verbose: 335 print(f"Epoch {epoch+1}: Loss = {loss:.6f}") 336 337 # if sample_weights, else: (must use self.row_index) 338 if sample_weight in kwargs: 339 self.model.fit( 340 X, 341 y, 342 sample_weight=sample_weight[self.index_row_].ravel(), 343 **kwargs, 344 ) 345 346 return self 347 348 return self
Fit the model using finite difference optimization.
Parameters
X : array-like of shape (n_samples, n_features) Training data.
y : array-like of shape (n_samples,) Target values.
epochs : int, optional Number of optimization steps (default=10).
verbose : bool, optional Whether to print progress messages (default=True).
show_progress : bool, optional Whether to show tqdm progress bar (default=True).
sample_weight : array-like, optional Sample weights.
**kwargs Additional keyword arguments.
Returns
self : object Returns self.
350 def predict(self, X, level=95, method="splitconformal", **kwargs): 351 """ 352 Predict using the trained model. 353 354 Parameters 355 ---------- 356 357 X : array-like of shape (n_samples, n_features) 358 Input data. 359 360 level : int, optional 361 Level of confidence for prediction intervals (default=95). 362 363 method : {'splitconformal', 'localconformal'}, optional 364 Method for conformal prediction (default='splitconformal'). 365 366 **kwargs 367 Additional keyword arguments. Use `return_pi=True` for prediction intervals, 368 or `return_std=True` for standard deviation estimates. 369 370 Returns 371 ------- 372 373 array or tuple 374 Model predictions, or a tuple with prediction intervals or standard deviations if requested. 375 """ 376 if "return_std" in kwargs: 377 378 alpha = 100 - level 379 pi_multiplier = norm.ppf(1 - alpha / 200) 380 381 if len(X.shape) == 1: 382 383 n_features = X.shape[0] 384 new_X = mo.rbind( 385 X.reshape(1, n_features), 386 np.ones(n_features).reshape(1, n_features), 387 ) 388 389 mean_, std_ = self.model.predict(new_X, return_std=True)[0] 390 391 preds = mean_ 392 lower = mean_ - pi_multiplier * std_ 393 upper = mean_ + pi_multiplier * std_ 394 395 DescribeResults = namedtuple( 396 "DescribeResults", ["mean", "std", "lower", "upper"] 397 ) 398 399 return DescribeResults(preds, std_, lower, upper) 400 401 # len(X.shape) > 1 402 mean_, std_ = self.model.predict(X, return_std=True) 403 404 preds = mean_ 405 lower = mean_ - pi_multiplier * std_ 406 upper = mean_ + pi_multiplier * std_ 407 408 DescribeResults = namedtuple( 409 "DescribeResults", ["mean", "std", "lower", "upper"] 410 ) 411 412 return DescribeResults(preds, std_, lower, upper) 413 414 if "return_pi" in kwargs: 415 assert method in ( 416 "splitconformal", 417 "localconformal", 418 ), "method must be in ('splitconformal', 'localconformal')" 419 self.pi = ns.PredictionInterval( 420 obj=self, 421 method=method, 422 level=level, 423 type_pi=self.type_pi, 424 replications=self.replications, 425 kernel=self.kernel, 426 ) 427 428 if len(self.X_.shape) == 1: 429 if isinstance(X, pd.DataFrame): 430 self.X_ = pd.DataFrame( 431 self.X_.values.reshape(1, -1), columns=self.X_.columns 432 ) 433 else: 434 self.X_ = self.X_.reshape(1, -1) 435 self.y_ = np.array([self.y_]) 436 437 self.pi.fit(self.X_, self.y_) 438 # self.X_ = None # consumes memory to keep, dangerous to delete (side effect) 439 # self.y_ = None # consumes memory to keep, dangerous to delete (side effect) 440 preds = self.pi.predict(X, return_pi=True) 441 return preds 442 443 # "return_std" not in kwargs 444 if len(X.shape) == 1: 445 446 n_features = X.shape[0] 447 new_X = mo.rbind( 448 X.reshape(1, n_features), 449 np.ones(n_features).reshape(1, n_features), 450 ) 451 452 return (0 + self.model.predict(new_X, **kwargs))[0] 453 454 # len(X.shape) > 1 455 return self.model.predict(X, **kwargs)
Predict using the trained model.
Parameters
X : array-like of shape (n_samples, n_features) Input data.
level : int, optional Level of confidence for prediction intervals (default=95).
method : {'splitconformal', 'localconformal'}, optional Method for conformal prediction (default='splitconformal').
**kwargs
Additional keyword arguments. Use return_pi=True
for prediction intervals,
or return_std=True
for standard deviation estimates.
Returns
array or tuple Model predictions, or a tuple with prediction intervals or standard deviations if requested.
36class DeepClassifier(CustomClassifier, ClassifierMixin): 37 """ 38 Deep Classifier 39 40 Parameters: 41 42 obj: an object 43 A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification 44 45 n_layers: int (default=3) 46 Number of layers. `n_layers = 1` is a simple `CustomClassifier` 47 48 verbose : int, optional (default=0) 49 Monitor progress when fitting. 50 51 All the other parameters are nnetsauce `CustomClassifier`'s 52 53 Examples: 54 55 ```python 56 import nnetsauce as ns 57 from sklearn.datasets import load_breast_cancer 58 from sklearn.model_selection import train_test_split 59 from sklearn.linear_model import LogisticRegressionCV 60 data = load_breast_cancer() 61 X = data.data 62 y= data.target 63 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123) 64 obj = LogisticRegressionCV() 65 clf = ns.DeepClassifier(obj) 66 clf.fit(X_train, y_train) 67 print(clf.score(clf.predict(X_test), y_test)) 68 ``` 69 """ 70 71 _estimator_type = "classifier" 72 73 def __init__( 74 self, 75 obj, 76 # Defining depth 77 n_layers=3, 78 verbose=0, 79 # CustomClassifier attributes 80 n_hidden_features=5, 81 activation_name="relu", 82 a=0.01, 83 nodes_sim="sobol", 84 bias=True, 85 dropout=0, 86 direct_link=True, 87 n_clusters=2, 88 cluster_encode=True, 89 type_clust="kmeans", 90 type_scaling=("std", "std", "std"), 91 col_sample=1, 92 row_sample=1, 93 cv_calibration=2, 94 calibration_method="sigmoid", 95 seed=123, 96 backend="cpu", 97 ): 98 super().__init__( 99 obj=obj, 100 n_hidden_features=n_hidden_features, 101 activation_name=activation_name, 102 a=a, 103 nodes_sim=nodes_sim, 104 bias=bias, 105 dropout=dropout, 106 direct_link=direct_link, 107 n_clusters=n_clusters, 108 cluster_encode=cluster_encode, 109 type_clust=type_clust, 110 type_scaling=type_scaling, 111 col_sample=col_sample, 112 row_sample=row_sample, 113 seed=seed, 114 backend=backend, 115 ) 116 self.coef_ = None 117 self.intercept_ = None 118 self.type_fit = "classification" 119 self.cv_calibration = cv_calibration 120 self.calibration_method = calibration_method 121 122 # Only wrap in CalibratedClassifierCV if not already wrapped 123 # if not isinstance(obj, CalibratedClassifierCV): 124 # self.obj = CalibratedClassifierCV( 125 # self.obj, 126 # cv=self.cv_calibration, 127 # method=self.calibration_method 128 # ) 129 # else: 130 self.coef_ = None 131 self.intercept_ = None 132 self.type_fit = "classification" 133 self.cv_calibration = cv_calibration 134 self.calibration_method = calibration_method 135 self.obj = obj 136 137 assert n_layers >= 1, "must have n_layers >= 1" 138 self.stacked_obj = obj 139 self.verbose = verbose 140 self.n_layers = n_layers 141 self.classes_ = None 142 self.n_classes_ = None 143 144 def fit(self, X, y, **kwargs): 145 """Fit Classification algorithms to X and y. 146 Parameters 147 ---------- 148 X : array-like, 149 Training vectors, where rows is the number of samples 150 and columns is the number of features. 151 y : array-like, 152 Training vectors, where rows is the number of samples 153 and columns is the number of features. 154 **kwargs: dict 155 Additional parameters to be passed to the fit method 156 of the base learner. For example, `sample_weight`. 157 158 Returns 159 ------- 160 A fitted object 161 """ 162 163 self.classes_ = np.unique(y) 164 self.n_classes_ = len( 165 self.classes_ 166 ) # for compatibility with scikit-learn 167 168 if isinstance(X, np.ndarray): 169 X = pd.DataFrame(X) 170 171 # init layer 172 self.stacked_obj = CustomClassifier( 173 obj=self.stacked_obj, 174 n_hidden_features=self.n_hidden_features, 175 activation_name=self.activation_name, 176 a=self.a, 177 nodes_sim=self.nodes_sim, 178 bias=self.bias, 179 dropout=self.dropout, 180 direct_link=self.direct_link, 181 n_clusters=self.n_clusters, 182 cluster_encode=self.cluster_encode, 183 type_clust=self.type_clust, 184 type_scaling=self.type_scaling, 185 col_sample=self.col_sample, 186 row_sample=self.row_sample, 187 cv_calibration=None, 188 calibration_method=None, 189 seed=self.seed, 190 backend=self.backend, 191 ) 192 193 if self.verbose > 0: 194 iterator = tqdm(range(self.n_layers - 1)) 195 else: 196 iterator = range(self.n_layers - 1) 197 198 for _ in iterator: 199 self.stacked_obj = deepcopy( 200 CustomClassifier( 201 obj=self.stacked_obj, 202 n_hidden_features=self.n_hidden_features, 203 activation_name=self.activation_name, 204 a=self.a, 205 nodes_sim=self.nodes_sim, 206 bias=self.bias, 207 dropout=self.dropout, 208 direct_link=self.direct_link, 209 n_clusters=self.n_clusters, 210 cluster_encode=self.cluster_encode, 211 type_clust=self.type_clust, 212 type_scaling=self.type_scaling, 213 col_sample=self.col_sample, 214 row_sample=self.row_sample, 215 cv_calibration=None, 216 calibration_method=None, 217 seed=self.seed, 218 backend=self.backend, 219 ) 220 ) 221 self.stacked_obj.fit(X, y, **kwargs) 222 223 return self 224 225 def partial_fit(self, X, y, **kwargs): 226 """Fit Regression algorithms to X and y. 227 Parameters 228 ---------- 229 X : array-like, 230 Training vectors, where rows is the number of samples 231 and columns is the number of features. 232 y : array-like, 233 Training vectors, where rows is the number of samples 234 and columns is the number of features. 235 **kwargs: dict 236 Additional parameters to be passed to the fit method 237 of the base learner. For example, `sample_weight`. 238 Returns 239 ------- 240 A fitted object 241 """ 242 assert hasattr(self, "stacked_obj"), "model must be fitted first" 243 current_obj = self.stacked_obj 244 for _ in range(self.n_layers): 245 try: 246 input_X = current_obj.obj.cook_test_set(X) 247 current_obj.obj.partial_fit(input_X, y, **kwargs) 248 try: 249 current_obj = current_obj.obj 250 except AttributeError: 251 pass 252 except ValueError: 253 pass 254 return self 255 256 def predict(self, X): 257 return self.stacked_obj.predict(X) 258 259 def predict_proba(self, X): 260 return self.stacked_obj.predict_proba(X) 261 262 def score(self, X, y, scoring=None): 263 return self.stacked_obj.score(X, y, scoring) 264 265 def cross_val_optim( 266 self, 267 X_train, 268 y_train, 269 X_test=None, 270 y_test=None, 271 scoring="accuracy", 272 surrogate_obj=None, 273 cv=5, 274 n_jobs=None, 275 n_init=10, 276 n_iter=190, 277 abs_tol=1e-3, 278 verbose=2, 279 seed=123, 280 **kwargs, 281 ): 282 """Cross-validation function and hyperparameters' search 283 284 Parameters: 285 286 X_train: array-like, 287 Training vectors, where rows is the number of samples 288 and columns is the number of features. 289 290 y_train: array-like, 291 Training vectors, where rows is the number of samples 292 and columns is the number of features. 293 294 X_test: array-like, 295 Testing vectors, where rows is the number of samples 296 and columns is the number of features. 297 298 y_test: array-like, 299 Testing vectors, where rows is the number of samples 300 and columns is the number of features. 301 302 scoring: str 303 scoring metric; see https://scikit-learn.org/stable/modules/model_evaluation.html#the-scoring-parameter-defining-model-evaluation-rules 304 305 surrogate_obj: an object; 306 An ML model for estimating the uncertainty around the objective function 307 308 cv: int; 309 number of cross-validation folds 310 311 n_jobs: int; 312 number of jobs for parallel execution 313 314 n_init: an integer; 315 number of points in the initial setting, when `x_init` and `y_init` are not provided 316 317 n_iter: an integer; 318 number of iterations of the minimization algorithm 319 320 abs_tol: a float; 321 tolerance for convergence of the optimizer (early stopping based on acquisition function) 322 323 verbose: int 324 controls verbosity 325 326 seed: int 327 reproducibility seed 328 329 **kwargs: dict 330 additional parameters to be passed to the estimator 331 332 Examples: 333 334 ```python 335 ``` 336 """ 337 338 num_to_activation_name = {1: "relu", 2: "sigmoid", 3: "tanh"} 339 num_to_nodes_sim = {1: "sobol", 2: "uniform", 3: "hammersley"} 340 num_to_type_clust = {1: "kmeans", 2: "gmm"} 341 342 def deepclassifier_cv( 343 X_train, 344 y_train, 345 # Defining depth 346 n_layers=3, 347 # CustomClassifier attributes 348 n_hidden_features=5, 349 activation_name="relu", 350 nodes_sim="sobol", 351 dropout=0, 352 n_clusters=2, 353 type_clust="kmeans", 354 cv=5, 355 n_jobs=None, 356 scoring="accuracy", 357 seed=123, 358 ): 359 self.set_params( 360 **{ 361 "n_layers": n_layers, 362 # CustomClassifier attributes 363 "n_hidden_features": n_hidden_features, 364 "activation_name": activation_name, 365 "nodes_sim": nodes_sim, 366 "dropout": dropout, 367 "n_clusters": n_clusters, 368 "type_clust": type_clust, 369 **kwargs, 370 } 371 ) 372 return -cross_val_score( 373 estimator=self, 374 X=X_train, 375 y=y_train, 376 scoring=scoring, 377 cv=cv, 378 n_jobs=n_jobs, 379 verbose=0, 380 ).mean() 381 382 # objective function for hyperparams tuning 383 def crossval_objective(xx): 384 return deepclassifier_cv( 385 X_train=X_train, 386 y_train=y_train, 387 # Defining depth 388 n_layers=int(np.ceil(xx[0])), 389 # CustomClassifier attributes 390 n_hidden_features=int(np.ceil(xx[1])), 391 activation_name=num_to_activation_name[np.ceil(xx[2])], 392 nodes_sim=num_to_nodes_sim[int(np.ceil(xx[3]))], 393 dropout=xx[4], 394 n_clusters=int(np.ceil(xx[5])), 395 type_clust=num_to_type_clust[int(np.ceil(xx[6]))], 396 cv=cv, 397 n_jobs=n_jobs, 398 scoring=scoring, 399 seed=seed, 400 ) 401 402 if surrogate_obj is None: 403 gp_opt = gp.GPOpt( 404 objective_func=crossval_objective, 405 lower_bound=np.array([0, 3, 0, 0, 0.0, 0, 0]), 406 upper_bound=np.array([5, 100, 3, 3, 0.4, 5, 2]), 407 params_names=[ 408 "n_layers", 409 # CustomClassifier attributes 410 "n_hidden_features", 411 "activation_name", 412 "nodes_sim", 413 "dropout", 414 "n_clusters", 415 "type_clust", 416 ], 417 method="bayesian", 418 n_init=n_init, 419 n_iter=n_iter, 420 seed=seed, 421 ) 422 else: 423 gp_opt = gp.GPOpt( 424 objective_func=crossval_objective, 425 lower_bound=np.array([0, 3, 0, 0, 0.0, 0, 0]), 426 upper_bound=np.array([5, 100, 3, 3, 0.4, 5, 2]), 427 params_names=[ 428 "n_layers", 429 # CustomClassifier attributes 430 "n_hidden_features", 431 "activation_name", 432 "nodes_sim", 433 "dropout", 434 "n_clusters", 435 "type_clust", 436 ], 437 acquisition="ucb", 438 method="splitconformal", 439 surrogate_obj=ns.PredictionInterval( 440 obj=surrogate_obj, method="splitconformal" 441 ), 442 n_init=n_init, 443 n_iter=n_iter, 444 seed=seed, 445 ) 446 447 res = gp_opt.optimize(verbose=verbose, abs_tol=abs_tol) 448 res.best_params["n_layers"] = int(np.ceil(res.best_params["n_layers"])) 449 res.best_params["n_hidden_features"] = int( 450 np.ceil(res.best_params["n_hidden_features"]) 451 ) 452 res.best_params["activation_name"] = num_to_activation_name[ 453 np.ceil(res.best_params["activation_name"]) 454 ] 455 res.best_params["nodes_sim"] = num_to_nodes_sim[ 456 int(np.ceil(res.best_params["nodes_sim"])) 457 ] 458 res.best_params["dropout"] = res.best_params["dropout"] 459 res.best_params["n_clusters"] = int( 460 np.ceil(res.best_params["n_clusters"]) 461 ) 462 res.best_params["type_clust"] = num_to_type_clust[ 463 int(np.ceil(res.best_params["type_clust"])) 464 ] 465 466 # out-of-sample error 467 if X_test is not None and y_test is not None: 468 self.set_params(**res.best_params, verbose=0, seed=seed) 469 preds = self.fit(X_train, y_train).predict(X_test) 470 # check error on y_test 471 oos_err = getattr(metrics, scoring + "_score")( 472 y_true=y_test, y_pred=preds 473 ) 474 result = namedtuple("result", res._fields + ("test_" + scoring,)) 475 return result(*res, oos_err) 476 else: 477 return res 478 479 def lazy_cross_val_optim( 480 self, 481 X_train, 482 y_train, 483 X_test=None, 484 y_test=None, 485 scoring="accuracy", 486 surrogate_objs=None, 487 customize=False, 488 cv=5, 489 n_jobs=None, 490 n_init=10, 491 n_iter=190, 492 abs_tol=1e-3, 493 verbose=1, 494 seed=123, 495 ): 496 """Automated Cross-validation function and hyperparameters' search using multiple surrogates 497 498 Parameters: 499 500 X_train: array-like, 501 Training vectors, where rows is the number of samples 502 and columns is the number of features. 503 504 y_train: array-like, 505 Training vectors, where rows is the number of samples 506 and columns is the number of features. 507 508 X_test: array-like, 509 Testing vectors, where rows is the number of samples 510 and columns is the number of features. 511 512 y_test: array-like, 513 Testing vectors, where rows is the number of samples 514 and columns is the number of features. 515 516 scoring: str 517 scoring metric; see https://scikit-learn.org/stable/modules/model_evaluation.html#the-scoring-parameter-defining-model-evaluation-rules 518 519 surrogate_objs: object names as a list of strings; 520 ML models for estimating the uncertainty around the objective function 521 522 customize: boolean 523 if True, the surrogate is transformed into a quasi-randomized network (default is False) 524 525 cv: int; 526 number of cross-validation folds 527 528 n_jobs: int; 529 number of jobs for parallel execution 530 531 n_init: an integer; 532 number of points in the initial setting, when `x_init` and `y_init` are not provided 533 534 n_iter: an integer; 535 number of iterations of the minimization algorithm 536 537 abs_tol: a float; 538 tolerance for convergence of the optimizer (early stopping based on acquisition function) 539 540 verbose: int 541 controls verbosity 542 543 seed: int 544 reproducibility seed 545 546 Examples: 547 548 ```python 549 ``` 550 """ 551 552 removed_regressors = [ 553 "TheilSenRegressor", 554 "ARDRegression", 555 "CCA", 556 "GaussianProcessRegressor", 557 "GradientBoostingRegressor", 558 "HistGradientBoostingRegressor", 559 "IsotonicRegression", 560 "MultiOutputRegressor", 561 "MultiTaskElasticNet", 562 "MultiTaskElasticNetCV", 563 "MultiTaskLasso", 564 "MultiTaskLassoCV", 565 "OrthogonalMatchingPursuit", 566 "OrthogonalMatchingPursuitCV", 567 "PLSCanonical", 568 "PLSRegression", 569 "RadiusNeighborsRegressor", 570 "RegressorChain", 571 "StackingRegressor", 572 "VotingRegressor", 573 ] 574 575 results = [] 576 577 for est in all_estimators(): 578 579 if surrogate_objs is None: 580 581 if issubclass(est[1], RegressorMixin) and ( 582 est[0] not in removed_regressors 583 ): 584 try: 585 if customize == True: 586 surr_obj = ns.CustomClassifier(obj=est[1]()) 587 else: 588 surr_obj = est[1]() 589 res = self.cross_val_optim( 590 X_train=X_train, 591 y_train=y_train, 592 X_test=X_test, 593 y_test=y_test, 594 surrogate_obj=surr_obj, 595 cv=cv, 596 n_jobs=n_jobs, 597 scoring=scoring, 598 n_init=n_init, 599 n_iter=n_iter, 600 abs_tol=abs_tol, 601 verbose=verbose, 602 seed=seed, 603 ) 604 if customize == True: 605 results.append((f"CustomClassifier({est[0]})", res)) 606 else: 607 results.append((est[0], res)) 608 except: 609 pass 610 611 else: 612 613 if ( 614 issubclass(est[1], RegressorMixin) 615 and (est[0] not in removed_regressors) 616 and est[0] in surrogate_objs 617 ): 618 try: 619 if customize == True: 620 surr_obj = ns.CustomClassifier(obj=est[1]()) 621 else: 622 surr_obj = est[1]() 623 res = self.cross_val_optim( 624 X_train=X_train, 625 y_train=y_train, 626 X_test=X_test, 627 y_test=y_test, 628 surrogate_obj=surr_obj, 629 cv=cv, 630 n_jobs=n_jobs, 631 scoring=scoring, 632 n_init=n_init, 633 n_iter=n_iter, 634 abs_tol=abs_tol, 635 verbose=verbose, 636 seed=seed, 637 ) 638 if customize == True: 639 results.append((f"CustomClassifier({est[0]})", res)) 640 else: 641 results.append((est[0], res)) 642 except: 643 pass 644 645 return results 646 647 @property 648 def _estimator_type(self): 649 return "classifier"
Deep Classifier
Parameters:
obj: an object
A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification
n_layers: int (default=3)
Number of layers. `n_layers = 1` is a simple `CustomClassifier`
verbose : int, optional (default=0)
Monitor progress when fitting.
All the other parameters are nnetsauce `CustomClassifier`'s
Examples:
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegressionCV
data = load_breast_cancer()
X = data.data
y= data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
obj = LogisticRegressionCV()
clf = ns.DeepClassifier(obj)
clf.fit(X_train, y_train)
print(clf.score(clf.predict(X_test), y_test))
144 def fit(self, X, y, **kwargs): 145 """Fit Classification algorithms to X and y. 146 Parameters 147 ---------- 148 X : array-like, 149 Training vectors, where rows is the number of samples 150 and columns is the number of features. 151 y : array-like, 152 Training vectors, where rows is the number of samples 153 and columns is the number of features. 154 **kwargs: dict 155 Additional parameters to be passed to the fit method 156 of the base learner. For example, `sample_weight`. 157 158 Returns 159 ------- 160 A fitted object 161 """ 162 163 self.classes_ = np.unique(y) 164 self.n_classes_ = len( 165 self.classes_ 166 ) # for compatibility with scikit-learn 167 168 if isinstance(X, np.ndarray): 169 X = pd.DataFrame(X) 170 171 # init layer 172 self.stacked_obj = CustomClassifier( 173 obj=self.stacked_obj, 174 n_hidden_features=self.n_hidden_features, 175 activation_name=self.activation_name, 176 a=self.a, 177 nodes_sim=self.nodes_sim, 178 bias=self.bias, 179 dropout=self.dropout, 180 direct_link=self.direct_link, 181 n_clusters=self.n_clusters, 182 cluster_encode=self.cluster_encode, 183 type_clust=self.type_clust, 184 type_scaling=self.type_scaling, 185 col_sample=self.col_sample, 186 row_sample=self.row_sample, 187 cv_calibration=None, 188 calibration_method=None, 189 seed=self.seed, 190 backend=self.backend, 191 ) 192 193 if self.verbose > 0: 194 iterator = tqdm(range(self.n_layers - 1)) 195 else: 196 iterator = range(self.n_layers - 1) 197 198 for _ in iterator: 199 self.stacked_obj = deepcopy( 200 CustomClassifier( 201 obj=self.stacked_obj, 202 n_hidden_features=self.n_hidden_features, 203 activation_name=self.activation_name, 204 a=self.a, 205 nodes_sim=self.nodes_sim, 206 bias=self.bias, 207 dropout=self.dropout, 208 direct_link=self.direct_link, 209 n_clusters=self.n_clusters, 210 cluster_encode=self.cluster_encode, 211 type_clust=self.type_clust, 212 type_scaling=self.type_scaling, 213 col_sample=self.col_sample, 214 row_sample=self.row_sample, 215 cv_calibration=None, 216 calibration_method=None, 217 seed=self.seed, 218 backend=self.backend, 219 ) 220 ) 221 self.stacked_obj.fit(X, y, **kwargs) 222 223 return self
Fit Classification algorithms to X and y.
Parameters
X : array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
y : array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
**kwargs: dict
Additional parameters to be passed to the fit method
of the base learner. For example, sample_weight
.
Returns
A fitted object
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
Scoring function for classification.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method (default is accuracy)
Returns:
score: float
13class DeepRegressor(CustomRegressor, RegressorMixin): 14 """ 15 Deep Regressor 16 17 Parameters: 18 19 obj: an object 20 A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification 21 22 verbose : int, optional (default=0) 23 Monitor progress when fitting. 24 25 n_layers: int (default=2) 26 Number of layers. `n_layers = 1` is a simple `CustomRegressor` 27 28 All the other parameters are nnetsauce `CustomRegressor`'s 29 30 Examples: 31 32 ```python 33 import nnetsauce as ns 34 from sklearn.datasets import load_diabetes 35 from sklearn.model_selection import train_test_split 36 from sklearn.linear_model import RidgeCV 37 data = load_diabetes() 38 X = data.data 39 y= data.target 40 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123) 41 obj = RidgeCV() 42 clf = ns.DeepRegressor(obj) 43 clf.fit(X_train, y_train) 44 print(clf.score(clf.predict(X_test), y_test)) 45 ``` 46 47 """ 48 49 def __init__( 50 self, 51 obj, 52 # Defining depth 53 n_layers=2, 54 verbose=0, 55 # CustomRegressor attributes 56 n_hidden_features=5, 57 activation_name="relu", 58 a=0.01, 59 nodes_sim="sobol", 60 bias=True, 61 dropout=0, 62 direct_link=True, 63 n_clusters=2, 64 cluster_encode=True, 65 type_clust="kmeans", 66 type_scaling=("std", "std", "std"), 67 col_sample=1, 68 row_sample=1, 69 level=None, 70 pi_method="splitconformal", 71 seed=123, 72 backend="cpu", 73 ): 74 super().__init__( 75 obj=obj, 76 n_hidden_features=n_hidden_features, 77 activation_name=activation_name, 78 a=a, 79 nodes_sim=nodes_sim, 80 bias=bias, 81 dropout=dropout, 82 direct_link=direct_link, 83 n_clusters=n_clusters, 84 cluster_encode=cluster_encode, 85 type_clust=type_clust, 86 type_scaling=type_scaling, 87 col_sample=col_sample, 88 row_sample=row_sample, 89 level=level, 90 pi_method=pi_method, 91 seed=seed, 92 backend=backend, 93 ) 94 95 assert n_layers >= 1, "must have n_layers >= 1" 96 97 self.stacked_obj = deepcopy(obj) 98 self.verbose = verbose 99 self.n_layers = n_layers 100 self.level = level 101 self.pi_method = pi_method 102 self.coef_ = None 103 104 def fit(self, X, y, **kwargs): 105 """Fit Regression algorithms to X and y. 106 Parameters 107 ---------- 108 X : array-like, 109 Training vectors, where rows is the number of samples 110 and columns is the number of features. 111 y : array-like, 112 Training vectors, where rows is the number of samples 113 and columns is the number of features. 114 **kwargs: dict 115 Additional parameters to be passed to the fit method 116 of the base learner. For example, `sample_weight`. 117 Returns 118 ------- 119 A fitted object 120 """ 121 122 if isinstance(X, np.ndarray): 123 X = pd.DataFrame(X) 124 125 # init layer 126 self.stacked_obj = CustomRegressor( 127 obj=self.stacked_obj, 128 n_hidden_features=self.n_hidden_features, 129 activation_name=self.activation_name, 130 a=self.a, 131 nodes_sim=self.nodes_sim, 132 bias=self.bias, 133 dropout=self.dropout, 134 direct_link=self.direct_link, 135 n_clusters=self.n_clusters, 136 cluster_encode=self.cluster_encode, 137 type_clust=self.type_clust, 138 type_scaling=self.type_scaling, 139 col_sample=self.col_sample, 140 row_sample=self.row_sample, 141 seed=self.seed, 142 backend=self.backend, 143 ) 144 145 if self.verbose > 0: 146 iterator = tqdm(range(self.n_layers - 1)) 147 else: 148 iterator = range(self.n_layers - 1) 149 150 for _ in iterator: 151 self.stacked_obj = deepcopy( 152 CustomRegressor( 153 obj=self.stacked_obj, 154 n_hidden_features=self.n_hidden_features, 155 activation_name=self.activation_name, 156 a=self.a, 157 nodes_sim=self.nodes_sim, 158 bias=self.bias, 159 dropout=self.dropout, 160 direct_link=self.direct_link, 161 n_clusters=self.n_clusters, 162 cluster_encode=self.cluster_encode, 163 type_clust=self.type_clust, 164 type_scaling=self.type_scaling, 165 col_sample=self.col_sample, 166 row_sample=self.row_sample, 167 seed=self.seed, 168 backend=self.backend, 169 ) 170 ) 171 172 self.stacked_obj.fit(X, y, **kwargs) 173 174 if self.level is not None: 175 self.stacked_obj = PredictionInterval( 176 obj=self.stacked_obj, method=self.pi_method, level=self.level 177 ) 178 179 if hasattr(self.stacked_obj, "clustering_obj_"): 180 self.clustering_obj_ = self.stacked_obj.clustering_obj_ 181 182 if hasattr(self.stacked_obj, "coef_"): 183 self.coef_ = self.stacked_obj.coef_ 184 185 if hasattr(self.stacked_obj, "scaler_"): 186 self.scaler_ = self.stacked_obj.scaler_ 187 188 if hasattr(self.stacked_obj, "nn_scaler_"): 189 self.nn_scaler_ = self.stacked_obj.nn_scaler_ 190 191 if hasattr(self.stacked_obj, "clustering_scaler_"): 192 self.clustering_scaler_ = self.stacked_obj.clustering_scaler_ 193 194 return self 195 196 def partial_fit(self, X, y, **kwargs): 197 """Fit Regression algorithms to X and y. 198 Parameters 199 ---------- 200 X : array-like, 201 Training vectors, where rows is the number of samples 202 and columns is the number of features. 203 y : array-like, 204 Training vectors, where rows is the number of samples 205 and columns is the number of features. 206 **kwargs: dict 207 Additional parameters to be passed to the fit method 208 of the base learner. For example, `sample_weight`. 209 Returns 210 ------- 211 A fitted object 212 """ 213 assert hasattr(self, "stacked_obj"), "model must be fitted first" 214 current_obj = self.stacked_obj 215 for _ in range(self.n_layers): 216 try: 217 input_X = current_obj.obj.cook_test_set(X) 218 current_obj.obj.partial_fit(input_X, y, **kwargs) 219 try: 220 current_obj = current_obj.obj 221 except AttributeError: 222 pass 223 except ValueError as e: 224 print(e) 225 pass 226 return self 227 228 def predict(self, X, **kwargs): 229 if self.level is not None: 230 return self.stacked_obj.predict(X, return_pi=True) 231 return self.stacked_obj.predict(X, **kwargs) 232 233 def score(self, X, y, scoring=None): 234 return self.stacked_obj.score(X, y, scoring)
Deep Regressor
Parameters:
obj: an object
A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification
verbose : int, optional (default=0)
Monitor progress when fitting.
n_layers: int (default=2)
Number of layers. `n_layers = 1` is a simple `CustomRegressor`
All the other parameters are nnetsauce `CustomRegressor`'s
Examples:
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.linear_model import RidgeCV
data = load_diabetes()
X = data.data
y= data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
obj = RidgeCV()
clf = ns.DeepRegressor(obj)
clf.fit(X_train, y_train)
print(clf.score(clf.predict(X_test), y_test))
104 def fit(self, X, y, **kwargs): 105 """Fit Regression algorithms to X and y. 106 Parameters 107 ---------- 108 X : array-like, 109 Training vectors, where rows is the number of samples 110 and columns is the number of features. 111 y : array-like, 112 Training vectors, where rows is the number of samples 113 and columns is the number of features. 114 **kwargs: dict 115 Additional parameters to be passed to the fit method 116 of the base learner. For example, `sample_weight`. 117 Returns 118 ------- 119 A fitted object 120 """ 121 122 if isinstance(X, np.ndarray): 123 X = pd.DataFrame(X) 124 125 # init layer 126 self.stacked_obj = CustomRegressor( 127 obj=self.stacked_obj, 128 n_hidden_features=self.n_hidden_features, 129 activation_name=self.activation_name, 130 a=self.a, 131 nodes_sim=self.nodes_sim, 132 bias=self.bias, 133 dropout=self.dropout, 134 direct_link=self.direct_link, 135 n_clusters=self.n_clusters, 136 cluster_encode=self.cluster_encode, 137 type_clust=self.type_clust, 138 type_scaling=self.type_scaling, 139 col_sample=self.col_sample, 140 row_sample=self.row_sample, 141 seed=self.seed, 142 backend=self.backend, 143 ) 144 145 if self.verbose > 0: 146 iterator = tqdm(range(self.n_layers - 1)) 147 else: 148 iterator = range(self.n_layers - 1) 149 150 for _ in iterator: 151 self.stacked_obj = deepcopy( 152 CustomRegressor( 153 obj=self.stacked_obj, 154 n_hidden_features=self.n_hidden_features, 155 activation_name=self.activation_name, 156 a=self.a, 157 nodes_sim=self.nodes_sim, 158 bias=self.bias, 159 dropout=self.dropout, 160 direct_link=self.direct_link, 161 n_clusters=self.n_clusters, 162 cluster_encode=self.cluster_encode, 163 type_clust=self.type_clust, 164 type_scaling=self.type_scaling, 165 col_sample=self.col_sample, 166 row_sample=self.row_sample, 167 seed=self.seed, 168 backend=self.backend, 169 ) 170 ) 171 172 self.stacked_obj.fit(X, y, **kwargs) 173 174 if self.level is not None: 175 self.stacked_obj = PredictionInterval( 176 obj=self.stacked_obj, method=self.pi_method, level=self.level 177 ) 178 179 if hasattr(self.stacked_obj, "clustering_obj_"): 180 self.clustering_obj_ = self.stacked_obj.clustering_obj_ 181 182 if hasattr(self.stacked_obj, "coef_"): 183 self.coef_ = self.stacked_obj.coef_ 184 185 if hasattr(self.stacked_obj, "scaler_"): 186 self.scaler_ = self.stacked_obj.scaler_ 187 188 if hasattr(self.stacked_obj, "nn_scaler_"): 189 self.nn_scaler_ = self.stacked_obj.nn_scaler_ 190 191 if hasattr(self.stacked_obj, "clustering_scaler_"): 192 self.clustering_scaler_ = self.stacked_obj.clustering_scaler_ 193 194 return self
Fit Regression algorithms to X and y.
Parameters
X : array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
y : array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
**kwargs: dict
Additional parameters to be passed to the fit method
of the base learner. For example, sample_weight
.
Returns
A fitted object
228 def predict(self, X, **kwargs): 229 if self.level is not None: 230 return self.stacked_obj.predict(X, return_pi=True) 231 return self.stacked_obj.predict(X, **kwargs)
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
level: int
Level of confidence (default = 95)
method: str
'splitconformal', 'localconformal'
prediction (if you specify `return_pi = True`)
**kwargs: additional parameters
`return_pi = True` for conformal prediction,
with `method` in ('splitconformal', 'localconformal')
or `return_std = True` for `self.obj` in
(`sklearn.linear_model.BayesianRidge`,
`sklearn.linear_model.ARDRegressor`,
`sklearn.gaussian_process.GaussianProcessRegressor`)`
Returns:
model predictions:
an array if uncertainty quantification is not requested,
or a tuple if with prediction intervals and simulations
if `return_std = True` (mean, standard deviation,
lower and upper prediction interval) or `return_pi = True`
()
Compute the score of the model.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method
Returns:
score: float
11class DeepMTS(MTS): 12 """Univariate and multivariate time series (DeepMTS) forecasting with Quasi-Randomized networks (Work in progress) 13 14 Parameters: 15 16 obj: object. 17 any object containing a method fit (obj.fit()) and a method predict 18 (obj.predict()). 19 20 n_layers: int. 21 number of layers in the neural network. 22 23 n_hidden_features: int. 24 number of nodes in the hidden layer. 25 26 activation_name: str. 27 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'. 28 29 a: float. 30 hyperparameter for 'prelu' or 'elu' activation function. 31 32 nodes_sim: str. 33 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 34 'uniform'. 35 36 bias: boolean. 37 indicates if the hidden layer contains a bias term (True) or not 38 (False). 39 40 dropout: float. 41 regularization parameter; (random) percentage of nodes dropped out 42 of the training. 43 44 direct_link: boolean. 45 indicates if the original predictors are included (True) in model's fitting or not (False). 46 47 n_clusters: int. 48 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering). 49 50 cluster_encode: bool. 51 defines how the variable containing clusters is treated (default is one-hot) 52 if `False`, then labels are used, without one-hot encoding. 53 54 type_clust: str. 55 type of clustering method: currently k-means ('kmeans') or Gaussian 56 Mixture Model ('gmm'). 57 58 type_scaling: a tuple of 3 strings. 59 scaling methods for inputs, hidden layer, and clustering respectively 60 (and when relevant). 61 Currently available: standardization ('std') or MinMax scaling ('minmax'). 62 63 lags: int. 64 number of lags used for each time series. 65 66 type_pi: str. 67 type of prediction interval; currently: 68 - "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case 69 - "kde": based on Kernel Density Estimation of in-sample residuals 70 - "bootstrap": based on independent bootstrap of in-sample residuals 71 - "block-bootstrap": based on basic block bootstrap of in-sample residuals 72 - "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals 73 - "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals 74 - "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals 75 - "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals 76 - "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals 77 - "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals 78 79 block_size: int. 80 size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap"). 81 Default is round(3.15*(n_residuals^1/3)) 82 83 replications: int. 84 number of replications (if needed, for predictive simulation). Default is 'None'. 85 86 kernel: str. 87 the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'. 88 89 agg: str. 90 either "mean" or "median" for simulation of bootstrap aggregating 91 92 seed: int. 93 reproducibility seed for nodes_sim=='uniform' or predictive simulation. 94 95 backend: str. 96 "cpu" or "gpu" or "tpu". 97 98 verbose: int. 99 0: not printing; 1: printing 100 101 show_progress: bool. 102 True: progress bar when fitting each series; False: no progress bar when fitting each series 103 104 Attributes: 105 106 fit_objs_: dict 107 objects adjusted to each individual time series 108 109 y_: {array-like} 110 DeepMTS responses (most recent observations first) 111 112 X_: {array-like} 113 DeepMTS lags 114 115 xreg_: {array-like} 116 external regressors 117 118 y_means_: dict 119 a dictionary of each series mean values 120 121 preds_: {array-like} 122 successive model predictions 123 124 preds_std_: {array-like} 125 standard deviation around the predictions 126 127 return_std_: boolean 128 return uncertainty or not (set in predict) 129 130 df_: data frame 131 the input data frame, in case a data.frame is provided to `fit` 132 133 Examples: 134 135 Example 1: 136 137 ```python 138 import nnetsauce as ns 139 import numpy as np 140 from sklearn import linear_model 141 np.random.seed(123) 142 143 M = np.random.rand(10, 3) 144 M[:,0] = 10*M[:,0] 145 M[:,2] = 25*M[:,2] 146 print(M) 147 148 # Adjust Bayesian Ridge 149 regr4 = linear_model.BayesianRidge() 150 obj_DeepMTS = ns.DeepMTS(regr4, lags = 1, n_hidden_features=5) 151 obj_DeepMTS.fit(M) 152 print(obj_DeepMTS.predict()) 153 154 # with credible intervals 155 print(obj_DeepMTS.predict(return_std=True, level=80)) 156 157 print(obj_DeepMTS.predict(return_std=True, level=95)) 158 ``` 159 160 Example 2: 161 162 ```python 163 import nnetsauce as ns 164 import numpy as np 165 from sklearn import linear_model 166 167 dataset = { 168 'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'], 169 'series1' : [34, 30, 35.6, 33.3, 38.1], 170 'series2' : [4, 5.5, 5.6, 6.3, 5.1], 171 'series3' : [100, 100.5, 100.6, 100.2, 100.1]} 172 df = pd.DataFrame(dataset).set_index('date') 173 print(df) 174 175 # Adjust Bayesian Ridge 176 regr5 = linear_model.BayesianRidge() 177 obj_DeepMTS = ns.DeepMTS(regr5, lags = 1, n_hidden_features=5) 178 obj_DeepMTS.fit(df) 179 print(obj_DeepMTS.predict()) 180 181 # with credible intervals 182 print(obj_DeepMTS.predict(return_std=True, level=80)) 183 184 print(obj_DeepMTS.predict(return_std=True, level=95)) 185 ``` 186 187 """ 188 189 # construct the object ----- 190 191 def __init__( 192 self, 193 obj, 194 n_layers=3, 195 n_hidden_features=5, 196 activation_name="relu", 197 a=0.01, 198 nodes_sim="sobol", 199 bias=True, 200 dropout=0, 201 direct_link=True, 202 n_clusters=2, 203 cluster_encode=True, 204 type_clust="kmeans", 205 type_scaling=("std", "std", "std"), 206 lags=1, 207 type_pi="kde", 208 block_size=None, 209 replications=None, 210 kernel=None, 211 agg="mean", 212 seed=123, 213 backend="cpu", 214 verbose=0, 215 show_progress=True, 216 ): 217 assert int(lags) == lags, "parameter 'lags' should be an integer" 218 assert n_layers >= 1, "must have n_layers >= 1" 219 self.n_layers = int(n_layers) 220 221 if self.n_layers > 1: 222 223 for _ in range(self.n_layers - 1): 224 obj = CustomRegressor( 225 obj=deepcopy(obj), 226 n_hidden_features=n_hidden_features, 227 activation_name=activation_name, 228 a=a, 229 nodes_sim=nodes_sim, 230 bias=bias, 231 dropout=dropout, 232 direct_link=direct_link, 233 n_clusters=n_clusters, 234 cluster_encode=cluster_encode, 235 type_clust=type_clust, 236 type_scaling=type_scaling, 237 seed=seed, 238 backend=backend, 239 ) 240 241 self.obj = deepcopy(obj) 242 super().__init__( 243 obj=self.obj, 244 n_hidden_features=n_hidden_features, 245 activation_name=activation_name, 246 a=a, 247 nodes_sim=nodes_sim, 248 bias=bias, 249 dropout=dropout, 250 direct_link=direct_link, 251 n_clusters=n_clusters, 252 cluster_encode=cluster_encode, 253 type_clust=type_clust, 254 type_scaling=type_scaling, 255 lags=lags, 256 type_pi=type_pi, 257 block_size=block_size, 258 replications=replications, 259 kernel=kernel, 260 agg=agg, 261 seed=seed, 262 backend=backend, 263 verbose=verbose, 264 show_progress=show_progress, 265 )
Univariate and multivariate time series (DeepMTS) forecasting with Quasi-Randomized networks (Work in progress)
Parameters:
obj: object.
any object containing a method fit (obj.fit()) and a method predict
(obj.predict()).
n_layers: int.
number of layers in the neural network.
n_hidden_features: int.
number of nodes in the hidden layer.
activation_name: str.
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'.
a: float.
hyperparameter for 'prelu' or 'elu' activation function.
nodes_sim: str.
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'.
bias: boolean.
indicates if the hidden layer contains a bias term (True) or not
(False).
dropout: float.
regularization parameter; (random) percentage of nodes dropped out
of the training.
direct_link: boolean.
indicates if the original predictors are included (True) in model's fitting or not (False).
n_clusters: int.
number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering).
cluster_encode: bool.
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding.
type_clust: str.
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm').
type_scaling: a tuple of 3 strings.
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax').
lags: int.
number of lags used for each time series.
type_pi: str.
type of prediction interval; currently:
- "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case
- "kde": based on Kernel Density Estimation of in-sample residuals
- "bootstrap": based on independent bootstrap of in-sample residuals
- "block-bootstrap": based on basic block bootstrap of in-sample residuals
- "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals
- "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals
- "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals
- "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals
- "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals
- "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals
block_size: int.
size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap").
Default is round(3.15*(n_residuals^1/3))
replications: int.
number of replications (if needed, for predictive simulation). Default is 'None'.
kernel: str.
the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'.
agg: str.
either "mean" or "median" for simulation of bootstrap aggregating
seed: int.
reproducibility seed for nodes_sim=='uniform' or predictive simulation.
backend: str.
"cpu" or "gpu" or "tpu".
verbose: int.
0: not printing; 1: printing
show_progress: bool.
True: progress bar when fitting each series; False: no progress bar when fitting each series
Attributes:
fit_objs_: dict
objects adjusted to each individual time series
y_: {array-like}
DeepMTS responses (most recent observations first)
X_: {array-like}
DeepMTS lags
xreg_: {array-like}
external regressors
y_means_: dict
a dictionary of each series mean values
preds_: {array-like}
successive model predictions
preds_std_: {array-like}
standard deviation around the predictions
return_std_: boolean
return uncertainty or not (set in predict)
df_: data frame
the input data frame, in case a data.frame is provided to `fit`
Examples:
Example 1:
import nnetsauce as ns
import numpy as np
from sklearn import linear_model
np.random.seed(123)
M = np.random.rand(10, 3)
M[:,0] = 10M[:,0]
M[:,2] = 25M[:,2]
print(M)
# Adjust Bayesian Ridge
regr4 = linear_model.BayesianRidge()
obj_DeepMTS = ns.DeepMTS(regr4, lags = 1, n_hidden_features=5)
obj_DeepMTS.fit(M)
print(obj_DeepMTS.predict())
# with credible intervals
print(obj_DeepMTS.predict(return_std=True, level=80))
print(obj_DeepMTS.predict(return_std=True, level=95))
Example 2:
import nnetsauce as ns
import numpy as np
from sklearn import linear_model
dataset = {
'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
'series1' : [34, 30, 35.6, 33.3, 38.1],
'series2' : [4, 5.5, 5.6, 6.3, 5.1],
'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
df = pd.DataFrame(dataset).set_index('date')
print(df)
# Adjust Bayesian Ridge
regr5 = linear_model.BayesianRidge()
obj_DeepMTS = ns.DeepMTS(regr5, lags = 1, n_hidden_features=5)
obj_DeepMTS.fit(df)
print(obj_DeepMTS.predict())
# with credible intervals
print(obj_DeepMTS.predict(return_std=True, level=80))
print(obj_DeepMTS.predict(return_std=True, level=95))
6class Downloader: 7 """Download datasets from data sources (R-universe for now)""" 8 9 def __init__(self): 10 self.pkgname = None 11 self.dataset = None 12 self.source = None 13 self.url = None 14 self.request = None 15 16 def download( 17 self, 18 pkgname="MASS", 19 dataset="Boston", 20 source="https://cran.r-universe.dev/", 21 **kwargs 22 ): 23 """Download datasets from data sources (R-universe for now) 24 25 Examples: 26 27 ```python 28 import nnetsauce as ns 29 30 downloader = ns.Downloader() 31 df = downloader.download(pkgname="MASS", dataset="Boston") 32 ``` 33 34 """ 35 self.pkgname = pkgname 36 self.dataset = dataset 37 self.source = source 38 self.url = source + pkgname + "/data/" + dataset + "/json" 39 self.request = requests.get(self.url) 40 return pd.DataFrame(self.request.json(), **kwargs)
Download datasets from data sources (R-universe for now)
16 def download( 17 self, 18 pkgname="MASS", 19 dataset="Boston", 20 source="https://cran.r-universe.dev/", 21 **kwargs 22 ): 23 """Download datasets from data sources (R-universe for now) 24 25 Examples: 26 27 ```python 28 import nnetsauce as ns 29 30 downloader = ns.Downloader() 31 df = downloader.download(pkgname="MASS", dataset="Boston") 32 ``` 33 34 """ 35 self.pkgname = pkgname 36 self.dataset = dataset 37 self.source = source 38 self.url = source + pkgname + "/data/" + dataset + "/json" 39 self.request = requests.get(self.url) 40 return pd.DataFrame(self.request.json(), **kwargs)
Download datasets from data sources (R-universe for now)
Examples:
import nnetsauce as ns
downloader = ns.Downloader()
df = downloader.download(pkgname="MASS", dataset="Boston")
21class GLMClassifier(GLM, ClassifierMixin): 22 """Generalized 'linear' models using quasi-randomized networks (classification) 23 24 Parameters: 25 26 n_hidden_features: int 27 number of nodes in the hidden layer 28 29 lambda1: float 30 regularization parameter for GLM coefficients on original features 31 32 alpha1: float 33 controls compromize between l1 and l2 norm of GLM coefficients on original features 34 35 lambda2: float 36 regularization parameter for GLM coefficients on nonlinear features 37 38 alpha2: float 39 controls compromize between l1 and l2 norm of GLM coefficients on nonlinear features 40 41 activation_name: str 42 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 43 44 a: float 45 hyperparameter for 'prelu' or 'elu' activation function 46 47 nodes_sim: str 48 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 49 'uniform' 50 51 bias: boolean 52 indicates if the hidden layer contains a bias term (True) or not 53 (False) 54 55 dropout: float 56 regularization parameter; (random) percentage of nodes dropped out 57 of the training 58 59 direct_link: boolean 60 indicates if the original predictors are included (True) in model's 61 fitting or not (False) 62 63 n_clusters: int 64 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: 65 no clustering) 66 67 cluster_encode: bool 68 defines how the variable containing clusters is treated (default is one-hot) 69 if `False`, then labels are used, without one-hot encoding 70 71 type_clust: str 72 type of clustering method: currently k-means ('kmeans') or Gaussian 73 Mixture Model ('gmm') 74 75 type_scaling: a tuple of 3 strings 76 scaling methods for inputs, hidden layer, and clustering respectively 77 (and when relevant). 78 Currently available: standardization ('std') or MinMax scaling ('minmax') 79 80 optimizer: object 81 optimizer, from class nnetsauce.Optimizer 82 83 backend: str. 84 "cpu" or "gpu" or "tpu". 85 86 seed: int 87 reproducibility seed for nodes_sim=='uniform' 88 89 Attributes: 90 91 beta_: vector 92 regression coefficients 93 94 Examples: 95 96 See [https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_classification.py) 97 98 """ 99 100 # construct the object ----- 101 _estimator_type = "classifier" 102 103 def __init__( 104 self, 105 n_hidden_features=5, 106 lambda1=0.01, 107 alpha1=0.5, 108 lambda2=0.01, 109 alpha2=0.5, 110 family="expit", 111 activation_name="relu", 112 a=0.01, 113 nodes_sim="sobol", 114 bias=True, 115 dropout=0, 116 direct_link=True, 117 n_clusters=2, 118 cluster_encode=True, 119 type_clust="kmeans", 120 type_scaling=("std", "std", "std"), 121 optimizer=Optimizer(), 122 backend="cpu", 123 seed=123, 124 ): 125 super().__init__( 126 n_hidden_features=n_hidden_features, 127 lambda1=lambda1, 128 alpha1=alpha1, 129 lambda2=lambda2, 130 alpha2=alpha2, 131 activation_name=activation_name, 132 a=a, 133 nodes_sim=nodes_sim, 134 bias=bias, 135 dropout=dropout, 136 direct_link=direct_link, 137 n_clusters=n_clusters, 138 cluster_encode=cluster_encode, 139 type_clust=type_clust, 140 type_scaling=type_scaling, 141 optimizer=optimizer, 142 backend=backend, 143 seed=seed, 144 ) 145 146 self.family = family 147 148 def logit_loss(self, Y, row_index, XB): 149 self.n_classes = Y.shape[1] # len(np.unique(y)) 150 # Y = mo.one_hot_encode2(y, self.n_classes) 151 # Y = self.optimizer.one_hot_encode(y, self.n_classes) 152 153 # max_double = 709.0 # only if softmax 154 # XB[XB > max_double] = max_double 155 XB[XB > 709.0] = 709.0 156 157 if row_index is None: 158 return -np.mean(np.sum(Y * XB, axis=1) - logsumexp(XB)) 159 160 return -np.mean(np.sum(Y[row_index, :] * XB, axis=1) - logsumexp(XB)) 161 162 def expit_erf_loss(self, Y, row_index, XB): 163 # self.n_classes = len(np.unique(y)) 164 # Y = mo.one_hot_encode2(y, self.n_classes) 165 # Y = self.optimizer.one_hot_encode(y, self.n_classes) 166 self.n_classes = Y.shape[1] 167 168 if row_index is None: 169 return -np.mean(np.sum(Y * XB, axis=1) - logsumexp(XB)) 170 171 return -np.mean(np.sum(Y[row_index, :] * XB, axis=1) - logsumexp(XB)) 172 173 def loss_func( 174 self, 175 beta, 176 group_index, 177 X, 178 Y, 179 y, 180 row_index=None, 181 type_loss="logit", 182 **kwargs 183 ): 184 res = { 185 "logit": self.logit_loss, 186 "expit": self.expit_erf_loss, 187 "erf": self.expit_erf_loss, 188 } 189 190 if row_index is None: 191 row_index = range(len(y)) 192 XB = self.compute_XB( 193 X, 194 beta=np.reshape(beta, (X.shape[1], self.n_classes), order="F"), 195 ) 196 197 return res[type_loss](Y, row_index, XB) + self.compute_penalty( 198 group_index=group_index, beta=beta 199 ) 200 201 XB = self.compute_XB( 202 X, 203 beta=np.reshape(beta, (X.shape[1], self.n_classes), order="F"), 204 row_index=row_index, 205 ) 206 207 return res[type_loss](Y, row_index, XB) + self.compute_penalty( 208 group_index=group_index, beta=beta 209 ) 210 211 def fit(self, X, y, **kwargs): 212 """Fit GLM model to training data (X, y). 213 214 Args: 215 216 X: {array-like}, shape = [n_samples, n_features] 217 Training vectors, where n_samples is the number 218 of samples and n_features is the number of features. 219 220 y: array-like, shape = [n_samples] 221 Target values. 222 223 **kwargs: additional parameters to be passed to 224 self.cook_training_set or self.obj.fit 225 226 Returns: 227 228 self: object 229 230 """ 231 232 assert mx.is_factor( 233 y 234 ), "y must contain only integers" # change is_factor and subsampling everywhere 235 236 self.classes_ = np.unique(y) # for compatibility with sklearn 237 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 238 239 self.beta_ = None 240 241 n, p = X.shape 242 243 self.group_index = n * X.shape[1] 244 245 self.n_classes = len(np.unique(y)) 246 247 output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 248 249 # Y = mo.one_hot_encode2(output_y, self.n_classes) 250 Y = self.optimizer.one_hot_encode(output_y, self.n_classes) 251 252 # initialization 253 if self.backend == "cpu": 254 beta_ = np.linalg.lstsq(scaled_Z, Y, rcond=None)[0] 255 else: 256 beta_ = jnp.linalg.lstsq(scaled_Z, Y, rcond=None)[0] 257 258 # optimization 259 # fit(self, loss_func, response, x0, **kwargs): 260 # loss_func(self, beta, group_index, X, y, 261 # row_index=None, type_loss="gaussian", 262 # **kwargs) 263 self.optimizer.fit( 264 self.loss_func, 265 response=y, 266 x0=beta_.flatten(order="F"), 267 group_index=self.group_index, 268 X=scaled_Z, 269 Y=Y, 270 y=y, 271 type_loss=self.family, 272 ) 273 274 self.beta_ = self.optimizer.results[0] 275 self.classes_ = np.unique(y) 276 277 return self 278 279 def predict(self, X, **kwargs): 280 """Predict test data X. 281 282 Args: 283 284 X: {array-like}, shape = [n_samples, n_features] 285 Training vectors, where n_samples is the number 286 of samples and n_features is the number of features. 287 288 **kwargs: additional parameters to be passed to 289 self.cook_test_set 290 291 Returns: 292 293 model predictions: {array-like} 294 295 """ 296 297 return np.argmax(self.predict_proba(X, **kwargs), axis=1) 298 299 def predict_proba(self, X, **kwargs): 300 """Predict probabilities for test data X. 301 302 Args: 303 304 X: {array-like}, shape = [n_samples, n_features] 305 Training vectors, where n_samples is the number 306 of samples and n_features is the number of features. 307 308 **kwargs: additional parameters to be passed to 309 self.cook_test_set 310 311 Returns: 312 313 probability estimates for test data: {array-like} 314 315 """ 316 if len(X.shape) == 1: 317 n_features = X.shape[0] 318 new_X = mo.rbind( 319 X.reshape(1, n_features), 320 np.ones(n_features).reshape(1, n_features), 321 ) 322 323 Z = self.cook_test_set(new_X, **kwargs) 324 325 else: 326 Z = self.cook_test_set(X, **kwargs) 327 328 ZB = mo.safe_sparse_dot( 329 Z, 330 self.beta_.reshape( 331 self.n_classes, 332 X.shape[1] + self.n_hidden_features + self.n_clusters, 333 ).T, 334 ) 335 336 if self.family == "logit": 337 exp_ZB = np.exp(ZB) 338 339 return exp_ZB / exp_ZB.sum(axis=1)[:, None] 340 341 if self.family == "expit": 342 exp_ZB = expit(ZB) 343 344 return exp_ZB / exp_ZB.sum(axis=1)[:, None] 345 346 if self.family == "erf": 347 exp_ZB = 0.5 * (1 + erf(ZB)) 348 349 return exp_ZB / exp_ZB.sum(axis=1)[:, None] 350 351 def score(self, X, y, scoring=None): 352 """Scoring function for classification. 353 354 Args: 355 356 X: {array-like}, shape = [n_samples, n_features] 357 Training vectors, where n_samples is the number 358 of samples and n_features is the number of features. 359 360 y: array-like, shape = [n_samples] 361 Target values. 362 363 scoring: str 364 scoring method (default is accuracy) 365 366 Returns: 367 368 score: float 369 """ 370 371 if scoring is None: 372 scoring = "accuracy" 373 374 if scoring == "accuracy": 375 return skm2.accuracy_score(y, self.predict(X)) 376 377 if scoring == "f1": 378 return skm2.f1_score(y, self.predict(X)) 379 380 if scoring == "precision": 381 return skm2.precision_score(y, self.predict(X)) 382 383 if scoring == "recall": 384 return skm2.recall_score(y, self.predict(X)) 385 386 if scoring == "roc_auc": 387 return skm2.roc_auc_score(y, self.predict(X)) 388 389 if scoring == "log_loss": 390 return skm2.log_loss(y, self.predict_proba(X)) 391 392 if scoring == "balanced_accuracy": 393 return skm2.balanced_accuracy_score(y, self.predict(X)) 394 395 if scoring == "average_precision": 396 return skm2.average_precision_score(y, self.predict(X)) 397 398 if scoring == "neg_brier_score": 399 return -skm2.brier_score_loss(y, self.predict_proba(X)) 400 401 if scoring == "neg_log_loss": 402 return -skm2.log_loss(y, self.predict_proba(X)) 403 404 @property 405 def _estimator_type(self): 406 return "classifier"
Generalized 'linear' models using quasi-randomized networks (classification)
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
lambda1: float
regularization parameter for GLM coefficients on original features
alpha1: float
controls compromize between l1 and l2 norm of GLM coefficients on original features
lambda2: float
regularization parameter for GLM coefficients on nonlinear features
alpha2: float
controls compromize between l1 and l2 norm of GLM coefficients on nonlinear features
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
optimizer: object
optimizer, from class nnetsauce.Optimizer
backend: str.
"cpu" or "gpu" or "tpu".
seed: int
reproducibility seed for nodes_sim=='uniform'
Attributes:
beta_: vector
regression coefficients
Examples:
See https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_classification.py
211 def fit(self, X, y, **kwargs): 212 """Fit GLM model to training data (X, y). 213 214 Args: 215 216 X: {array-like}, shape = [n_samples, n_features] 217 Training vectors, where n_samples is the number 218 of samples and n_features is the number of features. 219 220 y: array-like, shape = [n_samples] 221 Target values. 222 223 **kwargs: additional parameters to be passed to 224 self.cook_training_set or self.obj.fit 225 226 Returns: 227 228 self: object 229 230 """ 231 232 assert mx.is_factor( 233 y 234 ), "y must contain only integers" # change is_factor and subsampling everywhere 235 236 self.classes_ = np.unique(y) # for compatibility with sklearn 237 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 238 239 self.beta_ = None 240 241 n, p = X.shape 242 243 self.group_index = n * X.shape[1] 244 245 self.n_classes = len(np.unique(y)) 246 247 output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 248 249 # Y = mo.one_hot_encode2(output_y, self.n_classes) 250 Y = self.optimizer.one_hot_encode(output_y, self.n_classes) 251 252 # initialization 253 if self.backend == "cpu": 254 beta_ = np.linalg.lstsq(scaled_Z, Y, rcond=None)[0] 255 else: 256 beta_ = jnp.linalg.lstsq(scaled_Z, Y, rcond=None)[0] 257 258 # optimization 259 # fit(self, loss_func, response, x0, **kwargs): 260 # loss_func(self, beta, group_index, X, y, 261 # row_index=None, type_loss="gaussian", 262 # **kwargs) 263 self.optimizer.fit( 264 self.loss_func, 265 response=y, 266 x0=beta_.flatten(order="F"), 267 group_index=self.group_index, 268 X=scaled_Z, 269 Y=Y, 270 y=y, 271 type_loss=self.family, 272 ) 273 274 self.beta_ = self.optimizer.results[0] 275 self.classes_ = np.unique(y) 276 277 return self
Fit GLM model to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
279 def predict(self, X, **kwargs): 280 """Predict test data X. 281 282 Args: 283 284 X: {array-like}, shape = [n_samples, n_features] 285 Training vectors, where n_samples is the number 286 of samples and n_features is the number of features. 287 288 **kwargs: additional parameters to be passed to 289 self.cook_test_set 290 291 Returns: 292 293 model predictions: {array-like} 294 295 """ 296 297 return np.argmax(self.predict_proba(X, **kwargs), axis=1)
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
299 def predict_proba(self, X, **kwargs): 300 """Predict probabilities for test data X. 301 302 Args: 303 304 X: {array-like}, shape = [n_samples, n_features] 305 Training vectors, where n_samples is the number 306 of samples and n_features is the number of features. 307 308 **kwargs: additional parameters to be passed to 309 self.cook_test_set 310 311 Returns: 312 313 probability estimates for test data: {array-like} 314 315 """ 316 if len(X.shape) == 1: 317 n_features = X.shape[0] 318 new_X = mo.rbind( 319 X.reshape(1, n_features), 320 np.ones(n_features).reshape(1, n_features), 321 ) 322 323 Z = self.cook_test_set(new_X, **kwargs) 324 325 else: 326 Z = self.cook_test_set(X, **kwargs) 327 328 ZB = mo.safe_sparse_dot( 329 Z, 330 self.beta_.reshape( 331 self.n_classes, 332 X.shape[1] + self.n_hidden_features + self.n_clusters, 333 ).T, 334 ) 335 336 if self.family == "logit": 337 exp_ZB = np.exp(ZB) 338 339 return exp_ZB / exp_ZB.sum(axis=1)[:, None] 340 341 if self.family == "expit": 342 exp_ZB = expit(ZB) 343 344 return exp_ZB / exp_ZB.sum(axis=1)[:, None] 345 346 if self.family == "erf": 347 exp_ZB = 0.5 * (1 + erf(ZB)) 348 349 return exp_ZB / exp_ZB.sum(axis=1)[:, None]
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
351 def score(self, X, y, scoring=None): 352 """Scoring function for classification. 353 354 Args: 355 356 X: {array-like}, shape = [n_samples, n_features] 357 Training vectors, where n_samples is the number 358 of samples and n_features is the number of features. 359 360 y: array-like, shape = [n_samples] 361 Target values. 362 363 scoring: str 364 scoring method (default is accuracy) 365 366 Returns: 367 368 score: float 369 """ 370 371 if scoring is None: 372 scoring = "accuracy" 373 374 if scoring == "accuracy": 375 return skm2.accuracy_score(y, self.predict(X)) 376 377 if scoring == "f1": 378 return skm2.f1_score(y, self.predict(X)) 379 380 if scoring == "precision": 381 return skm2.precision_score(y, self.predict(X)) 382 383 if scoring == "recall": 384 return skm2.recall_score(y, self.predict(X)) 385 386 if scoring == "roc_auc": 387 return skm2.roc_auc_score(y, self.predict(X)) 388 389 if scoring == "log_loss": 390 return skm2.log_loss(y, self.predict_proba(X)) 391 392 if scoring == "balanced_accuracy": 393 return skm2.balanced_accuracy_score(y, self.predict(X)) 394 395 if scoring == "average_precision": 396 return skm2.average_precision_score(y, self.predict(X)) 397 398 if scoring == "neg_brier_score": 399 return -skm2.brier_score_loss(y, self.predict_proba(X)) 400 401 if scoring == "neg_log_loss": 402 return -skm2.log_loss(y, self.predict_proba(X))
Scoring function for classification.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method (default is accuracy)
Returns:
score: float
20class GLMRegressor(GLM, RegressorMixin): 21 """Generalized 'linear' models using quasi-randomized networks (regression) 22 23 Attributes: 24 25 n_hidden_features: int 26 number of nodes in the hidden layer 27 28 lambda1: float 29 regularization parameter for GLM coefficients on original features 30 31 alpha1: float 32 controls compromize between l1 and l2 norm of GLM coefficients on original features 33 34 lambda2: float 35 regularization parameter for GLM coefficients on nonlinear features 36 37 alpha2: float 38 controls compromize between l1 and l2 norm of GLM coefficients on nonlinear features 39 40 family: str 41 "gaussian", "laplace", "poisson", or "quantile" (for now) 42 43 level: int, default=50 44 The level of the quantiles to compute for family = "quantile". 45 Default is the median. 46 47 activation_name: str 48 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 49 50 a: float 51 hyperparameter for 'prelu' or 'elu' activation function 52 53 nodes_sim: str 54 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 55 'uniform' 56 57 bias: boolean 58 indicates if the hidden layer contains a bias term (True) or not 59 (False) 60 61 dropout: float 62 regularization parameter; (random) percentage of nodes dropped out 63 of the training 64 65 direct_link: boolean 66 indicates if the original predictors are included (True) in model's 67 fitting or not (False) 68 69 n_clusters: int 70 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: 71 no clustering) 72 73 cluster_encode: bool 74 defines how the variable containing clusters is treated (default is one-hot) 75 if `False`, then labels are used, without one-hot encoding 76 77 type_clust: str 78 type of clustering method: currently k-means ('kmeans') or Gaussian 79 Mixture Model ('gmm') 80 81 type_scaling: a tuple of 3 strings 82 scaling methods for inputs, hidden layer, and clustering respectively 83 (and when relevant). 84 Currently available: standardization ('std') or MinMax scaling ('minmax') 85 86 optimizer: object 87 optimizer, from class nnetsauce.utils.Optimizer 88 89 backend: str. 90 "cpu" or "gpu" or "tpu". 91 92 seed: int 93 reproducibility seed for nodes_sim=='uniform' 94 95 backend: str 96 "cpu", "gpu", "tpu" 97 98 Attributes: 99 100 beta_: vector 101 regression coefficients 102 103 Examples: 104 105 See [https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py) 106 107 """ 108 109 # construct the object ----- 110 111 def __init__( 112 self, 113 n_hidden_features=5, 114 lambda1=0.01, 115 alpha1=0.5, 116 lambda2=0.01, 117 alpha2=0.5, 118 family="gaussian", 119 level=50, 120 activation_name="relu", 121 a=0.01, 122 nodes_sim="sobol", 123 bias=True, 124 dropout=0, 125 direct_link=True, 126 n_clusters=2, 127 cluster_encode=True, 128 type_clust="kmeans", 129 type_scaling=("std", "std", "std"), 130 optimizer=Optimizer(), 131 backend="cpu", 132 seed=123, 133 ): 134 super().__init__( 135 n_hidden_features=n_hidden_features, 136 lambda1=lambda1, 137 alpha1=alpha1, 138 lambda2=lambda2, 139 alpha2=alpha2, 140 activation_name=activation_name, 141 a=a, 142 nodes_sim=nodes_sim, 143 bias=bias, 144 dropout=dropout, 145 direct_link=direct_link, 146 n_clusters=n_clusters, 147 cluster_encode=cluster_encode, 148 type_clust=type_clust, 149 type_scaling=type_scaling, 150 optimizer=optimizer, 151 backend=backend, 152 seed=seed, 153 ) 154 155 self.family = family 156 self.level = level 157 self.q = self.level / 100 158 159 def gaussian_loss(self, y, row_index, XB): 160 return 0.5 * np.mean(np.square(y[row_index] - XB)) 161 162 def laplace_loss(self, y, row_index, XB): 163 return 0.5 * np.mean(np.abs(y[row_index] - XB)) 164 165 def poisson_loss(self, y, row_index, XB): 166 return -np.mean(y[row_index] * XB - np.exp(XB)) 167 168 def pinball_loss(self, y, row_index, XB, tau=0.5): 169 y = np.array(y[row_index]) 170 y_pred = np.array(XB) 171 return mean_pinball_loss(y, y_pred, alpha=tau) 172 # return np.mean(np.maximum(tau * residuals, (tau - 1) * residuals)) 173 174 def loss_func( 175 self, 176 beta, 177 group_index, 178 X, 179 y, 180 row_index=None, 181 type_loss="gaussian", 182 **kwargs 183 ): 184 res = { 185 "gaussian": self.gaussian_loss, 186 "laplace": self.laplace_loss, 187 "poisson": self.poisson_loss, 188 "quantile": self.pinball_loss, 189 } 190 191 if type_loss != "quantile": 192 193 if row_index is None: 194 row_index = range(len(y)) 195 XB = self.compute_XB(X, beta=beta) 196 197 return res[type_loss](y, row_index, XB) + self.compute_penalty( 198 group_index=group_index, beta=beta 199 ) 200 201 XB = self.compute_XB(X, beta=beta, row_index=row_index) 202 203 return res[type_loss](y, row_index, XB) + self.compute_penalty( 204 group_index=group_index, beta=beta 205 ) 206 207 else: # quantile 208 209 assert ( 210 self.q > 0 and self.q < 1 211 ), "'tau' must be comprised 0 < tau < 1" 212 213 if row_index is None: 214 row_index = range(len(y)) 215 XB = self.compute_XB(X, beta=beta) 216 return res[type_loss](y, row_index, XB, self.q) 217 218 XB = self.compute_XB(X, beta=beta, row_index=row_index) 219 return res[type_loss](y, row_index, XB, self.q) 220 221 def fit(self, X, y, **kwargs): 222 """Fit GLM model to training data (X, y). 223 224 Args: 225 226 X: {array-like}, shape = [n_samples, n_features] 227 Training vectors, where n_samples is the number 228 of samples and n_features is the number of features. 229 230 y: array-like, shape = [n_samples] 231 Target values. 232 233 **kwargs: additional parameters to be passed to 234 self.cook_training_set or self.obj.fit 235 236 Returns: 237 238 self: object 239 240 """ 241 self.beta_ = None 242 self.n_iter = 0 243 244 _, self.group_index = X.shape 245 246 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 247 # initialization 248 if self.backend == "cpu": 249 beta_ = np.linalg.lstsq(scaled_Z, centered_y, rcond=None)[0] 250 else: 251 beta_ = jnp.linalg.lstsq(scaled_Z, centered_y, rcond=None)[0] 252 # optimization 253 # fit(self, loss_func, response, x0, **kwargs): 254 # loss_func(self, beta, group_index, X, y, 255 # row_index=None, type_loss="gaussian", 256 # **kwargs) 257 self.optimizer.fit( 258 self.loss_func, 259 response=centered_y, 260 x0=beta_, 261 group_index=self.group_index, 262 X=scaled_Z, 263 y=centered_y, 264 type_loss=self.family, 265 **kwargs 266 ) 267 268 self.beta_ = self.optimizer.results[0] 269 270 return self 271 272 def predict(self, X, **kwargs): 273 """Predict test data X. 274 275 Args: 276 277 X: {array-like}, shape = [n_samples, n_features] 278 Training vectors, where n_samples is the number 279 of samples and n_features is the number of features. 280 281 **kwargs: additional parameters to be passed to 282 self.cook_test_set 283 284 Returns: 285 286 model predictions: {array-like} 287 288 """ 289 290 if len(X.shape) == 1: 291 n_features = X.shape[0] 292 new_X = mo.rbind( 293 X.reshape(1, n_features), 294 np.ones(n_features).reshape(1, n_features), 295 ) 296 297 return ( 298 self.y_mean_ 299 + np.dot(self.cook_test_set(new_X, **kwargs), self.beta_) 300 )[0] 301 302 return self.y_mean_ + np.dot( 303 self.cook_test_set(X, **kwargs), self.beta_ 304 ) 305 306 def score(self, X, y, scoring=None): 307 """Compute the score of the model. 308 309 Parameters: 310 311 X: {array-like}, shape = [n_samples, n_features] 312 Training vectors, where n_samples is the number 313 of samples and n_features is the number of features. 314 315 y: array-like, shape = [n_samples] 316 Target values. 317 318 scoring: str 319 scoring method 320 321 Returns: 322 323 score: float 324 325 """ 326 327 if scoring is None: 328 return np.sqrt(np.mean((self.predict(X) - y) ** 2)) 329 330 return skm2.get_scorer(scoring)(self, X, y)
Generalized 'linear' models using quasi-randomized networks (regression)
Attributes:
n_hidden_features: int
number of nodes in the hidden layer
lambda1: float
regularization parameter for GLM coefficients on original features
alpha1: float
controls compromize between l1 and l2 norm of GLM coefficients on original features
lambda2: float
regularization parameter for GLM coefficients on nonlinear features
alpha2: float
controls compromize between l1 and l2 norm of GLM coefficients on nonlinear features
family: str
"gaussian", "laplace", "poisson", or "quantile" (for now)
level: int, default=50
The level of the quantiles to compute for family = "quantile".
Default is the median.
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
optimizer: object
optimizer, from class nnetsauce.utils.Optimizer
backend: str.
"cpu" or "gpu" or "tpu".
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu", "gpu", "tpu"
Attributes:
beta_: vector
regression coefficients
Examples:
See [https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py)
221 def fit(self, X, y, **kwargs): 222 """Fit GLM model to training data (X, y). 223 224 Args: 225 226 X: {array-like}, shape = [n_samples, n_features] 227 Training vectors, where n_samples is the number 228 of samples and n_features is the number of features. 229 230 y: array-like, shape = [n_samples] 231 Target values. 232 233 **kwargs: additional parameters to be passed to 234 self.cook_training_set or self.obj.fit 235 236 Returns: 237 238 self: object 239 240 """ 241 self.beta_ = None 242 self.n_iter = 0 243 244 _, self.group_index = X.shape 245 246 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 247 # initialization 248 if self.backend == "cpu": 249 beta_ = np.linalg.lstsq(scaled_Z, centered_y, rcond=None)[0] 250 else: 251 beta_ = jnp.linalg.lstsq(scaled_Z, centered_y, rcond=None)[0] 252 # optimization 253 # fit(self, loss_func, response, x0, **kwargs): 254 # loss_func(self, beta, group_index, X, y, 255 # row_index=None, type_loss="gaussian", 256 # **kwargs) 257 self.optimizer.fit( 258 self.loss_func, 259 response=centered_y, 260 x0=beta_, 261 group_index=self.group_index, 262 X=scaled_Z, 263 y=centered_y, 264 type_loss=self.family, 265 **kwargs 266 ) 267 268 self.beta_ = self.optimizer.results[0] 269 270 return self
Fit GLM model to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
272 def predict(self, X, **kwargs): 273 """Predict test data X. 274 275 Args: 276 277 X: {array-like}, shape = [n_samples, n_features] 278 Training vectors, where n_samples is the number 279 of samples and n_features is the number of features. 280 281 **kwargs: additional parameters to be passed to 282 self.cook_test_set 283 284 Returns: 285 286 model predictions: {array-like} 287 288 """ 289 290 if len(X.shape) == 1: 291 n_features = X.shape[0] 292 new_X = mo.rbind( 293 X.reshape(1, n_features), 294 np.ones(n_features).reshape(1, n_features), 295 ) 296 297 return ( 298 self.y_mean_ 299 + np.dot(self.cook_test_set(new_X, **kwargs), self.beta_) 300 )[0] 301 302 return self.y_mean_ + np.dot( 303 self.cook_test_set(X, **kwargs), self.beta_ 304 )
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
306 def score(self, X, y, scoring=None): 307 """Compute the score of the model. 308 309 Parameters: 310 311 X: {array-like}, shape = [n_samples, n_features] 312 Training vectors, where n_samples is the number 313 of samples and n_features is the number of features. 314 315 y: array-like, shape = [n_samples] 316 Target values. 317 318 scoring: str 319 scoring method 320 321 Returns: 322 323 score: float 324 325 """ 326 327 if scoring is None: 328 return np.sqrt(np.mean((self.predict(X) - y) ** 2)) 329 330 return skm2.get_scorer(scoring)(self, X, y)
Compute the score of the model.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method
Returns:
score: float
18class KernelRidge(BaseEstimator, RegressorMixin): 19 """ 20 Kernel Ridge Regression with optional GPU support, Matérn kernels, and automatic input standardization. 21 22 Parameters: 23 - alpha: float 24 Regularization parameter. 25 - kernel: str 26 Kernel type ("linear", "rbf", or "matern"). 27 - gamma: float 28 Kernel coefficient for "rbf". Ignored for other kernels. 29 - nu: float 30 Smoothness parameter for the Matérn kernel. Default is 1.5. 31 - length_scale: float 32 Length scale parameter for the Matérn kernel. Default is 1.0. 33 - backend: str 34 "cpu" or "gpu" (uses JAX if "gpu"). 35 """ 36 37 def __init__( 38 self, 39 alpha=1.0, 40 kernel="rbf", 41 gamma=None, 42 nu=1.5, 43 length_scale=1.0, 44 backend="cpu", 45 ): 46 self.alpha = alpha 47 self.alpha_ = alpha 48 self.kernel = kernel 49 self.gamma = gamma 50 self.nu = nu 51 self.length_scale = length_scale 52 self.backend = backend 53 self.scaler = StandardScaler() 54 55 if backend == "gpu" and not JAX_AVAILABLE: 56 raise ImportError( 57 "JAX is not installed. Please install JAX to use the GPU backend." 58 ) 59 60 def _linear_kernel(self, X, Y): 61 return jnp.dot(X, Y.T) if self.backend == "gpu" else np.dot(X, Y.T) 62 63 def _rbf_kernel(self, X, Y): 64 if self.gamma is None: 65 self.gamma = 1.0 / X.shape[1] 66 if self.backend == "gpu": 67 sq_dists = ( 68 jnp.sum(X**2, axis=1)[:, None] 69 + jnp.sum(Y**2, axis=1) 70 - 2 * jnp.dot(X, Y.T) 71 ) 72 return jnp.exp(-self.gamma * sq_dists) 73 else: 74 sq_dists = ( 75 np.sum(X**2, axis=1)[:, None] 76 + np.sum(Y**2, axis=1) 77 - 2 * np.dot(X, Y.T) 78 ) 79 return np.exp(-self.gamma * sq_dists) 80 81 def _matern_kernel(self, X, Y): 82 """ 83 Compute the Matérn kernel using JAX for GPU or NumPy for CPU. 84 85 Parameters: 86 - X: array-like, shape (n_samples_X, n_features) 87 - Y: array-like, shape (n_samples_Y, n_features) 88 89 Returns: 90 - Kernel matrix, shape (n_samples_X, n_samples_Y) 91 """ 92 if self.backend == "gpu": 93 # Compute pairwise distances 94 dists = jnp.sqrt( 95 jnp.sum((X[:, None, :] - Y[None, :, :]) ** 2, axis=2) 96 ) 97 scaled_dists = jnp.sqrt(2 * self.nu) * dists / self.length_scale 98 99 # Matérn kernel formula 100 coeff = (2 ** (1 - self.nu)) / jnp.exp(gammaln(self.nu)) 101 matern_kernel = ( 102 coeff * (scaled_dists**self.nu) * kv(self.nu, scaled_dists) 103 ) 104 matern_kernel = jnp.where( 105 dists == 0, 1.0, matern_kernel 106 ) # Handle the case where distance is 0 107 return matern_kernel 108 else: 109 # Use NumPy for CPU 110 from scipy.special import ( 111 gammaln, 112 kv, 113 ) # Ensure scipy.special is used for CPU 114 115 dists = np.sqrt( 116 np.sum((X[:, None, :] - Y[None, :, :]) ** 2, axis=2) 117 ) 118 scaled_dists = np.sqrt(2 * self.nu) * dists / self.length_scale 119 120 # Matérn kernel formula 121 coeff = (2 ** (1 - self.nu)) / np.exp(gammaln(self.nu)) 122 matern_kernel = ( 123 coeff * (scaled_dists**self.nu) * kv(self.nu, scaled_dists) 124 ) 125 matern_kernel = np.where( 126 dists == 0, 1.0, matern_kernel 127 ) # Handle the case where distance is 0 128 return matern_kernel 129 130 def _get_kernel(self, X, Y): 131 if self.kernel == "linear": 132 return self._linear_kernel(X, Y) 133 elif self.kernel == "rbf": 134 return self._rbf_kernel(X, Y) 135 elif self.kernel == "matern": 136 return self._matern_kernel(X, Y) 137 else: 138 raise ValueError(f"Unsupported kernel: {self.kernel}") 139 140 def fit(self, X, y): 141 """ 142 Fit the Kernel Ridge Regression model. 143 144 Parameters: 145 - X: array-like, shape (n_samples, n_features) 146 Training data. 147 - y: array-like, shape (n_samples,) 148 Target values. 149 """ 150 # Standardize the inputs 151 X = self.scaler.fit_transform(X) 152 self.X_fit_ = X 153 154 # Center the response 155 self.y_mean_ = np.mean(y) 156 y_centered = y - self.y_mean_ 157 158 n_samples = X.shape[0] 159 160 # Compute the kernel matrix 161 K = self._get_kernel(X, X) 162 self.K_ = K 163 self.y_fit_ = y_centered 164 165 if isinstance(self.alpha, (list, np.ndarray)): 166 # If alpha is a list or array, compute LOOE for each alpha 167 self.alphas_ = self.alpha # Store the list of alphas 168 self.dual_coefs_ = [] # Store dual coefficients for each alpha 169 self.looe_ = [] # Store LOOE for each alpha 170 171 for alpha in self.alpha: 172 G = K + alpha * np.eye(n_samples) 173 G_inv = np.linalg.inv(G) 174 diag_G_inv = np.diag(G_inv) 175 dual_coef = np.linalg.solve(G, y_centered) 176 looe = np.sum((dual_coef / diag_G_inv) ** 2) # Compute LOOE 177 self.dual_coefs_.append(dual_coef) 178 self.looe_.append(looe) 179 180 # Select the best alpha based on the smallest LOOE 181 best_index = np.argmin(self.looe_) 182 self.alpha_ = self.alpha[best_index] 183 self.dual_coef_ = self.dual_coefs_[best_index] 184 else: 185 # If alpha is a single value, proceed as usual 186 if self.backend == "gpu": 187 self.dual_coef_ = jnp.linalg.solve( 188 K + self.alpha * jnp.eye(n_samples), y_centered 189 ) 190 else: 191 self.dual_coef_ = np.linalg.solve( 192 K + self.alpha * np.eye(n_samples), y_centered 193 ) 194 195 return self 196 197 def predict(self, X, probs=False): 198 """ 199 Predict using the Kernel Ridge Regression model. 200 201 Parameters: 202 - X: array-like, shape (n_samples, n_features) 203 Test data. 204 205 Returns: 206 - Predicted values, shape (n_samples,). 207 """ 208 # Standardize the inputs 209 X = self.scaler.transform(X) 210 K = self._get_kernel(X, self.X_fit_) 211 if self.backend == "gpu": 212 preds = jnp.dot(K, self.dual_coef_) + self.y_mean_ 213 if probs: 214 # Compute similarity to self.X_fit_ 215 similarities = jnp.dot( 216 preds, self.X_fit_.T 217 ) # Shape: (n_samples, n_fit_) 218 # Apply softmax to get probabilities 219 return jaxsoftmax(similarities, axis=1) 220 return preds 221 else: 222 preds = np.dot(K, self.dual_coef_) + self.y_mean_ 223 if probs: 224 # Compute similarity to self.X_fit_ 225 similarities = np.dot( 226 preds, self.X_fit_.T 227 ) # Shape: (n_samples, n_fit_) 228 # Apply softmax to get probabilities 229 return softmax(similarities, axis=1) 230 return preds 231 232 def partial_fit(self, X, y): 233 """ 234 Incrementally fit the Kernel Ridge Regression model with new data using a recursive approach. 235 236 Parameters: 237 - X: array-like, shape (n_samples, n_features) 238 New training data. 239 - y: array-like, shape (n_samples,) 240 New target values. 241 242 Returns: 243 - self: object 244 The updated model. 245 """ 246 # Standardize the inputs 247 X = ( 248 self.scaler.fit_transform(X) 249 if not hasattr(self, "X_fit_") 250 else self.scaler.transform(X) 251 ) 252 253 if not hasattr(self, "X_fit_"): 254 # Initialize with the first batch of data 255 self.X_fit_ = X 256 257 # Center the response 258 self.y_mean_ = np.mean(y) 259 y_centered = y - self.y_mean_ 260 self.y_fit_ = y_centered 261 262 n_samples = X.shape[0] 263 264 # Compute the kernel matrix for the initial data 265 self.K_ = self._get_kernel(X, X) 266 267 # Initialize dual coefficients for each alpha 268 if isinstance(self.alpha, (list, np.ndarray)): 269 self.dual_coefs_ = [np.zeros(n_samples) for _ in self.alpha] 270 else: 271 self.dual_coef_ = np.zeros(n_samples) 272 else: 273 # Incrementally update with new data 274 y_centered = y - self.y_mean_ # Center the new batch of responses 275 for x_new, y_new in zip(X, y_centered): 276 x_new = x_new.reshape(1, -1) # Ensure x_new is 2D 277 k_new = self._get_kernel(self.X_fit_, x_new).flatten() 278 279 # Compute the kernel value for the new data point 280 k_self = self._get_kernel(x_new, x_new).item() 281 282 if isinstance(self.alpha, (list, np.ndarray)): 283 # Update dual coefficients for each alpha 284 for idx, alpha in enumerate(self.alpha): 285 gamma_new = 1 / (k_self + alpha) 286 residual = y_new - np.dot(self.dual_coefs_[idx], k_new) 287 self.dual_coefs_[idx] = np.append( 288 self.dual_coefs_[idx], gamma_new * residual 289 ) 290 else: 291 # Update dual coefficients for a single alpha 292 gamma_new = 1 / (k_self + self.alpha) 293 residual = y_new - np.dot(self.dual_coef_, k_new) 294 self.dual_coef_ = np.append( 295 self.dual_coef_, gamma_new * residual 296 ) 297 298 # Update the kernel matrix 299 self.K_ = np.block( 300 [ 301 [self.K_, k_new[:, None]], 302 [k_new[None, :], np.array([[k_self]])], 303 ] 304 ) 305 306 # Update the stored data 307 self.X_fit_ = np.vstack([self.X_fit_, x_new]) 308 self.y_fit_ = np.append(self.y_fit_, y_new) 309 310 # Select the best alpha based on LOOE after the batch 311 if isinstance(self.alpha, (list, np.ndarray)): 312 self.looe_ = [] 313 for idx, alpha in enumerate(self.alpha): 314 G = self.K_ + alpha * np.eye(self.K_.shape[0]) 315 G_inv = np.linalg.inv(G) 316 diag_G_inv = np.diag(G_inv) 317 looe = np.sum((self.dual_coefs_[idx] / diag_G_inv) ** 2) 318 self.looe_.append(looe) 319 320 # Select the best alpha 321 best_index = np.argmin(self.looe_) 322 self.alpha_ = self.alpha[best_index] 323 self.dual_coef_ = self.dual_coefs_[best_index] 324 325 return self
Kernel Ridge Regression with optional GPU support, Matérn kernels, and automatic input standardization.
Parameters:
- alpha: float Regularization parameter.
- kernel: str Kernel type ("linear", "rbf", or "matern").
- gamma: float Kernel coefficient for "rbf". Ignored for other kernels.
- nu: float Smoothness parameter for the Matérn kernel. Default is 1.5.
- length_scale: float Length scale parameter for the Matérn kernel. Default is 1.0.
- backend: str "cpu" or "gpu" (uses JAX if "gpu").
140 def fit(self, X, y): 141 """ 142 Fit the Kernel Ridge Regression model. 143 144 Parameters: 145 - X: array-like, shape (n_samples, n_features) 146 Training data. 147 - y: array-like, shape (n_samples,) 148 Target values. 149 """ 150 # Standardize the inputs 151 X = self.scaler.fit_transform(X) 152 self.X_fit_ = X 153 154 # Center the response 155 self.y_mean_ = np.mean(y) 156 y_centered = y - self.y_mean_ 157 158 n_samples = X.shape[0] 159 160 # Compute the kernel matrix 161 K = self._get_kernel(X, X) 162 self.K_ = K 163 self.y_fit_ = y_centered 164 165 if isinstance(self.alpha, (list, np.ndarray)): 166 # If alpha is a list or array, compute LOOE for each alpha 167 self.alphas_ = self.alpha # Store the list of alphas 168 self.dual_coefs_ = [] # Store dual coefficients for each alpha 169 self.looe_ = [] # Store LOOE for each alpha 170 171 for alpha in self.alpha: 172 G = K + alpha * np.eye(n_samples) 173 G_inv = np.linalg.inv(G) 174 diag_G_inv = np.diag(G_inv) 175 dual_coef = np.linalg.solve(G, y_centered) 176 looe = np.sum((dual_coef / diag_G_inv) ** 2) # Compute LOOE 177 self.dual_coefs_.append(dual_coef) 178 self.looe_.append(looe) 179 180 # Select the best alpha based on the smallest LOOE 181 best_index = np.argmin(self.looe_) 182 self.alpha_ = self.alpha[best_index] 183 self.dual_coef_ = self.dual_coefs_[best_index] 184 else: 185 # If alpha is a single value, proceed as usual 186 if self.backend == "gpu": 187 self.dual_coef_ = jnp.linalg.solve( 188 K + self.alpha * jnp.eye(n_samples), y_centered 189 ) 190 else: 191 self.dual_coef_ = np.linalg.solve( 192 K + self.alpha * np.eye(n_samples), y_centered 193 ) 194 195 return self
Fit the Kernel Ridge Regression model.
Parameters:
- X: array-like, shape (n_samples, n_features) Training data.
- y: array-like, shape (n_samples,) Target values.
197 def predict(self, X, probs=False): 198 """ 199 Predict using the Kernel Ridge Regression model. 200 201 Parameters: 202 - X: array-like, shape (n_samples, n_features) 203 Test data. 204 205 Returns: 206 - Predicted values, shape (n_samples,). 207 """ 208 # Standardize the inputs 209 X = self.scaler.transform(X) 210 K = self._get_kernel(X, self.X_fit_) 211 if self.backend == "gpu": 212 preds = jnp.dot(K, self.dual_coef_) + self.y_mean_ 213 if probs: 214 # Compute similarity to self.X_fit_ 215 similarities = jnp.dot( 216 preds, self.X_fit_.T 217 ) # Shape: (n_samples, n_fit_) 218 # Apply softmax to get probabilities 219 return jaxsoftmax(similarities, axis=1) 220 return preds 221 else: 222 preds = np.dot(K, self.dual_coef_) + self.y_mean_ 223 if probs: 224 # Compute similarity to self.X_fit_ 225 similarities = np.dot( 226 preds, self.X_fit_.T 227 ) # Shape: (n_samples, n_fit_) 228 # Apply softmax to get probabilities 229 return softmax(similarities, axis=1) 230 return preds
Predict using the Kernel Ridge Regression model.
Parameters:
- X: array-like, shape (n_samples, n_features) Test data.
Returns:
- Predicted values, shape (n_samples,).
761class LazyClassifier(LazyDeepClassifier): 762 """ 763 Fitting -- almost -- all the classification algorithms with 764 nnetsauce's CustomClassifier and returning their scores (no layers). 765 766 Parameters: 767 768 verbose: int, optional (default=0) 769 Any positive number for verbosity. 770 771 ignore_warnings: bool, optional (default=True) 772 When set to True, the warning related to algorigms that are not able to run are ignored. 773 774 custom_metric: function, optional (default=None) 775 When function is provided, models are evaluated based on the custom evaluation metric provided. 776 777 predictions: bool, optional (default=False) 778 When set to True, the predictions of all the models models are returned as dataframe. 779 780 sort_by: string, optional (default='Accuracy') 781 Sort models by a metric. Available options are 'Accuracy', 'Balanced Accuracy', 'ROC AUC', 'F1 Score' 782 or a custom metric identified by its name and provided by custom_metric. 783 784 random_state: int, optional (default=42) 785 Reproducibiility seed. 786 787 estimators: list, optional (default='all') 788 list of Estimators names or just 'all' (default='all') 789 790 preprocess: bool 791 preprocessing is done when set to True 792 793 n_jobs : int, when possible, run in parallel 794 For now, only used by individual models that support it. 795 796 All the other parameters are the same as CustomClassifier's. 797 798 Attributes: 799 800 models_: dict-object 801 Returns a dictionary with each model pipeline as value 802 with key as name of models. 803 804 best_model_: object 805 Returns the best model pipeline based on the sort_by metric. 806 807 Examples: 808 809 import nnetsauce as ns 810 import numpy as np 811 from sklearn import datasets 812 from sklearn.utils import shuffle 813 814 dataset = datasets.load_iris() 815 X = dataset.data 816 y = dataset.target 817 X, y = shuffle(X, y, random_state=123) 818 X = X.astype(np.float32) 819 y = y.astype(np.float32) 820 X_train, X_test = X[:100], X[100:] 821 y_train, y_test = y[:100], y[100:] 822 823 clf = ns.LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None) 824 models, predictions = clf.fit(X_train, X_test, y_train, y_test) 825 model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test) 826 print(models) 827 828 """ 829 830 def __init__( 831 self, 832 verbose=0, 833 ignore_warnings=True, 834 custom_metric=None, 835 predictions=False, 836 sort_by="Accuracy", 837 random_state=42, 838 estimators="all", 839 preprocess=False, 840 n_jobs=None, 841 # CustomClassifier attributes 842 obj=None, 843 n_hidden_features=5, 844 activation_name="relu", 845 a=0.01, 846 nodes_sim="sobol", 847 bias=True, 848 dropout=0, 849 direct_link=True, 850 n_clusters=2, 851 cluster_encode=True, 852 type_clust="kmeans", 853 type_scaling=("std", "std", "std"), 854 col_sample=1, 855 row_sample=1, 856 seed=123, 857 backend="cpu", 858 ): 859 super().__init__( 860 verbose=verbose, 861 ignore_warnings=ignore_warnings, 862 custom_metric=custom_metric, 863 predictions=predictions, 864 sort_by=sort_by, 865 random_state=random_state, 866 estimators=estimators, 867 preprocess=preprocess, 868 n_jobs=n_jobs, 869 n_layers=1, 870 obj=obj, 871 n_hidden_features=n_hidden_features, 872 activation_name=activation_name, 873 a=a, 874 nodes_sim=nodes_sim, 875 bias=bias, 876 dropout=dropout, 877 direct_link=direct_link, 878 n_clusters=n_clusters, 879 cluster_encode=cluster_encode, 880 type_clust=type_clust, 881 type_scaling=type_scaling, 882 col_sample=col_sample, 883 row_sample=row_sample, 884 seed=seed, 885 backend=backend, 886 )
Fitting -- almost -- all the classification algorithms with nnetsauce's CustomClassifier and returning their scores (no layers).
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, the warning related to algorigms that are not able to run are ignored.
custom_metric: function, optional (default=None)
When function is provided, models are evaluated based on the custom evaluation metric provided.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models models are returned as dataframe.
sort_by: string, optional (default='Accuracy')
Sort models by a metric. Available options are 'Accuracy', 'Balanced Accuracy', 'ROC AUC', 'F1 Score'
or a custom metric identified by its name and provided by custom_metric.
random_state: int, optional (default=42)
Reproducibiility seed.
estimators: list, optional (default='all')
list of Estimators names or just 'all' (default='all')
preprocess: bool
preprocessing is done when set to True
n_jobs : int, when possible, run in parallel
For now, only used by individual models that support it.
All the other parameters are the same as CustomClassifier's.
Attributes:
models_: dict-object
Returns a dictionary with each model pipeline as value
with key as name of models.
best_model_: object
Returns the best model pipeline based on the sort_by metric.
Examples:
import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle
dataset = datasets.load_iris()
X = dataset.data
y = dataset.target
X, y = shuffle(X, y, random_state=123)
X = X.astype(np.float32)
y = y.astype(np.float32)
X_train, X_test = X[:100], X[100:]
y_train, y_test = y[:100], y[100:]
clf = ns.LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)
model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test)
print(models)
657class LazyRegressor(LazyDeepRegressor): 658 """ 659 Fitting -- almost -- all the regression algorithms with 660 nnetsauce's CustomRegressor and returning their scores. 661 662 Parameters: 663 664 verbose: int, optional (default=0) 665 Any positive number for verbosity. 666 667 ignore_warnings: bool, optional (default=True) 668 When set to True, the warning related to algorigms that are not able to run are ignored. 669 670 custom_metric: function, optional (default=None) 671 When function is provided, models are evaluated based on the custom evaluation metric provided. 672 673 predictions: bool, optional (default=False) 674 When set to True, the predictions of all the models models are returned as dataframe. 675 676 sort_by: string, optional (default='RMSE') 677 Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken' and 'Custom Metric'. 678 or a custom metric identified by its name and provided by custom_metric. 679 680 random_state: int, optional (default=42) 681 Reproducibiility seed. 682 683 estimators: list, optional (default='all') 684 list of Estimators names or just 'all' (default='all') 685 686 preprocess: bool 687 preprocessing is done when set to True 688 689 n_jobs : int, when possible, run in parallel 690 For now, only used by individual models that support it. 691 692 All the other parameters are the same as CustomRegressor's. 693 694 Attributes: 695 696 models_: dict-object 697 Returns a dictionary with each model pipeline as value 698 with key as name of models. 699 700 best_model_: object 701 Returns the best model pipeline based on the sort_by metric. 702 703 Examples: 704 705 import nnetsauce as ns 706 import numpy as np 707 from sklearn import datasets 708 from sklearn.utils import shuffle 709 710 diabetes = datasets.load_diabetes() 711 X, y = shuffle(diabetes.data, diabetes.target, random_state=13) 712 X = X.astype(np.float32) 713 714 offset = int(X.shape[0] * 0.9) 715 X_train, y_train = X[:offset], y[:offset] 716 X_test, y_test = X[offset:], y[offset:] 717 718 reg = ns.LazyRegressor(verbose=0, ignore_warnings=False, 719 custom_metric=None) 720 models, predictions = reg.fit(X_train, X_test, y_train, y_test) 721 print(models) 722 723 """ 724 725 def __init__( 726 self, 727 verbose=0, 728 ignore_warnings=True, 729 custom_metric=None, 730 predictions=False, 731 sort_by="RMSE", 732 random_state=42, 733 estimators="all", 734 preprocess=False, 735 n_jobs=None, 736 # CustomRegressor attributes 737 obj=None, 738 n_hidden_features=5, 739 activation_name="relu", 740 a=0.01, 741 nodes_sim="sobol", 742 bias=True, 743 dropout=0, 744 direct_link=True, 745 n_clusters=2, 746 cluster_encode=True, 747 type_clust="kmeans", 748 type_scaling=("std", "std", "std"), 749 col_sample=1, 750 row_sample=1, 751 seed=123, 752 backend="cpu", 753 ): 754 super().__init__( 755 verbose=verbose, 756 ignore_warnings=ignore_warnings, 757 custom_metric=custom_metric, 758 predictions=predictions, 759 sort_by=sort_by, 760 random_state=random_state, 761 estimators=estimators, 762 preprocess=preprocess, 763 n_jobs=n_jobs, 764 n_layers=1, 765 obj=obj, 766 n_hidden_features=n_hidden_features, 767 activation_name=activation_name, 768 a=a, 769 nodes_sim=nodes_sim, 770 bias=bias, 771 dropout=dropout, 772 direct_link=direct_link, 773 n_clusters=n_clusters, 774 cluster_encode=cluster_encode, 775 type_clust=type_clust, 776 type_scaling=type_scaling, 777 col_sample=col_sample, 778 row_sample=row_sample, 779 seed=seed, 780 backend=backend, 781 )
Fitting -- almost -- all the regression algorithms with nnetsauce's CustomRegressor and returning their scores.
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, the warning related to algorigms that are not able to run are ignored.
custom_metric: function, optional (default=None)
When function is provided, models are evaluated based on the custom evaluation metric provided.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models models are returned as dataframe.
sort_by: string, optional (default='RMSE')
Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken' and 'Custom Metric'.
or a custom metric identified by its name and provided by custom_metric.
random_state: int, optional (default=42)
Reproducibiility seed.
estimators: list, optional (default='all')
list of Estimators names or just 'all' (default='all')
preprocess: bool
preprocessing is done when set to True
n_jobs : int, when possible, run in parallel
For now, only used by individual models that support it.
All the other parameters are the same as CustomRegressor's.
Attributes:
models_: dict-object
Returns a dictionary with each model pipeline as value
with key as name of models.
best_model_: object
Returns the best model pipeline based on the sort_by metric.
Examples:
import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle
diabetes = datasets.load_diabetes()
X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
X = X.astype(np.float32)
offset = int(X.shape[0] * 0.9)
X_train, y_train = X[:offset], y[:offset]
X_test, y_test = X[offset:], y[offset:]
reg = ns.LazyRegressor(verbose=0, ignore_warnings=False,
custom_metric=None)
models, predictions = reg.fit(X_train, X_test, y_train, y_test)
print(models)
94class LazyDeepClassifier(Custom, ClassifierMixin): 95 """ 96 97 Fitting -- almost -- all the classification algorithms with layers of 98 nnetsauce's CustomClassifier and returning their scores. 99 100 Parameters: 101 102 verbose: int, optional (default=0) 103 Any positive number for verbosity. 104 105 ignore_warnings: bool, optional (default=True) 106 When set to True, the warning related to algorigms that are not 107 able to run are ignored. 108 109 custom_metric: function, optional (default=None) 110 When function is provided, models are evaluated based on the custom 111 evaluation metric provided. 112 113 predictions: bool, optional (default=False) 114 When set to True, the predictions of all the models models are 115 returned as data frame. 116 117 sort_by: string, optional (default='Accuracy') 118 Sort models by a metric. Available options are 'Accuracy', 119 'Balanced Accuracy', 'ROC AUC', 'F1 Score' or a custom metric 120 identified by its name and provided by custom_metric. 121 122 random_state: int, optional (default=42) 123 Reproducibiility seed. 124 125 estimators: list, optional (default='all') 126 list of Estimators names or just 'all' for > 90 classifiers 127 (default='all') 128 129 preprocess: bool, preprocessing is done when set to True 130 131 n_jobs: int, when possible, run in parallel 132 For now, only used by individual models that support it. 133 134 n_layers: int, optional (default=3) 135 Number of layers of CustomClassifiers to be used. 136 137 All the other parameters are the same as CustomClassifier's. 138 139 Attributes: 140 141 models_: dict-object 142 Returns a dictionary with each model pipeline as value 143 with key as name of models. 144 145 best_model_: object 146 Returns the best model pipeline. 147 148 Examples 149 150 ```python 151 import nnetsauce as ns 152 from sklearn.datasets import load_breast_cancer 153 from sklearn.model_selection import train_test_split 154 data = load_breast_cancer() 155 X = data.data 156 y= data.target 157 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, 158 random_state=123) 159 clf = ns.LazyDeepClassifier(verbose=0, ignore_warnings=True, custom_metric=None) 160 models, predictions = clf.fit(X_train, X_test, y_train, y_test) 161 model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test) 162 print(models) 163 ``` 164 165 """ 166 167 def __init__( 168 self, 169 verbose=0, 170 ignore_warnings=True, 171 custom_metric=None, 172 predictions=False, 173 sort_by="Accuracy", 174 random_state=42, 175 estimators="all", 176 preprocess=False, 177 n_jobs=None, 178 # Defining depth 179 n_layers=3, 180 # CustomClassifier attributes 181 obj=None, 182 n_hidden_features=5, 183 activation_name="relu", 184 a=0.01, 185 nodes_sim="sobol", 186 bias=True, 187 dropout=0, 188 direct_link=True, 189 n_clusters=2, 190 cluster_encode=True, 191 type_clust="kmeans", 192 type_scaling=("std", "std", "std"), 193 col_sample=1, 194 row_sample=1, 195 seed=123, 196 backend="cpu", 197 ): 198 self.verbose = verbose 199 self.ignore_warnings = ignore_warnings 200 self.custom_metric = custom_metric 201 self.predictions = predictions 202 self.sort_by = sort_by 203 self.models_ = {} 204 self.best_model_ = None 205 self.random_state = random_state 206 self.estimators = estimators 207 self.preprocess = preprocess 208 self.n_layers = n_layers - 1 209 self.n_jobs = n_jobs 210 super().__init__( 211 obj=obj, 212 n_hidden_features=n_hidden_features, 213 activation_name=activation_name, 214 a=a, 215 nodes_sim=nodes_sim, 216 bias=bias, 217 dropout=dropout, 218 direct_link=direct_link, 219 n_clusters=n_clusters, 220 cluster_encode=cluster_encode, 221 type_clust=type_clust, 222 type_scaling=type_scaling, 223 col_sample=col_sample, 224 row_sample=row_sample, 225 seed=seed, 226 backend=backend, 227 ) 228 229 def fit(self, X_train, X_test, y_train, y_test): 230 """Fit classifiers to X_train and y_train, predict and score on X_test, 231 y_test. 232 233 Parameters: 234 235 X_train: array-like, 236 Training vectors, where rows is the number of samples 237 and columns is the number of features. 238 239 X_test: array-like, 240 Testing vectors, where rows is the number of samples 241 and columns is the number of features. 242 243 y_train: array-like, 244 Training vectors, where rows is the number of samples 245 and columns is the number of features. 246 247 y_test: array-like, 248 Testing vectors, where rows is the number of samples 249 and columns is the number of features. 250 251 Returns: 252 253 scores: Pandas DataFrame 254 Returns metrics of all the models in a Pandas DataFrame. 255 256 predictions: Pandas DataFrame 257 Returns predictions of all the models in a Pandas DataFrame. 258 """ 259 Accuracy = [] 260 B_Accuracy = [] 261 ROC_AUC = [] 262 F1 = [] 263 names = [] 264 TIME = [] 265 predictions = {} 266 267 if self.custom_metric is not None: 268 CUSTOM_METRIC = [] 269 270 if isinstance(X_train, np.ndarray): 271 X_train = pd.DataFrame(X_train) 272 X_test = pd.DataFrame(X_test) 273 274 numeric_features = X_train.select_dtypes(include=[np.number]).columns 275 categorical_features = X_train.select_dtypes(include=["object"]).columns 276 277 categorical_low, categorical_high = get_card_split( 278 X_train, categorical_features 279 ) 280 281 if self.preprocess is True: 282 preprocessor = ColumnTransformer( 283 transformers=[ 284 ("numeric", numeric_transformer, numeric_features), 285 ( 286 "categorical_low", 287 categorical_transformer_low, 288 categorical_low, 289 ), 290 ( 291 "categorical_high", 292 categorical_transformer_high, 293 categorical_high, 294 ), 295 ] 296 ) 297 298 # baseline models 299 try: 300 baseline_names = ["RandomForestClassifier", "XGBClassifier"] 301 baseline_models = [RandomForestClassifier(), xgb.XGBClassifier()] 302 except Exception as exception: 303 baseline_names = ["RandomForestClassifier"] 304 baseline_models = [RandomForestClassifier()] 305 306 for name, model in zip(baseline_names, baseline_models): 307 start = time.time() 308 try: 309 model.fit(X_train, y_train) 310 self.models_[name] = model 311 y_pred = model.predict(X_test) 312 accuracy = accuracy_score(y_test, y_pred, normalize=True) 313 b_accuracy = balanced_accuracy_score(y_test, y_pred) 314 f1 = f1_score(y_test, y_pred, average="weighted") 315 try: 316 roc_auc = roc_auc_score(y_test, y_pred) 317 except Exception as exception: 318 roc_auc = None 319 if self.ignore_warnings is False: 320 print("ROC AUC couldn't be calculated for " + name) 321 print(exception) 322 names.append(name) 323 Accuracy.append(accuracy) 324 B_Accuracy.append(b_accuracy) 325 ROC_AUC.append(roc_auc) 326 F1.append(f1) 327 TIME.append(time.time() - start) 328 if self.custom_metric is not None: 329 custom_metric = self.custom_metric(y_test, y_pred) 330 CUSTOM_METRIC.append(custom_metric) 331 if self.verbose > 0: 332 if self.custom_metric is not None: 333 print( 334 { 335 "Model": name, 336 "Accuracy": accuracy, 337 "Balanced Accuracy": b_accuracy, 338 "ROC AUC": roc_auc, 339 "F1 Score": f1, 340 self.custom_metric.__name__: custom_metric, 341 "Time taken": time.time() - start, 342 } 343 ) 344 else: 345 print( 346 { 347 "Model": name, 348 "Accuracy": accuracy, 349 "Balanced Accuracy": b_accuracy, 350 "ROC AUC": roc_auc, 351 "F1 Score": f1, 352 "Time taken": time.time() - start, 353 } 354 ) 355 if self.predictions: 356 predictions[name] = y_pred 357 except Exception as exception: 358 if self.ignore_warnings is False: 359 print(name + " model failed to execute") 360 print(exception) 361 362 if self.estimators == "all": 363 self.classifiers = [ 364 item 365 for sublist in [ 366 DEEPCLASSIFIERS, 367 DEEPMULTITASKCLASSIFIERS, 368 DEEPSIMPLEMULTITASKCLASSIFIERS, 369 ] 370 for item in sublist 371 ] 372 else: 373 self.classifiers = ( 374 [ 375 ("DeepCustomClassifier(" + est[0] + ")", est[1]) 376 for est in all_estimators() 377 if ( 378 issubclass(est[1], ClassifierMixin) 379 and (est[0] in self.estimators) 380 ) 381 ] 382 + [ 383 ( 384 "DeepMultitaskClassifier(" + est[0] + ")", 385 partial(MultitaskClassifier, obj=est[1]()), 386 ) 387 for est in all_estimators() 388 if ( 389 issubclass(est[1], RegressorMixin) 390 and (est[0] in self.estimators) 391 ) 392 ] 393 + [ 394 ( 395 "DeepSimpleMultitaskClassifier(" + est[0] + ")", 396 partial(SimpleMultitaskClassifier, obj=est[1]()), 397 ) 398 for est in all_estimators() 399 if ( 400 issubclass(est[1], RegressorMixin) 401 and (est[0] in self.estimators) 402 ) 403 ] 404 ) 405 406 if self.preprocess is True: 407 408 for name, model in tqdm(self.classifiers): # do parallel exec 409 410 other_args = ( 411 {} 412 ) # use this trick for `random_state` too --> refactor 413 try: 414 if ( 415 "n_jobs" in model().get_params().keys() 416 and name.find("LogisticRegression") == -1 417 ): 418 other_args["n_jobs"] = self.n_jobs 419 except Exception: 420 pass 421 422 start = time.time() 423 424 try: 425 if "random_state" in model().get_params().keys(): 426 layer_clf = CustomClassifier( 427 obj=model(random_state=self.random_state), 428 n_hidden_features=self.n_hidden_features, 429 activation_name=self.activation_name, 430 a=self.a, 431 nodes_sim=self.nodes_sim, 432 bias=self.bias, 433 dropout=self.dropout, 434 direct_link=self.direct_link, 435 n_clusters=self.n_clusters, 436 cluster_encode=self.cluster_encode, 437 type_clust=self.type_clust, 438 type_scaling=self.type_scaling, 439 col_sample=self.col_sample, 440 row_sample=self.row_sample, 441 seed=self.seed, 442 backend=self.backend, 443 cv_calibration=None, 444 ) 445 446 else: 447 layer_clf = CustomClassifier( 448 obj=model(), 449 n_hidden_features=self.n_hidden_features, 450 activation_name=self.activation_name, 451 a=self.a, 452 nodes_sim=self.nodes_sim, 453 bias=self.bias, 454 dropout=self.dropout, 455 direct_link=self.direct_link, 456 n_clusters=self.n_clusters, 457 cluster_encode=self.cluster_encode, 458 type_clust=self.type_clust, 459 type_scaling=self.type_scaling, 460 col_sample=self.col_sample, 461 row_sample=self.row_sample, 462 seed=self.seed, 463 backend=self.backend, 464 cv_calibration=None, 465 ) 466 467 layer_clf.fit(X_train, y_train) 468 469 for _ in range(self.n_layers): 470 layer_clf = deepcopy( 471 CustomClassifier( 472 obj=layer_clf, 473 n_hidden_features=self.n_hidden_features, 474 activation_name=self.activation_name, 475 a=self.a, 476 nodes_sim=self.nodes_sim, 477 bias=self.bias, 478 dropout=self.dropout, 479 direct_link=self.direct_link, 480 n_clusters=self.n_clusters, 481 cluster_encode=self.cluster_encode, 482 type_clust=self.type_clust, 483 type_scaling=self.type_scaling, 484 col_sample=self.col_sample, 485 row_sample=self.row_sample, 486 seed=self.seed, 487 backend=self.backend, 488 cv_calibration=None, 489 ) 490 ) 491 492 pipe = Pipeline( 493 [ 494 ("preprocessor", preprocessor), 495 ("classifier", layer_clf), 496 ] 497 ) 498 499 pipe.fit(X_train, y_train) 500 self.models_[name] = pipe 501 y_pred = pipe.predict(X_test) 502 accuracy = accuracy_score(y_test, y_pred, normalize=True) 503 b_accuracy = balanced_accuracy_score(y_test, y_pred) 504 f1 = f1_score(y_test, y_pred, average="weighted") 505 try: 506 roc_auc = roc_auc_score(y_test, y_pred) 507 except Exception as exception: 508 roc_auc = None 509 if self.ignore_warnings is False: 510 print("ROC AUC couldn't be calculated for " + name) 511 print(exception) 512 names.append(name) 513 Accuracy.append(accuracy) 514 B_Accuracy.append(b_accuracy) 515 ROC_AUC.append(roc_auc) 516 F1.append(f1) 517 TIME.append(time.time() - start) 518 if self.custom_metric is not None: 519 custom_metric = self.custom_metric(y_test, y_pred) 520 CUSTOM_METRIC.append(custom_metric) 521 if self.verbose > 0: 522 if self.custom_metric is not None: 523 print( 524 { 525 "Model": name, 526 "Accuracy": accuracy, 527 "Balanced Accuracy": b_accuracy, 528 "ROC AUC": roc_auc, 529 "F1 Score": f1, 530 self.custom_metric.__name__: custom_metric, 531 "Time taken": time.time() - start, 532 } 533 ) 534 else: 535 print( 536 { 537 "Model": name, 538 "Accuracy": accuracy, 539 "Balanced Accuracy": b_accuracy, 540 "ROC AUC": roc_auc, 541 "F1 Score": f1, 542 "Time taken": time.time() - start, 543 } 544 ) 545 if self.predictions: 546 predictions[name] = y_pred 547 except Exception as exception: 548 if self.ignore_warnings is False: 549 print(name + " model failed to execute") 550 print(exception) 551 552 else: # no preprocessing 553 554 for name, model in tqdm(self.classifiers): # do parallel exec 555 start = time.time() 556 try: 557 if "random_state" in model().get_params().keys(): 558 layer_clf = CustomClassifier( 559 obj=model(random_state=self.random_state), 560 n_hidden_features=self.n_hidden_features, 561 activation_name=self.activation_name, 562 a=self.a, 563 nodes_sim=self.nodes_sim, 564 bias=self.bias, 565 dropout=self.dropout, 566 direct_link=self.direct_link, 567 n_clusters=self.n_clusters, 568 cluster_encode=self.cluster_encode, 569 type_clust=self.type_clust, 570 type_scaling=self.type_scaling, 571 col_sample=self.col_sample, 572 row_sample=self.row_sample, 573 seed=self.seed, 574 backend=self.backend, 575 cv_calibration=None, 576 ) 577 578 else: 579 layer_clf = CustomClassifier( 580 obj=model(), 581 n_hidden_features=self.n_hidden_features, 582 activation_name=self.activation_name, 583 a=self.a, 584 nodes_sim=self.nodes_sim, 585 bias=self.bias, 586 dropout=self.dropout, 587 direct_link=self.direct_link, 588 n_clusters=self.n_clusters, 589 cluster_encode=self.cluster_encode, 590 type_clust=self.type_clust, 591 type_scaling=self.type_scaling, 592 col_sample=self.col_sample, 593 row_sample=self.row_sample, 594 seed=self.seed, 595 backend=self.backend, 596 cv_calibration=None, 597 ) 598 599 layer_clf.fit(X_train, y_train) 600 601 for _ in range(self.n_layers): 602 layer_clf = deepcopy( 603 CustomClassifier( 604 obj=layer_clf, 605 n_hidden_features=self.n_hidden_features, 606 activation_name=self.activation_name, 607 a=self.a, 608 nodes_sim=self.nodes_sim, 609 bias=self.bias, 610 dropout=self.dropout, 611 direct_link=self.direct_link, 612 n_clusters=self.n_clusters, 613 cluster_encode=self.cluster_encode, 614 type_clust=self.type_clust, 615 type_scaling=self.type_scaling, 616 col_sample=self.col_sample, 617 row_sample=self.row_sample, 618 seed=self.seed, 619 backend=self.backend, 620 cv_calibration=None, 621 ) 622 ) 623 624 # layer_clf.fit(X_train, y_train) 625 626 layer_clf.fit(X_train, y_train) 627 628 self.models_[name] = layer_clf 629 y_pred = layer_clf.predict(X_test) 630 accuracy = accuracy_score(y_test, y_pred, normalize=True) 631 b_accuracy = balanced_accuracy_score(y_test, y_pred) 632 f1 = f1_score(y_test, y_pred, average="weighted") 633 try: 634 roc_auc = roc_auc_score(y_test, y_pred) 635 except Exception as exception: 636 roc_auc = None 637 if self.ignore_warnings is False: 638 print("ROC AUC couldn't be calculated for " + name) 639 print(exception) 640 names.append(name) 641 Accuracy.append(accuracy) 642 B_Accuracy.append(b_accuracy) 643 ROC_AUC.append(roc_auc) 644 F1.append(f1) 645 TIME.append(time.time() - start) 646 if self.custom_metric is not None: 647 custom_metric = self.custom_metric(y_test, y_pred) 648 CUSTOM_METRIC.append(custom_metric) 649 if self.verbose > 0: 650 if self.custom_metric is not None: 651 print( 652 { 653 "Model": name, 654 "Accuracy": accuracy, 655 "Balanced Accuracy": b_accuracy, 656 "ROC AUC": roc_auc, 657 "F1 Score": f1, 658 self.custom_metric.__name__: custom_metric, 659 "Time taken": time.time() - start, 660 } 661 ) 662 else: 663 print( 664 { 665 "Model": name, 666 "Accuracy": accuracy, 667 "Balanced Accuracy": b_accuracy, 668 "ROC AUC": roc_auc, 669 "F1 Score": f1, 670 "Time taken": time.time() - start, 671 } 672 ) 673 if self.predictions: 674 predictions[name] = y_pred 675 except Exception as exception: 676 if self.ignore_warnings is False: 677 print(name + " model failed to execute") 678 print(exception) 679 680 if self.custom_metric is None: 681 scores = pd.DataFrame( 682 { 683 "Model": names, 684 "Accuracy": Accuracy, 685 "Balanced Accuracy": B_Accuracy, 686 "ROC AUC": ROC_AUC, 687 "F1 Score": F1, 688 "Time Taken": TIME, 689 } 690 ) 691 else: 692 scores = pd.DataFrame( 693 { 694 "Model": names, 695 "Accuracy": Accuracy, 696 "Balanced Accuracy": B_Accuracy, 697 "ROC AUC": ROC_AUC, 698 "F1 Score": F1, 699 "Custom metric": CUSTOM_METRIC, 700 "Time Taken": TIME, 701 } 702 ) 703 scores = scores.sort_values(by=self.sort_by, ascending=False).set_index( 704 "Model" 705 ) 706 707 self.best_model_ = self.models_[scores.index[0]] 708 709 if self.predictions is True: 710 711 return scores, predictions 712 713 return scores 714 715 def get_best_model(self): 716 """ 717 This function returns the best model pipeline based on the sort_by metric. 718 719 Returns: 720 721 best_model: object, 722 Returns the best model pipeline based on the sort_by metric. 723 724 """ 725 return self.best_model_ 726 727 def provide_models(self, X_train, X_test, y_train, y_test): 728 """Returns all the model objects trained. If fit hasn't been called yet, 729 then it's called to return the models. 730 731 Parameters: 732 733 X_train: array-like, 734 Training vectors, where rows is the number of samples 735 and columns is the number of features. 736 737 X_test: array-like, 738 Testing vectors, where rows is the number of samples 739 and columns is the number of features. 740 741 y_train: array-like, 742 Training vectors, where rows is the number of samples 743 and columns is the number of features. 744 745 y_test: array-like, 746 Testing vectors, where rows is the number of samples 747 and columns is the number of features. 748 749 Returns: 750 751 models: dict-object, 752 Returns a dictionary with each model's pipeline as value 753 and key = name of the model. 754 """ 755 if len(self.models_.keys()) == 0: 756 self.fit(X_train, X_test, y_train, y_test) 757 758 return self.models_
Fitting -- almost -- all the classification algorithms with layers of nnetsauce's CustomClassifier and returning their scores.
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, the warning related to algorigms that are not
able to run are ignored.
custom_metric: function, optional (default=None)
When function is provided, models are evaluated based on the custom
evaluation metric provided.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models models are
returned as data frame.
sort_by: string, optional (default='Accuracy')
Sort models by a metric. Available options are 'Accuracy',
'Balanced Accuracy', 'ROC AUC', 'F1 Score' or a custom metric
identified by its name and provided by custom_metric.
random_state: int, optional (default=42)
Reproducibiility seed.
estimators: list, optional (default='all')
list of Estimators names or just 'all' for > 90 classifiers
(default='all')
preprocess: bool, preprocessing is done when set to True
n_jobs: int, when possible, run in parallel
For now, only used by individual models that support it.
n_layers: int, optional (default=3)
Number of layers of CustomClassifiers to be used.
All the other parameters are the same as CustomClassifier's.
Attributes:
models_: dict-object
Returns a dictionary with each model pipeline as value
with key as name of models.
best_model_: object
Returns the best model pipeline.
Examples
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
data = load_breast_cancer()
X = data.data
y= data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2,
random_state=123)
clf = ns.LazyDeepClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)
model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test)
print(models)
229 def fit(self, X_train, X_test, y_train, y_test): 230 """Fit classifiers to X_train and y_train, predict and score on X_test, 231 y_test. 232 233 Parameters: 234 235 X_train: array-like, 236 Training vectors, where rows is the number of samples 237 and columns is the number of features. 238 239 X_test: array-like, 240 Testing vectors, where rows is the number of samples 241 and columns is the number of features. 242 243 y_train: array-like, 244 Training vectors, where rows is the number of samples 245 and columns is the number of features. 246 247 y_test: array-like, 248 Testing vectors, where rows is the number of samples 249 and columns is the number of features. 250 251 Returns: 252 253 scores: Pandas DataFrame 254 Returns metrics of all the models in a Pandas DataFrame. 255 256 predictions: Pandas DataFrame 257 Returns predictions of all the models in a Pandas DataFrame. 258 """ 259 Accuracy = [] 260 B_Accuracy = [] 261 ROC_AUC = [] 262 F1 = [] 263 names = [] 264 TIME = [] 265 predictions = {} 266 267 if self.custom_metric is not None: 268 CUSTOM_METRIC = [] 269 270 if isinstance(X_train, np.ndarray): 271 X_train = pd.DataFrame(X_train) 272 X_test = pd.DataFrame(X_test) 273 274 numeric_features = X_train.select_dtypes(include=[np.number]).columns 275 categorical_features = X_train.select_dtypes(include=["object"]).columns 276 277 categorical_low, categorical_high = get_card_split( 278 X_train, categorical_features 279 ) 280 281 if self.preprocess is True: 282 preprocessor = ColumnTransformer( 283 transformers=[ 284 ("numeric", numeric_transformer, numeric_features), 285 ( 286 "categorical_low", 287 categorical_transformer_low, 288 categorical_low, 289 ), 290 ( 291 "categorical_high", 292 categorical_transformer_high, 293 categorical_high, 294 ), 295 ] 296 ) 297 298 # baseline models 299 try: 300 baseline_names = ["RandomForestClassifier", "XGBClassifier"] 301 baseline_models = [RandomForestClassifier(), xgb.XGBClassifier()] 302 except Exception as exception: 303 baseline_names = ["RandomForestClassifier"] 304 baseline_models = [RandomForestClassifier()] 305 306 for name, model in zip(baseline_names, baseline_models): 307 start = time.time() 308 try: 309 model.fit(X_train, y_train) 310 self.models_[name] = model 311 y_pred = model.predict(X_test) 312 accuracy = accuracy_score(y_test, y_pred, normalize=True) 313 b_accuracy = balanced_accuracy_score(y_test, y_pred) 314 f1 = f1_score(y_test, y_pred, average="weighted") 315 try: 316 roc_auc = roc_auc_score(y_test, y_pred) 317 except Exception as exception: 318 roc_auc = None 319 if self.ignore_warnings is False: 320 print("ROC AUC couldn't be calculated for " + name) 321 print(exception) 322 names.append(name) 323 Accuracy.append(accuracy) 324 B_Accuracy.append(b_accuracy) 325 ROC_AUC.append(roc_auc) 326 F1.append(f1) 327 TIME.append(time.time() - start) 328 if self.custom_metric is not None: 329 custom_metric = self.custom_metric(y_test, y_pred) 330 CUSTOM_METRIC.append(custom_metric) 331 if self.verbose > 0: 332 if self.custom_metric is not None: 333 print( 334 { 335 "Model": name, 336 "Accuracy": accuracy, 337 "Balanced Accuracy": b_accuracy, 338 "ROC AUC": roc_auc, 339 "F1 Score": f1, 340 self.custom_metric.__name__: custom_metric, 341 "Time taken": time.time() - start, 342 } 343 ) 344 else: 345 print( 346 { 347 "Model": name, 348 "Accuracy": accuracy, 349 "Balanced Accuracy": b_accuracy, 350 "ROC AUC": roc_auc, 351 "F1 Score": f1, 352 "Time taken": time.time() - start, 353 } 354 ) 355 if self.predictions: 356 predictions[name] = y_pred 357 except Exception as exception: 358 if self.ignore_warnings is False: 359 print(name + " model failed to execute") 360 print(exception) 361 362 if self.estimators == "all": 363 self.classifiers = [ 364 item 365 for sublist in [ 366 DEEPCLASSIFIERS, 367 DEEPMULTITASKCLASSIFIERS, 368 DEEPSIMPLEMULTITASKCLASSIFIERS, 369 ] 370 for item in sublist 371 ] 372 else: 373 self.classifiers = ( 374 [ 375 ("DeepCustomClassifier(" + est[0] + ")", est[1]) 376 for est in all_estimators() 377 if ( 378 issubclass(est[1], ClassifierMixin) 379 and (est[0] in self.estimators) 380 ) 381 ] 382 + [ 383 ( 384 "DeepMultitaskClassifier(" + est[0] + ")", 385 partial(MultitaskClassifier, obj=est[1]()), 386 ) 387 for est in all_estimators() 388 if ( 389 issubclass(est[1], RegressorMixin) 390 and (est[0] in self.estimators) 391 ) 392 ] 393 + [ 394 ( 395 "DeepSimpleMultitaskClassifier(" + est[0] + ")", 396 partial(SimpleMultitaskClassifier, obj=est[1]()), 397 ) 398 for est in all_estimators() 399 if ( 400 issubclass(est[1], RegressorMixin) 401 and (est[0] in self.estimators) 402 ) 403 ] 404 ) 405 406 if self.preprocess is True: 407 408 for name, model in tqdm(self.classifiers): # do parallel exec 409 410 other_args = ( 411 {} 412 ) # use this trick for `random_state` too --> refactor 413 try: 414 if ( 415 "n_jobs" in model().get_params().keys() 416 and name.find("LogisticRegression") == -1 417 ): 418 other_args["n_jobs"] = self.n_jobs 419 except Exception: 420 pass 421 422 start = time.time() 423 424 try: 425 if "random_state" in model().get_params().keys(): 426 layer_clf = CustomClassifier( 427 obj=model(random_state=self.random_state), 428 n_hidden_features=self.n_hidden_features, 429 activation_name=self.activation_name, 430 a=self.a, 431 nodes_sim=self.nodes_sim, 432 bias=self.bias, 433 dropout=self.dropout, 434 direct_link=self.direct_link, 435 n_clusters=self.n_clusters, 436 cluster_encode=self.cluster_encode, 437 type_clust=self.type_clust, 438 type_scaling=self.type_scaling, 439 col_sample=self.col_sample, 440 row_sample=self.row_sample, 441 seed=self.seed, 442 backend=self.backend, 443 cv_calibration=None, 444 ) 445 446 else: 447 layer_clf = CustomClassifier( 448 obj=model(), 449 n_hidden_features=self.n_hidden_features, 450 activation_name=self.activation_name, 451 a=self.a, 452 nodes_sim=self.nodes_sim, 453 bias=self.bias, 454 dropout=self.dropout, 455 direct_link=self.direct_link, 456 n_clusters=self.n_clusters, 457 cluster_encode=self.cluster_encode, 458 type_clust=self.type_clust, 459 type_scaling=self.type_scaling, 460 col_sample=self.col_sample, 461 row_sample=self.row_sample, 462 seed=self.seed, 463 backend=self.backend, 464 cv_calibration=None, 465 ) 466 467 layer_clf.fit(X_train, y_train) 468 469 for _ in range(self.n_layers): 470 layer_clf = deepcopy( 471 CustomClassifier( 472 obj=layer_clf, 473 n_hidden_features=self.n_hidden_features, 474 activation_name=self.activation_name, 475 a=self.a, 476 nodes_sim=self.nodes_sim, 477 bias=self.bias, 478 dropout=self.dropout, 479 direct_link=self.direct_link, 480 n_clusters=self.n_clusters, 481 cluster_encode=self.cluster_encode, 482 type_clust=self.type_clust, 483 type_scaling=self.type_scaling, 484 col_sample=self.col_sample, 485 row_sample=self.row_sample, 486 seed=self.seed, 487 backend=self.backend, 488 cv_calibration=None, 489 ) 490 ) 491 492 pipe = Pipeline( 493 [ 494 ("preprocessor", preprocessor), 495 ("classifier", layer_clf), 496 ] 497 ) 498 499 pipe.fit(X_train, y_train) 500 self.models_[name] = pipe 501 y_pred = pipe.predict(X_test) 502 accuracy = accuracy_score(y_test, y_pred, normalize=True) 503 b_accuracy = balanced_accuracy_score(y_test, y_pred) 504 f1 = f1_score(y_test, y_pred, average="weighted") 505 try: 506 roc_auc = roc_auc_score(y_test, y_pred) 507 except Exception as exception: 508 roc_auc = None 509 if self.ignore_warnings is False: 510 print("ROC AUC couldn't be calculated for " + name) 511 print(exception) 512 names.append(name) 513 Accuracy.append(accuracy) 514 B_Accuracy.append(b_accuracy) 515 ROC_AUC.append(roc_auc) 516 F1.append(f1) 517 TIME.append(time.time() - start) 518 if self.custom_metric is not None: 519 custom_metric = self.custom_metric(y_test, y_pred) 520 CUSTOM_METRIC.append(custom_metric) 521 if self.verbose > 0: 522 if self.custom_metric is not None: 523 print( 524 { 525 "Model": name, 526 "Accuracy": accuracy, 527 "Balanced Accuracy": b_accuracy, 528 "ROC AUC": roc_auc, 529 "F1 Score": f1, 530 self.custom_metric.__name__: custom_metric, 531 "Time taken": time.time() - start, 532 } 533 ) 534 else: 535 print( 536 { 537 "Model": name, 538 "Accuracy": accuracy, 539 "Balanced Accuracy": b_accuracy, 540 "ROC AUC": roc_auc, 541 "F1 Score": f1, 542 "Time taken": time.time() - start, 543 } 544 ) 545 if self.predictions: 546 predictions[name] = y_pred 547 except Exception as exception: 548 if self.ignore_warnings is False: 549 print(name + " model failed to execute") 550 print(exception) 551 552 else: # no preprocessing 553 554 for name, model in tqdm(self.classifiers): # do parallel exec 555 start = time.time() 556 try: 557 if "random_state" in model().get_params().keys(): 558 layer_clf = CustomClassifier( 559 obj=model(random_state=self.random_state), 560 n_hidden_features=self.n_hidden_features, 561 activation_name=self.activation_name, 562 a=self.a, 563 nodes_sim=self.nodes_sim, 564 bias=self.bias, 565 dropout=self.dropout, 566 direct_link=self.direct_link, 567 n_clusters=self.n_clusters, 568 cluster_encode=self.cluster_encode, 569 type_clust=self.type_clust, 570 type_scaling=self.type_scaling, 571 col_sample=self.col_sample, 572 row_sample=self.row_sample, 573 seed=self.seed, 574 backend=self.backend, 575 cv_calibration=None, 576 ) 577 578 else: 579 layer_clf = CustomClassifier( 580 obj=model(), 581 n_hidden_features=self.n_hidden_features, 582 activation_name=self.activation_name, 583 a=self.a, 584 nodes_sim=self.nodes_sim, 585 bias=self.bias, 586 dropout=self.dropout, 587 direct_link=self.direct_link, 588 n_clusters=self.n_clusters, 589 cluster_encode=self.cluster_encode, 590 type_clust=self.type_clust, 591 type_scaling=self.type_scaling, 592 col_sample=self.col_sample, 593 row_sample=self.row_sample, 594 seed=self.seed, 595 backend=self.backend, 596 cv_calibration=None, 597 ) 598 599 layer_clf.fit(X_train, y_train) 600 601 for _ in range(self.n_layers): 602 layer_clf = deepcopy( 603 CustomClassifier( 604 obj=layer_clf, 605 n_hidden_features=self.n_hidden_features, 606 activation_name=self.activation_name, 607 a=self.a, 608 nodes_sim=self.nodes_sim, 609 bias=self.bias, 610 dropout=self.dropout, 611 direct_link=self.direct_link, 612 n_clusters=self.n_clusters, 613 cluster_encode=self.cluster_encode, 614 type_clust=self.type_clust, 615 type_scaling=self.type_scaling, 616 col_sample=self.col_sample, 617 row_sample=self.row_sample, 618 seed=self.seed, 619 backend=self.backend, 620 cv_calibration=None, 621 ) 622 ) 623 624 # layer_clf.fit(X_train, y_train) 625 626 layer_clf.fit(X_train, y_train) 627 628 self.models_[name] = layer_clf 629 y_pred = layer_clf.predict(X_test) 630 accuracy = accuracy_score(y_test, y_pred, normalize=True) 631 b_accuracy = balanced_accuracy_score(y_test, y_pred) 632 f1 = f1_score(y_test, y_pred, average="weighted") 633 try: 634 roc_auc = roc_auc_score(y_test, y_pred) 635 except Exception as exception: 636 roc_auc = None 637 if self.ignore_warnings is False: 638 print("ROC AUC couldn't be calculated for " + name) 639 print(exception) 640 names.append(name) 641 Accuracy.append(accuracy) 642 B_Accuracy.append(b_accuracy) 643 ROC_AUC.append(roc_auc) 644 F1.append(f1) 645 TIME.append(time.time() - start) 646 if self.custom_metric is not None: 647 custom_metric = self.custom_metric(y_test, y_pred) 648 CUSTOM_METRIC.append(custom_metric) 649 if self.verbose > 0: 650 if self.custom_metric is not None: 651 print( 652 { 653 "Model": name, 654 "Accuracy": accuracy, 655 "Balanced Accuracy": b_accuracy, 656 "ROC AUC": roc_auc, 657 "F1 Score": f1, 658 self.custom_metric.__name__: custom_metric, 659 "Time taken": time.time() - start, 660 } 661 ) 662 else: 663 print( 664 { 665 "Model": name, 666 "Accuracy": accuracy, 667 "Balanced Accuracy": b_accuracy, 668 "ROC AUC": roc_auc, 669 "F1 Score": f1, 670 "Time taken": time.time() - start, 671 } 672 ) 673 if self.predictions: 674 predictions[name] = y_pred 675 except Exception as exception: 676 if self.ignore_warnings is False: 677 print(name + " model failed to execute") 678 print(exception) 679 680 if self.custom_metric is None: 681 scores = pd.DataFrame( 682 { 683 "Model": names, 684 "Accuracy": Accuracy, 685 "Balanced Accuracy": B_Accuracy, 686 "ROC AUC": ROC_AUC, 687 "F1 Score": F1, 688 "Time Taken": TIME, 689 } 690 ) 691 else: 692 scores = pd.DataFrame( 693 { 694 "Model": names, 695 "Accuracy": Accuracy, 696 "Balanced Accuracy": B_Accuracy, 697 "ROC AUC": ROC_AUC, 698 "F1 Score": F1, 699 "Custom metric": CUSTOM_METRIC, 700 "Time Taken": TIME, 701 } 702 ) 703 scores = scores.sort_values(by=self.sort_by, ascending=False).set_index( 704 "Model" 705 ) 706 707 self.best_model_ = self.models_[scores.index[0]] 708 709 if self.predictions is True: 710 711 return scores, predictions 712 713 return scores
Fit classifiers to X_train and y_train, predict and score on X_test, y_test.
Parameters:
X_train: array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
X_test: array-like,
Testing vectors, where rows is the number of samples
and columns is the number of features.
y_train: array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
y_test: array-like,
Testing vectors, where rows is the number of samples
and columns is the number of features.
Returns:
scores: Pandas DataFrame
Returns metrics of all the models in a Pandas DataFrame.
predictions: Pandas DataFrame
Returns predictions of all the models in a Pandas DataFrame.
727 def provide_models(self, X_train, X_test, y_train, y_test): 728 """Returns all the model objects trained. If fit hasn't been called yet, 729 then it's called to return the models. 730 731 Parameters: 732 733 X_train: array-like, 734 Training vectors, where rows is the number of samples 735 and columns is the number of features. 736 737 X_test: array-like, 738 Testing vectors, where rows is the number of samples 739 and columns is the number of features. 740 741 y_train: array-like, 742 Training vectors, where rows is the number of samples 743 and columns is the number of features. 744 745 y_test: array-like, 746 Testing vectors, where rows is the number of samples 747 and columns is the number of features. 748 749 Returns: 750 751 models: dict-object, 752 Returns a dictionary with each model's pipeline as value 753 and key = name of the model. 754 """ 755 if len(self.models_.keys()) == 0: 756 self.fit(X_train, X_test, y_train, y_test) 757 758 return self.models_
Returns all the model objects trained. If fit hasn't been called yet, then it's called to return the models.
Parameters:
X_train: array-like, Training vectors, where rows is the number of samples and columns is the number of features.
X_test: array-like, Testing vectors, where rows is the number of samples and columns is the number of features.
y_train: array-like, Training vectors, where rows is the number of samples and columns is the number of features.
y_test: array-like, Testing vectors, where rows is the number of samples and columns is the number of features.
Returns:
models: dict-object,
Returns a dictionary with each model's pipeline as value
and key = name of the model.
90class LazyDeepRegressor(Custom, RegressorMixin): 91 """ 92 Fitting -- almost -- all the regression algorithms with layers of 93 nnetsauce's CustomRegressor and returning their scores. 94 95 Parameters: 96 97 verbose: int, optional (default=0) 98 Any positive number for verbosity. 99 100 ignore_warnings: bool, optional (default=True) 101 When set to True, the warning related to algorigms that are not able to run are ignored. 102 103 custom_metric: function, optional (default=None) 104 When function is provided, models are evaluated based on the custom evaluation metric provided. 105 106 predictions: bool, optional (default=False) 107 When set to True, the predictions of all the models models are returned as dataframe. 108 109 sort_by: string, optional (default='RMSE') 110 Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken' and 'Custom Metric'. 111 or a custom metric identified by its name and provided by custom_metric. 112 113 random_state: int, optional (default=42) 114 Reproducibiility seed. 115 116 estimators: list, optional (default='all') 117 list of Estimators names or just 'all' (default='all') 118 119 preprocess: bool 120 preprocessing is done when set to True 121 122 n_jobs : int, when possible, run in parallel 123 For now, only used by individual models that support it. 124 125 n_layers: int, optional (default=3) 126 Number of layers of CustomRegressors to be used. 127 128 All the other parameters are the same as CustomRegressor's. 129 130 Attributes: 131 132 models_: dict-object 133 Returns a dictionary with each model pipeline as value 134 with key as name of models. 135 136 best_model_: object 137 Returns the best model pipeline based on the sort_by metric. 138 139 Examples: 140 141 import nnetsauce as ns 142 import numpy as np 143 from sklearn import datasets 144 from sklearn.utils import shuffle 145 146 diabetes = datasets.load_diabetes() 147 X, y = shuffle(diabetes.data, diabetes.target, random_state=13) 148 X = X.astype(np.float32) 149 150 offset = int(X.shape[0] * 0.9) 151 X_train, y_train = X[:offset], y[:offset] 152 X_test, y_test = X[offset:], y[offset:] 153 154 reg = ns.LazyDeepRegressor(verbose=0, ignore_warnings=False, custom_metric=None) 155 models, predictions = reg.fit(X_train, X_test, y_train, y_test) 156 print(models) 157 158 """ 159 160 def __init__( 161 self, 162 verbose=0, 163 ignore_warnings=True, 164 custom_metric=None, 165 predictions=False, 166 sort_by="RMSE", 167 random_state=42, 168 estimators="all", 169 preprocess=False, 170 n_jobs=None, 171 # Defining depth 172 n_layers=3, 173 # CustomRegressor attributes 174 obj=None, 175 n_hidden_features=5, 176 activation_name="relu", 177 a=0.01, 178 nodes_sim="sobol", 179 bias=True, 180 dropout=0, 181 direct_link=True, 182 n_clusters=2, 183 cluster_encode=True, 184 type_clust="kmeans", 185 type_scaling=("std", "std", "std"), 186 col_sample=1, 187 row_sample=1, 188 seed=123, 189 backend="cpu", 190 ): 191 self.verbose = verbose 192 self.ignore_warnings = ignore_warnings 193 self.custom_metric = custom_metric 194 self.predictions = predictions 195 self.sort_by = sort_by 196 self.models_ = {} 197 self.best_model_ = None 198 self.random_state = random_state 199 self.estimators = estimators 200 self.preprocess = preprocess 201 self.n_layers = n_layers - 1 202 self.n_jobs = n_jobs 203 super().__init__( 204 obj=obj, 205 n_hidden_features=n_hidden_features, 206 activation_name=activation_name, 207 a=a, 208 nodes_sim=nodes_sim, 209 bias=bias, 210 dropout=dropout, 211 direct_link=direct_link, 212 n_clusters=n_clusters, 213 cluster_encode=cluster_encode, 214 type_clust=type_clust, 215 type_scaling=type_scaling, 216 col_sample=col_sample, 217 row_sample=row_sample, 218 seed=seed, 219 backend=backend, 220 ) 221 222 def fit(self, X_train, X_test, y_train, y_test): 223 """Fit Regression algorithms to X_train and y_train, predict and score on X_test, y_test. 224 225 Parameters: 226 227 X_train : array-like, 228 Training vectors, where rows is the number of samples 229 and columns is the number of features. 230 231 X_test : array-like, 232 Testing vectors, where rows is the number of samples 233 and columns is the number of features. 234 235 y_train : array-like, 236 Training vectors, where rows is the number of samples 237 and columns is the number of features. 238 239 y_test : array-like, 240 Testing vectors, where rows is the number of samples 241 and columns is the number of features. 242 243 Returns: 244 ------- 245 scores: Pandas DataFrame 246 Returns metrics of all the models in a Pandas DataFrame. 247 248 predictions : Pandas DataFrame 249 Returns predictions of all the models in a Pandas DataFrame. 250 251 """ 252 R2 = [] 253 ADJR2 = [] 254 RMSE = [] 255 # WIN = [] 256 names = [] 257 TIME = [] 258 predictions = {} 259 260 if self.custom_metric: 261 CUSTOM_METRIC = [] 262 263 if isinstance(X_train, np.ndarray): 264 X_train = pd.DataFrame(X_train) 265 X_test = pd.DataFrame(X_test) 266 267 numeric_features = X_train.select_dtypes(include=[np.number]).columns 268 categorical_features = X_train.select_dtypes(include=["object"]).columns 269 270 categorical_low, categorical_high = get_card_split( 271 X_train, categorical_features 272 ) 273 274 if self.preprocess is True: 275 preprocessor = ColumnTransformer( 276 transformers=[ 277 ("numeric", numeric_transformer, numeric_features), 278 ( 279 "categorical_low", 280 categorical_transformer_low, 281 categorical_low, 282 ), 283 ( 284 "categorical_high", 285 categorical_transformer_high, 286 categorical_high, 287 ), 288 ] 289 ) 290 291 # base models 292 try: 293 baseline_names = ["RandomForestRegressor", "XGBRegressor"] 294 baseline_models = [RandomForestRegressor(), xgb.XGBRegressor()] 295 except Exception as exception: 296 baseline_names = ["RandomForestRegressor"] 297 baseline_models = [RandomForestRegressor()] 298 299 for name, model in zip(baseline_names, baseline_models): 300 start = time.time() 301 try: 302 model.fit(X_train, y_train) 303 self.models_[name] = model 304 y_pred = model.predict(X_test) 305 r_squared = r2_score(y_test, y_pred) 306 adj_rsquared = adjusted_rsquared( 307 r_squared, X_test.shape[0], X_test.shape[1] 308 ) 309 rmse = np.sqrt(np.mean((y_test - y_pred) ** 2)) 310 311 names.append(name) 312 R2.append(r_squared) 313 ADJR2.append(adj_rsquared) 314 RMSE.append(rmse) 315 TIME.append(time.time() - start) 316 317 if self.custom_metric: 318 custom_metric = self.custom_metric(y_test, y_pred) 319 CUSTOM_METRIC.append(custom_metric) 320 321 if self.verbose > 0: 322 scores_verbose = { 323 "Model": name, 324 "R-Squared": r_squared, 325 "Adjusted R-Squared": adj_rsquared, 326 "RMSE": rmse, 327 "Time taken": time.time() - start, 328 } 329 330 if self.custom_metric: 331 scores_verbose[self.custom_metric.__name__] = ( 332 custom_metric 333 ) 334 335 print(scores_verbose) 336 if self.predictions: 337 predictions[name] = y_pred 338 except Exception as exception: 339 if self.ignore_warnings is False: 340 print(name + " model failed to execute") 341 print(exception) 342 343 if self.estimators == "all": 344 self.regressors = DEEPREGRESSORS 345 else: 346 self.regressors = [ 347 ("DeepCustomRegressor(" + est[0] + ")", est[1]) 348 for est in all_estimators() 349 if ( 350 issubclass(est[1], RegressorMixin) 351 and (est[0] in self.estimators) 352 ) 353 ] 354 355 if self.preprocess is True: 356 357 for name, model in tqdm(self.regressors): # do parallel exec 358 start = time.time() 359 try: 360 if "random_state" in model().get_params().keys(): 361 layer_regr = CustomRegressor( 362 obj=model(random_state=self.random_state), 363 n_hidden_features=self.n_hidden_features, 364 activation_name=self.activation_name, 365 a=self.a, 366 nodes_sim=self.nodes_sim, 367 bias=self.bias, 368 dropout=self.dropout, 369 direct_link=self.direct_link, 370 n_clusters=self.n_clusters, 371 cluster_encode=self.cluster_encode, 372 type_clust=self.type_clust, 373 type_scaling=self.type_scaling, 374 col_sample=self.col_sample, 375 row_sample=self.row_sample, 376 seed=self.seed, 377 backend=self.backend, 378 ) 379 else: 380 layer_regr = CustomRegressor( 381 obj=model(), 382 n_hidden_features=self.n_hidden_features, 383 activation_name=self.activation_name, 384 a=self.a, 385 nodes_sim=self.nodes_sim, 386 bias=self.bias, 387 dropout=self.dropout, 388 direct_link=self.direct_link, 389 n_clusters=self.n_clusters, 390 cluster_encode=self.cluster_encode, 391 type_clust=self.type_clust, 392 type_scaling=self.type_scaling, 393 col_sample=self.col_sample, 394 row_sample=self.row_sample, 395 seed=self.seed, 396 backend=self.backend, 397 ) 398 399 for _ in range(self.n_layers): 400 layer_regr = deepcopy( 401 CustomRegressor( 402 obj=layer_regr, 403 n_hidden_features=self.n_hidden_features, 404 activation_name=self.activation_name, 405 a=self.a, 406 nodes_sim=self.nodes_sim, 407 bias=self.bias, 408 dropout=self.dropout, 409 direct_link=self.direct_link, 410 n_clusters=self.n_clusters, 411 cluster_encode=self.cluster_encode, 412 type_clust=self.type_clust, 413 type_scaling=self.type_scaling, 414 col_sample=self.col_sample, 415 row_sample=self.row_sample, 416 seed=self.seed, 417 backend=self.backend, 418 ) 419 ) 420 421 layer_regr.fit(X_train, y_train) 422 423 pipe = Pipeline( 424 steps=[ 425 ("preprocessor", preprocessor), 426 ("regressor", layer_regr), 427 ] 428 ) 429 430 pipe.fit(X_train, y_train) 431 432 self.models_[name] = pipe 433 y_pred = pipe.predict(X_test) 434 r_squared = r2_score(y_test, y_pred) 435 adj_rsquared = adjusted_rsquared( 436 r_squared, X_test.shape[0], X_test.shape[1] 437 ) 438 rmse = np.sqrt(np.mean((y_test - y_pred) ** 2)) 439 440 names.append(name) 441 R2.append(r_squared) 442 ADJR2.append(adj_rsquared) 443 RMSE.append(rmse) 444 TIME.append(time.time() - start) 445 446 if self.custom_metric: 447 custom_metric = self.custom_metric(y_test, y_pred) 448 CUSTOM_METRIC.append(custom_metric) 449 450 if self.verbose > 0: 451 scores_verbose = { 452 "Model": name, 453 "R-Squared": r_squared, 454 "Adjusted R-Squared": adj_rsquared, 455 "RMSE": rmse, 456 "Time taken": time.time() - start, 457 } 458 459 if self.custom_metric: 460 scores_verbose[self.custom_metric.__name__] = ( 461 custom_metric 462 ) 463 464 print(scores_verbose) 465 if self.predictions: 466 predictions[name] = y_pred 467 except Exception as exception: 468 if self.ignore_warnings is False: 469 print(name + " model failed to execute") 470 print(exception) 471 472 else: # no preprocessing 473 474 for name, model in tqdm(self.regressors): # do parallel exec 475 start = time.time() 476 try: 477 if "random_state" in model().get_params().keys(): 478 layer_regr = CustomRegressor( 479 obj=model(random_state=self.random_state), 480 n_hidden_features=self.n_hidden_features, 481 activation_name=self.activation_name, 482 a=self.a, 483 nodes_sim=self.nodes_sim, 484 bias=self.bias, 485 dropout=self.dropout, 486 direct_link=self.direct_link, 487 n_clusters=self.n_clusters, 488 cluster_encode=self.cluster_encode, 489 type_clust=self.type_clust, 490 type_scaling=self.type_scaling, 491 col_sample=self.col_sample, 492 row_sample=self.row_sample, 493 seed=self.seed, 494 backend=self.backend, 495 ) 496 else: 497 layer_regr = CustomRegressor( 498 obj=model(), 499 n_hidden_features=self.n_hidden_features, 500 activation_name=self.activation_name, 501 a=self.a, 502 nodes_sim=self.nodes_sim, 503 bias=self.bias, 504 dropout=self.dropout, 505 direct_link=self.direct_link, 506 n_clusters=self.n_clusters, 507 cluster_encode=self.cluster_encode, 508 type_clust=self.type_clust, 509 type_scaling=self.type_scaling, 510 col_sample=self.col_sample, 511 row_sample=self.row_sample, 512 seed=self.seed, 513 backend=self.backend, 514 ) 515 516 layer_regr.fit(X_train, y_train) 517 518 for _ in range(self.n_layers): 519 layer_regr = deepcopy( 520 CustomRegressor( 521 obj=layer_regr, 522 n_hidden_features=self.n_hidden_features, 523 activation_name=self.activation_name, 524 a=self.a, 525 nodes_sim=self.nodes_sim, 526 bias=self.bias, 527 dropout=self.dropout, 528 direct_link=self.direct_link, 529 n_clusters=self.n_clusters, 530 cluster_encode=self.cluster_encode, 531 type_clust=self.type_clust, 532 type_scaling=self.type_scaling, 533 col_sample=self.col_sample, 534 row_sample=self.row_sample, 535 seed=self.seed, 536 backend=self.backend, 537 ) 538 ) 539 540 # layer_regr.fit(X_train, y_train) 541 542 layer_regr.fit(X_train, y_train) 543 544 self.models_[name] = layer_regr 545 y_pred = layer_regr.predict(X_test) 546 547 r_squared = r2_score(y_test, y_pred) 548 adj_rsquared = adjusted_rsquared( 549 r_squared, X_test.shape[0], X_test.shape[1] 550 ) 551 rmse = np.sqrt(np.mean((y_test - y_pred) ** 2)) 552 553 names.append(name) 554 R2.append(r_squared) 555 ADJR2.append(adj_rsquared) 556 RMSE.append(rmse) 557 TIME.append(time.time() - start) 558 559 if self.custom_metric: 560 custom_metric = self.custom_metric(y_test, y_pred) 561 CUSTOM_METRIC.append(custom_metric) 562 563 if self.verbose > 0: 564 scores_verbose = { 565 "Model": name, 566 "R-Squared": r_squared, 567 "Adjusted R-Squared": adj_rsquared, 568 "RMSE": rmse, 569 "Time taken": time.time() - start, 570 } 571 572 if self.custom_metric: 573 scores_verbose[self.custom_metric.__name__] = ( 574 custom_metric 575 ) 576 577 print(scores_verbose) 578 if self.predictions: 579 predictions[name] = y_pred 580 except Exception as exception: 581 if self.ignore_warnings is False: 582 print(name + " model failed to execute") 583 print(exception) 584 585 scores = { 586 "Model": names, 587 "Adjusted R-Squared": ADJR2, 588 "R-Squared": R2, 589 "RMSE": RMSE, 590 "Time Taken": TIME, 591 } 592 593 if self.custom_metric: 594 scores["Custom metric"] = CUSTOM_METRIC 595 596 scores = pd.DataFrame(scores) 597 scores = scores.sort_values(by=self.sort_by, ascending=True).set_index( 598 "Model" 599 ) 600 601 self.best_model_ = self.models_[scores.index[0]] 602 603 if self.predictions is True: 604 605 return scores, predictions 606 607 return scores 608 609 def get_best_model(self): 610 """ 611 This function returns the best model pipeline based on the sort_by metric. 612 613 Returns: 614 615 best_model: object, 616 Returns the best model pipeline based on the sort_by metric. 617 618 """ 619 return self.best_model_ 620 621 def provide_models(self, X_train, X_test, y_train, y_test): 622 """ 623 This function returns all the model objects trained in fit function. 624 If fit is not called already, then we call fit and then return the models. 625 626 Parameters: 627 628 X_train : array-like, 629 Training vectors, where rows is the number of samples 630 and columns is the number of features. 631 632 X_test : array-like, 633 Testing vectors, where rows is the number of samples 634 and columns is the number of features. 635 636 y_train : array-like, 637 Training vectors, where rows is the number of samples 638 and columns is the number of features. 639 640 y_test : array-like, 641 Testing vectors, where rows is the number of samples 642 and columns is the number of features. 643 644 Returns: 645 646 models: dict-object, 647 Returns a dictionary with each model pipeline as value 648 with key as name of models. 649 650 """ 651 if len(self.models_.keys()) == 0: 652 self.fit(X_train, X_test, y_train, y_test) 653 654 return self.models_
Fitting -- almost -- all the regression algorithms with layers of nnetsauce's CustomRegressor and returning their scores.
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, the warning related to algorigms that are not able to run are ignored.
custom_metric: function, optional (default=None)
When function is provided, models are evaluated based on the custom evaluation metric provided.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models models are returned as dataframe.
sort_by: string, optional (default='RMSE')
Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken' and 'Custom Metric'.
or a custom metric identified by its name and provided by custom_metric.
random_state: int, optional (default=42)
Reproducibiility seed.
estimators: list, optional (default='all')
list of Estimators names or just 'all' (default='all')
preprocess: bool
preprocessing is done when set to True
n_jobs : int, when possible, run in parallel
For now, only used by individual models that support it.
n_layers: int, optional (default=3)
Number of layers of CustomRegressors to be used.
All the other parameters are the same as CustomRegressor's.
Attributes:
models_: dict-object
Returns a dictionary with each model pipeline as value
with key as name of models.
best_model_: object
Returns the best model pipeline based on the sort_by metric.
Examples:
import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle
diabetes = datasets.load_diabetes()
X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
X = X.astype(np.float32)
offset = int(X.shape[0] * 0.9)
X_train, y_train = X[:offset], y[:offset]
X_test, y_test = X[offset:], y[offset:]
reg = ns.LazyDeepRegressor(verbose=0, ignore_warnings=False, custom_metric=None)
models, predictions = reg.fit(X_train, X_test, y_train, y_test)
print(models)
222 def fit(self, X_train, X_test, y_train, y_test): 223 """Fit Regression algorithms to X_train and y_train, predict and score on X_test, y_test. 224 225 Parameters: 226 227 X_train : array-like, 228 Training vectors, where rows is the number of samples 229 and columns is the number of features. 230 231 X_test : array-like, 232 Testing vectors, where rows is the number of samples 233 and columns is the number of features. 234 235 y_train : array-like, 236 Training vectors, where rows is the number of samples 237 and columns is the number of features. 238 239 y_test : array-like, 240 Testing vectors, where rows is the number of samples 241 and columns is the number of features. 242 243 Returns: 244 ------- 245 scores: Pandas DataFrame 246 Returns metrics of all the models in a Pandas DataFrame. 247 248 predictions : Pandas DataFrame 249 Returns predictions of all the models in a Pandas DataFrame. 250 251 """ 252 R2 = [] 253 ADJR2 = [] 254 RMSE = [] 255 # WIN = [] 256 names = [] 257 TIME = [] 258 predictions = {} 259 260 if self.custom_metric: 261 CUSTOM_METRIC = [] 262 263 if isinstance(X_train, np.ndarray): 264 X_train = pd.DataFrame(X_train) 265 X_test = pd.DataFrame(X_test) 266 267 numeric_features = X_train.select_dtypes(include=[np.number]).columns 268 categorical_features = X_train.select_dtypes(include=["object"]).columns 269 270 categorical_low, categorical_high = get_card_split( 271 X_train, categorical_features 272 ) 273 274 if self.preprocess is True: 275 preprocessor = ColumnTransformer( 276 transformers=[ 277 ("numeric", numeric_transformer, numeric_features), 278 ( 279 "categorical_low", 280 categorical_transformer_low, 281 categorical_low, 282 ), 283 ( 284 "categorical_high", 285 categorical_transformer_high, 286 categorical_high, 287 ), 288 ] 289 ) 290 291 # base models 292 try: 293 baseline_names = ["RandomForestRegressor", "XGBRegressor"] 294 baseline_models = [RandomForestRegressor(), xgb.XGBRegressor()] 295 except Exception as exception: 296 baseline_names = ["RandomForestRegressor"] 297 baseline_models = [RandomForestRegressor()] 298 299 for name, model in zip(baseline_names, baseline_models): 300 start = time.time() 301 try: 302 model.fit(X_train, y_train) 303 self.models_[name] = model 304 y_pred = model.predict(X_test) 305 r_squared = r2_score(y_test, y_pred) 306 adj_rsquared = adjusted_rsquared( 307 r_squared, X_test.shape[0], X_test.shape[1] 308 ) 309 rmse = np.sqrt(np.mean((y_test - y_pred) ** 2)) 310 311 names.append(name) 312 R2.append(r_squared) 313 ADJR2.append(adj_rsquared) 314 RMSE.append(rmse) 315 TIME.append(time.time() - start) 316 317 if self.custom_metric: 318 custom_metric = self.custom_metric(y_test, y_pred) 319 CUSTOM_METRIC.append(custom_metric) 320 321 if self.verbose > 0: 322 scores_verbose = { 323 "Model": name, 324 "R-Squared": r_squared, 325 "Adjusted R-Squared": adj_rsquared, 326 "RMSE": rmse, 327 "Time taken": time.time() - start, 328 } 329 330 if self.custom_metric: 331 scores_verbose[self.custom_metric.__name__] = ( 332 custom_metric 333 ) 334 335 print(scores_verbose) 336 if self.predictions: 337 predictions[name] = y_pred 338 except Exception as exception: 339 if self.ignore_warnings is False: 340 print(name + " model failed to execute") 341 print(exception) 342 343 if self.estimators == "all": 344 self.regressors = DEEPREGRESSORS 345 else: 346 self.regressors = [ 347 ("DeepCustomRegressor(" + est[0] + ")", est[1]) 348 for est in all_estimators() 349 if ( 350 issubclass(est[1], RegressorMixin) 351 and (est[0] in self.estimators) 352 ) 353 ] 354 355 if self.preprocess is True: 356 357 for name, model in tqdm(self.regressors): # do parallel exec 358 start = time.time() 359 try: 360 if "random_state" in model().get_params().keys(): 361 layer_regr = CustomRegressor( 362 obj=model(random_state=self.random_state), 363 n_hidden_features=self.n_hidden_features, 364 activation_name=self.activation_name, 365 a=self.a, 366 nodes_sim=self.nodes_sim, 367 bias=self.bias, 368 dropout=self.dropout, 369 direct_link=self.direct_link, 370 n_clusters=self.n_clusters, 371 cluster_encode=self.cluster_encode, 372 type_clust=self.type_clust, 373 type_scaling=self.type_scaling, 374 col_sample=self.col_sample, 375 row_sample=self.row_sample, 376 seed=self.seed, 377 backend=self.backend, 378 ) 379 else: 380 layer_regr = CustomRegressor( 381 obj=model(), 382 n_hidden_features=self.n_hidden_features, 383 activation_name=self.activation_name, 384 a=self.a, 385 nodes_sim=self.nodes_sim, 386 bias=self.bias, 387 dropout=self.dropout, 388 direct_link=self.direct_link, 389 n_clusters=self.n_clusters, 390 cluster_encode=self.cluster_encode, 391 type_clust=self.type_clust, 392 type_scaling=self.type_scaling, 393 col_sample=self.col_sample, 394 row_sample=self.row_sample, 395 seed=self.seed, 396 backend=self.backend, 397 ) 398 399 for _ in range(self.n_layers): 400 layer_regr = deepcopy( 401 CustomRegressor( 402 obj=layer_regr, 403 n_hidden_features=self.n_hidden_features, 404 activation_name=self.activation_name, 405 a=self.a, 406 nodes_sim=self.nodes_sim, 407 bias=self.bias, 408 dropout=self.dropout, 409 direct_link=self.direct_link, 410 n_clusters=self.n_clusters, 411 cluster_encode=self.cluster_encode, 412 type_clust=self.type_clust, 413 type_scaling=self.type_scaling, 414 col_sample=self.col_sample, 415 row_sample=self.row_sample, 416 seed=self.seed, 417 backend=self.backend, 418 ) 419 ) 420 421 layer_regr.fit(X_train, y_train) 422 423 pipe = Pipeline( 424 steps=[ 425 ("preprocessor", preprocessor), 426 ("regressor", layer_regr), 427 ] 428 ) 429 430 pipe.fit(X_train, y_train) 431 432 self.models_[name] = pipe 433 y_pred = pipe.predict(X_test) 434 r_squared = r2_score(y_test, y_pred) 435 adj_rsquared = adjusted_rsquared( 436 r_squared, X_test.shape[0], X_test.shape[1] 437 ) 438 rmse = np.sqrt(np.mean((y_test - y_pred) ** 2)) 439 440 names.append(name) 441 R2.append(r_squared) 442 ADJR2.append(adj_rsquared) 443 RMSE.append(rmse) 444 TIME.append(time.time() - start) 445 446 if self.custom_metric: 447 custom_metric = self.custom_metric(y_test, y_pred) 448 CUSTOM_METRIC.append(custom_metric) 449 450 if self.verbose > 0: 451 scores_verbose = { 452 "Model": name, 453 "R-Squared": r_squared, 454 "Adjusted R-Squared": adj_rsquared, 455 "RMSE": rmse, 456 "Time taken": time.time() - start, 457 } 458 459 if self.custom_metric: 460 scores_verbose[self.custom_metric.__name__] = ( 461 custom_metric 462 ) 463 464 print(scores_verbose) 465 if self.predictions: 466 predictions[name] = y_pred 467 except Exception as exception: 468 if self.ignore_warnings is False: 469 print(name + " model failed to execute") 470 print(exception) 471 472 else: # no preprocessing 473 474 for name, model in tqdm(self.regressors): # do parallel exec 475 start = time.time() 476 try: 477 if "random_state" in model().get_params().keys(): 478 layer_regr = CustomRegressor( 479 obj=model(random_state=self.random_state), 480 n_hidden_features=self.n_hidden_features, 481 activation_name=self.activation_name, 482 a=self.a, 483 nodes_sim=self.nodes_sim, 484 bias=self.bias, 485 dropout=self.dropout, 486 direct_link=self.direct_link, 487 n_clusters=self.n_clusters, 488 cluster_encode=self.cluster_encode, 489 type_clust=self.type_clust, 490 type_scaling=self.type_scaling, 491 col_sample=self.col_sample, 492 row_sample=self.row_sample, 493 seed=self.seed, 494 backend=self.backend, 495 ) 496 else: 497 layer_regr = CustomRegressor( 498 obj=model(), 499 n_hidden_features=self.n_hidden_features, 500 activation_name=self.activation_name, 501 a=self.a, 502 nodes_sim=self.nodes_sim, 503 bias=self.bias, 504 dropout=self.dropout, 505 direct_link=self.direct_link, 506 n_clusters=self.n_clusters, 507 cluster_encode=self.cluster_encode, 508 type_clust=self.type_clust, 509 type_scaling=self.type_scaling, 510 col_sample=self.col_sample, 511 row_sample=self.row_sample, 512 seed=self.seed, 513 backend=self.backend, 514 ) 515 516 layer_regr.fit(X_train, y_train) 517 518 for _ in range(self.n_layers): 519 layer_regr = deepcopy( 520 CustomRegressor( 521 obj=layer_regr, 522 n_hidden_features=self.n_hidden_features, 523 activation_name=self.activation_name, 524 a=self.a, 525 nodes_sim=self.nodes_sim, 526 bias=self.bias, 527 dropout=self.dropout, 528 direct_link=self.direct_link, 529 n_clusters=self.n_clusters, 530 cluster_encode=self.cluster_encode, 531 type_clust=self.type_clust, 532 type_scaling=self.type_scaling, 533 col_sample=self.col_sample, 534 row_sample=self.row_sample, 535 seed=self.seed, 536 backend=self.backend, 537 ) 538 ) 539 540 # layer_regr.fit(X_train, y_train) 541 542 layer_regr.fit(X_train, y_train) 543 544 self.models_[name] = layer_regr 545 y_pred = layer_regr.predict(X_test) 546 547 r_squared = r2_score(y_test, y_pred) 548 adj_rsquared = adjusted_rsquared( 549 r_squared, X_test.shape[0], X_test.shape[1] 550 ) 551 rmse = np.sqrt(np.mean((y_test - y_pred) ** 2)) 552 553 names.append(name) 554 R2.append(r_squared) 555 ADJR2.append(adj_rsquared) 556 RMSE.append(rmse) 557 TIME.append(time.time() - start) 558 559 if self.custom_metric: 560 custom_metric = self.custom_metric(y_test, y_pred) 561 CUSTOM_METRIC.append(custom_metric) 562 563 if self.verbose > 0: 564 scores_verbose = { 565 "Model": name, 566 "R-Squared": r_squared, 567 "Adjusted R-Squared": adj_rsquared, 568 "RMSE": rmse, 569 "Time taken": time.time() - start, 570 } 571 572 if self.custom_metric: 573 scores_verbose[self.custom_metric.__name__] = ( 574 custom_metric 575 ) 576 577 print(scores_verbose) 578 if self.predictions: 579 predictions[name] = y_pred 580 except Exception as exception: 581 if self.ignore_warnings is False: 582 print(name + " model failed to execute") 583 print(exception) 584 585 scores = { 586 "Model": names, 587 "Adjusted R-Squared": ADJR2, 588 "R-Squared": R2, 589 "RMSE": RMSE, 590 "Time Taken": TIME, 591 } 592 593 if self.custom_metric: 594 scores["Custom metric"] = CUSTOM_METRIC 595 596 scores = pd.DataFrame(scores) 597 scores = scores.sort_values(by=self.sort_by, ascending=True).set_index( 598 "Model" 599 ) 600 601 self.best_model_ = self.models_[scores.index[0]] 602 603 if self.predictions is True: 604 605 return scores, predictions 606 607 return scores
Fit Regression algorithms to X_train and y_train, predict and score on X_test, y_test.
Parameters:
X_train : array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
X_test : array-like,
Testing vectors, where rows is the number of samples
and columns is the number of features.
y_train : array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
y_test : array-like,
Testing vectors, where rows is the number of samples
and columns is the number of features.
Returns:
scores: Pandas DataFrame Returns metrics of all the models in a Pandas DataFrame.
predictions : Pandas DataFrame Returns predictions of all the models in a Pandas DataFrame.
621 def provide_models(self, X_train, X_test, y_train, y_test): 622 """ 623 This function returns all the model objects trained in fit function. 624 If fit is not called already, then we call fit and then return the models. 625 626 Parameters: 627 628 X_train : array-like, 629 Training vectors, where rows is the number of samples 630 and columns is the number of features. 631 632 X_test : array-like, 633 Testing vectors, where rows is the number of samples 634 and columns is the number of features. 635 636 y_train : array-like, 637 Training vectors, where rows is the number of samples 638 and columns is the number of features. 639 640 y_test : array-like, 641 Testing vectors, where rows is the number of samples 642 and columns is the number of features. 643 644 Returns: 645 646 models: dict-object, 647 Returns a dictionary with each model pipeline as value 648 with key as name of models. 649 650 """ 651 if len(self.models_.keys()) == 0: 652 self.fit(X_train, X_test, y_train, y_test) 653 654 return self.models_
This function returns all the model objects trained in fit function. If fit is not called already, then we call fit and then return the models.
Parameters:
X_train : array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
X_test : array-like,
Testing vectors, where rows is the number of samples
and columns is the number of features.
y_train : array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
y_test : array-like,
Testing vectors, where rows is the number of samples
and columns is the number of features.
Returns:
models: dict-object,
Returns a dictionary with each model pipeline as value
with key as name of models.
1004class LazyMTS(LazyDeepMTS): 1005 """ 1006 Fitting -- almost -- all the regression algorithms to multivariate time series 1007 and returning their scores (no layers). 1008 1009 Parameters: 1010 1011 verbose: int, optional (default=0) 1012 Any positive number for verbosity. 1013 1014 ignore_warnings: bool, optional (default=True) 1015 When set to True, the warning related to algorigms that are not 1016 able to run are ignored. 1017 1018 custom_metric: function, optional (default=None) 1019 When function is provided, models are evaluated based on the custom 1020 evaluation metric provided. 1021 1022 predictions: bool, optional (default=False) 1023 When set to True, the predictions of all the models models are returned as dataframe. 1024 1025 sort_by: string, optional (default='RMSE') 1026 Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE', 1027 'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and 1028 provided by custom_metric. 1029 1030 random_state: int, optional (default=42) 1031 Reproducibiility seed. 1032 1033 estimators: list, optional (default='all') 1034 list of Estimators (regression algorithms) names or just 'all' (default='all') 1035 1036 preprocess: bool, preprocessing is done when set to True 1037 1038 h: int, optional (default=None) 1039 Number of steps ahead to predict (when used, must be > 0 and < X_test.shape[0]). 1040 1041 All the other parameters are the same as MTS's. 1042 1043 Attributes: 1044 1045 models_: dict-object 1046 Returns a dictionary with each model pipeline as value 1047 with key as name of models. 1048 1049 best_model_: object 1050 Returns the best model pipeline based on the sort_by metric. 1051 1052 Examples: 1053 1054 See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict 1055 1056 """ 1057 1058 def __init__( 1059 self, 1060 verbose=0, 1061 ignore_warnings=True, 1062 custom_metric=None, 1063 predictions=False, 1064 sort_by=None, # leave it as is 1065 random_state=42, 1066 estimators="all", 1067 preprocess=False, 1068 h=None, 1069 # MTS attributes 1070 obj=None, 1071 n_hidden_features=5, 1072 activation_name="relu", 1073 a=0.01, 1074 nodes_sim="sobol", 1075 bias=True, 1076 dropout=0, 1077 direct_link=True, 1078 n_clusters=2, 1079 cluster_encode=True, 1080 type_clust="kmeans", 1081 type_scaling=("std", "std", "std"), 1082 lags=15, 1083 type_pi="scp2-kde", 1084 block_size=None, 1085 replications=None, 1086 kernel=None, 1087 agg="mean", 1088 seed=123, 1089 backend="cpu", 1090 show_progress=False, 1091 ): 1092 super().__init__( 1093 verbose=verbose, 1094 ignore_warnings=ignore_warnings, 1095 custom_metric=custom_metric, 1096 predictions=predictions, 1097 sort_by=sort_by, 1098 random_state=random_state, 1099 estimators=estimators, 1100 preprocess=preprocess, 1101 n_layers=1, 1102 h=h, 1103 obj=obj, 1104 n_hidden_features=n_hidden_features, 1105 activation_name=activation_name, 1106 a=a, 1107 nodes_sim=nodes_sim, 1108 bias=bias, 1109 dropout=dropout, 1110 direct_link=direct_link, 1111 n_clusters=n_clusters, 1112 cluster_encode=cluster_encode, 1113 type_clust=type_clust, 1114 type_scaling=type_scaling, 1115 lags=lags, 1116 type_pi=type_pi, 1117 block_size=block_size, 1118 replications=replications, 1119 kernel=kernel, 1120 agg=agg, 1121 seed=seed, 1122 backend=backend, 1123 show_progress=show_progress, 1124 )
Fitting -- almost -- all the regression algorithms to multivariate time series and returning their scores (no layers).
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, the warning related to algorigms that are not
able to run are ignored.
custom_metric: function, optional (default=None)
When function is provided, models are evaluated based on the custom
evaluation metric provided.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models models are returned as dataframe.
sort_by: string, optional (default='RMSE')
Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE',
'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and
provided by custom_metric.
random_state: int, optional (default=42)
Reproducibiility seed.
estimators: list, optional (default='all')
list of Estimators (regression algorithms) names or just 'all' (default='all')
preprocess: bool, preprocessing is done when set to True
h: int, optional (default=None)
Number of steps ahead to predict (when used, must be > 0 and < X_test.shape[0]).
All the other parameters are the same as MTS's.
Attributes:
models_: dict-object
Returns a dictionary with each model pipeline as value
with key as name of models.
best_model_: object
Returns the best model pipeline based on the sort_by metric.
Examples:
See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict
104class LazyDeepMTS(MTS): 105 """ 106 107 Fitting -- almost -- all the regression algorithms with layers of 108 nnetsauce's CustomRegressor to multivariate time series 109 and returning their scores. 110 111 Parameters: 112 113 verbose: int, optional (default=0) 114 Any positive number for verbosity. 115 116 ignore_warnings: bool, optional (default=True) 117 When set to True, the warning related to algorigms that are not 118 able to run are ignored. 119 120 custom_metric: function, optional (default=None) 121 When function is provided, models are evaluated based on the custom 122 evaluation metric provided. 123 124 predictions: bool, optional (default=False) 125 When set to True, the predictions of all the models models are returned as dataframe. 126 127 sort_by: string, optional (default='RMSE') 128 Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE', 129 'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and 130 provided by custom_metric. 131 132 random_state: int, optional (default=42) 133 Reproducibiility seed. 134 135 estimators: list, optional (default='all') 136 list of Estimators (regression algorithms) names or just 'all' (default='all') 137 138 preprocess: bool, preprocessing is done when set to True 139 140 n_layers: int, optional (default=1) 141 Number of layers in the network. When set to 1, the model is equivalent to a MTS. 142 143 h: int, optional (default=None) 144 Number of steps ahead to predict (when used, must be > 0 and < X_test.shape[0]). 145 146 All the other parameters are the same as MTS's. 147 148 Attributes: 149 150 models_: dict-object 151 Returns a dictionary with each model pipeline as value 152 with key as name of models. 153 154 best_model_: object 155 Returns the best model pipeline based on the sort_by metric. 156 157 Examples: 158 159 See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict 160 161 """ 162 163 def __init__( 164 self, 165 verbose=0, 166 ignore_warnings=True, 167 custom_metric=None, 168 predictions=False, 169 sort_by=None, # leave it as is 170 random_state=42, 171 estimators="all", 172 preprocess=False, 173 n_layers=1, 174 h=None, 175 # MTS attributes 176 obj=None, 177 n_hidden_features=5, 178 activation_name="relu", 179 a=0.01, 180 nodes_sim="sobol", 181 bias=True, 182 dropout=0, 183 direct_link=True, 184 n_clusters=2, 185 cluster_encode=True, 186 type_clust="kmeans", 187 type_scaling=("std", "std", "std"), 188 lags=15, 189 type_pi="scp2-kde", 190 block_size=None, 191 replications=None, 192 kernel=None, 193 agg="mean", 194 seed=123, 195 backend="cpu", 196 show_progress=False, 197 ): 198 self.verbose = verbose 199 self.ignore_warnings = ignore_warnings 200 self.custom_metric = custom_metric 201 self.predictions = predictions 202 self.sort_by = sort_by 203 self.models_ = {} 204 self.best_model_ = None 205 self.random_state = random_state 206 self.estimators = estimators 207 self.preprocess = preprocess 208 self.n_layers = n_layers 209 self.h = h 210 super().__init__( 211 obj=obj, 212 n_hidden_features=n_hidden_features, 213 activation_name=activation_name, 214 a=a, 215 nodes_sim=nodes_sim, 216 bias=bias, 217 dropout=dropout, 218 direct_link=direct_link, 219 n_clusters=n_clusters, 220 cluster_encode=cluster_encode, 221 type_clust=type_clust, 222 type_scaling=type_scaling, 223 seed=seed, 224 backend=backend, 225 lags=lags, 226 type_pi=type_pi, 227 block_size=block_size, 228 replications=replications, 229 kernel=kernel, 230 agg=agg, 231 verbose=verbose, 232 show_progress=show_progress, 233 ) 234 if self.replications is not None or self.type_pi == "gaussian": 235 if self.sort_by is None: 236 self.sort_by = "WINKLERSCORE" 237 else: 238 if self.sort_by is None: 239 self.sort_by = "RMSE" 240 241 def fit(self, X_train, X_test, xreg=None, per_series=False, **kwargs): 242 """Fit Regression algorithms to X_train, predict and score on X_test. 243 244 Parameters: 245 246 X_train: array-like or data frame, 247 Training vectors, where rows is the number of samples 248 and columns is the number of features. 249 250 X_test: array-like or data frame, 251 Testing vectors, where rows is the number of samples 252 and columns is the number of features. 253 254 xreg: array-like, optional (default=None) 255 Additional (external) regressors to be passed to self.obj 256 xreg must be in 'increasing' order (most recent observations last) 257 258 per_series: bool, optional (default=False) 259 When set to True, the metrics are computed series by series. 260 261 **kwargs: dict, optional (default=None) 262 Additional parameters to be passed to `fit` method of `obj`. 263 264 Returns: 265 266 scores: Pandas DataFrame 267 Returns metrics of all the models in a Pandas DataFrame. 268 269 predictions: Pandas DataFrame 270 Returns predictions of all the models in a Pandas DataFrame. 271 272 """ 273 R2 = [] 274 ADJR2 = [] 275 ME = [] 276 MPL = [] 277 RMSE = [] 278 MAE = [] 279 MPE = [] 280 MAPE = [] 281 WINKLERSCORE = [] 282 COVERAGE = [] 283 284 # WIN = [] 285 names = [] 286 TIME = [] 287 predictions = {} 288 289 if self.custom_metric is not None: 290 CUSTOM_METRIC = [] 291 292 if self.h is None: 293 assert X_test is not None, "If h is None, X_test must be provided." 294 295 if isinstance(X_train, np.ndarray): 296 X_train = pd.DataFrame(X_train) 297 X_test = pd.DataFrame(X_test) 298 299 self.series_names = X_train.columns.tolist() 300 301 X_train = convert_df_to_numeric(X_train) 302 X_test = convert_df_to_numeric(X_test) 303 304 numeric_features = X_train.select_dtypes(include=[np.number]).columns 305 categorical_features = X_train.select_dtypes(include=["object"]).columns 306 307 categorical_low, categorical_high = get_card_split( 308 X_train, categorical_features 309 ) 310 311 if self.preprocess: 312 preprocessor = ColumnTransformer( 313 transformers=[ 314 ("numeric", numeric_transformer, numeric_features), 315 ( 316 "categorical_low", 317 categorical_transformer_low, 318 categorical_low, 319 ), 320 ( 321 "categorical_high", 322 categorical_transformer_high, 323 categorical_high, 324 ), 325 ] 326 ) 327 328 # baselines (Classical MTS) ---- 329 for i, name in enumerate(["ARIMA", "ETS", "Theta", "VAR", "VECM"]): 330 try: 331 start = time.time() 332 regr = ClassicalMTS(model=name) 333 regr.fit(X_train, **kwargs) 334 self.models_[name] = regr 335 if self.h is None: 336 X_pred = regr.predict(h=X_test.shape[0], **kwargs) 337 else: 338 assert self.h > 0, "h must be > 0" 339 X_pred = regr.predict(h=self.h, **kwargs) 340 try: 341 X_test = X_test[0: self.h, :] 342 except Exception as e: 343 X_test = X_test.iloc[0: self.h, :] 344 345 rmse = mean_errors( 346 actual=X_test, 347 pred=X_pred, 348 scoring="root_mean_squared_error", 349 per_series=per_series, 350 ) 351 mae = mean_errors( 352 actual=X_test, 353 pred=X_pred, 354 scoring="mean_absolute_error", 355 per_series=per_series, 356 ) 357 mpl = mean_errors( 358 actual=X_test, 359 pred=X_pred, 360 scoring="mean_pinball_loss", 361 per_series=per_series, 362 ) 363 except Exception: 364 365 continue 366 367 names.append(name) 368 RMSE.append(rmse) 369 MAE.append(mae) 370 MPL.append(mpl) 371 372 if self.custom_metric is not None: 373 try: 374 if self.h is None: 375 custom_metric = self.custom_metric(X_test, X_pred) 376 else: 377 custom_metric = self.custom_metric(X_test_h, X_pred) 378 CUSTOM_METRIC.append(custom_metric) 379 except Exception as e: 380 custom_metric = np.iinfo(np.float32).max 381 CUSTOM_METRIC.append(np.iinfo(np.float32).max) 382 383 if (self.replications is not None) or (self.type_pi == "gaussian"): 384 if per_series == False: 385 winklerscore = winkler_score( 386 obj=X_pred, actual=X_test, level=95 387 ) 388 coveragecalc = coverage(X_pred, X_test, level=95) 389 else: 390 winklerscore = winkler_score( 391 obj=X_pred, actual=X_test, level=95, per_series=True 392 ) 393 coveragecalc = coverage( 394 X_pred, X_test, level=95, per_series=True 395 ) 396 WINKLERSCORE.append(winklerscore) 397 COVERAGE.append(coveragecalc) 398 TIME.append(time.time() - start) 399 400 if self.estimators == "all": 401 if self.n_layers <= 1: 402 self.regressors = REGRESSORSMTS 403 else: 404 self.regressors = DEEPREGRESSORSMTS 405 else: 406 if self.n_layers <= 1: 407 self.regressors = [ 408 ("MTS(" + est[0] + ")", est[1]) 409 for est in all_estimators() 410 if ( 411 issubclass(est[1], RegressorMixin) 412 and (est[0] in self.estimators) 413 ) 414 ] 415 else: # self.n_layers > 1 416 self.regressors = [ 417 ("DeepMTS(" + est[0] + ")", est[1]) 418 for est in all_estimators() 419 if ( 420 issubclass(est[1], RegressorMixin) 421 and (est[0] in self.estimators) 422 ) 423 ] 424 425 if self.preprocess is True: 426 for name, model in tqdm(self.regressors): # do parallel exec 427 start = time.time() 428 try: 429 if "random_state" in model().get_params().keys(): 430 pipe = Pipeline( 431 steps=[ 432 ("preprocessor", preprocessor), 433 ( 434 "regressor", 435 DeepMTS( 436 obj=model( 437 random_state=self.random_state, 438 **kwargs, 439 ), 440 n_layers=self.n_layers, 441 n_hidden_features=self.n_hidden_features, 442 activation_name=self.activation_name, 443 a=self.a, 444 nodes_sim=self.nodes_sim, 445 bias=self.bias, 446 dropout=self.dropout, 447 direct_link=self.direct_link, 448 n_clusters=self.n_clusters, 449 cluster_encode=self.cluster_encode, 450 type_clust=self.type_clust, 451 type_scaling=self.type_scaling, 452 lags=self.lags, 453 type_pi=self.type_pi, 454 block_size=self.block_size, 455 replications=self.replications, 456 kernel=self.kernel, 457 agg=self.agg, 458 seed=self.seed, 459 backend=self.backend, 460 show_progress=self.show_progress, 461 ), 462 ), 463 ] 464 ) 465 else: # "random_state" in model().get_params().keys() 466 pipe = Pipeline( 467 steps=[ 468 ("preprocessor", preprocessor), 469 ( 470 "regressor", 471 DeepMTS( 472 obj=model(**kwargs), 473 n_layers=self.n_layers, 474 n_hidden_features=self.n_hidden_features, 475 activation_name=self.activation_name, 476 a=self.a, 477 nodes_sim=self.nodes_sim, 478 bias=self.bias, 479 dropout=self.dropout, 480 direct_link=self.direct_link, 481 n_clusters=self.n_clusters, 482 cluster_encode=self.cluster_encode, 483 type_clust=self.type_clust, 484 type_scaling=self.type_scaling, 485 lags=self.lags, 486 type_pi=self.type_pi, 487 block_size=self.block_size, 488 replications=self.replications, 489 kernel=self.kernel, 490 agg=self.agg, 491 seed=self.seed, 492 backend=self.backend, 493 show_progress=self.show_progress, 494 ), 495 ), 496 ] 497 ) 498 499 pipe.fit(X_train, **kwargs) 500 # pipe.fit(X_train, xreg=xreg) 501 502 self.models_[name] = pipe 503 504 if self.h is None: 505 X_pred = pipe["regressor"].predict(h=self.h, **kwargs) 506 else: 507 assert self.h > 0, "h must be > 0" 508 X_pred = pipe["regressor"].predict(h=self.h, **kwargs) 509 510 if (self.replications is not None) or ( 511 self.type_pi == "gaussian" 512 ): 513 rmse = mean_errors( 514 actual=X_test, 515 pred=X_pred, 516 scoring="root_mean_squared_error", 517 per_series=per_series, 518 ) 519 mae = mean_errors( 520 actual=X_test, 521 pred=X_pred, 522 scoring="mean_absolute_error", 523 per_series=per_series, 524 ) 525 mpl = mean_errors( 526 actual=X_test, 527 pred=X_pred, 528 scoring="mean_pinball_loss", 529 per_series=per_series, 530 ) 531 winklerscore = winkler_score( 532 obj=X_pred, 533 actual=X_test, 534 level=95, 535 per_series=per_series, 536 ) 537 coveragecalc = coverage( 538 X_pred, X_test, level=95, per_series=per_series 539 ) 540 else: 541 rmse = mean_errors( 542 actual=X_test, 543 pred=X_pred, 544 scoring="root_mean_squared_error", 545 per_series=per_series, 546 ) 547 mae = mean_errors( 548 actual=X_test, 549 pred=X_pred, 550 scoring="mean_absolute_error", 551 per_series=per_series, 552 ) 553 mpl = mean_errors( 554 actual=X_test, 555 pred=X_pred, 556 scoring="mean_pinball_loss", 557 per_series=per_series, 558 ) 559 560 names.append(name) 561 RMSE.append(rmse) 562 MAE.append(mae) 563 MPL.append(mpl) 564 565 if (self.replications is not None) or ( 566 self.type_pi == "gaussian" 567 ): 568 WINKLERSCORE.append(winklerscore) 569 COVERAGE.append(coveragecalc) 570 TIME.append(time.time() - start) 571 572 if self.custom_metric is not None: 573 try: 574 custom_metric = self.custom_metric(X_test, X_pred) 575 CUSTOM_METRIC.append(custom_metric) 576 except Exception as e: 577 custom_metric = np.iinfo(np.float32).max 578 CUSTOM_METRIC.append(custom_metric) 579 580 if self.verbose > 0: 581 if (self.replications is not None) or ( 582 self.type_pi == "gaussian" 583 ): 584 scores_verbose = { 585 "Model": name, 586 "RMSE": rmse, 587 "MAE": mae, 588 "MPL": mpl, 589 "WINKLERSCORE": winklerscore, 590 "COVERAGE": coveragecalc, 591 "Time taken": time.time() - start, 592 } 593 else: 594 scores_verbose = { 595 "Model": name, 596 "RMSE": rmse, 597 "MAE": mae, 598 "MPL": mpl, 599 "Time taken": time.time() - start, 600 } 601 602 if self.custom_metric is not None: 603 scores_verbose["Custom metric"] = custom_metric 604 605 if self.predictions: 606 predictions[name] = X_pred 607 except Exception as exception: 608 if self.ignore_warnings is False: 609 print(name + " model failed to execute") 610 print(exception) 611 612 else: # no preprocessing 613 614 for name, model in tqdm(self.regressors): # do parallel exec 615 start = time.time() 616 try: 617 if "random_state" in model().get_params().keys(): 618 pipe = DeepMTS( 619 obj=model(random_state=self.random_state, **kwargs), 620 n_layers=self.n_layers, 621 n_hidden_features=self.n_hidden_features, 622 activation_name=self.activation_name, 623 a=self.a, 624 nodes_sim=self.nodes_sim, 625 bias=self.bias, 626 dropout=self.dropout, 627 direct_link=self.direct_link, 628 n_clusters=self.n_clusters, 629 cluster_encode=self.cluster_encode, 630 type_clust=self.type_clust, 631 type_scaling=self.type_scaling, 632 lags=self.lags, 633 type_pi=self.type_pi, 634 block_size=self.block_size, 635 replications=self.replications, 636 kernel=self.kernel, 637 agg=self.agg, 638 seed=self.seed, 639 backend=self.backend, 640 show_progress=self.show_progress, 641 ) 642 else: 643 pipe = DeepMTS( 644 obj=model(**kwargs), 645 n_layers=self.n_layers, 646 n_hidden_features=self.n_hidden_features, 647 activation_name=self.activation_name, 648 a=self.a, 649 nodes_sim=self.nodes_sim, 650 bias=self.bias, 651 dropout=self.dropout, 652 direct_link=self.direct_link, 653 n_clusters=self.n_clusters, 654 cluster_encode=self.cluster_encode, 655 type_clust=self.type_clust, 656 type_scaling=self.type_scaling, 657 lags=self.lags, 658 type_pi=self.type_pi, 659 block_size=self.block_size, 660 replications=self.replications, 661 kernel=self.kernel, 662 agg=self.agg, 663 seed=self.seed, 664 backend=self.backend, 665 show_progress=self.show_progress, 666 ) 667 668 pipe.fit(X_train, xreg, **kwargs) 669 # pipe.fit(X_train, xreg=xreg) # DO xreg like in `ahead` 670 671 self.models_[name] = pipe 672 673 if self.preprocess is True: 674 if self.h is None: 675 X_pred = pipe["regressor"].predict( 676 h=X_test.shape[0], **kwargs 677 ) 678 else: 679 assert ( 680 self.h > 0 and self.h <= X_test.shape[0] 681 ), "h must be > 0 and < X_test.shape[0]" 682 X_pred = pipe["regressor"].predict( 683 h=self.h, **kwargs 684 ) 685 686 else: 687 688 if self.h is None: 689 X_pred = pipe.predict( 690 h=X_test.shape[0], 691 **kwargs, 692 # X_pred = pipe.predict(h=X_test.shape[0], new_xreg=new_xreg) ## DO xreg like in `ahead` 693 ) 694 else: 695 assert ( 696 self.h > 0 and self.h <= X_test.shape[0] 697 ), "h must be > 0 and < X_test.shape[0]" 698 X_pred = pipe.predict(h=self.h, **kwargs) 699 700 if self.h is None: 701 if (self.replications is not None) or ( 702 self.type_pi == "gaussian" 703 ): 704 rmse = mean_errors( 705 actual=X_test, 706 pred=X_pred.mean, 707 scoring="root_mean_squared_error", 708 per_series=per_series, 709 ) 710 mae = mean_errors( 711 actual=X_test, 712 pred=X_pred.mean, 713 scoring="mean_absolute_error", 714 per_series=per_series, 715 ) 716 mpl = mean_errors( 717 actual=X_test, 718 pred=X_pred.mean, 719 scoring="mean_pinball_loss", 720 per_series=per_series, 721 ) 722 winklerscore = winkler_score( 723 obj=X_pred, 724 actual=X_test, 725 level=95, 726 per_series=per_series, 727 ) 728 coveragecalc = coverage( 729 X_pred, X_test, level=95, per_series=per_series 730 ) 731 else: # no prediction interval 732 rmse = mean_errors( 733 actual=X_test, 734 pred=X_pred, 735 scoring="root_mean_squared_error", 736 per_series=per_series, 737 ) 738 mae = mean_errors( 739 actual=X_test, 740 pred=X_pred, 741 scoring="mean_absolute_error", 742 per_series=per_series, 743 ) 744 mpl = mean_errors( 745 actual=X_test, 746 pred=X_pred, 747 scoring="mean_pinball_loss", 748 per_series=per_series, 749 ) 750 else: # self.h is not None 751 if (self.replications is not None) or ( 752 self.type_pi == "gaussian" 753 ): 754 755 if isinstance(X_test, pd.DataFrame): 756 X_test_h = X_test.iloc[0: self.h, :] 757 rmse = mean_errors( 758 actual=X_test_h, 759 pred=X_pred, 760 scoring="root_mean_squared_error", 761 per_series=per_series, 762 ) 763 mae = mean_errors( 764 actual=X_test_h, 765 pred=X_pred, 766 scoring="mean_absolute_error", 767 per_series=per_series, 768 ) 769 mpl = mean_errors( 770 actual=X_test_h, 771 pred=X_pred, 772 scoring="mean_pinball_loss", 773 per_series=per_series, 774 ) 775 winklerscore = winkler_score( 776 obj=X_pred, 777 actual=X_test_h, 778 level=95, 779 per_series=per_series, 780 ) 781 coveragecalc = coverage( 782 X_pred, 783 X_test_h, 784 level=95, 785 per_series=per_series, 786 ) 787 else: 788 X_test_h = X_test[0: self.h, :] 789 rmse = mean_errors( 790 actual=X_test_h, 791 pred=X_pred, 792 scoring="root_mean_squared_error", 793 per_series=per_series, 794 ) 795 mae = mean_errors( 796 actual=X_test_h, 797 pred=X_pred, 798 scoring="mean_absolute_error", 799 per_series=per_series, 800 ) 801 mpl = mean_errors( 802 actual=X_test_h, 803 pred=X_pred, 804 scoring="mean_pinball_loss", 805 per_series=per_series, 806 ) 807 winklerscore = winkler_score( 808 obj=X_pred, 809 actual=X_test_h, 810 level=95, 811 per_series=per_series, 812 ) 813 coveragecalc = coverage( 814 X_pred, 815 X_test_h, 816 level=95, 817 per_series=per_series, 818 ) 819 else: # no prediction interval 820 821 if isinstance(X_test, pd.DataFrame): 822 X_test_h = X_test.iloc[0: self.h, :] 823 rmse = mean_errors( 824 actual=X_test_h, 825 pred=X_pred, 826 scoring="root_mean_squared_error", 827 per_series=per_series, 828 ) 829 mae = mean_errors( 830 actual=X_test_h, 831 pred=X_pred, 832 scoring="mean_absolute_error", 833 per_series=per_series, 834 ) 835 mpl = mean_errors( 836 actual=X_test_h, 837 pred=X_pred, 838 scoring="mean_pinball_loss", 839 per_series=per_series, 840 ) 841 else: 842 X_test_h = X_test[0: self.h, :] 843 rmse = mean_errors( 844 actual=X_test_h, 845 pred=X_pred, 846 scoring="root_mean_squared_error", 847 per_series=per_series, 848 ) 849 mae = mean_errors( 850 actual=X_test_h, 851 pred=X_pred, 852 scoring="mean_absolute_error", 853 per_series=per_series, 854 ) 855 856 names.append(name) 857 RMSE.append(rmse) 858 MAE.append(mae) 859 MPL.append(mpl) 860 if (self.replications is not None) or ( 861 self.type_pi == "gaussian" 862 ): 863 WINKLERSCORE.append(winklerscore) 864 COVERAGE.append(coveragecalc) 865 TIME.append(time.time() - start) 866 867 if self.custom_metric is not None: 868 try: 869 if self.h is None: 870 custom_metric = self.custom_metric( 871 X_test, X_pred 872 ) 873 else: 874 custom_metric = self.custom_metric( 875 X_test_h, X_pred 876 ) 877 CUSTOM_METRIC.append(custom_metric) 878 except Exception as e: 879 custom_metric = np.iinfo(np.float32).max 880 CUSTOM_METRIC.append(np.iinfo(np.float32).max) 881 882 if self.verbose > 0: 883 if (self.replications is not None) or ( 884 self.type_pi == "gaussian" 885 ): 886 scores_verbose = { 887 "Model": name, 888 "RMSE": rmse, 889 "MAE": mae, 890 "MPL": mpl, 891 "WINKLERSCORE": winklerscore, 892 "COVERAGE": coveragecalc, 893 "Time taken": time.time() - start, 894 } 895 else: 896 scores_verbose = { 897 "Model": name, 898 "RMSE": rmse, 899 "MAE": mae, 900 "MPL": mpl, 901 "Time taken": time.time() - start, 902 } 903 904 if self.custom_metric is not None: 905 scores_verbose["Custom metric"] = custom_metric 906 907 if self.predictions: 908 predictions[name] = X_pred 909 910 except Exception as exception: 911 if self.ignore_warnings is False: 912 print(name + " model failed to execute") 913 print(exception) 914 915 if (self.replications is not None) or (self.type_pi == "gaussian"): 916 scores = { 917 "Model": names, 918 "RMSE": RMSE, 919 "MAE": MAE, 920 "MPL": MPL, 921 "WINKLERSCORE": WINKLERSCORE, 922 "COVERAGE": COVERAGE, 923 "Time Taken": TIME, 924 } 925 else: 926 scores = { 927 "Model": names, 928 "RMSE": RMSE, 929 "MAE": MAE, 930 "MPL": MPL, 931 "Time Taken": TIME, 932 } 933 934 if self.custom_metric is not None: 935 scores["Custom metric"] = CUSTOM_METRIC 936 937 if per_series: 938 scores = dict_to_dataframe_series(scores, self.series_names) 939 else: 940 scores = pd.DataFrame(scores) 941 942 try: # case per_series, can't be sorted 943 scores = scores.sort_values( 944 by=self.sort_by, ascending=True 945 ).set_index("Model") 946 947 self.best_model_ = self.models_[scores.index[0]] 948 except Exception as e: 949 pass 950 951 if self.predictions is True: 952 953 return scores, predictions 954 955 return scores 956 957 def get_best_model(self): 958 """ 959 This function returns the best model pipeline based on the sort_by metric. 960 961 Returns: 962 963 best_model: object, 964 Returns the best model pipeline based on the sort_by metric. 965 966 """ 967 return self.best_model_ 968 969 def provide_models(self, X_train, X_test): 970 """ 971 This function returns all the model objects trained in fit function. 972 If fit is not called already, then we call fit and then return the models. 973 974 Parameters: 975 976 X_train : array-like, 977 Training vectors, where rows is the number of samples 978 and columns is the number of features. 979 980 X_test : array-like, 981 Testing vectors, where rows is the number of samples 982 and columns is the number of features. 983 984 Returns: 985 986 models: dict-object, 987 Returns a dictionary with each model pipeline as value 988 with key as name of models. 989 990 """ 991 if self.h is None: 992 if len(self.models_.keys()) == 0: 993 self.fit(X_train, X_test) 994 else: 995 if len(self.models_.keys()) == 0: 996 if isinstance(X_test, pd.DataFrame): 997 self.fit(X_train, X_test.iloc[0: self.h, :]) 998 else: 999 self.fit(X_train, X_test[0: self.h, :]) 1000 1001 return self.models_
Fitting -- almost -- all the regression algorithms with layers of nnetsauce's CustomRegressor to multivariate time series and returning their scores.
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, the warning related to algorigms that are not
able to run are ignored.
custom_metric: function, optional (default=None)
When function is provided, models are evaluated based on the custom
evaluation metric provided.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models models are returned as dataframe.
sort_by: string, optional (default='RMSE')
Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE',
'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and
provided by custom_metric.
random_state: int, optional (default=42)
Reproducibiility seed.
estimators: list, optional (default='all')
list of Estimators (regression algorithms) names or just 'all' (default='all')
preprocess: bool, preprocessing is done when set to True
n_layers: int, optional (default=1)
Number of layers in the network. When set to 1, the model is equivalent to a MTS.
h: int, optional (default=None)
Number of steps ahead to predict (when used, must be > 0 and < X_test.shape[0]).
All the other parameters are the same as MTS's.
Attributes:
models_: dict-object
Returns a dictionary with each model pipeline as value
with key as name of models.
best_model_: object
Returns the best model pipeline based on the sort_by metric.
Examples:
See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict
241 def fit(self, X_train, X_test, xreg=None, per_series=False, **kwargs): 242 """Fit Regression algorithms to X_train, predict and score on X_test. 243 244 Parameters: 245 246 X_train: array-like or data frame, 247 Training vectors, where rows is the number of samples 248 and columns is the number of features. 249 250 X_test: array-like or data frame, 251 Testing vectors, where rows is the number of samples 252 and columns is the number of features. 253 254 xreg: array-like, optional (default=None) 255 Additional (external) regressors to be passed to self.obj 256 xreg must be in 'increasing' order (most recent observations last) 257 258 per_series: bool, optional (default=False) 259 When set to True, the metrics are computed series by series. 260 261 **kwargs: dict, optional (default=None) 262 Additional parameters to be passed to `fit` method of `obj`. 263 264 Returns: 265 266 scores: Pandas DataFrame 267 Returns metrics of all the models in a Pandas DataFrame. 268 269 predictions: Pandas DataFrame 270 Returns predictions of all the models in a Pandas DataFrame. 271 272 """ 273 R2 = [] 274 ADJR2 = [] 275 ME = [] 276 MPL = [] 277 RMSE = [] 278 MAE = [] 279 MPE = [] 280 MAPE = [] 281 WINKLERSCORE = [] 282 COVERAGE = [] 283 284 # WIN = [] 285 names = [] 286 TIME = [] 287 predictions = {} 288 289 if self.custom_metric is not None: 290 CUSTOM_METRIC = [] 291 292 if self.h is None: 293 assert X_test is not None, "If h is None, X_test must be provided." 294 295 if isinstance(X_train, np.ndarray): 296 X_train = pd.DataFrame(X_train) 297 X_test = pd.DataFrame(X_test) 298 299 self.series_names = X_train.columns.tolist() 300 301 X_train = convert_df_to_numeric(X_train) 302 X_test = convert_df_to_numeric(X_test) 303 304 numeric_features = X_train.select_dtypes(include=[np.number]).columns 305 categorical_features = X_train.select_dtypes(include=["object"]).columns 306 307 categorical_low, categorical_high = get_card_split( 308 X_train, categorical_features 309 ) 310 311 if self.preprocess: 312 preprocessor = ColumnTransformer( 313 transformers=[ 314 ("numeric", numeric_transformer, numeric_features), 315 ( 316 "categorical_low", 317 categorical_transformer_low, 318 categorical_low, 319 ), 320 ( 321 "categorical_high", 322 categorical_transformer_high, 323 categorical_high, 324 ), 325 ] 326 ) 327 328 # baselines (Classical MTS) ---- 329 for i, name in enumerate(["ARIMA", "ETS", "Theta", "VAR", "VECM"]): 330 try: 331 start = time.time() 332 regr = ClassicalMTS(model=name) 333 regr.fit(X_train, **kwargs) 334 self.models_[name] = regr 335 if self.h is None: 336 X_pred = regr.predict(h=X_test.shape[0], **kwargs) 337 else: 338 assert self.h > 0, "h must be > 0" 339 X_pred = regr.predict(h=self.h, **kwargs) 340 try: 341 X_test = X_test[0: self.h, :] 342 except Exception as e: 343 X_test = X_test.iloc[0: self.h, :] 344 345 rmse = mean_errors( 346 actual=X_test, 347 pred=X_pred, 348 scoring="root_mean_squared_error", 349 per_series=per_series, 350 ) 351 mae = mean_errors( 352 actual=X_test, 353 pred=X_pred, 354 scoring="mean_absolute_error", 355 per_series=per_series, 356 ) 357 mpl = mean_errors( 358 actual=X_test, 359 pred=X_pred, 360 scoring="mean_pinball_loss", 361 per_series=per_series, 362 ) 363 except Exception: 364 365 continue 366 367 names.append(name) 368 RMSE.append(rmse) 369 MAE.append(mae) 370 MPL.append(mpl) 371 372 if self.custom_metric is not None: 373 try: 374 if self.h is None: 375 custom_metric = self.custom_metric(X_test, X_pred) 376 else: 377 custom_metric = self.custom_metric(X_test_h, X_pred) 378 CUSTOM_METRIC.append(custom_metric) 379 except Exception as e: 380 custom_metric = np.iinfo(np.float32).max 381 CUSTOM_METRIC.append(np.iinfo(np.float32).max) 382 383 if (self.replications is not None) or (self.type_pi == "gaussian"): 384 if per_series == False: 385 winklerscore = winkler_score( 386 obj=X_pred, actual=X_test, level=95 387 ) 388 coveragecalc = coverage(X_pred, X_test, level=95) 389 else: 390 winklerscore = winkler_score( 391 obj=X_pred, actual=X_test, level=95, per_series=True 392 ) 393 coveragecalc = coverage( 394 X_pred, X_test, level=95, per_series=True 395 ) 396 WINKLERSCORE.append(winklerscore) 397 COVERAGE.append(coveragecalc) 398 TIME.append(time.time() - start) 399 400 if self.estimators == "all": 401 if self.n_layers <= 1: 402 self.regressors = REGRESSORSMTS 403 else: 404 self.regressors = DEEPREGRESSORSMTS 405 else: 406 if self.n_layers <= 1: 407 self.regressors = [ 408 ("MTS(" + est[0] + ")", est[1]) 409 for est in all_estimators() 410 if ( 411 issubclass(est[1], RegressorMixin) 412 and (est[0] in self.estimators) 413 ) 414 ] 415 else: # self.n_layers > 1 416 self.regressors = [ 417 ("DeepMTS(" + est[0] + ")", est[1]) 418 for est in all_estimators() 419 if ( 420 issubclass(est[1], RegressorMixin) 421 and (est[0] in self.estimators) 422 ) 423 ] 424 425 if self.preprocess is True: 426 for name, model in tqdm(self.regressors): # do parallel exec 427 start = time.time() 428 try: 429 if "random_state" in model().get_params().keys(): 430 pipe = Pipeline( 431 steps=[ 432 ("preprocessor", preprocessor), 433 ( 434 "regressor", 435 DeepMTS( 436 obj=model( 437 random_state=self.random_state, 438 **kwargs, 439 ), 440 n_layers=self.n_layers, 441 n_hidden_features=self.n_hidden_features, 442 activation_name=self.activation_name, 443 a=self.a, 444 nodes_sim=self.nodes_sim, 445 bias=self.bias, 446 dropout=self.dropout, 447 direct_link=self.direct_link, 448 n_clusters=self.n_clusters, 449 cluster_encode=self.cluster_encode, 450 type_clust=self.type_clust, 451 type_scaling=self.type_scaling, 452 lags=self.lags, 453 type_pi=self.type_pi, 454 block_size=self.block_size, 455 replications=self.replications, 456 kernel=self.kernel, 457 agg=self.agg, 458 seed=self.seed, 459 backend=self.backend, 460 show_progress=self.show_progress, 461 ), 462 ), 463 ] 464 ) 465 else: # "random_state" in model().get_params().keys() 466 pipe = Pipeline( 467 steps=[ 468 ("preprocessor", preprocessor), 469 ( 470 "regressor", 471 DeepMTS( 472 obj=model(**kwargs), 473 n_layers=self.n_layers, 474 n_hidden_features=self.n_hidden_features, 475 activation_name=self.activation_name, 476 a=self.a, 477 nodes_sim=self.nodes_sim, 478 bias=self.bias, 479 dropout=self.dropout, 480 direct_link=self.direct_link, 481 n_clusters=self.n_clusters, 482 cluster_encode=self.cluster_encode, 483 type_clust=self.type_clust, 484 type_scaling=self.type_scaling, 485 lags=self.lags, 486 type_pi=self.type_pi, 487 block_size=self.block_size, 488 replications=self.replications, 489 kernel=self.kernel, 490 agg=self.agg, 491 seed=self.seed, 492 backend=self.backend, 493 show_progress=self.show_progress, 494 ), 495 ), 496 ] 497 ) 498 499 pipe.fit(X_train, **kwargs) 500 # pipe.fit(X_train, xreg=xreg) 501 502 self.models_[name] = pipe 503 504 if self.h is None: 505 X_pred = pipe["regressor"].predict(h=self.h, **kwargs) 506 else: 507 assert self.h > 0, "h must be > 0" 508 X_pred = pipe["regressor"].predict(h=self.h, **kwargs) 509 510 if (self.replications is not None) or ( 511 self.type_pi == "gaussian" 512 ): 513 rmse = mean_errors( 514 actual=X_test, 515 pred=X_pred, 516 scoring="root_mean_squared_error", 517 per_series=per_series, 518 ) 519 mae = mean_errors( 520 actual=X_test, 521 pred=X_pred, 522 scoring="mean_absolute_error", 523 per_series=per_series, 524 ) 525 mpl = mean_errors( 526 actual=X_test, 527 pred=X_pred, 528 scoring="mean_pinball_loss", 529 per_series=per_series, 530 ) 531 winklerscore = winkler_score( 532 obj=X_pred, 533 actual=X_test, 534 level=95, 535 per_series=per_series, 536 ) 537 coveragecalc = coverage( 538 X_pred, X_test, level=95, per_series=per_series 539 ) 540 else: 541 rmse = mean_errors( 542 actual=X_test, 543 pred=X_pred, 544 scoring="root_mean_squared_error", 545 per_series=per_series, 546 ) 547 mae = mean_errors( 548 actual=X_test, 549 pred=X_pred, 550 scoring="mean_absolute_error", 551 per_series=per_series, 552 ) 553 mpl = mean_errors( 554 actual=X_test, 555 pred=X_pred, 556 scoring="mean_pinball_loss", 557 per_series=per_series, 558 ) 559 560 names.append(name) 561 RMSE.append(rmse) 562 MAE.append(mae) 563 MPL.append(mpl) 564 565 if (self.replications is not None) or ( 566 self.type_pi == "gaussian" 567 ): 568 WINKLERSCORE.append(winklerscore) 569 COVERAGE.append(coveragecalc) 570 TIME.append(time.time() - start) 571 572 if self.custom_metric is not None: 573 try: 574 custom_metric = self.custom_metric(X_test, X_pred) 575 CUSTOM_METRIC.append(custom_metric) 576 except Exception as e: 577 custom_metric = np.iinfo(np.float32).max 578 CUSTOM_METRIC.append(custom_metric) 579 580 if self.verbose > 0: 581 if (self.replications is not None) or ( 582 self.type_pi == "gaussian" 583 ): 584 scores_verbose = { 585 "Model": name, 586 "RMSE": rmse, 587 "MAE": mae, 588 "MPL": mpl, 589 "WINKLERSCORE": winklerscore, 590 "COVERAGE": coveragecalc, 591 "Time taken": time.time() - start, 592 } 593 else: 594 scores_verbose = { 595 "Model": name, 596 "RMSE": rmse, 597 "MAE": mae, 598 "MPL": mpl, 599 "Time taken": time.time() - start, 600 } 601 602 if self.custom_metric is not None: 603 scores_verbose["Custom metric"] = custom_metric 604 605 if self.predictions: 606 predictions[name] = X_pred 607 except Exception as exception: 608 if self.ignore_warnings is False: 609 print(name + " model failed to execute") 610 print(exception) 611 612 else: # no preprocessing 613 614 for name, model in tqdm(self.regressors): # do parallel exec 615 start = time.time() 616 try: 617 if "random_state" in model().get_params().keys(): 618 pipe = DeepMTS( 619 obj=model(random_state=self.random_state, **kwargs), 620 n_layers=self.n_layers, 621 n_hidden_features=self.n_hidden_features, 622 activation_name=self.activation_name, 623 a=self.a, 624 nodes_sim=self.nodes_sim, 625 bias=self.bias, 626 dropout=self.dropout, 627 direct_link=self.direct_link, 628 n_clusters=self.n_clusters, 629 cluster_encode=self.cluster_encode, 630 type_clust=self.type_clust, 631 type_scaling=self.type_scaling, 632 lags=self.lags, 633 type_pi=self.type_pi, 634 block_size=self.block_size, 635 replications=self.replications, 636 kernel=self.kernel, 637 agg=self.agg, 638 seed=self.seed, 639 backend=self.backend, 640 show_progress=self.show_progress, 641 ) 642 else: 643 pipe = DeepMTS( 644 obj=model(**kwargs), 645 n_layers=self.n_layers, 646 n_hidden_features=self.n_hidden_features, 647 activation_name=self.activation_name, 648 a=self.a, 649 nodes_sim=self.nodes_sim, 650 bias=self.bias, 651 dropout=self.dropout, 652 direct_link=self.direct_link, 653 n_clusters=self.n_clusters, 654 cluster_encode=self.cluster_encode, 655 type_clust=self.type_clust, 656 type_scaling=self.type_scaling, 657 lags=self.lags, 658 type_pi=self.type_pi, 659 block_size=self.block_size, 660 replications=self.replications, 661 kernel=self.kernel, 662 agg=self.agg, 663 seed=self.seed, 664 backend=self.backend, 665 show_progress=self.show_progress, 666 ) 667 668 pipe.fit(X_train, xreg, **kwargs) 669 # pipe.fit(X_train, xreg=xreg) # DO xreg like in `ahead` 670 671 self.models_[name] = pipe 672 673 if self.preprocess is True: 674 if self.h is None: 675 X_pred = pipe["regressor"].predict( 676 h=X_test.shape[0], **kwargs 677 ) 678 else: 679 assert ( 680 self.h > 0 and self.h <= X_test.shape[0] 681 ), "h must be > 0 and < X_test.shape[0]" 682 X_pred = pipe["regressor"].predict( 683 h=self.h, **kwargs 684 ) 685 686 else: 687 688 if self.h is None: 689 X_pred = pipe.predict( 690 h=X_test.shape[0], 691 **kwargs, 692 # X_pred = pipe.predict(h=X_test.shape[0], new_xreg=new_xreg) ## DO xreg like in `ahead` 693 ) 694 else: 695 assert ( 696 self.h > 0 and self.h <= X_test.shape[0] 697 ), "h must be > 0 and < X_test.shape[0]" 698 X_pred = pipe.predict(h=self.h, **kwargs) 699 700 if self.h is None: 701 if (self.replications is not None) or ( 702 self.type_pi == "gaussian" 703 ): 704 rmse = mean_errors( 705 actual=X_test, 706 pred=X_pred.mean, 707 scoring="root_mean_squared_error", 708 per_series=per_series, 709 ) 710 mae = mean_errors( 711 actual=X_test, 712 pred=X_pred.mean, 713 scoring="mean_absolute_error", 714 per_series=per_series, 715 ) 716 mpl = mean_errors( 717 actual=X_test, 718 pred=X_pred.mean, 719 scoring="mean_pinball_loss", 720 per_series=per_series, 721 ) 722 winklerscore = winkler_score( 723 obj=X_pred, 724 actual=X_test, 725 level=95, 726 per_series=per_series, 727 ) 728 coveragecalc = coverage( 729 X_pred, X_test, level=95, per_series=per_series 730 ) 731 else: # no prediction interval 732 rmse = mean_errors( 733 actual=X_test, 734 pred=X_pred, 735 scoring="root_mean_squared_error", 736 per_series=per_series, 737 ) 738 mae = mean_errors( 739 actual=X_test, 740 pred=X_pred, 741 scoring="mean_absolute_error", 742 per_series=per_series, 743 ) 744 mpl = mean_errors( 745 actual=X_test, 746 pred=X_pred, 747 scoring="mean_pinball_loss", 748 per_series=per_series, 749 ) 750 else: # self.h is not None 751 if (self.replications is not None) or ( 752 self.type_pi == "gaussian" 753 ): 754 755 if isinstance(X_test, pd.DataFrame): 756 X_test_h = X_test.iloc[0: self.h, :] 757 rmse = mean_errors( 758 actual=X_test_h, 759 pred=X_pred, 760 scoring="root_mean_squared_error", 761 per_series=per_series, 762 ) 763 mae = mean_errors( 764 actual=X_test_h, 765 pred=X_pred, 766 scoring="mean_absolute_error", 767 per_series=per_series, 768 ) 769 mpl = mean_errors( 770 actual=X_test_h, 771 pred=X_pred, 772 scoring="mean_pinball_loss", 773 per_series=per_series, 774 ) 775 winklerscore = winkler_score( 776 obj=X_pred, 777 actual=X_test_h, 778 level=95, 779 per_series=per_series, 780 ) 781 coveragecalc = coverage( 782 X_pred, 783 X_test_h, 784 level=95, 785 per_series=per_series, 786 ) 787 else: 788 X_test_h = X_test[0: self.h, :] 789 rmse = mean_errors( 790 actual=X_test_h, 791 pred=X_pred, 792 scoring="root_mean_squared_error", 793 per_series=per_series, 794 ) 795 mae = mean_errors( 796 actual=X_test_h, 797 pred=X_pred, 798 scoring="mean_absolute_error", 799 per_series=per_series, 800 ) 801 mpl = mean_errors( 802 actual=X_test_h, 803 pred=X_pred, 804 scoring="mean_pinball_loss", 805 per_series=per_series, 806 ) 807 winklerscore = winkler_score( 808 obj=X_pred, 809 actual=X_test_h, 810 level=95, 811 per_series=per_series, 812 ) 813 coveragecalc = coverage( 814 X_pred, 815 X_test_h, 816 level=95, 817 per_series=per_series, 818 ) 819 else: # no prediction interval 820 821 if isinstance(X_test, pd.DataFrame): 822 X_test_h = X_test.iloc[0: self.h, :] 823 rmse = mean_errors( 824 actual=X_test_h, 825 pred=X_pred, 826 scoring="root_mean_squared_error", 827 per_series=per_series, 828 ) 829 mae = mean_errors( 830 actual=X_test_h, 831 pred=X_pred, 832 scoring="mean_absolute_error", 833 per_series=per_series, 834 ) 835 mpl = mean_errors( 836 actual=X_test_h, 837 pred=X_pred, 838 scoring="mean_pinball_loss", 839 per_series=per_series, 840 ) 841 else: 842 X_test_h = X_test[0: self.h, :] 843 rmse = mean_errors( 844 actual=X_test_h, 845 pred=X_pred, 846 scoring="root_mean_squared_error", 847 per_series=per_series, 848 ) 849 mae = mean_errors( 850 actual=X_test_h, 851 pred=X_pred, 852 scoring="mean_absolute_error", 853 per_series=per_series, 854 ) 855 856 names.append(name) 857 RMSE.append(rmse) 858 MAE.append(mae) 859 MPL.append(mpl) 860 if (self.replications is not None) or ( 861 self.type_pi == "gaussian" 862 ): 863 WINKLERSCORE.append(winklerscore) 864 COVERAGE.append(coveragecalc) 865 TIME.append(time.time() - start) 866 867 if self.custom_metric is not None: 868 try: 869 if self.h is None: 870 custom_metric = self.custom_metric( 871 X_test, X_pred 872 ) 873 else: 874 custom_metric = self.custom_metric( 875 X_test_h, X_pred 876 ) 877 CUSTOM_METRIC.append(custom_metric) 878 except Exception as e: 879 custom_metric = np.iinfo(np.float32).max 880 CUSTOM_METRIC.append(np.iinfo(np.float32).max) 881 882 if self.verbose > 0: 883 if (self.replications is not None) or ( 884 self.type_pi == "gaussian" 885 ): 886 scores_verbose = { 887 "Model": name, 888 "RMSE": rmse, 889 "MAE": mae, 890 "MPL": mpl, 891 "WINKLERSCORE": winklerscore, 892 "COVERAGE": coveragecalc, 893 "Time taken": time.time() - start, 894 } 895 else: 896 scores_verbose = { 897 "Model": name, 898 "RMSE": rmse, 899 "MAE": mae, 900 "MPL": mpl, 901 "Time taken": time.time() - start, 902 } 903 904 if self.custom_metric is not None: 905 scores_verbose["Custom metric"] = custom_metric 906 907 if self.predictions: 908 predictions[name] = X_pred 909 910 except Exception as exception: 911 if self.ignore_warnings is False: 912 print(name + " model failed to execute") 913 print(exception) 914 915 if (self.replications is not None) or (self.type_pi == "gaussian"): 916 scores = { 917 "Model": names, 918 "RMSE": RMSE, 919 "MAE": MAE, 920 "MPL": MPL, 921 "WINKLERSCORE": WINKLERSCORE, 922 "COVERAGE": COVERAGE, 923 "Time Taken": TIME, 924 } 925 else: 926 scores = { 927 "Model": names, 928 "RMSE": RMSE, 929 "MAE": MAE, 930 "MPL": MPL, 931 "Time Taken": TIME, 932 } 933 934 if self.custom_metric is not None: 935 scores["Custom metric"] = CUSTOM_METRIC 936 937 if per_series: 938 scores = dict_to_dataframe_series(scores, self.series_names) 939 else: 940 scores = pd.DataFrame(scores) 941 942 try: # case per_series, can't be sorted 943 scores = scores.sort_values( 944 by=self.sort_by, ascending=True 945 ).set_index("Model") 946 947 self.best_model_ = self.models_[scores.index[0]] 948 except Exception as e: 949 pass 950 951 if self.predictions is True: 952 953 return scores, predictions 954 955 return scores
Fit Regression algorithms to X_train, predict and score on X_test.
Parameters:
X_train: array-like or data frame,
Training vectors, where rows is the number of samples
and columns is the number of features.
X_test: array-like or data frame,
Testing vectors, where rows is the number of samples
and columns is the number of features.
xreg: array-like, optional (default=None)
Additional (external) regressors to be passed to self.obj
xreg must be in 'increasing' order (most recent observations last)
per_series: bool, optional (default=False)
When set to True, the metrics are computed series by series.
**kwargs: dict, optional (default=None)
Additional parameters to be passed to `fit` method of `obj`.
Returns:
scores: Pandas DataFrame
Returns metrics of all the models in a Pandas DataFrame.
predictions: Pandas DataFrame
Returns predictions of all the models in a Pandas DataFrame.
969 def provide_models(self, X_train, X_test): 970 """ 971 This function returns all the model objects trained in fit function. 972 If fit is not called already, then we call fit and then return the models. 973 974 Parameters: 975 976 X_train : array-like, 977 Training vectors, where rows is the number of samples 978 and columns is the number of features. 979 980 X_test : array-like, 981 Testing vectors, where rows is the number of samples 982 and columns is the number of features. 983 984 Returns: 985 986 models: dict-object, 987 Returns a dictionary with each model pipeline as value 988 with key as name of models. 989 990 """ 991 if self.h is None: 992 if len(self.models_.keys()) == 0: 993 self.fit(X_train, X_test) 994 else: 995 if len(self.models_.keys()) == 0: 996 if isinstance(X_test, pd.DataFrame): 997 self.fit(X_train, X_test.iloc[0: self.h, :]) 998 else: 999 self.fit(X_train, X_test[0: self.h, :]) 1000 1001 return self.models_
This function returns all the model objects trained in fit function. If fit is not called already, then we call fit and then return the models.
Parameters:
X_train : array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
X_test : array-like,
Testing vectors, where rows is the number of samples
and columns is the number of features.
Returns:
models: dict-object,
Returns a dictionary with each model pipeline as value
with key as name of models.
18class MLARCH(MTS): 19 """Machine Learning with ARCH effects for time series forecasting 20 21 Parameters: 22 23 model_mean: object of class nnetsauce.MTS 24 Model for mean prediction (default: None, uses obj) 25 26 model_sigma: object of class nnetsauce.MTS 27 Model for residuals volatility prediction (default: None, uses obj) 28 29 model_residuals: object of class nnetsauce.MTS 30 Model for residuals prediction (default: None, uses obj) 31 32 Examples: 33 34 See examples/mlarch.py 35 36 """ 37 38 def __init__(self, model_mean, model_sigma, model_residuals): 39 assert isinstance( 40 model_mean, MTS 41 ), "model_mean must be an object of class nnetsauce.MTS" 42 assert isinstance( 43 model_sigma, MTS 44 ), "model_sigma must be an object of class nnetsauce.MTS" 45 assert isinstance( 46 model_residuals, MTS 47 ), "model_residuals must be an object of class nnetsauce.MTS" 48 assert ( 49 model_sigma.type_pi.startswith("scp") 50 and model_sigma.replications is not None 51 ), "for now, the models must be conformalized, i.e type_pi must start with 'scp' and replications must be an integer" 52 assert ( 53 model_residuals.type_pi.startswith("scp") 54 and model_residuals.replications is not None 55 ), "for now, the models must be conformalized, i.e type_pi must start with 'scp' and replications must be an integer" 56 57 self.model_mean = model_mean 58 self.model_sigma = model_sigma 59 self.model_residuals = model_residuals 60 61 self.mean_residuals_ = None 62 self.mean_residuals_wilcoxon_test_ = None 63 self.mean_residuals_kpss_test_ = None 64 self.standardized_residuals_ = None 65 66 def fit(self, y): 67 """Fit the MLARCH model to the time series data. 68 69 Parameters 70 ---------- 71 y : array-like of shape (n_samples,) 72 The target time series to be fitted. 73 74 Returns 75 ------- 76 self : object 77 Returns self. 78 79 Notes 80 ----- 81 This method: 82 83 1. Fits the mean model to the time series 84 2. Performs statistical tests on the residuals (Wilcoxon and KPSS) 85 3. Fits the volatility model to the squared residuals 86 4. Computes standardized residuals 87 5. Fits the residuals model to the standardized residuals 88 """ 89 n = len(y) 90 self.model_mean.fit(y.reshape(-1, 1)) 91 # Wilcoxon signed-rank test on residuals (mean = 0) 92 self.mean_residuals_wilcoxon_test_ = stats.wilcoxon( 93 self.model_mean.residuals_ 94 ) 95 # KPSS test for stationarity on residuals 96 self.mean_residuals_kpss_test_ = kpss( 97 self.model_mean.residuals_, regression="c" 98 ) 99 self.model_sigma.fit( 100 np.log(self.model_mean.residuals_.reshape(-1, 1) ** 2) 101 ) 102 # n//2 here because the model is conformalized 103 fitted_sigma = ( 104 self.model_sigma.residuals_ 105 + np.log(self.model_mean.residuals_**2)[(n // 2):, :] 106 ) 107 # standardized residuals 108 self.standardized_residuals_ = self.model_mean.residuals_[ 109 (n // 2):, : 110 ] / np.sqrt(np.exp(fitted_sigma)) 111 self.model_residuals.fit(self.standardized_residuals_.reshape(-1, 1)) 112 113 # Calculate AIC 114 # Get predictions from all models 115 mean_pred = self.model_mean.predict(h=0).values.ravel() 116 sigma_pred = self.model_sigma.predict(h=0).values.ravel() 117 z_pred = self.model_residuals.predict(h=0).values.ravel() 118 119 # Calculate combined predictions 120 combined_pred = mean_pred + z_pred * np.sqrt(np.exp(sigma_pred)) 121 122 # Calculate SSE using the last half of the data (matching standardized_residuals_) 123 y_actual = y[(n // 2):].ravel() 124 self.sse_ = np.sum((y_actual - combined_pred) ** 2) 125 126 # Calculate number of parameters (sum of parameters from all three models) 127 n_params = ( 128 self.model_mean.n_hidden_features 129 + 1 # mean model 130 + self.model_sigma.n_hidden_features 131 + 1 # sigma model 132 + self.model_residuals.n_hidden_features 133 + 1 134 ) # residuals model 135 136 # Calculate AIC 137 n_samples = len(y_actual) 138 self.aic_ = n_samples * np.log(self.sse_ / n_samples) + 2 * n_params 139 140 return self 141 142 def predict(self, h=5, level=95): 143 """Predict (probabilistic) future values of the time series. 144 145 Parameters 146 ---------- 147 h : int, default=5 148 The forecast horizon. 149 level : int, default=95 150 The confidence level for prediction intervals. 151 152 Returns 153 ------- 154 DescribeResult : namedtuple 155 A named tuple containing: 156 157 - mean : array-like of shape (h,) 158 The mean forecast. 159 - sims : array-like of shape (h, n_replications) 160 The simulated forecasts. 161 - lower : array-like of shape (h,) 162 The lower bound of the prediction interval. 163 - upper : array-like of shape (h,) 164 The upper bound of the prediction interval. 165 166 Notes 167 ----- 168 This method: 169 1. Generates mean forecasts using the mean model 170 2. Generates standardized residual forecasts using the residuals model 171 3. Generates volatility forecasts using the sigma model 172 4. Combines these forecasts to generate the final predictions 173 5. Computes prediction intervals at the specified confidence level 174 """ 175 DescribeResult = namedtuple( 176 "DescribeResult", ("mean", "sims", "lower", "upper") 177 ) 178 mean_forecast = self.model_mean.predict(h=h).values.ravel() 179 preds_z = self.model_residuals.predict(h=h) 180 preds_sigma = self.model_sigma.predict(h=h) 181 sims_z = preds_z.sims 182 sims_sigma = preds_sigma.sims 183 184 f = [] 185 for i in range(len(sims_z)): 186 f.append( 187 mean_forecast 188 + sims_z[i].values.ravel() 189 * np.sqrt(np.exp(sims_sigma[i].values.ravel())) 190 ) 191 192 f = np.asarray(f).T 193 mean_f = np.mean(f, axis=1) 194 alpha = 1 - level / 100 195 lower_bound = np.quantile(f, alpha / 2, axis=1) 196 upper_bound = np.quantile(f, 1 - alpha / 2, axis=1) 197 198 return DescribeResult(mean_f, f, lower_bound, upper_bound)
Machine Learning with ARCH effects for time series forecasting
Parameters:
model_mean: object of class nnetsauce.MTS
Model for mean prediction (default: None, uses obj)
model_sigma: object of class nnetsauce.MTS
Model for residuals volatility prediction (default: None, uses obj)
model_residuals: object of class nnetsauce.MTS
Model for residuals prediction (default: None, uses obj)
Examples:
See examples/mlarch.py
66 def fit(self, y): 67 """Fit the MLARCH model to the time series data. 68 69 Parameters 70 ---------- 71 y : array-like of shape (n_samples,) 72 The target time series to be fitted. 73 74 Returns 75 ------- 76 self : object 77 Returns self. 78 79 Notes 80 ----- 81 This method: 82 83 1. Fits the mean model to the time series 84 2. Performs statistical tests on the residuals (Wilcoxon and KPSS) 85 3. Fits the volatility model to the squared residuals 86 4. Computes standardized residuals 87 5. Fits the residuals model to the standardized residuals 88 """ 89 n = len(y) 90 self.model_mean.fit(y.reshape(-1, 1)) 91 # Wilcoxon signed-rank test on residuals (mean = 0) 92 self.mean_residuals_wilcoxon_test_ = stats.wilcoxon( 93 self.model_mean.residuals_ 94 ) 95 # KPSS test for stationarity on residuals 96 self.mean_residuals_kpss_test_ = kpss( 97 self.model_mean.residuals_, regression="c" 98 ) 99 self.model_sigma.fit( 100 np.log(self.model_mean.residuals_.reshape(-1, 1) ** 2) 101 ) 102 # n//2 here because the model is conformalized 103 fitted_sigma = ( 104 self.model_sigma.residuals_ 105 + np.log(self.model_mean.residuals_**2)[(n // 2):, :] 106 ) 107 # standardized residuals 108 self.standardized_residuals_ = self.model_mean.residuals_[ 109 (n // 2):, : 110 ] / np.sqrt(np.exp(fitted_sigma)) 111 self.model_residuals.fit(self.standardized_residuals_.reshape(-1, 1)) 112 113 # Calculate AIC 114 # Get predictions from all models 115 mean_pred = self.model_mean.predict(h=0).values.ravel() 116 sigma_pred = self.model_sigma.predict(h=0).values.ravel() 117 z_pred = self.model_residuals.predict(h=0).values.ravel() 118 119 # Calculate combined predictions 120 combined_pred = mean_pred + z_pred * np.sqrt(np.exp(sigma_pred)) 121 122 # Calculate SSE using the last half of the data (matching standardized_residuals_) 123 y_actual = y[(n // 2):].ravel() 124 self.sse_ = np.sum((y_actual - combined_pred) ** 2) 125 126 # Calculate number of parameters (sum of parameters from all three models) 127 n_params = ( 128 self.model_mean.n_hidden_features 129 + 1 # mean model 130 + self.model_sigma.n_hidden_features 131 + 1 # sigma model 132 + self.model_residuals.n_hidden_features 133 + 1 134 ) # residuals model 135 136 # Calculate AIC 137 n_samples = len(y_actual) 138 self.aic_ = n_samples * np.log(self.sse_ / n_samples) + 2 * n_params 139 140 return self
Fit the MLARCH model to the time series data.
Parameters
y : array-like of shape (n_samples,) The target time series to be fitted.
Returns
self : object Returns self.
Notes
This method:
- Fits the mean model to the time series
- Performs statistical tests on the residuals (Wilcoxon and KPSS)
- Fits the volatility model to the squared residuals
- Computes standardized residuals
- Fits the residuals model to the standardized residuals
142 def predict(self, h=5, level=95): 143 """Predict (probabilistic) future values of the time series. 144 145 Parameters 146 ---------- 147 h : int, default=5 148 The forecast horizon. 149 level : int, default=95 150 The confidence level for prediction intervals. 151 152 Returns 153 ------- 154 DescribeResult : namedtuple 155 A named tuple containing: 156 157 - mean : array-like of shape (h,) 158 The mean forecast. 159 - sims : array-like of shape (h, n_replications) 160 The simulated forecasts. 161 - lower : array-like of shape (h,) 162 The lower bound of the prediction interval. 163 - upper : array-like of shape (h,) 164 The upper bound of the prediction interval. 165 166 Notes 167 ----- 168 This method: 169 1. Generates mean forecasts using the mean model 170 2. Generates standardized residual forecasts using the residuals model 171 3. Generates volatility forecasts using the sigma model 172 4. Combines these forecasts to generate the final predictions 173 5. Computes prediction intervals at the specified confidence level 174 """ 175 DescribeResult = namedtuple( 176 "DescribeResult", ("mean", "sims", "lower", "upper") 177 ) 178 mean_forecast = self.model_mean.predict(h=h).values.ravel() 179 preds_z = self.model_residuals.predict(h=h) 180 preds_sigma = self.model_sigma.predict(h=h) 181 sims_z = preds_z.sims 182 sims_sigma = preds_sigma.sims 183 184 f = [] 185 for i in range(len(sims_z)): 186 f.append( 187 mean_forecast 188 + sims_z[i].values.ravel() 189 * np.sqrt(np.exp(sims_sigma[i].values.ravel())) 190 ) 191 192 f = np.asarray(f).T 193 mean_f = np.mean(f, axis=1) 194 alpha = 1 - level / 100 195 lower_bound = np.quantile(f, alpha / 2, axis=1) 196 upper_bound = np.quantile(f, 1 - alpha / 2, axis=1) 197 198 return DescribeResult(mean_f, f, lower_bound, upper_bound)
Predict (probabilistic) future values of the time series.
Parameters
h : int, default=5 The forecast horizon. level : int, default=95 The confidence level for prediction intervals.
Returns
DescribeResult : namedtuple A named tuple containing:
- mean : array-like of shape (h,)
The mean forecast.
- sims : array-like of shape (h, n_replications)
The simulated forecasts.
- lower : array-like of shape (h,)
The lower bound of the prediction interval.
- upper : array-like of shape (h,)
The upper bound of the prediction interval.
Notes
This method:
- Generates mean forecasts using the mean model
- Generates standardized residual forecasts using the residuals model
- Generates volatility forecasts using the sigma model
- Combines these forecasts to generate the final predictions
- Computes prediction intervals at the specified confidence level
6class MedianVotingRegressor(VotingRegressor): 7 def predict(self, X): 8 """ 9 Predict using the median of the base regressors' predictions. 10 11 Parameters: 12 X (array-like): Feature matrix for predictions. 13 14 Returns: 15 y_pred (array): Median of predictions from the base regressors. 16 """ 17 predictions = np.asarray( 18 [regressor.predict(X) for regressor in self.estimators_] 19 ) 20 return np.median(predictions, axis=0)
Prediction voting regressor for unfitted estimators.
A voting regressor is an ensemble meta-estimator that fits several base regressors, each on the whole dataset. Then it averages the individual predictions to form a final prediction.
Read more in the :ref:User Guide <voting_regressor>
.
New in version 0.21.
Parameters
estimators : list of (str, estimator) tuples
Invoking the fit
method on the VotingRegressor
will fit clones
of those original estimators that will be stored in the class attribute
self.estimators_
. An estimator can be set to 'drop'
using
set_params()
.
*Changed in version 0.21:*
``'drop'`` is accepted. Using None was deprecated in 0.22 and
support was removed in 0.24.
weights : array-like of shape (n_regressors,), default=None
Sequence of weights (float
or int
) to weight the occurrences of
predicted values before averaging. Uses uniform weights if None
.
n_jobs : int, default=None
The number of jobs to run in parallel for fit
.
None
means 1 unless in a joblib.parallel_backend
context.
-1
means using all processors. See :term:Glossary <n_jobs>
for more details.
verbose : bool, default=False If True, the time elapsed while fitting will be printed as it is completed.
*New in version 0.23.*
Attributes
estimators_ : list of regressors
The collection of fitted sub-estimators as defined in estimators
that are not 'drop'.
named_estimators_ : ~sklearn.utils.Bunch
Attribute to access any fitted sub-estimators by name.
*New in version 0.20.*
n_features_in_ : int
Number of features seen during :term:fit
. Only defined if the
underlying regressor exposes such an attribute when fit.
*New in version 0.24.*
feature_names_in_ : ndarray of shape (n_features_in_
,)
Names of features seen during :term:fit
. Only defined if the
underlying estimators expose such an attribute when fit.
*New in version 1.0.*
See Also
VotingClassifier : Soft Voting/Majority Rule classifier.
Examples
>>> import numpy as np
>>> from sklearn.linear_model import LinearRegression
>>> from sklearn.ensemble import RandomForestRegressor
>>> from sklearn.ensemble import VotingRegressor
>>> from sklearn.neighbors import KNeighborsRegressor
>>> r1 = LinearRegression()
>>> r2 = RandomForestRegressor(n_estimators=10, random_state=1)
>>> r3 = KNeighborsRegressor()
>>> X = np.array([[1, 1], [2, 4], [3, 9], [4, 16], [5, 25], [6, 36]])
>>> y = np.array([2, 6, 12, 20, 30, 42])
>>> er = VotingRegressor([('lr', r1), ('rf', r2), ('r3', r3)])
>>> print(er.fit(X, y).predict(X))
[ 6.8... 8.4... 12.5... 17.8... 26... 34...]
In the following example, we drop the 'lr'
estimator with
~VotingRegressor.set_params()
and fit the remaining two estimators:
>>> er = er.set_params(lr='drop')
>>> er = er.fit(X, y)
>>> len(er.estimators_)
2
7 def predict(self, X): 8 """ 9 Predict using the median of the base regressors' predictions. 10 11 Parameters: 12 X (array-like): Feature matrix for predictions. 13 14 Returns: 15 y_pred (array): Median of predictions from the base regressors. 16 """ 17 predictions = np.asarray( 18 [regressor.predict(X) for regressor in self.estimators_] 19 ) 20 return np.median(predictions, axis=0)
Predict using the median of the base regressors' predictions.
Parameters: X (array-like): Feature matrix for predictions.
Returns: y_pred (array): Median of predictions from the base regressors.
30class MTS(Base): 31 """Univariate and multivariate time series (MTS) forecasting with Quasi-Randomized networks 32 33 Parameters: 34 35 obj: object. 36 any object containing a method fit (obj.fit()) and a method predict 37 (obj.predict()). 38 39 n_hidden_features: int. 40 number of nodes in the hidden layer. 41 42 activation_name: str. 43 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'. 44 45 a: float. 46 hyperparameter for 'prelu' or 'elu' activation function. 47 48 nodes_sim: str. 49 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 50 'uniform'. 51 52 bias: boolean. 53 indicates if the hidden layer contains a bias term (True) or not 54 (False). 55 56 dropout: float. 57 regularization parameter; (random) percentage of nodes dropped out 58 of the training. 59 60 direct_link: boolean. 61 indicates if the original predictors are included (True) in model's fitting or not (False). 62 63 n_clusters: int. 64 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering). 65 66 cluster_encode: bool. 67 defines how the variable containing clusters is treated (default is one-hot) 68 if `False`, then labels are used, without one-hot encoding. 69 70 type_clust: str. 71 type of clustering method: currently k-means ('kmeans') or Gaussian 72 Mixture Model ('gmm'). 73 74 type_scaling: a tuple of 3 strings. 75 scaling methods for inputs, hidden layer, and clustering respectively 76 (and when relevant). 77 Currently available: standardization ('std') or MinMax scaling ('minmax'). 78 79 lags: int. 80 number of lags used for each time series. 81 If string, lags must be one of 'AIC', 'AICc', or 'BIC'. 82 83 type_pi: str. 84 type of prediction interval; currently: 85 - "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case 86 - "quantile": use model-agnostic quantile regression under the hood 87 - "kde": based on Kernel Density Estimation of in-sample residuals 88 - "bootstrap": based on independent bootstrap of in-sample residuals 89 - "block-bootstrap": based on basic block bootstrap of in-sample residuals 90 - "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals 91 - "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals 92 - "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals 93 - "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals 94 - "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals 95 - "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals 96 - based on copulas of in-sample residuals: 'vine-tll', 'vine-bb1', 'vine-bb6', 'vine-bb7', 'vine-bb8', 'vine-clayton', 97 'vine-frank', 'vine-gaussian', 'vine-gumbel', 'vine-indep', 'vine-joe', 'vine-student' 98 - 'scp-vine-tll', 'scp-vine-bb1', 'scp-vine-bb6', 'scp-vine-bb7', 'scp-vine-bb8', 'scp-vine-clayton', 99 'scp-vine-frank', 'scp-vine-gaussian', 'scp-vine-gumbel', 'scp-vine-indep', 'scp-vine-joe', 'scp-vine-student' 100 - 'scp2-vine-tll', 'scp2-vine-bb1', 'scp2-vine-bb6', 'scp2-vine-bb7', 'scp2-vine-bb8', 'scp2-vine-clayton', 101 'scp2-vine-frank', 'scp2-vine-gaussian', 'scp2-vine-gumbel', 'scp2-vine-indep', 'scp2-vine-joe', 'scp2-vine-student' 102 103 level: int. 104 level of confidence for `type_pi == 'quantile'` (default is `95`) 105 106 block_size: int. 107 size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap"). 108 Default is round(3.15*(n_residuals^1/3)) 109 110 replications: int. 111 number of replications (if needed, for predictive simulation). Default is 'None'. 112 113 kernel: str. 114 the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'. 115 116 agg: str. 117 either "mean" or "median" for simulation of bootstrap aggregating 118 119 seed: int. 120 reproducibility seed for nodes_sim=='uniform' or predictive simulation. 121 122 backend: str. 123 "cpu" or "gpu" or "tpu". 124 125 verbose: int. 126 0: not printing; 1: printing 127 128 show_progress: bool. 129 True: progress bar when fitting each series; False: no progress bar when fitting each series 130 131 Attributes: 132 133 fit_objs_: dict 134 objects adjusted to each individual time series 135 136 y_: {array-like} 137 MTS responses (most recent observations first) 138 139 X_: {array-like} 140 MTS lags 141 142 xreg_: {array-like} 143 external regressors 144 145 y_means_: dict 146 a dictionary of each series mean values 147 148 preds_: {array-like} 149 successive model predictions 150 151 preds_std_: {array-like} 152 standard deviation around the predictions for Bayesian base learners (`obj`) 153 154 gaussian_preds_std_: {array-like} 155 standard deviation around the predictions for `type_pi='gaussian'` 156 157 return_std_: boolean 158 return uncertainty or not (set in predict) 159 160 df_: data frame 161 the input data frame, in case a data.frame is provided to `fit` 162 163 n_obs_: int 164 number of time series observations (number of rows for multivariate) 165 166 level_: int 167 level of confidence for prediction intervals (default is 95) 168 169 residuals_: {array-like} 170 in-sample residuals (for `type_pi` not conformal prediction) or calibrated residuals 171 (for `type_pi` in conformal prediction) 172 173 residuals_sims_: tuple of {array-like} 174 simulations of in-sample residuals (for `type_pi` not conformal prediction) or 175 calibrated residuals (for `type_pi` in conformal prediction) 176 177 kde_: A scikit-learn object, see https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KernelDensity.html 178 179 residuals_std_dev_: residuals standard deviation 180 181 Examples: 182 183 Example 1: 184 185 ```python 186 import nnetsauce as ns 187 import numpy as np 188 from sklearn import linear_model 189 np.random.seed(123) 190 191 M = np.random.rand(10, 3) 192 M[:,0] = 10*M[:,0] 193 M[:,2] = 25*M[:,2] 194 print(M) 195 196 # Adjust Bayesian Ridge 197 regr4 = linear_model.BayesianRidge() 198 obj_MTS = ns.MTS(regr4, lags = 1, n_hidden_features=5) 199 obj_MTS.fit(M) 200 print(obj_MTS.predict()) 201 202 # with credible intervals 203 print(obj_MTS.predict(return_std=True, level=80)) 204 205 print(obj_MTS.predict(return_std=True, level=95)) 206 ``` 207 208 Example 2: 209 210 ```python 211 import nnetsauce as ns 212 import numpy as np 213 from sklearn import linear_model 214 215 dataset = { 216 'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'], 217 'series1' : [34, 30, 35.6, 33.3, 38.1], 218 'series2' : [4, 5.5, 5.6, 6.3, 5.1], 219 'series3' : [100, 100.5, 100.6, 100.2, 100.1]} 220 df = pd.DataFrame(dataset).set_index('date') 221 print(df) 222 223 # Adjust Bayesian Ridge 224 regr5 = linear_model.BayesianRidge() 225 obj_MTS = ns.MTS(regr5, lags = 1, n_hidden_features=5) 226 obj_MTS.fit(df) 227 print(obj_MTS.predict()) 228 229 # with credible intervals 230 print(obj_MTS.predict(return_std=True, level=80)) 231 232 print(obj_MTS.predict(return_std=True, level=95)) 233 ``` 234 """ 235 236 # construct the object ----- 237 238 def __init__( 239 self, 240 obj, 241 n_hidden_features=5, 242 activation_name="relu", 243 a=0.01, 244 nodes_sim="sobol", 245 bias=True, 246 dropout=0, 247 direct_link=True, 248 n_clusters=2, 249 cluster_encode=True, 250 type_clust="kmeans", 251 type_scaling=("std", "std", "std"), 252 lags=1, 253 type_pi="kde", 254 level=95, 255 block_size=None, 256 replications=None, 257 kernel="gaussian", 258 agg="mean", 259 seed=123, 260 backend="cpu", 261 verbose=0, 262 show_progress=True, 263 ): 264 265 super().__init__( 266 n_hidden_features=n_hidden_features, 267 activation_name=activation_name, 268 a=a, 269 nodes_sim=nodes_sim, 270 bias=bias, 271 dropout=dropout, 272 direct_link=direct_link, 273 n_clusters=n_clusters, 274 cluster_encode=cluster_encode, 275 type_clust=type_clust, 276 type_scaling=type_scaling, 277 seed=seed, 278 backend=backend, 279 ) 280 281 # Add validation for lags parameter 282 if isinstance(lags, str): 283 assert lags in ( 284 "AIC", 285 "AICc", 286 "BIC", 287 ), "if string, lags must be one of 'AIC', 'AICc', or 'BIC'" 288 else: 289 assert ( 290 int(lags) == lags 291 ), "if numeric, lags parameter should be an integer" 292 293 self.obj = obj 294 self.n_series = None 295 self.lags = lags 296 self.type_pi = type_pi 297 self.level = level 298 if self.type_pi == "quantile": 299 self.obj = QuantileRegressor( 300 self.obj, level=self.level, scoring="conformal" 301 ) 302 self.block_size = block_size 303 self.replications = replications 304 self.kernel = kernel 305 self.agg = agg 306 self.verbose = verbose 307 self.show_progress = show_progress 308 self.series_names = None 309 self.input_dates = None 310 self.quantiles = None 311 self.fit_objs_ = {} 312 self.y_ = None # MTS responses (most recent observations first) 313 self.X_ = None # MTS lags 314 self.xreg_ = None 315 self.y_means_ = {} 316 self.mean_ = None 317 self.median_ = None 318 self.upper_ = None 319 self.lower_ = None 320 self.output_dates_ = None 321 self.preds_std_ = [] 322 self.gaussian_preds_std_ = None 323 self.alpha_ = None 324 self.return_std_ = None 325 self.df_ = None 326 self.residuals_ = [] 327 self.abs_calib_residuals_ = None 328 self.calib_residuals_quantile_ = None 329 self.residuals_sims_ = None 330 self.kde_ = None 331 self.sims_ = None 332 self.residuals_std_dev_ = None 333 self.n_obs_ = None 334 self.level_ = None 335 self.init_n_series_ = None 336 337 def fit(self, X, xreg=None, **kwargs): 338 """Fit MTS model to training data X, with optional regressors xreg 339 340 Parameters: 341 342 X: {array-like}, shape = [n_samples, n_features] 343 Training time series, where n_samples is the number 344 of samples and n_features is the number of features; 345 X must be in increasing order (most recent observations last) 346 347 xreg: {array-like}, shape = [n_samples, n_features_xreg] 348 Additional (external) regressors to be passed to self.obj 349 xreg must be in 'increasing' order (most recent observations last) 350 351 **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity) 352 353 Returns: 354 355 self: object 356 """ 357 try: 358 self.init_n_series_ = X.shape[1] 359 except IndexError as e: 360 self.init_n_series_ = 1 361 362 # Automatic lag selection if requested 363 if isinstance(self.lags, str): 364 max_lags = min(25, X.shape[0] // 4) 365 best_ic = float("inf") 366 best_lags = 1 367 368 if self.verbose: 369 print( 370 f"\nSelecting optimal number of lags using {self.lags}..." 371 ) 372 iterator = tqdm(range(1, max_lags + 1)) 373 else: 374 iterator = range(1, max_lags + 1) 375 376 for lag in iterator: 377 # Convert DataFrame to numpy array before reversing 378 if isinstance(X, pd.DataFrame): 379 X_values = X.values[::-1] 380 else: 381 X_values = X[::-1] 382 383 # Try current lag value 384 if self.init_n_series_ > 1: 385 mts_input = ts.create_train_inputs(X_values, lag) 386 else: 387 mts_input = ts.create_train_inputs( 388 X_values.reshape(-1, 1), lag 389 ) 390 391 # Cook training set and fit model 392 dummy_y, scaled_Z = self.cook_training_set( 393 y=np.ones(mts_input[0].shape[0]), X=mts_input[1] 394 ) 395 residuals_ = [] 396 397 for i in range(self.init_n_series_): 398 y_mean = np.mean(mts_input[0][:, i]) 399 centered_y_i = mts_input[0][:, i] - y_mean 400 self.obj.fit(X=scaled_Z, y=centered_y_i) 401 residuals_.append( 402 (centered_y_i - self.obj.predict(scaled_Z)).tolist() 403 ) 404 405 self.residuals_ = np.asarray(residuals_).T 406 ic = self._compute_information_criterion( 407 curr_lags=lag, criterion=self.lags 408 ) 409 410 if self.verbose: 411 print(f"Trying lags={lag}, {self.lags}={ic:.2f}") 412 413 if ic < best_ic: 414 best_ic = ic 415 best_lags = lag 416 417 if self.verbose: 418 print( 419 f"\nSelected {best_lags} lags with {self.lags}={best_ic:.2f}" 420 ) 421 422 self.lags = best_lags 423 424 self.input_dates = None 425 self.df_ = None 426 427 if isinstance(X, pd.DataFrame) is False: 428 # input data set is a numpy array 429 if xreg is None: 430 X = pd.DataFrame(X) 431 self.series_names = [ 432 "series" + str(i) for i in range(X.shape[1]) 433 ] 434 else: 435 # xreg is not None 436 X = mo.cbind(X, xreg) 437 self.xreg_ = xreg 438 439 else: # input data set is a DataFrame with column names 440 441 X_index = None 442 if X.index is not None: 443 X_index = X.index 444 if xreg is None: 445 X = copy.deepcopy(mo.convert_df_to_numeric(X)) 446 else: 447 X = copy.deepcopy(mo.cbind(mo.convert_df_to_numeric(X), xreg)) 448 self.xreg_ = xreg 449 if X_index is not None: 450 X.index = X_index 451 self.series_names = X.columns.tolist() 452 453 if isinstance(X, pd.DataFrame): 454 if self.df_ is None: 455 self.df_ = X 456 X = X.values 457 else: 458 input_dates_prev = pd.DatetimeIndex(self.df_.index.values) 459 frequency = pd.infer_freq(input_dates_prev) 460 self.df_ = pd.concat([self.df_, X], axis=0) 461 self.input_dates = pd.date_range( 462 start=input_dates_prev[0], 463 periods=len(input_dates_prev) + X.shape[0], 464 freq=frequency, 465 ).values.tolist() 466 self.df_.index = self.input_dates 467 X = self.df_.values 468 self.df_.columns = self.series_names 469 else: 470 if self.df_ is None: 471 self.df_ = pd.DataFrame(X, columns=self.series_names) 472 else: 473 self.df_ = pd.concat( 474 [self.df_, pd.DataFrame(X, columns=self.series_names)], 475 axis=0, 476 ) 477 478 self.input_dates = ts.compute_input_dates(self.df_) 479 480 try: 481 # multivariate time series 482 n, p = X.shape 483 except: 484 # univariate time series 485 n = X.shape[0] 486 p = 1 487 self.n_obs_ = n 488 489 rep_1_n = np.repeat(1, n) 490 491 self.y_ = None 492 self.X_ = None 493 self.n_series = p 494 self.fit_objs_.clear() 495 self.y_means_.clear() 496 residuals_ = [] 497 self.residuals_ = None 498 self.residuals_sims_ = None 499 self.kde_ = None 500 self.sims_ = None 501 self.scaled_Z_ = None 502 self.centered_y_is_ = [] 503 504 if self.init_n_series_ > 1: 505 # multivariate time series 506 mts_input = ts.create_train_inputs(X[::-1], self.lags) 507 else: 508 # univariate time series 509 mts_input = ts.create_train_inputs( 510 X.reshape(-1, 1)[::-1], self.lags 511 ) 512 513 self.y_ = mts_input[0] 514 515 self.X_ = mts_input[1] 516 517 dummy_y, scaled_Z = self.cook_training_set(y=rep_1_n, X=self.X_) 518 519 self.scaled_Z_ = scaled_Z 520 521 # loop on all the time series and adjust self.obj.fit 522 if self.verbose > 0: 523 print( 524 f"\n Adjusting {type(self.obj).__name__} to multivariate time series... \n" 525 ) 526 527 if self.show_progress is True: 528 iterator = tqdm(range(self.init_n_series_)) 529 else: 530 iterator = range(self.init_n_series_) 531 532 if self.type_pi in ( 533 "gaussian", 534 "kde", 535 "bootstrap", 536 "block-bootstrap", 537 ) or self.type_pi.startswith("vine"): 538 for i in iterator: 539 y_mean = np.mean(self.y_[:, i]) 540 self.y_means_[i] = y_mean 541 centered_y_i = self.y_[:, i] - y_mean 542 self.centered_y_is_.append(centered_y_i) 543 self.obj.fit(X=scaled_Z, y=centered_y_i) 544 self.fit_objs_[i] = deepcopy(self.obj) 545 residuals_.append( 546 ( 547 centered_y_i - self.fit_objs_[i].predict(scaled_Z) 548 ).tolist() 549 ) 550 551 if self.type_pi == "quantile": 552 for i in iterator: 553 y_mean = np.mean(self.y_[:, i]) 554 self.y_means_[i] = y_mean 555 centered_y_i = self.y_[:, i] - y_mean 556 self.centered_y_is_.append(centered_y_i) 557 self.obj.fit(X=scaled_Z, y=centered_y_i) 558 self.fit_objs_[i] = deepcopy(self.obj) 559 560 if self.type_pi.startswith("scp"): 561 # split conformal prediction 562 for i in iterator: 563 n_y = self.y_.shape[0] 564 n_y_half = n_y // 2 565 first_half_idx = range(0, n_y_half) 566 second_half_idx = range(n_y_half, n_y) 567 y_mean_temp = np.mean(self.y_[first_half_idx, i]) 568 centered_y_i_temp = self.y_[first_half_idx, i] - y_mean_temp 569 self.obj.fit(X=scaled_Z[first_half_idx, :], y=centered_y_i_temp) 570 # calibrated residuals actually 571 residuals_.append( 572 ( 573 self.y_[second_half_idx, i] 574 - ( 575 y_mean_temp 576 + self.obj.predict(scaled_Z[second_half_idx, :]) 577 ) 578 ).tolist() 579 ) 580 # fit on the second half 581 y_mean = np.mean(self.y_[second_half_idx, i]) 582 self.y_means_[i] = y_mean 583 centered_y_i = self.y_[second_half_idx, i] - y_mean 584 self.obj.fit(X=scaled_Z[second_half_idx, :], y=centered_y_i) 585 self.fit_objs_[i] = deepcopy(self.obj) 586 587 self.residuals_ = np.asarray(residuals_).T 588 589 if self.type_pi == "gaussian": 590 self.gaussian_preds_std_ = np.std(self.residuals_, axis=0) 591 592 if self.type_pi.startswith("scp2"): 593 # Calculate mean and standard deviation for each column 594 data_mean = np.mean(self.residuals_, axis=0) 595 self.residuals_std_dev_ = np.std(self.residuals_, axis=0) 596 # Center and scale the array using broadcasting 597 self.residuals_ = ( 598 self.residuals_ - data_mean[np.newaxis, :] 599 ) / self.residuals_std_dev_[np.newaxis, :] 600 601 if self.replications != None and "kde" in self.type_pi: 602 if self.verbose > 0: 603 print(f"\n Simulate residuals using {self.kernel} kernel... \n") 604 assert self.kernel in ( 605 "gaussian", 606 "tophat", 607 ), "currently, 'kernel' must be either 'gaussian' or 'tophat'" 608 kernel_bandwidths = {"bandwidth": np.logspace(-6, 6, 150)} 609 grid = GridSearchCV( 610 KernelDensity(kernel=self.kernel, **kwargs), 611 param_grid=kernel_bandwidths, 612 ) 613 grid.fit(self.residuals_) 614 615 if self.verbose > 0: 616 print( 617 f"\n Best parameters for {self.kernel} kernel: {grid.best_params_} \n" 618 ) 619 620 self.kde_ = grid.best_estimator_ 621 622 return self 623 624 def partial_fit(self, X, xreg=None, **kwargs): 625 """Update the model with new observations X, with optional regressors xreg 626 627 Parameters: 628 629 X: {array-like}, shape = [n_samples, n_features] 630 Training time series, where n_samples is the number 631 of samples and n_features is the number of features; 632 X must be in increasing order (most recent observations last) 633 634 xreg: {array-like}, shape = [n_samples, n_features_xreg] 635 Additional (external) regressors to be passed to self.obj 636 xreg must be in 'increasing' order (most recent observations last) 637 638 **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity) 639 640 Returns: 641 642 self: object 643 """ 644 645 assert self.df_ is not None, "fit() must be called before partial_fit()" 646 647 if (isinstance(X, pd.DataFrame) is False) and isinstance( 648 X, pd.Series 649 ) is False: 650 if len(X.shape) == 1: 651 X = X.reshape(1, -1) 652 653 return self.fit(X, xreg, **kwargs) 654 655 else: 656 if len(X.shape) == 1: 657 X = pd.DataFrame( 658 X.values.reshape(1, -1), columns=self.df_.columns 659 ) 660 661 return self.fit(X, xreg, **kwargs) 662 663 def _predict_quantiles(self, h, quantiles, **kwargs): 664 """Predict arbitrary quantiles from simulated paths.""" 665 # Ensure output dates are set 666 self.output_dates_, _ = ts.compute_output_dates(self.df_, h) 667 668 # Trigger full prediction to generate self.sims_ 669 if not hasattr(self, "sims_") or self.sims_ is None: 670 _ = self.predict(h=h, level=95, **kwargs) # Any level triggers sim 671 672 result_dict = {} 673 674 # Stack simulations: (R, h, n_series) 675 sims_array = np.stack([sim.values for sim in self.sims_], axis=0) 676 677 # Compute quantiles over replication axis 678 q_values = np.quantile( 679 sims_array, quantiles, axis=0 680 ) # (n_q, h, n_series) 681 682 for i, q in enumerate(quantiles): 683 # Clean label: 0.05 → "05", 0.1 → "10", 0.95 → "95" 684 q_label = ( 685 f"{int(q * 100):02d}" 686 if (q * 100).is_integer() 687 else f"{q:.3f}".replace(".", "_") 688 ) 689 for series_id in range(self.init_n_series_): 690 series_name = self.series_names[series_id] 691 col_name = f"quantile_{q_label}_{series_name}" 692 result_dict[col_name] = q_values[i, :, series_id] 693 694 df_return_quantiles = pd.DataFrame( 695 result_dict, index=self.output_dates_ 696 ) 697 698 return df_return_quantiles 699 700 def predict(self, h=5, level=95, quantiles=None, **kwargs): 701 """Forecast all the time series, h steps ahead""" 702 703 if quantiles is not None: 704 # Validate 705 quantiles = np.asarray(quantiles) 706 if not ((quantiles > 0) & (quantiles < 1)).all(): 707 raise ValueError("quantiles must be between 0 and 1.") 708 # Delegate to dedicated method 709 return self._predict_quantiles(h=h, quantiles=quantiles, **kwargs) 710 711 if isinstance(level, list) or isinstance(level, np.ndarray): 712 # Store results 713 result_dict = {} 714 # Loop through alphas and calculate lower/upper for each alpha level 715 # E.g [0.5, 2.5, 5, 16.5, 25, 50] 716 for lev in level: 717 # Get the forecast for this alpha 718 res = self.predict(h=h, level=lev, **kwargs) 719 # Adjust index and collect lower/upper bounds 720 res.lower.index = pd.to_datetime(res.lower.index) 721 res.upper.index = pd.to_datetime(res.upper.index) 722 # Loop over each time series (multivariate) and flatten results 723 if isinstance(res.lower, pd.DataFrame): 724 for ( 725 series 726 ) in ( 727 res.lower.columns 728 ): # Assumes 'lower' and 'upper' have multiple series 729 result_dict[f"lower_{lev}_{series}"] = ( 730 res.lower[series].to_numpy().flatten() 731 ) 732 result_dict[f"upper_{lev}_{series}"] = ( 733 res.upper[series].to_numpy().flatten() 734 ) 735 else: 736 for series_id in range( 737 self.n_series 738 ): # Assumes 'lower' and 'upper' have multiple series 739 result_dict[f"lower_{lev}_{series_id}"] = ( 740 res.lower[series_id, :].to_numpy().flatten() 741 ) 742 result_dict[f"upper_{lev}_{series_id}"] = ( 743 res.upper[series_id, :].to_numpy().flatten() 744 ) 745 return pd.DataFrame(result_dict, index=self.output_dates_) 746 747 # only one prediction interval 748 self.output_dates_, frequency = ts.compute_output_dates(self.df_, h) 749 750 self.level_ = level 751 752 self.return_std_ = False # do not remove (/!\) 753 754 self.mean_ = None # do not remove (/!\) 755 756 self.mean_ = deepcopy(self.y_) # do not remove (/!\) 757 758 self.lower_ = None # do not remove (/!\) 759 760 self.upper_ = None # do not remove (/!\) 761 762 self.sims_ = None # do not remove (/!\) 763 764 y_means_ = np.asarray( 765 [self.y_means_[i] for i in range(self.init_n_series_)] 766 ) 767 768 n_features = self.init_n_series_ * self.lags 769 770 self.alpha_ = 100 - level 771 772 pi_multiplier = norm.ppf(1 - self.alpha_ / 200) 773 774 if "return_std" in kwargs: # bayesian forecasting 775 self.return_std_ = True 776 self.preds_std_ = [] 777 DescribeResult = namedtuple( 778 "DescribeResult", ("mean", "lower", "upper") 779 ) # to be updated 780 781 if "return_pi" in kwargs: # split conformal, without simulation 782 mean_pi_ = [] 783 lower_pi_ = [] 784 upper_pi_ = [] 785 median_pi_ = [] 786 DescribeResult = namedtuple( 787 "DescribeResult", ("mean", "lower", "upper") 788 ) # to be updated 789 790 if self.kde_ != None and "kde" in self.type_pi: # kde 791 target_cols = self.df_.columns[ 792 : self.init_n_series_ 793 ] # Get target column names 794 if self.verbose == 1: 795 self.residuals_sims_ = tuple( 796 self.kde_.sample( 797 n_samples=h, random_state=self.seed + 100 * i 798 ) # Keep full sample 799 for i in tqdm(range(self.replications)) 800 ) 801 elif self.verbose == 0: 802 self.residuals_sims_ = tuple( 803 self.kde_.sample( 804 n_samples=h, random_state=self.seed + 100 * i 805 ) # Keep full sample 806 for i in range(self.replications) 807 ) 808 809 # Convert to DataFrames after sampling 810 self.residuals_sims_ = tuple( 811 pd.DataFrame( 812 sim, # Keep all columns 813 columns=target_cols, # Use original target column names 814 index=self.output_dates_, 815 ) 816 for sim in self.residuals_sims_ 817 ) 818 819 if self.type_pi in ("bootstrap", "scp-bootstrap", "scp2-bootstrap"): 820 assert self.replications is not None and isinstance( 821 self.replications, int 822 ), "'replications' must be provided and be an integer" 823 if self.verbose == 1: 824 self.residuals_sims_ = tuple( 825 ts.bootstrap( 826 self.residuals_, 827 h=h, 828 block_size=None, 829 seed=self.seed + 100 * i, 830 ) 831 for i in tqdm(range(self.replications)) 832 ) 833 elif self.verbose == 0: 834 self.residuals_sims_ = tuple( 835 ts.bootstrap( 836 self.residuals_, 837 h=h, 838 block_size=None, 839 seed=self.seed + 100 * i, 840 ) 841 for i in range(self.replications) 842 ) 843 844 if self.type_pi in ( 845 "block-bootstrap", 846 "scp-block-bootstrap", 847 "scp2-block-bootstrap", 848 ): 849 if self.block_size is None: 850 self.block_size = int( 851 np.ceil(3.15 * (self.residuals_.shape[0] ** (1 / 3))) 852 ) 853 854 assert self.replications is not None and isinstance( 855 self.replications, int 856 ), "'replications' must be provided and be an integer" 857 if self.verbose == 1: 858 self.residuals_sims_ = tuple( 859 ts.bootstrap( 860 self.residuals_, 861 h=h, 862 block_size=self.block_size, 863 seed=self.seed + 100 * i, 864 ) 865 for i in tqdm(range(self.replications)) 866 ) 867 elif self.verbose == 0: 868 self.residuals_sims_ = tuple( 869 ts.bootstrap( 870 self.residuals_, 871 h=h, 872 block_size=self.block_size, 873 seed=self.seed + 100 * i, 874 ) 875 for i in range(self.replications) 876 ) 877 878 if "vine" in self.type_pi: 879 if self.verbose == 1: 880 self.residuals_sims_ = tuple( 881 vinecopula_sample( 882 x=self.residuals_, 883 n_samples=h, 884 method=self.type_pi, 885 random_state=self.seed + 100 * i, 886 ) 887 for i in tqdm(range(self.replications)) 888 ) 889 elif self.verbose == 0: 890 self.residuals_sims_ = tuple( 891 vinecopula_sample( 892 x=self.residuals_, 893 n_samples=h, 894 method=self.type_pi, 895 random_state=self.seed + 100 * i, 896 ) 897 for i in range(self.replications) 898 ) 899 900 mean_ = deepcopy(self.mean_) 901 902 for i in range(h): 903 904 new_obs = ts.reformat_response(mean_, self.lags) 905 new_X = new_obs.reshape(1, -1) 906 cooked_new_X = self.cook_test_set(new_X, **kwargs) 907 908 if "return_std" in kwargs: 909 self.preds_std_.append( 910 [ 911 np.asarray( 912 self.fit_objs_[i].predict( 913 cooked_new_X, return_std=True 914 )[1] 915 ).item() 916 for i in range(self.n_series) 917 ] 918 ) 919 920 if "return_pi" in kwargs: 921 for i in range(self.n_series): 922 preds_pi = self.fit_objs_[i].predict(cooked_new_X, **kwargs) 923 mean_pi_.append(preds_pi.mean[0]) 924 lower_pi_.append(preds_pi.lower[0]) 925 upper_pi_.append(preds_pi.upper[0]) 926 927 if self.type_pi != "quantile": 928 predicted_cooked_new_X = np.asarray( 929 [ 930 np.asarray( 931 self.fit_objs_[i].predict(cooked_new_X) 932 ).item() 933 for i in range(self.init_n_series_) 934 ] 935 ) 936 else: 937 predicted_cooked_new_X = np.asarray( 938 [ 939 np.asarray( 940 self.fit_objs_[i] 941 .predict(cooked_new_X, return_pi=True) 942 .upper 943 ).item() 944 for i in range(self.init_n_series_) 945 ] 946 ) 947 948 preds = np.asarray(y_means_ + predicted_cooked_new_X) 949 950 # Create full row with both predictions and external regressors 951 if self.xreg_ is not None and "xreg" in kwargs: 952 next_xreg = kwargs["xreg"].iloc[i: i + 1].values.flatten() 953 full_row = np.concatenate([preds, next_xreg]) 954 else: 955 full_row = preds 956 957 # Create a new row with same number of columns as mean_ 958 new_row = np.zeros((1, mean_.shape[1])) 959 new_row[0, : full_row.shape[0]] = full_row 960 961 # Maintain the full dimensionality by using vstack instead of rbind 962 mean_ = np.vstack([new_row, mean_[:-1]]) 963 964 # Final output should only include the target columns 965 self.mean_ = pd.DataFrame( 966 mean_[0:h, : self.init_n_series_][::-1], 967 columns=self.df_.columns[: self.init_n_series_], 968 index=self.output_dates_, 969 ) 970 971 # function's return ---------------------------------------------------------------------- 972 if ( 973 (("return_std" not in kwargs) and ("return_pi" not in kwargs)) 974 and (self.type_pi not in ("gaussian", "scp")) 975 ) or ("vine" in self.type_pi): 976 977 if self.replications is None: 978 return self.mean_.iloc[:, : self.init_n_series_] 979 980 # if "return_std" not in kwargs and self.replications is not None 981 meanf = [] 982 medianf = [] 983 lower = [] 984 upper = [] 985 986 if "scp2" in self.type_pi: 987 988 if self.verbose == 1: 989 self.sims_ = tuple( 990 ( 991 self.mean_ 992 + self.residuals_sims_[i] 993 * self.residuals_std_dev_[np.newaxis, :] 994 for i in tqdm(range(self.replications)) 995 ) 996 ) 997 elif self.verbose == 0: 998 self.sims_ = tuple( 999 ( 1000 self.mean_ 1001 + self.residuals_sims_[i] 1002 * self.residuals_std_dev_[np.newaxis, :] 1003 for i in range(self.replications) 1004 ) 1005 ) 1006 else: 1007 1008 if self.verbose == 1: 1009 self.sims_ = tuple( 1010 ( 1011 self.mean_ + self.residuals_sims_[i] 1012 for i in tqdm(range(self.replications)) 1013 ) 1014 ) 1015 elif self.verbose == 0: 1016 self.sims_ = tuple( 1017 ( 1018 self.mean_ + self.residuals_sims_[i] 1019 for i in range(self.replications) 1020 ) 1021 ) 1022 1023 DescribeResult = namedtuple( 1024 "DescribeResult", ("mean", "sims", "lower", "upper") 1025 ) 1026 for ix in range(self.init_n_series_): 1027 sims_ix = getsims(self.sims_, ix) 1028 if self.agg == "mean": 1029 meanf.append(np.mean(sims_ix, axis=1)) 1030 else: 1031 medianf.append(np.median(sims_ix, axis=1)) 1032 lower.append(np.quantile(sims_ix, q=self.alpha_ / 200, axis=1)) 1033 upper.append( 1034 np.quantile(sims_ix, q=1 - self.alpha_ / 200, axis=1) 1035 ) 1036 self.mean_ = pd.DataFrame( 1037 np.asarray(meanf).T, 1038 columns=self.series_names[ 1039 : self.init_n_series_ 1040 ], # self.df_.columns, 1041 index=self.output_dates_, 1042 ) 1043 1044 self.lower_ = pd.DataFrame( 1045 np.asarray(lower).T, 1046 columns=self.series_names[ 1047 : self.init_n_series_ 1048 ], # self.df_.columns, 1049 index=self.output_dates_, 1050 ) 1051 1052 self.upper_ = pd.DataFrame( 1053 np.asarray(upper).T, 1054 columns=self.series_names[ 1055 : self.init_n_series_ 1056 ], # self.df_.columns, 1057 index=self.output_dates_, 1058 ) 1059 1060 try: 1061 self.median_ = pd.DataFrame( 1062 np.asarray(medianf).T, 1063 columns=self.series_names[ 1064 : self.init_n_series_ 1065 ], # self.df_.columns, 1066 index=self.output_dates_, 1067 ) 1068 except Exception as e: 1069 pass 1070 1071 return DescribeResult( 1072 self.mean_, self.sims_, self.lower_, self.upper_ 1073 ) 1074 1075 if ( 1076 (("return_std" in kwargs) or ("return_pi" in kwargs)) 1077 and (self.type_pi not in ("gaussian", "scp")) 1078 ) or "vine" in self.type_pi: 1079 DescribeResult = namedtuple( 1080 "DescribeResult", ("mean", "lower", "upper") 1081 ) 1082 1083 self.mean_ = pd.DataFrame( 1084 np.asarray(self.mean_), 1085 columns=self.series_names, # self.df_.columns, 1086 index=self.output_dates_, 1087 ) 1088 1089 if "return_std" in kwargs: 1090 1091 self.preds_std_ = np.asarray(self.preds_std_) 1092 1093 self.lower_ = pd.DataFrame( 1094 self.mean_.values - pi_multiplier * self.preds_std_, 1095 columns=self.series_names, # self.df_.columns, 1096 index=self.output_dates_, 1097 ) 1098 1099 self.upper_ = pd.DataFrame( 1100 self.mean_.values + pi_multiplier * self.preds_std_, 1101 columns=self.series_names, # self.df_.columns, 1102 index=self.output_dates_, 1103 ) 1104 1105 if "return_pi" in kwargs: 1106 1107 self.lower_ = pd.DataFrame( 1108 np.asarray(lower_pi_).reshape(h, self.n_series) 1109 + y_means_[np.newaxis, :], 1110 columns=self.series_names, # self.df_.columns, 1111 index=self.output_dates_, 1112 ) 1113 1114 self.upper_ = pd.DataFrame( 1115 np.asarray(upper_pi_).reshape(h, self.n_series) 1116 + y_means_[np.newaxis, :], 1117 columns=self.series_names, # self.df_.columns, 1118 index=self.output_dates_, 1119 ) 1120 1121 res = DescribeResult(self.mean_, self.lower_, self.upper_) 1122 1123 if self.xreg_ is not None: 1124 if len(self.xreg_.shape) > 1: 1125 res2 = mx.tuple_map( 1126 res, 1127 lambda x: mo.delete_last_columns( 1128 x, num_columns=self.xreg_.shape[1] 1129 ), 1130 ) 1131 else: 1132 res2 = mx.tuple_map( 1133 res, lambda x: mo.delete_last_columns(x, num_columns=1) 1134 ) 1135 return DescribeResult(res2[0], res2[1], res2[2]) 1136 1137 return res 1138 1139 if self.type_pi == "gaussian": 1140 1141 DescribeResult = namedtuple( 1142 "DescribeResult", ("mean", "lower", "upper") 1143 ) 1144 1145 self.mean_ = pd.DataFrame( 1146 np.asarray(self.mean_), 1147 columns=self.series_names, # self.df_.columns, 1148 index=self.output_dates_, 1149 ) 1150 1151 self.lower_ = pd.DataFrame( 1152 self.mean_.values - pi_multiplier * self.gaussian_preds_std_, 1153 columns=self.series_names, # self.df_.columns, 1154 index=self.output_dates_, 1155 ) 1156 1157 self.upper_ = pd.DataFrame( 1158 self.mean_.values + pi_multiplier * self.gaussian_preds_std_, 1159 columns=self.series_names, # self.df_.columns, 1160 index=self.output_dates_, 1161 ) 1162 1163 res = DescribeResult(self.mean_, self.lower_, self.upper_) 1164 1165 if self.xreg_ is not None: 1166 if len(self.xreg_.shape) > 1: 1167 res2 = mx.tuple_map( 1168 res, 1169 lambda x: mo.delete_last_columns( 1170 x, num_columns=self.xreg_.shape[1] 1171 ), 1172 ) 1173 else: 1174 res2 = mx.tuple_map( 1175 res, lambda x: mo.delete_last_columns(x, num_columns=1) 1176 ) 1177 return DescribeResult(res2[0], res2[1], res2[2]) 1178 1179 return res 1180 1181 if self.type_pi == "quantile": 1182 1183 DescribeResult = namedtuple("DescribeResult", ("mean")) 1184 1185 self.mean_ = pd.DataFrame( 1186 np.asarray(self.mean_), 1187 columns=self.series_names, # self.df_.columns, 1188 index=self.output_dates_, 1189 ) 1190 1191 res = DescribeResult(self.mean_) 1192 1193 if self.xreg_ is not None: 1194 if len(self.xreg_.shape) > 1: 1195 res2 = mx.tuple_map( 1196 res, 1197 lambda x: mo.delete_last_columns( 1198 x, num_columns=self.xreg_.shape[1] 1199 ), 1200 ) 1201 else: 1202 res2 = mx.tuple_map( 1203 res, lambda x: mo.delete_last_columns(x, num_columns=1) 1204 ) 1205 return DescribeResult(res2[0]) 1206 1207 return res 1208 1209 # After prediction loop, ensure sims only contain target columns 1210 if self.sims_ is not None: 1211 if self.verbose == 1: 1212 self.sims_ = tuple( 1213 sim[:h,] # Only keep target columns and h rows 1214 for sim in tqdm(self.sims_) 1215 ) 1216 elif self.verbose == 0: 1217 self.sims_ = tuple( 1218 sim[:h,] # Only keep target columns and h rows 1219 for sim in self.sims_ 1220 ) 1221 1222 # Convert numpy arrays to DataFrames with proper columns 1223 self.sims_ = tuple( 1224 pd.DataFrame( 1225 sim, 1226 columns=self.df_.columns[: self.init_n_series_], 1227 index=self.output_dates_, 1228 ) 1229 for sim in self.sims_ 1230 ) 1231 1232 if self.type_pi in ( 1233 "kde", 1234 "bootstrap", 1235 "block-bootstrap", 1236 "vine-copula", 1237 ): 1238 if self.xreg_ is not None: 1239 # Use getsimsxreg when external regressors are present 1240 target_cols = self.df_.columns[: self.init_n_series_] 1241 self.sims_ = getsimsxreg( 1242 self.sims_, self.output_dates_, target_cols 1243 ) 1244 else: 1245 # Use original getsims for backward compatibility 1246 self.sims_ = getsims(self.sims_) 1247 1248 def _crps_ensemble(self, y_true, simulations, axis=0): 1249 """ 1250 Compute the Continuous Ranked Probability Score (CRPS) for an ensemble of simulations. 1251 1252 The CRPS is a measure of the distance between the cumulative distribution 1253 function (CDF) of a forecast and the CDF of the observed value. This method 1254 computes the CRPS in a vectorized form for an ensemble of simulations, efficiently 1255 handling the case where there is only one simulation. 1256 1257 Parameters 1258 ---------- 1259 y_true : array_like, shape (n,) 1260 A 1D array of true values (observations). 1261 Each element represents the true value for a given sample. 1262 1263 simulations : array_like, shape (n, R) 1264 A 2D array of simulated values. Each row corresponds to a different sample 1265 and each column corresponds to a different simulation of that sample. 1266 1267 axis : int, optional, default=0 1268 Axis along which to transpose the simulations if needed. 1269 If axis=0, the simulations are transposed to shape (R, n). 1270 1271 Returns 1272 ------- 1273 crps : ndarray, shape (n,) 1274 A 1D array of CRPS scores, one for each sample. 1275 1276 Notes 1277 ----- 1278 The CRPS score is computed as: 1279 1280 CRPS(y_true, simulations) = E[|X - y|] - 0.5 * E[|X - X'|] 1281 1282 Where: 1283 - `X` is the ensemble of simulations. 1284 - `y` is the true value. 1285 - `X'` is a second independent sample from the ensemble. 1286 1287 The calculation is vectorized to optimize performance for large datasets. 1288 1289 The edge case where `R=1` (only one simulation) is handled by returning 1290 only `term1` (i.e., no ensemble spread). 1291 """ 1292 sims = np.asarray(simulations) # Convert simulations to numpy array 1293 if axis == 0: 1294 sims = sims.T # Transpose if the axis is 0 1295 n, R = sims.shape # n = number of samples, R = number of simulations 1296 # Term 1: E|X - y|, average absolute difference between simulations and true value 1297 term1 = np.mean(np.abs(sims - y_true[:, np.newaxis]), axis=1) 1298 # Handle edge case: if R == 1, return term1 (no spread in ensemble) 1299 if R == 1: 1300 return term1 1301 # Term 2: 0.5 * E|X - X'|, using efficient sorted formula 1302 sims_sorted = np.sort(sims, axis=1) # Sort simulations along each row 1303 # Correct coefficients for efficient calculation 1304 j = np.arange(R) # 0-indexed positions in the sorted simulations 1305 coefficients = (2 * (j + 1) - R - 1) / ( 1306 R * (R - 1) 1307 ) # Efficient coefficient calculation 1308 # Dot product along the second axis (over the simulations) 1309 term2 = np.dot(sims_sorted, coefficients) 1310 # Return CRPS score: term1 - 0.5 * term2 1311 return term1 - 0.5 * term2 1312 1313 def score( 1314 self, 1315 X, 1316 training_index, 1317 testing_index, 1318 scoring=None, 1319 alpha=0.5, 1320 **kwargs, 1321 ): 1322 """Train on training_index, score on testing_index.""" 1323 1324 assert ( 1325 bool(set(training_index).intersection(set(testing_index))) == False 1326 ), "Non-overlapping 'training_index' and 'testing_index' required" 1327 1328 # Dimensions 1329 try: 1330 # multivariate time series 1331 n, p = X.shape 1332 except: 1333 # univariate time series 1334 n = X.shape[0] 1335 p = 1 1336 1337 # Training and testing sets 1338 if p > 1: 1339 X_train = X[training_index, :] 1340 X_test = X[testing_index, :] 1341 else: 1342 X_train = X[training_index] 1343 X_test = X[testing_index] 1344 1345 # Horizon 1346 h = len(testing_index) 1347 assert ( 1348 len(training_index) + h 1349 ) <= n, "Please check lengths of training and testing windows" 1350 1351 # Fit and predict 1352 self.fit(X_train, **kwargs) 1353 preds = self.predict(h=h, **kwargs) 1354 1355 if scoring is None: 1356 scoring = "neg_root_mean_squared_error" 1357 1358 if scoring == "pinball": 1359 # Predict requested quantile 1360 q_pred = self.predict(h=h, quantiles=[alpha], **kwargs) 1361 # Handle multivariate 1362 scores = [] 1363 for j in range(p): 1364 series_name = getattr(self, "series_names", [f"Series_{j}"])[j] 1365 q_label = ( 1366 f"{int(alpha * 100):02d}" 1367 if (alpha * 100).is_integer() 1368 else f"{alpha:.3f}".replace(".", "_") 1369 ) 1370 col = f"quantile_{q_label}_{series_name}" 1371 if col not in q_pred.columns: 1372 raise ValueError( 1373 f"Column '{col}' not found in quantile forecast output." 1374 ) 1375 y_true_j = X_test[:, j] 1376 y_pred_j = q_pred[col].values 1377 # Compute pinball loss for this series 1378 loss = mean_pinball_loss(y_true_j, y_pred_j, alpha=alpha) 1379 scores.append(loss) 1380 # Return average over series 1381 return np.mean(scores) 1382 1383 if scoring == "crps": 1384 # Ensure simulations exist 1385 preds = self.predict(h=h, **kwargs) # triggers self.sims_ 1386 # Extract simulations: list of DataFrames → (R, h, p) 1387 sims_vals = np.stack( 1388 [sim.values for sim in self.sims_], axis=0 1389 ) # (R, h, p) 1390 crps_scores = [] 1391 for j in range(p): 1392 y_true_j = X_test[:, j] 1393 sims_j = sims_vals[:, :, j] # (R, h) 1394 crps_j = self._crps_ensemble(np.asarray(y_true_j), sims_j) 1395 crps_scores.append(np.mean(crps_j)) # average over horizon 1396 return np.mean(crps_scores) # average over series 1397 1398 # check inputs 1399 assert scoring in ( 1400 "explained_variance", 1401 "neg_mean_absolute_error", 1402 "neg_mean_squared_error", 1403 "neg_root_mean_squared_error", 1404 "neg_mean_squared_log_error", 1405 "neg_median_absolute_error", 1406 "r2", 1407 ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \ 1408 'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \ 1409 'neg_median_absolute_error', 'r2')" 1410 1411 scoring_options = { 1412 "explained_variance": skm2.explained_variance_score, 1413 "neg_mean_absolute_error": skm2.mean_absolute_error, 1414 "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2), 1415 "neg_root_mean_squared_error": lambda x, y: np.sqrt( 1416 np.mean((x - y) ** 2) 1417 ), 1418 "neg_mean_squared_log_error": skm2.mean_squared_log_error, 1419 "neg_median_absolute_error": skm2.median_absolute_error, 1420 "r2": skm2.r2_score, 1421 } 1422 1423 return scoring_options[scoring](X_test, preds) 1424 1425 def plot(self, series=None, type_axis="dates", type_plot="pi"): 1426 """Plot time series forecast 1427 1428 Parameters: 1429 1430 series: {integer} or {string} 1431 series index or name 1432 1433 """ 1434 1435 assert all( 1436 [ 1437 self.mean_ is not None, 1438 self.lower_ is not None, 1439 self.upper_ is not None, 1440 self.output_dates_ is not None, 1441 ] 1442 ), "model forecasting must be obtained first (with predict)" 1443 1444 if series is None: 1445 # assert ( 1446 # self.init_n_series_ == 1 1447 # ), "please specify series index or name (n_series > 1)" 1448 series = 0 1449 1450 if isinstance(series, str): 1451 assert ( 1452 series in self.series_names 1453 ), f"series {series} doesn't exist in the input dataset" 1454 series_idx = self.df_.columns.get_loc(series) 1455 else: 1456 assert isinstance(series, int) and ( 1457 0 <= series < self.n_series 1458 ), f"check series index (< {self.n_series})" 1459 series_idx = series 1460 1461 y_all = list(self.df_.iloc[:, series_idx]) + list( 1462 self.mean_.iloc[:, series_idx] 1463 ) 1464 y_test = list(self.mean_.iloc[:, series_idx]) 1465 n_points_all = len(y_all) 1466 n_points_train = self.df_.shape[0] 1467 1468 if type_axis == "numeric": 1469 x_all = [i for i in range(n_points_all)] 1470 x_test = [i for i in range(n_points_train, n_points_all)] 1471 1472 if type_axis == "dates": # use dates 1473 x_all = np.concatenate( 1474 (self.input_dates.values, self.output_dates_.values), axis=None 1475 ) 1476 x_test = self.output_dates_.values 1477 1478 if type_plot == "pi": 1479 fig, ax = plt.subplots() 1480 ax.plot(x_all, y_all, "-") 1481 ax.plot(x_test, y_test, "-", color="orange") 1482 ax.fill_between( 1483 x_test, 1484 self.lower_.iloc[:, series_idx], 1485 self.upper_.iloc[:, series_idx], 1486 alpha=0.2, 1487 color="orange", 1488 ) 1489 if self.replications is None: 1490 if self.n_series > 1: 1491 plt.title( 1492 f"prediction intervals for {series}", 1493 loc="left", 1494 fontsize=12, 1495 fontweight=0, 1496 color="black", 1497 ) 1498 else: 1499 plt.title( 1500 f"prediction intervals for input time series", 1501 loc="left", 1502 fontsize=12, 1503 fontweight=0, 1504 color="black", 1505 ) 1506 plt.show() 1507 else: # self.replications is not None 1508 if self.n_series > 1: 1509 plt.title( 1510 f"prediction intervals for {self.replications} simulations of {series}", 1511 loc="left", 1512 fontsize=12, 1513 fontweight=0, 1514 color="black", 1515 ) 1516 else: 1517 plt.title( 1518 f"prediction intervals for {self.replications} simulations of input time series", 1519 loc="left", 1520 fontsize=12, 1521 fontweight=0, 1522 color="black", 1523 ) 1524 plt.show() 1525 1526 if type_plot == "spaghetti": 1527 palette = plt.get_cmap("Set1") 1528 sims_ix = getsims(self.sims_, series_idx) 1529 plt.plot(x_all, y_all, "-") 1530 for col_ix in range( 1531 sims_ix.shape[1] 1532 ): # avoid this when there are thousands of simulations 1533 plt.plot( 1534 x_test, 1535 sims_ix[:, col_ix], 1536 "-", 1537 color=palette(col_ix), 1538 linewidth=1, 1539 alpha=0.9, 1540 ) 1541 plt.plot(x_all, y_all, "-", color="black") 1542 plt.plot(x_test, y_test, "-", color="blue") 1543 # Add titles 1544 if self.n_series > 1: 1545 plt.title( 1546 f"{self.replications} simulations of {series}", 1547 loc="left", 1548 fontsize=12, 1549 fontweight=0, 1550 color="black", 1551 ) 1552 else: 1553 plt.title( 1554 f"{self.replications} simulations of input time series", 1555 loc="left", 1556 fontsize=12, 1557 fontweight=0, 1558 color="black", 1559 ) 1560 plt.xlabel("Time") 1561 plt.ylabel("Values") 1562 # Show the graph 1563 plt.show() 1564 1565 def cross_val_score( 1566 self, 1567 X, 1568 scoring="root_mean_squared_error", 1569 n_jobs=None, 1570 verbose=0, 1571 xreg=None, 1572 initial_window=5, 1573 horizon=3, 1574 fixed_window=False, 1575 show_progress=True, 1576 level=95, 1577 alpha=0.5, 1578 **kwargs, 1579 ): 1580 """Evaluate a score by time series cross-validation. 1581 1582 Parameters: 1583 1584 X: {array-like, sparse matrix} of shape (n_samples, n_features) 1585 The data to fit. 1586 1587 scoring: str or a function 1588 A str in ('root_mean_squared_error', 'mean_squared_error', 'mean_error', 1589 'mean_absolute_error', 'mean_error', 'mean_percentage_error', 1590 'mean_absolute_percentage_error', 'winkler_score', 'coverage') 1591 Or a function defined as 'coverage' and 'winkler_score' in `utils.timeseries` 1592 1593 n_jobs: int, default=None 1594 Number of jobs to run in parallel. 1595 1596 verbose: int, default=0 1597 The verbosity level. 1598 1599 xreg: array-like, optional (default=None) 1600 Additional (external) regressors to be passed to `fit` 1601 xreg must be in 'increasing' order (most recent observations last) 1602 1603 initial_window: int 1604 initial number of consecutive values in each training set sample 1605 1606 horizon: int 1607 number of consecutive values in test set sample 1608 1609 fixed_window: boolean 1610 if False, all training samples start at index 0, and the training 1611 window's size is increasing. 1612 if True, the training window's size is fixed, and the window is 1613 rolling forward 1614 1615 show_progress: boolean 1616 if True, a progress bar is printed 1617 1618 level: int 1619 confidence level for prediction intervals 1620 1621 alpha: float 1622 quantile level for pinball loss if scoring='pinball' 1623 0 < alpha < 1 1624 1625 **kwargs: dict 1626 additional parameters to be passed to `fit` and `predict` 1627 1628 Returns: 1629 1630 A tuple: descriptive statistics or errors and raw errors 1631 1632 """ 1633 tscv = TimeSeriesSplit() 1634 1635 tscv_obj = tscv.split( 1636 X, 1637 initial_window=initial_window, 1638 horizon=horizon, 1639 fixed_window=fixed_window, 1640 ) 1641 1642 if isinstance(scoring, str): 1643 1644 assert scoring in ( 1645 "pinball", 1646 "crps", 1647 "root_mean_squared_error", 1648 "mean_squared_error", 1649 "mean_error", 1650 "mean_absolute_error", 1651 "mean_percentage_error", 1652 "mean_absolute_percentage_error", 1653 "winkler_score", 1654 "coverage", 1655 ), "must have scoring in ('pinball', 'crps', 'root_mean_squared_error', 'mean_squared_error', 'mean_error', 'mean_absolute_error', 'mean_error', 'mean_percentage_error', 'mean_absolute_percentage_error', 'winkler_score', 'coverage')" 1656 1657 def err_func(X_test, X_pred, scoring, alpha=0.5): 1658 if (self.replications is not None) or ( 1659 self.type_pi == "gaussian" 1660 ): # probabilistic 1661 if scoring == "pinball": 1662 # Predict requested quantile 1663 q_pred = self.predict( 1664 h=len(X_test), quantiles=[alpha], **kwargs 1665 ) 1666 # Handle multivariate 1667 scores = [] 1668 p = X_test.shape[1] if len(X_test.shape) > 1 else 1 1669 for j in range(p): 1670 series_name = getattr( 1671 self, "series_names", [f"Series_{j}"] 1672 )[j] 1673 q_label = ( 1674 f"{int(alpha * 100):02d}" 1675 if (alpha * 100).is_integer() 1676 else f"{alpha:.3f}".replace(".", "_") 1677 ) 1678 col = f"quantile_{q_label}_{series_name}" 1679 if col not in q_pred.columns: 1680 raise ValueError( 1681 f"Column '{col}' not found in quantile forecast output." 1682 ) 1683 try: 1684 y_true_j = X_test[:, j] if p > 1 else X_test 1685 except: 1686 y_true_j = ( 1687 X_test.iloc[:, j] 1688 if p > 1 1689 else X_test.values 1690 ) 1691 y_pred_j = q_pred[col].values 1692 # Compute pinball loss for this series 1693 loss = mean_pinball_loss( 1694 y_true_j, y_pred_j, alpha=alpha 1695 ) 1696 scores.append(loss) 1697 # Return average over series 1698 return np.mean(scores) 1699 elif scoring == "crps": 1700 # Ensure simulations exist 1701 _ = self.predict( 1702 h=len(X_test), **kwargs 1703 ) # triggers self.sims_ 1704 # Extract simulations: list of DataFrames → (R, h, p) 1705 sims_vals = np.stack( 1706 [sim.values for sim in self.sims_], axis=0 1707 ) # (R, h, p) 1708 crps_scores = [] 1709 p = X_test.shape[1] if len(X_test.shape) > 1 else 1 1710 for j in range(p): 1711 try: 1712 y_true_j = X_test[:, j] if p > 1 else X_test 1713 except Exception as e: 1714 y_true_j = ( 1715 X_test.iloc[:, j] 1716 if p > 1 1717 else X_test.values 1718 ) 1719 sims_j = sims_vals[:, :, j] # (R, h) 1720 crps_j = self._crps_ensemble( 1721 np.asarray(y_true_j), sims_j 1722 ) 1723 crps_scores.append( 1724 np.mean(crps_j) 1725 ) # average over horizon 1726 return np.mean(crps_scores) # average over series 1727 if scoring == "winkler_score": 1728 return winkler_score(X_pred, X_test, level=level) 1729 elif scoring == "coverage": 1730 return coverage(X_pred, X_test, level=level) 1731 else: 1732 return mean_errors( 1733 pred=X_pred.mean, actual=X_test, scoring=scoring 1734 ) 1735 else: # not probabilistic 1736 return mean_errors( 1737 pred=X_pred, actual=X_test, scoring=scoring 1738 ) 1739 1740 else: # isinstance(scoring, str) = False 1741 1742 err_func = scoring 1743 1744 errors = [] 1745 1746 train_indices = [] 1747 1748 test_indices = [] 1749 1750 for train_index, test_index in tscv_obj: 1751 train_indices.append(train_index) 1752 test_indices.append(test_index) 1753 1754 if show_progress is True: 1755 iterator = tqdm( 1756 zip(train_indices, test_indices), total=len(train_indices) 1757 ) 1758 else: 1759 iterator = zip(train_indices, test_indices) 1760 1761 for train_index, test_index in iterator: 1762 1763 if verbose == 1: 1764 print(f"TRAIN: {train_index}") 1765 print(f"TEST: {test_index}") 1766 1767 if isinstance(X, pd.DataFrame): 1768 self.fit(X.iloc[train_index, :], xreg=xreg, **kwargs) 1769 X_test = X.iloc[test_index, :] 1770 else: 1771 self.fit(X[train_index, :], xreg=xreg, **kwargs) 1772 X_test = X[test_index, :] 1773 X_pred = self.predict(h=int(len(test_index)), level=level, **kwargs) 1774 1775 errors.append(err_func(X_test, X_pred, scoring, alpha=alpha)) 1776 1777 res = np.asarray(errors) 1778 1779 return res, describe(res) 1780 1781 def _compute_information_criterion(self, curr_lags, criterion="AIC"): 1782 """Compute information criterion using existing residuals 1783 1784 Parameters 1785 ---------- 1786 curr_lags : int 1787 Current number of lags being evaluated 1788 criterion : str 1789 One of 'AIC', 'AICc', or 'BIC' 1790 1791 Returns 1792 ------- 1793 float 1794 Information criterion value or inf if parameters exceed observations 1795 """ 1796 # Get dimensions 1797 n_obs = self.residuals_.shape[0] 1798 n_features = int(self.init_n_series_ * curr_lags) 1799 n_hidden = int(self.n_hidden_features) 1800 # Calculate number of parameters 1801 term1 = int(n_features * n_hidden) 1802 term2 = int(n_hidden * self.init_n_series_) 1803 n_params = term1 + term2 1804 # Check if we have enough observations for the number of parameters 1805 if n_obs <= n_params + 1: 1806 return float("inf") # Return infinity if too many parameters 1807 # Compute RSS using existing residuals 1808 rss = np.sum(self.residuals_**2) 1809 # Compute criterion 1810 if criterion == "AIC": 1811 ic = n_obs * np.log(rss / n_obs) + 2 * n_params 1812 elif criterion == "AICc": 1813 ic = n_obs * np.log(rss / n_obs) + 2 * n_params * ( 1814 n_obs / (n_obs - n_params - 1) 1815 ) 1816 else: # BIC 1817 ic = n_obs * np.log(rss / n_obs) + n_params * np.log(n_obs) 1818 1819 return ic
Univariate and multivariate time series (MTS) forecasting with Quasi-Randomized networks
Parameters:
obj: object.
any object containing a method fit (obj.fit()) and a method predict
(obj.predict()).
n_hidden_features: int.
number of nodes in the hidden layer.
activation_name: str.
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'.
a: float.
hyperparameter for 'prelu' or 'elu' activation function.
nodes_sim: str.
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'.
bias: boolean.
indicates if the hidden layer contains a bias term (True) or not
(False).
dropout: float.
regularization parameter; (random) percentage of nodes dropped out
of the training.
direct_link: boolean.
indicates if the original predictors are included (True) in model's fitting or not (False).
n_clusters: int.
number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering).
cluster_encode: bool.
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding.
type_clust: str.
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm').
type_scaling: a tuple of 3 strings.
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax').
lags: int.
number of lags used for each time series.
If string, lags must be one of 'AIC', 'AICc', or 'BIC'.
type_pi: str.
type of prediction interval; currently:
- "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case
- "quantile": use model-agnostic quantile regression under the hood
- "kde": based on Kernel Density Estimation of in-sample residuals
- "bootstrap": based on independent bootstrap of in-sample residuals
- "block-bootstrap": based on basic block bootstrap of in-sample residuals
- "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals
- "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals
- "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals
- "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals
- "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals
- "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals
- based on copulas of in-sample residuals: 'vine-tll', 'vine-bb1', 'vine-bb6', 'vine-bb7', 'vine-bb8', 'vine-clayton',
'vine-frank', 'vine-gaussian', 'vine-gumbel', 'vine-indep', 'vine-joe', 'vine-student'
- 'scp-vine-tll', 'scp-vine-bb1', 'scp-vine-bb6', 'scp-vine-bb7', 'scp-vine-bb8', 'scp-vine-clayton',
'scp-vine-frank', 'scp-vine-gaussian', 'scp-vine-gumbel', 'scp-vine-indep', 'scp-vine-joe', 'scp-vine-student'
- 'scp2-vine-tll', 'scp2-vine-bb1', 'scp2-vine-bb6', 'scp2-vine-bb7', 'scp2-vine-bb8', 'scp2-vine-clayton',
'scp2-vine-frank', 'scp2-vine-gaussian', 'scp2-vine-gumbel', 'scp2-vine-indep', 'scp2-vine-joe', 'scp2-vine-student'
level: int.
level of confidence for `type_pi == 'quantile'` (default is `95`)
block_size: int.
size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap").
Default is round(3.15*(n_residuals^1/3))
replications: int.
number of replications (if needed, for predictive simulation). Default is 'None'.
kernel: str.
the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'.
agg: str.
either "mean" or "median" for simulation of bootstrap aggregating
seed: int.
reproducibility seed for nodes_sim=='uniform' or predictive simulation.
backend: str.
"cpu" or "gpu" or "tpu".
verbose: int.
0: not printing; 1: printing
show_progress: bool.
True: progress bar when fitting each series; False: no progress bar when fitting each series
Attributes:
fit_objs_: dict
objects adjusted to each individual time series
y_: {array-like}
MTS responses (most recent observations first)
X_: {array-like}
MTS lags
xreg_: {array-like}
external regressors
y_means_: dict
a dictionary of each series mean values
preds_: {array-like}
successive model predictions
preds_std_: {array-like}
standard deviation around the predictions for Bayesian base learners (`obj`)
gaussian_preds_std_: {array-like}
standard deviation around the predictions for `type_pi='gaussian'`
return_std_: boolean
return uncertainty or not (set in predict)
df_: data frame
the input data frame, in case a data.frame is provided to `fit`
n_obs_: int
number of time series observations (number of rows for multivariate)
level_: int
level of confidence for prediction intervals (default is 95)
residuals_: {array-like}
in-sample residuals (for `type_pi` not conformal prediction) or calibrated residuals
(for `type_pi` in conformal prediction)
residuals_sims_: tuple of {array-like}
simulations of in-sample residuals (for `type_pi` not conformal prediction) or
calibrated residuals (for `type_pi` in conformal prediction)
kde_: A scikit-learn object, see https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KernelDensity.html
residuals_std_dev_: residuals standard deviation
Examples:
Example 1:
import nnetsauce as ns
import numpy as np
from sklearn import linear_model
np.random.seed(123)
M = np.random.rand(10, 3)
M[:,0] = 10*M[:,0]
M[:,2] = 25*M[:,2]
print(M)
# Adjust Bayesian Ridge
regr4 = linear_model.BayesianRidge()
obj_MTS = ns.MTS(regr4, lags = 1, n_hidden_features=5)
obj_MTS.fit(M)
print(obj_MTS.predict())
# with credible intervals
print(obj_MTS.predict(return_std=True, level=80))
print(obj_MTS.predict(return_std=True, level=95))
Example 2:
import nnetsauce as ns
import numpy as np
from sklearn import linear_model
dataset = {
'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
'series1' : [34, 30, 35.6, 33.3, 38.1],
'series2' : [4, 5.5, 5.6, 6.3, 5.1],
'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
df = pd.DataFrame(dataset).set_index('date')
print(df)
# Adjust Bayesian Ridge
regr5 = linear_model.BayesianRidge()
obj_MTS = ns.MTS(regr5, lags = 1, n_hidden_features=5)
obj_MTS.fit(df)
print(obj_MTS.predict())
# with credible intervals
print(obj_MTS.predict(return_std=True, level=80))
print(obj_MTS.predict(return_std=True, level=95))
337 def fit(self, X, xreg=None, **kwargs): 338 """Fit MTS model to training data X, with optional regressors xreg 339 340 Parameters: 341 342 X: {array-like}, shape = [n_samples, n_features] 343 Training time series, where n_samples is the number 344 of samples and n_features is the number of features; 345 X must be in increasing order (most recent observations last) 346 347 xreg: {array-like}, shape = [n_samples, n_features_xreg] 348 Additional (external) regressors to be passed to self.obj 349 xreg must be in 'increasing' order (most recent observations last) 350 351 **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity) 352 353 Returns: 354 355 self: object 356 """ 357 try: 358 self.init_n_series_ = X.shape[1] 359 except IndexError as e: 360 self.init_n_series_ = 1 361 362 # Automatic lag selection if requested 363 if isinstance(self.lags, str): 364 max_lags = min(25, X.shape[0] // 4) 365 best_ic = float("inf") 366 best_lags = 1 367 368 if self.verbose: 369 print( 370 f"\nSelecting optimal number of lags using {self.lags}..." 371 ) 372 iterator = tqdm(range(1, max_lags + 1)) 373 else: 374 iterator = range(1, max_lags + 1) 375 376 for lag in iterator: 377 # Convert DataFrame to numpy array before reversing 378 if isinstance(X, pd.DataFrame): 379 X_values = X.values[::-1] 380 else: 381 X_values = X[::-1] 382 383 # Try current lag value 384 if self.init_n_series_ > 1: 385 mts_input = ts.create_train_inputs(X_values, lag) 386 else: 387 mts_input = ts.create_train_inputs( 388 X_values.reshape(-1, 1), lag 389 ) 390 391 # Cook training set and fit model 392 dummy_y, scaled_Z = self.cook_training_set( 393 y=np.ones(mts_input[0].shape[0]), X=mts_input[1] 394 ) 395 residuals_ = [] 396 397 for i in range(self.init_n_series_): 398 y_mean = np.mean(mts_input[0][:, i]) 399 centered_y_i = mts_input[0][:, i] - y_mean 400 self.obj.fit(X=scaled_Z, y=centered_y_i) 401 residuals_.append( 402 (centered_y_i - self.obj.predict(scaled_Z)).tolist() 403 ) 404 405 self.residuals_ = np.asarray(residuals_).T 406 ic = self._compute_information_criterion( 407 curr_lags=lag, criterion=self.lags 408 ) 409 410 if self.verbose: 411 print(f"Trying lags={lag}, {self.lags}={ic:.2f}") 412 413 if ic < best_ic: 414 best_ic = ic 415 best_lags = lag 416 417 if self.verbose: 418 print( 419 f"\nSelected {best_lags} lags with {self.lags}={best_ic:.2f}" 420 ) 421 422 self.lags = best_lags 423 424 self.input_dates = None 425 self.df_ = None 426 427 if isinstance(X, pd.DataFrame) is False: 428 # input data set is a numpy array 429 if xreg is None: 430 X = pd.DataFrame(X) 431 self.series_names = [ 432 "series" + str(i) for i in range(X.shape[1]) 433 ] 434 else: 435 # xreg is not None 436 X = mo.cbind(X, xreg) 437 self.xreg_ = xreg 438 439 else: # input data set is a DataFrame with column names 440 441 X_index = None 442 if X.index is not None: 443 X_index = X.index 444 if xreg is None: 445 X = copy.deepcopy(mo.convert_df_to_numeric(X)) 446 else: 447 X = copy.deepcopy(mo.cbind(mo.convert_df_to_numeric(X), xreg)) 448 self.xreg_ = xreg 449 if X_index is not None: 450 X.index = X_index 451 self.series_names = X.columns.tolist() 452 453 if isinstance(X, pd.DataFrame): 454 if self.df_ is None: 455 self.df_ = X 456 X = X.values 457 else: 458 input_dates_prev = pd.DatetimeIndex(self.df_.index.values) 459 frequency = pd.infer_freq(input_dates_prev) 460 self.df_ = pd.concat([self.df_, X], axis=0) 461 self.input_dates = pd.date_range( 462 start=input_dates_prev[0], 463 periods=len(input_dates_prev) + X.shape[0], 464 freq=frequency, 465 ).values.tolist() 466 self.df_.index = self.input_dates 467 X = self.df_.values 468 self.df_.columns = self.series_names 469 else: 470 if self.df_ is None: 471 self.df_ = pd.DataFrame(X, columns=self.series_names) 472 else: 473 self.df_ = pd.concat( 474 [self.df_, pd.DataFrame(X, columns=self.series_names)], 475 axis=0, 476 ) 477 478 self.input_dates = ts.compute_input_dates(self.df_) 479 480 try: 481 # multivariate time series 482 n, p = X.shape 483 except: 484 # univariate time series 485 n = X.shape[0] 486 p = 1 487 self.n_obs_ = n 488 489 rep_1_n = np.repeat(1, n) 490 491 self.y_ = None 492 self.X_ = None 493 self.n_series = p 494 self.fit_objs_.clear() 495 self.y_means_.clear() 496 residuals_ = [] 497 self.residuals_ = None 498 self.residuals_sims_ = None 499 self.kde_ = None 500 self.sims_ = None 501 self.scaled_Z_ = None 502 self.centered_y_is_ = [] 503 504 if self.init_n_series_ > 1: 505 # multivariate time series 506 mts_input = ts.create_train_inputs(X[::-1], self.lags) 507 else: 508 # univariate time series 509 mts_input = ts.create_train_inputs( 510 X.reshape(-1, 1)[::-1], self.lags 511 ) 512 513 self.y_ = mts_input[0] 514 515 self.X_ = mts_input[1] 516 517 dummy_y, scaled_Z = self.cook_training_set(y=rep_1_n, X=self.X_) 518 519 self.scaled_Z_ = scaled_Z 520 521 # loop on all the time series and adjust self.obj.fit 522 if self.verbose > 0: 523 print( 524 f"\n Adjusting {type(self.obj).__name__} to multivariate time series... \n" 525 ) 526 527 if self.show_progress is True: 528 iterator = tqdm(range(self.init_n_series_)) 529 else: 530 iterator = range(self.init_n_series_) 531 532 if self.type_pi in ( 533 "gaussian", 534 "kde", 535 "bootstrap", 536 "block-bootstrap", 537 ) or self.type_pi.startswith("vine"): 538 for i in iterator: 539 y_mean = np.mean(self.y_[:, i]) 540 self.y_means_[i] = y_mean 541 centered_y_i = self.y_[:, i] - y_mean 542 self.centered_y_is_.append(centered_y_i) 543 self.obj.fit(X=scaled_Z, y=centered_y_i) 544 self.fit_objs_[i] = deepcopy(self.obj) 545 residuals_.append( 546 ( 547 centered_y_i - self.fit_objs_[i].predict(scaled_Z) 548 ).tolist() 549 ) 550 551 if self.type_pi == "quantile": 552 for i in iterator: 553 y_mean = np.mean(self.y_[:, i]) 554 self.y_means_[i] = y_mean 555 centered_y_i = self.y_[:, i] - y_mean 556 self.centered_y_is_.append(centered_y_i) 557 self.obj.fit(X=scaled_Z, y=centered_y_i) 558 self.fit_objs_[i] = deepcopy(self.obj) 559 560 if self.type_pi.startswith("scp"): 561 # split conformal prediction 562 for i in iterator: 563 n_y = self.y_.shape[0] 564 n_y_half = n_y // 2 565 first_half_idx = range(0, n_y_half) 566 second_half_idx = range(n_y_half, n_y) 567 y_mean_temp = np.mean(self.y_[first_half_idx, i]) 568 centered_y_i_temp = self.y_[first_half_idx, i] - y_mean_temp 569 self.obj.fit(X=scaled_Z[first_half_idx, :], y=centered_y_i_temp) 570 # calibrated residuals actually 571 residuals_.append( 572 ( 573 self.y_[second_half_idx, i] 574 - ( 575 y_mean_temp 576 + self.obj.predict(scaled_Z[second_half_idx, :]) 577 ) 578 ).tolist() 579 ) 580 # fit on the second half 581 y_mean = np.mean(self.y_[second_half_idx, i]) 582 self.y_means_[i] = y_mean 583 centered_y_i = self.y_[second_half_idx, i] - y_mean 584 self.obj.fit(X=scaled_Z[second_half_idx, :], y=centered_y_i) 585 self.fit_objs_[i] = deepcopy(self.obj) 586 587 self.residuals_ = np.asarray(residuals_).T 588 589 if self.type_pi == "gaussian": 590 self.gaussian_preds_std_ = np.std(self.residuals_, axis=0) 591 592 if self.type_pi.startswith("scp2"): 593 # Calculate mean and standard deviation for each column 594 data_mean = np.mean(self.residuals_, axis=0) 595 self.residuals_std_dev_ = np.std(self.residuals_, axis=0) 596 # Center and scale the array using broadcasting 597 self.residuals_ = ( 598 self.residuals_ - data_mean[np.newaxis, :] 599 ) / self.residuals_std_dev_[np.newaxis, :] 600 601 if self.replications != None and "kde" in self.type_pi: 602 if self.verbose > 0: 603 print(f"\n Simulate residuals using {self.kernel} kernel... \n") 604 assert self.kernel in ( 605 "gaussian", 606 "tophat", 607 ), "currently, 'kernel' must be either 'gaussian' or 'tophat'" 608 kernel_bandwidths = {"bandwidth": np.logspace(-6, 6, 150)} 609 grid = GridSearchCV( 610 KernelDensity(kernel=self.kernel, **kwargs), 611 param_grid=kernel_bandwidths, 612 ) 613 grid.fit(self.residuals_) 614 615 if self.verbose > 0: 616 print( 617 f"\n Best parameters for {self.kernel} kernel: {grid.best_params_} \n" 618 ) 619 620 self.kde_ = grid.best_estimator_ 621 622 return self
Fit MTS model to training data X, with optional regressors xreg
Parameters:
X: {array-like}, shape = [n_samples, n_features] Training time series, where n_samples is the number of samples and n_features is the number of features; X must be in increasing order (most recent observations last)
xreg: {array-like}, shape = [n_samples, n_features_xreg] Additional (external) regressors to be passed to self.obj xreg must be in 'increasing' order (most recent observations last)
**kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)
Returns:
self: object
700 def predict(self, h=5, level=95, quantiles=None, **kwargs): 701 """Forecast all the time series, h steps ahead""" 702 703 if quantiles is not None: 704 # Validate 705 quantiles = np.asarray(quantiles) 706 if not ((quantiles > 0) & (quantiles < 1)).all(): 707 raise ValueError("quantiles must be between 0 and 1.") 708 # Delegate to dedicated method 709 return self._predict_quantiles(h=h, quantiles=quantiles, **kwargs) 710 711 if isinstance(level, list) or isinstance(level, np.ndarray): 712 # Store results 713 result_dict = {} 714 # Loop through alphas and calculate lower/upper for each alpha level 715 # E.g [0.5, 2.5, 5, 16.5, 25, 50] 716 for lev in level: 717 # Get the forecast for this alpha 718 res = self.predict(h=h, level=lev, **kwargs) 719 # Adjust index and collect lower/upper bounds 720 res.lower.index = pd.to_datetime(res.lower.index) 721 res.upper.index = pd.to_datetime(res.upper.index) 722 # Loop over each time series (multivariate) and flatten results 723 if isinstance(res.lower, pd.DataFrame): 724 for ( 725 series 726 ) in ( 727 res.lower.columns 728 ): # Assumes 'lower' and 'upper' have multiple series 729 result_dict[f"lower_{lev}_{series}"] = ( 730 res.lower[series].to_numpy().flatten() 731 ) 732 result_dict[f"upper_{lev}_{series}"] = ( 733 res.upper[series].to_numpy().flatten() 734 ) 735 else: 736 for series_id in range( 737 self.n_series 738 ): # Assumes 'lower' and 'upper' have multiple series 739 result_dict[f"lower_{lev}_{series_id}"] = ( 740 res.lower[series_id, :].to_numpy().flatten() 741 ) 742 result_dict[f"upper_{lev}_{series_id}"] = ( 743 res.upper[series_id, :].to_numpy().flatten() 744 ) 745 return pd.DataFrame(result_dict, index=self.output_dates_) 746 747 # only one prediction interval 748 self.output_dates_, frequency = ts.compute_output_dates(self.df_, h) 749 750 self.level_ = level 751 752 self.return_std_ = False # do not remove (/!\) 753 754 self.mean_ = None # do not remove (/!\) 755 756 self.mean_ = deepcopy(self.y_) # do not remove (/!\) 757 758 self.lower_ = None # do not remove (/!\) 759 760 self.upper_ = None # do not remove (/!\) 761 762 self.sims_ = None # do not remove (/!\) 763 764 y_means_ = np.asarray( 765 [self.y_means_[i] for i in range(self.init_n_series_)] 766 ) 767 768 n_features = self.init_n_series_ * self.lags 769 770 self.alpha_ = 100 - level 771 772 pi_multiplier = norm.ppf(1 - self.alpha_ / 200) 773 774 if "return_std" in kwargs: # bayesian forecasting 775 self.return_std_ = True 776 self.preds_std_ = [] 777 DescribeResult = namedtuple( 778 "DescribeResult", ("mean", "lower", "upper") 779 ) # to be updated 780 781 if "return_pi" in kwargs: # split conformal, without simulation 782 mean_pi_ = [] 783 lower_pi_ = [] 784 upper_pi_ = [] 785 median_pi_ = [] 786 DescribeResult = namedtuple( 787 "DescribeResult", ("mean", "lower", "upper") 788 ) # to be updated 789 790 if self.kde_ != None and "kde" in self.type_pi: # kde 791 target_cols = self.df_.columns[ 792 : self.init_n_series_ 793 ] # Get target column names 794 if self.verbose == 1: 795 self.residuals_sims_ = tuple( 796 self.kde_.sample( 797 n_samples=h, random_state=self.seed + 100 * i 798 ) # Keep full sample 799 for i in tqdm(range(self.replications)) 800 ) 801 elif self.verbose == 0: 802 self.residuals_sims_ = tuple( 803 self.kde_.sample( 804 n_samples=h, random_state=self.seed + 100 * i 805 ) # Keep full sample 806 for i in range(self.replications) 807 ) 808 809 # Convert to DataFrames after sampling 810 self.residuals_sims_ = tuple( 811 pd.DataFrame( 812 sim, # Keep all columns 813 columns=target_cols, # Use original target column names 814 index=self.output_dates_, 815 ) 816 for sim in self.residuals_sims_ 817 ) 818 819 if self.type_pi in ("bootstrap", "scp-bootstrap", "scp2-bootstrap"): 820 assert self.replications is not None and isinstance( 821 self.replications, int 822 ), "'replications' must be provided and be an integer" 823 if self.verbose == 1: 824 self.residuals_sims_ = tuple( 825 ts.bootstrap( 826 self.residuals_, 827 h=h, 828 block_size=None, 829 seed=self.seed + 100 * i, 830 ) 831 for i in tqdm(range(self.replications)) 832 ) 833 elif self.verbose == 0: 834 self.residuals_sims_ = tuple( 835 ts.bootstrap( 836 self.residuals_, 837 h=h, 838 block_size=None, 839 seed=self.seed + 100 * i, 840 ) 841 for i in range(self.replications) 842 ) 843 844 if self.type_pi in ( 845 "block-bootstrap", 846 "scp-block-bootstrap", 847 "scp2-block-bootstrap", 848 ): 849 if self.block_size is None: 850 self.block_size = int( 851 np.ceil(3.15 * (self.residuals_.shape[0] ** (1 / 3))) 852 ) 853 854 assert self.replications is not None and isinstance( 855 self.replications, int 856 ), "'replications' must be provided and be an integer" 857 if self.verbose == 1: 858 self.residuals_sims_ = tuple( 859 ts.bootstrap( 860 self.residuals_, 861 h=h, 862 block_size=self.block_size, 863 seed=self.seed + 100 * i, 864 ) 865 for i in tqdm(range(self.replications)) 866 ) 867 elif self.verbose == 0: 868 self.residuals_sims_ = tuple( 869 ts.bootstrap( 870 self.residuals_, 871 h=h, 872 block_size=self.block_size, 873 seed=self.seed + 100 * i, 874 ) 875 for i in range(self.replications) 876 ) 877 878 if "vine" in self.type_pi: 879 if self.verbose == 1: 880 self.residuals_sims_ = tuple( 881 vinecopula_sample( 882 x=self.residuals_, 883 n_samples=h, 884 method=self.type_pi, 885 random_state=self.seed + 100 * i, 886 ) 887 for i in tqdm(range(self.replications)) 888 ) 889 elif self.verbose == 0: 890 self.residuals_sims_ = tuple( 891 vinecopula_sample( 892 x=self.residuals_, 893 n_samples=h, 894 method=self.type_pi, 895 random_state=self.seed + 100 * i, 896 ) 897 for i in range(self.replications) 898 ) 899 900 mean_ = deepcopy(self.mean_) 901 902 for i in range(h): 903 904 new_obs = ts.reformat_response(mean_, self.lags) 905 new_X = new_obs.reshape(1, -1) 906 cooked_new_X = self.cook_test_set(new_X, **kwargs) 907 908 if "return_std" in kwargs: 909 self.preds_std_.append( 910 [ 911 np.asarray( 912 self.fit_objs_[i].predict( 913 cooked_new_X, return_std=True 914 )[1] 915 ).item() 916 for i in range(self.n_series) 917 ] 918 ) 919 920 if "return_pi" in kwargs: 921 for i in range(self.n_series): 922 preds_pi = self.fit_objs_[i].predict(cooked_new_X, **kwargs) 923 mean_pi_.append(preds_pi.mean[0]) 924 lower_pi_.append(preds_pi.lower[0]) 925 upper_pi_.append(preds_pi.upper[0]) 926 927 if self.type_pi != "quantile": 928 predicted_cooked_new_X = np.asarray( 929 [ 930 np.asarray( 931 self.fit_objs_[i].predict(cooked_new_X) 932 ).item() 933 for i in range(self.init_n_series_) 934 ] 935 ) 936 else: 937 predicted_cooked_new_X = np.asarray( 938 [ 939 np.asarray( 940 self.fit_objs_[i] 941 .predict(cooked_new_X, return_pi=True) 942 .upper 943 ).item() 944 for i in range(self.init_n_series_) 945 ] 946 ) 947 948 preds = np.asarray(y_means_ + predicted_cooked_new_X) 949 950 # Create full row with both predictions and external regressors 951 if self.xreg_ is not None and "xreg" in kwargs: 952 next_xreg = kwargs["xreg"].iloc[i: i + 1].values.flatten() 953 full_row = np.concatenate([preds, next_xreg]) 954 else: 955 full_row = preds 956 957 # Create a new row with same number of columns as mean_ 958 new_row = np.zeros((1, mean_.shape[1])) 959 new_row[0, : full_row.shape[0]] = full_row 960 961 # Maintain the full dimensionality by using vstack instead of rbind 962 mean_ = np.vstack([new_row, mean_[:-1]]) 963 964 # Final output should only include the target columns 965 self.mean_ = pd.DataFrame( 966 mean_[0:h, : self.init_n_series_][::-1], 967 columns=self.df_.columns[: self.init_n_series_], 968 index=self.output_dates_, 969 ) 970 971 # function's return ---------------------------------------------------------------------- 972 if ( 973 (("return_std" not in kwargs) and ("return_pi" not in kwargs)) 974 and (self.type_pi not in ("gaussian", "scp")) 975 ) or ("vine" in self.type_pi): 976 977 if self.replications is None: 978 return self.mean_.iloc[:, : self.init_n_series_] 979 980 # if "return_std" not in kwargs and self.replications is not None 981 meanf = [] 982 medianf = [] 983 lower = [] 984 upper = [] 985 986 if "scp2" in self.type_pi: 987 988 if self.verbose == 1: 989 self.sims_ = tuple( 990 ( 991 self.mean_ 992 + self.residuals_sims_[i] 993 * self.residuals_std_dev_[np.newaxis, :] 994 for i in tqdm(range(self.replications)) 995 ) 996 ) 997 elif self.verbose == 0: 998 self.sims_ = tuple( 999 ( 1000 self.mean_ 1001 + self.residuals_sims_[i] 1002 * self.residuals_std_dev_[np.newaxis, :] 1003 for i in range(self.replications) 1004 ) 1005 ) 1006 else: 1007 1008 if self.verbose == 1: 1009 self.sims_ = tuple( 1010 ( 1011 self.mean_ + self.residuals_sims_[i] 1012 for i in tqdm(range(self.replications)) 1013 ) 1014 ) 1015 elif self.verbose == 0: 1016 self.sims_ = tuple( 1017 ( 1018 self.mean_ + self.residuals_sims_[i] 1019 for i in range(self.replications) 1020 ) 1021 ) 1022 1023 DescribeResult = namedtuple( 1024 "DescribeResult", ("mean", "sims", "lower", "upper") 1025 ) 1026 for ix in range(self.init_n_series_): 1027 sims_ix = getsims(self.sims_, ix) 1028 if self.agg == "mean": 1029 meanf.append(np.mean(sims_ix, axis=1)) 1030 else: 1031 medianf.append(np.median(sims_ix, axis=1)) 1032 lower.append(np.quantile(sims_ix, q=self.alpha_ / 200, axis=1)) 1033 upper.append( 1034 np.quantile(sims_ix, q=1 - self.alpha_ / 200, axis=1) 1035 ) 1036 self.mean_ = pd.DataFrame( 1037 np.asarray(meanf).T, 1038 columns=self.series_names[ 1039 : self.init_n_series_ 1040 ], # self.df_.columns, 1041 index=self.output_dates_, 1042 ) 1043 1044 self.lower_ = pd.DataFrame( 1045 np.asarray(lower).T, 1046 columns=self.series_names[ 1047 : self.init_n_series_ 1048 ], # self.df_.columns, 1049 index=self.output_dates_, 1050 ) 1051 1052 self.upper_ = pd.DataFrame( 1053 np.asarray(upper).T, 1054 columns=self.series_names[ 1055 : self.init_n_series_ 1056 ], # self.df_.columns, 1057 index=self.output_dates_, 1058 ) 1059 1060 try: 1061 self.median_ = pd.DataFrame( 1062 np.asarray(medianf).T, 1063 columns=self.series_names[ 1064 : self.init_n_series_ 1065 ], # self.df_.columns, 1066 index=self.output_dates_, 1067 ) 1068 except Exception as e: 1069 pass 1070 1071 return DescribeResult( 1072 self.mean_, self.sims_, self.lower_, self.upper_ 1073 ) 1074 1075 if ( 1076 (("return_std" in kwargs) or ("return_pi" in kwargs)) 1077 and (self.type_pi not in ("gaussian", "scp")) 1078 ) or "vine" in self.type_pi: 1079 DescribeResult = namedtuple( 1080 "DescribeResult", ("mean", "lower", "upper") 1081 ) 1082 1083 self.mean_ = pd.DataFrame( 1084 np.asarray(self.mean_), 1085 columns=self.series_names, # self.df_.columns, 1086 index=self.output_dates_, 1087 ) 1088 1089 if "return_std" in kwargs: 1090 1091 self.preds_std_ = np.asarray(self.preds_std_) 1092 1093 self.lower_ = pd.DataFrame( 1094 self.mean_.values - pi_multiplier * self.preds_std_, 1095 columns=self.series_names, # self.df_.columns, 1096 index=self.output_dates_, 1097 ) 1098 1099 self.upper_ = pd.DataFrame( 1100 self.mean_.values + pi_multiplier * self.preds_std_, 1101 columns=self.series_names, # self.df_.columns, 1102 index=self.output_dates_, 1103 ) 1104 1105 if "return_pi" in kwargs: 1106 1107 self.lower_ = pd.DataFrame( 1108 np.asarray(lower_pi_).reshape(h, self.n_series) 1109 + y_means_[np.newaxis, :], 1110 columns=self.series_names, # self.df_.columns, 1111 index=self.output_dates_, 1112 ) 1113 1114 self.upper_ = pd.DataFrame( 1115 np.asarray(upper_pi_).reshape(h, self.n_series) 1116 + y_means_[np.newaxis, :], 1117 columns=self.series_names, # self.df_.columns, 1118 index=self.output_dates_, 1119 ) 1120 1121 res = DescribeResult(self.mean_, self.lower_, self.upper_) 1122 1123 if self.xreg_ is not None: 1124 if len(self.xreg_.shape) > 1: 1125 res2 = mx.tuple_map( 1126 res, 1127 lambda x: mo.delete_last_columns( 1128 x, num_columns=self.xreg_.shape[1] 1129 ), 1130 ) 1131 else: 1132 res2 = mx.tuple_map( 1133 res, lambda x: mo.delete_last_columns(x, num_columns=1) 1134 ) 1135 return DescribeResult(res2[0], res2[1], res2[2]) 1136 1137 return res 1138 1139 if self.type_pi == "gaussian": 1140 1141 DescribeResult = namedtuple( 1142 "DescribeResult", ("mean", "lower", "upper") 1143 ) 1144 1145 self.mean_ = pd.DataFrame( 1146 np.asarray(self.mean_), 1147 columns=self.series_names, # self.df_.columns, 1148 index=self.output_dates_, 1149 ) 1150 1151 self.lower_ = pd.DataFrame( 1152 self.mean_.values - pi_multiplier * self.gaussian_preds_std_, 1153 columns=self.series_names, # self.df_.columns, 1154 index=self.output_dates_, 1155 ) 1156 1157 self.upper_ = pd.DataFrame( 1158 self.mean_.values + pi_multiplier * self.gaussian_preds_std_, 1159 columns=self.series_names, # self.df_.columns, 1160 index=self.output_dates_, 1161 ) 1162 1163 res = DescribeResult(self.mean_, self.lower_, self.upper_) 1164 1165 if self.xreg_ is not None: 1166 if len(self.xreg_.shape) > 1: 1167 res2 = mx.tuple_map( 1168 res, 1169 lambda x: mo.delete_last_columns( 1170 x, num_columns=self.xreg_.shape[1] 1171 ), 1172 ) 1173 else: 1174 res2 = mx.tuple_map( 1175 res, lambda x: mo.delete_last_columns(x, num_columns=1) 1176 ) 1177 return DescribeResult(res2[0], res2[1], res2[2]) 1178 1179 return res 1180 1181 if self.type_pi == "quantile": 1182 1183 DescribeResult = namedtuple("DescribeResult", ("mean")) 1184 1185 self.mean_ = pd.DataFrame( 1186 np.asarray(self.mean_), 1187 columns=self.series_names, # self.df_.columns, 1188 index=self.output_dates_, 1189 ) 1190 1191 res = DescribeResult(self.mean_) 1192 1193 if self.xreg_ is not None: 1194 if len(self.xreg_.shape) > 1: 1195 res2 = mx.tuple_map( 1196 res, 1197 lambda x: mo.delete_last_columns( 1198 x, num_columns=self.xreg_.shape[1] 1199 ), 1200 ) 1201 else: 1202 res2 = mx.tuple_map( 1203 res, lambda x: mo.delete_last_columns(x, num_columns=1) 1204 ) 1205 return DescribeResult(res2[0]) 1206 1207 return res 1208 1209 # After prediction loop, ensure sims only contain target columns 1210 if self.sims_ is not None: 1211 if self.verbose == 1: 1212 self.sims_ = tuple( 1213 sim[:h,] # Only keep target columns and h rows 1214 for sim in tqdm(self.sims_) 1215 ) 1216 elif self.verbose == 0: 1217 self.sims_ = tuple( 1218 sim[:h,] # Only keep target columns and h rows 1219 for sim in self.sims_ 1220 ) 1221 1222 # Convert numpy arrays to DataFrames with proper columns 1223 self.sims_ = tuple( 1224 pd.DataFrame( 1225 sim, 1226 columns=self.df_.columns[: self.init_n_series_], 1227 index=self.output_dates_, 1228 ) 1229 for sim in self.sims_ 1230 ) 1231 1232 if self.type_pi in ( 1233 "kde", 1234 "bootstrap", 1235 "block-bootstrap", 1236 "vine-copula", 1237 ): 1238 if self.xreg_ is not None: 1239 # Use getsimsxreg when external regressors are present 1240 target_cols = self.df_.columns[: self.init_n_series_] 1241 self.sims_ = getsimsxreg( 1242 self.sims_, self.output_dates_, target_cols 1243 ) 1244 else: 1245 # Use original getsims for backward compatibility 1246 self.sims_ = getsims(self.sims_)
Forecast all the time series, h steps ahead
1313 def score( 1314 self, 1315 X, 1316 training_index, 1317 testing_index, 1318 scoring=None, 1319 alpha=0.5, 1320 **kwargs, 1321 ): 1322 """Train on training_index, score on testing_index.""" 1323 1324 assert ( 1325 bool(set(training_index).intersection(set(testing_index))) == False 1326 ), "Non-overlapping 'training_index' and 'testing_index' required" 1327 1328 # Dimensions 1329 try: 1330 # multivariate time series 1331 n, p = X.shape 1332 except: 1333 # univariate time series 1334 n = X.shape[0] 1335 p = 1 1336 1337 # Training and testing sets 1338 if p > 1: 1339 X_train = X[training_index, :] 1340 X_test = X[testing_index, :] 1341 else: 1342 X_train = X[training_index] 1343 X_test = X[testing_index] 1344 1345 # Horizon 1346 h = len(testing_index) 1347 assert ( 1348 len(training_index) + h 1349 ) <= n, "Please check lengths of training and testing windows" 1350 1351 # Fit and predict 1352 self.fit(X_train, **kwargs) 1353 preds = self.predict(h=h, **kwargs) 1354 1355 if scoring is None: 1356 scoring = "neg_root_mean_squared_error" 1357 1358 if scoring == "pinball": 1359 # Predict requested quantile 1360 q_pred = self.predict(h=h, quantiles=[alpha], **kwargs) 1361 # Handle multivariate 1362 scores = [] 1363 for j in range(p): 1364 series_name = getattr(self, "series_names", [f"Series_{j}"])[j] 1365 q_label = ( 1366 f"{int(alpha * 100):02d}" 1367 if (alpha * 100).is_integer() 1368 else f"{alpha:.3f}".replace(".", "_") 1369 ) 1370 col = f"quantile_{q_label}_{series_name}" 1371 if col not in q_pred.columns: 1372 raise ValueError( 1373 f"Column '{col}' not found in quantile forecast output." 1374 ) 1375 y_true_j = X_test[:, j] 1376 y_pred_j = q_pred[col].values 1377 # Compute pinball loss for this series 1378 loss = mean_pinball_loss(y_true_j, y_pred_j, alpha=alpha) 1379 scores.append(loss) 1380 # Return average over series 1381 return np.mean(scores) 1382 1383 if scoring == "crps": 1384 # Ensure simulations exist 1385 preds = self.predict(h=h, **kwargs) # triggers self.sims_ 1386 # Extract simulations: list of DataFrames → (R, h, p) 1387 sims_vals = np.stack( 1388 [sim.values for sim in self.sims_], axis=0 1389 ) # (R, h, p) 1390 crps_scores = [] 1391 for j in range(p): 1392 y_true_j = X_test[:, j] 1393 sims_j = sims_vals[:, :, j] # (R, h) 1394 crps_j = self._crps_ensemble(np.asarray(y_true_j), sims_j) 1395 crps_scores.append(np.mean(crps_j)) # average over horizon 1396 return np.mean(crps_scores) # average over series 1397 1398 # check inputs 1399 assert scoring in ( 1400 "explained_variance", 1401 "neg_mean_absolute_error", 1402 "neg_mean_squared_error", 1403 "neg_root_mean_squared_error", 1404 "neg_mean_squared_log_error", 1405 "neg_median_absolute_error", 1406 "r2", 1407 ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \ 1408 'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \ 1409 'neg_median_absolute_error', 'r2')" 1410 1411 scoring_options = { 1412 "explained_variance": skm2.explained_variance_score, 1413 "neg_mean_absolute_error": skm2.mean_absolute_error, 1414 "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2), 1415 "neg_root_mean_squared_error": lambda x, y: np.sqrt( 1416 np.mean((x - y) ** 2) 1417 ), 1418 "neg_mean_squared_log_error": skm2.mean_squared_log_error, 1419 "neg_median_absolute_error": skm2.median_absolute_error, 1420 "r2": skm2.r2_score, 1421 } 1422 1423 return scoring_options[scoring](X_test, preds)
Train on training_index, score on testing_index.
16class MultitaskClassifier(Base, ClassifierMixin): 17 """Multitask Classification model based on regression models, with shared covariates 18 19 Parameters: 20 21 obj: object 22 any object (must be a regression model) containing a method fit (obj.fit()) 23 and a method predict (obj.predict()) 24 25 n_hidden_features: int 26 number of nodes in the hidden layer 27 28 activation_name: str 29 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 30 31 a: float 32 hyperparameter for 'prelu' or 'elu' activation function 33 34 nodes_sim: str 35 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 36 'uniform' 37 38 bias: boolean 39 indicates if the hidden layer contains a bias term (True) or not 40 (False) 41 42 dropout: float 43 regularization parameter; (random) percentage of nodes dropped out 44 of the training 45 46 direct_link: boolean 47 indicates if the original predictors are included (True) in model's 48 fitting or not (False) 49 50 n_clusters: int 51 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: 52 no clustering) 53 54 cluster_encode: bool 55 defines how the variable containing clusters is treated (default is one-hot) 56 if `False`, then labels are used, without one-hot encoding 57 58 type_clust: str 59 type of clustering method: currently k-means ('kmeans') or Gaussian 60 Mixture Model ('gmm') 61 62 type_scaling: a tuple of 3 strings 63 scaling methods for inputs, hidden layer, and clustering respectively 64 (and when relevant). 65 Currently available: standardization ('std') or MinMax scaling ('minmax') 66 67 col_sample: float 68 percentage of covariates randomly chosen for training 69 70 row_sample: float 71 percentage of rows chosen for training, by stratified bootstrapping 72 73 seed: int 74 reproducibility seed for nodes_sim=='uniform' 75 76 backend: str 77 "cpu" or "gpu" or "tpu" 78 79 Attributes: 80 81 fit_objs_: dict 82 objects adjusted to each individual time series 83 84 n_classes_: int 85 number of classes for the classifier 86 87 Examples: 88 89 See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/mtask_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/mtask_classification.py) 90 91 ```python 92 import nnetsauce as ns 93 import numpy as np 94 from sklearn.datasets import load_breast_cancer 95 from sklearn.linear_model import LinearRegression 96 from sklearn.model_selection import train_test_split 97 from sklearn import metrics 98 from time import time 99 100 breast_cancer = load_breast_cancer() 101 Z = breast_cancer.data 102 t = breast_cancer.target 103 104 X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2, 105 random_state=123+2*10) 106 107 # Linear Regression is used 108 regr = LinearRegression() 109 fit_obj = ns.MultitaskClassifier(regr, n_hidden_features=5, 110 n_clusters=2, type_clust="gmm") 111 112 start = time() 113 fit_obj.fit(X_train, y_train) 114 print(f"Elapsed {time() - start}") 115 116 print(fit_obj.score(X_test, y_test)) 117 print(fit_obj.score(X_test, y_test, scoring="roc_auc")) 118 119 start = time() 120 preds = fit_obj.predict(X_test) 121 print(f"Elapsed {time() - start}") 122 print(metrics.classification_report(preds, y_test)) 123 ``` 124 125 """ 126 127 # construct the object ----- 128 _estimator_type = "classifier" 129 130 def __init__( 131 self, 132 obj, 133 n_hidden_features=5, 134 activation_name="relu", 135 a=0.01, 136 nodes_sim="sobol", 137 bias=True, 138 dropout=0, 139 direct_link=True, 140 n_clusters=2, 141 cluster_encode=True, 142 type_clust="kmeans", 143 type_scaling=("std", "std", "std"), 144 col_sample=1, 145 row_sample=1, 146 seed=123, 147 backend="cpu", 148 ): 149 super().__init__( 150 n_hidden_features=n_hidden_features, 151 activation_name=activation_name, 152 a=a, 153 nodes_sim=nodes_sim, 154 bias=bias, 155 dropout=dropout, 156 direct_link=direct_link, 157 n_clusters=n_clusters, 158 cluster_encode=cluster_encode, 159 type_clust=type_clust, 160 type_scaling=type_scaling, 161 col_sample=col_sample, 162 row_sample=row_sample, 163 seed=seed, 164 backend=backend, 165 ) 166 167 self.type_fit = "classification" 168 self.obj = obj 169 self.fit_objs_ = {} 170 171 def fit(self, X, y, sample_weight=None, **kwargs): 172 """Fit MultitaskClassifier to training data (X, y). 173 174 Args: 175 176 X: {array-like}, shape = [n_samples, n_features] 177 Training vectors, where n_samples is the number 178 of samples and n_features is the number of features. 179 180 y: array-like, shape = [n_samples] 181 Target values. 182 183 **kwargs: additional parameters to be passed to 184 self.cook_training_set or self.obj.fit 185 186 Returns: 187 188 self: object 189 190 """ 191 192 assert mx.is_factor(y), "y must contain only integers" 193 194 output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 195 196 self.classes_ = np.unique(y) # for compatibility with sklearn 197 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 198 199 # multitask response 200 Y = mo.one_hot_encode2(output_y, self.n_classes_) 201 202 # if sample_weight is None: 203 for i in range(self.n_classes_): 204 self.fit_objs_[i] = deepcopy( 205 self.obj.fit(scaled_Z, Y[:, i], **kwargs) 206 ) 207 208 self.classes_ = np.unique(y) 209 return self 210 211 def predict(self, X, **kwargs): 212 """Predict test data X. 213 214 Args: 215 216 X: {array-like}, shape = [n_samples, n_features] 217 Training vectors, where n_samples is the number 218 of samples and n_features is the number of features. 219 220 **kwargs: additional parameters to be passed to 221 self.cook_test_set 222 223 Returns: 224 225 model predictions: {array-like} 226 227 """ 228 return np.argmax(self.predict_proba(X, **kwargs), axis=1) 229 230 def predict_proba(self, X, **kwargs): 231 """Predict probabilities for test data X. 232 233 Args: 234 235 X: {array-like}, shape = [n_samples, n_features] 236 Training vectors, where n_samples is the number 237 of samples and n_features is the number of features. 238 239 **kwargs: additional parameters to be passed to 240 self.cook_test_set 241 242 Returns: 243 244 probability estimates for test data: {array-like} 245 246 """ 247 248 shape_X = X.shape 249 250 probs = np.zeros((shape_X[0], self.n_classes_)) 251 252 if len(shape_X) == 1: 253 n_features = shape_X[0] 254 255 new_X = mo.rbind( 256 X.reshape(1, n_features), 257 np.ones(n_features).reshape(1, n_features), 258 ) 259 260 Z = self.cook_test_set(new_X, **kwargs) 261 262 # loop on all the classes 263 for i in range(self.n_classes_): 264 probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0] 265 266 else: 267 Z = self.cook_test_set(X, **kwargs) 268 269 # loop on all the classes 270 for i in range(self.n_classes_): 271 probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs) 272 273 expit_raw_probs = expit(probs) 274 275 return expit_raw_probs / expit_raw_probs.sum(axis=1)[:, None] 276 277 def decision_function(self, X, **kwargs): 278 """Compute the decision function of X. 279 280 Parameters: 281 X: {array-like}, shape = [n_samples, n_features] 282 Samples to compute decision function for. 283 284 **kwargs: additional parameters to be passed to 285 self.cook_test_set 286 287 Returns: 288 array-like of shape (n_samples,) or (n_samples, n_classes) 289 Decision function of the input samples. The order of outputs is the same 290 as that of the classes passed to fit. 291 """ 292 if not hasattr(self.obj, "decision_function"): 293 # If base classifier doesn't have decision_function, use predict_proba 294 proba = self.predict_proba(X, **kwargs) 295 if proba.shape[1] == 2: 296 return proba[:, 1] # For binary classification 297 return proba # For multiclass 298 299 if len(X.shape) == 1: 300 n_features = X.shape[0] 301 new_X = mo.rbind( 302 X.reshape(1, n_features), 303 np.ones(n_features).reshape(1, n_features), 304 ) 305 306 return ( 307 self.obj.decision_function( 308 self.cook_test_set(new_X, **kwargs), **kwargs 309 ) 310 )[0] 311 312 return self.obj.decision_function( 313 self.cook_test_set(X, **kwargs), **kwargs 314 ) 315 316 @property 317 def _estimator_type(self): 318 return "classifier"
Multitask Classification model based on regression models, with shared covariates
Parameters:
obj: object
any object (must be a regression model) containing a method fit (obj.fit())
and a method predict (obj.predict())
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
fit_objs_: dict
objects adjusted to each individual time series
n_classes_: int
number of classes for the classifier
Examples:
See also https://github.com/Techtonique/nnetsauce/blob/master/examples/mtask_classification.py
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time
breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
random_state=123+2*10)
# Linear Regression is used
regr = LinearRegression()
fit_obj = ns.MultitaskClassifier(regr, n_hidden_features=5,
n_clusters=2, type_clust="gmm")
start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")
print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
171 def fit(self, X, y, sample_weight=None, **kwargs): 172 """Fit MultitaskClassifier to training data (X, y). 173 174 Args: 175 176 X: {array-like}, shape = [n_samples, n_features] 177 Training vectors, where n_samples is the number 178 of samples and n_features is the number of features. 179 180 y: array-like, shape = [n_samples] 181 Target values. 182 183 **kwargs: additional parameters to be passed to 184 self.cook_training_set or self.obj.fit 185 186 Returns: 187 188 self: object 189 190 """ 191 192 assert mx.is_factor(y), "y must contain only integers" 193 194 output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 195 196 self.classes_ = np.unique(y) # for compatibility with sklearn 197 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 198 199 # multitask response 200 Y = mo.one_hot_encode2(output_y, self.n_classes_) 201 202 # if sample_weight is None: 203 for i in range(self.n_classes_): 204 self.fit_objs_[i] = deepcopy( 205 self.obj.fit(scaled_Z, Y[:, i], **kwargs) 206 ) 207 208 self.classes_ = np.unique(y) 209 return self
Fit MultitaskClassifier to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
211 def predict(self, X, **kwargs): 212 """Predict test data X. 213 214 Args: 215 216 X: {array-like}, shape = [n_samples, n_features] 217 Training vectors, where n_samples is the number 218 of samples and n_features is the number of features. 219 220 **kwargs: additional parameters to be passed to 221 self.cook_test_set 222 223 Returns: 224 225 model predictions: {array-like} 226 227 """ 228 return np.argmax(self.predict_proba(X, **kwargs), axis=1)
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
230 def predict_proba(self, X, **kwargs): 231 """Predict probabilities for test data X. 232 233 Args: 234 235 X: {array-like}, shape = [n_samples, n_features] 236 Training vectors, where n_samples is the number 237 of samples and n_features is the number of features. 238 239 **kwargs: additional parameters to be passed to 240 self.cook_test_set 241 242 Returns: 243 244 probability estimates for test data: {array-like} 245 246 """ 247 248 shape_X = X.shape 249 250 probs = np.zeros((shape_X[0], self.n_classes_)) 251 252 if len(shape_X) == 1: 253 n_features = shape_X[0] 254 255 new_X = mo.rbind( 256 X.reshape(1, n_features), 257 np.ones(n_features).reshape(1, n_features), 258 ) 259 260 Z = self.cook_test_set(new_X, **kwargs) 261 262 # loop on all the classes 263 for i in range(self.n_classes_): 264 probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0] 265 266 else: 267 Z = self.cook_test_set(X, **kwargs) 268 269 # loop on all the classes 270 for i in range(self.n_classes_): 271 probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs) 272 273 expit_raw_probs = expit(probs) 274 275 return expit_raw_probs / expit_raw_probs.sum(axis=1)[:, None]
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
110class NeuralNetRegressor(BaseEstimator, RegressorMixin): 111 """ 112 (Pretrained) Neural Network Regressor. 113 114 Parameters: 115 116 hidden_layer_sizes : tuple, default=(100,) 117 The number of neurons in each hidden layer. 118 max_iter : int, default=100 119 The maximum number of iterations to train the model. 120 learning_rate : float, default=0.01 121 The learning rate for the optimizer. 122 l1_ratio : float, default=0.5 123 The ratio of L1 regularization. 124 alpha : float, default=1e-6 125 The regularization parameter. 126 activation_name : str, default="relu" 127 The activation function to use. 128 dropout : float, default=0.0 129 The dropout rate. 130 random_state : int, default=None 131 The random state for the random number generator. 132 weights : list, default=None 133 The weights to initialize the model with. 134 135 Attributes: 136 137 weights : list 138 The weights of the model. 139 params : list 140 The parameters of the model. 141 scaler_ : sklearn.preprocessing.StandardScaler 142 The scaler used to standardize the input features. 143 y_mean_ : float 144 The mean of the target variable. 145 146 Methods: 147 148 fit(X, y) 149 Fit the model to the data. 150 predict(X) 151 Predict the target variable. 152 get_weights() 153 Get the weights of the model. 154 set_weights(weights) 155 Set the weights of the model. 156 """ 157 158 def __init__( 159 self, 160 hidden_layer_sizes=None, 161 max_iter=100, 162 learning_rate=0.01, 163 l1_ratio=0.5, 164 alpha=1e-6, 165 activation_name="relu", 166 dropout=0, 167 weights=None, 168 random_state=None, 169 ): 170 if weights is None and hidden_layer_sizes is None: 171 hidden_layer_sizes = (100,) # default value if neither is provided 172 self.hidden_layer_sizes = hidden_layer_sizes 173 self.max_iter = max_iter 174 self.learning_rate = learning_rate 175 self.l1_ratio = l1_ratio 176 self.alpha = alpha 177 self.activation_name = activation_name 178 self.dropout = dropout 179 self.weights = weights 180 self.random_state = random_state 181 self.params = None 182 self.scaler_ = StandardScaler() 183 self.y_mean_ = None 184 185 def _validate_weights(self, input_dim): 186 """Validate that weights dimensions are coherent.""" 187 if not self.weights: 188 return False 189 190 try: 191 # Check each layer's weights and biases 192 prev_dim = input_dim 193 for W, b in self.weights: 194 # Check weight matrix dimensions 195 if W.shape[0] != prev_dim: 196 raise ValueError( 197 f"Weight matrix input dimension {W.shape[0]} does not match, previous layer output dimension {prev_dim}" 198 ) 199 # Check bias dimension matches weight matrix output 200 if W.shape[1] != b.shape[0]: 201 raise ValueError( 202 f"Bias dimension {b.shape[0]} does not match weight matrix, output dimension {W.shape[1]}" 203 ) 204 prev_dim = W.shape[1] 205 206 # Check final output dimension is 1 for regression 207 if prev_dim != 1: 208 raise ValueError( 209 f"Final layer output dimension {prev_dim} must be 1 for regression" 210 ) 211 212 return True 213 except (AttributeError, IndexError): 214 raise ValueError( 215 "Weights format is invalid. Expected list of (weight, bias) tuples" 216 ) 217 218 def fit(self, X, y): 219 # Standardize the input features 220 X = self.scaler_.fit_transform(X) 221 # Ensure y is 2D for consistency 222 y = y.reshape(-1, 1) 223 self.y_mean_ = jnp.mean(y) 224 y = y - self.y_mean_ 225 # Validate or initialize weights 226 if self.weights is not None: 227 if self._validate_weights(X.shape[1]): 228 self.params = self.weights 229 else: 230 if self.hidden_layer_sizes is None: 231 raise ValueError( 232 "Either weights or hidden_layer_sizes must be provided" 233 ) 234 self.params = initialize_params( 235 X.shape[1], self.hidden_layer_sizes, self.random_state 236 ) 237 loss_fn = partial(loss, l1_ratio=self.l1_ratio, alpha=self.alpha) 238 grad_loss = jit(grad(loss_fn)) # compiled gradient evaluation function 239 perex_grads = jit( 240 vmap(grad_loss, in_axes=(None, 0, 0)) 241 ) # fast per-example grads 242 # Training loop 243 for _ in range(self.max_iter): 244 grads = perex_grads(self.params, X, y) 245 # Average gradients across examples 246 grads = jax.tree_map(lambda g: jnp.mean(g, axis=0), grads) 247 # Update parameters 248 self.params = [ 249 (W - self.learning_rate * dW, b - self.learning_rate * db) 250 for (W, b), (dW, db) in zip(self.params, grads) 251 ] 252 # Store final weights 253 self.weights = self.params 254 return self 255 256 def get_weights(self): 257 """Return the current weights of the model.""" 258 if self.weights is None: 259 raise ValueError( 260 "No weights available. Model has not been fitted yet." 261 ) 262 return self.weights 263 264 def set_weights(self, weights): 265 """Set the weights of the model manually.""" 266 self.weights = weights 267 self.params = weights 268 269 def predict(self, X): 270 X = self.scaler_.transform(X) 271 if self.params is None: 272 raise ValueError("Model has not been fitted yet.") 273 predictions = predict_internal( 274 self.params, 275 X, 276 activation_func=self.activation_name, 277 dropout=self.dropout, 278 seed=self.random_state, 279 ) 280 return predictions.reshape(-1) + self.y_mean_
(Pretrained) Neural Network Regressor.
Parameters:
hidden_layer_sizes : tuple, default=(100,)
The number of neurons in each hidden layer.
max_iter : int, default=100
The maximum number of iterations to train the model.
learning_rate : float, default=0.01
The learning rate for the optimizer.
l1_ratio : float, default=0.5
The ratio of L1 regularization.
alpha : float, default=1e-6
The regularization parameter.
activation_name : str, default="relu"
The activation function to use.
dropout : float, default=0.0
The dropout rate.
random_state : int, default=None
The random state for the random number generator.
weights : list, default=None
The weights to initialize the model with.
Attributes:
weights : list
The weights of the model.
params : list
The parameters of the model.
scaler_ : sklearn.preprocessing.StandardScaler
The scaler used to standardize the input features.
y_mean_ : float
The mean of the target variable.
Methods:
fit(X, y)
Fit the model to the data.
predict(X)
Predict the target variable.
get_weights()
Get the weights of the model.
set_weights(weights)
Set the weights of the model.
218 def fit(self, X, y): 219 # Standardize the input features 220 X = self.scaler_.fit_transform(X) 221 # Ensure y is 2D for consistency 222 y = y.reshape(-1, 1) 223 self.y_mean_ = jnp.mean(y) 224 y = y - self.y_mean_ 225 # Validate or initialize weights 226 if self.weights is not None: 227 if self._validate_weights(X.shape[1]): 228 self.params = self.weights 229 else: 230 if self.hidden_layer_sizes is None: 231 raise ValueError( 232 "Either weights or hidden_layer_sizes must be provided" 233 ) 234 self.params = initialize_params( 235 X.shape[1], self.hidden_layer_sizes, self.random_state 236 ) 237 loss_fn = partial(loss, l1_ratio=self.l1_ratio, alpha=self.alpha) 238 grad_loss = jit(grad(loss_fn)) # compiled gradient evaluation function 239 perex_grads = jit( 240 vmap(grad_loss, in_axes=(None, 0, 0)) 241 ) # fast per-example grads 242 # Training loop 243 for _ in range(self.max_iter): 244 grads = perex_grads(self.params, X, y) 245 # Average gradients across examples 246 grads = jax.tree_map(lambda g: jnp.mean(g, axis=0), grads) 247 # Update parameters 248 self.params = [ 249 (W - self.learning_rate * dW, b - self.learning_rate * db) 250 for (W, b), (dW, db) in zip(self.params, grads) 251 ] 252 # Store final weights 253 self.weights = self.params 254 return self
269 def predict(self, X): 270 X = self.scaler_.transform(X) 271 if self.params is None: 272 raise ValueError("Model has not been fitted yet.") 273 predictions = predict_internal( 274 self.params, 275 X, 276 activation_func=self.activation_name, 277 dropout=self.dropout, 278 seed=self.random_state, 279 ) 280 return predictions.reshape(-1) + self.y_mean_
10class NeuralNetClassifier(BaseEstimator, ClassifierMixin): 11 """ 12 (Pretrained) Neural Network Classifier. 13 14 Parameters: 15 16 hidden_layer_sizes : tuple, default=(100,) 17 The number of neurons in each hidden layer. 18 max_iter : int, default=100 19 The maximum number of iterations to train the model. 20 learning_rate : float, default=0.01 21 The learning rate for the optimizer. 22 l1_ratio : float, default=0.5 23 The ratio of L1 regularization. 24 alpha : float, default=1e-6 25 The regularization parameter. 26 activation_name : str, default="relu" 27 The activation function to use. 28 dropout : float, default=0.0 29 The dropout rate. 30 random_state : int, default=None 31 The random state for the random number generator. 32 weights : list, default=None 33 The weights to initialize the model with. 34 35 Attributes: 36 37 weights : list 38 The weights of the model. 39 params : list 40 The parameters of the model. 41 scaler_ : sklearn.preprocessing.StandardScaler 42 The scaler used to standardize the input features. 43 y_mean_ : float 44 The mean of the target variable. 45 46 Methods: 47 48 fit(X, y) 49 Fit the model to the data. 50 predict(X) 51 Predict the target variable. 52 predict_proba(X) 53 Predict the probability of the target variable. 54 get_weights() 55 Get the weights of the model. 56 set_weights(weights) 57 Set the weights of the model. 58 """ 59 60 _estimator_type = "classifier" 61 62 def __init__( 63 self, 64 hidden_layer_sizes=(100,), 65 max_iter=100, 66 learning_rate=0.01, 67 weights=None, 68 l1_ratio=0.5, 69 alpha=1e-6, 70 activation_name="relu", 71 dropout=0.0, 72 random_state=None, 73 ): 74 self.hidden_layer_sizes = hidden_layer_sizes 75 self.max_iter = max_iter 76 self.learning_rate = learning_rate 77 self.weights = weights 78 self.l1_ratio = l1_ratio 79 self.alpha = alpha 80 self.activation_name = activation_name 81 self.dropout = dropout 82 self.random_state = random_state 83 self.regr = None 84 85 def fit(self, X, y): 86 """Fit the model to the data. 87 88 Parameters: 89 90 X: {array-like}, shape = [n_samples, n_features] 91 Training vectors, where n_samples is the number of samples and 92 n_features is the number of features. 93 y: array-like, shape = [n_samples] 94 Target values. 95 """ 96 regressor = NeuralNetRegressor( 97 hidden_layer_sizes=self.hidden_layer_sizes, 98 max_iter=self.max_iter, 99 learning_rate=self.learning_rate, 100 weights=self.weights, 101 l1_ratio=self.l1_ratio, 102 alpha=self.alpha, 103 activation_name=self.activation_name, 104 dropout=self.dropout, 105 random_state=self.random_state, 106 ) 107 self.regr = SimpleMultitaskClassifier(regressor) 108 self.regr.fit(X, y) 109 self.classes_ = np.unique(y) 110 self.n_classes_ = len(self.classes_) 111 self.n_tasks_ = 1 112 self.n_features_in_ = X.shape[1] 113 self.n_outputs_ = 1 114 self.n_samples_fit_ = X.shape[0] 115 self.n_samples_test_ = X.shape[0] 116 self.n_features_out_ = 1 117 self.n_outputs_ = 1 118 self.n_features_in_ = X.shape[1] 119 self.n_features_out_ = 1 120 self.n_outputs_ = 1 121 return self 122 123 def predict_proba(self, X): 124 """Predict the probability of the target variable. 125 126 Parameters: 127 128 X: {array-like}, shape = [n_samples, n_features] 129 Training vectors, where n_samples is the number of samples and 130 n_features is the number of features. 131 """ 132 return self.regr.predict_proba(X) 133 134 def predict(self, X): 135 """Predict the target variable. 136 137 Parameters: 138 139 X: {array-like}, shape = [n_samples, n_features] 140 Training vectors, where n_samples is the number of samples and 141 n_features is the number of features. 142 """ 143 return self.regr.predict(X) 144 145 @property 146 def _estimator_type(self): 147 return "classifier"
(Pretrained) Neural Network Classifier.
Parameters:
hidden_layer_sizes : tuple, default=(100,)
The number of neurons in each hidden layer.
max_iter : int, default=100
The maximum number of iterations to train the model.
learning_rate : float, default=0.01
The learning rate for the optimizer.
l1_ratio : float, default=0.5
The ratio of L1 regularization.
alpha : float, default=1e-6
The regularization parameter.
activation_name : str, default="relu"
The activation function to use.
dropout : float, default=0.0
The dropout rate.
random_state : int, default=None
The random state for the random number generator.
weights : list, default=None
The weights to initialize the model with.
Attributes:
weights : list
The weights of the model.
params : list
The parameters of the model.
scaler_ : sklearn.preprocessing.StandardScaler
The scaler used to standardize the input features.
y_mean_ : float
The mean of the target variable.
Methods:
fit(X, y)
Fit the model to the data.
predict(X)
Predict the target variable.
predict_proba(X)
Predict the probability of the target variable.
get_weights()
Get the weights of the model.
set_weights(weights)
Set the weights of the model.
85 def fit(self, X, y): 86 """Fit the model to the data. 87 88 Parameters: 89 90 X: {array-like}, shape = [n_samples, n_features] 91 Training vectors, where n_samples is the number of samples and 92 n_features is the number of features. 93 y: array-like, shape = [n_samples] 94 Target values. 95 """ 96 regressor = NeuralNetRegressor( 97 hidden_layer_sizes=self.hidden_layer_sizes, 98 max_iter=self.max_iter, 99 learning_rate=self.learning_rate, 100 weights=self.weights, 101 l1_ratio=self.l1_ratio, 102 alpha=self.alpha, 103 activation_name=self.activation_name, 104 dropout=self.dropout, 105 random_state=self.random_state, 106 ) 107 self.regr = SimpleMultitaskClassifier(regressor) 108 self.regr.fit(X, y) 109 self.classes_ = np.unique(y) 110 self.n_classes_ = len(self.classes_) 111 self.n_tasks_ = 1 112 self.n_features_in_ = X.shape[1] 113 self.n_outputs_ = 1 114 self.n_samples_fit_ = X.shape[0] 115 self.n_samples_test_ = X.shape[0] 116 self.n_features_out_ = 1 117 self.n_outputs_ = 1 118 self.n_features_in_ = X.shape[1] 119 self.n_features_out_ = 1 120 self.n_outputs_ = 1 121 return self
Fit the model to the data.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
123 def predict_proba(self, X): 124 """Predict the probability of the target variable. 125 126 Parameters: 127 128 X: {array-like}, shape = [n_samples, n_features] 129 Training vectors, where n_samples is the number of samples and 130 n_features is the number of features. 131 """ 132 return self.regr.predict_proba(X)
Predict the probability of the target variable.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
134 def predict(self, X): 135 """Predict the target variable. 136 137 Parameters: 138 139 X: {array-like}, shape = [n_samples, n_features] 140 Training vectors, where n_samples is the number of samples and 141 n_features is the number of features. 142 """ 143 return self.regr.predict(X)
Predict the target variable.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
19class PredictionInterval(BaseEstimator, RegressorMixin): 20 """Class PredictionInterval: Obtain prediction intervals. 21 22 Attributes: 23 24 obj: an object; 25 fitted object containing methods `fit` and `predict` 26 27 method: a string; 28 method for constructing the prediction intervals. 29 Currently "splitconformal" (default) and "localconformal" 30 31 level: a float; 32 Confidence level for prediction intervals. Default is 95, 33 equivalent to a miscoverage error of 5 (%) 34 35 replications: an integer; 36 Number of replications for simulated conformal (default is `None`), 37 for type_pi = "bootstrap" or "kde" 38 39 type_pi: a string; 40 type of prediction interval: currently `None` 41 (split conformal without simulation), "kde" or "bootstrap" 42 43 type_split: a string; 44 "random" (random split of data) or "sequential" (sequential split of data) 45 46 seed: an integer; 47 Reproducibility of fit (there's a random split between fitting and calibration data) 48 """ 49 50 def __init__( 51 self, 52 obj, 53 method="splitconformal", 54 level=95, 55 type_pi=None, 56 type_split="random", 57 replications=None, 58 kernel=None, 59 agg="mean", 60 seed=123, 61 ): 62 63 self.obj = obj 64 self.method = method 65 self.level = level 66 self.type_pi = type_pi 67 self.type_split = type_split 68 self.replications = replications 69 self.kernel = kernel 70 self.agg = agg 71 self.seed = seed 72 self.alpha_ = 1 - self.level / 100 73 self.quantile_ = None 74 self.icp_ = None 75 self.calibrated_residuals_ = None 76 self.scaled_calibrated_residuals_ = None 77 self.calibrated_residuals_scaler_ = None 78 self.kde_ = None 79 self.aic_ = None 80 self.aicc_ = None 81 self.bic_ = None 82 self.sse_ = None 83 84 def fit(self, X, y, sample_weight=None, **kwargs): 85 """Fit the `method` to training data (X, y). 86 87 Args: 88 89 X: array-like, shape = [n_samples, n_features]; 90 Training set vectors, where n_samples is the number 91 of samples and n_features is the number of features. 92 93 y: array-like, shape = [n_samples, ]; Target values. 94 95 sample_weight: array-like, shape = [n_samples] 96 Sample weights. 97 98 """ 99 100 if self.type_split == "random": 101 102 X_train, X_calibration, y_train, y_calibration = train_test_split( 103 X, y, test_size=0.5, random_state=self.seed 104 ) 105 106 elif self.type_split == "sequential": 107 108 n_x = X.shape[0] 109 n_x_half = n_x // 2 110 first_half_idx = range(0, n_x_half) 111 second_half_idx = range(n_x_half, n_x) 112 X_train = X[first_half_idx, :] 113 X_calibration = X[second_half_idx, :] 114 y_train = y[first_half_idx] 115 y_calibration = y[second_half_idx] 116 117 if self.method == "splitconformal": 118 119 self.obj.fit(X_train, y_train) 120 preds_calibration = self.obj.predict(X_calibration) 121 self.calibrated_residuals_ = y_calibration - preds_calibration 122 absolute_residuals = np.abs(self.calibrated_residuals_) 123 self.calibrated_residuals_scaler_ = StandardScaler( 124 with_mean=True, with_std=True 125 ) 126 self.scaled_calibrated_residuals_ = ( 127 self.calibrated_residuals_scaler_.fit_transform( 128 self.calibrated_residuals_.reshape(-1, 1) 129 ).ravel() 130 ) 131 try: 132 # numpy version >= 1.22 133 self.quantile_ = np.quantile( 134 a=absolute_residuals, q=self.level / 100, method="higher" 135 ) 136 except Exception: 137 # numpy version < 1.22 138 self.quantile_ = np.quantile( 139 a=absolute_residuals, 140 q=self.level / 100, 141 interpolation="higher", 142 ) 143 144 if self.method == "localconformal": 145 146 mad_estimator = ExtraTreesRegressor() 147 normalizer = RegressorNormalizer( 148 self.obj, mad_estimator, AbsErrorErrFunc() 149 ) 150 nc = RegressorNc(self.obj, AbsErrorErrFunc(), normalizer) 151 self.icp_ = IcpRegressor(nc) 152 self.icp_.fit(X_train, y_train) 153 self.icp_.calibrate(X_calibration, y_calibration) 154 155 # Calculate AIC 156 # Get predictions 157 preds = self.obj.predict(X_calibration) 158 159 # Calculate SSE 160 self.sse_ = np.sum((y_calibration - preds) ** 2) 161 162 # Get number of parameters from the base model 163 n_params = ( 164 getattr(self.obj, "n_hidden_features", 0) + X_calibration.shape[1] 165 ) 166 167 # Calculate AIC 168 n_samples = len(y_calibration) 169 temp = n_samples * np.log(self.sse_ / n_samples) 170 self.aic_ = temp + 2 * n_params 171 self.bic_ = temp + np.log(n_samples) * n_params 172 173 return self 174 175 def predict(self, X, return_pi=False): 176 """Obtain predictions and prediction intervals 177 178 Args: 179 180 X: array-like, shape = [n_samples, n_features]; 181 Testing set vectors, where n_samples is the number 182 of samples and n_features is the number of features. 183 184 return_pi: boolean 185 Whether the prediction interval is returned or not. 186 Default is False, for compatibility with other _estimators_. 187 If True, a tuple containing the predictions + lower and upper 188 bounds is returned. 189 190 """ 191 192 if self.method == "splitconformal": 193 pred = self.obj.predict(X) 194 195 if self.method == "localconformal": 196 pred = self.icp_.predict(X) 197 198 if self.method == "splitconformal": 199 200 if ( 201 self.replications is None and self.type_pi is None 202 ): # type_pi is not used here, no bootstrap or kde 203 204 if return_pi: 205 206 DescribeResult = namedtuple( 207 "DescribeResult", ("mean", "lower", "upper") 208 ) 209 return DescribeResult( 210 pred, pred - self.quantile_, pred + self.quantile_ 211 ) 212 213 else: 214 215 return pred 216 217 else: # self.method == "splitconformal" and if self.replications is not None, type_pi must be used 218 219 if self.type_pi is None: 220 self.type_pi = "kde" 221 raise Warning("type_pi must be set, setting to 'kde'") 222 223 if self.replications is None: 224 self.replications = 100 225 raise Warning("replications must be set, setting to 100") 226 227 assert self.type_pi in ( 228 "bootstrap", 229 "kde", 230 ), "`self.type_pi` must be in ('bootstrap', 'kde')" 231 232 if self.type_pi == "bootstrap": 233 np.random.seed(self.seed) 234 self.residuals_sims_ = np.asarray( 235 [ 236 np.random.choice( 237 a=self.scaled_calibrated_residuals_, 238 size=X.shape[0], 239 ) 240 for _ in range(self.replications) 241 ] 242 ).T 243 self.sims_ = np.asarray( 244 [ 245 pred 246 + self.calibrated_residuals_scaler_.scale_[0] 247 * self.residuals_sims_[:, i].ravel() 248 for i in range(self.replications) 249 ] 250 ).T 251 elif self.type_pi == "kde": 252 self.kde_ = gaussian_kde( 253 dataset=self.scaled_calibrated_residuals_ 254 ) 255 self.sims_ = np.asarray( 256 [ 257 pred 258 + self.calibrated_residuals_scaler_.scale_[0] 259 * self.kde_.resample( 260 size=X.shape[0], seed=self.seed + i 261 ).ravel() 262 for i in range(self.replications) 263 ] 264 ).T 265 266 self.mean_ = np.mean(self.sims_, axis=1) 267 self.lower_ = np.quantile( 268 self.sims_, q=self.alpha_ / 200, axis=1 269 ) 270 self.upper_ = np.quantile( 271 self.sims_, q=1 - self.alpha_ / 200, axis=1 272 ) 273 274 DescribeResult = namedtuple( 275 "DescribeResult", ("mean", "sims", "lower", "upper") 276 ) 277 278 return DescribeResult( 279 self.mean_, self.sims_, self.lower_, self.upper_ 280 ) 281 282 if self.method == "localconformal": 283 284 if self.replications is None: 285 286 if return_pi: 287 288 predictions_bounds = self.icp_.predict( 289 X, significance=1 - self.level 290 ) 291 DescribeResult = namedtuple( 292 "DescribeResult", ("mean", "lower", "upper") 293 ) 294 return DescribeResult( 295 pred, predictions_bounds[:, 0], predictions_bounds[:, 1] 296 ) 297 298 else: 299 300 return pred 301 302 else: # (self.method == "localconformal") and if self.replications is not None 303 304 raise NotImplementedError( 305 "When self.method == 'localconformal', there are no simulations" 306 )
Class PredictionInterval: Obtain prediction intervals.
Attributes:
obj: an object;
fitted object containing methods `fit` and `predict`
method: a string;
method for constructing the prediction intervals.
Currently "splitconformal" (default) and "localconformal"
level: a float;
Confidence level for prediction intervals. Default is 95,
equivalent to a miscoverage error of 5 (%)
replications: an integer;
Number of replications for simulated conformal (default is `None`),
for type_pi = "bootstrap" or "kde"
type_pi: a string;
type of prediction interval: currently `None`
(split conformal without simulation), "kde" or "bootstrap"
type_split: a string;
"random" (random split of data) or "sequential" (sequential split of data)
seed: an integer;
Reproducibility of fit (there's a random split between fitting and calibration data)
84 def fit(self, X, y, sample_weight=None, **kwargs): 85 """Fit the `method` to training data (X, y). 86 87 Args: 88 89 X: array-like, shape = [n_samples, n_features]; 90 Training set vectors, where n_samples is the number 91 of samples and n_features is the number of features. 92 93 y: array-like, shape = [n_samples, ]; Target values. 94 95 sample_weight: array-like, shape = [n_samples] 96 Sample weights. 97 98 """ 99 100 if self.type_split == "random": 101 102 X_train, X_calibration, y_train, y_calibration = train_test_split( 103 X, y, test_size=0.5, random_state=self.seed 104 ) 105 106 elif self.type_split == "sequential": 107 108 n_x = X.shape[0] 109 n_x_half = n_x // 2 110 first_half_idx = range(0, n_x_half) 111 second_half_idx = range(n_x_half, n_x) 112 X_train = X[first_half_idx, :] 113 X_calibration = X[second_half_idx, :] 114 y_train = y[first_half_idx] 115 y_calibration = y[second_half_idx] 116 117 if self.method == "splitconformal": 118 119 self.obj.fit(X_train, y_train) 120 preds_calibration = self.obj.predict(X_calibration) 121 self.calibrated_residuals_ = y_calibration - preds_calibration 122 absolute_residuals = np.abs(self.calibrated_residuals_) 123 self.calibrated_residuals_scaler_ = StandardScaler( 124 with_mean=True, with_std=True 125 ) 126 self.scaled_calibrated_residuals_ = ( 127 self.calibrated_residuals_scaler_.fit_transform( 128 self.calibrated_residuals_.reshape(-1, 1) 129 ).ravel() 130 ) 131 try: 132 # numpy version >= 1.22 133 self.quantile_ = np.quantile( 134 a=absolute_residuals, q=self.level / 100, method="higher" 135 ) 136 except Exception: 137 # numpy version < 1.22 138 self.quantile_ = np.quantile( 139 a=absolute_residuals, 140 q=self.level / 100, 141 interpolation="higher", 142 ) 143 144 if self.method == "localconformal": 145 146 mad_estimator = ExtraTreesRegressor() 147 normalizer = RegressorNormalizer( 148 self.obj, mad_estimator, AbsErrorErrFunc() 149 ) 150 nc = RegressorNc(self.obj, AbsErrorErrFunc(), normalizer) 151 self.icp_ = IcpRegressor(nc) 152 self.icp_.fit(X_train, y_train) 153 self.icp_.calibrate(X_calibration, y_calibration) 154 155 # Calculate AIC 156 # Get predictions 157 preds = self.obj.predict(X_calibration) 158 159 # Calculate SSE 160 self.sse_ = np.sum((y_calibration - preds) ** 2) 161 162 # Get number of parameters from the base model 163 n_params = ( 164 getattr(self.obj, "n_hidden_features", 0) + X_calibration.shape[1] 165 ) 166 167 # Calculate AIC 168 n_samples = len(y_calibration) 169 temp = n_samples * np.log(self.sse_ / n_samples) 170 self.aic_ = temp + 2 * n_params 171 self.bic_ = temp + np.log(n_samples) * n_params 172 173 return self
Fit the method
to training data (X, y).
Args:
X: array-like, shape = [n_samples, n_features];
Training set vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples, ]; Target values.
sample_weight: array-like, shape = [n_samples]
Sample weights.
175 def predict(self, X, return_pi=False): 176 """Obtain predictions and prediction intervals 177 178 Args: 179 180 X: array-like, shape = [n_samples, n_features]; 181 Testing set vectors, where n_samples is the number 182 of samples and n_features is the number of features. 183 184 return_pi: boolean 185 Whether the prediction interval is returned or not. 186 Default is False, for compatibility with other _estimators_. 187 If True, a tuple containing the predictions + lower and upper 188 bounds is returned. 189 190 """ 191 192 if self.method == "splitconformal": 193 pred = self.obj.predict(X) 194 195 if self.method == "localconformal": 196 pred = self.icp_.predict(X) 197 198 if self.method == "splitconformal": 199 200 if ( 201 self.replications is None and self.type_pi is None 202 ): # type_pi is not used here, no bootstrap or kde 203 204 if return_pi: 205 206 DescribeResult = namedtuple( 207 "DescribeResult", ("mean", "lower", "upper") 208 ) 209 return DescribeResult( 210 pred, pred - self.quantile_, pred + self.quantile_ 211 ) 212 213 else: 214 215 return pred 216 217 else: # self.method == "splitconformal" and if self.replications is not None, type_pi must be used 218 219 if self.type_pi is None: 220 self.type_pi = "kde" 221 raise Warning("type_pi must be set, setting to 'kde'") 222 223 if self.replications is None: 224 self.replications = 100 225 raise Warning("replications must be set, setting to 100") 226 227 assert self.type_pi in ( 228 "bootstrap", 229 "kde", 230 ), "`self.type_pi` must be in ('bootstrap', 'kde')" 231 232 if self.type_pi == "bootstrap": 233 np.random.seed(self.seed) 234 self.residuals_sims_ = np.asarray( 235 [ 236 np.random.choice( 237 a=self.scaled_calibrated_residuals_, 238 size=X.shape[0], 239 ) 240 for _ in range(self.replications) 241 ] 242 ).T 243 self.sims_ = np.asarray( 244 [ 245 pred 246 + self.calibrated_residuals_scaler_.scale_[0] 247 * self.residuals_sims_[:, i].ravel() 248 for i in range(self.replications) 249 ] 250 ).T 251 elif self.type_pi == "kde": 252 self.kde_ = gaussian_kde( 253 dataset=self.scaled_calibrated_residuals_ 254 ) 255 self.sims_ = np.asarray( 256 [ 257 pred 258 + self.calibrated_residuals_scaler_.scale_[0] 259 * self.kde_.resample( 260 size=X.shape[0], seed=self.seed + i 261 ).ravel() 262 for i in range(self.replications) 263 ] 264 ).T 265 266 self.mean_ = np.mean(self.sims_, axis=1) 267 self.lower_ = np.quantile( 268 self.sims_, q=self.alpha_ / 200, axis=1 269 ) 270 self.upper_ = np.quantile( 271 self.sims_, q=1 - self.alpha_ / 200, axis=1 272 ) 273 274 DescribeResult = namedtuple( 275 "DescribeResult", ("mean", "sims", "lower", "upper") 276 ) 277 278 return DescribeResult( 279 self.mean_, self.sims_, self.lower_, self.upper_ 280 ) 281 282 if self.method == "localconformal": 283 284 if self.replications is None: 285 286 if return_pi: 287 288 predictions_bounds = self.icp_.predict( 289 X, significance=1 - self.level 290 ) 291 DescribeResult = namedtuple( 292 "DescribeResult", ("mean", "lower", "upper") 293 ) 294 return DescribeResult( 295 pred, predictions_bounds[:, 0], predictions_bounds[:, 1] 296 ) 297 298 else: 299 300 return pred 301 302 else: # (self.method == "localconformal") and if self.replications is not None 303 304 raise NotImplementedError( 305 "When self.method == 'localconformal', there are no simulations" 306 )
Obtain predictions and prediction intervals
Args:
X: array-like, shape = [n_samples, n_features];
Testing set vectors, where n_samples is the number
of samples and n_features is the number of features.
return_pi: boolean
Whether the prediction interval is returned or not.
Default is False, for compatibility with other _estimators_.
If True, a tuple containing the predictions + lower and upper
bounds is returned.
18class SimpleMultitaskClassifier(Base, ClassifierMixin): 19 """Multitask Classification model based on regression models, with shared covariates 20 21 Parameters: 22 23 obj: object 24 any object (must be a regression model) containing a method fit (obj.fit()) 25 and a method predict (obj.predict()) 26 27 seed: int 28 reproducibility seed 29 30 Attributes: 31 32 fit_objs_: dict 33 objects adjusted to each individual time series 34 35 n_classes_: int 36 number of classes for the classifier 37 38 Examples: 39 40 ```python 41 import nnetsauce as ns 42 import numpy as np 43 from sklearn.datasets import load_breast_cancer 44 from sklearn.linear_model import LinearRegression 45 from sklearn.model_selection import train_test_split 46 from sklearn import metrics 47 from time import time 48 49 breast_cancer = load_breast_cancer() 50 Z = breast_cancer.data 51 t = breast_cancer.target 52 53 X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2, 54 random_state=123+2*10) 55 56 # Linear Regression is used 57 regr = LinearRegression() 58 fit_obj = ns.SimpleMultitaskClassifier(regr) 59 60 start = time() 61 fit_obj.fit(X_train, y_train) 62 print(f"Elapsed {time() - start}") 63 64 print(fit_obj.score(X_test, y_test)) 65 print(fit_obj.score(X_test, y_test, scoring="roc_auc")) 66 67 start = time() 68 preds = fit_obj.predict(X_test) 69 print(f"Elapsed {time() - start}") 70 print(metrics.classification_report(preds, y_test)) 71 ``` 72 73 """ 74 75 # construct the object ----- 76 _estimator_type = "classifier" 77 78 def __init__( 79 self, 80 obj, 81 ): 82 self.type_fit = "classification" 83 self.obj = obj 84 self.fit_objs_ = {} 85 self.X_scaler_ = StandardScaler() 86 self.scaled_X_ = None 87 88 def fit(self, X, y, sample_weight=None, **kwargs): 89 """Fit SimpleMultitaskClassifier to training data (X, y). 90 91 Args: 92 93 X: {array-like}, shape = [n_samples, n_features] 94 Training vectors, where n_samples is the number 95 of samples and n_features is the number of features. 96 97 y: array-like, shape = [n_samples] 98 Target values. 99 100 **kwargs: additional parameters to be passed to 101 self.cook_training_set or self.obj.fit 102 103 Returns: 104 105 self: object 106 107 """ 108 109 assert mx.is_factor(y), "y must contain only integers" 110 111 self.classes_ = np.unique(y) # for compatibility with sklearn 112 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 113 114 self.scaled_X_ = self.X_scaler_.fit_transform(X) 115 116 # multitask response 117 Y = mo.one_hot_encode2(y, self.n_classes_) 118 119 try: 120 for i in range(self.n_classes_): 121 self.fit_objs_[i] = deepcopy( 122 self.obj.fit( 123 self.scaled_X_, 124 Y[:, i], 125 sample_weight=sample_weight, 126 **kwargs 127 ) 128 ) 129 except Exception as e: 130 for i in range(self.n_classes_): 131 self.fit_objs_[i] = deepcopy( 132 self.obj.fit(self.scaled_X_, Y[:, i], **kwargs) 133 ) 134 return self 135 136 def predict(self, X, **kwargs): 137 """Predict test data X. 138 139 Args: 140 141 X: {array-like}, shape = [n_samples, n_features] 142 Training vectors, where n_samples is the number 143 of samples and n_features is the number of features. 144 145 **kwargs: additional parameters 146 147 Returns: 148 149 model predictions: {array-like} 150 151 """ 152 return np.argmax(self.predict_proba(X, **kwargs), axis=1) 153 154 def predict_proba(self, X, **kwargs): 155 """Predict probabilities for test data X. 156 157 Args: 158 159 X: {array-like}, shape = [n_samples, n_features] 160 Training vectors, where n_samples is the number 161 of samples and n_features is the number of features. 162 163 **kwargs: additional parameters 164 165 Returns: 166 167 probability estimates for test data: {array-like} 168 169 """ 170 171 shape_X = X.shape 172 173 probs = np.zeros((shape_X[0], self.n_classes_)) 174 175 if len(shape_X) == 1: # one example 176 177 n_features = shape_X[0] 178 179 new_X = mo.rbind( 180 X.reshape(1, n_features), 181 np.ones(n_features).reshape(1, n_features), 182 ) 183 184 Z = self.X_scaler_.transform(new_X, **kwargs) 185 186 # Fallback to standard model 187 for i in range(self.n_classes_): 188 probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0] 189 190 else: # multiple rows 191 192 Z = self.X_scaler_.transform(X, **kwargs) 193 194 # Fallback to standard model 195 for i in range(self.n_classes_): 196 probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs) 197 198 expit_raw_probs = expit(probs) 199 200 # Add small epsilon to avoid division by zero 201 row_sums = expit_raw_probs.sum(axis=1)[:, None] 202 row_sums[row_sums < 1e-10] = 1e-10 203 204 return expit_raw_probs / row_sums 205 206 def decision_function(self, X, **kwargs): 207 """Compute the decision function of X. 208 209 Parameters: 210 X: {array-like}, shape = [n_samples, n_features] 211 Samples to compute decision function for. 212 213 **kwargs: additional parameters to be passed to 214 self.cook_test_set 215 216 Returns: 217 array-like of shape (n_samples,) or (n_samples, n_classes) 218 Decision function of the input samples. The order of outputs is the same 219 as that of the classes passed to fit. 220 """ 221 if not hasattr(self.obj, "decision_function"): 222 # If base classifier doesn't have decision_function, use predict_proba 223 proba = self.predict_proba(X, **kwargs) 224 if proba.shape[1] == 2: 225 return proba[:, 1] # For binary classification 226 return proba # For multiclass 227 228 if len(X.shape) == 1: 229 n_features = X.shape[0] 230 new_X = mo.rbind( 231 X.reshape(1, n_features), 232 np.ones(n_features).reshape(1, n_features), 233 ) 234 235 return ( 236 self.obj.decision_function( 237 self.cook_test_set(new_X, **kwargs), **kwargs 238 ) 239 )[0] 240 241 return self.obj.decision_function( 242 self.cook_test_set(X, **kwargs), **kwargs 243 ) 244 245 @property 246 def _estimator_type(self): 247 return "classifier"
Multitask Classification model based on regression models, with shared covariates
Parameters:
obj: object
any object (must be a regression model) containing a method fit (obj.fit())
and a method predict (obj.predict())
seed: int
reproducibility seed
Attributes:
fit_objs_: dict
objects adjusted to each individual time series
n_classes_: int
number of classes for the classifier
Examples:
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time
breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
random_state=123+2*10)
# Linear Regression is used
regr = LinearRegression()
fit_obj = ns.SimpleMultitaskClassifier(regr)
start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")
print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
88 def fit(self, X, y, sample_weight=None, **kwargs): 89 """Fit SimpleMultitaskClassifier to training data (X, y). 90 91 Args: 92 93 X: {array-like}, shape = [n_samples, n_features] 94 Training vectors, where n_samples is the number 95 of samples and n_features is the number of features. 96 97 y: array-like, shape = [n_samples] 98 Target values. 99 100 **kwargs: additional parameters to be passed to 101 self.cook_training_set or self.obj.fit 102 103 Returns: 104 105 self: object 106 107 """ 108 109 assert mx.is_factor(y), "y must contain only integers" 110 111 self.classes_ = np.unique(y) # for compatibility with sklearn 112 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 113 114 self.scaled_X_ = self.X_scaler_.fit_transform(X) 115 116 # multitask response 117 Y = mo.one_hot_encode2(y, self.n_classes_) 118 119 try: 120 for i in range(self.n_classes_): 121 self.fit_objs_[i] = deepcopy( 122 self.obj.fit( 123 self.scaled_X_, 124 Y[:, i], 125 sample_weight=sample_weight, 126 **kwargs 127 ) 128 ) 129 except Exception as e: 130 for i in range(self.n_classes_): 131 self.fit_objs_[i] = deepcopy( 132 self.obj.fit(self.scaled_X_, Y[:, i], **kwargs) 133 ) 134 return self
Fit SimpleMultitaskClassifier to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
136 def predict(self, X, **kwargs): 137 """Predict test data X. 138 139 Args: 140 141 X: {array-like}, shape = [n_samples, n_features] 142 Training vectors, where n_samples is the number 143 of samples and n_features is the number of features. 144 145 **kwargs: additional parameters 146 147 Returns: 148 149 model predictions: {array-like} 150 151 """ 152 return np.argmax(self.predict_proba(X, **kwargs), axis=1)
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters
Returns:
model predictions: {array-like}
154 def predict_proba(self, X, **kwargs): 155 """Predict probabilities for test data X. 156 157 Args: 158 159 X: {array-like}, shape = [n_samples, n_features] 160 Training vectors, where n_samples is the number 161 of samples and n_features is the number of features. 162 163 **kwargs: additional parameters 164 165 Returns: 166 167 probability estimates for test data: {array-like} 168 169 """ 170 171 shape_X = X.shape 172 173 probs = np.zeros((shape_X[0], self.n_classes_)) 174 175 if len(shape_X) == 1: # one example 176 177 n_features = shape_X[0] 178 179 new_X = mo.rbind( 180 X.reshape(1, n_features), 181 np.ones(n_features).reshape(1, n_features), 182 ) 183 184 Z = self.X_scaler_.transform(new_X, **kwargs) 185 186 # Fallback to standard model 187 for i in range(self.n_classes_): 188 probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0] 189 190 else: # multiple rows 191 192 Z = self.X_scaler_.transform(X, **kwargs) 193 194 # Fallback to standard model 195 for i in range(self.n_classes_): 196 probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs) 197 198 expit_raw_probs = expit(probs) 199 200 # Add small epsilon to avoid division by zero 201 row_sums = expit_raw_probs.sum(axis=1)[:, None] 202 row_sums[row_sums < 1e-10] = 1e-10 203 204 return expit_raw_probs / row_sums
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters
Returns:
probability estimates for test data: {array-like}
9class Optimizer: 10 """Optimizer class 11 12 Attributes: 13 14 type_optim: str 15 type of optimizer, (currently) either 'sgd' (stochastic minibatch gradient descent) 16 or 'scd' (stochastic minibatch coordinate descent) 17 18 num_iters: int 19 number of iterations of the optimizer 20 21 learning_rate: float 22 step size 23 24 batch_prop: float 25 proportion of the initial data used at each optimization step 26 27 learning_method: str 28 "poly" - learning rate decreasing as a polynomial function 29 of # of iterations (default) 30 "exp" - learning rate decreasing as an exponential function 31 of # of iterations 32 "momentum" - gradient descent using momentum 33 34 randomization: str 35 type of randomization applied at each step 36 "strat" - stratified subsampling (default) 37 "shuffle" - random subsampling 38 39 mass: float 40 mass on velocity, for `method` == "momentum" 41 42 decay: float 43 coefficient of decrease of the learning rate for 44 `method` == "poly" and `method` == "exp" 45 46 tolerance: float 47 early stopping parameter (convergence of loss function) 48 49 verbose: int 50 controls verbosity of gradient descent 51 0 - nothing is printed 52 1 - a progress bar is printed 53 2 - successive loss function values are printed 54 55 """ 56 57 # construct the object ----- 58 59 def __init__( 60 self, 61 type_optim="sgd", 62 num_iters=100, 63 learning_rate=0.01, 64 batch_prop=1.0, 65 learning_method="momentum", 66 randomization="strat", 67 mass=0.9, 68 decay=0.1, 69 tolerance=1e-3, 70 verbose=1, 71 ): 72 self.type_optim = type_optim 73 self.num_iters = num_iters 74 self.learning_rate = learning_rate 75 self.batch_prop = batch_prop 76 self.learning_method = learning_method 77 self.randomization = randomization 78 self.mass = mass 79 self.decay = decay 80 self.tolerance = tolerance 81 self.verbose = verbose 82 self.opt = None 83 84 def fit(self, loss_func, response, x0, q=None, **kwargs): 85 """Fit GLM model to training data (X, y). 86 87 Args: 88 89 loss_func: loss function 90 91 response: array-like, shape = [n_samples] 92 target variable (used for subsampling) 93 94 x0: array-like, shape = [n_features] 95 initial value provided to the optimizer 96 97 **kwargs: additional parameters to be passed to 98 loss function 99 100 Returns: 101 102 self: object 103 104 """ 105 106 if self.type_optim == "scd": 107 self.results = scd( 108 loss_func, 109 response=response, 110 x=x0, 111 num_iters=self.num_iters, 112 batch_prop=self.batch_prop, 113 learning_rate=self.learning_rate, 114 learning_method=self.learning_method, 115 mass=self.mass, 116 decay=self.decay, 117 randomization=self.randomization, 118 tolerance=self.tolerance, 119 verbose=self.verbose, 120 **kwargs 121 ) 122 123 if self.type_optim == "sgd": 124 self.results = sgd( 125 loss_func, 126 response=response, 127 x=x0, 128 num_iters=self.num_iters, 129 batch_prop=self.batch_prop, 130 learning_rate=self.learning_rate, 131 learning_method=self.learning_method, 132 mass=self.mass, 133 decay=self.decay, 134 randomization=self.randomization, 135 tolerance=self.tolerance, 136 verbose=self.verbose, 137 **kwargs 138 ) 139 140 return self 141 142 def one_hot_encode(self, y, n_classes): 143 return one_hot_encode(y, n_classes)
Optimizer class
Attributes:
type_optim: str
type of optimizer, (currently) either 'sgd' (stochastic minibatch gradient descent)
or 'scd' (stochastic minibatch coordinate descent)
num_iters: int
number of iterations of the optimizer
learning_rate: float
step size
batch_prop: float
proportion of the initial data used at each optimization step
learning_method: str
"poly" - learning rate decreasing as a polynomial function
of # of iterations (default)
"exp" - learning rate decreasing as an exponential function
of # of iterations
"momentum" - gradient descent using momentum
randomization: str
type of randomization applied at each step
"strat" - stratified subsampling (default)
"shuffle" - random subsampling
mass: float
mass on velocity, for `method` == "momentum"
decay: float
coefficient of decrease of the learning rate for
`method` == "poly" and `method` == "exp"
tolerance: float
early stopping parameter (convergence of loss function)
verbose: int
controls verbosity of gradient descent
0 - nothing is printed
1 - a progress bar is printed
2 - successive loss function values are printed
84 def fit(self, loss_func, response, x0, q=None, **kwargs): 85 """Fit GLM model to training data (X, y). 86 87 Args: 88 89 loss_func: loss function 90 91 response: array-like, shape = [n_samples] 92 target variable (used for subsampling) 93 94 x0: array-like, shape = [n_features] 95 initial value provided to the optimizer 96 97 **kwargs: additional parameters to be passed to 98 loss function 99 100 Returns: 101 102 self: object 103 104 """ 105 106 if self.type_optim == "scd": 107 self.results = scd( 108 loss_func, 109 response=response, 110 x=x0, 111 num_iters=self.num_iters, 112 batch_prop=self.batch_prop, 113 learning_rate=self.learning_rate, 114 learning_method=self.learning_method, 115 mass=self.mass, 116 decay=self.decay, 117 randomization=self.randomization, 118 tolerance=self.tolerance, 119 verbose=self.verbose, 120 **kwargs 121 ) 122 123 if self.type_optim == "sgd": 124 self.results = sgd( 125 loss_func, 126 response=response, 127 x=x0, 128 num_iters=self.num_iters, 129 batch_prop=self.batch_prop, 130 learning_rate=self.learning_rate, 131 learning_method=self.learning_method, 132 mass=self.mass, 133 decay=self.decay, 134 randomization=self.randomization, 135 tolerance=self.tolerance, 136 verbose=self.verbose, 137 **kwargs 138 ) 139 140 return self
Fit GLM model to training data (X, y).
Args:
loss_func: loss function
response: array-like, shape = [n_samples]
target variable (used for subsampling)
x0: array-like, shape = [n_features]
initial value provided to the optimizer
**kwargs: additional parameters to be passed to
loss function
Returns:
self: object
37class QuantileRegressor(BaseEstimator, RegressorMixin): 38 """ 39 Quantile Regressor. 40 41 Parameters: 42 43 obj: base model (regression model) 44 The base regressor from which to build a 45 quantile regressor. 46 47 level: int, default=95 48 The level of the quantiles to compute. 49 50 scoring: str, default="predictions" 51 The scoring to use for the optimization and constructing 52 prediction intervals (predictions, residuals, conformal, 53 studentized, conformal-studentized). 54 55 Attributes: 56 57 obj_ : base model (regression model) 58 The base regressor from which to build a 59 quantile regressor. 60 61 offset_multipliers_ : list 62 The multipliers for the offset. 63 64 scoring_residuals_ : list 65 The residuals for the scoring. 66 67 student_multiplier_ : float 68 The multiplier for the student. 69 70 """ 71 72 def __init__(self, obj, level=95, scoring="predictions"): 73 assert scoring in ( 74 "predictions", 75 "residuals", 76 "conformal", 77 "studentized", 78 "conformal-studentized", 79 ), "scoring must be 'predictions' or 'residuals'" 80 self.obj = obj 81 low_risk_level = (1 - level / 100) / 2 82 self.quantiles = [low_risk_level, 0.5, 1 - low_risk_level] 83 self.scoring = scoring 84 self.offset_multipliers_ = None 85 self.obj_ = None 86 self.scoring_residuals_ = None 87 self.student_multiplier_ = None 88 89 def _compute_quantile_loss(self, residuals, quantile): 90 """ 91 Compute the quantile loss for a given set of residuals and quantile. 92 """ 93 return np.mean( 94 residuals 95 * (quantile * (residuals >= 0) + (quantile - 1) * (residuals < 0)) 96 ) 97 98 def _optimize_multiplier( 99 self, 100 y, 101 base_predictions, 102 prev_predictions, 103 scoring_residuals=None, 104 quantile=0.5, 105 ): 106 """ 107 Optimize the multiplier for a given quantile. 108 """ 109 if not 0 < quantile < 1: 110 raise ValueError("Quantile should be between 0 and 1.") 111 112 n = len(y) 113 114 def objective(log_multiplier): 115 """ 116 Objective function for optimization. 117 """ 118 # Convert to positive multiplier using exp 119 multiplier = np.exp(log_multiplier[0]) 120 if self.scoring == "predictions": 121 assert ( 122 base_predictions is not None 123 ), "base_predictions must be not None" 124 # Calculate predictions 125 if prev_predictions is None: 126 # For first quantile, subtract from conditional expectation 127 predictions = base_predictions - multiplier * np.abs( 128 base_predictions 129 ) 130 else: 131 # For other quantiles, add to previous quantile 132 offset = multiplier * np.abs(base_predictions) 133 predictions = prev_predictions + offset 134 elif self.scoring in ("residuals", "conformal"): 135 assert ( 136 scoring_residuals is not None 137 ), "scoring_residuals must be not None" 138 # print("scoring_residuals", scoring_residuals) 139 # Calculate predictions 140 if prev_predictions is None: 141 # For first quantile, subtract from conditional expectation 142 predictions = base_predictions - multiplier * np.std( 143 scoring_residuals 144 ) / np.sqrt(len(scoring_residuals)) 145 # print("predictions", predictions) 146 else: 147 # For other quantiles, add to previous quantile 148 offset = ( 149 multiplier 150 * np.std(scoring_residuals) 151 / np.sqrt(len(scoring_residuals)) 152 ) 153 predictions = prev_predictions + offset 154 elif self.scoring in ("studentized", "conformal-studentized"): 155 assert ( 156 scoring_residuals is not None 157 ), "scoring_residuals must be not None" 158 # Calculate predictions 159 if prev_predictions is None: 160 # For first quantile, subtract from conditional expectation 161 predictions = ( 162 base_predictions - multiplier * self.student_multiplier_ 163 ) 164 # print("predictions", predictions) 165 else: 166 # For other quantiles, add to previous quantile 167 offset = multiplier * self.student_multiplier_ 168 predictions = prev_predictions + offset 169 else: 170 raise ValueError("Invalid argument 'scoring'") 171 172 return self._compute_quantile_loss(y - predictions, quantile) 173 174 # Optimize in log space for numerical stability 175 # bounds = [(-10, 10)] # log space bounds 176 bounds = [(-100, 100)] # log space bounds 177 result = differential_evolution( 178 objective, 179 bounds, 180 # popsize=15, 181 # maxiter=100, 182 # tol=1e-4, 183 popsize=25, 184 maxiter=200, 185 tol=1e-6, 186 disp=False, 187 ) 188 189 return np.exp(result.x[0]) 190 191 def fit(self, X, y): 192 """Fit the model to the data. 193 194 Parameters: 195 196 X: {array-like}, shape = [n_samples, n_features] 197 Training vectors, where n_samples is the number of samples and 198 n_features is the number of features. 199 y: array-like, shape = [n_samples] 200 Target values. 201 """ 202 self.obj_ = clone(self.obj) 203 204 if self.scoring in ("predictions", "residuals"): 205 206 self.obj_.fit(X, y) 207 base_predictions = self.obj_.predict(X) 208 scoring_residuals = y - base_predictions 209 self.scoring_residuals_ = scoring_residuals 210 211 elif self.scoring == "conformal": 212 213 X_train, X_calib, y_train, y_calib = train_test_split( 214 X, y, test_size=0.5, random_state=42 215 ) 216 self.obj_.fit(X_train, y_train) 217 scoring_residuals = y_calib - self.obj_.predict( 218 X_calib 219 ) # These are calibration predictions 220 self.scoring_residuals_ = scoring_residuals 221 # Update base_predictions to use training predictions for optimization 222 self.obj_.fit(X_calib, y_calib) 223 base_predictions = self.obj_.predict(X_calib) 224 225 elif self.scoring in ("studentized", "conformal-studentized"): 226 227 # Calculate student multiplier 228 if self.scoring == "conformal-studentized": 229 X_train, X_calib, y_train, y_calib = train_test_split( 230 X, y, test_size=0.5, random_state=42 231 ) 232 self.obj_.fit(X_train, y_train) 233 scoring_residuals = y_calib - self.obj_.predict(X_calib) 234 # Calculate studentized multiplier using calibration data 235 self.student_multiplier_ = np.std(y_calib, ddof=1) / np.sqrt( 236 len(y_calib) - 1 237 ) 238 self.obj_.fit(X_calib, y_calib) 239 base_predictions = self.obj_.predict(X_calib) 240 else: # regular studentized 241 self.obj_.fit(X, y) 242 base_predictions = self.obj_.predict(X) 243 scoring_residuals = y - base_predictions 244 self.student_multiplier_ = np.std(y, ddof=1) / np.sqrt( 245 len(y) - 1 246 ) 247 248 # Initialize storage for multipliers 249 self.offset_multipliers_ = [] 250 # Keep track of current predictions for each quantile 251 current_predictions = None 252 253 # Fit each quantile sequentially 254 for i, quantile in enumerate(self.quantiles): 255 256 if self.scoring == "predictions": 257 258 multiplier = self._optimize_multiplier( 259 y=y, 260 base_predictions=base_predictions, 261 prev_predictions=current_predictions, 262 quantile=quantile, 263 ) 264 265 self.offset_multipliers_.append(multiplier) 266 267 # Update current predictions 268 if current_predictions is None: 269 # First quantile (lowest) 270 current_predictions = ( 271 base_predictions - multiplier * np.abs(base_predictions) 272 ) 273 else: 274 # Subsequent quantiles 275 offset = multiplier * np.abs(base_predictions) 276 current_predictions = current_predictions + offset 277 278 elif self.scoring == "residuals": 279 280 multiplier = self._optimize_multiplier( 281 y=y, 282 base_predictions=base_predictions, 283 scoring_residuals=scoring_residuals, 284 prev_predictions=current_predictions, 285 quantile=quantile, 286 ) 287 288 self.offset_multipliers_.append(multiplier) 289 290 # Update current predictions 291 if current_predictions is None: 292 # First quantile (lowest) 293 current_predictions = ( 294 base_predictions 295 - multiplier 296 * np.std(scoring_residuals) 297 / np.sqrt(len(scoring_residuals)) 298 ) 299 else: 300 # Subsequent quantiles 301 offset = ( 302 multiplier 303 * np.std(scoring_residuals) 304 / np.sqrt(len(scoring_residuals)) 305 ) 306 current_predictions = current_predictions + offset 307 308 elif self.scoring == "conformal": 309 310 multiplier = self._optimize_multiplier( 311 y=y_calib, 312 base_predictions=base_predictions, 313 scoring_residuals=scoring_residuals, 314 prev_predictions=current_predictions, 315 quantile=quantile, 316 ) 317 318 self.offset_multipliers_.append(multiplier) 319 320 # Update current predictions 321 if current_predictions is None: 322 # First quantile (lowest) 323 current_predictions = ( 324 base_predictions 325 - multiplier 326 * np.std(scoring_residuals) 327 / np.sqrt(len(scoring_residuals)) 328 ) 329 else: 330 # Subsequent quantiles 331 offset = ( 332 multiplier 333 * np.std(scoring_residuals) 334 / np.sqrt(len(scoring_residuals)) 335 ) 336 current_predictions = current_predictions + offset 337 338 elif self.scoring in ("studentized", "conformal-studentized"): 339 340 multiplier = self._optimize_multiplier( 341 y=y_calib if self.scoring == "conformal-studentized" else y, 342 base_predictions=base_predictions, 343 scoring_residuals=scoring_residuals, 344 prev_predictions=current_predictions, 345 quantile=quantile, 346 ) 347 348 self.offset_multipliers_.append(multiplier) 349 350 # Update current predictions 351 if current_predictions is None: 352 current_predictions = ( 353 base_predictions - multiplier * self.student_multiplier_ 354 ) 355 else: 356 offset = multiplier * self.student_multiplier_ 357 current_predictions = current_predictions + offset 358 359 return self 360 361 def predict(self, X, return_pi=False): 362 """Predict the target variable. 363 364 Parameters: 365 366 X: {array-like}, shape = [n_samples, n_features] 367 Training vectors, where n_samples is the number of samples and 368 n_features is the number of features. 369 370 return_pi: bool, default=True 371 Whether to return the prediction intervals. 372 """ 373 if self.obj_ is None or self.offset_multipliers_ is None: 374 raise ValueError("Model not fitted yet.") 375 376 base_predictions = self.obj_.predict(X) 377 all_predictions = [] 378 379 if self.scoring == "predictions": 380 381 # Generate first quantile 382 current_predictions = base_predictions - self.offset_multipliers_[ 383 0 384 ] * np.abs(base_predictions) 385 all_predictions.append(current_predictions) 386 387 # Generate remaining quantiles 388 for multiplier in self.offset_multipliers_[1:]: 389 offset = multiplier * np.abs(base_predictions) 390 current_predictions = current_predictions + offset 391 all_predictions.append(current_predictions) 392 393 elif self.scoring in ("residuals", "conformal"): 394 395 # Generate first quantile 396 current_predictions = base_predictions - self.offset_multipliers_[ 397 0 398 ] * np.std(self.scoring_residuals_) / np.sqrt( 399 len(self.scoring_residuals_) 400 ) 401 all_predictions.append(current_predictions) 402 403 # Generate remaining quantiles 404 for multiplier in self.offset_multipliers_[1:]: 405 offset = ( 406 multiplier 407 * np.std(self.scoring_residuals_) 408 / np.sqrt(len(self.scoring_residuals_)) 409 ) 410 current_predictions = current_predictions + offset 411 all_predictions.append(current_predictions) 412 413 elif self.scoring in ("studentized", "conformal-studentized"): 414 # Generate first quantile 415 current_predictions = ( 416 base_predictions 417 - self.offset_multipliers_[0] * self.student_multiplier_ 418 ) 419 all_predictions.append(current_predictions) 420 421 # Generate remaining quantiles 422 for multiplier in self.offset_multipliers_[1:]: 423 offset = multiplier * self.student_multiplier_ 424 current_predictions = current_predictions + offset 425 all_predictions.append(current_predictions) 426 427 if return_pi == False: 428 return np.asarray(all_predictions[1]) 429 430 DescribeResult = namedtuple( 431 "DecribeResult", ["mean", "lower", "upper", "median"] 432 ) 433 DescribeResult.mean = base_predictions 434 DescribeResult.lower = np.asarray(all_predictions[0]) 435 DescribeResult.median = np.asarray(all_predictions[1]) 436 DescribeResult.upper = np.asarray(all_predictions[2]) 437 438 return DescribeResult
Quantile Regressor.
Parameters:
obj: base model (regression model)
The base regressor from which to build a
quantile regressor.
level: int, default=95
The level of the quantiles to compute.
scoring: str, default="predictions"
The scoring to use for the optimization and constructing
prediction intervals (predictions, residuals, conformal,
studentized, conformal-studentized).
Attributes:
obj_ : base model (regression model)
The base regressor from which to build a
quantile regressor.
offset_multipliers_ : list
The multipliers for the offset.
scoring_residuals_ : list
The residuals for the scoring.
student_multiplier_ : float
The multiplier for the student.
191 def fit(self, X, y): 192 """Fit the model to the data. 193 194 Parameters: 195 196 X: {array-like}, shape = [n_samples, n_features] 197 Training vectors, where n_samples is the number of samples and 198 n_features is the number of features. 199 y: array-like, shape = [n_samples] 200 Target values. 201 """ 202 self.obj_ = clone(self.obj) 203 204 if self.scoring in ("predictions", "residuals"): 205 206 self.obj_.fit(X, y) 207 base_predictions = self.obj_.predict(X) 208 scoring_residuals = y - base_predictions 209 self.scoring_residuals_ = scoring_residuals 210 211 elif self.scoring == "conformal": 212 213 X_train, X_calib, y_train, y_calib = train_test_split( 214 X, y, test_size=0.5, random_state=42 215 ) 216 self.obj_.fit(X_train, y_train) 217 scoring_residuals = y_calib - self.obj_.predict( 218 X_calib 219 ) # These are calibration predictions 220 self.scoring_residuals_ = scoring_residuals 221 # Update base_predictions to use training predictions for optimization 222 self.obj_.fit(X_calib, y_calib) 223 base_predictions = self.obj_.predict(X_calib) 224 225 elif self.scoring in ("studentized", "conformal-studentized"): 226 227 # Calculate student multiplier 228 if self.scoring == "conformal-studentized": 229 X_train, X_calib, y_train, y_calib = train_test_split( 230 X, y, test_size=0.5, random_state=42 231 ) 232 self.obj_.fit(X_train, y_train) 233 scoring_residuals = y_calib - self.obj_.predict(X_calib) 234 # Calculate studentized multiplier using calibration data 235 self.student_multiplier_ = np.std(y_calib, ddof=1) / np.sqrt( 236 len(y_calib) - 1 237 ) 238 self.obj_.fit(X_calib, y_calib) 239 base_predictions = self.obj_.predict(X_calib) 240 else: # regular studentized 241 self.obj_.fit(X, y) 242 base_predictions = self.obj_.predict(X) 243 scoring_residuals = y - base_predictions 244 self.student_multiplier_ = np.std(y, ddof=1) / np.sqrt( 245 len(y) - 1 246 ) 247 248 # Initialize storage for multipliers 249 self.offset_multipliers_ = [] 250 # Keep track of current predictions for each quantile 251 current_predictions = None 252 253 # Fit each quantile sequentially 254 for i, quantile in enumerate(self.quantiles): 255 256 if self.scoring == "predictions": 257 258 multiplier = self._optimize_multiplier( 259 y=y, 260 base_predictions=base_predictions, 261 prev_predictions=current_predictions, 262 quantile=quantile, 263 ) 264 265 self.offset_multipliers_.append(multiplier) 266 267 # Update current predictions 268 if current_predictions is None: 269 # First quantile (lowest) 270 current_predictions = ( 271 base_predictions - multiplier * np.abs(base_predictions) 272 ) 273 else: 274 # Subsequent quantiles 275 offset = multiplier * np.abs(base_predictions) 276 current_predictions = current_predictions + offset 277 278 elif self.scoring == "residuals": 279 280 multiplier = self._optimize_multiplier( 281 y=y, 282 base_predictions=base_predictions, 283 scoring_residuals=scoring_residuals, 284 prev_predictions=current_predictions, 285 quantile=quantile, 286 ) 287 288 self.offset_multipliers_.append(multiplier) 289 290 # Update current predictions 291 if current_predictions is None: 292 # First quantile (lowest) 293 current_predictions = ( 294 base_predictions 295 - multiplier 296 * np.std(scoring_residuals) 297 / np.sqrt(len(scoring_residuals)) 298 ) 299 else: 300 # Subsequent quantiles 301 offset = ( 302 multiplier 303 * np.std(scoring_residuals) 304 / np.sqrt(len(scoring_residuals)) 305 ) 306 current_predictions = current_predictions + offset 307 308 elif self.scoring == "conformal": 309 310 multiplier = self._optimize_multiplier( 311 y=y_calib, 312 base_predictions=base_predictions, 313 scoring_residuals=scoring_residuals, 314 prev_predictions=current_predictions, 315 quantile=quantile, 316 ) 317 318 self.offset_multipliers_.append(multiplier) 319 320 # Update current predictions 321 if current_predictions is None: 322 # First quantile (lowest) 323 current_predictions = ( 324 base_predictions 325 - multiplier 326 * np.std(scoring_residuals) 327 / np.sqrt(len(scoring_residuals)) 328 ) 329 else: 330 # Subsequent quantiles 331 offset = ( 332 multiplier 333 * np.std(scoring_residuals) 334 / np.sqrt(len(scoring_residuals)) 335 ) 336 current_predictions = current_predictions + offset 337 338 elif self.scoring in ("studentized", "conformal-studentized"): 339 340 multiplier = self._optimize_multiplier( 341 y=y_calib if self.scoring == "conformal-studentized" else y, 342 base_predictions=base_predictions, 343 scoring_residuals=scoring_residuals, 344 prev_predictions=current_predictions, 345 quantile=quantile, 346 ) 347 348 self.offset_multipliers_.append(multiplier) 349 350 # Update current predictions 351 if current_predictions is None: 352 current_predictions = ( 353 base_predictions - multiplier * self.student_multiplier_ 354 ) 355 else: 356 offset = multiplier * self.student_multiplier_ 357 current_predictions = current_predictions + offset 358 359 return self
Fit the model to the data.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
361 def predict(self, X, return_pi=False): 362 """Predict the target variable. 363 364 Parameters: 365 366 X: {array-like}, shape = [n_samples, n_features] 367 Training vectors, where n_samples is the number of samples and 368 n_features is the number of features. 369 370 return_pi: bool, default=True 371 Whether to return the prediction intervals. 372 """ 373 if self.obj_ is None or self.offset_multipliers_ is None: 374 raise ValueError("Model not fitted yet.") 375 376 base_predictions = self.obj_.predict(X) 377 all_predictions = [] 378 379 if self.scoring == "predictions": 380 381 # Generate first quantile 382 current_predictions = base_predictions - self.offset_multipliers_[ 383 0 384 ] * np.abs(base_predictions) 385 all_predictions.append(current_predictions) 386 387 # Generate remaining quantiles 388 for multiplier in self.offset_multipliers_[1:]: 389 offset = multiplier * np.abs(base_predictions) 390 current_predictions = current_predictions + offset 391 all_predictions.append(current_predictions) 392 393 elif self.scoring in ("residuals", "conformal"): 394 395 # Generate first quantile 396 current_predictions = base_predictions - self.offset_multipliers_[ 397 0 398 ] * np.std(self.scoring_residuals_) / np.sqrt( 399 len(self.scoring_residuals_) 400 ) 401 all_predictions.append(current_predictions) 402 403 # Generate remaining quantiles 404 for multiplier in self.offset_multipliers_[1:]: 405 offset = ( 406 multiplier 407 * np.std(self.scoring_residuals_) 408 / np.sqrt(len(self.scoring_residuals_)) 409 ) 410 current_predictions = current_predictions + offset 411 all_predictions.append(current_predictions) 412 413 elif self.scoring in ("studentized", "conformal-studentized"): 414 # Generate first quantile 415 current_predictions = ( 416 base_predictions 417 - self.offset_multipliers_[0] * self.student_multiplier_ 418 ) 419 all_predictions.append(current_predictions) 420 421 # Generate remaining quantiles 422 for multiplier in self.offset_multipliers_[1:]: 423 offset = multiplier * self.student_multiplier_ 424 current_predictions = current_predictions + offset 425 all_predictions.append(current_predictions) 426 427 if return_pi == False: 428 return np.asarray(all_predictions[1]) 429 430 DescribeResult = namedtuple( 431 "DecribeResult", ["mean", "lower", "upper", "median"] 432 ) 433 DescribeResult.mean = base_predictions 434 DescribeResult.lower = np.asarray(all_predictions[0]) 435 DescribeResult.median = np.asarray(all_predictions[1]) 436 DescribeResult.upper = np.asarray(all_predictions[2]) 437 438 return DescribeResult
Predict the target variable.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
return_pi: bool, default=True
Whether to return the prediction intervals.
43class QuantileClassifier(BaseEstimator, ClassifierMixin): 44 """ 45 Quantile Classifier. 46 47 Parameters: 48 49 obj: base model (classification model) 50 The base classifier from which to build a 51 quantile classifier. 52 53 level: int, default=95 54 The level of the quantiles to compute. 55 56 scoring: str, default="predictions" 57 The scoring to use for the optimization and constructing 58 prediction intervals (predictions, residuals, conformal, 59 studentized, conformal-studentized). 60 61 Attributes: 62 63 obj_ : base model (classification model) 64 The base classifier from which to build a 65 quantile classifier. 66 67 offset_multipliers_ : list 68 The multipliers for the offset. 69 70 scoring_residuals_ : list 71 The residuals for the scoring. 72 73 student_multiplier_ : float 74 The multiplier for the student. 75 76 77 """ 78 79 def __init__(self, obj, level=95, scoring="predictions"): 80 assert scoring in ( 81 "predictions", 82 "residuals", 83 "conformal", 84 "studentized", 85 "conformal-studentized", 86 ), "scoring must be 'predictions' or 'residuals'" 87 self.obj = obj 88 quantileregressor = QuantileRegressor(self.obj) 89 quantileregressor.predict = partial( 90 quantileregressor.predict, return_pi=False 91 ) 92 self.obj_ = SimpleMultitaskClassifier(quantileregressor) 93 94 def fit(self, X, y, **kwargs): 95 self.obj_.fit(X, y, **kwargs) 96 97 def predict(self, X, **kwargs): 98 return self.obj_.predict(X, **kwargs) 99 100 def predict_proba(self, X, **kwargs): 101 return self.obj_.predict_proba(X, **kwargs)
Quantile Classifier.
Parameters:
obj: base model (classification model)
The base classifier from which to build a
quantile classifier.
level: int, default=95
The level of the quantiles to compute.
scoring: str, default="predictions"
The scoring to use for the optimization and constructing
prediction intervals (predictions, residuals, conformal,
studentized, conformal-studentized).
Attributes:
obj_ : base model (classification model)
The base classifier from which to build a
quantile classifier.
offset_multipliers_ : list
The multipliers for the offset.
scoring_residuals_ : list
The residuals for the scoring.
student_multiplier_ : float
The multiplier for the student.
18class RandomBagRegressor(RandomBag, RegressorMixin): 19 """Randomized 'Bagging' Regression model 20 21 Parameters: 22 23 obj: object 24 any object containing a method fit (obj.fit()) and a method predict 25 (obj.predict()) 26 27 n_estimators: int 28 number of boosting iterations 29 30 n_hidden_features: int 31 number of nodes in the hidden layer 32 33 activation_name: str 34 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 35 36 a: float 37 hyperparameter for 'prelu' or 'elu' activation function 38 39 nodes_sim: str 40 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 41 'uniform' 42 43 bias: boolean 44 indicates if the hidden layer contains a bias term (True) or not 45 (False) 46 47 dropout: float 48 regularization parameter; (random) percentage of nodes dropped out 49 of the training 50 51 direct_link: boolean 52 indicates if the original predictors are included (True) in model''s 53 fitting or not (False) 54 55 n_clusters: int 56 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: 57 no clustering) 58 59 cluster_encode: bool 60 defines how the variable containing clusters is treated (default is one-hot) 61 if `False`, then labels are used, without one-hot encoding 62 63 type_clust: str 64 type of clustering method: currently k-means ('kmeans') or Gaussian 65 Mixture Model ('gmm') 66 67 type_scaling: a tuple of 3 strings 68 scaling methods for inputs, hidden layer, and clustering respectively 69 (and when relevant). 70 Currently available: standardization ('std') or MinMax scaling ('minmax') 71 72 col_sample: float 73 percentage of covariates randomly chosen for training 74 75 row_sample: float 76 percentage of rows chosen for training, by stratified bootstrapping 77 78 seed: int 79 reproducibility seed for nodes_sim=='uniform' 80 81 backend: str 82 "cpu" or "gpu" or "tpu" 83 84 Attributes: 85 86 voter_: dict 87 dictionary containing all the fitted base-learners 88 89 90 Examples: 91 92 ```python 93 import numpy as np 94 import nnetsauce as ns 95 from sklearn.datasets import fetch_california_housing 96 from sklearn.tree import DecisionTreeRegressor 97 from sklearn.model_selection import train_test_split 98 99 X, y = fetch_california_housing(return_X_y=True, as_frame=False) 100 101 # split data into training test and test set 102 X_train, X_test, y_train, y_test = train_test_split(X, y, 103 test_size=0.2, random_state=13) 104 105 # Requires further tuning 106 obj = DecisionTreeRegressor(max_depth=3, random_state=123) 107 obj2 = ns.RandomBagRegressor(obj=obj, direct_link=False, 108 n_estimators=50, 109 col_sample=0.9, row_sample=0.9, 110 dropout=0, n_clusters=0, verbose=1) 111 112 obj2.fit(X_train, y_train) 113 114 print(np.sqrt(obj2.score(X_test, y_test))) # RMSE 115 116 ``` 117 118 """ 119 120 # construct the object ----- 121 122 def __init__( 123 self, 124 obj, 125 n_estimators=10, 126 n_hidden_features=1, 127 activation_name="relu", 128 a=0.01, 129 nodes_sim="sobol", 130 bias=True, 131 dropout=0, 132 direct_link=False, 133 n_clusters=2, 134 cluster_encode=True, 135 type_clust="kmeans", 136 type_scaling=("std", "std", "std"), 137 col_sample=1, 138 row_sample=1, 139 n_jobs=None, 140 seed=123, 141 verbose=1, 142 backend="cpu", 143 ): 144 super().__init__( 145 obj=obj, 146 n_estimators=n_estimators, 147 n_hidden_features=n_hidden_features, 148 activation_name=activation_name, 149 a=a, 150 nodes_sim=nodes_sim, 151 bias=bias, 152 dropout=dropout, 153 direct_link=direct_link, 154 n_clusters=n_clusters, 155 cluster_encode=cluster_encode, 156 type_clust=type_clust, 157 type_scaling=type_scaling, 158 col_sample=col_sample, 159 row_sample=row_sample, 160 seed=seed, 161 backend=backend, 162 ) 163 164 self.type_fit = "regression" 165 self.verbose = verbose 166 self.n_jobs = n_jobs 167 self.voter_ = {} 168 169 def fit(self, X, y, **kwargs): 170 """Fit Random 'Bagging' model to training data (X, y). 171 172 Args: 173 174 X: {array-like}, shape = [n_samples, n_features] 175 Training vectors, where n_samples is the number 176 of samples and n_features is the number of features. 177 178 y: array-like, shape = [n_samples] 179 Target values. 180 181 **kwargs: additional parameters to be passed to 182 self.cook_training_set or self.obj.fit 183 184 Returns: 185 186 self: object 187 188 """ 189 190 base_learner = CustomRegressor( 191 self.obj, 192 n_hidden_features=self.n_hidden_features, 193 activation_name=self.activation_name, 194 a=self.a, 195 nodes_sim=self.nodes_sim, 196 bias=self.bias, 197 dropout=self.dropout, 198 direct_link=self.direct_link, 199 n_clusters=self.n_clusters, 200 type_clust=self.type_clust, 201 type_scaling=self.type_scaling, 202 col_sample=self.col_sample, 203 row_sample=self.row_sample, 204 seed=self.seed, 205 ) 206 207 # 1 - Sequential training ----- 208 209 if self.n_jobs is None: 210 self.voter_ = rbagloop_regression( 211 base_learner, X, y, self.n_estimators, self.verbose, self.seed 212 ) 213 214 self.n_estimators = len(self.voter_) 215 216 return self 217 218 # 2 - Parallel training ----- 219 # buggy 220 # if self.n_jobs is not None: 221 def fit_estimators(m): 222 base_learner__ = deepcopy(base_learner) 223 base_learner__.set_params(seed=self.seed + m * 1000) 224 base_learner__.fit(X, y, **kwargs) 225 return base_learner__ 226 227 if self.verbose == 1: 228 voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")( 229 delayed(fit_estimators)(m) 230 for m in tqdm(range(self.n_estimators)) 231 ) 232 else: 233 voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")( 234 delayed(fit_estimators)(m) for m in range(self.n_estimators) 235 ) 236 237 self.voter_ = {i: elt for i, elt in enumerate(voters_list)} 238 239 self.n_estimators = len(self.voter_) 240 241 return self 242 243 def predict(self, X, weights=None, **kwargs): 244 """Predict for test data X. 245 246 Args: 247 248 X: {array-like}, shape = [n_samples, n_features] 249 Training vectors, where n_samples is the number 250 of samples and n_features is the number of features. 251 252 **kwargs: additional parameters to be passed to 253 self.cook_test_set 254 255 Returns: 256 257 estimates for test data: {array-like} 258 259 """ 260 261 def calculate_preds(voter, weights=None): 262 ensemble_preds = 0 263 264 n_iter = len(voter) 265 266 assert n_iter > 0, "no estimator found in `RandomBag` ensemble" 267 268 if weights is None: 269 for idx, elt in voter.items(): 270 ensemble_preds += elt.predict(X) 271 272 return ensemble_preds / n_iter 273 274 # if weights is not None: 275 for idx, elt in voter.items(): 276 ensemble_preds += weights[idx] * elt.predict(X) 277 278 return ensemble_preds 279 280 # end calculate_preds ---- 281 282 if weights is None: 283 return calculate_preds(self.voter_) 284 285 # if weights is not None: 286 self.weights = weights 287 288 return calculate_preds(self.voter_, weights)
Randomized 'Bagging' Regression model
Parameters:
obj: object
any object containing a method fit (obj.fit()) and a method predict
(obj.predict())
n_estimators: int
number of boosting iterations
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model''s
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
voter_: dict
dictionary containing all the fitted base-learners
Examples:
import numpy as np
import nnetsauce as ns
from sklearn.datasets import fetch_california_housing
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
X, y = fetch_california_housing(return_X_y=True, as_frame=False)
# split data into training test and test set
X_train, X_test, y_train, y_test = train_test_split(X, y,
test_size=0.2, random_state=13)
# Requires further tuning
obj = DecisionTreeRegressor(max_depth=3, random_state=123)
obj2 = ns.RandomBagRegressor(obj=obj, direct_link=False,
n_estimators=50,
col_sample=0.9, row_sample=0.9,
dropout=0, n_clusters=0, verbose=1)
obj2.fit(X_train, y_train)
print(np.sqrt(obj2.score(X_test, y_test))) # RMSE
169 def fit(self, X, y, **kwargs): 170 """Fit Random 'Bagging' model to training data (X, y). 171 172 Args: 173 174 X: {array-like}, shape = [n_samples, n_features] 175 Training vectors, where n_samples is the number 176 of samples and n_features is the number of features. 177 178 y: array-like, shape = [n_samples] 179 Target values. 180 181 **kwargs: additional parameters to be passed to 182 self.cook_training_set or self.obj.fit 183 184 Returns: 185 186 self: object 187 188 """ 189 190 base_learner = CustomRegressor( 191 self.obj, 192 n_hidden_features=self.n_hidden_features, 193 activation_name=self.activation_name, 194 a=self.a, 195 nodes_sim=self.nodes_sim, 196 bias=self.bias, 197 dropout=self.dropout, 198 direct_link=self.direct_link, 199 n_clusters=self.n_clusters, 200 type_clust=self.type_clust, 201 type_scaling=self.type_scaling, 202 col_sample=self.col_sample, 203 row_sample=self.row_sample, 204 seed=self.seed, 205 ) 206 207 # 1 - Sequential training ----- 208 209 if self.n_jobs is None: 210 self.voter_ = rbagloop_regression( 211 base_learner, X, y, self.n_estimators, self.verbose, self.seed 212 ) 213 214 self.n_estimators = len(self.voter_) 215 216 return self 217 218 # 2 - Parallel training ----- 219 # buggy 220 # if self.n_jobs is not None: 221 def fit_estimators(m): 222 base_learner__ = deepcopy(base_learner) 223 base_learner__.set_params(seed=self.seed + m * 1000) 224 base_learner__.fit(X, y, **kwargs) 225 return base_learner__ 226 227 if self.verbose == 1: 228 voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")( 229 delayed(fit_estimators)(m) 230 for m in tqdm(range(self.n_estimators)) 231 ) 232 else: 233 voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")( 234 delayed(fit_estimators)(m) for m in range(self.n_estimators) 235 ) 236 237 self.voter_ = {i: elt for i, elt in enumerate(voters_list)} 238 239 self.n_estimators = len(self.voter_) 240 241 return self
Fit Random 'Bagging' model to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
243 def predict(self, X, weights=None, **kwargs): 244 """Predict for test data X. 245 246 Args: 247 248 X: {array-like}, shape = [n_samples, n_features] 249 Training vectors, where n_samples is the number 250 of samples and n_features is the number of features. 251 252 **kwargs: additional parameters to be passed to 253 self.cook_test_set 254 255 Returns: 256 257 estimates for test data: {array-like} 258 259 """ 260 261 def calculate_preds(voter, weights=None): 262 ensemble_preds = 0 263 264 n_iter = len(voter) 265 266 assert n_iter > 0, "no estimator found in `RandomBag` ensemble" 267 268 if weights is None: 269 for idx, elt in voter.items(): 270 ensemble_preds += elt.predict(X) 271 272 return ensemble_preds / n_iter 273 274 # if weights is not None: 275 for idx, elt in voter.items(): 276 ensemble_preds += weights[idx] * elt.predict(X) 277 278 return ensemble_preds 279 280 # end calculate_preds ---- 281 282 if weights is None: 283 return calculate_preds(self.voter_) 284 285 # if weights is not None: 286 self.weights = weights 287 288 return calculate_preds(self.voter_, weights)
Predict for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
estimates for test data: {array-like}
18class RandomBagClassifier(RandomBag, ClassifierMixin): 19 """Randomized 'Bagging' Classification model 20 21 Parameters: 22 23 obj: object 24 any object containing a method fit (obj.fit()) and a method predict 25 (obj.predict()) 26 27 n_estimators: int 28 number of boosting iterations 29 30 n_hidden_features: int 31 number of nodes in the hidden layer 32 33 activation_name: str 34 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 35 36 a: float 37 hyperparameter for 'prelu' or 'elu' activation function 38 39 nodes_sim: str 40 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 41 'uniform' 42 43 bias: boolean 44 indicates if the hidden layer contains a bias term (True) or not 45 (False) 46 47 dropout: float 48 regularization parameter; (random) percentage of nodes dropped out 49 of the training 50 51 direct_link: boolean 52 indicates if the original predictors are included (True) in model's 53 fitting or not (False) 54 55 n_clusters: int 56 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: 57 no clustering) 58 59 cluster_encode: bool 60 defines how the variable containing clusters is treated (default is one-hot) 61 if `False`, then labels are used, without one-hot encoding 62 63 type_clust: str 64 type of clustering method: currently k-means ('kmeans') or Gaussian 65 Mixture Model ('gmm') 66 67 type_scaling: a tuple of 3 strings 68 scaling methods for inputs, hidden layer, and clustering respectively 69 (and when relevant). 70 Currently available: standardization ('std') or MinMax scaling ('minmax') 71 72 col_sample: float 73 percentage of covariates randomly chosen for training 74 75 row_sample: float 76 percentage of rows chosen for training, by stratified bootstrapping 77 78 seed: int 79 reproducibility seed for nodes_sim=='uniform' 80 81 backend: str 82 "cpu" or "gpu" or "tpu" 83 84 Attributes: 85 86 voter_: dict 87 dictionary containing all the fitted base-learners 88 89 90 Examples: 91 92 See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/randombag_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/randombag_classification.py) 93 94 ```python 95 import nnetsauce as ns 96 from sklearn.datasets import load_breast_cancer 97 from sklearn.tree import DecisionTreeClassifier 98 from sklearn.model_selection import train_test_split 99 from sklearn import metrics 100 from time import time 101 102 103 breast_cancer = load_breast_cancer() 104 Z = breast_cancer.data 105 t = breast_cancer.target 106 np.random.seed(123) 107 X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2) 108 109 # decision tree 110 clf = DecisionTreeClassifier(max_depth=2, random_state=123) 111 fit_obj = ns.RandomBagClassifier(clf, n_hidden_features=2, 112 direct_link=True, 113 n_estimators=100, 114 col_sample=0.9, row_sample=0.9, 115 dropout=0.3, n_clusters=0, verbose=1) 116 117 start = time() 118 fit_obj.fit(X_train, y_train) 119 print(f"Elapsed {time() - start}") 120 121 print(fit_obj.score(X_test, y_test)) 122 print(fit_obj.score(X_test, y_test, scoring="roc_auc")) 123 124 start = time() 125 preds = fit_obj.predict(X_test) 126 print(f"Elapsed {time() - start}") 127 print(metrics.classification_report(preds, y_test)) 128 ``` 129 130 """ 131 132 # construct the object ----- 133 _estimator_type = "classifier" 134 135 def __init__( 136 self, 137 obj, 138 n_estimators=10, 139 n_hidden_features=1, 140 activation_name="relu", 141 a=0.01, 142 nodes_sim="sobol", 143 bias=True, 144 dropout=0, 145 direct_link=False, 146 n_clusters=2, 147 cluster_encode=True, 148 type_clust="kmeans", 149 type_scaling=("std", "std", "std"), 150 col_sample=1, 151 row_sample=1, 152 n_jobs=None, 153 seed=123, 154 verbose=1, 155 backend="cpu", 156 ): 157 super().__init__( 158 obj=obj, 159 n_estimators=n_estimators, 160 n_hidden_features=n_hidden_features, 161 activation_name=activation_name, 162 a=a, 163 nodes_sim=nodes_sim, 164 bias=bias, 165 dropout=dropout, 166 direct_link=direct_link, 167 n_clusters=n_clusters, 168 cluster_encode=cluster_encode, 169 type_clust=type_clust, 170 type_scaling=type_scaling, 171 col_sample=col_sample, 172 row_sample=row_sample, 173 seed=seed, 174 backend=backend, 175 ) 176 177 self.type_fit = "classification" 178 self.verbose = verbose 179 self.n_jobs = n_jobs 180 self.voter_ = {} 181 182 def fit(self, X, y, **kwargs): 183 """Fit Random 'Bagging' model to training data (X, y). 184 185 Args: 186 187 X: {array-like}, shape = [n_samples, n_features] 188 Training vectors, where n_samples is the number 189 of samples and n_features is the number of features. 190 191 y: array-like, shape = [n_samples] 192 Target values. 193 194 **kwargs: additional parameters to be passed to 195 self.cook_training_set or self.obj.fit 196 197 Returns: 198 199 self: object 200 201 """ 202 203 assert mx.is_factor(y), "y must contain only integers" 204 205 self.n_classes_ = len(np.unique(y)) # for compatibility with sklearn 206 207 # training 208 self.n_classes = len(np.unique(y)) 209 210 base_learner = CustomClassifier( 211 self.obj, 212 n_hidden_features=self.n_hidden_features, 213 activation_name=self.activation_name, 214 a=self.a, 215 nodes_sim=self.nodes_sim, 216 bias=self.bias, 217 dropout=self.dropout, 218 direct_link=self.direct_link, 219 n_clusters=self.n_clusters, 220 type_clust=self.type_clust, 221 type_scaling=self.type_scaling, 222 col_sample=self.col_sample, 223 row_sample=self.row_sample, 224 seed=self.seed, 225 cv_calibration=None, 226 ) 227 228 # 1 - Sequential training ----- 229 230 if self.n_jobs is None: 231 self.voter_ = rbagloop_classification( 232 base_learner, X, y, self.n_estimators, self.verbose, self.seed 233 ) 234 235 self.n_estimators = len(self.voter_) 236 237 return self 238 239 # 2 - Parallel training ----- 240 # buggy 241 # if self.n_jobs is not None: 242 def fit_estimators(m): 243 base_learner__ = deepcopy(base_learner) 244 base_learner__.set_params(seed=self.seed + m * 1000) 245 base_learner__.fit(X, y, **kwargs) 246 return base_learner__ 247 248 if self.verbose == 1: 249 voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")( 250 delayed(fit_estimators)(m) 251 for m in tqdm(range(self.n_estimators)) 252 ) 253 else: 254 voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")( 255 delayed(fit_estimators)(m) for m in range(self.n_estimators) 256 ) 257 258 self.voter_ = {idx: elt for idx, elt in enumerate(voters_list)} 259 260 self.n_estimators = len(self.voter_) 261 self.classes_ = np.unique(y) 262 return self 263 264 def predict(self, X, weights=None, **kwargs): 265 """Predict test data X. 266 267 Args: 268 269 X: {array-like}, shape = [n_samples, n_features] 270 Training vectors, where n_samples is the number 271 of samples and n_features is the number of features. 272 273 **kwargs: additional parameters to be passed to 274 self.cook_test_set 275 276 Returns: 277 278 model predictions: {array-like} 279 280 """ 281 return self.predict_proba(X, weights, **kwargs).argmax(axis=1) 282 283 def predict_proba(self, X, weights=None, **kwargs): 284 """Predict probabilities for test data X. 285 286 Args: 287 288 X: {array-like}, shape = [n_samples, n_features] 289 Training vectors, where n_samples is the number 290 of samples and n_features is the number of features. 291 292 **kwargs: additional parameters to be passed to 293 self.cook_test_set 294 295 Returns: 296 297 probability estimates for test data: {array-like} 298 299 """ 300 301 def calculate_probas(voter, weights=None, verbose=None): 302 ensemble_proba = 0 303 304 n_iter = len(voter) 305 306 assert n_iter > 0, "no estimator found in `RandomBag` ensemble" 307 308 if weights is None: 309 for idx, elt in voter.items(): 310 try: 311 ensemble_proba += elt.predict_proba(X) 312 313 # if verbose == 1: 314 # pbar.update(idx) 315 316 except: 317 continue 318 319 # if verbose == 1: 320 # pbar.update(n_iter) 321 322 return ensemble_proba / n_iter 323 324 # if weights is not None: 325 for idx, elt in voter.items(): 326 ensemble_proba += weights[idx] * elt.predict_proba(X) 327 328 # if verbose == 1: 329 # pbar.update(idx) 330 331 # if verbose == 1: 332 # pbar.update(n_iter) 333 334 return ensemble_proba 335 336 # end calculate_probas ---- 337 338 if self.n_jobs is None: 339 # if self.verbose == 1: 340 # pbar = Progbar(self.n_estimators) 341 342 if weights is None: 343 return calculate_probas(self.voter_, verbose=self.verbose) 344 345 # if weights is not None: 346 self.weights = weights 347 348 return calculate_probas(self.voter_, weights, verbose=self.verbose) 349 350 # if self.n_jobs is not None: 351 def predict_estimator(m): 352 try: 353 return self.voter_[m].predict_proba(X) 354 except: 355 pass 356 357 if self.verbose == 1: 358 preds = Parallel(n_jobs=self.n_jobs, prefer="threads")( 359 delayed(predict_estimator)(m) 360 for m in tqdm(range(self.n_estimators)) 361 ) 362 363 else: 364 preds = Parallel(n_jobs=self.n_jobs, prefer="threads")( 365 delayed(predict_estimator)(m) for m in range(self.n_estimators) 366 ) 367 368 ensemble_proba = 0 369 370 if weights is None: 371 for i in range(self.n_estimators): 372 ensemble_proba += preds[i] 373 374 return ensemble_proba / self.n_estimators 375 376 for i in range(self.n_estimators): 377 ensemble_proba += weights[i] * preds[i] 378 379 return ensemble_proba 380 381 @property 382 def _estimator_type(self): 383 return "classifier"
Randomized 'Bagging' Classification model
Parameters:
obj: object
any object containing a method fit (obj.fit()) and a method predict
(obj.predict())
n_estimators: int
number of boosting iterations
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
voter_: dict
dictionary containing all the fitted base-learners
Examples:
See also https://github.com/Techtonique/nnetsauce/blob/master/examples/randombag_classification.py
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time
breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)
# decision tree
clf = DecisionTreeClassifier(max_depth=2, random_state=123)
fit_obj = ns.RandomBagClassifier(clf, n_hidden_features=2,
direct_link=True,
n_estimators=100,
col_sample=0.9, row_sample=0.9,
dropout=0.3, n_clusters=0, verbose=1)
start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")
print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
182 def fit(self, X, y, **kwargs): 183 """Fit Random 'Bagging' model to training data (X, y). 184 185 Args: 186 187 X: {array-like}, shape = [n_samples, n_features] 188 Training vectors, where n_samples is the number 189 of samples and n_features is the number of features. 190 191 y: array-like, shape = [n_samples] 192 Target values. 193 194 **kwargs: additional parameters to be passed to 195 self.cook_training_set or self.obj.fit 196 197 Returns: 198 199 self: object 200 201 """ 202 203 assert mx.is_factor(y), "y must contain only integers" 204 205 self.n_classes_ = len(np.unique(y)) # for compatibility with sklearn 206 207 # training 208 self.n_classes = len(np.unique(y)) 209 210 base_learner = CustomClassifier( 211 self.obj, 212 n_hidden_features=self.n_hidden_features, 213 activation_name=self.activation_name, 214 a=self.a, 215 nodes_sim=self.nodes_sim, 216 bias=self.bias, 217 dropout=self.dropout, 218 direct_link=self.direct_link, 219 n_clusters=self.n_clusters, 220 type_clust=self.type_clust, 221 type_scaling=self.type_scaling, 222 col_sample=self.col_sample, 223 row_sample=self.row_sample, 224 seed=self.seed, 225 cv_calibration=None, 226 ) 227 228 # 1 - Sequential training ----- 229 230 if self.n_jobs is None: 231 self.voter_ = rbagloop_classification( 232 base_learner, X, y, self.n_estimators, self.verbose, self.seed 233 ) 234 235 self.n_estimators = len(self.voter_) 236 237 return self 238 239 # 2 - Parallel training ----- 240 # buggy 241 # if self.n_jobs is not None: 242 def fit_estimators(m): 243 base_learner__ = deepcopy(base_learner) 244 base_learner__.set_params(seed=self.seed + m * 1000) 245 base_learner__.fit(X, y, **kwargs) 246 return base_learner__ 247 248 if self.verbose == 1: 249 voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")( 250 delayed(fit_estimators)(m) 251 for m in tqdm(range(self.n_estimators)) 252 ) 253 else: 254 voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")( 255 delayed(fit_estimators)(m) for m in range(self.n_estimators) 256 ) 257 258 self.voter_ = {idx: elt for idx, elt in enumerate(voters_list)} 259 260 self.n_estimators = len(self.voter_) 261 self.classes_ = np.unique(y) 262 return self
Fit Random 'Bagging' model to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
264 def predict(self, X, weights=None, **kwargs): 265 """Predict test data X. 266 267 Args: 268 269 X: {array-like}, shape = [n_samples, n_features] 270 Training vectors, where n_samples is the number 271 of samples and n_features is the number of features. 272 273 **kwargs: additional parameters to be passed to 274 self.cook_test_set 275 276 Returns: 277 278 model predictions: {array-like} 279 280 """ 281 return self.predict_proba(X, weights, **kwargs).argmax(axis=1)
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
283 def predict_proba(self, X, weights=None, **kwargs): 284 """Predict probabilities for test data X. 285 286 Args: 287 288 X: {array-like}, shape = [n_samples, n_features] 289 Training vectors, where n_samples is the number 290 of samples and n_features is the number of features. 291 292 **kwargs: additional parameters to be passed to 293 self.cook_test_set 294 295 Returns: 296 297 probability estimates for test data: {array-like} 298 299 """ 300 301 def calculate_probas(voter, weights=None, verbose=None): 302 ensemble_proba = 0 303 304 n_iter = len(voter) 305 306 assert n_iter > 0, "no estimator found in `RandomBag` ensemble" 307 308 if weights is None: 309 for idx, elt in voter.items(): 310 try: 311 ensemble_proba += elt.predict_proba(X) 312 313 # if verbose == 1: 314 # pbar.update(idx) 315 316 except: 317 continue 318 319 # if verbose == 1: 320 # pbar.update(n_iter) 321 322 return ensemble_proba / n_iter 323 324 # if weights is not None: 325 for idx, elt in voter.items(): 326 ensemble_proba += weights[idx] * elt.predict_proba(X) 327 328 # if verbose == 1: 329 # pbar.update(idx) 330 331 # if verbose == 1: 332 # pbar.update(n_iter) 333 334 return ensemble_proba 335 336 # end calculate_probas ---- 337 338 if self.n_jobs is None: 339 # if self.verbose == 1: 340 # pbar = Progbar(self.n_estimators) 341 342 if weights is None: 343 return calculate_probas(self.voter_, verbose=self.verbose) 344 345 # if weights is not None: 346 self.weights = weights 347 348 return calculate_probas(self.voter_, weights, verbose=self.verbose) 349 350 # if self.n_jobs is not None: 351 def predict_estimator(m): 352 try: 353 return self.voter_[m].predict_proba(X) 354 except: 355 pass 356 357 if self.verbose == 1: 358 preds = Parallel(n_jobs=self.n_jobs, prefer="threads")( 359 delayed(predict_estimator)(m) 360 for m in tqdm(range(self.n_estimators)) 361 ) 362 363 else: 364 preds = Parallel(n_jobs=self.n_jobs, prefer="threads")( 365 delayed(predict_estimator)(m) for m in range(self.n_estimators) 366 ) 367 368 ensemble_proba = 0 369 370 if weights is None: 371 for i in range(self.n_estimators): 372 ensemble_proba += preds[i] 373 374 return ensemble_proba / self.n_estimators 375 376 for i in range(self.n_estimators): 377 ensemble_proba += weights[i] * preds[i] 378 379 return ensemble_proba
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
16class RegressorUpdater(BaseEstimator, RegressorMixin): 17 """ 18 Update a regression model with new observations 19 20 Parameters 21 ---------- 22 regr: object 23 A regression model with a coef_ attribute 24 alpha: float 25 Updating factor's exponent 26 27 Attributes 28 ---------- 29 n_obs_: int 30 Number of observations 31 coef_: np.ndarray 32 Coefficients of the model 33 updating_factor_: float 34 Updating factor 35 36 """ 37 38 def __init__(self, regr, alpha=0.5): 39 self.regr = regr 40 self.alpha = alpha 41 self.n_obs_ = None 42 self.coef_ = None 43 self.updating_factor_ = None 44 try: 45 self.coef_ = self.regr.coef_ 46 if isinstance(self.regr, Base): 47 self.n_obs_ = self.regr.scaler_.n_samples_seen_ 48 except AttributeError: 49 pass 50 51 def fit(self, X, y, **kwargs): 52 53 if isinstance( 54 self.regr, CustomRegressor 55 ): # nnetsauce model not deep --- 56 if check_is_fitted(self.regr) == False: 57 self.regr.fit(X, y, **kwargs) 58 self.n_obs_ = X.shape[0] 59 if hasattr(self.regr, "coef_"): 60 self.coef_ = self.regr.coef_ 61 return self 62 self.n_obs_ = self.regr.scaler_.n_samples_seen_ 63 if hasattr(self.regr, "coef_"): 64 self.coef_ = self.regr.coef_ 65 return self 66 67 if ( 68 hasattr(self.regr, "coef_") == False 69 ): # sklearn model or CustomRegressor model --- 70 self.regr.fit(X, y) 71 self.n_obs_ = X.shape[0] 72 self.regr.fit(X, y) 73 if hasattr(self.regr, "stacked_obj"): 74 self.coef_ = self.regr.stacked_obj.coef_ 75 else: 76 self.coef_ = self.regr.coef_ 77 return self 78 self.n_obs_ = X.shape[0] 79 if hasattr(self.regr, "coef_"): 80 self.coef_ = self.regr.coef_ 81 return self 82 83 def predict(self, X): 84 # assert hasattr(self.regr, "coef_"), "model must have coef_ attribute" 85 return self.regr.predict(X) 86 87 def partial_fit(self, X, y): 88 89 assert hasattr( 90 self.regr, "coef_" 91 ), "model must be fitted first (i.e have 'coef_' attribute)" 92 assert ( 93 self.n_obs_ is not None 94 ), "model must be fitted first (i.e have 'n_obs_' attribute)" 95 96 if len(X.shape) == 1: 97 X = X.reshape(1, -1) 98 99 assert X.shape[0] == 1, "X must have one row" 100 101 self.updating_factor_ = self.n_obs_ ** (-self.alpha) 102 103 if isinstance(self.regr, Base): # nnetsauce model --- 104 105 newX = deepcopy(X) 106 107 if isinstance( 108 self.regr, CustomRegressor 109 ): # other nnetsauce model (CustomRegressor) --- 110 newX = self.regr.cook_test_set(X=X) 111 if isinstance(X, pd.DataFrame): 112 newx = newX.values.ravel() 113 else: 114 newx = newX.ravel() 115 116 else: # an sklearn model --- 117 118 if isinstance(X, pd.DataFrame): 119 newx = X.values.ravel() 120 else: 121 newx = X.ravel() 122 123 new_coef = self.regr.coef_ + self.updating_factor_ * np.dot( 124 newx, y - np.dot(newx, self.regr.coef_) 125 ) 126 self.regr.coef_ = _update_mean(self.regr.coef_, self.n_obs_, new_coef) 127 self.coef_ = deepcopy(self.regr.coef_) 128 self.n_obs_ += 1 129 return self
Update a regression model with new observations
Parameters
regr: object A regression model with a coef_ attribute alpha: float Updating factor's exponent
Attributes
n_obs_: int Number of observations coef_: np.ndarray Coefficients of the model updating_factor_: float Updating factor
51 def fit(self, X, y, **kwargs): 52 53 if isinstance( 54 self.regr, CustomRegressor 55 ): # nnetsauce model not deep --- 56 if check_is_fitted(self.regr) == False: 57 self.regr.fit(X, y, **kwargs) 58 self.n_obs_ = X.shape[0] 59 if hasattr(self.regr, "coef_"): 60 self.coef_ = self.regr.coef_ 61 return self 62 self.n_obs_ = self.regr.scaler_.n_samples_seen_ 63 if hasattr(self.regr, "coef_"): 64 self.coef_ = self.regr.coef_ 65 return self 66 67 if ( 68 hasattr(self.regr, "coef_") == False 69 ): # sklearn model or CustomRegressor model --- 70 self.regr.fit(X, y) 71 self.n_obs_ = X.shape[0] 72 self.regr.fit(X, y) 73 if hasattr(self.regr, "stacked_obj"): 74 self.coef_ = self.regr.stacked_obj.coef_ 75 else: 76 self.coef_ = self.regr.coef_ 77 return self 78 self.n_obs_ = X.shape[0] 79 if hasattr(self.regr, "coef_"): 80 self.coef_ = self.regr.coef_ 81 return self
16class ClassifierUpdater(BaseEstimator, ClassifierMixin): 17 """ 18 Update a regression model with new observations 19 20 Parameters 21 ---------- 22 clf: object 23 A regression model with a coef_ attribute 24 alpha: float 25 Updating factor's exponent 26 27 Attributes 28 ---------- 29 n_obs_: int 30 Number of observations 31 coef_: np.ndarray 32 Coefficients of the model 33 updating_factor_: float 34 Updating factor 35 36 """ 37 38 _estimator_type = "classifier" 39 40 def __init__(self, clf, alpha=0.5): 41 self.clf = clf 42 self.alpha = alpha 43 self.n_obs_ = None 44 self.coef_ = None 45 self.updating_factor_ = None 46 try: 47 self.coef_ = self.clf.coef_ 48 if isinstance(self.clf, Base): 49 self.n_obs_ = self.clf.scaler_.n_samples_seen_ 50 except AttributeError: 51 pass 52 53 def fit(self, X, y, **kwargs): 54 55 raise NotImplementedError( 56 "fit method is not implemented for ClassifierUpdater" 57 ) 58 59 if isinstance( 60 self.clf, CustomClassifier 61 ): # nnetsauce model not deep --- 62 if check_is_fitted(self.clf) == False: 63 self.clf.fit(X, y, **kwargs) 64 self.n_obs_ = X.shape[0] 65 if hasattr(self.clf, "coef_"): 66 self.coef_ = self.clf.coef_ 67 return self 68 self.n_obs_ = self.clf.scaler_.n_samples_seen_ 69 if hasattr(self.clf, "coef_"): 70 self.coef_ = self.clf.coef_ 71 return self 72 73 if ( 74 hasattr(self.clf, "coef_") == False 75 ): # sklearn model or CustomClassifier model --- 76 self.clf.fit(X, y) 77 self.n_obs_ = X.shape[0] 78 self.clf.fit(X, y) 79 if hasattr(self.clf, "stacked_obj"): 80 self.coef_ = self.clf.stacked_obj.coef_ 81 else: 82 self.coef_ = self.clf.coef_ 83 return self 84 self.n_obs_ = X.shape[0] 85 if hasattr(self.clf, "coef_"): 86 self.coef_ = self.clf.coef_ 87 return self 88 89 def predict(self, X): 90 91 raise NotImplementedError( 92 "predict method is not implemented for ClassifierUpdater" 93 ) 94 # assert hasattr(self.clf, "coef_"), "model must have coef_ attribute" 95 return self.clf.predict(X) 96 97 def partial_fit(self, X, y): 98 99 raise NotImplementedError( 100 "partial_fit method is not implemented for ClassifierUpdater" 101 ) 102 103 assert hasattr( 104 self.clf, "coef_" 105 ), "model must be fitted first (i.e have 'coef_' attribute)" 106 assert ( 107 self.n_obs_ is not None 108 ), "model must be fitted first (i.e have 'n_obs_' attribute)" 109 110 if len(X.shape) == 1: 111 X = X.reshape(1, -1) 112 113 assert X.shape[0] == 1, "X must have one row" 114 115 self.updating_factor_ = self.n_obs_ ** (-self.alpha) 116 117 if isinstance(self.clf, Base): # nnetsauce model --- 118 119 newX = deepcopy(X) 120 121 if isinstance( 122 self.clf, CustomClassifier 123 ): # other nnetsauce model (CustomClassifier) --- 124 newX = self.clf.cook_test_set(X=X) 125 if isinstance(X, pd.DataFrame): 126 newx = newX.values.ravel() 127 else: 128 newx = newX.ravel() 129 130 else: # an sklearn model --- 131 132 if isinstance(X, pd.DataFrame): 133 newx = X.values.ravel() 134 else: 135 newx = X.ravel() 136 137 new_coef = self.clf.coef_ + self.updating_factor_ * np.dot( 138 newx, y - np.dot(newx, self.clf.coef_) 139 ) 140 self.clf.coef_ = _update_mean(self.clf.coef_, self.n_obs_, new_coef) 141 self.coef_ = deepcopy(self.clf.coef_) 142 self.n_obs_ += 1 143 return self
Update a regression model with new observations
Parameters
clf: object A regression model with a coef_ attribute alpha: float Updating factor's exponent
Attributes
n_obs_: int Number of observations coef_: np.ndarray Coefficients of the model updating_factor_: float Updating factor
53 def fit(self, X, y, **kwargs): 54 55 raise NotImplementedError( 56 "fit method is not implemented for ClassifierUpdater" 57 ) 58 59 if isinstance( 60 self.clf, CustomClassifier 61 ): # nnetsauce model not deep --- 62 if check_is_fitted(self.clf) == False: 63 self.clf.fit(X, y, **kwargs) 64 self.n_obs_ = X.shape[0] 65 if hasattr(self.clf, "coef_"): 66 self.coef_ = self.clf.coef_ 67 return self 68 self.n_obs_ = self.clf.scaler_.n_samples_seen_ 69 if hasattr(self.clf, "coef_"): 70 self.coef_ = self.clf.coef_ 71 return self 72 73 if ( 74 hasattr(self.clf, "coef_") == False 75 ): # sklearn model or CustomClassifier model --- 76 self.clf.fit(X, y) 77 self.n_obs_ = X.shape[0] 78 self.clf.fit(X, y) 79 if hasattr(self.clf, "stacked_obj"): 80 self.coef_ = self.clf.stacked_obj.coef_ 81 else: 82 self.coef_ = self.clf.coef_ 83 return self 84 self.n_obs_ = X.shape[0] 85 if hasattr(self.clf, "coef_"): 86 self.coef_ = self.clf.coef_ 87 return self
24class RidgeRegressor(BaseEstimator, RegressorMixin): 25 """Ridge. 26 27 Attributes: 28 29 reg_lambda: float 30 regularization parameter. 31 32 backend: str 33 type of backend; must be in ('cpu', 'gpu', 'tpu') 34 35 """ 36 37 def __init__(self, reg_lambda=0.1, backend="cpu"): 38 assert backend in ( 39 "cpu", 40 "gpu", 41 "tpu", 42 ), "`backend` must be in ('cpu', 'gpu', 'tpu')" 43 44 sys_platform = platform.system() 45 46 if (sys_platform == "Windows") and (backend in ("gpu", "tpu")): 47 warnings.warn( 48 "No GPU/TPU computing on Windows yet, backend set to 'cpu'" 49 ) 50 backend = "cpu" 51 52 self.reg_lambda = reg_lambda 53 self.backend = backend 54 self.coef_ = None 55 56 def fit(self, X, y, **kwargs): 57 """Fit matrixops (classifier) to training data (X, y) 58 59 Args: 60 61 X: {array-like}, shape = [n_samples, n_features] 62 Training vectors, where n_samples is the number 63 of samples and n_features is the number of features. 64 65 y: array-like, shape = [n_samples] 66 Target values. 67 68 **kwargs: additional parameters to be passed to self.cook_training_set. 69 70 Returns: 71 72 self: object. 73 74 """ 75 self.ym, centered_y = mo.center_response(y) 76 self.xm = X.mean(axis=0) 77 self.xsd = X.std(axis=0) 78 self.xsd[self.xsd == 0] = 1 # avoid division by zero 79 X_ = (X - self.xm[None, :]) / self.xsd[None, :] 80 81 if self.backend == "cpu": 82 if len(centered_y.shape) <= 1: 83 eye_term = np.sqrt(self.reg_lambda) * np.eye(X.shape[1]) 84 X_ = np.row_stack((X_, eye_term)) 85 y_ = np.concatenate((centered_y, np.zeros(X.shape[1]))) 86 beta_info = get_beta(X_, y_) 87 self.coef_ = beta_info[0] 88 else: 89 try: 90 eye_term = np.sqrt(self.reg_lambda) * np.eye(X.shape[1]) 91 X_ = np.row_stack((X_, eye_term)) 92 y_ = np.row_stack( 93 ( 94 centered_y, 95 np.zeros((eye_term.shape[0], centered_y.shape[1])), 96 ) 97 ) 98 beta_info = get_beta(X_, y_) 99 self.coef_ = beta_info[0] 100 except Exception: 101 x = inv( 102 mo.crossprod(X_) + self.reg_lambda * np.eye(X_.shape[1]) 103 ) 104 hat_matrix = mo.tcrossprod(x, X_) 105 self.coef_ = mo.safe_sparse_dot(hat_matrix, centered_y) 106 return self 107 108 x = jinv( 109 mo.crossprod(X_, backend=self.backend) 110 + self.reg_lambda * jnp.eye(X_.shape[1]) 111 ) 112 113 hat_matrix = mo.tcrossprod(x, X_, backend=self.backend) 114 self.coef_ = mo.safe_sparse_dot( 115 hat_matrix, centered_y, backend=self.backend 116 ) 117 return self 118 119 def predict(self, X, **kwargs): 120 """Predict test data X. 121 122 Args: 123 124 X: {array-like}, shape = [n_samples, n_features] 125 Training vectors, where n_samples is the number 126 of samples and n_features is the number of features. 127 128 **kwargs: additional parameters to be passed to `predict_proba` 129 130 Returns: 131 132 model predictions: {array-like} 133 134 """ 135 X_ = (X - self.xm[None, :]) / self.xsd[None, :] 136 137 if self.backend == "cpu": 138 if isinstance(self.ym, float): 139 return self.ym + mo.safe_sparse_dot(X_, self.coef_) 140 return self.ym[None, :] + mo.safe_sparse_dot(X_, self.coef_) 141 142 # if self.backend in ("gpu", "tpu"): 143 if isinstance(self.ym, float): 144 return self.ym + mo.safe_sparse_dot( 145 X_, self.coef_, backend=self.backend 146 ) 147 return self.ym[None, :] + mo.safe_sparse_dot( 148 X_, self.coef_, backend=self.backend 149 )
Ridge.
Attributes:
reg_lambda: float
regularization parameter.
backend: str
type of backend; must be in ('cpu', 'gpu', 'tpu')
56 def fit(self, X, y, **kwargs): 57 """Fit matrixops (classifier) to training data (X, y) 58 59 Args: 60 61 X: {array-like}, shape = [n_samples, n_features] 62 Training vectors, where n_samples is the number 63 of samples and n_features is the number of features. 64 65 y: array-like, shape = [n_samples] 66 Target values. 67 68 **kwargs: additional parameters to be passed to self.cook_training_set. 69 70 Returns: 71 72 self: object. 73 74 """ 75 self.ym, centered_y = mo.center_response(y) 76 self.xm = X.mean(axis=0) 77 self.xsd = X.std(axis=0) 78 self.xsd[self.xsd == 0] = 1 # avoid division by zero 79 X_ = (X - self.xm[None, :]) / self.xsd[None, :] 80 81 if self.backend == "cpu": 82 if len(centered_y.shape) <= 1: 83 eye_term = np.sqrt(self.reg_lambda) * np.eye(X.shape[1]) 84 X_ = np.row_stack((X_, eye_term)) 85 y_ = np.concatenate((centered_y, np.zeros(X.shape[1]))) 86 beta_info = get_beta(X_, y_) 87 self.coef_ = beta_info[0] 88 else: 89 try: 90 eye_term = np.sqrt(self.reg_lambda) * np.eye(X.shape[1]) 91 X_ = np.row_stack((X_, eye_term)) 92 y_ = np.row_stack( 93 ( 94 centered_y, 95 np.zeros((eye_term.shape[0], centered_y.shape[1])), 96 ) 97 ) 98 beta_info = get_beta(X_, y_) 99 self.coef_ = beta_info[0] 100 except Exception: 101 x = inv( 102 mo.crossprod(X_) + self.reg_lambda * np.eye(X_.shape[1]) 103 ) 104 hat_matrix = mo.tcrossprod(x, X_) 105 self.coef_ = mo.safe_sparse_dot(hat_matrix, centered_y) 106 return self 107 108 x = jinv( 109 mo.crossprod(X_, backend=self.backend) 110 + self.reg_lambda * jnp.eye(X_.shape[1]) 111 ) 112 113 hat_matrix = mo.tcrossprod(x, X_, backend=self.backend) 114 self.coef_ = mo.safe_sparse_dot( 115 hat_matrix, centered_y, backend=self.backend 116 ) 117 return self
Fit matrixops (classifier) to training data (X, y)
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to self.cook_training_set.
Returns:
self: object.
119 def predict(self, X, **kwargs): 120 """Predict test data X. 121 122 Args: 123 124 X: {array-like}, shape = [n_samples, n_features] 125 Training vectors, where n_samples is the number 126 of samples and n_features is the number of features. 127 128 **kwargs: additional parameters to be passed to `predict_proba` 129 130 Returns: 131 132 model predictions: {array-like} 133 134 """ 135 X_ = (X - self.xm[None, :]) / self.xsd[None, :] 136 137 if self.backend == "cpu": 138 if isinstance(self.ym, float): 139 return self.ym + mo.safe_sparse_dot(X_, self.coef_) 140 return self.ym[None, :] + mo.safe_sparse_dot(X_, self.coef_) 141 142 # if self.backend in ("gpu", "tpu"): 143 if isinstance(self.ym, float): 144 return self.ym + mo.safe_sparse_dot( 145 X_, self.coef_, backend=self.backend 146 ) 147 return self.ym[None, :] + mo.safe_sparse_dot( 148 X_, self.coef_, backend=self.backend 149 )
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to `predict_proba`
Returns:
model predictions: {array-like}
23class Ridge2Regressor(Ridge2, RegressorMixin): 24 """Ridge regression with 2 regularization parameters derived from class Ridge 25 26 Parameters: 27 28 n_hidden_features: int 29 number of nodes in the hidden layer 30 31 activation_name: str 32 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 33 34 a: float 35 hyperparameter for 'prelu' or 'elu' activation function 36 37 nodes_sim: str 38 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 39 'uniform' 40 41 bias: boolean 42 indicates if the hidden layer contains a bias term (True) or not 43 (False) 44 45 dropout: float 46 regularization parameter; (random) percentage of nodes dropped out 47 of the training 48 49 n_clusters: int 50 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: 51 no clustering) 52 53 cluster_encode: bool 54 defines how the variable containing clusters is treated (default is one-hot) 55 if `False`, then labels are used, without one-hot encoding 56 57 type_clust: str 58 type of clustering method: currently k-means ('kmeans') or Gaussian 59 Mixture Model ('gmm') 60 61 type_scaling: a tuple of 3 strings 62 scaling methods for inputs, hidden layer, and clustering respectively 63 (and when relevant). 64 Currently available: standardization ('std') or MinMax scaling ('minmax') 65 66 lambda1: float 67 regularization parameter on direct link 68 69 lambda2: float 70 regularization parameter on hidden layer 71 72 seed: int 73 reproducibility seed for nodes_sim=='uniform' 74 75 backend: str 76 'cpu' or 'gpu' or 'tpu' 77 78 Attributes: 79 80 beta_: {array-like} 81 regression coefficients 82 83 y_mean_: float 84 average response 85 86 """ 87 88 # construct the object ----- 89 90 def __init__( 91 self, 92 n_hidden_features=5, 93 activation_name="relu", 94 a=0.01, 95 nodes_sim="sobol", 96 bias=True, 97 dropout=0, 98 n_clusters=2, 99 cluster_encode=True, 100 type_clust="kmeans", 101 type_scaling=("std", "std", "std"), 102 lambda1=0.1, 103 lambda2=0.1, 104 seed=123, 105 backend="cpu", 106 ): 107 super().__init__( 108 n_hidden_features=n_hidden_features, 109 activation_name=activation_name, 110 a=a, 111 nodes_sim=nodes_sim, 112 bias=bias, 113 dropout=dropout, 114 n_clusters=n_clusters, 115 cluster_encode=cluster_encode, 116 type_clust=type_clust, 117 type_scaling=type_scaling, 118 lambda1=lambda1, 119 lambda2=lambda2, 120 seed=seed, 121 backend=backend, 122 ) 123 124 self.type_fit = "regression" 125 126 def fit(self, X, y, **kwargs): 127 """Fit Ridge model to training data (X, y). 128 129 Args: 130 131 X: {array-like}, shape = [n_samples, n_features] 132 Training vectors, where n_samples is the number 133 of samples and n_features is the number of features. 134 135 y: array-like, shape = [n_samples] 136 Target values. 137 138 **kwargs: additional parameters to be passed to 139 self.cook_training_set or self.obj.fit 140 141 Returns: 142 143 self: object 144 145 """ 146 147 sys_platform = platform.system() 148 149 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 150 151 n_X, p_X = X.shape 152 n_Z, p_Z = scaled_Z.shape 153 154 if self.n_clusters > 0: 155 if self.encode_clusters == True: 156 n_features = p_X + self.n_clusters 157 else: 158 n_features = p_X + 1 159 else: 160 n_features = p_X 161 162 X_ = scaled_Z[:, 0:n_features] 163 Phi_X_ = scaled_Z[:, n_features:p_Z] 164 165 B = mo.crossprod(x=X_, backend=self.backend) + self.lambda1 * np.diag( 166 np.repeat(1, n_features) 167 ) 168 C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend) 169 D = mo.crossprod( 170 x=Phi_X_, backend=self.backend 171 ) + self.lambda2 * np.diag(np.repeat(1, Phi_X_.shape[1])) 172 173 if sys_platform in ("Linux", "Darwin"): 174 B_inv = pinv(B) if self.backend == "cpu" else jpinv(B) 175 else: 176 B_inv = pinv(B) 177 178 W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend) 179 S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend) 180 181 if sys_platform in ("Linux", "Darwin"): 182 S_inv = pinv(S_mat) if self.backend == "cpu" else jpinv(S_mat) 183 else: 184 S_inv = pinv(S_mat) 185 186 Y = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend) 187 inv = mo.rbind( 188 mo.cbind( 189 x=B_inv + mo.crossprod(x=W, y=Y, backend=self.backend), 190 y=-np.transpose(Y), 191 backend=self.backend, 192 ), 193 mo.cbind(x=-Y, y=S_inv, backend=self.backend), 194 backend=self.backend, 195 ) 196 197 self.beta_ = mo.safe_sparse_dot( 198 a=inv, 199 b=mo.crossprod(x=scaled_Z, y=centered_y, backend=self.backend), 200 backend=self.backend, 201 ) 202 203 return self 204 205 def predict(self, X, **kwargs): 206 """Predict test data X. 207 208 Args: 209 210 X: {array-like}, shape = [n_samples, n_features] 211 Training vectors, where n_samples is the number 212 of samples and n_features is the number of features. 213 214 **kwargs: additional parameters to be passed to 215 self.cook_test_set 216 217 Returns: 218 219 model predictions: {array-like} 220 221 """ 222 223 if len(X.shape) == 1: 224 n_features = X.shape[0] 225 new_X = mo.rbind( 226 x=X.reshape(1, n_features), 227 y=np.ones(n_features).reshape(1, n_features), 228 backend=self.backend, 229 ) 230 231 return ( 232 self.y_mean_ 233 + mo.safe_sparse_dot( 234 a=self.cook_test_set(new_X, **kwargs), 235 b=self.beta_, 236 backend=self.backend, 237 ) 238 )[0] 239 240 return self.y_mean_ + mo.safe_sparse_dot( 241 a=self.cook_test_set(X, **kwargs), 242 b=self.beta_, 243 backend=self.backend, 244 )
Ridge regression with 2 regularization parameters derived from class Ridge
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
lambda1: float
regularization parameter on direct link
lambda2: float
regularization parameter on hidden layer
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
'cpu' or 'gpu' or 'tpu'
Attributes:
beta_: {array-like}
regression coefficients
y_mean_: float
average response
126 def fit(self, X, y, **kwargs): 127 """Fit Ridge model to training data (X, y). 128 129 Args: 130 131 X: {array-like}, shape = [n_samples, n_features] 132 Training vectors, where n_samples is the number 133 of samples and n_features is the number of features. 134 135 y: array-like, shape = [n_samples] 136 Target values. 137 138 **kwargs: additional parameters to be passed to 139 self.cook_training_set or self.obj.fit 140 141 Returns: 142 143 self: object 144 145 """ 146 147 sys_platform = platform.system() 148 149 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 150 151 n_X, p_X = X.shape 152 n_Z, p_Z = scaled_Z.shape 153 154 if self.n_clusters > 0: 155 if self.encode_clusters == True: 156 n_features = p_X + self.n_clusters 157 else: 158 n_features = p_X + 1 159 else: 160 n_features = p_X 161 162 X_ = scaled_Z[:, 0:n_features] 163 Phi_X_ = scaled_Z[:, n_features:p_Z] 164 165 B = mo.crossprod(x=X_, backend=self.backend) + self.lambda1 * np.diag( 166 np.repeat(1, n_features) 167 ) 168 C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend) 169 D = mo.crossprod( 170 x=Phi_X_, backend=self.backend 171 ) + self.lambda2 * np.diag(np.repeat(1, Phi_X_.shape[1])) 172 173 if sys_platform in ("Linux", "Darwin"): 174 B_inv = pinv(B) if self.backend == "cpu" else jpinv(B) 175 else: 176 B_inv = pinv(B) 177 178 W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend) 179 S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend) 180 181 if sys_platform in ("Linux", "Darwin"): 182 S_inv = pinv(S_mat) if self.backend == "cpu" else jpinv(S_mat) 183 else: 184 S_inv = pinv(S_mat) 185 186 Y = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend) 187 inv = mo.rbind( 188 mo.cbind( 189 x=B_inv + mo.crossprod(x=W, y=Y, backend=self.backend), 190 y=-np.transpose(Y), 191 backend=self.backend, 192 ), 193 mo.cbind(x=-Y, y=S_inv, backend=self.backend), 194 backend=self.backend, 195 ) 196 197 self.beta_ = mo.safe_sparse_dot( 198 a=inv, 199 b=mo.crossprod(x=scaled_Z, y=centered_y, backend=self.backend), 200 backend=self.backend, 201 ) 202 203 return self
Fit Ridge model to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
205 def predict(self, X, **kwargs): 206 """Predict test data X. 207 208 Args: 209 210 X: {array-like}, shape = [n_samples, n_features] 211 Training vectors, where n_samples is the number 212 of samples and n_features is the number of features. 213 214 **kwargs: additional parameters to be passed to 215 self.cook_test_set 216 217 Returns: 218 219 model predictions: {array-like} 220 221 """ 222 223 if len(X.shape) == 1: 224 n_features = X.shape[0] 225 new_X = mo.rbind( 226 x=X.reshape(1, n_features), 227 y=np.ones(n_features).reshape(1, n_features), 228 backend=self.backend, 229 ) 230 231 return ( 232 self.y_mean_ 233 + mo.safe_sparse_dot( 234 a=self.cook_test_set(new_X, **kwargs), 235 b=self.beta_, 236 backend=self.backend, 237 ) 238 )[0] 239 240 return self.y_mean_ + mo.safe_sparse_dot( 241 a=self.cook_test_set(X, **kwargs), 242 b=self.beta_, 243 backend=self.backend, 244 )
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
18class Ridge2Classifier(Ridge2, ClassifierMixin): 19 """Multinomial logit classification with 2 regularization parameters 20 21 Parameters: 22 23 n_hidden_features: int 24 number of nodes in the hidden layer 25 26 activation_name: str 27 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 28 29 a: float 30 hyperparameter for 'prelu' or 'elu' activation function 31 32 nodes_sim: str 33 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 34 'uniform' 35 36 bias: boolean 37 indicates if the hidden layer contains a bias term (True) or not 38 (False) 39 40 dropout: float 41 regularization parameter; (random) percentage of nodes dropped out 42 of the training 43 44 direct_link: boolean 45 indicates if the original predictors are included (True) in model's 46 fitting or not (False) 47 48 n_clusters: int 49 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: 50 no clustering) 51 52 cluster_encode: bool 53 defines how the variable containing clusters is treated (default is one-hot) 54 if `False`, then labels are used, without one-hot encoding 55 56 type_clust: str 57 type of clustering method: currently k-means ('kmeans') or Gaussian 58 Mixture Model ('gmm') 59 60 type_scaling: a tuple of 3 strings 61 scaling methods for inputs, hidden layer, and clustering respectively 62 (and when relevant). 63 Currently available: standardization ('std') or MinMax scaling ('minmax') 64 65 lambda1: float 66 regularization parameter on direct link 67 68 lambda2: float 69 regularization parameter on hidden layer 70 71 solver: str 72 optimization function "L-BFGS-B", "Newton-CG", 73 "trust-ncg", "L-BFGS-B-lstsq", "Newton-CG-lstsq", 74 "trust-ncg-lstsq" (see scipy.optimize.minimize) 75 When using "L-BFGS-B-lstsq", "Newton-CG-lstsq", or "trust-ncg-lstsq", 76 the initial value for the optimization is set to the least squares solution 77 78 seed: int 79 reproducibility seed for nodes_sim=='uniform' 80 81 backend: str 82 "cpu" or "gpu" or "tpu" 83 84 Attributes: 85 86 beta_: {array-like} 87 regression coefficients 88 89 classes_: {array-like} 90 unique classes in the target variable 91 92 minloglik_: float 93 minimum value of the negative log-likelihood 94 95 Examples: 96 97 See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/ridge_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/ridge_classification.py) 98 99 ```python 100 import nnetsauce as ns 101 import numpy as np 102 from sklearn.datasets import load_breast_cancer 103 from sklearn.model_selection import train_test_split 104 from time import time 105 106 107 breast_cancer = load_breast_cancer() 108 X = breast_cancer.data 109 y = breast_cancer.target 110 111 # split data into training test and test set 112 np.random.seed(123) 113 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) 114 115 # create the model with nnetsauce 116 fit_obj = ns.Ridge2Classifier(lambda1 = 6.90185578e+04, 117 lambda2 = 3.17392781e+02, 118 n_hidden_features=95, 119 n_clusters=2, 120 dropout = 3.62817383e-01, 121 type_clust = "gmm") 122 123 # fit the model on training set 124 start = time() 125 fit_obj.fit(X_train, y_train) 126 print(f"Elapsed {time() - start}") 127 128 # get the accuracy on test set 129 start = time() 130 print(fit_obj.score(X_test, y_test)) 131 print(f"Elapsed {time() - start}") 132 133 # get area under the curve on test set (auc) 134 print(fit_obj.score(X_test, y_test, scoring="roc_auc")) 135 ``` 136 137 138 """ 139 140 _estimator_type = "classifier" 141 142 # construct the object ----- 143 144 def __init__( 145 self, 146 n_hidden_features=5, 147 activation_name="relu", 148 a=0.01, 149 nodes_sim="sobol", 150 bias=True, 151 dropout=0, 152 direct_link=True, 153 n_clusters=2, 154 cluster_encode=True, 155 type_clust="kmeans", 156 type_scaling=("std", "std", "std"), 157 lambda1=0.1, 158 lambda2=0.1, 159 solver="L-BFGS-B", 160 seed=123, 161 backend="cpu", 162 ): 163 super().__init__( 164 n_hidden_features=n_hidden_features, 165 activation_name=activation_name, 166 a=a, 167 nodes_sim=nodes_sim, 168 bias=bias, 169 dropout=dropout, 170 direct_link=direct_link, 171 n_clusters=n_clusters, 172 cluster_encode=cluster_encode, 173 type_clust=type_clust, 174 type_scaling=type_scaling, 175 lambda1=lambda1, 176 lambda2=lambda2, 177 seed=seed, 178 backend=backend, 179 ) 180 181 self.type_fit = "classification" 182 self.solver = solver 183 self.beta_ = None 184 self.classes_ = None 185 self.minloglik_ = None 186 187 def loglik(self, X, Y, **kwargs): 188 """Log-likelihood for training data (X, Y). 189 190 Args: 191 192 X: {array-like}, shape = [n_samples, n_features] 193 Training vectors, where n_samples is the number 194 of samples and n_features is the number of features. 195 196 Y: array-like, shape = [n_samples] 197 One-hot encode target values. 198 199 **kwargs: additional parameters to be passed to 200 self.cook_training_set or self.obj.fit 201 202 Returns: 203 204 """ 205 206 def loglik_grad_hess(Y, X, B, XB, hessian=True, **kwargs): 207 # nobs, n_classes 208 n, K = Y.shape 209 210 # total number of covariates 211 p = X.shape[1] 212 213 # initial number of covariates 214 init_p = p - self.n_hidden_features 215 216 max_double = 709.0 217 XB[XB > max_double] = max_double 218 exp_XB = np.exp(XB) 219 probs = exp_XB / exp_XB.sum(axis=1)[:, None] 220 221 # gradient ----- 222 # (Y - p) -> (n, K) 223 # X -> (n, p) 224 # (K, n) %*% (n, p) -> (K, p) 225 if hessian is False: 226 grad = ( 227 -mo.safe_sparse_dot( 228 a=(Y - probs).T, b=X, backend=self.backend 229 ) 230 / n 231 ) 232 grad += self.lambda1 * B[0:init_p, :].sum(axis=0)[:, None] 233 grad += self.lambda2 * B[init_p:p, :].sum(axis=0)[:, None] 234 235 return grad.flatten() 236 237 # hessian ----- 238 if hessian is True: 239 Kp = K * p 240 hess = np.zeros((Kp, Kp), float) 241 for k1 in range(K): 242 x_index = range(k1 * p, (k1 + 1) * p) 243 for k2 in range(k1, K): 244 y_index = range(k2 * p, (k2 + 1) * p) 245 H_sub = ( 246 -mo.safe_sparse_dot( 247 a=X.T, 248 b=(probs[:, k1] * probs[:, k2])[:, None] * X, 249 backend=self.backend, 250 ) 251 / n 252 ) # do not store 253 hess[np.ix_(x_index, y_index)] = hess[ 254 np.ix_(y_index, x_index) 255 ] = H_sub 256 257 return hess + (self.lambda1 + self.lambda2) * np.identity(Kp) 258 259 # total number of covariates 260 p = X.shape[1] 261 262 # initial number of covariates 263 init_p = p - self.n_hidden_features 264 265 # log-likelihood (1st return) 266 def loglik_func(x): 267 # (p, K) 268 B = x.reshape(Y.shape[1], p).T 269 270 # (n, K) 271 XB = mo.safe_sparse_dot(X, B, backend=self.backend) 272 273 res = -(np.sum(Y * XB, axis=1) - logsumexp(XB)).mean() 274 275 res += ( 276 0.5 277 * self.lambda1 278 * mo.squared_norm(B[0:init_p, :], backend=self.backend) 279 ) 280 res += ( 281 0.5 282 * self.lambda2 283 * mo.squared_norm(B[init_p:p, :], backend=self.backend) 284 ) 285 286 return res 287 288 # gradient of log-likelihood 289 def grad_func(x): 290 # (p, K) 291 B = x.reshape(Y.shape[1], p).T 292 293 return loglik_grad_hess( 294 Y=Y, 295 X=X, 296 B=B, 297 XB=mo.safe_sparse_dot(X, B, backend=self.backend), 298 hessian=False, 299 **kwargs 300 ) 301 302 # hessian of log-likelihood 303 def hessian_func(x): 304 # (p, K) 305 B = x.reshape(Y.shape[1], p).T 306 307 return loglik_grad_hess( 308 Y=Y, 309 X=X, 310 B=B, 311 XB=mo.safe_sparse_dot(X, B, backend=self.backend), 312 hessian=True, 313 **kwargs 314 ) 315 316 return loglik_func, grad_func, hessian_func 317 318 # newton-cg 319 # L-BFGS-B 320 def fit(self, X, y, **kwargs): 321 """Fit Ridge model to training data (X, y). 322 323 for beta: regression coeffs (beta11, ..., beta1p, ..., betaK1, ..., betaKp) 324 for K classes and p covariates. 325 326 Args: 327 328 X: {array-like}, shape = [n_samples, n_features] 329 Training vectors, where n_samples is the number 330 of samples and n_features is the number of features. 331 332 y: array-like, shape = [n_samples] 333 Target values. 334 335 **kwargs: additional parameters to be passed to 336 self.cook_training_set or self.obj.fit 337 338 Returns: 339 340 self: object 341 342 """ 343 344 assert mx.is_factor(y), "y must contain only integers" 345 346 output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 347 348 self.n_classes = len(np.unique(y)) 349 self.classes_ = np.unique(y) # for compatibility with sklearn 350 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 351 352 Y = mo.one_hot_encode2(output_y, self.n_classes) 353 354 # optimize for beta, minimize self.loglik (maximize loglik) ----- 355 loglik_func, grad_func, hessian_func = self.loglik(X=scaled_Z, Y=Y) 356 357 if self.solver == "L-BFGS-B": 358 opt = minimize( 359 fun=loglik_func, 360 x0=np.zeros(scaled_Z.shape[1] * self.n_classes), 361 jac=grad_func, 362 method=self.solver, 363 ) 364 self.beta_ = opt.x 365 self.minloglik_ = opt.fun 366 367 if self.solver in ("Newton-CG", "trust-ncg"): 368 opt = minimize( 369 fun=loglik_func, 370 x0=np.zeros(scaled_Z.shape[1] * self.n_classes), 371 jac=grad_func, 372 hess=hessian_func, 373 method=self.solver, 374 ) 375 self.beta_ = opt.x 376 self.minloglik_ = opt.fun 377 378 if self.solver == "L-BFGS-B-lstsq": 379 opt = minimize( 380 fun=loglik_func, 381 x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten( 382 order="F" 383 ), 384 jac=grad_func, 385 method="L-BFGS-B", 386 ) 387 self.beta_ = opt.x 388 self.minloglik_ = opt.fun 389 390 if self.solver in "Newton-CG-lstsq": 391 opt = minimize( 392 fun=loglik_func, 393 x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten( 394 order="F" 395 ), 396 jac=grad_func, 397 hess=hessian_func, 398 method="Newton-CG", 399 ) 400 self.beta_ = opt.x 401 self.minloglik_ = opt.fun 402 403 if self.solver in "trust-ncg-lstsq": 404 opt = minimize( 405 fun=loglik_func, 406 x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten( 407 order="F" 408 ), 409 jac=grad_func, 410 hess=hessian_func, 411 method="trust-ncg", 412 ) 413 self.beta_ = opt.x 414 self.minloglik_ = opt.fun 415 416 self.classes_ = np.unique(y) 417 418 return self 419 420 def predict(self, X, **kwargs): 421 """Predict test data X. 422 423 Args: 424 425 X: {array-like}, shape = [n_samples, n_features] 426 Training vectors, where n_samples is the number 427 of samples and n_features is the number of features. 428 429 **kwargs: additional parameters to be passed to 430 self.cook_test_set 431 432 Returns: 433 434 model predictions: {array-like} 435 """ 436 437 return np.argmax(self.predict_proba(X, **kwargs), axis=1) 438 439 def predict_proba(self, X, **kwargs): 440 """Predict probabilities for test data X. 441 442 Args: 443 444 X: {array-like}, shape = [n_samples, n_features] 445 Training vectors, where n_samples is the number 446 of samples and n_features is the number of features. 447 448 **kwargs: additional parameters to be passed to 449 self.cook_test_set 450 451 Returns: 452 453 probability estimates for test data: {array-like} 454 455 """ 456 if len(X.shape) == 1: 457 n_features = X.shape[0] 458 new_X = mo.rbind( 459 X.reshape(1, n_features), 460 np.ones(n_features).reshape(1, n_features), 461 ) 462 463 Z = self.cook_test_set(new_X, **kwargs) 464 465 else: 466 Z = self.cook_test_set(X, **kwargs) 467 468 ZB = mo.safe_sparse_dot( 469 a=Z, 470 b=self.beta_.reshape( 471 self.n_classes, 472 X.shape[1] + self.n_hidden_features + self.n_clusters, 473 ).T, 474 backend=self.backend, 475 ) 476 477 exp_ZB = np.exp(ZB) 478 479 return exp_ZB / exp_ZB.sum(axis=1)[:, None] 480 481 @property 482 def _estimator_type(self): 483 return "classifier"
Multinomial logit classification with 2 regularization parameters
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
lambda1: float
regularization parameter on direct link
lambda2: float
regularization parameter on hidden layer
solver: str
optimization function "L-BFGS-B", "Newton-CG",
"trust-ncg", "L-BFGS-B-lstsq", "Newton-CG-lstsq",
"trust-ncg-lstsq" (see scipy.optimize.minimize)
When using "L-BFGS-B-lstsq", "Newton-CG-lstsq", or "trust-ncg-lstsq",
the initial value for the optimization is set to the least squares solution
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
beta_: {array-like}
regression coefficients
classes_: {array-like}
unique classes in the target variable
minloglik_: float
minimum value of the negative log-likelihood
Examples:
See also https://github.com/Techtonique/nnetsauce/blob/master/examples/ridge_classification.py
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from time import time
breast_cancer = load_breast_cancer()
X = breast_cancer.data
y = breast_cancer.target
# split data into training test and test set
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
# create the model with nnetsauce
fit_obj = ns.Ridge2Classifier(lambda1 = 6.90185578e+04,
lambda2 = 3.17392781e+02,
n_hidden_features=95,
n_clusters=2,
dropout = 3.62817383e-01,
type_clust = "gmm")
# fit the model on training set
start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")
# get the accuracy on test set
start = time()
print(fit_obj.score(X_test, y_test))
print(f"Elapsed {time() - start}")
# get area under the curve on test set (auc)
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
320 def fit(self, X, y, **kwargs): 321 """Fit Ridge model to training data (X, y). 322 323 for beta: regression coeffs (beta11, ..., beta1p, ..., betaK1, ..., betaKp) 324 for K classes and p covariates. 325 326 Args: 327 328 X: {array-like}, shape = [n_samples, n_features] 329 Training vectors, where n_samples is the number 330 of samples and n_features is the number of features. 331 332 y: array-like, shape = [n_samples] 333 Target values. 334 335 **kwargs: additional parameters to be passed to 336 self.cook_training_set or self.obj.fit 337 338 Returns: 339 340 self: object 341 342 """ 343 344 assert mx.is_factor(y), "y must contain only integers" 345 346 output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 347 348 self.n_classes = len(np.unique(y)) 349 self.classes_ = np.unique(y) # for compatibility with sklearn 350 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 351 352 Y = mo.one_hot_encode2(output_y, self.n_classes) 353 354 # optimize for beta, minimize self.loglik (maximize loglik) ----- 355 loglik_func, grad_func, hessian_func = self.loglik(X=scaled_Z, Y=Y) 356 357 if self.solver == "L-BFGS-B": 358 opt = minimize( 359 fun=loglik_func, 360 x0=np.zeros(scaled_Z.shape[1] * self.n_classes), 361 jac=grad_func, 362 method=self.solver, 363 ) 364 self.beta_ = opt.x 365 self.minloglik_ = opt.fun 366 367 if self.solver in ("Newton-CG", "trust-ncg"): 368 opt = minimize( 369 fun=loglik_func, 370 x0=np.zeros(scaled_Z.shape[1] * self.n_classes), 371 jac=grad_func, 372 hess=hessian_func, 373 method=self.solver, 374 ) 375 self.beta_ = opt.x 376 self.minloglik_ = opt.fun 377 378 if self.solver == "L-BFGS-B-lstsq": 379 opt = minimize( 380 fun=loglik_func, 381 x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten( 382 order="F" 383 ), 384 jac=grad_func, 385 method="L-BFGS-B", 386 ) 387 self.beta_ = opt.x 388 self.minloglik_ = opt.fun 389 390 if self.solver in "Newton-CG-lstsq": 391 opt = minimize( 392 fun=loglik_func, 393 x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten( 394 order="F" 395 ), 396 jac=grad_func, 397 hess=hessian_func, 398 method="Newton-CG", 399 ) 400 self.beta_ = opt.x 401 self.minloglik_ = opt.fun 402 403 if self.solver in "trust-ncg-lstsq": 404 opt = minimize( 405 fun=loglik_func, 406 x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten( 407 order="F" 408 ), 409 jac=grad_func, 410 hess=hessian_func, 411 method="trust-ncg", 412 ) 413 self.beta_ = opt.x 414 self.minloglik_ = opt.fun 415 416 self.classes_ = np.unique(y) 417 418 return self
Fit Ridge model to training data (X, y).
for beta: regression coeffs (beta11, ..., beta1p, ..., betaK1, ..., betaKp) for K classes and p covariates.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
420 def predict(self, X, **kwargs): 421 """Predict test data X. 422 423 Args: 424 425 X: {array-like}, shape = [n_samples, n_features] 426 Training vectors, where n_samples is the number 427 of samples and n_features is the number of features. 428 429 **kwargs: additional parameters to be passed to 430 self.cook_test_set 431 432 Returns: 433 434 model predictions: {array-like} 435 """ 436 437 return np.argmax(self.predict_proba(X, **kwargs), axis=1)
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
439 def predict_proba(self, X, **kwargs): 440 """Predict probabilities for test data X. 441 442 Args: 443 444 X: {array-like}, shape = [n_samples, n_features] 445 Training vectors, where n_samples is the number 446 of samples and n_features is the number of features. 447 448 **kwargs: additional parameters to be passed to 449 self.cook_test_set 450 451 Returns: 452 453 probability estimates for test data: {array-like} 454 455 """ 456 if len(X.shape) == 1: 457 n_features = X.shape[0] 458 new_X = mo.rbind( 459 X.reshape(1, n_features), 460 np.ones(n_features).reshape(1, n_features), 461 ) 462 463 Z = self.cook_test_set(new_X, **kwargs) 464 465 else: 466 Z = self.cook_test_set(X, **kwargs) 467 468 ZB = mo.safe_sparse_dot( 469 a=Z, 470 b=self.beta_.reshape( 471 self.n_classes, 472 X.shape[1] + self.n_hidden_features + self.n_clusters, 473 ).T, 474 backend=self.backend, 475 ) 476 477 exp_ZB = np.exp(ZB) 478 479 return exp_ZB / exp_ZB.sum(axis=1)[:, None]
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
23class Ridge2MultitaskClassifier(Ridge2, ClassifierMixin): 24 """Multitask Ridge classification with 2 regularization parameters 25 26 Parameters: 27 28 n_hidden_features: int 29 number of nodes in the hidden layer 30 31 activation_name: str 32 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 33 34 a: float 35 hyperparameter for 'prelu' or 'elu' activation function 36 37 nodes_sim: str 38 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 39 'uniform' 40 41 bias: boolean 42 indicates if the hidden layer contains a bias term (True) or not 43 (False) 44 45 dropout: float 46 regularization parameter; (random) percentage of nodes dropped out 47 of the training 48 49 n_clusters: int 50 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: 51 no clustering) 52 53 cluster_encode: bool 54 defines how the variable containing clusters is treated (default is one-hot) 55 if `False`, then labels are used, without one-hot encoding 56 57 type_clust: str 58 type of clustering method: currently k-means ('kmeans') or Gaussian 59 Mixture Model ('gmm') 60 61 type_scaling: a tuple of 3 strings 62 scaling methods for inputs, hidden layer, and clustering respectively 63 (and when relevant). 64 Currently available: standardization ('std') or MinMax scaling ('minmax') 65 66 lambda1: float 67 regularization parameter on direct link 68 69 lambda2: float 70 regularization parameter on hidden layer 71 72 seed: int 73 reproducibility seed for nodes_sim=='uniform' 74 75 backend: str 76 "cpu" or "gpu" or "tpu" 77 78 Attributes: 79 80 beta_: {array-like} 81 regression coefficients 82 83 Examples: 84 85 See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/ridgemtask_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/ridgemtask_classification.py) 86 87 ```python 88 import nnetsauce as ns 89 import numpy as np 90 from sklearn.datasets import load_breast_cancer 91 from sklearn.model_selection import train_test_split 92 from sklearn import metrics 93 from time import time 94 95 breast_cancer = load_breast_cancer() 96 Z = breast_cancer.data 97 t = breast_cancer.target 98 np.random.seed(123) 99 X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2) 100 101 fit_obj = ns.Ridge2MultitaskClassifier(n_hidden_features=int(9.83730469e+01), 102 dropout=4.31054687e-01, 103 n_clusters=int(1.71484375e+00), 104 lambda1=1.24023438e+01, lambda2=7.30263672e+03) 105 106 start = time() 107 fit_obj.fit(X_train, y_train) 108 print(f"Elapsed {time() - start}") 109 110 print(fit_obj.score(X_test, y_test)) 111 print(fit_obj.score(X_test, y_test, scoring="roc_auc")) 112 113 start = time() 114 preds = fit_obj.predict(X_test) 115 print(f"Elapsed {time() - start}") 116 print(metrics.classification_report(preds, y_test)) 117 ``` 118 119 """ 120 121 # construct the object ----- 122 _estimator_type = "classifier" 123 124 def __init__( 125 self, 126 n_hidden_features=5, 127 activation_name="relu", 128 a=0.01, 129 nodes_sim="sobol", 130 bias=True, 131 dropout=0, 132 n_clusters=2, 133 cluster_encode=True, 134 type_clust="kmeans", 135 type_scaling=("std", "std", "std"), 136 lambda1=0.1, 137 lambda2=0.1, 138 seed=123, 139 backend="cpu", 140 ): 141 super().__init__( 142 n_hidden_features=n_hidden_features, 143 activation_name=activation_name, 144 a=a, 145 nodes_sim=nodes_sim, 146 bias=bias, 147 dropout=dropout, 148 n_clusters=n_clusters, 149 cluster_encode=cluster_encode, 150 type_clust=type_clust, 151 type_scaling=type_scaling, 152 lambda1=lambda1, 153 lambda2=lambda2, 154 seed=seed, 155 backend=backend, 156 ) 157 158 self.type_fit = "classification" 159 160 def fit(self, X, y, **kwargs): 161 """Fit Ridge model to training data (X, y). 162 163 Args: 164 165 X: {array-like}, shape = [n_samples, n_features] 166 Training vectors, where n_samples is the number 167 of samples and n_features is the number of features. 168 169 y: array-like, shape = [n_samples] 170 Target values. 171 172 **kwargs: additional parameters to be passed to 173 self.cook_training_set or self.obj.fit 174 175 Returns: 176 177 self: object 178 179 """ 180 181 sys_platform = platform.system() 182 183 assert mx.is_factor(y), "y must contain only integers" 184 185 self.classes_ = np.unique(y) # for compatibility with sklearn 186 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 187 188 output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 189 190 n_X, p_X = X.shape 191 n_Z, p_Z = scaled_Z.shape 192 193 self.n_classes = len(np.unique(y)) 194 195 # multitask response 196 Y = mo.one_hot_encode2(output_y, self.n_classes) 197 198 if self.n_clusters > 0: 199 if self.encode_clusters == True: 200 n_features = p_X + self.n_clusters 201 else: 202 n_features = p_X + 1 203 else: 204 n_features = p_X 205 206 X_ = scaled_Z[:, 0:n_features] 207 Phi_X_ = scaled_Z[:, n_features:p_Z] 208 209 B = mo.crossprod(x=X_, backend=self.backend) + self.lambda1 * np.diag( 210 np.repeat(1, X_.shape[1]) 211 ) 212 C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend) 213 D = mo.crossprod( 214 x=Phi_X_, backend=self.backend 215 ) + self.lambda2 * np.diag(np.repeat(1, Phi_X_.shape[1])) 216 217 if sys_platform in ("Linux", "Darwin"): 218 B_inv = pinv(B) if self.backend == "cpu" else jpinv(B) 219 else: 220 B_inv = pinv(B) 221 222 W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend) 223 S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend) 224 225 if sys_platform in ("Linux", "Darwin"): 226 S_inv = pinv(S_mat) if self.backend == "cpu" else jpinv(S_mat) 227 else: 228 S_inv = pinv(S_mat) 229 230 Y2 = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend) 231 inv = mo.rbind( 232 mo.cbind( 233 x=B_inv + mo.crossprod(x=W, y=Y2, backend=self.backend), 234 y=-np.transpose(Y2), 235 backend=self.backend, 236 ), 237 mo.cbind(x=-Y2, y=S_inv, backend=self.backend), 238 backend=self.backend, 239 ) 240 241 self.beta_ = mo.safe_sparse_dot( 242 a=inv, 243 b=mo.crossprod(x=scaled_Z, y=Y, backend=self.backend), 244 backend=self.backend, 245 ) 246 self.classes_ = np.unique(y) 247 return self 248 249 def predict(self, X, **kwargs): 250 """Predict test data X. 251 252 Args: 253 254 X: {array-like}, shape = [n_samples, n_features] 255 Training vectors, where n_samples is the number 256 of samples and n_features is the number of features. 257 258 **kwargs: additional parameters to be passed to 259 self.cook_test_set 260 261 Returns: 262 263 model predictions: {array-like} 264 265 """ 266 267 return np.argmax(self.predict_proba(X, **kwargs), axis=1) 268 269 def predict_proba(self, X, **kwargs): 270 """Predict probabilities for test data X. 271 272 Args: 273 274 X: {array-like}, shape = [n_samples, n_features] 275 Training vectors, where n_samples is the number 276 of samples and n_features is the number of features. 277 278 **kwargs: additional parameters to be passed to 279 self.cook_test_set 280 281 Returns: 282 283 probability estimates for test data: {array-like} 284 285 """ 286 287 if len(X.shape) == 1: 288 n_features = X.shape[0] 289 new_X = mo.rbind( 290 x=X.reshape(1, n_features), 291 y=np.ones(n_features).reshape(1, n_features), 292 backend=self.backend, 293 ) 294 295 Z = self.cook_test_set(new_X, **kwargs) 296 297 else: 298 Z = self.cook_test_set(X, **kwargs) 299 300 ZB = mo.safe_sparse_dot(a=Z, b=self.beta_, backend=self.backend) 301 302 exp_ZB = np.exp(ZB) 303 304 return exp_ZB / exp_ZB.sum(axis=1)[:, None] 305 306 def score(self, X, y, scoring=None): 307 """Scoring function for classification. 308 309 Args: 310 311 X: {array-like}, shape = [n_samples, n_features] 312 Training vectors, where n_samples is the number 313 of samples and n_features is the number of features. 314 315 y: array-like, shape = [n_samples] 316 Target values. 317 318 scoring: str 319 scoring method (default is accuracy) 320 321 Returns: 322 323 score: float 324 """ 325 326 if scoring is None: 327 scoring = "accuracy" 328 329 if scoring == "accuracy": 330 return skm2.accuracy_score(y, self.predict(X)) 331 332 if scoring == "f1": 333 return skm2.f1_score(y, self.predict(X)) 334 335 if scoring == "precision": 336 return skm2.precision_score(y, self.predict(X)) 337 338 if scoring == "recall": 339 return skm2.recall_score(y, self.predict(X)) 340 341 if scoring == "roc_auc": 342 return skm2.roc_auc_score(y, self.predict(X)) 343 344 if scoring == "log_loss": 345 return skm2.log_loss(y, self.predict_proba(X)) 346 347 if scoring == "balanced_accuracy": 348 return skm2.balanced_accuracy_score(y, self.predict(X)) 349 350 if scoring == "average_precision": 351 return skm2.average_precision_score(y, self.predict(X)) 352 353 if scoring == "neg_brier_score": 354 return -skm2.brier_score_loss(y, self.predict_proba(X)) 355 356 if scoring == "neg_log_loss": 357 return -skm2.log_loss(y, self.predict_proba(X)) 358 359 @property 360 def _estimator_type(self): 361 return "classifier"
Multitask Ridge classification with 2 regularization parameters
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
lambda1: float
regularization parameter on direct link
lambda2: float
regularization parameter on hidden layer
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
beta_: {array-like}
regression coefficients
Examples:
See also https://github.com/Techtonique/nnetsauce/blob/master/examples/ridgemtask_classification.py
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time
breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)
fit_obj = ns.Ridge2MultitaskClassifier(n_hidden_features=int(9.83730469e+01),
dropout=4.31054687e-01,
n_clusters=int(1.71484375e+00),
lambda1=1.24023438e+01, lambda2=7.30263672e+03)
start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")
print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
160 def fit(self, X, y, **kwargs): 161 """Fit Ridge model to training data (X, y). 162 163 Args: 164 165 X: {array-like}, shape = [n_samples, n_features] 166 Training vectors, where n_samples is the number 167 of samples and n_features is the number of features. 168 169 y: array-like, shape = [n_samples] 170 Target values. 171 172 **kwargs: additional parameters to be passed to 173 self.cook_training_set or self.obj.fit 174 175 Returns: 176 177 self: object 178 179 """ 180 181 sys_platform = platform.system() 182 183 assert mx.is_factor(y), "y must contain only integers" 184 185 self.classes_ = np.unique(y) # for compatibility with sklearn 186 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 187 188 output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 189 190 n_X, p_X = X.shape 191 n_Z, p_Z = scaled_Z.shape 192 193 self.n_classes = len(np.unique(y)) 194 195 # multitask response 196 Y = mo.one_hot_encode2(output_y, self.n_classes) 197 198 if self.n_clusters > 0: 199 if self.encode_clusters == True: 200 n_features = p_X + self.n_clusters 201 else: 202 n_features = p_X + 1 203 else: 204 n_features = p_X 205 206 X_ = scaled_Z[:, 0:n_features] 207 Phi_X_ = scaled_Z[:, n_features:p_Z] 208 209 B = mo.crossprod(x=X_, backend=self.backend) + self.lambda1 * np.diag( 210 np.repeat(1, X_.shape[1]) 211 ) 212 C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend) 213 D = mo.crossprod( 214 x=Phi_X_, backend=self.backend 215 ) + self.lambda2 * np.diag(np.repeat(1, Phi_X_.shape[1])) 216 217 if sys_platform in ("Linux", "Darwin"): 218 B_inv = pinv(B) if self.backend == "cpu" else jpinv(B) 219 else: 220 B_inv = pinv(B) 221 222 W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend) 223 S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend) 224 225 if sys_platform in ("Linux", "Darwin"): 226 S_inv = pinv(S_mat) if self.backend == "cpu" else jpinv(S_mat) 227 else: 228 S_inv = pinv(S_mat) 229 230 Y2 = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend) 231 inv = mo.rbind( 232 mo.cbind( 233 x=B_inv + mo.crossprod(x=W, y=Y2, backend=self.backend), 234 y=-np.transpose(Y2), 235 backend=self.backend, 236 ), 237 mo.cbind(x=-Y2, y=S_inv, backend=self.backend), 238 backend=self.backend, 239 ) 240 241 self.beta_ = mo.safe_sparse_dot( 242 a=inv, 243 b=mo.crossprod(x=scaled_Z, y=Y, backend=self.backend), 244 backend=self.backend, 245 ) 246 self.classes_ = np.unique(y) 247 return self
Fit Ridge model to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
249 def predict(self, X, **kwargs): 250 """Predict test data X. 251 252 Args: 253 254 X: {array-like}, shape = [n_samples, n_features] 255 Training vectors, where n_samples is the number 256 of samples and n_features is the number of features. 257 258 **kwargs: additional parameters to be passed to 259 self.cook_test_set 260 261 Returns: 262 263 model predictions: {array-like} 264 265 """ 266 267 return np.argmax(self.predict_proba(X, **kwargs), axis=1)
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
269 def predict_proba(self, X, **kwargs): 270 """Predict probabilities for test data X. 271 272 Args: 273 274 X: {array-like}, shape = [n_samples, n_features] 275 Training vectors, where n_samples is the number 276 of samples and n_features is the number of features. 277 278 **kwargs: additional parameters to be passed to 279 self.cook_test_set 280 281 Returns: 282 283 probability estimates for test data: {array-like} 284 285 """ 286 287 if len(X.shape) == 1: 288 n_features = X.shape[0] 289 new_X = mo.rbind( 290 x=X.reshape(1, n_features), 291 y=np.ones(n_features).reshape(1, n_features), 292 backend=self.backend, 293 ) 294 295 Z = self.cook_test_set(new_X, **kwargs) 296 297 else: 298 Z = self.cook_test_set(X, **kwargs) 299 300 ZB = mo.safe_sparse_dot(a=Z, b=self.beta_, backend=self.backend) 301 302 exp_ZB = np.exp(ZB) 303 304 return exp_ZB / exp_ZB.sum(axis=1)[:, None]
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
306 def score(self, X, y, scoring=None): 307 """Scoring function for classification. 308 309 Args: 310 311 X: {array-like}, shape = [n_samples, n_features] 312 Training vectors, where n_samples is the number 313 of samples and n_features is the number of features. 314 315 y: array-like, shape = [n_samples] 316 Target values. 317 318 scoring: str 319 scoring method (default is accuracy) 320 321 Returns: 322 323 score: float 324 """ 325 326 if scoring is None: 327 scoring = "accuracy" 328 329 if scoring == "accuracy": 330 return skm2.accuracy_score(y, self.predict(X)) 331 332 if scoring == "f1": 333 return skm2.f1_score(y, self.predict(X)) 334 335 if scoring == "precision": 336 return skm2.precision_score(y, self.predict(X)) 337 338 if scoring == "recall": 339 return skm2.recall_score(y, self.predict(X)) 340 341 if scoring == "roc_auc": 342 return skm2.roc_auc_score(y, self.predict(X)) 343 344 if scoring == "log_loss": 345 return skm2.log_loss(y, self.predict_proba(X)) 346 347 if scoring == "balanced_accuracy": 348 return skm2.balanced_accuracy_score(y, self.predict(X)) 349 350 if scoring == "average_precision": 351 return skm2.average_precision_score(y, self.predict(X)) 352 353 if scoring == "neg_brier_score": 354 return -skm2.brier_score_loss(y, self.predict_proba(X)) 355 356 if scoring == "neg_log_loss": 357 return -skm2.log_loss(y, self.predict_proba(X))
Scoring function for classification.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method (default is accuracy)
Returns:
score: float
6class SubSampler: 7 """Subsampling class. 8 9 Attributes: 10 11 y: array-like, shape = [n_samples] 12 Target values. 13 14 row_sample: double 15 subsampling fraction 16 17 n_samples: int 18 subsampling by using the number of rows (supersedes row_sample) 19 20 seed: int 21 reproductibility seed 22 23 n_jobs: int 24 number of jobs to run in parallel 25 26 verbose: bool 27 print progress messages and bars 28 """ 29 30 def __init__( 31 self, 32 y, 33 row_sample=0.8, 34 n_samples=None, 35 seed=123, 36 n_jobs=None, 37 verbose=False, 38 ): 39 self.y = y 40 self.n_samples = n_samples 41 if self.n_samples is None: 42 assert ( 43 row_sample < 1 and row_sample >= 0 44 ), "'row_sample' must be provided, plus < 1 and >= 0" 45 self.row_sample = row_sample 46 else: 47 assert self.n_samples < len(y), "'n_samples' must be < len(y)" 48 self.row_sample = self.n_samples / len(y) 49 self.seed = seed 50 self.indices = None 51 self.n_jobs = n_jobs 52 self.verbose = verbose 53 54 def subsample(self): 55 """Returns indices of subsampled input data. 56 57 Examples: 58 59 <ul> 60 <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240105_subsampling.ipynb">20240105_subsampling.ipynb</a> </li> 61 <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240131_subsampling_nsamples.ipynb">20240131_subsampling_nsamples.ipynb</a> </li> 62 </ul> 63 64 """ 65 self.indices = dosubsample( 66 y=self.y, 67 row_sample=self.row_sample, 68 seed=self.seed, 69 n_jobs=self.n_jobs, 70 verbose=self.verbose, 71 ) 72 return self.indices
Subsampling class.
Attributes:
y: array-like, shape = [n_samples] Target values.
row_sample: double subsampling fraction
n_samples: int subsampling by using the number of rows (supersedes row_sample)
seed: int reproductibility seed
n_jobs: int number of jobs to run in parallel
verbose: bool print progress messages and bars
54 def subsample(self): 55 """Returns indices of subsampled input data. 56 57 Examples: 58 59 <ul> 60 <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240105_subsampling.ipynb">20240105_subsampling.ipynb</a> </li> 61 <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240131_subsampling_nsamples.ipynb">20240131_subsampling_nsamples.ipynb</a> </li> 62 </ul> 63 64 """ 65 self.indices = dosubsample( 66 y=self.y, 67 row_sample=self.row_sample, 68 seed=self.seed, 69 n_jobs=self.n_jobs, 70 verbose=self.verbose, 71 ) 72 return self.indices
Returns indices of subsampled input data.
Examples: