nnetsauce
```python
from .base.base import Base
from .base.baseRegressor import BaseRegressor
from .boosting.adaBoostClassifier import AdaBoostClassifier
from .custom.customClassifier import CustomClassifier
from .custom.customRegressor import CustomRegressor
from .custom.customBackpropRegressor import CustomBackPropRegressor
from .datasets import Downloader
from .deep.deepClassifier import DeepClassifier
from .deep.deepRegressor import DeepRegressor
from .deep.deepMTS import DeepMTS
from .glm.glmClassifier import GLMClassifier
from .glm.glmRegressor import GLMRegressor
from .kernel.kernel import KernelRidge
from .lazypredict.lazydeepClassifier import LazyDeepClassifier, LazyClassifier
from .lazypredict.lazydeepRegressor import LazyDeepRegressor, LazyRegressor
from .lazypredict.lazydeepClassifier import LazyDeepClassifier
from .lazypredict.lazydeepRegressor import LazyDeepRegressor
from .lazypredict.lazydeepMTS import LazyDeepMTS, LazyMTS
from .mts.mts import MTS
from .mts.mlarch import MLARCH
from .mts.classical import ClassicalMTS
from .multitask.multitaskClassifier import MultitaskClassifier
from .multitask.simplemultitaskClassifier import SimpleMultitaskClassifier
from .neuralnet.neuralnetregression import NeuralNetRegressor
from .neuralnet.neuralnetclassification import NeuralNetClassifier
from .optimizers.optimizer import Optimizer
from .predictioninterval import PredictionInterval
from .quantile.quantileregression import QuantileRegressor
from .quantile.quantileclassification import QuantileClassifier
from .randombag.randomBagClassifier import RandomBagClassifier
from .randombag.randomBagRegressor import RandomBagRegressor
from .ridge.ridge import RidgeRegressor
from .ridge2.ridge2Classifier import Ridge2Classifier
from .ridge2.ridge2Regressor import Ridge2Regressor
from .ridge2.ridge2MultitaskClassifier import Ridge2MultitaskClassifier
from .ridge2.elasticNet2Regressor import ElasticNet2Regressor
from .rvfl.bayesianrvflRegressor import BayesianRVFLRegressor
from .rvfl.bayesianrvfl2Regressor import BayesianRVFL2Regressor
from .sampling import SubSampler
from .updater import RegressorUpdater, ClassifierUpdater
from .votingregressor import MedianVotingRegressor

__all__ = [
    "AdaBoostClassifier",
    "Base",
    "BaseRegressor",
    "BayesianRVFLRegressor",
    "BayesianRVFL2Regressor",
    "ClassicalMTS",
    "CustomClassifier",
    "CustomRegressor",
    "CustomBackPropRegressor",
    "DeepClassifier",
    "DeepRegressor",
    "DeepMTS",
    "Downloader",
    "ElasticNet2Regressor",
    "GLMClassifier",
    "GLMRegressor",
    "KernelRidge",
    "LazyClassifier",
    "LazyRegressor",
    "LazyDeepClassifier",
    "LazyDeepRegressor",
    "LazyMTS",
    "LazyDeepMTS",
    "MLARCH",
    "MedianVotingRegressor",
    "MTS",
    "MultitaskClassifier",
    "NeuralNetRegressor",
    "NeuralNetClassifier",
    "PredictionInterval",
    "SimpleMultitaskClassifier",
    "Optimizer",
    "QuantileRegressor",
    "QuantileClassifier",
    "RandomBagRegressor",
    "RandomBagClassifier",
    "RegressorUpdater",
    "ClassifierUpdater",
    "RidgeRegressor",
    "Ridge2Regressor",
    "Ridge2Classifier",
    "Ridge2MultitaskClassifier",
    "SubSampler",
]
```
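A minimal usage sketch of the public API listed above (hedged: it assumes scikit-learn is installed and that `CustomRegressor` follows the same `obj`-wrapping pattern shown for the other estimators in this package; the base learner and dataset are purely illustrative):

```python
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# wrap a scikit-learn-like regressor into a quasi-randomized network
model = ns.CustomRegressor(RandomForestRegressor(n_estimators=100, random_state=0))
model.fit(X_train, y_train)
print(model.score(X_test, y_test))
```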
21class AdaBoostClassifier(Boosting, ClassifierMixin): 22 """AdaBoost Classification (SAMME) model class derived from class Boosting 23 24 Parameters: 25 26 obj: object 27 any object containing a method fit (obj.fit()) and a method predict 28 (obj.predict()) 29 30 n_estimators: int 31 number of boosting iterations 32 33 learning_rate: float 34 learning rate of the boosting procedure 35 36 n_hidden_features: int 37 number of nodes in the hidden layer 38 39 reg_lambda: float 40 regularization parameter for weights 41 42 reg_alpha: float 43 controls compromize between l1 and l2 norm of weights 44 45 activation_name: str 46 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 47 48 a: float 49 hyperparameter for 'prelu' or 'elu' activation function 50 51 nodes_sim: str 52 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 53 'uniform' 54 55 bias: boolean 56 indicates if the hidden layer contains a bias term (True) or not 57 (False) 58 59 dropout: float 60 regularization parameter; (random) percentage of nodes dropped out 61 of the training 62 63 direct_link: boolean 64 indicates if the original predictors are included (True) in model's 65 fitting or not (False) 66 67 n_clusters: int 68 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: 69 no clustering) 70 71 cluster_encode: bool 72 defines how the variable containing clusters is treated (default is one-hot) 73 if `False`, then labels are used, without one-hot encoding 74 75 type_clust: str 76 type of clustering method: currently k-means ('kmeans') or Gaussian 77 Mixture Model ('gmm') 78 79 type_scaling: a tuple of 3 strings 80 scaling methods for inputs, hidden layer, and clustering respectively 81 (and when relevant). 82 Currently available: standardization ('std') or MinMax scaling ('minmax') 83 84 col_sample: float 85 percentage of covariates randomly chosen for training 86 87 row_sample: float 88 percentage of rows chosen for training, by stratified bootstrapping 89 90 seed: int 91 reproducibility seed for nodes_sim=='uniform' 92 93 verbose: int 94 0 for no output, 1 for a progress bar (default is 1) 95 96 method: str 97 type of Adaboost method, 'SAMME' (discrete) or 'SAMME.R' (real) 98 99 backend: str 100 "cpu" or "gpu" or "tpu" 101 102 Attributes: 103 104 alpha_: list 105 AdaBoost coefficients alpha_m 106 107 base_learners_: dict 108 a dictionary containing the base learners 109 110 Examples: 111 112 See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/adaboost_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/adaboost_classification.py) 113 114 ```python 115 import nnetsauce as ns 116 import numpy as np 117 from sklearn.datasets import load_breast_cancer 118 from sklearn.linear_model import LogisticRegression 119 from sklearn.model_selection import train_test_split 120 from sklearn import metrics 121 from time import time 122 123 breast_cancer = load_breast_cancer() 124 Z = breast_cancer.data 125 t = breast_cancer.target 126 np.random.seed(123) 127 X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2) 128 129 # SAMME.R 130 clf = LogisticRegression(solver='liblinear', multi_class = 'ovr', 131 random_state=123) 132 fit_obj = ns.AdaBoostClassifier(clf, 133 n_hidden_features=int(11.22338867), 134 direct_link=True, 135 n_estimators=250, learning_rate=0.01126343, 136 col_sample=0.72684326, row_sample=0.86429443, 137 dropout=0.63078613, n_clusters=2, 138 type_clust="gmm", 139 verbose=1, seed = 123, 140 method="SAMME.R") 141 142 start 
= time() 143 fit_obj.fit(X_train, y_train) 144 print(f"Elapsed {time() - start}") 145 146 start = time() 147 print(fit_obj.score(X_test, y_test)) 148 print(f"Elapsed {time() - start}") 149 150 preds = fit_obj.predict(X_test) 151 152 print(metrics.classification_report(preds, y_test)) 153 154 ``` 155 156 """ 157 158 # construct the object ----- 159 _estimator_type = "classifier" 160 161 def __init__( 162 self, 163 obj, 164 n_estimators=10, 165 learning_rate=0.1, 166 n_hidden_features=1, 167 reg_lambda=0, 168 reg_alpha=0.5, 169 activation_name="relu", 170 a=0.01, 171 nodes_sim="sobol", 172 bias=True, 173 dropout=0, 174 direct_link=False, 175 n_clusters=2, 176 cluster_encode=True, 177 type_clust="kmeans", 178 type_scaling=("std", "std", "std"), 179 col_sample=1, 180 row_sample=1, 181 seed=123, 182 verbose=1, 183 method="SAMME", 184 backend="cpu", 185 ): 186 self.type_fit = "classification" 187 self.verbose = verbose 188 self.method = method 189 self.reg_lambda = reg_lambda 190 self.reg_alpha = reg_alpha 191 192 super().__init__( 193 obj=obj, 194 n_estimators=n_estimators, 195 learning_rate=learning_rate, 196 n_hidden_features=n_hidden_features, 197 activation_name=activation_name, 198 a=a, 199 nodes_sim=nodes_sim, 200 bias=bias, 201 dropout=dropout, 202 direct_link=direct_link, 203 n_clusters=n_clusters, 204 cluster_encode=cluster_encode, 205 type_clust=type_clust, 206 type_scaling=type_scaling, 207 col_sample=col_sample, 208 row_sample=row_sample, 209 seed=seed, 210 backend=backend, 211 ) 212 213 self.alpha_ = [] 214 self.base_learners_ = dict.fromkeys(range(n_estimators)) 215 216 def fit(self, X, y, sample_weight=None, **kwargs): 217 """Fit Boosting model to training data (X, y). 218 219 Parameters: 220 221 X: {array-like}, shape = [n_samples, n_features] 222 Training vectors, where n_samples is the number 223 of samples and n_features is the number of features. 224 225 y: array-like, shape = [n_samples] 226 Target values. 
227 228 **kwargs: additional parameters to be passed to 229 self.cook_training_set or self.obj.fit 230 231 Returns: 232 233 self: object 234 """ 235 236 assert mx.is_factor(y), "y must contain only integers" 237 238 assert self.method in ( 239 "SAMME", 240 "SAMME.R", 241 ), "`method` must be either 'SAMME' or 'SAMME.R'" 242 243 assert (self.reg_lambda <= 1) & ( 244 self.reg_lambda >= 0 245 ), "must have self.reg_lambda <= 1 & self.reg_lambda >= 0" 246 247 assert (self.reg_alpha <= 1) & ( 248 self.reg_alpha >= 0 249 ), "must have self.reg_alpha <= 1 & self.reg_alpha >= 0" 250 251 # training 252 n, p = X.shape 253 self.n_classes = len(np.unique(y)) 254 self.classes_ = np.unique(y) # for compatibility with sklearn 255 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 256 257 if sample_weight is None: 258 w_m = np.repeat(1.0 / n, n) 259 else: 260 w_m = np.asarray(sample_weight) 261 262 base_learner = CustomClassifier( 263 self.obj, 264 n_hidden_features=self.n_hidden_features, 265 activation_name=self.activation_name, 266 a=self.a, 267 nodes_sim=self.nodes_sim, 268 bias=self.bias, 269 dropout=self.dropout, 270 direct_link=self.direct_link, 271 n_clusters=self.n_clusters, 272 type_clust=self.type_clust, 273 type_scaling=self.type_scaling, 274 col_sample=self.col_sample, 275 row_sample=self.row_sample, 276 seed=self.seed, 277 ) 278 279 if self.verbose == 1: 280 pbar = Progbar(self.n_estimators) 281 282 if self.method == "SAMME": 283 err_m = 1e6 284 err_bound = 1 - 1 / self.n_classes 285 self.alpha_.append(1.0) 286 x_range_n = range(n) 287 288 for m in range(self.n_estimators): 289 preds = base_learner.fit( 290 X, y, sample_weight=w_m.ravel(), **kwargs 291 ).predict(X) 292 293 self.base_learners_.update({m: deepcopy(base_learner)}) 294 295 cond = [y[i] != preds[i] for i in x_range_n] 296 297 err_m = max( 298 sum([elt[0] * elt[1] for elt in zip(cond, w_m)]), 299 2.220446049250313e-16, 300 ) # sum(w_m) == 1 301 302 if self.reg_lambda > 0: 303 err_m += self.reg_lambda * ( 304 (1 - self.reg_alpha) * 0.5 * sum([x**2 for x in w_m]) 305 + self.reg_alpha * sum([abs(x) for x in w_m]) 306 ) 307 308 err_m = min(err_m, err_bound) 309 310 alpha_m = self.learning_rate * log( 311 (self.n_classes - 1) * (1 - err_m) / err_m 312 ) 313 314 self.alpha_.append(alpha_m) 315 316 w_m_temp = [exp(alpha_m * cond[i]) for i in x_range_n] 317 318 sum_w_m = sum(w_m_temp) 319 320 w_m = np.asarray([w_m_temp[i] / sum_w_m for i in x_range_n]) 321 322 base_learner.set_params(seed=self.seed + (m + 1) * 1000) 323 324 if self.verbose == 1: 325 pbar.update(m) 326 327 if self.verbose == 1: 328 pbar.update(self.n_estimators) 329 330 self.n_estimators = len(self.base_learners_) 331 self.classes_ = np.unique(y) 332 333 return self 334 335 if self.method == "SAMME.R": 336 Y = mo.one_hot_encode2(y, self.n_classes) 337 338 if sample_weight is None: 339 w_m = np.repeat(1.0 / n, n) # (N, 1) 340 341 else: 342 w_m = np.asarray(sample_weight) 343 344 for m in range(self.n_estimators): 345 probs = base_learner.fit( 346 X, y, sample_weight=w_m.ravel(), **kwargs 347 ).predict_proba(X) 348 349 np.clip(a=probs, a_min=2.220446049250313e-16, a_max=1.0, out=probs) 350 351 self.base_learners_.update({m: deepcopy(base_learner)}) 352 353 w_m *= np.exp( 354 -1.0 355 * self.learning_rate 356 * (1.0 - 1.0 / self.n_classes) 357 * xlogy(Y, probs).sum(axis=1) 358 ) 359 360 w_m /= np.sum(w_m) 361 362 base_learner.set_params(seed=self.seed + (m + 1) * 1000) 363 364 if self.verbose == 1: 365 pbar.update(m) 366 367 if self.verbose == 1: 368 
pbar.update(self.n_estimators) 369 370 self.n_estimators = len(self.base_learners_) 371 self.classes_ = np.unique(y) 372 373 return self 374 375 def predict(self, X, **kwargs): 376 """Predict test data X. 377 378 Parameters: 379 380 X: {array-like}, shape = [n_samples, n_features] 381 Training vectors, where n_samples is the number 382 of samples and n_features is the number of features. 383 384 **kwargs: additional parameters to be passed to 385 self.cook_test_set 386 387 Returns: 388 389 model predictions: {array-like} 390 """ 391 return self.predict_proba(X, **kwargs).argmax(axis=1) 392 393 def predict_proba(self, X, **kwargs): 394 """Predict probabilities for test data X. 395 396 Parameters: 397 398 X: {array-like}, shape = [n_samples, n_features] 399 Training vectors, where n_samples is the number 400 of samples and n_features is the number of features. 401 402 **kwargs: additional parameters to be passed to 403 self.cook_test_set 404 405 Returns: 406 407 probability estimates for test data: {array-like} 408 409 """ 410 411 n_iter = len(self.base_learners_) 412 413 if self.method == "SAMME": 414 ensemble_learner = np.zeros((X.shape[0], self.n_classes)) 415 416 # if self.verbose == 1: 417 # pbar = Progbar(n_iter) 418 419 for idx, base_learner in self.base_learners_.items(): 420 preds = base_learner.predict(X, **kwargs) 421 422 ensemble_learner += self.alpha_[idx] * mo.one_hot_encode2( 423 preds, self.n_classes 424 ) 425 426 # if self.verbose == 1: 427 # pbar.update(idx) 428 429 # if self.verbose == 1: 430 # pbar.update(n_iter) 431 432 expit_ensemble_learner = expit(ensemble_learner) 433 434 sum_ensemble = expit_ensemble_learner.sum(axis=1) 435 436 return expit_ensemble_learner / sum_ensemble[:, None] 437 438 # if self.method == "SAMME.R": 439 ensemble_learner = 0 440 441 # if self.verbose == 1: 442 # pbar = Progbar(n_iter) 443 444 for idx, base_learner in self.base_learners_.items(): 445 probs = base_learner.predict_proba(X, **kwargs) 446 447 np.clip(a=probs, a_min=2.220446049250313e-16, a_max=1.0, out=probs) 448 449 log_preds_proba = np.log(probs) 450 451 ensemble_learner += log_preds_proba - log_preds_proba.mean(axis=1)[:, None] 452 453 # if self.verbose == 1: 454 # pbar.update(idx) 455 456 ensemble_learner *= self.n_classes - 1 457 458 # if self.verbose == 1: 459 # pbar.update(n_iter) 460 461 expit_ensemble_learner = expit(ensemble_learner) 462 463 sum_ensemble = expit_ensemble_learner.sum(axis=1) 464 465 return expit_ensemble_learner / sum_ensemble[:, None] 466 467 @property 468 def _estimator_type(self): 469 return "classifier"
AdaBoost Classification (SAMME) model class derived from class Boosting
Parameters:
obj: object
any object containing a method fit (obj.fit()) and a method predict
(obj.predict())
n_estimators: int
number of boosting iterations
learning_rate: float
learning rate of the boosting procedure
n_hidden_features: int
number of nodes in the hidden layer
reg_lambda: float
regularization parameter for weights
reg_alpha: float
controls the compromise between the l1 and l2 norms of the weights
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of hidden-layer nodes
dropped out during training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform'
verbose: int
0 for no output, 1 for a progress bar (default is 1)
method: str
type of AdaBoost method: 'SAMME' (discrete) or 'SAMME.R' (real)
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
alpha_: list
AdaBoost coefficients alpha_m
base_learners_: dict
a dictionary containing the base learners
Examples:
See also https://github.com/Techtonique/nnetsauce/blob/master/examples/adaboost_classification.py
```python
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time

breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)

# SAMME.R
clf = LogisticRegression(solver='liblinear', multi_class='ovr',
                         random_state=123)
fit_obj = ns.AdaBoostClassifier(clf,
                                n_hidden_features=int(11.22338867),
                                direct_link=True,
                                n_estimators=250, learning_rate=0.01126343,
                                col_sample=0.72684326, row_sample=0.86429443,
                                dropout=0.63078613, n_clusters=2,
                                type_clust="gmm",
                                verbose=1, seed=123,
                                method="SAMME.R")

start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")

start = time()
print(fit_obj.score(X_test, y_test))
print(f"Elapsed {time() - start}")

preds = fit_obj.predict(X_test)

print(metrics.classification_report(y_test, preds))
```
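For the discrete variant, only `method` changes; a hedged sketch reusing `clf` and the data split from the example above (the hyperparameter values are illustrative, not tuned):

```python
# SAMME (discrete): base learners only need a predict() method
fit_obj2 = ns.AdaBoostClassifier(clf,
                                 n_hidden_features=5,
                                 n_estimators=100,
                                 learning_rate=0.1,
                                 method="SAMME",
                                 verbose=0,
                                 seed=123)
fit_obj2.fit(X_train, y_train)
print(fit_obj2.score(X_test, y_test))
```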
fit(self, X, y, sample_weight=None, **kwargs)
Fit Boosting model to training data (X, y).
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
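`fit` also accepts an optional `sample_weight` vector used to seed the boosting weights; a hedged sketch reusing `fit_obj` and the training split from the class example above (the weights are arbitrary and purely illustrative; they are normalized because the SAMME update assumes they sum to 1):

```python
import numpy as np

w = np.ones(X_train.shape[0])
w[: len(w) // 2] = 2.0              # up-weight the first half of the samples (arbitrary choice)
fit_obj.fit(X_train, y_train, sample_weight=w / w.sum())
```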
predict(self, X, **kwargs)
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
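As the source shows, `predict` simply takes the argmax of `predict_proba` over classes; a quick check on the fitted `fit_obj` from the example above:

```python
import numpy as np

preds = fit_obj.predict(X_test)
probs = fit_obj.predict_proba(X_test)
assert np.array_equal(preds, probs.argmax(axis=1))  # class indices, not original labels
```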
predict_proba(self, X, **kwargs)
Predict probabilities for test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
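The returned matrix has one column per class, with each row normalized to sum to 1; a quick sanity check on the fitted `fit_obj` from the example above:

```python
probs = fit_obj.predict_proba(X_test)
print(probs.shape)            # (n_samples, n_classes)
print(probs.sum(axis=1)[:5])  # each row sums to 1, up to floating-point error
```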
46class Base(BaseEstimator): 47 """Base model from which all the other classes inherit. 48 49 This class contains the most important data preprocessing/feature engineering methods. 50 51 Parameters: 52 53 n_hidden_features: int 54 number of nodes in the hidden layer 55 56 activation_name: str 57 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 58 59 a: float 60 hyperparameter for 'prelu' or 'elu' activation function 61 62 nodes_sim: str 63 type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton', 64 'uniform' 65 66 bias: boolean 67 indicates if the hidden layer contains a bias term (True) or 68 not (False) 69 70 dropout: float 71 regularization parameter; (random) percentage of nodes dropped out 72 of the training 73 74 direct_link: boolean 75 indicates if the original features are included (True) in model's 76 fitting or not (False) 77 78 n_clusters: int 79 number of clusters for type_clust='kmeans' or type_clust='gmm' 80 clustering (could be 0: no clustering) 81 82 cluster_encode: bool 83 defines how the variable containing clusters is treated (default is one-hot); 84 if `False`, then labels are used, without one-hot encoding 85 86 type_clust: str 87 type of clustering method: currently k-means ('kmeans') or Gaussian 88 Mixture Model ('gmm') 89 90 type_scaling: a tuple of 3 strings 91 scaling methods for inputs, hidden layer, and clustering respectively 92 (and when relevant). 93 Currently available: standardization ('std') or MinMax scaling ('minmax') or robust scaling ('robust') or max absolute scaling ('maxabs') 94 95 col_sample: float 96 percentage of features randomly chosen for training 97 98 row_sample: float 99 percentage of rows chosen for training, by stratified bootstrapping 100 101 seed: int 102 reproducibility seed for nodes_sim=='uniform', clustering and dropout 103 104 backend: str 105 "cpu" or "gpu" or "tpu" 106 107 """ 108 109 # construct the object ----- 110 111 def __init__( 112 self, 113 n_hidden_features=5, 114 activation_name="relu", 115 a=0.01, 116 nodes_sim="sobol", 117 bias=True, 118 dropout=0, 119 direct_link=True, 120 n_clusters=2, 121 cluster_encode=True, 122 type_clust="kmeans", 123 type_scaling=("std", "std", "std"), 124 col_sample=1, 125 row_sample=1, 126 seed=123, 127 backend="cpu", 128 ): 129 # input checks ----- 130 131 sys_platform = platform.system() 132 133 if (sys_platform == "Windows") and (backend in ("gpu", "tpu")): 134 warnings.warn("No GPU/TPU computing on Windows yet, backend set to 'cpu'") 135 backend = "cpu" 136 137 assert activation_name in ( 138 "relu", 139 "tanh", 140 "sigmoid", 141 "prelu", 142 "elu", 143 ), "'activation_name' must be in ('relu', 'tanh', 'sigmoid','prelu', 'elu')" 144 145 assert nodes_sim in ( 146 "sobol", 147 "hammersley", 148 "uniform", 149 "halton", 150 ), "'nodes_sim' must be in ('sobol', 'hammersley', 'uniform', 'halton')" 151 152 assert type_clust in ( 153 "kmeans", 154 "gmm", 155 ), "'type_clust' must be in ('kmeans', 'gmm')" 156 157 assert (len(type_scaling) == 3) & all( 158 type_scaling[i] in ("minmax", "std", "robust", "maxabs") 159 for i in range(len(type_scaling)) 160 ), "'type_scaling' must have length 3, and available scaling methods are 'minmax' scaling, standardization ('std'), robust scaling ('robust') and max absolute ('maxabs')" 161 162 assert (col_sample >= 0) & ( 163 col_sample <= 1 164 ), "'col_sample' must be comprised between 0 and 1 (both included)" 165 166 assert backend in ( 167 "cpu", 168 "gpu", 169 "tpu", 170 ), "must have 'backend' in ('cpu', 'gpu', 'tpu')" 
171 172 self.n_hidden_features = n_hidden_features 173 self.activation_name = activation_name 174 self.a = a 175 self.nodes_sim = nodes_sim 176 self.bias = bias 177 self.seed = seed 178 self.backend = backend 179 self.dropout = dropout 180 self.direct_link = direct_link 181 self.cluster_encode = cluster_encode 182 self.type_clust = type_clust 183 self.type_scaling = type_scaling 184 self.col_sample = col_sample 185 self.row_sample = row_sample 186 self.n_clusters = n_clusters 187 if isinstance(self, RegressorMixin): 188 self.type_fit = "regression" 189 elif isinstance(self, ClassifierMixin): 190 self.type_fit = "classification" 191 self.subsampler_ = None 192 self.index_col_ = None 193 self.index_row_ = True 194 self.clustering_obj_ = None 195 self.clustering_scaler_ = None 196 self.nn_scaler_ = None 197 self.scaler_ = None 198 self.encoder_ = None 199 self.W_ = None 200 self.X_ = None 201 self.y_ = None 202 self.y_mean_ = None 203 self.beta_ = None 204 205 # activation function ----- 206 if sys_platform in ("Linux", "Darwin"): 207 activation_options = { 208 "relu": ac.relu if (self.backend == "cpu") else jnn.relu, 209 "tanh": np.tanh if (self.backend == "cpu") else jnp.tanh, 210 "sigmoid": (ac.sigmoid if (self.backend == "cpu") else jnn.sigmoid), 211 "prelu": partial(ac.prelu, a=a), 212 "elu": ( 213 partial(ac.elu, a=a) 214 if (self.backend == "cpu") 215 else partial(jnn.elu, a=a) 216 ), 217 } 218 else: # on Windows currently, no JAX 219 activation_options = { 220 "relu": (ac.relu if (self.backend == "cpu") else NotImplementedError), 221 "tanh": (np.tanh if (self.backend == "cpu") else NotImplementedError), 222 "sigmoid": ( 223 ac.sigmoid if (self.backend == "cpu") else NotImplementedError 224 ), 225 "prelu": partial(ac.prelu, a=a), 226 "elu": ( 227 partial(ac.elu, a=a) 228 if (self.backend == "cpu") 229 else NotImplementedError 230 ), 231 } 232 self.activation_func = activation_options[activation_name] 233 234 # "preprocessing" methods to be inherited ----- 235 236 def encode_clusters(self, X=None, predict=False, scaler=None, **kwargs): # 237 """Create new covariates with kmeans or GMM clustering 238 239 Parameters: 240 241 X: {array-like}, shape = [n_samples, n_features] 242 Training vectors, where n_samples is the number 243 of samples and n_features is the number of features. 
244 245 predict: boolean 246 is False on training set and True on test set 247 248 scaler: {object} of class StandardScaler, MinMaxScaler, RobustScaler or MaxAbsScaler 249 if scaler has already been fitted on training data (online training), it can be passed here 250 251 **kwargs: 252 additional parameters to be passed to the 253 clustering method 254 255 Returns: 256 257 Clusters' matrix, one-hot encoded: {array-like} 258 259 """ 260 261 np.random.seed(self.seed) 262 263 if X is None: 264 X = self.X_ 265 266 if isinstance(X, pd.DataFrame): 267 X = copy.deepcopy(X.values.astype(float)) 268 269 if len(X.shape) == 1: 270 X = X.reshape(1, -1) 271 272 if predict is False: # encode training set 273 274 # scale input data before clustering 275 self.clustering_scaler_, scaled_X = mo.scale_covariates( 276 X, choice=self.type_scaling[2], scaler=self.clustering_scaler_ 277 ) 278 279 self.clustering_obj_, X_clustered = mo.cluster_covariates( 280 scaled_X, 281 self.n_clusters, 282 self.seed, 283 type_clust=self.type_clust, 284 **kwargs 285 ) 286 287 if self.cluster_encode == True: 288 return mo.one_hot_encode(X_clustered, self.n_clusters).astype( 289 np.float16 290 ) 291 292 return X_clustered.astype(np.float16) 293 294 # if predict == True, encode test set 295 X_clustered = self.clustering_obj_.predict(self.clustering_scaler_.transform(X)) 296 297 if self.cluster_encode == True: 298 return mo.one_hot_encode(X_clustered, self.n_clusters).astype(np.float16) 299 300 return X_clustered.astype(np.float16) 301 302 def create_layer(self, scaled_X, W=None): 303 """Create hidden layer. 304 305 Parameters: 306 307 scaled_X: {array-like}, shape = [n_samples, n_features] 308 Training vectors, where n_samples is the number 309 of samples and n_features is the number of features 310 311 W: {array-like}, shape = [n_features, hidden_features] 312 if provided, constructs the hidden layer with W; otherwise computed internally 313 314 Returns: 315 316 Hidden layer matrix: {array-like} 317 318 """ 319 320 n_features = scaled_X.shape[1] 321 322 # hash_sim = { 323 # "sobol": generate_sobol, 324 # "hammersley": generate_hammersley, 325 # "uniform": generate_uniform, 326 # "halton": generate_halton 327 # } 328 329 if self.bias is False: # no bias term in the hidden layer 330 if W is None: 331 if self.nodes_sim == "sobol": 332 self.W_ = generate_sobol( 333 n_dims=n_features, 334 n_points=self.n_hidden_features, 335 seed=self.seed, 336 ) 337 elif self.nodes_sim == "hammersley": 338 self.W_ = generate_hammersley( 339 n_dims=n_features, 340 n_points=self.n_hidden_features, 341 seed=self.seed, 342 ) 343 elif self.nodes_sim == "uniform": 344 self.W_ = generate_uniform( 345 n_dims=n_features, 346 n_points=self.n_hidden_features, 347 seed=self.seed, 348 ) 349 else: 350 self.W_ = generate_halton( 351 n_dims=n_features, 352 n_points=self.n_hidden_features, 353 seed=self.seed, 354 ) 355 356 assert ( 357 scaled_X.shape[1] == self.W_.shape[0] 358 ), "check dimensions of covariates X and matrix W" 359 360 return mo.dropout( 361 x=self.activation_func( 362 mo.safe_sparse_dot(a=scaled_X, b=self.W_, backend=self.backend) 363 ), 364 drop_prob=self.dropout, 365 seed=self.seed, 366 ) 367 368 # W is not none 369 assert ( 370 scaled_X.shape[1] == W.shape[0] 371 ), "check dimensions of covariates X and matrix W" 372 373 # self.W_ = W 374 return mo.dropout( 375 x=self.activation_func( 376 mo.safe_sparse_dot(a=scaled_X, b=W, backend=self.backend) 377 ), 378 drop_prob=self.dropout, 379 seed=self.seed, 380 ) 381 382 # with bias term in the hidden 
layer 383 if W is None: 384 n_features_1 = n_features + 1 385 386 if self.nodes_sim == "sobol": 387 self.W_ = generate_sobol( 388 n_dims=n_features_1, 389 n_points=self.n_hidden_features, 390 seed=self.seed, 391 ) 392 elif self.nodes_sim == "hammersley": 393 self.W_ = generate_hammersley( 394 n_dims=n_features_1, 395 n_points=self.n_hidden_features, 396 seed=self.seed, 397 ) 398 elif self.nodes_sim == "uniform": 399 self.W_ = generate_uniform( 400 n_dims=n_features_1, 401 n_points=self.n_hidden_features, 402 seed=self.seed, 403 ) 404 else: 405 self.W_ = generate_halton( 406 n_dims=n_features_1, 407 n_points=self.n_hidden_features, 408 seed=self.seed, 409 ) 410 411 # self.W_ = hash_sim[self.nodes_sim]( 412 # n_dims=n_features_1, 413 # n_points=self.n_hidden_features, 414 # seed=self.seed, 415 # ) 416 417 return mo.dropout( 418 x=self.activation_func( 419 mo.safe_sparse_dot( 420 a=mo.cbind( 421 np.ones(scaled_X.shape[0]), 422 scaled_X, 423 backend=self.backend, 424 ), 425 b=self.W_, 426 backend=self.backend, 427 ) 428 ), 429 drop_prob=self.dropout, 430 seed=self.seed, 431 ) 432 433 # W is not None 434 # self.W_ = W 435 return mo.dropout( 436 x=self.activation_func( 437 mo.safe_sparse_dot( 438 a=mo.cbind( 439 np.ones(scaled_X.shape[0]), 440 scaled_X, 441 backend=self.backend, 442 ), 443 b=W, 444 backend=self.backend, 445 ) 446 ), 447 drop_prob=self.dropout, 448 seed=self.seed, 449 ) 450 451 def cook_training_set(self, y=None, X=None, W=None, **kwargs): 452 """Create new hidden features for training set, with hidden layer, center the response. 453 454 Parameters: 455 456 y: array-like, shape = [n_samples] 457 Target values 458 459 X: {array-like}, shape = [n_samples, n_features] 460 Training vectors, where n_samples is the number 461 of samples and n_features is the number of features 462 463 W: {array-like}, shape = [n_features, hidden_features] 464 if provided, constructs the hidden layer via W 465 466 Returns: 467 468 (centered response, direct link + hidden layer matrix): {tuple} 469 470 """ 471 472 # either X and y are stored or not 473 # assert ((y is None) & (X is None)) | ((y is not None) & (X is not None)) 474 if self.n_hidden_features > 0: # has a hidden layer 475 assert ( 476 len(self.type_scaling) >= 2 477 ), "must have len(self.type_scaling) >= 2 when self.n_hidden_features > 0" 478 479 if X is None: 480 481 if self.col_sample == 1: 482 input_X = self.X_ 483 else: 484 n_features = self.X_.shape[1] 485 new_n_features = int(np.ceil(n_features * self.col_sample)) 486 assert ( 487 new_n_features >= 1 488 ), "check class attribute 'col_sample' and the number of covariates provided for X" 489 np.random.seed(self.seed) 490 index_col = np.random.choice( 491 range(n_features), size=new_n_features, replace=False 492 ) 493 self.index_col_ = index_col 494 input_X = self.X_[:, self.index_col_] 495 496 else: # X is not None # keep X vs self.X_ 497 498 if isinstance(X, pd.DataFrame): 499 X = copy.deepcopy(X.values.astype(float)) 500 501 if self.col_sample == 1: 502 input_X = X 503 else: 504 n_features = X.shape[1] 505 new_n_features = int(np.ceil(n_features * self.col_sample)) 506 assert ( 507 new_n_features >= 1 508 ), "check class attribute 'col_sample' and the number of covariates provided for X" 509 np.random.seed(self.seed) 510 index_col = np.random.choice( 511 range(n_features), size=new_n_features, replace=False 512 ) 513 self.index_col_ = index_col 514 input_X = X[:, self.index_col_] 515 516 if self.n_clusters <= 0: 517 # data without any clustering: self.n_clusters is None ----- 518 519 
if self.n_hidden_features > 0: # with hidden layer 520 521 self.nn_scaler_, scaled_X = mo.scale_covariates( 522 input_X, choice=self.type_scaling[1], scaler=self.nn_scaler_ 523 ) 524 Phi_X = ( 525 self.create_layer(scaled_X) 526 if W is None 527 else self.create_layer(scaled_X, W=W) 528 ) 529 Z = ( 530 mo.cbind(input_X, Phi_X, backend=self.backend) 531 if self.direct_link is True 532 else Phi_X 533 ) 534 self.scaler_, scaled_Z = mo.scale_covariates( 535 Z, choice=self.type_scaling[0], scaler=self.scaler_ 536 ) 537 else: # no hidden layer 538 Z = input_X 539 self.scaler_, scaled_Z = mo.scale_covariates( 540 Z, choice=self.type_scaling[0], scaler=self.scaler_ 541 ) 542 543 else: 544 545 # data with clustering: self.n_clusters is not None ----- # keep 546 547 augmented_X = mo.cbind( 548 input_X, 549 self.encode_clusters(input_X, **kwargs), 550 backend=self.backend, 551 ) 552 553 if self.n_hidden_features > 0: # with hidden layer 554 555 self.nn_scaler_, scaled_X = mo.scale_covariates( 556 augmented_X, 557 choice=self.type_scaling[1], 558 scaler=self.nn_scaler_, 559 ) 560 Phi_X = ( 561 self.create_layer(scaled_X) 562 if W is None 563 else self.create_layer(scaled_X, W=W) 564 ) 565 Z = ( 566 mo.cbind(augmented_X, Phi_X, backend=self.backend) 567 if self.direct_link is True 568 else Phi_X 569 ) 570 self.scaler_, scaled_Z = mo.scale_covariates( 571 Z, choice=self.type_scaling[0], scaler=self.scaler_ 572 ) 573 else: # no hidden layer 574 Z = augmented_X 575 self.scaler_, scaled_Z = mo.scale_covariates( 576 Z, choice=self.type_scaling[0], scaler=self.scaler_ 577 ) 578 579 # Returning model inputs ----- 580 if mx.is_factor(y) is False: # regression 581 # center y 582 if y is None: 583 self.y_mean_, centered_y = mo.center_response(self.y_) 584 else: 585 self.y_mean_, centered_y = mo.center_response(y) 586 587 # y is subsampled 588 if self.row_sample < 1: 589 n, p = Z.shape 590 591 self.subsampler_ = ( 592 SubSampler(y=self.y_, row_sample=self.row_sample, seed=self.seed) 593 if y is None 594 else SubSampler(y=y, row_sample=self.row_sample, seed=self.seed) 595 ) 596 597 self.index_row_ = self.subsampler_.subsample() 598 599 n_row_sample = len(self.index_row_) 600 # regression 601 return ( 602 centered_y[self.index_row_].reshape(n_row_sample), 603 self.scaler_.transform( 604 Z[self.index_row_, :].reshape(n_row_sample, p) 605 ), 606 ) 607 # y is not subsampled 608 # regression 609 return (centered_y, self.scaler_.transform(Z)) 610 611 # classification 612 # y is subsampled 613 if self.row_sample < 1: 614 n, p = Z.shape 615 616 self.subsampler_ = ( 617 SubSampler(y=self.y_, row_sample=self.row_sample, seed=self.seed) 618 if y is None 619 else SubSampler(y=y, row_sample=self.row_sample, seed=self.seed) 620 ) 621 622 self.index_row_ = self.subsampler_.subsample() 623 624 n_row_sample = len(self.index_row_) 625 # classification 626 return ( 627 y[self.index_row_].reshape(n_row_sample), 628 self.scaler_.transform(Z[self.index_row_, :].reshape(n_row_sample, p)), 629 ) 630 # y is not subsampled 631 # classification 632 return (y, self.scaler_.transform(Z)) 633 634 def cook_test_set(self, X, **kwargs): 635 """Transform data from test set, with hidden layer. 
636 637 Parameters: 638 639 X: {array-like}, shape = [n_samples, n_features] 640 Training vectors, where n_samples is the number 641 of samples and n_features is the number of features 642 643 **kwargs: additional parameters to be passed to self.encode_cluster 644 645 Returns: 646 647 Transformed test set : {array-like} 648 """ 649 650 if isinstance(X, pd.DataFrame): 651 X = copy.deepcopy(X.values.astype(float)) 652 653 if len(X.shape) == 1: 654 X = X.reshape(1, -1) 655 656 if ( 657 self.n_clusters == 0 658 ): # data without clustering: self.n_clusters is None ----- 659 if self.n_hidden_features > 0: 660 # if hidden layer 661 scaled_X = ( 662 self.nn_scaler_.transform(X) 663 if (self.col_sample == 1) 664 else self.nn_scaler_.transform(X[:, self.index_col_]) 665 ) 666 Phi_X = self.create_layer(scaled_X, self.W_) 667 if self.direct_link == True: 668 return self.scaler_.transform( 669 mo.cbind(scaled_X, Phi_X, backend=self.backend) 670 ) 671 # when self.direct_link == False 672 return self.scaler_.transform(Phi_X) 673 # if no hidden layer # self.n_hidden_features == 0 674 return self.scaler_.transform(X) 675 676 # data with clustering: self.n_clusters > 0 ----- 677 if self.col_sample == 1: 678 predicted_clusters = self.encode_clusters(X=X, predict=True, **kwargs) 679 augmented_X = mo.cbind(X, predicted_clusters, backend=self.backend) 680 else: 681 predicted_clusters = self.encode_clusters( 682 X=X[:, self.index_col_], predict=True, **kwargs 683 ) 684 augmented_X = mo.cbind( 685 X[:, self.index_col_], predicted_clusters, backend=self.backend 686 ) 687 688 if self.n_hidden_features > 0: # if hidden layer 689 scaled_X = self.nn_scaler_.transform(augmented_X) 690 Phi_X = self.create_layer(scaled_X, self.W_) 691 if self.direct_link == True: 692 return self.scaler_.transform( 693 mo.cbind(augmented_X, Phi_X, backend=self.backend) 694 ) 695 return self.scaler_.transform(Phi_X) 696 697 # if no hidden layer 698 return self.scaler_.transform(augmented_X) 699 700 def cross_val_score( 701 self, 702 X, 703 y, 704 cv=5, 705 scoring="accuracy", 706 random_state=42, 707 n_jobs=-1, 708 epsilon=0.5, 709 penalized=True, 710 objective="abs", 711 **kwargs 712 ): 713 """ 714 Penalized Cross-validation score for a model. 
715 716 Parameters: 717 718 X: {array-like}, shape = [n_samples, n_features] 719 Training vectors, where n_samples is the number 720 of samples and n_features is the number of features 721 722 y: array-like, shape = [n_samples] 723 Target values 724 725 X_test: {array-like}, shape = [n_samples, n_features] 726 Test vectors, where n_samples is the number 727 of samples and n_features is the number of features 728 729 y_test: array-like, shape = [n_samples] 730 Target values 731 732 cv: int 733 Number of folds 734 735 scoring: str 736 Scoring metric 737 738 random_state: int 739 Random state 740 741 n_jobs: int 742 Number of jobs to run in parallel 743 744 epsilon: float 745 Penalty parameter 746 747 penalized: bool 748 Whether to obtain penalized cross-validation score or not 749 750 objective: str 751 'abs': Minimize the absolute difference between cross-validation score and validation score 752 'relative': Minimize the relative difference between cross-validation score and validation score 753 Returns: 754 755 A namedtuple with the following fields: 756 - cv_score: float 757 cross-validation score 758 - val_score: float 759 validation score 760 - penalized_score: float 761 penalized cross-validation score: cv_score / val_score + epsilon*(1/val_score + 1/cv_score) 762 If higher scoring metric is better, minimize the function result. 763 If lower scoring metric is better, maximize the function result. 764 """ 765 if scoring == "accuracy": 766 scoring_func = accuracy_score 767 elif scoring == "balanced_accuracy": 768 scoring_func = balanced_accuracy_score 769 elif scoring == "f1": 770 scoring_func = f1_score 771 elif scoring == "roc_auc": 772 scoring_func = roc_auc_score 773 elif scoring == "r2": 774 scoring_func = r2_score 775 elif scoring == "mse": 776 scoring_func = mean_squared_error 777 elif scoring == "mae": 778 scoring_func = mean_absolute_error 779 elif scoring == "mape": 780 scoring_func = mean_absolute_percentage_error 781 elif scoring == "rmse": 782 783 def scoring_func(y_true, y_pred): 784 return np.sqrt(mean_squared_error(y_true, y_pred)) 785 786 X_train, X_val, y_train, y_val = train_test_split( 787 X, y, test_size=0.2, random_state=random_state 788 ) 789 790 res = cross_val_score( 791 self, X_train, y_train, cv=cv, scoring=scoring, n_jobs=n_jobs 792 ) # cross-validation error 793 794 if penalized == False: 795 return res 796 797 DescribeResult = namedtuple( 798 "DescribeResult", ["cv_score", "val_score", "penalized_score"] 799 ) 800 801 numerator = res.mean() 802 803 # Evaluate on the (cv+1)-th fold 804 preds_val = self.fit(X_train, y_train).predict(X_val) 805 try: 806 denominator = scoring(y_val, preds_val) # validation error 807 except Exception as e: 808 denominator = scoring_func(y_val, preds_val) 809 810 # if higher is better 811 if objective == "abs": 812 penalized_score = np.abs(numerator - denominator) + epsilon * ( 813 1 / denominator + 1 / numerator 814 ) 815 elif objective == "relative": 816 ratio = numerator / denominator 817 penalized_score = np.abs(ratio - 1) + epsilon * ( 818 1 / denominator + 1 / numerator 819 ) 820 821 return DescribeResult( 822 cv_score=numerator, 823 val_score=denominator, 824 penalized_score=penalized_score, 825 )
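For `cross_val_score` with `penalized=True`, the source above combines the mean cross-validation score with a hold-out validation score: `|cv_score - val_score| + epsilon * (1/val_score + 1/cv_score)` for `objective='abs'`, and `|cv_score/val_score - 1| + epsilon * (1/val_score + 1/cv_score)` for `objective='relative'`. A small numeric illustration with made-up scores:

```python
import numpy as np

cv_score, val_score, epsilon = 0.90, 0.85, 0.5   # illustrative values only
abs_obj = np.abs(cv_score - val_score) + epsilon * (1 / val_score + 1 / cv_score)
rel_obj = np.abs(cv_score / val_score - 1) + epsilon * (1 / val_score + 1 / cv_score)
print(abs_obj, rel_obj)   # ~1.19 and ~1.20; lower is better when the scoring metric is maximized
```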
Base model from which all the other classes inherit.
This class contains the most important data preprocessing/feature engineering methods.
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or
not (False)
dropout: float
regularization parameter; (random) percentage of hidden-layer nodes
dropped out during training
direct_link: boolean
indicates if the original features are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for type_clust='kmeans' or type_clust='gmm'
clustering (could be 0: no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot);
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax') or robust scaling ('robust') or max absolute scaling ('maxabs')
col_sample: float
percentage of features randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform', clustering and dropout
backend: str
"cpu" or "gpu" or "tpu"
encode_clusters(self, X=None, predict=False, scaler=None, **kwargs)
Create new covariates with kmeans or GMM clustering
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
predict: boolean
is False on training set and True on test set
scaler: {object} of class StandardScaler, MinMaxScaler, RobustScaler or MaxAbsScaler
if scaler has already been fitted on training data (online training), it can be passed here
**kwargs:
additional parameters to be passed to the
clustering method
Returns:
Clusters' matrix, one-hot encoded: {array-like}
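A hedged sketch tying the shared `Base` hyperparameters documented above to this method, through `CustomClassifier` (the dataset, base learner and values are illustrative; `encode_clusters` is normally called internally by `cook_training_set` and `cook_test_set`):

```python
import nnetsauce as ns
from sklearn.datasets import load_wine
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

X, y = load_wine(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

clf = ns.CustomClassifier(LogisticRegression(max_iter=1000),
                          n_hidden_features=10,    # hidden layer size
                          activation_name="relu",  # hidden layer activation
                          nodes_sim="sobol",       # quasi-random hidden weights
                          n_clusters=2,            # clustering-based extra features
                          direct_link=True,        # keep original features alongside the hidden layer
                          seed=123)
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))

train_clusters = clf.encode_clusters(X_train, predict=False)  # fits the scaler and the clustering model
test_clusters = clf.encode_clusters(X_test, predict=True)     # reuses the fitted objects on new data
print(train_clusters.shape, test_clusters.shape)              # one-hot encoded, n_clusters columns each
```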
create_layer(self, scaled_X, W=None)
Create hidden layer.
Parameters:
scaled_X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features
W: {array-like}, shape = [n_features, hidden_features]
if provided, constructs the hidden layer with W; otherwise computed internally
Returns:
Hidden layer matrix: {array-like}
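Conceptually, the hidden layer is `g(scaled_X @ W)` (or `g([1, scaled_X] @ W)` when `bias=True`), where `W` comes from a quasi-random sequence and dropout is applied afterwards. A standalone NumPy sketch of the same idea (not the library's internal code; the random `W` stands in for the Sobol/Halton/Hammersley/uniform draws):

```python
import numpy as np

rng = np.random.default_rng(123)
scaled_X = rng.normal(size=(5, 3))        # 5 samples, 3 scaled features
W = rng.uniform(size=(3 + 1, 10))         # +1 row for the bias column, 10 hidden nodes
X_bias = np.column_stack([np.ones(scaled_X.shape[0]), scaled_X])
hidden = np.maximum(X_bias @ W, 0)        # ReLU activation, shape (5, 10)
print(hidden.shape)
```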
cook_training_set(self, y=None, X=None, W=None, **kwargs)
Create new hidden features for training set, with hidden layer, center the response.
Parameters:
y: array-like, shape = [n_samples]
Target values
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features
W: {array-like}, shape = [n_features, hidden_features]
if provided, constructs the hidden layer via W
Returns:
(centered response, direct link + hidden layer matrix): {tuple}
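In plain numpy terms, the transformation described above scales the inputs, pushes them through a (quasi-)randomly weighted hidden layer, and, when direct_link is True, concatenates the original features with the hidden features before a final scaling; the response is centered separately. The sketch below is schematic only, not the library's internal code: the weights are drawn from a plain Gaussian rather than a Sobol/Halton/Hammersley sequence, and clustering, dropout and subsampling are omitted.

```python
import numpy as np

rng = np.random.default_rng(123)
X = rng.normal(size=(100, 4))                     # training inputs
y = rng.normal(size=100)                          # response

n_hidden_features = 5
W = rng.normal(size=(X.shape[1], n_hidden_features))  # illustrative hidden-layer weights

scaled_X = (X - X.mean(axis=0)) / X.std(axis=0)   # scale inputs for the hidden layer
Phi_X = np.maximum(scaled_X @ W, 0.0)             # hidden features with a 'relu' activation
Z = np.hstack((X, Phi_X))                         # direct link: [original X | hidden features]
scaled_Z = (Z - Z.mean(axis=0)) / Z.std(axis=0)   # scaling of the full design matrix

centered_y = y - y.mean()                         # centered response
print(centered_y.shape, scaled_Z.shape)           # (100,) (100, 9)
```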
634 def cook_test_set(self, X, **kwargs): 635 """Transform data from test set, with hidden layer. 636 637 Parameters: 638 639 X: {array-like}, shape = [n_samples, n_features] 640 Training vectors, where n_samples is the number 641 of samples and n_features is the number of features 642 643 **kwargs: additional parameters to be passed to self.encode_cluster 644 645 Returns: 646 647 Transformed test set : {array-like} 648 """ 649 650 if isinstance(X, pd.DataFrame): 651 X = copy.deepcopy(X.values.astype(float)) 652 653 if len(X.shape) == 1: 654 X = X.reshape(1, -1) 655 656 if ( 657 self.n_clusters == 0 658 ): # data without clustering: self.n_clusters is None ----- 659 if self.n_hidden_features > 0: 660 # if hidden layer 661 scaled_X = ( 662 self.nn_scaler_.transform(X) 663 if (self.col_sample == 1) 664 else self.nn_scaler_.transform(X[:, self.index_col_]) 665 ) 666 Phi_X = self.create_layer(scaled_X, self.W_) 667 if self.direct_link == True: 668 return self.scaler_.transform( 669 mo.cbind(scaled_X, Phi_X, backend=self.backend) 670 ) 671 # when self.direct_link == False 672 return self.scaler_.transform(Phi_X) 673 # if no hidden layer # self.n_hidden_features == 0 674 return self.scaler_.transform(X) 675 676 # data with clustering: self.n_clusters > 0 ----- 677 if self.col_sample == 1: 678 predicted_clusters = self.encode_clusters(X=X, predict=True, **kwargs) 679 augmented_X = mo.cbind(X, predicted_clusters, backend=self.backend) 680 else: 681 predicted_clusters = self.encode_clusters( 682 X=X[:, self.index_col_], predict=True, **kwargs 683 ) 684 augmented_X = mo.cbind( 685 X[:, self.index_col_], predicted_clusters, backend=self.backend 686 ) 687 688 if self.n_hidden_features > 0: # if hidden layer 689 scaled_X = self.nn_scaler_.transform(augmented_X) 690 Phi_X = self.create_layer(scaled_X, self.W_) 691 if self.direct_link == True: 692 return self.scaler_.transform( 693 mo.cbind(augmented_X, Phi_X, backend=self.backend) 694 ) 695 return self.scaler_.transform(Phi_X) 696 697 # if no hidden layer 698 return self.scaler_.transform(augmented_X)
Transform data from test set, with hidden layer.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features
**kwargs: additional parameters to be passed to self.encode_clusters
Returns:
Transformed test set : {array-like}
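The key point of cook_test_set is that the transformation is replayed on new data with the quantities learned at training time (the stored scalers, weight matrix W_, clustering model and column indices), never re-estimated on the test set. A minimal, schematic sketch of that idea, assuming a hypothetical weight matrix W kept from training:

```python
import numpy as np
from sklearn.preprocessing import StandardScaler

rng = np.random.default_rng(123)
X_train = rng.normal(size=(100, 4))
X_test = rng.normal(size=(10, 4))
W = rng.normal(size=(4, 5))                       # hypothetical hidden-layer weights from training

scaler = StandardScaler().fit(X_train)            # statistics estimated on training data only
Phi_test = np.maximum(scaler.transform(X_test) @ W, 0.0)   # same weights, training-time scaling
Z_test = np.hstack((X_test, Phi_test))            # direct link + hidden features for the test set
print(Z_test.shape)                               # (10, 9)
```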
15class BaseRegressor(Base, RegressorMixin): 16 """Random Vector Functional Link Network regression without shrinkage 17 18 Parameters: 19 20 n_hidden_features: int 21 number of nodes in the hidden layer 22 23 activation_name: str 24 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 25 26 a: float 27 hyperparameter for 'prelu' or 'elu' activation function 28 29 nodes_sim: str 30 type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton', 31 'uniform' 32 33 bias: boolean 34 indicates if the hidden layer contains a bias term (True) or 35 not (False) 36 37 dropout: float 38 regularization parameter; (random) percentage of nodes dropped out 39 of the training 40 41 direct_link: boolean 42 indicates if the original features are included (True) in model's 43 fitting or not (False) 44 45 n_clusters: int 46 number of clusters for type_clust='kmeans' or type_clust='gmm' 47 clustering (could be 0: no clustering) 48 49 cluster_encode: bool 50 defines how the variable containing clusters is treated (default is one-hot); 51 if `False`, then labels are used, without one-hot encoding 52 53 type_clust: str 54 type of clustering method: currently k-means ('kmeans') or Gaussian 55 Mixture Model ('gmm') 56 57 type_scaling: a tuple of 3 strings 58 scaling methods for inputs, hidden layer, and clustering respectively 59 (and when relevant). 60 Currently available: standardization ('std') or MinMax scaling ('minmax') 61 62 col_sample: float 63 percentage of features randomly chosen for training 64 65 row_sample: float 66 percentage of rows chosen for training, by stratified bootstrapping 67 68 seed: int 69 reproducibility seed for nodes_sim=='uniform', clustering and dropout 70 71 backend: str 72 "cpu" or "gpu" or "tpu" 73 74 Attributes: 75 76 beta_: vector 77 regression coefficients 78 79 GCV_: float 80 Generalized Cross-Validation error 81 82 """ 83 84 # construct the object ----- 85 86 def __init__( 87 self, 88 n_hidden_features=5, 89 activation_name="relu", 90 a=0.01, 91 nodes_sim="sobol", 92 bias=True, 93 dropout=0, 94 direct_link=True, 95 n_clusters=2, 96 cluster_encode=True, 97 type_clust="kmeans", 98 type_scaling=("std", "std", "std"), 99 col_sample=1, 100 row_sample=1, 101 seed=123, 102 backend="cpu", 103 ): 104 super().__init__( 105 n_hidden_features=n_hidden_features, 106 activation_name=activation_name, 107 a=a, 108 nodes_sim=nodes_sim, 109 bias=bias, 110 dropout=dropout, 111 direct_link=direct_link, 112 n_clusters=n_clusters, 113 cluster_encode=cluster_encode, 114 type_clust=type_clust, 115 type_scaling=type_scaling, 116 col_sample=col_sample, 117 row_sample=row_sample, 118 seed=seed, 119 backend=backend, 120 ) 121 122 def fit(self, X, y, **kwargs): 123 """Fit BaseRegressor to training data (X, y) 124 125 Parameters: 126 127 X: {array-like}, shape = [n_samples, n_features] 128 Training vectors, where n_samples is the number 129 of samples and n_features is the number of features 130 131 y: array-like, shape = [n_samples] 132 Target values 133 134 **kwargs: additional parameters to be passed to self.cook_training_set 135 136 Returns: 137 138 self: object 139 """ 140 141 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 142 143 fit_obj = lmf.beta_Sigma_hat(X=scaled_Z, y=centered_y, backend=self.backend) 144 145 self.beta_ = fit_obj["beta_hat"] 146 147 self.GCV_ = fit_obj["GCV"] 148 149 return self 150 151 def predict(self, X, **kwargs): 152 """Predict test data X. 
153 154 Parameters: 155 156 X: {array-like}, shape = [n_samples, n_features] 157 Training vectors, where n_samples is the number 158 of samples and n_features is the number of features 159 160 **kwargs: additional parameters to be passed to self.cook_test_set 161 162 Returns: 163 164 model predictions: {array-like} 165 """ 166 167 if len(X.shape) == 1: 168 n_features = X.shape[0] 169 new_X = mo.rbind( 170 X.reshape(1, n_features), 171 np.ones(n_features).reshape(1, n_features), 172 ) 173 174 return ( 175 self.y_mean_ 176 + mo.safe_sparse_dot( 177 a=self.cook_test_set(new_X, **kwargs), 178 b=self.beta_, 179 backend=self.backend, 180 ) 181 )[0] 182 183 return self.y_mean_ + mo.safe_sparse_dot( 184 a=self.cook_test_set(X, **kwargs), 185 b=self.beta_, 186 backend=self.backend, 187 )
Random Vector Functional Link Network regression without shrinkage
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or
not (False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original features are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for type_clust='kmeans' or type_clust='gmm'
clustering (could be 0: no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot);
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of features randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform', clustering and dropout
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
beta_: vector
regression coefficients
GCV_: float
Generalized Cross-Validation error
122 def fit(self, X, y, **kwargs): 123 """Fit BaseRegressor to training data (X, y) 124 125 Parameters: 126 127 X: {array-like}, shape = [n_samples, n_features] 128 Training vectors, where n_samples is the number 129 of samples and n_features is the number of features 130 131 y: array-like, shape = [n_samples] 132 Target values 133 134 **kwargs: additional parameters to be passed to self.cook_training_set 135 136 Returns: 137 138 self: object 139 """ 140 141 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 142 143 fit_obj = lmf.beta_Sigma_hat(X=scaled_Z, y=centered_y, backend=self.backend) 144 145 self.beta_ = fit_obj["beta_hat"] 146 147 self.GCV_ = fit_obj["GCV"] 148 149 return self
Fit BaseRegressor to training data (X, y)
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features
y: array-like, shape = [n_samples]
Target values
**kwargs: additional parameters to be passed to self.cook_training_set
Returns:
self: object
151 def predict(self, X, **kwargs): 152 """Predict test data X. 153 154 Parameters: 155 156 X: {array-like}, shape = [n_samples, n_features] 157 Training vectors, where n_samples is the number 158 of samples and n_features is the number of features 159 160 **kwargs: additional parameters to be passed to self.cook_test_set 161 162 Returns: 163 164 model predictions: {array-like} 165 """ 166 167 if len(X.shape) == 1: 168 n_features = X.shape[0] 169 new_X = mo.rbind( 170 X.reshape(1, n_features), 171 np.ones(n_features).reshape(1, n_features), 172 ) 173 174 return ( 175 self.y_mean_ 176 + mo.safe_sparse_dot( 177 a=self.cook_test_set(new_X, **kwargs), 178 b=self.beta_, 179 backend=self.backend, 180 ) 181 )[0] 182 183 return self.y_mean_ + mo.safe_sparse_dot( 184 a=self.cook_test_set(X, **kwargs), 185 b=self.beta_, 186 backend=self.backend, 187 )
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features
**kwargs: additional parameters to be passed to self.cook_test_set
Returns:
model predictions: {array-like}
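A minimal usage sketch for BaseRegressor, assuming scikit-learn is available; the dataset and hyperparameter values are illustrative only.

```python
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

reg = ns.BaseRegressor(n_hidden_features=10, n_clusters=2, direct_link=True, seed=123)
reg.fit(X_train, y_train)
print(reg.GCV_)                                   # Generalized Cross-Validation error

preds = reg.predict(X_test)
print(np.sqrt(np.mean((y_test - preds) ** 2)))    # out-of-sample RMSE
```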
15class BayesianRVFLRegressor(Base, RegressorMixin): 16 """Bayesian Random Vector Functional Link Network regression with one prior 17 18 Parameters: 19 20 n_hidden_features: int 21 number of nodes in the hidden layer 22 23 activation_name: str 24 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 25 26 a: float 27 hyperparameter for 'prelu' or 'elu' activation function 28 29 nodes_sim: str 30 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform' 31 32 bias: boolean 33 indicates if the hidden layer contains a bias term (True) or not (False) 34 35 dropout: float 36 regularization parameter; (random) percentage of nodes dropped out 37 of the training 38 39 direct_link: boolean 40 indicates if the original features are included (True) in model''s fitting or not (False) 41 42 n_clusters: int 43 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering) 44 45 cluster_encode: bool 46 defines how the variable containing clusters is treated (default is one-hot) 47 if `False`, then labels are used, without one-hot encoding 48 49 type_clust: str 50 type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm') 51 52 type_scaling: a tuple of 3 strings 53 scaling methods for inputs, hidden layer, and clustering respectively 54 (and when relevant). 55 Currently available: standardization ('std') or MinMax scaling ('minmax') 56 57 seed: int 58 reproducibility seed for nodes_sim=='uniform' 59 60 s: float 61 std. dev. of regression parameters in Bayesian Ridge Regression 62 63 sigma: float 64 std. dev. of residuals in Bayesian Ridge Regression 65 66 return_std: boolean 67 if True, uncertainty around predictions is evaluated 68 69 backend: str 70 "cpu" or "gpu" or "tpu" 71 72 Attributes: 73 74 beta_: array-like 75 regression''s coefficients 76 77 Sigma_: array-like 78 covariance of the distribution of fitted parameters 79 80 GCV_: float 81 Generalized cross-validation error 82 83 y_mean_: float 84 average response 85 86 Examples: 87 88 ```python 89 TBD 90 ``` 91 92 """ 93 94 # construct the object ----- 95 96 def __init__( 97 self, 98 n_hidden_features=5, 99 activation_name="relu", 100 a=0.01, 101 nodes_sim="sobol", 102 bias=True, 103 dropout=0, 104 direct_link=True, 105 n_clusters=2, 106 cluster_encode=True, 107 type_clust="kmeans", 108 type_scaling=("std", "std", "std"), 109 seed=123, 110 s=0.1, 111 sigma=0.05, 112 return_std=True, 113 backend="cpu", 114 ): 115 super().__init__( 116 n_hidden_features=n_hidden_features, 117 activation_name=activation_name, 118 a=a, 119 nodes_sim=nodes_sim, 120 bias=bias, 121 dropout=dropout, 122 direct_link=direct_link, 123 n_clusters=n_clusters, 124 cluster_encode=cluster_encode, 125 type_clust=type_clust, 126 type_scaling=type_scaling, 127 seed=seed, 128 backend=backend, 129 ) 130 self.s = s 131 self.sigma = sigma 132 self.beta_ = None 133 self.Sigma_ = None 134 self.GCV_ = None 135 self.return_std = return_std 136 137 def fit(self, X, y, **kwargs): 138 """Fit BayesianRVFLRegressor to training data (X, y). 139 140 Parameters: 141 142 X: {array-like}, shape = [n_samples, n_features] 143 Training vectors, where n_samples is the number 144 of samples and n_features is the number of features. 145 146 y: array-like, shape = [n_samples] 147 Target values. 
148 149 **kwargs: additional parameters to be passed to 150 self.cook_training_set 151 152 Returns: 153 154 self: object 155 156 """ 157 158 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 159 160 fit_obj = lmf.beta_Sigma_hat_rvfl( 161 X=scaled_Z, 162 y=centered_y, 163 s=self.s, 164 sigma=self.sigma, 165 fit_intercept=False, 166 return_cov=self.return_std, 167 backend=self.backend, 168 ) 169 170 self.beta_ = fit_obj["beta_hat"] 171 172 if self.return_std == True: 173 self.Sigma_ = fit_obj["Sigma_hat"] 174 175 self.GCV_ = fit_obj["GCV"] 176 177 return self 178 179 def predict(self, X, return_std=False, **kwargs): 180 """Predict test data X. 181 182 Parameters: 183 184 X: {array-like}, shape = [n_samples, n_features] 185 Training vectors, where n_samples is the number 186 of samples and n_features is the number of features. 187 188 return_std: {boolean}, standard dev. is returned or not 189 190 **kwargs: additional parameters to be passed to 191 self.cook_test_set 192 193 Returns: 194 195 model predictions: {array-like} 196 197 """ 198 199 if len(X.shape) == 1: # one observation in the test set only 200 n_features = X.shape[0] 201 new_X = mo.rbind( 202 x=X.reshape(1, n_features), 203 y=np.ones(n_features).reshape(1, n_features), 204 backend=self.backend, 205 ) 206 207 self.return_std = return_std 208 209 if self.return_std == False: 210 if len(X.shape) == 1: 211 return ( 212 self.y_mean_ 213 + mo.safe_sparse_dot( 214 a=self.cook_test_set(new_X, **kwargs), 215 b=self.beta_, 216 backend=self.backend, 217 ) 218 )[0] 219 220 return self.y_mean_ + mo.safe_sparse_dot( 221 a=self.cook_test_set(X, **kwargs), 222 b=self.beta_, 223 backend=self.backend, 224 ) 225 226 else: # confidence interval required for preds? 227 if len(X.shape) == 1: 228 Z = self.cook_test_set(new_X, **kwargs) 229 230 pred_obj = lmf.beta_Sigma_hat_rvfl( 231 s=self.s, 232 sigma=self.sigma, 233 X_star=Z, 234 return_cov=True, 235 beta_hat_=self.beta_, 236 Sigma_hat_=self.Sigma_, 237 backend=self.backend, 238 ) 239 240 return ( 241 self.y_mean_ + pred_obj["preds"][0], 242 pred_obj["preds_std"][0], 243 ) 244 245 Z = self.cook_test_set(X, **kwargs) 246 247 pred_obj = lmf.beta_Sigma_hat_rvfl( 248 s=self.s, 249 sigma=self.sigma, 250 X_star=Z, 251 return_cov=True, 252 beta_hat_=self.beta_, 253 Sigma_hat_=self.Sigma_, 254 backend=self.backend, 255 ) 256 257 return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])
Bayesian Random Vector Functional Link Network regression with one prior
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not (False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original features are included (True) in model's fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
seed: int
reproducibility seed for nodes_sim=='uniform'
s: float
std. dev. of regression parameters in Bayesian Ridge Regression
sigma: float
std. dev. of residuals in Bayesian Ridge Regression
return_std: boolean
if True, uncertainty around predictions is evaluated
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
beta_: array-like
regression's coefficients
Sigma_: array-like
covariance of the distribution of fitted parameters
GCV_: float
Generalized cross-validation error
y_mean_: float
average response
Examples:
TBD
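Since the example above is still marked TBD, here is a minimal placeholder sketch; the dataset and hyperparameters are illustrative only, and `predict(..., return_std=True)` returns a (predictions, standard deviations) tuple as documented below.

```python
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

reg = ns.BayesianRVFLRegressor(n_hidden_features=10, s=0.1, sigma=0.05, seed=123)
reg.fit(X_train, y_train)

point_preds = reg.predict(X_test)                 # point predictions only
mean, std = reg.predict(X_test, return_std=True)  # predictions and their standard deviations
print(mean[:5])
print(std[:5])
```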
137 def fit(self, X, y, **kwargs): 138 """Fit BayesianRVFLRegressor to training data (X, y). 139 140 Parameters: 141 142 X: {array-like}, shape = [n_samples, n_features] 143 Training vectors, where n_samples is the number 144 of samples and n_features is the number of features. 145 146 y: array-like, shape = [n_samples] 147 Target values. 148 149 **kwargs: additional parameters to be passed to 150 self.cook_training_set 151 152 Returns: 153 154 self: object 155 156 """ 157 158 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 159 160 fit_obj = lmf.beta_Sigma_hat_rvfl( 161 X=scaled_Z, 162 y=centered_y, 163 s=self.s, 164 sigma=self.sigma, 165 fit_intercept=False, 166 return_cov=self.return_std, 167 backend=self.backend, 168 ) 169 170 self.beta_ = fit_obj["beta_hat"] 171 172 if self.return_std == True: 173 self.Sigma_ = fit_obj["Sigma_hat"] 174 175 self.GCV_ = fit_obj["GCV"] 176 177 return self
Fit BayesianRVFLRegressor to training data (X, y).
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set
Returns:
self: object
179 def predict(self, X, return_std=False, **kwargs): 180 """Predict test data X. 181 182 Parameters: 183 184 X: {array-like}, shape = [n_samples, n_features] 185 Training vectors, where n_samples is the number 186 of samples and n_features is the number of features. 187 188 return_std: {boolean}, standard dev. is returned or not 189 190 **kwargs: additional parameters to be passed to 191 self.cook_test_set 192 193 Returns: 194 195 model predictions: {array-like} 196 197 """ 198 199 if len(X.shape) == 1: # one observation in the test set only 200 n_features = X.shape[0] 201 new_X = mo.rbind( 202 x=X.reshape(1, n_features), 203 y=np.ones(n_features).reshape(1, n_features), 204 backend=self.backend, 205 ) 206 207 self.return_std = return_std 208 209 if self.return_std == False: 210 if len(X.shape) == 1: 211 return ( 212 self.y_mean_ 213 + mo.safe_sparse_dot( 214 a=self.cook_test_set(new_X, **kwargs), 215 b=self.beta_, 216 backend=self.backend, 217 ) 218 )[0] 219 220 return self.y_mean_ + mo.safe_sparse_dot( 221 a=self.cook_test_set(X, **kwargs), 222 b=self.beta_, 223 backend=self.backend, 224 ) 225 226 else: # confidence interval required for preds? 227 if len(X.shape) == 1: 228 Z = self.cook_test_set(new_X, **kwargs) 229 230 pred_obj = lmf.beta_Sigma_hat_rvfl( 231 s=self.s, 232 sigma=self.sigma, 233 X_star=Z, 234 return_cov=True, 235 beta_hat_=self.beta_, 236 Sigma_hat_=self.Sigma_, 237 backend=self.backend, 238 ) 239 240 return ( 241 self.y_mean_ + pred_obj["preds"][0], 242 pred_obj["preds_std"][0], 243 ) 244 245 Z = self.cook_test_set(X, **kwargs) 246 247 pred_obj = lmf.beta_Sigma_hat_rvfl( 248 s=self.s, 249 sigma=self.sigma, 250 X_star=Z, 251 return_cov=True, 252 beta_hat_=self.beta_, 253 Sigma_hat_=self.Sigma_, 254 backend=self.backend, 255 ) 256 257 return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
return_std: {boolean}, whether the standard deviation of the predictions is returned in addition to the point predictions
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
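As the source above shows, a one-dimensional X is interpreted as a single observation; a short sketch on illustrative data:

```python
import nnetsauce as ns
from sklearn.datasets import load_diabetes

X, y = load_diabetes(return_X_y=True)
reg = ns.BayesianRVFLRegressor(n_hidden_features=10, seed=123).fit(X, y)

x_new = X[0, :]                                   # one observation, shape (n_features,)
print(reg.predict(x_new))                         # single point prediction
print(reg.predict(x_new, return_std=True))        # (prediction, standard deviation)
```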
15class BayesianRVFL2Regressor(Base, RegressorMixin): 16 """Bayesian Random Vector Functional Link Network regression with two priors 17 18 Parameters: 19 20 n_hidden_features: int 21 number of nodes in the hidden layer 22 23 activation_name: str 24 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 25 26 a: float 27 hyperparameter for 'prelu' or 'elu' activation function 28 29 nodes_sim: str 30 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform' 31 32 bias: boolean 33 indicates if the hidden layer contains a bias term (True) or not (False) 34 35 dropout: float 36 regularization parameter; (random) percentage of nodes dropped out 37 of the training 38 39 direct_link: boolean 40 indicates if the original features are included (True) in model''s fitting or not (False) 41 42 n_clusters: int 43 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering) 44 45 cluster_encode: bool 46 defines how the variable containing clusters is treated (default is one-hot) 47 if `False`, then labels are used, without one-hot encoding 48 49 type_clust: str 50 type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm') 51 52 type_scaling: a tuple of 3 strings 53 scaling methods for inputs, hidden layer, and clustering respectively 54 (and when relevant). 55 Currently available: standardization ('std') or MinMax scaling ('minmax') 56 57 seed: int 58 reproducibility seed for nodes_sim=='uniform' 59 60 s1: float 61 std. dev. of init. regression parameters in Bayesian Ridge Regression 62 63 s2: float 64 std. dev. of augmented regression parameters in Bayesian Ridge Regression 65 66 sigma: float 67 std. dev. of residuals in Bayesian Ridge Regression 68 69 return_std: boolean 70 if True, uncertainty around predictions is evaluated 71 72 backend: str 73 "cpu" or "gpu" or "tpu" 74 75 Attributes: 76 77 beta_: array-like 78 regression''s coefficients 79 80 Sigma_: array-like 81 covariance of the distribution of fitted parameters 82 83 GCV_: float 84 Generalized cross-validation error 85 86 y_mean_: float 87 average response 88 89 Examples: 90 91 ```python 92 TBD 93 ``` 94 95 """ 96 97 # construct the object ----- 98 99 def __init__( 100 self, 101 n_hidden_features=5, 102 activation_name="relu", 103 a=0.01, 104 nodes_sim="sobol", 105 bias=True, 106 dropout=0, 107 direct_link=True, 108 n_clusters=0, 109 cluster_encode=True, 110 type_clust="kmeans", 111 type_scaling=("std", "std", "std"), 112 seed=123, 113 s1=0.1, 114 s2=0.1, 115 sigma=0.05, 116 return_std=True, 117 backend="cpu", 118 ): 119 super().__init__( 120 n_hidden_features=n_hidden_features, 121 activation_name=activation_name, 122 a=a, 123 nodes_sim=nodes_sim, 124 bias=bias, 125 dropout=dropout, 126 direct_link=direct_link, 127 n_clusters=n_clusters, 128 cluster_encode=cluster_encode, 129 type_clust=type_clust, 130 type_scaling=type_scaling, 131 seed=seed, 132 backend=backend, 133 ) 134 135 self.s1 = s1 136 self.s2 = s2 137 self.sigma = sigma 138 self.beta_ = None 139 self.Sigma_ = None 140 self.GCV_ = None 141 self.return_std = return_std 142 143 def fit(self, X, y, **kwargs): 144 """Fit BayesianRVFL2Regressor to training data (X, y) 145 146 Parameters: 147 148 X: {array-like}, shape = [n_samples, n_features] 149 Training vectors, where n_samples is the number 150 of samples and n_features is the number of features 151 152 y: array-like, shape = [n_samples] 153 Target values 154 155 **kwargs: additional parameters to be passed to 156 self.cook_training_set 157 158 Returns: 
159 160 self: object 161 162 """ 163 164 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 165 166 n, p = X.shape 167 q = self.n_hidden_features 168 169 if self.direct_link == True: 170 r = p + self.n_clusters 171 172 block11 = (self.s1**2) * np.eye(r) 173 block12 = np.zeros((r, q)) 174 block21 = np.zeros((q, r)) 175 block22 = (self.s2**2) * np.eye(q) 176 177 Sigma_prior = mo.rbind( 178 x=mo.cbind(x=block11, y=block12, backend=self.backend), 179 y=mo.cbind(x=block21, y=block22, backend=self.backend), 180 backend=self.backend, 181 ) 182 183 else: 184 Sigma_prior = (self.s2**2) * np.eye(q) 185 186 fit_obj = lmf.beta_Sigma_hat_rvfl2( 187 X=scaled_Z, 188 y=centered_y, 189 Sigma=Sigma_prior, 190 sigma=self.sigma, 191 fit_intercept=False, 192 return_cov=self.return_std, 193 backend=self.backend, 194 ) 195 196 self.beta_ = fit_obj["beta_hat"] 197 198 if self.return_std == True: 199 self.Sigma_ = fit_obj["Sigma_hat"] 200 201 self.GCV_ = fit_obj["GCV"] 202 203 return self 204 205 def predict(self, X, return_std=False, **kwargs): 206 """Predict test data X. 207 208 Parameters: 209 210 X: {array-like}, shape = [n_samples, n_features] 211 Training vectors, where n_samples is the number 212 of samples and n_features is the number of features. 213 214 return_std: {boolean}, standard dev. is returned or not 215 216 **kwargs: additional parameters to be passed to 217 self.cook_test_set 218 219 Returns: 220 221 model predictions: {array-like} 222 223 """ 224 225 if len(X.shape) == 1: # one observation in the test set only 226 n_features = X.shape[0] 227 new_X = mo.rbind( 228 x=X.reshape(1, n_features), 229 y=np.ones(n_features).reshape(1, n_features), 230 backend=self.backend, 231 ) 232 233 self.return_std = return_std 234 235 if self.return_std == False: 236 if len(X.shape) == 1: 237 return ( 238 self.y_mean_ 239 + mo.safe_sparse_dot( 240 self.cook_test_set(new_X, **kwargs), 241 self.beta_, 242 backend=self.backend, 243 ) 244 )[0] 245 246 return self.y_mean_ + mo.safe_sparse_dot( 247 self.cook_test_set(X, **kwargs), 248 self.beta_, 249 backend=self.backend, 250 ) 251 252 else: # confidence interval required for preds? 253 if len(X.shape) == 1: 254 Z = self.cook_test_set(new_X, **kwargs) 255 256 pred_obj = lmf.beta_Sigma_hat_rvfl2( 257 X_star=Z, 258 return_cov=self.return_std, 259 beta_hat_=self.beta_, 260 Sigma_hat_=self.Sigma_, 261 backend=self.backend, 262 ) 263 264 return ( 265 self.y_mean_ + pred_obj["preds"][0], 266 pred_obj["preds_std"][0], 267 ) 268 269 Z = self.cook_test_set(X, **kwargs) 270 271 pred_obj = lmf.beta_Sigma_hat_rvfl2( 272 X_star=Z, 273 return_cov=self.return_std, 274 beta_hat_=self.beta_, 275 Sigma_hat_=self.Sigma_, 276 backend=self.backend, 277 ) 278 279 return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])
Bayesian Random Vector Functional Link Network regression with two priors
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not (False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original features are included (True) in model's fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
seed: int
reproducibility seed for nodes_sim=='uniform'
s1: float
std. dev. of init. regression parameters in Bayesian Ridge Regression
s2: float
std. dev. of augmented regression parameters in Bayesian Ridge Regression
sigma: float
std. dev. of residuals in Bayesian Ridge Regression
return_std: boolean
if True, uncertainty around predictions is evaluated
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
beta_: array-like
regression's coefficients
Sigma_: array-like
covariance of the distribution of fitted parameters
GCV_: float
Generalized cross-validation error
y_mean_: float
average response
Examples:
TBD
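Since the example above is still marked TBD, here is a minimal placeholder sketch with illustrative hyperparameters; the two priors s1 and s2 apply to the original-feature and hidden-layer coefficients respectively.

```python
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

reg = ns.BayesianRVFL2Regressor(n_hidden_features=10, s1=0.1, s2=0.1, sigma=0.05, seed=123)
reg.fit(X_train, y_train)
mean, std = reg.predict(X_test, return_std=True)  # predictions and their standard deviations
print(mean[:5])
print(std[:5])
```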
143 def fit(self, X, y, **kwargs): 144 """Fit BayesianRVFL2Regressor to training data (X, y) 145 146 Parameters: 147 148 X: {array-like}, shape = [n_samples, n_features] 149 Training vectors, where n_samples is the number 150 of samples and n_features is the number of features 151 152 y: array-like, shape = [n_samples] 153 Target values 154 155 **kwargs: additional parameters to be passed to 156 self.cook_training_set 157 158 Returns: 159 160 self: object 161 162 """ 163 164 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 165 166 n, p = X.shape 167 q = self.n_hidden_features 168 169 if self.direct_link == True: 170 r = p + self.n_clusters 171 172 block11 = (self.s1**2) * np.eye(r) 173 block12 = np.zeros((r, q)) 174 block21 = np.zeros((q, r)) 175 block22 = (self.s2**2) * np.eye(q) 176 177 Sigma_prior = mo.rbind( 178 x=mo.cbind(x=block11, y=block12, backend=self.backend), 179 y=mo.cbind(x=block21, y=block22, backend=self.backend), 180 backend=self.backend, 181 ) 182 183 else: 184 Sigma_prior = (self.s2**2) * np.eye(q) 185 186 fit_obj = lmf.beta_Sigma_hat_rvfl2( 187 X=scaled_Z, 188 y=centered_y, 189 Sigma=Sigma_prior, 190 sigma=self.sigma, 191 fit_intercept=False, 192 return_cov=self.return_std, 193 backend=self.backend, 194 ) 195 196 self.beta_ = fit_obj["beta_hat"] 197 198 if self.return_std == True: 199 self.Sigma_ = fit_obj["Sigma_hat"] 200 201 self.GCV_ = fit_obj["GCV"] 202 203 return self
Fit BayesianRVFL2Regressor to training data (X, y)
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features
y: array-like, shape = [n_samples]
Target values
**kwargs: additional parameters to be passed to
self.cook_training_set
Returns:
self: object
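When direct_link is True, fit (see the source above) places a prior variance s1² on the coefficients of the original and cluster-derived features and s2² on the hidden-layer coefficients, assembled into a block-diagonal prior covariance. A small numpy illustration of that construction, with arbitrary dimensions:

```python
import numpy as np

p, n_clusters, q = 4, 2, 5          # original features, clusters, hidden features
s1, s2 = 0.1, 0.2

r = p + n_clusters                  # columns governed by the first prior
Sigma_prior = np.block([
    [(s1 ** 2) * np.eye(r), np.zeros((r, q))],
    [np.zeros((q, r)),      (s2 ** 2) * np.eye(q)],
])
print(Sigma_prior.shape)            # (11, 11)
```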
205 def predict(self, X, return_std=False, **kwargs): 206 """Predict test data X. 207 208 Parameters: 209 210 X: {array-like}, shape = [n_samples, n_features] 211 Training vectors, where n_samples is the number 212 of samples and n_features is the number of features. 213 214 return_std: {boolean}, standard dev. is returned or not 215 216 **kwargs: additional parameters to be passed to 217 self.cook_test_set 218 219 Returns: 220 221 model predictions: {array-like} 222 223 """ 224 225 if len(X.shape) == 1: # one observation in the test set only 226 n_features = X.shape[0] 227 new_X = mo.rbind( 228 x=X.reshape(1, n_features), 229 y=np.ones(n_features).reshape(1, n_features), 230 backend=self.backend, 231 ) 232 233 self.return_std = return_std 234 235 if self.return_std == False: 236 if len(X.shape) == 1: 237 return ( 238 self.y_mean_ 239 + mo.safe_sparse_dot( 240 self.cook_test_set(new_X, **kwargs), 241 self.beta_, 242 backend=self.backend, 243 ) 244 )[0] 245 246 return self.y_mean_ + mo.safe_sparse_dot( 247 self.cook_test_set(X, **kwargs), 248 self.beta_, 249 backend=self.backend, 250 ) 251 252 else: # confidence interval required for preds? 253 if len(X.shape) == 1: 254 Z = self.cook_test_set(new_X, **kwargs) 255 256 pred_obj = lmf.beta_Sigma_hat_rvfl2( 257 X_star=Z, 258 return_cov=self.return_std, 259 beta_hat_=self.beta_, 260 Sigma_hat_=self.Sigma_, 261 backend=self.backend, 262 ) 263 264 return ( 265 self.y_mean_ + pred_obj["preds"][0], 266 pred_obj["preds_std"][0], 267 ) 268 269 Z = self.cook_test_set(X, **kwargs) 270 271 pred_obj = lmf.beta_Sigma_hat_rvfl2( 272 X_star=Z, 273 return_cov=self.return_std, 274 beta_hat_=self.beta_, 275 Sigma_hat_=self.Sigma_, 276 backend=self.backend, 277 ) 278 279 return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
return_std: {boolean}, whether the standard deviation of the predictions is returned in addition to the point predictions
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
42class ClassicalMTS(Base): 43 """Multivariate time series (FactorMTS) forecasting with Factor models 44 45 Parameters: 46 47 model: type of model: str. 48 currently, 'VAR', 'VECM', 'ARIMA', 'ETS', 'Theta' 49 50 Attributes: 51 52 df_: data frame 53 the input data frame, in case a data.frame is provided to `fit` 54 55 level_: int 56 level of confidence for prediction intervals (default is 95) 57 58 Examples: 59 See examples/classical_mts_timeseries.py 60 """ 61 62 # construct the object ----- 63 64 def __init__(self, model="VAR"): 65 66 self.model = model 67 if self.model == "VAR": 68 self.obj = VAR 69 elif self.model == "VECM": 70 self.obj = VECM 71 elif self.model == "ARIMA": 72 self.obj = ARIMA 73 elif self.model == "ETS": 74 self.obj = ExponentialSmoothing 75 elif self.model == "Theta": 76 self.obj = ThetaModel 77 else: 78 raise ValueError("model not recognized") 79 self.n_series = None 80 self.replications = None 81 self.mean_ = None 82 self.upper_ = None 83 self.lower_ = None 84 self.output_dates_ = None 85 self.alpha_ = None 86 self.df_ = None 87 self.residuals_ = [] 88 self.sims_ = None 89 self.level_ = None 90 91 def fit(self, X, **kwargs): 92 """Fit FactorMTS model to training data X, with optional regressors xreg 93 94 Parameters: 95 96 X: {array-like}, shape = [n_samples, n_features] 97 Training time series, where n_samples is the number 98 of samples and n_features is the number of features; 99 X must be in increasing order (most recent observations last) 100 101 **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity) 102 103 Returns: 104 105 self: object 106 """ 107 108 try: 109 self.n_series = X.shape[1] 110 except Exception: 111 self.n_series = 1 112 113 if (isinstance(X, pd.DataFrame) is False) and isinstance( 114 X, pd.Series 115 ) is False: # input data set is a numpy array 116 117 X = pd.DataFrame(X) 118 if self.n_series > 1: 119 self.series_names = ["series" + str(i) for i in range(X.shape[1])] 120 else: 121 self.series_names = "series0" 122 123 else: # input data set is a DataFrame or Series with column names 124 125 X_index = None 126 if X.index is not None and len(X.shape) > 1: 127 X_index = X.index 128 X = copy.deepcopy(mo.convert_df_to_numeric(X)) 129 if X_index is not None: 130 try: 131 X.index = X_index 132 except Exception: 133 pass 134 if isinstance(X, pd.DataFrame): 135 self.series_names = X.columns.tolist() 136 else: 137 self.series_names = X.name 138 139 if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series): 140 self.df_ = X 141 X = X.values 142 self.df_.columns = self.series_names 143 self.input_dates = ts.compute_input_dates(self.df_) 144 else: 145 self.df_ = pd.DataFrame(X, columns=self.series_names) 146 147 if self.model == "Theta": 148 self.obj = self.obj(self.df_, **kwargs).fit() 149 else: 150 self.obj = self.obj(X, **kwargs).fit(**kwargs) 151 152 return self 153 154 def predict(self, h=5, level=95, **kwargs): 155 """Forecast all the time series, h steps ahead 156 157 Parameters: 158 159 h: {integer} 160 Forecasting horizon 161 162 **kwargs: additional parameters to be passed to 163 self.cook_test_set 164 165 Returns: 166 167 model predictions for horizon = h: {array-like} 168 169 """ 170 171 self.output_dates_, frequency = ts.compute_output_dates(self.df_, h) 172 173 self.level_ = level 174 175 self.lower_ = None # do not remove (/!\) 176 177 self.upper_ = None # do not remove (/!\) 178 179 self.sims_ = None # do not remove (/!\) 180 181 self.level_ = level 182 183 
self.alpha_ = 100 - level 184 185 pi_multiplier = norm.ppf(1 - self.alpha_ / 200) 186 187 # Named tuple for forecast results 188 DescribeResult = namedtuple("DescribeResult", ("mean", "lower", "upper")) 189 190 if self.model == "VAR": 191 mean_forecast, lower_bound, upper_bound = self.obj.forecast_interval( 192 self.obj.endog, steps=h, alpha=self.alpha_ / 100, **kwargs 193 ) 194 195 elif self.model == "VECM": 196 forecast_result = self.obj.predict(steps=h) 197 mean_forecast = forecast_result 198 lower_bound, upper_bound = self._compute_confidence_intervals( 199 forecast_result, alpha=self.alpha_ / 100, **kwargs 200 ) 201 202 elif self.model == "ARIMA": 203 forecast_result = self.obj.get_forecast(steps=h) 204 mean_forecast = forecast_result.predicted_mean 205 lower_bound = forecast_result.conf_int()[:, 0] 206 upper_bound = forecast_result.conf_int()[:, 1] 207 208 elif self.model == "ETS": 209 forecast_result = self.obj.forecast(steps=h) 210 residuals = self.obj.resid 211 std_errors = np.std(residuals) 212 mean_forecast = forecast_result 213 lower_bound = forecast_result - pi_multiplier * std_errors 214 upper_bound = forecast_result + pi_multiplier * std_errors 215 216 elif self.model == "Theta": 217 try: 218 mean_forecast = self.obj.forecast(steps=h).values 219 forecast_result = self.obj.prediction_intervals( 220 steps=h, alpha=self.alpha_ / 100, **kwargs 221 ) 222 lower_bound = forecast_result["lower"].values 223 upper_bound = forecast_result["upper"].values 224 except Exception: 225 mean_forecast = self.obj.forecast(steps=h) 226 forecast_result = self.obj.prediction_intervals( 227 steps=h, alpha=self.alpha_ / 100, **kwargs 228 ) 229 lower_bound = forecast_result["lower"] 230 upper_bound = forecast_result["upper"] 231 232 else: 233 234 raise ValueError("model not recognized") 235 236 try: 237 self.mean_ = pd.DataFrame( 238 mean_forecast, 239 columns=self.series_names, 240 index=self.output_dates_, 241 ) 242 self.lower_ = pd.DataFrame( 243 lower_bound, columns=self.series_names, index=self.output_dates_ 244 ) 245 self.upper_ = pd.DataFrame( 246 upper_bound, columns=self.series_names, index=self.output_dates_ 247 ) 248 except Exception: 249 self.mean_ = pd.Series( 250 mean_forecast, name=self.series_names, index=self.output_dates_ 251 ) 252 self.lower_ = pd.Series( 253 lower_bound, name=self.series_names, index=self.output_dates_ 254 ) 255 self.upper_ = pd.Series( 256 upper_bound, name=self.series_names, index=self.output_dates_ 257 ) 258 259 return DescribeResult(mean=self.mean_, lower=self.lower_, upper=self.upper_) 260 261 def _compute_confidence_intervals(self, forecast_result, alpha): 262 """ 263 Compute confidence intervals for VECM forecasts. 264 Uses the covariance of residuals to approximate the confidence intervals. 
265 """ 266 residuals = self.obj.resid 267 cov_matrix = np.cov(residuals.T) # Covariance matrix of residuals 268 std_errors = np.sqrt(np.diag(cov_matrix)) # Standard errors 269 270 z_value = norm.ppf(1 - alpha / 2) # Z-score for the given alpha level 271 lower_bound = forecast_result - z_value * std_errors 272 upper_bound = forecast_result + z_value * std_errors 273 274 return lower_bound, upper_bound 275 276 def score(self, X, training_index, testing_index, scoring=None, **kwargs): 277 """Train on training_index, score on testing_index.""" 278 279 assert ( 280 bool(set(training_index).intersection(set(testing_index))) == False 281 ), "Non-overlapping 'training_index' and 'testing_index' required" 282 283 # Dimensions 284 try: 285 # multivariate time series 286 n, p = X.shape 287 except: 288 # univariate time series 289 n = X.shape[0] 290 p = 1 291 292 # Training and testing sets 293 if p > 1: 294 X_train = X[training_index, :] 295 X_test = X[testing_index, :] 296 else: 297 X_train = X[training_index] 298 X_test = X[testing_index] 299 300 # Horizon 301 h = len(testing_index) 302 assert ( 303 len(training_index) + h 304 ) <= n, "Please check lengths of training and testing windows" 305 306 # Fit and predict 307 self.fit(X_train, **kwargs) 308 preds = self.predict(h=h, **kwargs) 309 310 if scoring is None: 311 scoring = "neg_root_mean_squared_error" 312 313 # check inputs 314 assert scoring in ( 315 "explained_variance", 316 "neg_mean_absolute_error", 317 "neg_mean_squared_error", 318 "neg_root_mean_squared_error", 319 "neg_mean_squared_log_error", 320 "neg_median_absolute_error", 321 "r2", 322 ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \ 323 'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \ 324 'neg_median_absolute_error', 'r2')" 325 326 scoring_options = { 327 "explained_variance": skm2.explained_variance_score, 328 "neg_mean_absolute_error": skm2.mean_absolute_error, 329 "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2), 330 "neg_root_mean_squared_error": lambda x, y: np.sqrt(np.mean((x - y) ** 2)), 331 "neg_mean_squared_log_error": skm2.mean_squared_log_error, 332 "neg_median_absolute_error": skm2.median_absolute_error, 333 "r2": skm2.r2_score, 334 } 335 336 # if p > 1: 337 # return tuple( 338 # [ 339 # scoring_options[scoring]( 340 # X_test[:, i], preds[:, i]#, **kwargs 341 # ) 342 # for i in range(p) 343 # ] 344 # ) 345 # else: 346 return scoring_options[scoring](X_test, preds) 347 348 def plot(self, series=None, type_axis="dates", type_plot="pi"): 349 """Plot time series forecast 350 351 Parameters: 352 353 series: {integer} or {string} 354 series index or name 355 356 """ 357 358 assert all( 359 [ 360 self.mean_ is not None, 361 self.lower_ is not None, 362 self.upper_ is not None, 363 self.output_dates_ is not None, 364 ] 365 ), "model forecasting must be obtained first (with predict)" 366 367 if series is None: 368 assert ( 369 self.n_series == 1 370 ), "please specify series index or name (n_series > 1)" 371 series = 0 372 373 if isinstance(series, str): 374 assert ( 375 series in self.series_names 376 ), f"series {series} doesn't exist in the input dataset" 377 series_idx = self.df_.columns.get_loc(series) 378 else: 379 assert isinstance(series, int) and ( 380 0 <= series < self.n_series 381 ), f"check series index (< {self.n_series})" 382 series_idx = series 383 384 if isinstance(self.df_, pd.DataFrame): 385 y_all = list(self.df_.iloc[:, series_idx]) + list( 386 self.mean_.iloc[:, series_idx] 387 
) 388 y_test = list(self.mean_.iloc[:, series_idx]) 389 else: 390 y_all = list(self.df_.values) + list(self.mean_.values) 391 y_test = list(self.mean_.values) 392 n_points_all = len(y_all) 393 n_points_train = self.df_.shape[0] 394 395 if type_axis == "numeric": 396 x_all = [i for i in range(n_points_all)] 397 x_test = [i for i in range(n_points_train, n_points_all)] 398 399 if type_axis == "dates": # use dates 400 x_all = np.concatenate( 401 (self.input_dates.values, self.output_dates_.values), axis=None 402 ) 403 x_test = self.output_dates_.values 404 405 if type_plot == "pi": 406 fig, ax = plt.subplots() 407 ax.plot(x_all, y_all, "-") 408 ax.plot(x_test, y_test, "-", color="orange") 409 try: 410 ax.fill_between( 411 x_test, 412 self.lower_.iloc[:, series_idx], 413 self.upper_.iloc[:, series_idx], 414 alpha=0.2, 415 color="orange", 416 ) 417 except Exception: 418 ax.fill_between( 419 x_test, 420 self.lower_.values, 421 self.upper_.values, 422 alpha=0.2, 423 color="orange", 424 ) 425 if self.replications is None: 426 if self.n_series > 1: 427 plt.title( 428 f"prediction intervals for {series}", 429 loc="left", 430 fontsize=12, 431 fontweight=0, 432 color="black", 433 ) 434 else: 435 plt.title( 436 f"prediction intervals for input time series", 437 loc="left", 438 fontsize=12, 439 fontweight=0, 440 color="black", 441 ) 442 plt.show() 443 else: # self.replications is not None 444 if self.n_series > 1: 445 plt.title( 446 f"prediction intervals for {self.replications} simulations of {series}", 447 loc="left", 448 fontsize=12, 449 fontweight=0, 450 color="black", 451 ) 452 else: 453 plt.title( 454 f"prediction intervals for {self.replications} simulations of input time series", 455 loc="left", 456 fontsize=12, 457 fontweight=0, 458 color="black", 459 ) 460 plt.show() 461 462 if type_plot == "spaghetti": 463 palette = plt.get_cmap("Set1") 464 sims_ix = getsims(self.sims_, series_idx) 465 plt.plot(x_all, y_all, "-") 466 for col_ix in range( 467 sims_ix.shape[1] 468 ): # avoid this when there are thousands of simulations 469 plt.plot( 470 x_test, 471 sims_ix[:, col_ix], 472 "-", 473 color=palette(col_ix), 474 linewidth=1, 475 alpha=0.9, 476 ) 477 plt.plot(x_all, y_all, "-", color="black") 478 plt.plot(x_test, y_test, "-", color="blue") 479 # Add titles 480 if self.n_series > 1: 481 plt.title( 482 f"{self.replications} simulations of {series}", 483 loc="left", 484 fontsize=12, 485 fontweight=0, 486 color="black", 487 ) 488 else: 489 plt.title( 490 f"{self.replications} simulations of input time series", 491 loc="left", 492 fontsize=12, 493 fontweight=0, 494 color="black", 495 ) 496 plt.xlabel("Time") 497 plt.ylabel("Values") 498 # Show the graph 499 plt.show() 500 501 def cross_val_score( 502 self, 503 X, 504 scoring="root_mean_squared_error", 505 n_jobs=None, 506 verbose=0, 507 xreg=None, 508 initial_window=5, 509 horizon=3, 510 fixed_window=False, 511 show_progress=True, 512 level=95, 513 **kwargs, 514 ): 515 """Evaluate a score by time series cross-validation. 516 517 Parameters: 518 519 X: {array-like, sparse matrix} of shape (n_samples, n_features) 520 The data to fit. 521 522 scoring: str or a function 523 A str in ('root_mean_squared_error', 'mean_squared_error', 'mean_error', 524 'mean_absolute_error', 'mean_error', 'mean_percentage_error', 525 'mean_absolute_percentage_error', 'winkler_score', 'coverage') 526 Or a function defined as 'coverage' and 'winkler_score' in `utils.timeseries` 527 528 n_jobs: int, default=None 529 Number of jobs to run in parallel. 
530 531 verbose: int, default=0 532 The verbosity level. 533 534 xreg: array-like, optional (default=None) 535 Additional (external) regressors to be passed to `fit` 536 xreg must be in 'increasing' order (most recent observations last) 537 538 initial_window: int 539 initial number of consecutive values in each training set sample 540 541 horizon: int 542 number of consecutive values in test set sample 543 544 fixed_window: boolean 545 if False, all training samples start at index 0, and the training 546 window's size is increasing. 547 if True, the training window's size is fixed, and the window is 548 rolling forward 549 550 show_progress: boolean 551 if True, a progress bar is printed 552 553 **kwargs: dict 554 additional parameters to be passed to `fit` and `predict` 555 556 Returns: 557 558 A tuple: descriptive statistics or errors and raw errors 559 560 """ 561 tscv = TimeSeriesSplit() 562 563 tscv_obj = tscv.split( 564 X, 565 initial_window=initial_window, 566 horizon=horizon, 567 fixed_window=fixed_window, 568 ) 569 570 if isinstance(scoring, str): 571 572 assert scoring in ( 573 "root_mean_squared_error", 574 "mean_squared_error", 575 "mean_error", 576 "mean_absolute_error", 577 "mean_percentage_error", 578 "mean_absolute_percentage_error", 579 "winkler_score", 580 "coverage", 581 ), "must have scoring in ('root_mean_squared_error', 'mean_squared_error', 'mean_error', 'mean_absolute_error', 'mean_error', 'mean_percentage_error', 'mean_absolute_percentage_error', 'winkler_score', 'coverage')" 582 583 def err_func(X_test, X_pred, scoring): 584 if (self.replications is not None) or ( 585 self.type_pi == "gaussian" 586 ): # probabilistic 587 if scoring == "winkler_score": 588 return winkler_score(X_pred, X_test, level=level) 589 elif scoring == "coverage": 590 return coverage(X_pred, X_test, level=level) 591 else: 592 return mean_errors( 593 pred=X_pred.mean, actual=X_test, scoring=scoring 594 ) 595 else: # not probabilistic 596 return mean_errors(pred=X_pred, actual=X_test, scoring=scoring) 597 598 else: # isinstance(scoring, str) = False 599 600 err_func = scoring 601 602 errors = [] 603 604 train_indices = [] 605 606 test_indices = [] 607 608 for train_index, test_index in tscv_obj: 609 train_indices.append(train_index) 610 test_indices.append(test_index) 611 612 if show_progress is True: 613 iterator = tqdm(zip(train_indices, test_indices), total=len(train_indices)) 614 else: 615 iterator = zip(train_indices, test_indices) 616 617 for train_index, test_index in iterator: 618 619 if verbose == 1: 620 print(f"TRAIN: {train_index}") 621 print(f"TEST: {test_index}") 622 623 if isinstance(X, pd.DataFrame): 624 self.fit(X.iloc[train_index, :], xreg=xreg, **kwargs) 625 X_test = X.iloc[test_index, :] 626 else: 627 self.fit(X[train_index, :], xreg=xreg, **kwargs) 628 X_test = X[test_index, :] 629 X_pred = self.predict(h=int(len(test_index)), level=level, **kwargs) 630 631 errors.append(err_func(X_test, X_pred, scoring)) 632 633 res = np.asarray(errors) 634 635 return res, describe(res)
Multivariate time series (ClassicalMTS) forecasting with classical statistical models
Parameters:
model: str
type of model; currently 'VAR', 'VECM', 'ARIMA', 'ETS', 'Theta'
Attributes:
df_: data frame
the input data frame, in case a data.frame is provided to `fit`
level_: int
level of confidence for prediction intervals (default is 95)
Examples: See examples/classical_mts_timeseries.py
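Besides the script referenced above, here is a minimal sketch on synthetic data (a random-walk DataFrame with a regular weekly date index, so that forecast dates can be inferred; statsmodels is required for the underlying 'VAR' model):

```python
import nnetsauce as ns
import numpy as np
import pandas as pd

rng = np.random.default_rng(123)
dates = pd.date_range("2020-01-05", periods=60, freq="W")
df = pd.DataFrame(rng.normal(size=(60, 2)).cumsum(axis=0),
                  columns=["series1", "series2"], index=dates)

obj = ns.ClassicalMTS(model="VAR")
obj.fit(df)
res = obj.predict(h=5, level=95)    # namedtuple with fields mean, lower, upper
print(res.mean)
print(res.lower)
print(res.upper)
```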
91 def fit(self, X, **kwargs): 92 """Fit FactorMTS model to training data X, with optional regressors xreg 93 94 Parameters: 95 96 X: {array-like}, shape = [n_samples, n_features] 97 Training time series, where n_samples is the number 98 of samples and n_features is the number of features; 99 X must be in increasing order (most recent observations last) 100 101 **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity) 102 103 Returns: 104 105 self: object 106 """ 107 108 try: 109 self.n_series = X.shape[1] 110 except Exception: 111 self.n_series = 1 112 113 if (isinstance(X, pd.DataFrame) is False) and isinstance( 114 X, pd.Series 115 ) is False: # input data set is a numpy array 116 117 X = pd.DataFrame(X) 118 if self.n_series > 1: 119 self.series_names = ["series" + str(i) for i in range(X.shape[1])] 120 else: 121 self.series_names = "series0" 122 123 else: # input data set is a DataFrame or Series with column names 124 125 X_index = None 126 if X.index is not None and len(X.shape) > 1: 127 X_index = X.index 128 X = copy.deepcopy(mo.convert_df_to_numeric(X)) 129 if X_index is not None: 130 try: 131 X.index = X_index 132 except Exception: 133 pass 134 if isinstance(X, pd.DataFrame): 135 self.series_names = X.columns.tolist() 136 else: 137 self.series_names = X.name 138 139 if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series): 140 self.df_ = X 141 X = X.values 142 self.df_.columns = self.series_names 143 self.input_dates = ts.compute_input_dates(self.df_) 144 else: 145 self.df_ = pd.DataFrame(X, columns=self.series_names) 146 147 if self.model == "Theta": 148 self.obj = self.obj(self.df_, **kwargs).fit() 149 else: 150 self.obj = self.obj(X, **kwargs).fit(**kwargs) 151 152 return self
Fit ClassicalMTS model to training data X, with optional regressors xreg
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training time series, where n_samples is the number of samples and n_features is the number of features; X must be in increasing order (most recent observations last)
**kwargs: additional parameters, currently passed on for kernel density estimation when needed (see sklearn.neighbors.KernelDensity)
Returns:
self: object
154 def predict(self, h=5, level=95, **kwargs): 155 """Forecast all the time series, h steps ahead 156 157 Parameters: 158 159 h: {integer} 160 Forecasting horizon 161 162 **kwargs: additional parameters to be passed to 163 self.cook_test_set 164 165 Returns: 166 167 model predictions for horizon = h: {array-like} 168 169 """ 170 171 self.output_dates_, frequency = ts.compute_output_dates(self.df_, h) 172 173 self.level_ = level 174 175 self.lower_ = None # do not remove (/!\) 176 177 self.upper_ = None # do not remove (/!\) 178 179 self.sims_ = None # do not remove (/!\) 180 181 self.level_ = level 182 183 self.alpha_ = 100 - level 184 185 pi_multiplier = norm.ppf(1 - self.alpha_ / 200) 186 187 # Named tuple for forecast results 188 DescribeResult = namedtuple("DescribeResult", ("mean", "lower", "upper")) 189 190 if self.model == "VAR": 191 mean_forecast, lower_bound, upper_bound = self.obj.forecast_interval( 192 self.obj.endog, steps=h, alpha=self.alpha_ / 100, **kwargs 193 ) 194 195 elif self.model == "VECM": 196 forecast_result = self.obj.predict(steps=h) 197 mean_forecast = forecast_result 198 lower_bound, upper_bound = self._compute_confidence_intervals( 199 forecast_result, alpha=self.alpha_ / 100, **kwargs 200 ) 201 202 elif self.model == "ARIMA": 203 forecast_result = self.obj.get_forecast(steps=h) 204 mean_forecast = forecast_result.predicted_mean 205 lower_bound = forecast_result.conf_int()[:, 0] 206 upper_bound = forecast_result.conf_int()[:, 1] 207 208 elif self.model == "ETS": 209 forecast_result = self.obj.forecast(steps=h) 210 residuals = self.obj.resid 211 std_errors = np.std(residuals) 212 mean_forecast = forecast_result 213 lower_bound = forecast_result - pi_multiplier * std_errors 214 upper_bound = forecast_result + pi_multiplier * std_errors 215 216 elif self.model == "Theta": 217 try: 218 mean_forecast = self.obj.forecast(steps=h).values 219 forecast_result = self.obj.prediction_intervals( 220 steps=h, alpha=self.alpha_ / 100, **kwargs 221 ) 222 lower_bound = forecast_result["lower"].values 223 upper_bound = forecast_result["upper"].values 224 except Exception: 225 mean_forecast = self.obj.forecast(steps=h) 226 forecast_result = self.obj.prediction_intervals( 227 steps=h, alpha=self.alpha_ / 100, **kwargs 228 ) 229 lower_bound = forecast_result["lower"] 230 upper_bound = forecast_result["upper"] 231 232 else: 233 234 raise ValueError("model not recognized") 235 236 try: 237 self.mean_ = pd.DataFrame( 238 mean_forecast, 239 columns=self.series_names, 240 index=self.output_dates_, 241 ) 242 self.lower_ = pd.DataFrame( 243 lower_bound, columns=self.series_names, index=self.output_dates_ 244 ) 245 self.upper_ = pd.DataFrame( 246 upper_bound, columns=self.series_names, index=self.output_dates_ 247 ) 248 except Exception: 249 self.mean_ = pd.Series( 250 mean_forecast, name=self.series_names, index=self.output_dates_ 251 ) 252 self.lower_ = pd.Series( 253 lower_bound, name=self.series_names, index=self.output_dates_ 254 ) 255 self.upper_ = pd.Series( 256 upper_bound, name=self.series_names, index=self.output_dates_ 257 ) 258 259 return DescribeResult(mean=self.mean_, lower=self.lower_, upper=self.upper_)
Forecast all the time series, h steps ahead
Parameters:
h: {integer}
Forecasting horizon
**kwargs: additional parameters to be passed to self.cook_test_set
Returns:
model predictions for horizon = h: {array-like}
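Continuing the sketch above, a hedged example of `predict`: per the source below, the result is a namedtuple with `mean`, `lower` and `upper` fields, also stored on the object as `mean_`, `lower_` and `upper_`.

```python
# 5-step-ahead forecast with 95% prediction intervals
res = obj.predict(h=5, level=95)
print(res.mean.head())   # point forecasts, indexed by forecast dates
print(res.lower.head())  # lower bound of the prediction interval
print(res.upper.head())  # upper bound of the prediction interval
```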
276 def score(self, X, training_index, testing_index, scoring=None, **kwargs): 277 """Train on training_index, score on testing_index.""" 278 279 assert ( 280 bool(set(training_index).intersection(set(testing_index))) == False 281 ), "Non-overlapping 'training_index' and 'testing_index' required" 282 283 # Dimensions 284 try: 285 # multivariate time series 286 n, p = X.shape 287 except: 288 # univariate time series 289 n = X.shape[0] 290 p = 1 291 292 # Training and testing sets 293 if p > 1: 294 X_train = X[training_index, :] 295 X_test = X[testing_index, :] 296 else: 297 X_train = X[training_index] 298 X_test = X[testing_index] 299 300 # Horizon 301 h = len(testing_index) 302 assert ( 303 len(training_index) + h 304 ) <= n, "Please check lengths of training and testing windows" 305 306 # Fit and predict 307 self.fit(X_train, **kwargs) 308 preds = self.predict(h=h, **kwargs) 309 310 if scoring is None: 311 scoring = "neg_root_mean_squared_error" 312 313 # check inputs 314 assert scoring in ( 315 "explained_variance", 316 "neg_mean_absolute_error", 317 "neg_mean_squared_error", 318 "neg_root_mean_squared_error", 319 "neg_mean_squared_log_error", 320 "neg_median_absolute_error", 321 "r2", 322 ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \ 323 'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \ 324 'neg_median_absolute_error', 'r2')" 325 326 scoring_options = { 327 "explained_variance": skm2.explained_variance_score, 328 "neg_mean_absolute_error": skm2.mean_absolute_error, 329 "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2), 330 "neg_root_mean_squared_error": lambda x, y: np.sqrt(np.mean((x - y) ** 2)), 331 "neg_mean_squared_log_error": skm2.mean_squared_log_error, 332 "neg_median_absolute_error": skm2.median_absolute_error, 333 "r2": skm2.r2_score, 334 } 335 336 # if p > 1: 337 # return tuple( 338 # [ 339 # scoring_options[scoring]( 340 # X_test[:, i], preds[:, i]#, **kwargs 341 # ) 342 # for i in range(p) 343 # ] 344 # ) 345 # else: 346 return scoring_options[scoring](X_test, preds)
Train on training_index, score on testing_index.
16class CustomClassifier(Custom, ClassifierMixin): 17 """Custom Classification model 18 19 Attributes: 20 21 obj: object 22 any object containing a method fit (obj.fit()) and a method predict 23 (obj.predict()) 24 25 n_hidden_features: int 26 number of nodes in the hidden layer 27 28 activation_name: str 29 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 30 31 a: float 32 hyperparameter for 'prelu' or 'elu' activation function 33 34 nodes_sim: str 35 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 36 'uniform' 37 38 bias: boolean 39 indicates if the hidden layer contains a bias term (True) or not 40 (False) 41 42 dropout: float 43 regularization parameter; (random) percentage of nodes dropped out 44 of the training 45 46 direct_link: boolean 47 indicates if the original predictors are included (True) in model''s 48 fitting or not (False) 49 50 n_clusters: int 51 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: 52 no clustering) 53 54 cluster_encode: bool 55 defines how the variable containing clusters is treated (default is one-hot) 56 if `False`, then labels are used, without one-hot encoding 57 58 type_clust: str 59 type of clustering method: currently k-means ('kmeans') or Gaussian 60 Mixture Model ('gmm') 61 62 type_scaling: a tuple of 3 strings 63 scaling methods for inputs, hidden layer, and clustering respectively 64 (and when relevant). 65 Currently available: standardization ('std') or MinMax scaling ('minmax') 66 67 col_sample: float 68 percentage of covariates randomly chosen for training 69 70 row_sample: float 71 percentage of rows chosen for training, by stratified bootstrapping 72 73 cv_calibration: int, cross-validation generator, or iterable, default=2 74 Determines the cross-validation splitting strategy. Same as 75 `sklearn.calibration.CalibratedClassifierCV` 76 77 calibration_method: str 78 {‘sigmoid’, ‘isotonic’}, default=’sigmoid’ 79 The method to use for calibration. 
Same as 80 `sklearn.calibration.CalibratedClassifierCV` 81 82 seed: int 83 reproducibility seed for nodes_sim=='uniform' 84 85 backend: str 86 "cpu" or "gpu" or "tpu" 87 88 Examples: 89 90 Note: it's better to use the `DeepClassifier` or `LazyDeepClassifier` classes directly 91 92 ```python 93 import nnetsauce as ns 94 from sklearn.ensemble import RandomForestClassifier 95 from sklearn.model_selection import train_test_split 96 from sklearn.datasets import load_digits 97 from time import time 98 99 digits = load_digits() 100 X = digits.data 101 y = digits.target 102 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, 103 random_state=123) 104 105 # layer 1 (base layer) ---- 106 layer1_regr = RandomForestClassifier(n_estimators=10, random_state=123) 107 108 start = time() 109 110 layer1_regr.fit(X_train, y_train) 111 112 # Accuracy in layer 1 113 print(layer1_regr.score(X_test, y_test)) 114 115 # layer 2 using layer 1 ---- 116 layer2_regr = ns.CustomClassifier(obj = layer1_regr, n_hidden_features=5, 117 direct_link=True, bias=True, 118 nodes_sim='uniform', activation_name='relu', 119 n_clusters=2, seed=123) 120 layer2_regr.fit(X_train, y_train) 121 122 # Accuracy in layer 2 123 print(layer2_regr.score(X_test, y_test)) 124 125 # layer 3 using layer 2 ---- 126 layer3_regr = ns.CustomClassifier(obj = layer2_regr, n_hidden_features=10, 127 direct_link=True, bias=True, dropout=0.7, 128 nodes_sim='uniform', activation_name='relu', 129 n_clusters=2, seed=123) 130 layer3_regr.fit(X_train, y_train) 131 132 # Accuracy in layer 3 133 print(layer3_regr.score(X_test, y_test)) 134 135 print(f"Elapsed {time() - start}") 136 ``` 137 138 """ 139 140 # construct the object ----- 141 _estimator_type = "classifier" 142 143 def __init__( 144 self, 145 obj, 146 n_hidden_features=5, 147 activation_name="relu", 148 a=0.01, 149 nodes_sim="sobol", 150 bias=True, 151 dropout=0, 152 direct_link=True, 153 n_clusters=2, 154 cluster_encode=True, 155 type_clust="kmeans", 156 type_scaling=("std", "std", "std"), 157 col_sample=1, 158 row_sample=1, 159 cv_calibration=2, 160 calibration_method="sigmoid", 161 seed=123, 162 backend="cpu", 163 ): 164 super().__init__( 165 obj=obj, 166 n_hidden_features=n_hidden_features, 167 activation_name=activation_name, 168 a=a, 169 nodes_sim=nodes_sim, 170 bias=bias, 171 dropout=dropout, 172 direct_link=direct_link, 173 n_clusters=n_clusters, 174 cluster_encode=cluster_encode, 175 type_clust=type_clust, 176 type_scaling=type_scaling, 177 col_sample=col_sample, 178 row_sample=row_sample, 179 seed=seed, 180 backend=backend, 181 ) 182 self.coef_ = None 183 self.intercept_ = None 184 self.type_fit = "classification" 185 self.cv_calibration = cv_calibration 186 self.calibration_method = calibration_method 187 188 def __sklearn_clone__(self): 189 """Create a clone of the estimator. 190 191 This is required for scikit-learn's calibration system to work properly. 
192 """ 193 # Create a new instance with the same parameters 194 clone = CustomClassifier( 195 obj=self.obj, 196 n_hidden_features=self.n_hidden_features, 197 activation_name=self.activation_name, 198 a=self.a, 199 nodes_sim=self.nodes_sim, 200 bias=self.bias, 201 dropout=self.dropout, 202 direct_link=self.direct_link, 203 n_clusters=self.n_clusters, 204 cluster_encode=self.cluster_encode, 205 type_clust=self.type_clust, 206 type_scaling=self.type_scaling, 207 col_sample=self.col_sample, 208 row_sample=self.row_sample, 209 cv_calibration=self.cv_calibration, 210 calibration_method=self.calibration_method, 211 seed=self.seed, 212 backend=self.backend 213 ) 214 return clone 215 216 def fit(self, X, y, sample_weight=None, **kwargs): 217 """Fit custom model to training data (X, y). 218 219 Parameters: 220 221 X: {array-like}, shape = [n_samples, n_features] 222 Training vectors, where n_samples is the number 223 of samples and n_features is the number of features. 224 225 y: array-like, shape = [n_samples] 226 Target values. 227 228 sample_weight: array-like, shape = [n_samples] 229 Sample weights. 230 231 **kwargs: additional parameters to be passed to 232 self.cook_training_set or self.obj.fit 233 234 Returns: 235 236 self: object 237 """ 238 239 if len(X.shape) == 1: 240 if isinstance(X, pd.DataFrame): 241 X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns) 242 else: 243 X = X.reshape(1, -1) 244 245 output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 246 self.classes_ = np.unique(y) 247 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 248 249 # Wrap in CalibratedClassifierCV if needed 250 if self.cv_calibration is not None: 251 self.obj = CalibratedClassifierCV( 252 self.obj, 253 cv=self.cv_calibration, 254 method=self.calibration_method 255 ) 256 257 # if sample_weights, else: (must use self.row_index) 258 if sample_weight is not None: 259 self.obj.fit( 260 scaled_Z, 261 output_y, 262 sample_weight=sample_weight[self.index_row_].ravel(), 263 **kwargs 264 ) 265 return self 266 267 # if sample_weight is None: 268 self.obj.fit(scaled_Z, output_y, **kwargs) 269 self.classes_ = np.unique(y) # for compatibility with sklearn 270 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 271 272 if hasattr(self.obj, "coef_"): 273 self.coef_ = self.obj.coef_ 274 275 if hasattr(self.obj, "intercept_"): 276 self.intercept_ = self.obj.intercept_ 277 278 return self 279 280 def partial_fit(self, X, y, sample_weight=None, **kwargs): 281 """Partial fit custom model to training data (X, y). 282 283 Parameters: 284 285 X: {array-like}, shape = [n_samples, n_features] 286 Subset of training vectors, where n_samples is the number 287 of samples and n_features is the number of features. 288 289 y: array-like, shape = [n_samples] 290 Subset of target values. 291 292 sample_weight: array-like, shape = [n_samples] 293 Sample weights. 
294 295 **kwargs: additional parameters to be passed to 296 self.cook_training_set or self.obj.fit 297 298 Returns: 299 300 self: object 301 """ 302 303 if len(X.shape) == 1: 304 if isinstance(X, pd.DataFrame): 305 X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns) 306 else: 307 X = X.reshape(1, -1) 308 y = np.array([y], dtype=np.integer) 309 310 output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 311 self.n_classes_ = len(np.unique(y)) # for compatibility with sklearn 312 313 # if sample_weights, else: (must use self.row_index) 314 if sample_weight is not None: 315 try: 316 self.obj.partial_fit( 317 scaled_Z, 318 output_y, 319 sample_weight=sample_weight[self.index_row_].ravel(), 320 # **kwargs 321 ) 322 except: 323 NotImplementedError 324 325 return self 326 327 # if sample_weight is None: 328 #try: 329 self.obj.partial_fit(scaled_Z, output_y) 330 #except: 331 # raise NotImplementedError 332 333 self.classes_ = np.unique(y) # for compatibility with sklearn 334 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 335 336 return self 337 338 def predict(self, X, **kwargs): 339 """Predict test data X. 340 341 Parameters: 342 343 X: {array-like}, shape = [n_samples, n_features] 344 Training vectors, where n_samples is the number 345 of samples and n_features is the number of features. 346 347 **kwargs: additional parameters to be passed to 348 self.cook_test_set 349 350 Returns: 351 352 model predictions: {array-like} 353 """ 354 355 if len(X.shape) == 1: 356 n_features = X.shape[0] 357 new_X = mo.rbind( 358 X.reshape(1, n_features), 359 np.ones(n_features).reshape(1, n_features), 360 ) 361 362 return (self.obj.predict(self.cook_test_set(new_X, **kwargs), **kwargs))[0] 363 364 return self.obj.predict(self.cook_test_set(X, **kwargs), **kwargs) 365 366 def predict_proba(self, X, **kwargs): 367 """Predict probabilities for test data X. 368 369 Args: 370 371 X: {array-like}, shape = [n_samples, n_features] 372 Training vectors, where n_samples is the number 373 of samples and n_features is the number of features. 374 375 **kwargs: additional parameters to be passed to 376 self.cook_test_set 377 378 Returns: 379 380 probability estimates for test data: {array-like} 381 """ 382 383 if len(X.shape) == 1: 384 n_features = X.shape[0] 385 new_X = mo.rbind( 386 X.reshape(1, n_features), 387 np.ones(n_features).reshape(1, n_features), 388 ) 389 return ( 390 self.obj.predict_proba(self.cook_test_set(new_X, **kwargs), **kwargs) 391 )[0] 392 return self.obj.predict_proba(self.cook_test_set(X, **kwargs), **kwargs) 393 394 def decision_function(self, X, **kwargs): 395 """Compute the decision function of X. 396 397 Parameters: 398 X: {array-like}, shape = [n_samples, n_features] 399 Samples to compute decision function for. 400 401 **kwargs: additional parameters to be passed to 402 self.cook_test_set 403 404 Returns: 405 array-like of shape (n_samples,) or (n_samples, n_classes) 406 Decision function of the input samples. The order of outputs is the same 407 as that of the classes passed to fit. 
408 """ 409 if not hasattr(self.obj, "decision_function"): 410 # If base classifier doesn't have decision_function, use predict_proba 411 proba = self.predict_proba(X, **kwargs) 412 if proba.shape[1] == 2: 413 return proba[:, 1] # For binary classification 414 return proba # For multiclass 415 416 if len(X.shape) == 1: 417 n_features = X.shape[0] 418 new_X = mo.rbind( 419 X.reshape(1, n_features), 420 np.ones(n_features).reshape(1, n_features), 421 ) 422 423 return ( 424 self.obj.decision_function( 425 self.cook_test_set(new_X, **kwargs), **kwargs 426 ) 427 )[0] 428 429 return self.obj.decision_function(self.cook_test_set(X, **kwargs), **kwargs) 430 431 def score(self, X, y, scoring=None): 432 """Scoring function for classification. 433 434 Args: 435 436 X: {array-like}, shape = [n_samples, n_features] 437 Training vectors, where n_samples is the number 438 of samples and n_features is the number of features. 439 440 y: array-like, shape = [n_samples] 441 Target values. 442 443 scoring: str 444 scoring method (default is accuracy) 445 446 Returns: 447 448 score: float 449 """ 450 451 if scoring is None: 452 scoring = "accuracy" 453 454 if scoring == "accuracy": 455 return skm2.accuracy_score(y, self.predict(X)) 456 457 if scoring == "f1": 458 return skm2.f1_score(y, self.predict(X)) 459 460 if scoring == "precision": 461 return skm2.precision_score(y, self.predict(X)) 462 463 if scoring == "recall": 464 return skm2.recall_score(y, self.predict(X)) 465 466 if scoring == "roc_auc": 467 return skm2.roc_auc_score(y, self.predict(X)) 468 469 if scoring == "log_loss": 470 return skm2.log_loss(y, self.predict_proba(X)) 471 472 if scoring == "balanced_accuracy": 473 return skm2.balanced_accuracy_score(y, self.predict(X)) 474 475 if scoring == "average_precision": 476 return skm2.average_precision_score(y, self.predict(X)) 477 478 if scoring == "neg_brier_score": 479 return -skm2.brier_score_loss(y, self.predict_proba(X)) 480 481 if scoring == "neg_log_loss": 482 return -skm2.log_loss(y, self.predict_proba(X)) 483 484 @property 485 def _estimator_type(self): 486 return "classifier"
Custom Classification model
Attributes:
obj: object
any object containing a method fit (obj.fit()) and a method predict
(obj.predict())
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
cv_calibration: int, cross-validation generator, or iterable, default=2
Determines the cross-validation splitting strategy. Same as
`sklearn.calibration.CalibratedClassifierCV`
calibration_method: str
{'sigmoid', 'isotonic'}, default='sigmoid'
The method to use for calibration. Same as
`sklearn.calibration.CalibratedClassifierCV`
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu" or "gpu" or "tpu"
Examples:
Note: it's better to use the `DeepClassifier` or `LazyDeepClassifier` classes directly
```python
import nnetsauce as ns
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_digits
from time import time

digits = load_digits()
X = digits.data
y = digits.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=123)

# layer 1 (base layer) ----
layer1_regr = RandomForestClassifier(n_estimators=10, random_state=123)

start = time()

layer1_regr.fit(X_train, y_train)

# Accuracy in layer 1
print(layer1_regr.score(X_test, y_test))

# layer 2 using layer 1 ----
layer2_regr = ns.CustomClassifier(obj = layer1_regr, n_hidden_features=5,
                                  direct_link=True, bias=True,
                                  nodes_sim='uniform', activation_name='relu',
                                  n_clusters=2, seed=123)
layer2_regr.fit(X_train, y_train)

# Accuracy in layer 2
print(layer2_regr.score(X_test, y_test))

# layer 3 using layer 2 ----
layer3_regr = ns.CustomClassifier(obj = layer2_regr, n_hidden_features=10,
                                  direct_link=True, bias=True, dropout=0.7,
                                  nodes_sim='uniform', activation_name='relu',
                                  n_clusters=2, seed=123)
layer3_regr.fit(X_train, y_train)

# Accuracy in layer 3
print(layer3_regr.score(X_test, y_test))

print(f"Elapsed {time() - start}")
```
216 def fit(self, X, y, sample_weight=None, **kwargs): 217 """Fit custom model to training data (X, y). 218 219 Parameters: 220 221 X: {array-like}, shape = [n_samples, n_features] 222 Training vectors, where n_samples is the number 223 of samples and n_features is the number of features. 224 225 y: array-like, shape = [n_samples] 226 Target values. 227 228 sample_weight: array-like, shape = [n_samples] 229 Sample weights. 230 231 **kwargs: additional parameters to be passed to 232 self.cook_training_set or self.obj.fit 233 234 Returns: 235 236 self: object 237 """ 238 239 if len(X.shape) == 1: 240 if isinstance(X, pd.DataFrame): 241 X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns) 242 else: 243 X = X.reshape(1, -1) 244 245 output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 246 self.classes_ = np.unique(y) 247 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 248 249 # Wrap in CalibratedClassifierCV if needed 250 if self.cv_calibration is not None: 251 self.obj = CalibratedClassifierCV( 252 self.obj, 253 cv=self.cv_calibration, 254 method=self.calibration_method 255 ) 256 257 # if sample_weights, else: (must use self.row_index) 258 if sample_weight is not None: 259 self.obj.fit( 260 scaled_Z, 261 output_y, 262 sample_weight=sample_weight[self.index_row_].ravel(), 263 **kwargs 264 ) 265 return self 266 267 # if sample_weight is None: 268 self.obj.fit(scaled_Z, output_y, **kwargs) 269 self.classes_ = np.unique(y) # for compatibility with sklearn 270 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 271 272 if hasattr(self.obj, "coef_"): 273 self.coef_ = self.obj.coef_ 274 275 if hasattr(self.obj, "intercept_"): 276 self.intercept_ = self.obj.intercept_ 277 278 return self
Fit custom model to training data (X, y).
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
sample_weight: array-like, shape = [n_samples]
Sample weights.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
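A short sketch of the `sample_weight` path, reusing names from the class example above; the weighting scheme is purely illustrative. As in the source, only the weights of the internally retained rows (`index_row_`) are forwarded to the (possibly calibrated) base learner.

```python
import numpy as np

w = np.ones(len(y_train))
w[y_train == 0] = 2.0  # upweight one class, for illustration only

clf = ns.CustomClassifier(obj=layer1_regr, n_hidden_features=5, seed=123)
clf.fit(X_train, y_train, sample_weight=w)
```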
338 def predict(self, X, **kwargs): 339 """Predict test data X. 340 341 Parameters: 342 343 X: {array-like}, shape = [n_samples, n_features] 344 Training vectors, where n_samples is the number 345 of samples and n_features is the number of features. 346 347 **kwargs: additional parameters to be passed to 348 self.cook_test_set 349 350 Returns: 351 352 model predictions: {array-like} 353 """ 354 355 if len(X.shape) == 1: 356 n_features = X.shape[0] 357 new_X = mo.rbind( 358 X.reshape(1, n_features), 359 np.ones(n_features).reshape(1, n_features), 360 ) 361 362 return (self.obj.predict(self.cook_test_set(new_X, **kwargs), **kwargs))[0] 363 364 return self.obj.predict(self.cook_test_set(X, **kwargs), **kwargs)
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
366 def predict_proba(self, X, **kwargs): 367 """Predict probabilities for test data X. 368 369 Args: 370 371 X: {array-like}, shape = [n_samples, n_features] 372 Training vectors, where n_samples is the number 373 of samples and n_features is the number of features. 374 375 **kwargs: additional parameters to be passed to 376 self.cook_test_set 377 378 Returns: 379 380 probability estimates for test data: {array-like} 381 """ 382 383 if len(X.shape) == 1: 384 n_features = X.shape[0] 385 new_X = mo.rbind( 386 X.reshape(1, n_features), 387 np.ones(n_features).reshape(1, n_features), 388 ) 389 return ( 390 self.obj.predict_proba(self.cook_test_set(new_X, **kwargs), **kwargs) 391 )[0] 392 return self.obj.predict_proba(self.cook_test_set(X, **kwargs), **kwargs)
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
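Usage sketch, reusing `layer2_regr` and `X_test` from the class example above: probabilities are ordered according to `classes_` and each row sums to 1.

```python
probs = layer2_regr.predict_proba(X_test)
print(probs.shape)        # (n_samples, n_classes)
print(probs.sum(axis=1))  # each row sums to 1
```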
431 def score(self, X, y, scoring=None): 432 """Scoring function for classification. 433 434 Args: 435 436 X: {array-like}, shape = [n_samples, n_features] 437 Training vectors, where n_samples is the number 438 of samples and n_features is the number of features. 439 440 y: array-like, shape = [n_samples] 441 Target values. 442 443 scoring: str 444 scoring method (default is accuracy) 445 446 Returns: 447 448 score: float 449 """ 450 451 if scoring is None: 452 scoring = "accuracy" 453 454 if scoring == "accuracy": 455 return skm2.accuracy_score(y, self.predict(X)) 456 457 if scoring == "f1": 458 return skm2.f1_score(y, self.predict(X)) 459 460 if scoring == "precision": 461 return skm2.precision_score(y, self.predict(X)) 462 463 if scoring == "recall": 464 return skm2.recall_score(y, self.predict(X)) 465 466 if scoring == "roc_auc": 467 return skm2.roc_auc_score(y, self.predict(X)) 468 469 if scoring == "log_loss": 470 return skm2.log_loss(y, self.predict_proba(X)) 471 472 if scoring == "balanced_accuracy": 473 return skm2.balanced_accuracy_score(y, self.predict(X)) 474 475 if scoring == "average_precision": 476 return skm2.average_precision_score(y, self.predict(X)) 477 478 if scoring == "neg_brier_score": 479 return -skm2.brier_score_loss(y, self.predict_proba(X)) 480 481 if scoring == "neg_log_loss": 482 return -skm2.log_loss(y, self.predict_proba(X))
Scoring function for classification.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method (default is accuracy)
Returns:
score: float
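Sketch, reusing names from the class example above. The default metric is accuracy; the method also accepts 'balanced_accuracy', 'f1', 'precision', 'recall', 'roc_auc', 'log_loss', 'average_precision', 'neg_brier_score' and 'neg_log_loss' (several of these are only meaningful for binary targets).

```python
print(layer2_regr.score(X_test, y_test))                               # accuracy (default)
print(layer2_regr.score(X_test, y_test, scoring="balanced_accuracy"))  # multiclass-friendly
```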
18class CustomRegressor(Custom, RegressorMixin): 19 """Custom Regression model 20 21 This class is used to 'augment' any regression model with transformed features. 22 23 Parameters: 24 25 obj: object 26 any object containing a method fit (obj.fit()) and a method predict 27 (obj.predict()) 28 29 n_hidden_features: int 30 number of nodes in the hidden layer 31 32 activation_name: str 33 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 34 35 a: float 36 hyperparameter for 'prelu' or 'elu' activation function 37 38 nodes_sim: str 39 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 40 'uniform' 41 42 bias: boolean 43 indicates if the hidden layer contains a bias term (True) or not 44 (False) 45 46 dropout: float 47 regularization parameter; (random) percentage of nodes dropped out 48 of the training 49 50 direct_link: boolean 51 indicates if the original predictors are included (True) in model's 52 fitting or not (False) 53 54 n_clusters: int 55 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: 56 no clustering) 57 58 cluster_encode: bool 59 defines how the variable containing clusters is treated (default is one-hot) 60 if `False`, then labels are used, without one-hot encoding 61 62 type_clust: str 63 type of clustering method: currently k-means ('kmeans') or Gaussian 64 Mixture Model ('gmm') 65 66 type_scaling: a tuple of 3 strings 67 scaling methods for inputs, hidden layer, and clustering respectively 68 (and when relevant). 69 Currently available: standardization ('std') or MinMax scaling ('minmax') 70 71 type_pi: str. 72 type of prediction interval; currently `None` (split or local 73 conformal without simulation), "kde" or "bootstrap" (simulated split 74 conformal). 75 76 replications: int. 77 number of replications (if needed) for predictive simulation. 78 Used only in `self.predict`, for `self.kernel` in ('gaussian', 79 'tophat') and `self.type_pi = 'kde'`. Default is `None`. 80 81 kernel: str. 82 the kernel to use for kernel density estimation (used for predictive 83 simulation in `self.predict`, with `method='splitconformal'` and 84 `type_pi = 'kde'`). Currently, either 'gaussian' or 'tophat'. 85 86 type_split: str. 87 Type of splitting for conformal prediction. 
None (default), or 88 "random" (random split of data) or "sequential" (sequential split of data) 89 90 col_sample: float 91 percentage of covariates randomly chosen for training 92 93 row_sample: float 94 percentage of rows chosen for training, by stratified bootstrapping 95 96 level: float 97 confidence level for prediction intervals 98 99 pi_method: str 100 method for prediction intervals: 'splitconformal' or 'localconformal' 101 102 seed: int 103 reproducibility seed for nodes_sim=='uniform' 104 105 type_fit: str 106 'regression' 107 108 backend: str 109 "cpu" or "gpu" or "tpu" 110 111 Examples: 112 113 See [https://thierrymoudiki.github.io/blog/2024/03/18/python/conformal-and-bayesian-regression](https://thierrymoudiki.github.io/blog/2024/03/18/python/conformal-and-bayesian-regression) 114 115 """ 116 117 # construct the object ----- 118 119 def __init__( 120 self, 121 obj, 122 n_hidden_features=5, 123 activation_name="relu", 124 a=0.01, 125 nodes_sim="sobol", 126 bias=True, 127 dropout=0, 128 direct_link=True, 129 n_clusters=2, 130 cluster_encode=True, 131 type_clust="kmeans", 132 type_scaling=("std", "std", "std"), 133 type_pi=None, 134 replications=None, 135 kernel=None, 136 type_split=None, 137 col_sample=1, 138 row_sample=1, 139 level=None, 140 pi_method=None, 141 seed=123, 142 backend="cpu", 143 ): 144 super().__init__( 145 obj=obj, 146 n_hidden_features=n_hidden_features, 147 activation_name=activation_name, 148 a=a, 149 nodes_sim=nodes_sim, 150 bias=bias, 151 dropout=dropout, 152 direct_link=direct_link, 153 n_clusters=n_clusters, 154 cluster_encode=cluster_encode, 155 type_clust=type_clust, 156 type_scaling=type_scaling, 157 col_sample=col_sample, 158 row_sample=row_sample, 159 seed=seed, 160 backend=backend, 161 ) 162 163 self.type_fit = "regression" 164 self.type_pi = type_pi 165 self.replications = replications 166 self.kernel = kernel 167 self.type_split = type_split 168 self.level = level 169 self.pi_method = pi_method 170 self.coef_ = None 171 self.intercept_ = None 172 self.X_ = None 173 self.y_ = None 174 self.aic_ = None 175 self.aicc_ = None 176 self.bic_ = None 177 178 def fit(self, X, y, sample_weight=None, **kwargs): 179 """Fit custom model to training data (X, y). 180 181 Parameters: 182 183 X: {array-like}, shape = [n_samples, n_features] 184 Training vectors, where n_samples is the number 185 of samples and n_features is the number of features. 186 187 y: array-like, shape = [n_samples] 188 Target values. 189 190 sample_weight: array-like, shape = [n_samples] 191 Sample weights. 
192 193 **kwargs: additional parameters to be passed to 194 self.cook_training_set or self.obj.fit 195 196 Returns: 197 198 self: object 199 200 """ 201 202 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 203 204 if self.level is not None: 205 self.obj = PredictionInterval( 206 obj=self.obj, method=self.pi_method, level=self.level 207 ) 208 209 # if sample_weights, else: (must use self.row_index) 210 if sample_weight is not None: 211 self.obj.fit( 212 scaled_Z, 213 centered_y, 214 sample_weight=sample_weight[self.index_row_].ravel(), 215 **kwargs 216 ) 217 218 return self 219 220 self.obj.fit(scaled_Z, centered_y, **kwargs) 221 222 self.X_ = X 223 224 self.y_ = y 225 226 # Compute SSE 227 centered_y_pred = self.obj.predict(scaled_Z) 228 self.sse_ = np.sum((centered_y - centered_y_pred) ** 2) 229 230 # Get number of parameters 231 n_params = self.n_hidden_features + X.shape[1] # hidden features + original features 232 if self.n_clusters > 0: 233 n_params += self.n_clusters # add clusters if used 234 235 # Compute information criteria 236 n_samples = X.shape[0] 237 temp = n_samples * np.log(self.sse_/n_samples) 238 self.aic_ = temp + 2 * n_params 239 self.bic_ = temp + np.log(n_samples) * n_params 240 241 if hasattr(self.obj, "coef_"): 242 self.coef_ = self.obj.coef_ 243 244 if hasattr(self.obj, "intercept_"): 245 self.intercept_ = self.obj.intercept_ 246 247 return self 248 249 def partial_fit(self, X, y, **kwargs): 250 """Partial fit custom model to training data (X, y). 251 252 Parameters: 253 254 X: {array-like}, shape = [n_samples, n_features] 255 Subset of training vectors, where n_samples is the number 256 of samples and n_features is the number of features. 257 258 y: array-like, shape = [n_samples] 259 Subset of target values. 260 261 **kwargs: additional parameters to be passed to 262 self.cook_training_set or self.obj.fit 263 264 Returns: 265 266 self: object 267 268 """ 269 270 if len(X.shape) == 1: 271 if isinstance(X, pd.DataFrame): 272 X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns) 273 else: 274 X = X.reshape(1, -1) 275 y = np.array([y]) 276 277 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 278 279 self.obj.partial_fit(scaled_Z, centered_y, **kwargs) 280 281 self.X_ = X 282 283 self.y_ = y 284 285 return self 286 287 def predict(self, X, level=95, method='splitconformal', **kwargs): 288 """Predict test data X. 289 290 Parameters: 291 292 X: {array-like}, shape = [n_samples, n_features] 293 Training vectors, where n_samples is the number 294 of samples and n_features is the number of features. 
295 296 level: int 297 Level of confidence (default = 95) 298 299 method: str 300 'splitconformal', 'localconformal' 301 prediction (if you specify `return_pi = True`) 302 303 **kwargs: additional parameters 304 `return_pi = True` for conformal prediction, 305 with `method` in ('splitconformal', 'localconformal') 306 or `return_std = True` for `self.obj` in 307 (`sklearn.linear_model.BayesianRidge`, 308 `sklearn.linear_model.ARDRegressor`, 309 `sklearn.gaussian_process.GaussianProcessRegressor`)` 310 311 Returns: 312 313 model predictions: 314 an array if uncertainty quantification is not requested, 315 or a tuple if with prediction intervals and simulations 316 if `return_std = True` (mean, standard deviation, 317 lower and upper prediction interval) or `return_pi = True` 318 () 319 320 """ 321 322 if "return_std" in kwargs: 323 324 alpha = 100 - level 325 pi_multiplier = norm.ppf(1 - alpha / 200) 326 327 if len(X.shape) == 1: 328 329 n_features = X.shape[0] 330 new_X = mo.rbind( 331 X.reshape(1, n_features), 332 np.ones(n_features).reshape(1, n_features), 333 ) 334 335 mean_, std_ = self.obj.predict( 336 self.cook_test_set(new_X, **kwargs), return_std=True 337 )[0] 338 339 preds = self.y_mean_ + mean_ 340 lower = self.y_mean_ + (mean_ - pi_multiplier * std_) 341 upper = self.y_mean_ + (mean_ + pi_multiplier * std_) 342 343 DescribeResults = namedtuple( 344 "DescribeResults", ["mean", "std", "lower", "upper"] 345 ) 346 347 return DescribeResults(preds, std_, lower, upper) 348 349 # len(X.shape) > 1 350 mean_, std_ = self.obj.predict( 351 self.cook_test_set(X, **kwargs), return_std=True 352 ) 353 354 preds = self.y_mean_ + mean_ 355 lower = self.y_mean_ + (mean_ - pi_multiplier * std_) 356 upper = self.y_mean_ + (mean_ + pi_multiplier * std_) 357 358 DescribeResults = namedtuple( 359 "DescribeResults", ["mean", "std", "lower", "upper"] 360 ) 361 362 return DescribeResults(preds, std_, lower, upper) 363 364 if "return_pi" in kwargs: 365 assert method in ( 366 "splitconformal", 367 "localconformal", 368 ), "method must be in ('splitconformal', 'localconformal')" 369 self.pi = PredictionInterval( 370 obj=self, 371 method=method, 372 level=level, 373 type_pi=self.type_pi, 374 replications=self.replications, 375 kernel=self.kernel, 376 ) 377 378 if len(self.X_.shape) == 1: 379 if isinstance(X, pd.DataFrame): 380 self.X_ = pd.DataFrame( 381 self.X_.values.reshape(1, -1), columns=self.X_.columns 382 ) 383 else: 384 self.X_ = self.X_.reshape(1, -1) 385 self.y_ = np.array([self.y_]) 386 387 self.pi.fit(self.X_, self.y_) 388 # self.X_ = None # consumes memory to keep, dangerous to delete (side effect) 389 # self.y_ = None # consumes memory to keep, dangerous to delete (side effect) 390 preds = self.pi.predict(X, return_pi=True) 391 return preds 392 393 # "return_std" not in kwargs 394 if len(X.shape) == 1: 395 396 n_features = X.shape[0] 397 new_X = mo.rbind( 398 X.reshape(1, n_features), 399 np.ones(n_features).reshape(1, n_features), 400 ) 401 402 return ( 403 self.y_mean_ 404 + self.obj.predict(self.cook_test_set(new_X, **kwargs), **kwargs) 405 )[0] 406 407 # len(X.shape) > 1 408 return self.y_mean_ + self.obj.predict( 409 self.cook_test_set(X, **kwargs), **kwargs 410 ) 411 412 def score(self, X, y, scoring=None): 413 """Compute the score of the model. 414 415 Parameters: 416 417 X: {array-like}, shape = [n_samples, n_features] 418 Training vectors, where n_samples is the number 419 of samples and n_features is the number of features. 420 421 y: array-like, shape = [n_samples] 422 Target values. 
423 424 scoring: str 425 scoring method 426 427 Returns: 428 429 score: float 430 431 """ 432 433 if scoring is None: 434 return np.sqrt(np.mean((self.predict(X) - y) ** 2)) 435 436 return skm2.get_scorer(scoring)(self, X, y)
Custom Regression model
This class is used to 'augment' any regression model with transformed features.
Parameters:
obj: object
any object containing a method fit (obj.fit()) and a method predict
(obj.predict())
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
type_pi: str.
type of prediction interval; currently `None` (split or local
conformal without simulation), "kde" or "bootstrap" (simulated split
conformal).
replications: int.
number of replications (if needed) for predictive simulation.
Used only in `self.predict`, for `self.kernel` in ('gaussian',
'tophat') and `self.type_pi = 'kde'`. Default is `None`.
kernel: str.
the kernel to use for kernel density estimation (used for predictive
simulation in `self.predict`, with `method='splitconformal'` and
`type_pi = 'kde'`). Currently, either 'gaussian' or 'tophat'.
type_split: str.
Type of splitting for conformal prediction. None (default), or
"random" (random split of data) or "sequential" (sequential split of data)
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
level: float
confidence level for prediction intervals
pi_method: str
method for prediction intervals: 'splitconformal' or 'localconformal'
seed: int
reproducibility seed for nodes_sim=='uniform'
type_fit: str
'regression'
backend: str
"cpu" or "gpu" or "tpu"
Examples:
See https://thierrymoudiki.github.io/blog/2024/03/18/python/conformal-and-bayesian-regression
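A minimal sketch (adapted, not taken from the linked post): augmenting a scikit-learn regressor with `CustomRegressor`'s hidden-layer features.

```python
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=123)

reg = ns.CustomRegressor(obj=Ridge(), n_hidden_features=10,
                         direct_link=True, nodes_sim='sobol',
                         n_clusters=2, seed=123)
reg.fit(X_train, y_train)
print(reg.score(X_test, y_test))  # RMSE by default (see `score` below)
```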
178 def fit(self, X, y, sample_weight=None, **kwargs): 179 """Fit custom model to training data (X, y). 180 181 Parameters: 182 183 X: {array-like}, shape = [n_samples, n_features] 184 Training vectors, where n_samples is the number 185 of samples and n_features is the number of features. 186 187 y: array-like, shape = [n_samples] 188 Target values. 189 190 sample_weight: array-like, shape = [n_samples] 191 Sample weights. 192 193 **kwargs: additional parameters to be passed to 194 self.cook_training_set or self.obj.fit 195 196 Returns: 197 198 self: object 199 200 """ 201 202 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 203 204 if self.level is not None: 205 self.obj = PredictionInterval( 206 obj=self.obj, method=self.pi_method, level=self.level 207 ) 208 209 # if sample_weights, else: (must use self.row_index) 210 if sample_weight is not None: 211 self.obj.fit( 212 scaled_Z, 213 centered_y, 214 sample_weight=sample_weight[self.index_row_].ravel(), 215 **kwargs 216 ) 217 218 return self 219 220 self.obj.fit(scaled_Z, centered_y, **kwargs) 221 222 self.X_ = X 223 224 self.y_ = y 225 226 # Compute SSE 227 centered_y_pred = self.obj.predict(scaled_Z) 228 self.sse_ = np.sum((centered_y - centered_y_pred) ** 2) 229 230 # Get number of parameters 231 n_params = self.n_hidden_features + X.shape[1] # hidden features + original features 232 if self.n_clusters > 0: 233 n_params += self.n_clusters # add clusters if used 234 235 # Compute information criteria 236 n_samples = X.shape[0] 237 temp = n_samples * np.log(self.sse_/n_samples) 238 self.aic_ = temp + 2 * n_params 239 self.bic_ = temp + np.log(n_samples) * n_params 240 241 if hasattr(self.obj, "coef_"): 242 self.coef_ = self.obj.coef_ 243 244 if hasattr(self.obj, "intercept_"): 245 self.intercept_ = self.obj.intercept_ 246 247 return self
Fit custom model to training data (X, y).
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
sample_weight: array-like, shape = [n_samples]
Sample weights.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
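After fitting, the quantities computed at the end of `fit` (see the source above) are exposed as attributes; a short sketch reusing `reg` from the example above.

```python
reg.fit(X_train, y_train)
print(reg.sse_)  # sum of squared errors on the training set
print(reg.aic_)  # Akaike information criterion
print(reg.bic_)  # Bayesian information criterion
```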
287 def predict(self, X, level=95, method='splitconformal', **kwargs): 288 """Predict test data X. 289 290 Parameters: 291 292 X: {array-like}, shape = [n_samples, n_features] 293 Training vectors, where n_samples is the number 294 of samples and n_features is the number of features. 295 296 level: int 297 Level of confidence (default = 95) 298 299 method: str 300 'splitconformal', 'localconformal' 301 prediction (if you specify `return_pi = True`) 302 303 **kwargs: additional parameters 304 `return_pi = True` for conformal prediction, 305 with `method` in ('splitconformal', 'localconformal') 306 or `return_std = True` for `self.obj` in 307 (`sklearn.linear_model.BayesianRidge`, 308 `sklearn.linear_model.ARDRegressor`, 309 `sklearn.gaussian_process.GaussianProcessRegressor`)` 310 311 Returns: 312 313 model predictions: 314 an array if uncertainty quantification is not requested, 315 or a tuple if with prediction intervals and simulations 316 if `return_std = True` (mean, standard deviation, 317 lower and upper prediction interval) or `return_pi = True` 318 () 319 320 """ 321 322 if "return_std" in kwargs: 323 324 alpha = 100 - level 325 pi_multiplier = norm.ppf(1 - alpha / 200) 326 327 if len(X.shape) == 1: 328 329 n_features = X.shape[0] 330 new_X = mo.rbind( 331 X.reshape(1, n_features), 332 np.ones(n_features).reshape(1, n_features), 333 ) 334 335 mean_, std_ = self.obj.predict( 336 self.cook_test_set(new_X, **kwargs), return_std=True 337 )[0] 338 339 preds = self.y_mean_ + mean_ 340 lower = self.y_mean_ + (mean_ - pi_multiplier * std_) 341 upper = self.y_mean_ + (mean_ + pi_multiplier * std_) 342 343 DescribeResults = namedtuple( 344 "DescribeResults", ["mean", "std", "lower", "upper"] 345 ) 346 347 return DescribeResults(preds, std_, lower, upper) 348 349 # len(X.shape) > 1 350 mean_, std_ = self.obj.predict( 351 self.cook_test_set(X, **kwargs), return_std=True 352 ) 353 354 preds = self.y_mean_ + mean_ 355 lower = self.y_mean_ + (mean_ - pi_multiplier * std_) 356 upper = self.y_mean_ + (mean_ + pi_multiplier * std_) 357 358 DescribeResults = namedtuple( 359 "DescribeResults", ["mean", "std", "lower", "upper"] 360 ) 361 362 return DescribeResults(preds, std_, lower, upper) 363 364 if "return_pi" in kwargs: 365 assert method in ( 366 "splitconformal", 367 "localconformal", 368 ), "method must be in ('splitconformal', 'localconformal')" 369 self.pi = PredictionInterval( 370 obj=self, 371 method=method, 372 level=level, 373 type_pi=self.type_pi, 374 replications=self.replications, 375 kernel=self.kernel, 376 ) 377 378 if len(self.X_.shape) == 1: 379 if isinstance(X, pd.DataFrame): 380 self.X_ = pd.DataFrame( 381 self.X_.values.reshape(1, -1), columns=self.X_.columns 382 ) 383 else: 384 self.X_ = self.X_.reshape(1, -1) 385 self.y_ = np.array([self.y_]) 386 387 self.pi.fit(self.X_, self.y_) 388 # self.X_ = None # consumes memory to keep, dangerous to delete (side effect) 389 # self.y_ = None # consumes memory to keep, dangerous to delete (side effect) 390 preds = self.pi.predict(X, return_pi=True) 391 return preds 392 393 # "return_std" not in kwargs 394 if len(X.shape) == 1: 395 396 n_features = X.shape[0] 397 new_X = mo.rbind( 398 X.reshape(1, n_features), 399 np.ones(n_features).reshape(1, n_features), 400 ) 401 402 return ( 403 self.y_mean_ 404 + self.obj.predict(self.cook_test_set(new_X, **kwargs), **kwargs) 405 )[0] 406 407 # len(X.shape) > 1 408 return self.y_mean_ + self.obj.predict( 409 self.cook_test_set(X, **kwargs), **kwargs 410 )
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
level: int
Level of confidence (default = 95)
method: str
'splitconformal' or 'localconformal' conformal
prediction (used when `return_pi = True` is specified)
**kwargs: additional parameters
`return_pi = True` for conformal prediction,
with `method` in ('splitconformal', 'localconformal'),
or `return_std = True` when `self.obj` is one of
`sklearn.linear_model.BayesianRidge`,
`sklearn.linear_model.ARDRegression`,
`sklearn.gaussian_process.GaussianProcessRegressor`
Returns:
model predictions:
an array if uncertainty quantification is not requested,
or a namedtuple with prediction intervals (and simulations, when relevant):
`return_std = True` yields (mean, standard deviation,
lower and upper prediction interval); `return_pi = True`
yields conformal prediction intervals (simulated when `type_pi` is 'kde' or 'bootstrap')
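A hedged sketch of both uncertainty-quantification paths described above, continuing the example built earlier in this section. `return_std=True` requires a base learner that itself supports `return_std` (e.g. `BayesianRidge`); `return_pi=True` uses split conformal prediction.

```python
from sklearn.linear_model import BayesianRidge

# Gaussian prediction intervals via return_std
reg_b = ns.CustomRegressor(obj=BayesianRidge(), n_hidden_features=10, seed=123)
reg_b.fit(X_train, y_train)
res = reg_b.predict(X_test, return_std=True)  # namedtuple: mean, std, lower, upper
print(res.mean[:3], res.lower[:3], res.upper[:3])

# split conformal prediction intervals via return_pi
res_pi = reg.predict(X_test, return_pi=True, method="splitconformal", level=95)
```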
412 def score(self, X, y, scoring=None): 413 """Compute the score of the model. 414 415 Parameters: 416 417 X: {array-like}, shape = [n_samples, n_features] 418 Training vectors, where n_samples is the number 419 of samples and n_features is the number of features. 420 421 y: array-like, shape = [n_samples] 422 Target values. 423 424 scoring: str 425 scoring method 426 427 Returns: 428 429 score: float 430 431 """ 432 433 if scoring is None: 434 return np.sqrt(np.mean((self.predict(X) - y) ** 2)) 435 436 return skm2.get_scorer(scoring)(self, X, y)
Compute the score of the model.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method
Returns:
score: float
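Sketch, reusing `reg` from the example above: with no `scoring` argument the method returns the RMSE; otherwise the name is resolved through scikit-learn's scorer registry.

```python
print(reg.score(X_test, y_test))        # RMSE (default)
print(reg.score(X_test, y_test, "r2"))  # any sklearn scorer name
```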
19class CustomBackPropRegressor(Custom, RegressorMixin): 20 """ 21 Finite difference trainer for nnetsauce models. 22 23 Parameters 24 ---------- 25 26 base_model : str 27 The name of the base model (e.g., 'RidgeCV'). 28 29 type_grad : {'finitediff', 'autodiff'}, optional 30 Type of gradient computation to use (default='finitediff'). 31 32 lr : float, optional 33 Learning rate for optimization (default=1e-4). 34 35 optimizer : {'gd', 'sgd', 'adam', 'cd'}, optional 36 Optimization algorithm: gradient descent ('gd'), stochastic gradient descent ('sgd'), 37 Adam ('adam'), or coordinate descent ('cd'). Default is 'gd'. 38 39 eps : float, optional 40 Scaling factor for adaptive finite difference step size (default=1e-3). 41 42 batch_size : int, optional 43 Batch size for 'sgd' optimizer (default=32). 44 45 alpha : float, optional 46 Elastic net penalty strength (default=0.0). 47 48 l1_ratio : float, optional 49 Elastic net mixing parameter (0 = Ridge, 1 = Lasso, default=0.0). 50 51 type_loss : {'mse', 'quantile'}, optional 52 Type of loss function to use (default='mse'). 53 54 q : float, optional 55 Quantile for quantile loss (default=0.5). 56 57 **kwargs 58 Additional parameters to pass to the scikit-learn model. 59 60 """ 61 62 def __init__(self, base_model, 63 type_grad='finitediff', 64 lr=1e-4, optimizer='gd', 65 eps=1e-3, batch_size=32, 66 alpha=0.0, l1_ratio=0.0, 67 type_loss="mse", q=0.5, 68 backend='cpu', 69 **kwargs): 70 super().__init__(base_model, True, **kwargs) 71 self.base_model = base_model 72 self.custom_kwargs = kwargs 73 self.backend = backend 74 self.model = ns.CustomRegressor(self.base_model, 75 backend=self.backend, 76 **self.custom_kwargs) 77 assert isinstance(self.model, ns.CustomRegressor),\ 78 "'model' must be of class ns.CustomRegressor" 79 self.type_grad = type_grad 80 self.lr = lr 81 self.optimizer = optimizer 82 self.eps = eps 83 self.loss_history_ = [] 84 self.opt_state = None 85 self.batch_size = batch_size # for SGD 86 self.loss_history_ = [] 87 self._cd_index = 0 # For coordinate descent 88 self.alpha = alpha 89 self.l1_ratio = l1_ratio 90 self.type_loss = type_loss 91 self.q = q 92 93 def _loss(self, X, y, **kwargs): 94 """ 95 Compute the loss (with elastic net penalty) for the current model. 96 97 Parameters 98 ---------- 99 100 X : array-like of shape (n_samples, n_features) 101 Input data. 102 103 y : array-like of shape (n_samples,) 104 Target values. 105 106 **kwargs 107 Additional keyword arguments for loss calculation. 108 109 Returns 110 ------- 111 float 112 The computed loss value. 113 """ 114 y_pred = self.model.predict(X) 115 if self.type_loss == "mse": 116 loss = np.mean((y - y_pred) ** 2) 117 elif self.type_loss == "quantile": 118 loss = mean_pinball_loss(y, y_pred, alpha=self.q, **kwargs) 119 W = self.model.W_ 120 l1 = np.sum(np.abs(W)) 121 l2 = np.sum(W ** 2) 122 return loss + self.alpha * (self.l1_ratio * l1 + 0.5 * (1 - self.l1_ratio) * l2) 123 124 def _compute_grad(self, X, y): 125 """ 126 Compute the gradient of the loss with respect to W_ using finite differences. 127 128 Parameters 129 ---------- 130 131 X : array-like of shape (n_samples, n_features) 132 Input data. 133 134 y : array-like of shape (n_samples,) 135 Target values. 136 137 Returns 138 ------- 139 140 ndarray 141 Gradient array with the same shape as W_. 
142 """ 143 if self.type_grad == 'autodiff': 144 raise NotImplementedError("Automatic differentiation is not implemented yet.") 145 # Use JAX for automatic differentiation 146 W = deepcopy(self.model.W_) 147 W_flat = W.flatten() 148 n_params = W_flat.size 149 150 def loss_fn(W_flat): 151 W_reshaped = W_flat.reshape(W.shape) 152 self.model.W_ = W_reshaped 153 return self._loss(X, y) 154 155 grad_fn = jax.grad(loss_fn) 156 grad_flat = grad_fn(W_flat) 157 grad = grad_flat.reshape(W.shape) 158 159 # Add elastic net gradient 160 l1_grad = self.alpha * self.l1_ratio * np.sign(W) 161 l2_grad = self.alpha * (1 - self.l1_ratio) * W 162 grad += l1_grad + l2_grad 163 164 self.model.W_ = W 165 return grad 166 167 # Finite difference gradient computation 168 W = deepcopy(self.model.W_) 169 shape = W.shape 170 W_flat = W.flatten() 171 n_params = W_flat.size 172 173 # Adaptive finite difference step 174 h_vec = self.eps * np.maximum(1.0, np.abs(W_flat)) 175 eye = np.eye(n_params) 176 177 loss_plus = np.zeros(n_params) 178 loss_minus = np.zeros(n_params) 179 180 for i in range(n_params): 181 h_i = h_vec[i] 182 Wp = W_flat.copy(); Wp[i] += h_i 183 Wm = W_flat.copy(); Wm[i] -= h_i 184 185 self.model.W_ = Wp.reshape(shape) 186 loss_plus[i] = self._loss(X, y) 187 188 self.model.W_ = Wm.reshape(shape) 189 loss_minus[i] = self._loss(X, y) 190 191 grad = ((loss_plus - loss_minus) / (2 * h_vec)).reshape(shape) 192 193 # Add elastic net gradient 194 l1_grad = self.alpha * self.l1_ratio * np.sign(W) 195 l2_grad = self.alpha * (1 - self.l1_ratio) * W 196 grad += l1_grad + l2_grad 197 198 self.model.W_ = W # restore original 199 return grad 200 201 def fit(self, X, y, epochs=10, verbose=True, show_progress=True, sample_weight=None, **kwargs): 202 """ 203 Fit the model using finite difference optimization. 204 205 Parameters 206 ---------- 207 208 X : array-like of shape (n_samples, n_features) 209 Training data. 210 211 y : array-like of shape (n_samples,) 212 Target values. 213 214 epochs : int, optional 215 Number of optimization steps (default=10). 216 217 verbose : bool, optional 218 Whether to print progress messages (default=True). 219 220 show_progress : bool, optional 221 Whether to show tqdm progress bar (default=True). 222 223 sample_weight : array-like, optional 224 Sample weights. 225 226 **kwargs 227 Additional keyword arguments. 228 229 Returns 230 ------- 231 232 self : object 233 Returns self. 
234 """ 235 236 self.model.fit(X, y) 237 238 iterator = tqdm(range(epochs)) if show_progress else range(epochs) 239 240 for epoch in iterator: 241 grad = self._compute_grad(X, y) 242 243 if self.optimizer == 'gd': 244 self.model.W_ -= self.lr * grad 245 self.model.W_ = np.clip(self.model.W_, 0, 1) 246 #print("self.model.W_", self.model.W_) 247 248 elif self.optimizer == 'sgd': 249 # Sample a mini-batch for stochastic gradient 250 n_samples = X.shape[0] 251 idxs = np.random.choice(n_samples, self.batch_size, replace=False) 252 if isinstance(X, pd.DataFrame): 253 X_batch = X.iloc[idxs,:] 254 else: 255 X_batch = X[idxs,:] 256 y_batch = y[idxs] 257 grad = self._compute_grad(X_batch, y_batch) 258 259 self.model.W_ -= self.lr * grad 260 self.model.W_ = np.clip(self.model.W_, 0, 1) 261 262 elif self.optimizer == 'adam': 263 if self.opt_state is None: 264 self.opt_state = {'m': np.zeros_like(grad), 'v': np.zeros_like(grad), 't': 0} 265 beta1, beta2, eps = 0.9, 0.999, 1e-8 266 self.opt_state['t'] += 1 267 self.opt_state['m'] = beta1 * self.opt_state['m'] + (1 - beta1) * grad 268 self.opt_state['v'] = beta2 * self.opt_state['v'] + (1 - beta2) * (grad ** 2) 269 m_hat = self.opt_state['m'] / (1 - beta1 ** self.opt_state['t']) 270 v_hat = self.opt_state['v'] / (1 - beta2 ** self.opt_state['t']) 271 272 self.model.W_ -= self.lr * m_hat / (np.sqrt(v_hat) + eps) 273 self.model.W_ = np.clip(self.model.W_, 0, 1) 274 #print("self.model.W_", self.model.W_) 275 276 elif self.optimizer == 'cd': # coordinate descent 277 278 W_shape = self.model.W_.shape 279 W_flat_size = self.model.W_.size 280 W_flat = self.model.W_.flatten() 281 grad_flat = grad.flatten() 282 283 # Update only one coordinate per epoch (cyclic) 284 idx = self._cd_index % W_flat_size 285 W_flat[idx] -= self.lr * grad_flat[idx] 286 # Clip the updated value 287 W_flat[idx] = np.clip(W_flat[idx], 0, 1) 288 289 # Restore W_ 290 self.model.W_ = W_flat.reshape(W_shape) 291 292 self._cd_index += 1 293 294 else: 295 raise ValueError(f"Unsupported optimizer: {self.optimizer}") 296 297 loss = self._loss(X, y) 298 self.loss_history_.append(loss) 299 300 if verbose: 301 print(f"Epoch {epoch+1}: Loss = {loss:.6f}") 302 303 # if sample_weights, else: (must use self.row_index) 304 if sample_weight in kwargs: 305 self.model.fit( 306 X, 307 y, 308 sample_weight=sample_weight[self.index_row_].ravel(), 309 **kwargs 310 ) 311 312 return self 313 314 return self 315 316 317 def predict(self, X, level=95, method='splitconformal', **kwargs): 318 """ 319 Predict using the trained model. 320 321 Parameters 322 ---------- 323 324 X : array-like of shape (n_samples, n_features) 325 Input data. 326 327 level : int, optional 328 Level of confidence for prediction intervals (default=95). 329 330 method : {'splitconformal', 'localconformal'}, optional 331 Method for conformal prediction (default='splitconformal'). 332 333 **kwargs 334 Additional keyword arguments. Use `return_pi=True` for prediction intervals, 335 or `return_std=True` for standard deviation estimates. 336 337 Returns 338 ------- 339 340 array or tuple 341 Model predictions, or a tuple with prediction intervals or standard deviations if requested. 
342 """ 343 if "return_std" in kwargs: 344 345 alpha = 100 - level 346 pi_multiplier = norm.ppf(1 - alpha / 200) 347 348 if len(X.shape) == 1: 349 350 n_features = X.shape[0] 351 new_X = mo.rbind( 352 X.reshape(1, n_features), 353 np.ones(n_features).reshape(1, n_features), 354 ) 355 356 mean_, std_ = self.model.predict( 357 new_X, return_std=True 358 )[0] 359 360 preds = mean_ 361 lower = (mean_ - pi_multiplier * std_) 362 upper = (mean_ + pi_multiplier * std_) 363 364 DescribeResults = namedtuple( 365 "DescribeResults", ["mean", "std", "lower", "upper"] 366 ) 367 368 return DescribeResults(preds, std_, lower, upper) 369 370 # len(X.shape) > 1 371 mean_, std_ = self.model.predict( 372 X, return_std=True 373 ) 374 375 preds = mean_ 376 lower = (mean_ - pi_multiplier * std_) 377 upper = (mean_ + pi_multiplier * std_) 378 379 DescribeResults = namedtuple( 380 "DescribeResults", ["mean", "std", "lower", "upper"] 381 ) 382 383 return DescribeResults(preds, std_, lower, upper) 384 385 if "return_pi" in kwargs: 386 assert method in ( 387 "splitconformal", 388 "localconformal", 389 ), "method must be in ('splitconformal', 'localconformal')" 390 self.pi = ns.PredictionInterval( 391 obj=self, 392 method=method, 393 level=level, 394 type_pi=self.type_pi, 395 replications=self.replications, 396 kernel=self.kernel, 397 ) 398 399 if len(self.X_.shape) == 1: 400 if isinstance(X, pd.DataFrame): 401 self.X_ = pd.DataFrame( 402 self.X_.values.reshape(1, -1), columns=self.X_.columns 403 ) 404 else: 405 self.X_ = self.X_.reshape(1, -1) 406 self.y_ = np.array([self.y_]) 407 408 self.pi.fit(self.X_, self.y_) 409 # self.X_ = None # consumes memory to keep, dangerous to delete (side effect) 410 # self.y_ = None # consumes memory to keep, dangerous to delete (side effect) 411 preds = self.pi.predict(X, return_pi=True) 412 return preds 413 414 # "return_std" not in kwargs 415 if len(X.shape) == 1: 416 417 n_features = X.shape[0] 418 new_X = mo.rbind( 419 X.reshape(1, n_features), 420 np.ones(n_features).reshape(1, n_features), 421 ) 422 423 return ( 424 0 425 + self.model.predict(new_X, **kwargs) 426 )[0] 427 428 # len(X.shape) > 1 429 return self.model.predict( 430 X, **kwargs 431 )
Finite difference trainer for nnetsauce models.
Parameters
base_model : str The name of the base model (e.g., 'RidgeCV').
type_grad : {'finitediff', 'autodiff'}, optional Type of gradient computation to use (default='finitediff').
lr : float, optional Learning rate for optimization (default=1e-4).
optimizer : {'gd', 'sgd', 'adam', 'cd'}, optional Optimization algorithm: gradient descent ('gd'), stochastic gradient descent ('sgd'), Adam ('adam'), or coordinate descent ('cd'). Default is 'gd'.
eps : float, optional Scaling factor for adaptive finite difference step size (default=1e-3).
batch_size : int, optional Batch size for 'sgd' optimizer (default=32).
alpha : float, optional Elastic net penalty strength (default=0.0).
l1_ratio : float, optional Elastic net mixing parameter (0 = Ridge, 1 = Lasso, default=0.0).
type_loss : {'mse', 'quantile'}, optional Type of loss function to use (default='mse').
q : float, optional Quantile for quantile loss (default=0.5).
**kwargs Additional parameters to pass to the scikit-learn model.
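A minimal usage sketch follows, assuming the finite-difference trainer described above is exposed at the package top level as `ns.CustomBackPropRegressor` (the class name and parameter values are assumptions; adapt to the actual export):

```python
# Hedged sketch of the finite-difference trainer; ns.CustomBackPropRegressor is an
# assumed export name, parameters follow the list above.
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123
)

# base_model is the name of the scikit-learn base learner, passed as a string
reg = ns.CustomBackPropRegressor(
    base_model="RidgeCV",
    type_grad="finitediff",
    optimizer="adam",
    lr=1e-4,
    type_loss="mse",
)
reg.fit(X_train, y_train, epochs=10, verbose=True)
print(reg.loss_history_)        # one loss value appended per epoch
print(reg.predict(X_test)[:5])
```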
201 def fit(self, X, y, epochs=10, verbose=True, show_progress=True, sample_weight=None, **kwargs): 202 """ 203 Fit the model using finite difference optimization. 204 205 Parameters 206 ---------- 207 208 X : array-like of shape (n_samples, n_features) 209 Training data. 210 211 y : array-like of shape (n_samples,) 212 Target values. 213 214 epochs : int, optional 215 Number of optimization steps (default=10). 216 217 verbose : bool, optional 218 Whether to print progress messages (default=True). 219 220 show_progress : bool, optional 221 Whether to show tqdm progress bar (default=True). 222 223 sample_weight : array-like, optional 224 Sample weights. 225 226 **kwargs 227 Additional keyword arguments. 228 229 Returns 230 ------- 231 232 self : object 233 Returns self. 234 """ 235 236 self.model.fit(X, y) 237 238 iterator = tqdm(range(epochs)) if show_progress else range(epochs) 239 240 for epoch in iterator: 241 grad = self._compute_grad(X, y) 242 243 if self.optimizer == 'gd': 244 self.model.W_ -= self.lr * grad 245 self.model.W_ = np.clip(self.model.W_, 0, 1) 246 #print("self.model.W_", self.model.W_) 247 248 elif self.optimizer == 'sgd': 249 # Sample a mini-batch for stochastic gradient 250 n_samples = X.shape[0] 251 idxs = np.random.choice(n_samples, self.batch_size, replace=False) 252 if isinstance(X, pd.DataFrame): 253 X_batch = X.iloc[idxs,:] 254 else: 255 X_batch = X[idxs,:] 256 y_batch = y[idxs] 257 grad = self._compute_grad(X_batch, y_batch) 258 259 self.model.W_ -= self.lr * grad 260 self.model.W_ = np.clip(self.model.W_, 0, 1) 261 262 elif self.optimizer == 'adam': 263 if self.opt_state is None: 264 self.opt_state = {'m': np.zeros_like(grad), 'v': np.zeros_like(grad), 't': 0} 265 beta1, beta2, eps = 0.9, 0.999, 1e-8 266 self.opt_state['t'] += 1 267 self.opt_state['m'] = beta1 * self.opt_state['m'] + (1 - beta1) * grad 268 self.opt_state['v'] = beta2 * self.opt_state['v'] + (1 - beta2) * (grad ** 2) 269 m_hat = self.opt_state['m'] / (1 - beta1 ** self.opt_state['t']) 270 v_hat = self.opt_state['v'] / (1 - beta2 ** self.opt_state['t']) 271 272 self.model.W_ -= self.lr * m_hat / (np.sqrt(v_hat) + eps) 273 self.model.W_ = np.clip(self.model.W_, 0, 1) 274 #print("self.model.W_", self.model.W_) 275 276 elif self.optimizer == 'cd': # coordinate descent 277 278 W_shape = self.model.W_.shape 279 W_flat_size = self.model.W_.size 280 W_flat = self.model.W_.flatten() 281 grad_flat = grad.flatten() 282 283 # Update only one coordinate per epoch (cyclic) 284 idx = self._cd_index % W_flat_size 285 W_flat[idx] -= self.lr * grad_flat[idx] 286 # Clip the updated value 287 W_flat[idx] = np.clip(W_flat[idx], 0, 1) 288 289 # Restore W_ 290 self.model.W_ = W_flat.reshape(W_shape) 291 292 self._cd_index += 1 293 294 else: 295 raise ValueError(f"Unsupported optimizer: {self.optimizer}") 296 297 loss = self._loss(X, y) 298 self.loss_history_.append(loss) 299 300 if verbose: 301 print(f"Epoch {epoch+1}: Loss = {loss:.6f}") 302 303 # if sample_weights, else: (must use self.row_index) 304 if sample_weight in kwargs: 305 self.model.fit( 306 X, 307 y, 308 sample_weight=sample_weight[self.index_row_].ravel(), 309 **kwargs 310 ) 311 312 return self 313 314 return self
Fit the model using finite difference optimization.
Parameters
X : array-like of shape (n_samples, n_features) Training data.
y : array-like of shape (n_samples,) Target values.
epochs : int, optional Number of optimization steps (default=10).
verbose : bool, optional Whether to print progress messages (default=True).
show_progress : bool, optional Whether to show tqdm progress bar (default=True).
sample_weight : array-like, optional Sample weights.
**kwargs Additional keyword arguments.
Returns
self : object Returns self.
317 def predict(self, X, level=95, method='splitconformal', **kwargs): 318 """ 319 Predict using the trained model. 320 321 Parameters 322 ---------- 323 324 X : array-like of shape (n_samples, n_features) 325 Input data. 326 327 level : int, optional 328 Level of confidence for prediction intervals (default=95). 329 330 method : {'splitconformal', 'localconformal'}, optional 331 Method for conformal prediction (default='splitconformal'). 332 333 **kwargs 334 Additional keyword arguments. Use `return_pi=True` for prediction intervals, 335 or `return_std=True` for standard deviation estimates. 336 337 Returns 338 ------- 339 340 array or tuple 341 Model predictions, or a tuple with prediction intervals or standard deviations if requested. 342 """ 343 if "return_std" in kwargs: 344 345 alpha = 100 - level 346 pi_multiplier = norm.ppf(1 - alpha / 200) 347 348 if len(X.shape) == 1: 349 350 n_features = X.shape[0] 351 new_X = mo.rbind( 352 X.reshape(1, n_features), 353 np.ones(n_features).reshape(1, n_features), 354 ) 355 356 mean_, std_ = self.model.predict( 357 new_X, return_std=True 358 )[0] 359 360 preds = mean_ 361 lower = (mean_ - pi_multiplier * std_) 362 upper = (mean_ + pi_multiplier * std_) 363 364 DescribeResults = namedtuple( 365 "DescribeResults", ["mean", "std", "lower", "upper"] 366 ) 367 368 return DescribeResults(preds, std_, lower, upper) 369 370 # len(X.shape) > 1 371 mean_, std_ = self.model.predict( 372 X, return_std=True 373 ) 374 375 preds = mean_ 376 lower = (mean_ - pi_multiplier * std_) 377 upper = (mean_ + pi_multiplier * std_) 378 379 DescribeResults = namedtuple( 380 "DescribeResults", ["mean", "std", "lower", "upper"] 381 ) 382 383 return DescribeResults(preds, std_, lower, upper) 384 385 if "return_pi" in kwargs: 386 assert method in ( 387 "splitconformal", 388 "localconformal", 389 ), "method must be in ('splitconformal', 'localconformal')" 390 self.pi = ns.PredictionInterval( 391 obj=self, 392 method=method, 393 level=level, 394 type_pi=self.type_pi, 395 replications=self.replications, 396 kernel=self.kernel, 397 ) 398 399 if len(self.X_.shape) == 1: 400 if isinstance(X, pd.DataFrame): 401 self.X_ = pd.DataFrame( 402 self.X_.values.reshape(1, -1), columns=self.X_.columns 403 ) 404 else: 405 self.X_ = self.X_.reshape(1, -1) 406 self.y_ = np.array([self.y_]) 407 408 self.pi.fit(self.X_, self.y_) 409 # self.X_ = None # consumes memory to keep, dangerous to delete (side effect) 410 # self.y_ = None # consumes memory to keep, dangerous to delete (side effect) 411 preds = self.pi.predict(X, return_pi=True) 412 return preds 413 414 # "return_std" not in kwargs 415 if len(X.shape) == 1: 416 417 n_features = X.shape[0] 418 new_X = mo.rbind( 419 X.reshape(1, n_features), 420 np.ones(n_features).reshape(1, n_features), 421 ) 422 423 return ( 424 0 425 + self.model.predict(new_X, **kwargs) 426 )[0] 427 428 # len(X.shape) > 1 429 return self.model.predict( 430 X, **kwargs 431 )
Predict using the trained model.
Parameters
X : array-like of shape (n_samples, n_features) Input data.
level : int, optional Level of confidence for prediction intervals (default=95).
method : {'splitconformal', 'localconformal'}, optional Method for conformal prediction (default='splitconformal').
**kwargs
Additional keyword arguments. Use `return_pi=True` for prediction intervals, or `return_std=True` for standard deviation estimates.
Returns
array or tuple Model predictions, or a tuple with prediction intervals or standard deviations if requested.
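Continuing the hedged sketch above (`reg` is a fitted trainer, `X_test` held-out inputs), uncertainty quantification is requested through keyword arguments to `predict`; `return_std=True` assumes the underlying base learner can return standard deviations (e.g. a Bayesian linear model), so both calls are illustrative:

```python
# Hedged continuation of the earlier sketch: `reg` is a fitted trainer and X_test
# the held-out inputs. return_std yields a namedtuple (mean, std, lower, upper),
# provided the base learner supports standard deviation estimates;
# return_pi yields split-conformal prediction intervals.
res = reg.predict(X_test, return_std=True, level=95)
print(res.mean[:5], res.lower[:5], res.upper[:5])

pi_preds = reg.predict(X_test, return_pi=True, method="splitconformal", level=95)
```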
35class DeepClassifier(CustomClassifier, ClassifierMixin): 36 """ 37 Deep Classifier 38 39 Parameters: 40 41 obj: an object 42 A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification 43 44 n_layers: int (default=3) 45 Number of layers. `n_layers = 1` is a simple `CustomClassifier` 46 47 verbose : int, optional (default=0) 48 Monitor progress when fitting. 49 50 All the other parameters are nnetsauce `CustomClassifier`'s 51 52 Examples: 53 54 ```python 55 import nnetsauce as ns 56 from sklearn.datasets import load_breast_cancer 57 from sklearn.model_selection import train_test_split 58 from sklearn.linear_model import LogisticRegressionCV 59 data = load_breast_cancer() 60 X = data.data 61 y= data.target 62 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123) 63 obj = LogisticRegressionCV() 64 clf = ns.DeepClassifier(obj) 65 clf.fit(X_train, y_train) 66 print(clf.score(clf.predict(X_test), y_test)) 67 ``` 68 """ 69 _estimator_type = "classifier" 70 71 def __init__( 72 self, 73 obj, 74 # Defining depth 75 n_layers=3, 76 verbose=0, 77 # CustomClassifier attributes 78 n_hidden_features=5, 79 activation_name="relu", 80 a=0.01, 81 nodes_sim="sobol", 82 bias=True, 83 dropout=0, 84 direct_link=True, 85 n_clusters=2, 86 cluster_encode=True, 87 type_clust="kmeans", 88 type_scaling=("std", "std", "std"), 89 col_sample=1, 90 row_sample=1, 91 cv_calibration=2, 92 calibration_method="sigmoid", 93 seed=123, 94 backend="cpu", 95 ): 96 super().__init__( 97 obj=obj, 98 n_hidden_features=n_hidden_features, 99 activation_name=activation_name, 100 a=a, 101 nodes_sim=nodes_sim, 102 bias=bias, 103 dropout=dropout, 104 direct_link=direct_link, 105 n_clusters=n_clusters, 106 cluster_encode=cluster_encode, 107 type_clust=type_clust, 108 type_scaling=type_scaling, 109 col_sample=col_sample, 110 row_sample=row_sample, 111 seed=seed, 112 backend=backend, 113 ) 114 self.coef_ = None 115 self.intercept_ = None 116 self.type_fit = "classification" 117 self.cv_calibration = cv_calibration 118 self.calibration_method = calibration_method 119 120 # Only wrap in CalibratedClassifierCV if not already wrapped 121 # if not isinstance(obj, CalibratedClassifierCV): 122 # self.obj = CalibratedClassifierCV( 123 # self.obj, 124 # cv=self.cv_calibration, 125 # method=self.calibration_method 126 # ) 127 # else: 128 self.coef_ = None 129 self.intercept_ = None 130 self.type_fit = "classification" 131 self.cv_calibration = cv_calibration 132 self.calibration_method = calibration_method 133 self.obj = obj 134 135 assert n_layers >= 1, "must have n_layers >= 1" 136 self.stacked_obj = obj 137 self.verbose = verbose 138 self.n_layers = n_layers 139 self.classes_ = None 140 self.n_classes_ = None 141 142 def fit(self, X, y, **kwargs): 143 """Fit Classification algorithms to X and y. 144 Parameters 145 ---------- 146 X : array-like, 147 Training vectors, where rows is the number of samples 148 and columns is the number of features. 149 y : array-like, 150 Training vectors, where rows is the number of samples 151 and columns is the number of features. 152 **kwargs: dict 153 Additional parameters to be passed to the fit method 154 of the base learner. For example, `sample_weight`. 
155 156 Returns 157 ------- 158 A fitted object 159 """ 160 161 self.classes_ = np.unique(y) 162 self.n_classes_ = len( 163 self.classes_ 164 ) # for compatibility with scikit-learn 165 166 if isinstance(X, np.ndarray): 167 X = pd.DataFrame(X) 168 169 # init layer 170 self.stacked_obj = CustomClassifier( 171 obj=self.stacked_obj, 172 n_hidden_features=self.n_hidden_features, 173 activation_name=self.activation_name, 174 a=self.a, 175 nodes_sim=self.nodes_sim, 176 bias=self.bias, 177 dropout=self.dropout, 178 direct_link=self.direct_link, 179 n_clusters=self.n_clusters, 180 cluster_encode=self.cluster_encode, 181 type_clust=self.type_clust, 182 type_scaling=self.type_scaling, 183 col_sample=self.col_sample, 184 row_sample=self.row_sample, 185 cv_calibration=None, 186 calibration_method=None, 187 seed=self.seed, 188 backend=self.backend, 189 ) 190 191 if self.verbose > 0: 192 iterator = tqdm(range(self.n_layers - 1)) 193 else: 194 iterator = range(self.n_layers - 1) 195 196 for _ in iterator: 197 self.stacked_obj = deepcopy( 198 CustomClassifier( 199 obj=self.stacked_obj, 200 n_hidden_features=self.n_hidden_features, 201 activation_name=self.activation_name, 202 a=self.a, 203 nodes_sim=self.nodes_sim, 204 bias=self.bias, 205 dropout=self.dropout, 206 direct_link=self.direct_link, 207 n_clusters=self.n_clusters, 208 cluster_encode=self.cluster_encode, 209 type_clust=self.type_clust, 210 type_scaling=self.type_scaling, 211 col_sample=self.col_sample, 212 row_sample=self.row_sample, 213 cv_calibration=None, 214 calibration_method=None, 215 seed=self.seed, 216 backend=self.backend, 217 ) 218 ) 219 self.stacked_obj.fit(X, y, **kwargs) 220 221 return self 222 223 def partial_fit(self, X, y, **kwargs): 224 """Fit Regression algorithms to X and y. 225 Parameters 226 ---------- 227 X : array-like, 228 Training vectors, where rows is the number of samples 229 and columns is the number of features. 230 y : array-like, 231 Training vectors, where rows is the number of samples 232 and columns is the number of features. 233 **kwargs: dict 234 Additional parameters to be passed to the fit method 235 of the base learner. For example, `sample_weight`. 236 Returns 237 ------- 238 A fitted object 239 """ 240 assert hasattr(self, "stacked_obj"), "model must be fitted first" 241 current_obj = self.stacked_obj 242 for _ in range(self.n_layers): 243 try: 244 input_X = current_obj.obj.cook_test_set(X) 245 current_obj.obj.partial_fit(input_X, y, **kwargs) 246 try: 247 current_obj = current_obj.obj 248 except AttributeError: 249 pass 250 except ValueError: 251 pass 252 return self 253 254 def predict(self, X): 255 return self.stacked_obj.predict(X) 256 257 def predict_proba(self, X): 258 return self.stacked_obj.predict_proba(X) 259 260 def score(self, X, y, scoring=None): 261 return self.stacked_obj.score(X, y, scoring) 262 263 def cross_val_optim( 264 self, 265 X_train, 266 y_train, 267 X_test=None, 268 y_test=None, 269 scoring="accuracy", 270 surrogate_obj=None, 271 cv=5, 272 n_jobs=None, 273 n_init=10, 274 n_iter=190, 275 abs_tol=1e-3, 276 verbose=2, 277 seed=123, 278 **kwargs, 279 ): 280 """Cross-validation function and hyperparameters' search 281 282 Parameters: 283 284 X_train: array-like, 285 Training vectors, where rows is the number of samples 286 and columns is the number of features. 287 288 y_train: array-like, 289 Training vectors, where rows is the number of samples 290 and columns is the number of features. 
291 292 X_test: array-like, 293 Testing vectors, where rows is the number of samples 294 and columns is the number of features. 295 296 y_test: array-like, 297 Testing vectors, where rows is the number of samples 298 and columns is the number of features. 299 300 scoring: str 301 scoring metric; see https://scikit-learn.org/stable/modules/model_evaluation.html#the-scoring-parameter-defining-model-evaluation-rules 302 303 surrogate_obj: an object; 304 An ML model for estimating the uncertainty around the objective function 305 306 cv: int; 307 number of cross-validation folds 308 309 n_jobs: int; 310 number of jobs for parallel execution 311 312 n_init: an integer; 313 number of points in the initial setting, when `x_init` and `y_init` are not provided 314 315 n_iter: an integer; 316 number of iterations of the minimization algorithm 317 318 abs_tol: a float; 319 tolerance for convergence of the optimizer (early stopping based on acquisition function) 320 321 verbose: int 322 controls verbosity 323 324 seed: int 325 reproducibility seed 326 327 **kwargs: dict 328 additional parameters to be passed to the estimator 329 330 Examples: 331 332 ```python 333 ``` 334 """ 335 336 num_to_activation_name = {1: "relu", 2: "sigmoid", 3: "tanh"} 337 num_to_nodes_sim = {1: "sobol", 2: "uniform", 3: "hammersley"} 338 num_to_type_clust = {1: "kmeans", 2: "gmm"} 339 340 def deepclassifier_cv( 341 X_train, 342 y_train, 343 # Defining depth 344 n_layers=3, 345 # CustomClassifier attributes 346 n_hidden_features=5, 347 activation_name="relu", 348 nodes_sim="sobol", 349 dropout=0, 350 n_clusters=2, 351 type_clust="kmeans", 352 cv=5, 353 n_jobs=None, 354 scoring="accuracy", 355 seed=123, 356 ): 357 self.set_params( 358 **{ 359 "n_layers": n_layers, 360 # CustomClassifier attributes 361 "n_hidden_features": n_hidden_features, 362 "activation_name": activation_name, 363 "nodes_sim": nodes_sim, 364 "dropout": dropout, 365 "n_clusters": n_clusters, 366 "type_clust": type_clust, 367 **kwargs, 368 } 369 ) 370 return -cross_val_score( 371 estimator=self, 372 X=X_train, 373 y=y_train, 374 scoring=scoring, 375 cv=cv, 376 n_jobs=n_jobs, 377 verbose=0, 378 ).mean() 379 380 # objective function for hyperparams tuning 381 def crossval_objective(xx): 382 return deepclassifier_cv( 383 X_train=X_train, 384 y_train=y_train, 385 # Defining depth 386 n_layers=int(np.ceil(xx[0])), 387 # CustomClassifier attributes 388 n_hidden_features=int(np.ceil(xx[1])), 389 activation_name=num_to_activation_name[np.ceil(xx[2])], 390 nodes_sim=num_to_nodes_sim[int(np.ceil(xx[3]))], 391 dropout=xx[4], 392 n_clusters=int(np.ceil(xx[5])), 393 type_clust=num_to_type_clust[int(np.ceil(xx[6]))], 394 cv=cv, 395 n_jobs=n_jobs, 396 scoring=scoring, 397 seed=seed, 398 ) 399 400 if surrogate_obj is None: 401 gp_opt = gp.GPOpt( 402 objective_func=crossval_objective, 403 lower_bound=np.array([0, 3, 0, 0, 0.0, 0, 0]), 404 upper_bound=np.array([5, 100, 3, 3, 0.4, 5, 2]), 405 params_names=[ 406 "n_layers", 407 # CustomClassifier attributes 408 "n_hidden_features", 409 "activation_name", 410 "nodes_sim", 411 "dropout", 412 "n_clusters", 413 "type_clust", 414 ], 415 method="bayesian", 416 n_init=n_init, 417 n_iter=n_iter, 418 seed=seed, 419 ) 420 else: 421 gp_opt = gp.GPOpt( 422 objective_func=crossval_objective, 423 lower_bound=np.array([0, 3, 0, 0, 0.0, 0, 0]), 424 upper_bound=np.array([5, 100, 3, 3, 0.4, 5, 2]), 425 params_names=[ 426 "n_layers", 427 # CustomClassifier attributes 428 "n_hidden_features", 429 "activation_name", 430 "nodes_sim", 431 "dropout", 
432 "n_clusters", 433 "type_clust", 434 ], 435 acquisition="ucb", 436 method="splitconformal", 437 surrogate_obj=ns.PredictionInterval( 438 obj=surrogate_obj, method="splitconformal" 439 ), 440 n_init=n_init, 441 n_iter=n_iter, 442 seed=seed, 443 ) 444 445 res = gp_opt.optimize(verbose=verbose, abs_tol=abs_tol) 446 res.best_params["n_layers"] = int(np.ceil(res.best_params["n_layers"])) 447 res.best_params["n_hidden_features"] = int( 448 np.ceil(res.best_params["n_hidden_features"]) 449 ) 450 res.best_params["activation_name"] = num_to_activation_name[ 451 np.ceil(res.best_params["activation_name"]) 452 ] 453 res.best_params["nodes_sim"] = num_to_nodes_sim[ 454 int(np.ceil(res.best_params["nodes_sim"])) 455 ] 456 res.best_params["dropout"] = res.best_params["dropout"] 457 res.best_params["n_clusters"] = int(np.ceil(res.best_params["n_clusters"])) 458 res.best_params["type_clust"] = num_to_type_clust[ 459 int(np.ceil(res.best_params["type_clust"])) 460 ] 461 462 # out-of-sample error 463 if X_test is not None and y_test is not None: 464 self.set_params(**res.best_params, verbose=0, seed=seed) 465 preds = self.fit(X_train, y_train).predict(X_test) 466 # check error on y_test 467 oos_err = getattr(metrics, scoring + "_score")(y_true=y_test, y_pred=preds) 468 result = namedtuple("result", res._fields + ("test_" + scoring,)) 469 return result(*res, oos_err) 470 else: 471 return res 472 473 def lazy_cross_val_optim( 474 self, 475 X_train, 476 y_train, 477 X_test=None, 478 y_test=None, 479 scoring="accuracy", 480 surrogate_objs=None, 481 customize=False, 482 cv=5, 483 n_jobs=None, 484 n_init=10, 485 n_iter=190, 486 abs_tol=1e-3, 487 verbose=1, 488 seed=123, 489 ): 490 """Automated Cross-validation function and hyperparameters' search using multiple surrogates 491 492 Parameters: 493 494 X_train: array-like, 495 Training vectors, where rows is the number of samples 496 and columns is the number of features. 497 498 y_train: array-like, 499 Training vectors, where rows is the number of samples 500 and columns is the number of features. 501 502 X_test: array-like, 503 Testing vectors, where rows is the number of samples 504 and columns is the number of features. 505 506 y_test: array-like, 507 Testing vectors, where rows is the number of samples 508 and columns is the number of features. 
509 510 scoring: str 511 scoring metric; see https://scikit-learn.org/stable/modules/model_evaluation.html#the-scoring-parameter-defining-model-evaluation-rules 512 513 surrogate_objs: object names as a list of strings; 514 ML models for estimating the uncertainty around the objective function 515 516 customize: boolean 517 if True, the surrogate is transformed into a quasi-randomized network (default is False) 518 519 cv: int; 520 number of cross-validation folds 521 522 n_jobs: int; 523 number of jobs for parallel execution 524 525 n_init: an integer; 526 number of points in the initial setting, when `x_init` and `y_init` are not provided 527 528 n_iter: an integer; 529 number of iterations of the minimization algorithm 530 531 abs_tol: a float; 532 tolerance for convergence of the optimizer (early stopping based on acquisition function) 533 534 verbose: int 535 controls verbosity 536 537 seed: int 538 reproducibility seed 539 540 Examples: 541 542 ```python 543 ``` 544 """ 545 546 removed_regressors = [ 547 "TheilSenRegressor", 548 "ARDRegression", 549 "CCA", 550 "GaussianProcessRegressor", 551 "GradientBoostingRegressor", 552 "HistGradientBoostingRegressor", 553 "IsotonicRegression", 554 "MultiOutputRegressor", 555 "MultiTaskElasticNet", 556 "MultiTaskElasticNetCV", 557 "MultiTaskLasso", 558 "MultiTaskLassoCV", 559 "OrthogonalMatchingPursuit", 560 "OrthogonalMatchingPursuitCV", 561 "PLSCanonical", 562 "PLSRegression", 563 "RadiusNeighborsRegressor", 564 "RegressorChain", 565 "StackingRegressor", 566 "VotingRegressor", 567 ] 568 569 results = [] 570 571 for est in all_estimators(): 572 573 if surrogate_objs is None: 574 575 if issubclass(est[1], RegressorMixin) and ( 576 est[0] not in removed_regressors 577 ): 578 try: 579 if customize == True: 580 surr_obj = ns.CustomClassifier(obj=est[1]()) 581 else: 582 surr_obj = est[1]() 583 res = self.cross_val_optim( 584 X_train=X_train, 585 y_train=y_train, 586 X_test=X_test, 587 y_test=y_test, 588 surrogate_obj=surr_obj, 589 cv=cv, 590 n_jobs=n_jobs, 591 scoring=scoring, 592 n_init=n_init, 593 n_iter=n_iter, 594 abs_tol=abs_tol, 595 verbose=verbose, 596 seed=seed, 597 ) 598 if customize == True: 599 results.append((f"CustomClassifier({est[0]})", res)) 600 else: 601 results.append((est[0], res)) 602 except: 603 pass 604 605 else: 606 607 if ( 608 issubclass(est[1], RegressorMixin) 609 and (est[0] not in removed_regressors) 610 and est[0] in surrogate_objs 611 ): 612 try: 613 if customize == True: 614 surr_obj = ns.CustomClassifier(obj=est[1]()) 615 else: 616 surr_obj = est[1]() 617 res = self.cross_val_optim( 618 X_train=X_train, 619 y_train=y_train, 620 X_test=X_test, 621 y_test=y_test, 622 surrogate_obj=surr_obj, 623 cv=cv, 624 n_jobs=n_jobs, 625 scoring=scoring, 626 n_init=n_init, 627 n_iter=n_iter, 628 abs_tol=abs_tol, 629 verbose=verbose, 630 seed=seed, 631 ) 632 if customize == True: 633 results.append((f"CustomClassifier({est[0]})", res)) 634 else: 635 results.append((est[0], res)) 636 except: 637 pass 638 639 return results 640 641 @property 642 def _estimator_type(self): 643 return "classifier"
Deep Classifier
Parameters:
obj: an object
A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification
n_layers: int (default=3)
Number of layers. `n_layers = 1` is a simple `CustomClassifier`
verbose : int, optional (default=0)
Monitor progress when fitting.
All the other parameters are nnetsauce `CustomClassifier`'s
Examples:
```python
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegressionCV
data = load_breast_cancer()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
obj = LogisticRegressionCV()
clf = ns.DeepClassifier(obj)
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))
```
142 def fit(self, X, y, **kwargs): 143 """Fit Classification algorithms to X and y. 144 Parameters 145 ---------- 146 X : array-like, 147 Training vectors, where rows is the number of samples 148 and columns is the number of features. 149 y : array-like, 150 Training vectors, where rows is the number of samples 151 and columns is the number of features. 152 **kwargs: dict 153 Additional parameters to be passed to the fit method 154 of the base learner. For example, `sample_weight`. 155 156 Returns 157 ------- 158 A fitted object 159 """ 160 161 self.classes_ = np.unique(y) 162 self.n_classes_ = len( 163 self.classes_ 164 ) # for compatibility with scikit-learn 165 166 if isinstance(X, np.ndarray): 167 X = pd.DataFrame(X) 168 169 # init layer 170 self.stacked_obj = CustomClassifier( 171 obj=self.stacked_obj, 172 n_hidden_features=self.n_hidden_features, 173 activation_name=self.activation_name, 174 a=self.a, 175 nodes_sim=self.nodes_sim, 176 bias=self.bias, 177 dropout=self.dropout, 178 direct_link=self.direct_link, 179 n_clusters=self.n_clusters, 180 cluster_encode=self.cluster_encode, 181 type_clust=self.type_clust, 182 type_scaling=self.type_scaling, 183 col_sample=self.col_sample, 184 row_sample=self.row_sample, 185 cv_calibration=None, 186 calibration_method=None, 187 seed=self.seed, 188 backend=self.backend, 189 ) 190 191 if self.verbose > 0: 192 iterator = tqdm(range(self.n_layers - 1)) 193 else: 194 iterator = range(self.n_layers - 1) 195 196 for _ in iterator: 197 self.stacked_obj = deepcopy( 198 CustomClassifier( 199 obj=self.stacked_obj, 200 n_hidden_features=self.n_hidden_features, 201 activation_name=self.activation_name, 202 a=self.a, 203 nodes_sim=self.nodes_sim, 204 bias=self.bias, 205 dropout=self.dropout, 206 direct_link=self.direct_link, 207 n_clusters=self.n_clusters, 208 cluster_encode=self.cluster_encode, 209 type_clust=self.type_clust, 210 type_scaling=self.type_scaling, 211 col_sample=self.col_sample, 212 row_sample=self.row_sample, 213 cv_calibration=None, 214 calibration_method=None, 215 seed=self.seed, 216 backend=self.backend, 217 ) 218 ) 219 self.stacked_obj.fit(X, y, **kwargs) 220 221 return self
Fit Classification algorithms to X and y.
Parameters
X : array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
y : array-like,
Target values, of length n_samples.
**kwargs: dict
Additional parameters to be passed to the fit method
of the base learner. For example, `sample_weight`.
Returns
A fitted object
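For instance, per-sample weights can be forwarded to the base learner through `**kwargs`; a hedged sketch (the weights below are purely illustrative):

```python
# Hedged sketch: forwarding sample_weight through fit's **kwargs, assuming the
# base learner (here LogisticRegressionCV) accepts it.
import numpy as np
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegressionCV

X, y = load_breast_cancer(return_X_y=True)
w = np.where(y == 1, 1.0, 2.0)   # illustrative per-sample weights
clf = ns.DeepClassifier(LogisticRegressionCV(), n_layers=2)
clf.fit(X, y, sample_weight=w)
```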
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
Scoring function for classification.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method (default is accuracy)
Returns:
score: float
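A hedged sketch of hyperparameter search with `cross_val_optim` follows (small `n_init`/`n_iter` to keep the run short; the surrogate regressor choice is illustrative):

```python
# Hedged sketch: Bayesian hyperparameter search for DeepClassifier via
# cross_val_optim; arguments follow the signature shown in the source above.
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegressionCV, RidgeCV
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123
)

clf = ns.DeepClassifier(LogisticRegressionCV())
res = clf.cross_val_optim(
    X_train, y_train, X_test=X_test, y_test=y_test,
    scoring="accuracy", surrogate_obj=RidgeCV(),
    cv=5, n_init=5, n_iter=10, verbose=0, seed=123,
)
print(res.best_params)   # best hyperparameters found by the optimizer
```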
13class DeepRegressor(CustomRegressor, RegressorMixin): 14 """ 15 Deep Regressor 16 17 Parameters: 18 19 obj: an object 20 A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification 21 22 verbose : int, optional (default=0) 23 Monitor progress when fitting. 24 25 n_layers: int (default=2) 26 Number of layers. `n_layers = 1` is a simple `CustomRegressor` 27 28 All the other parameters are nnetsauce `CustomRegressor`'s 29 30 Examples: 31 32 ```python 33 import nnetsauce as ns 34 from sklearn.datasets import load_diabetes 35 from sklearn.model_selection import train_test_split 36 from sklearn.linear_model import RidgeCV 37 data = load_diabetes() 38 X = data.data 39 y= data.target 40 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123) 41 obj = RidgeCV() 42 clf = ns.DeepRegressor(obj) 43 clf.fit(X_train, y_train) 44 print(clf.score(clf.predict(X_test), y_test)) 45 ``` 46 47 """ 48 49 def __init__( 50 self, 51 obj, 52 # Defining depth 53 n_layers=2, 54 verbose=0, 55 # CustomRegressor attributes 56 n_hidden_features=5, 57 activation_name="relu", 58 a=0.01, 59 nodes_sim="sobol", 60 bias=True, 61 dropout=0, 62 direct_link=True, 63 n_clusters=2, 64 cluster_encode=True, 65 type_clust="kmeans", 66 type_scaling=("std", "std", "std"), 67 col_sample=1, 68 row_sample=1, 69 level=None, 70 pi_method="splitconformal", 71 seed=123, 72 backend="cpu", 73 ): 74 super().__init__( 75 obj=obj, 76 n_hidden_features=n_hidden_features, 77 activation_name=activation_name, 78 a=a, 79 nodes_sim=nodes_sim, 80 bias=bias, 81 dropout=dropout, 82 direct_link=direct_link, 83 n_clusters=n_clusters, 84 cluster_encode=cluster_encode, 85 type_clust=type_clust, 86 type_scaling=type_scaling, 87 col_sample=col_sample, 88 row_sample=row_sample, 89 level=level, 90 pi_method=pi_method, 91 seed=seed, 92 backend=backend, 93 ) 94 95 assert n_layers >= 1, "must have n_layers >= 1" 96 97 self.stacked_obj = deepcopy(obj) 98 self.verbose = verbose 99 self.n_layers = n_layers 100 self.level = level 101 self.pi_method = pi_method 102 self.coef_ = None 103 104 def fit(self, X, y, **kwargs): 105 """Fit Regression algorithms to X and y. 106 Parameters 107 ---------- 108 X : array-like, 109 Training vectors, where rows is the number of samples 110 and columns is the number of features. 111 y : array-like, 112 Training vectors, where rows is the number of samples 113 and columns is the number of features. 114 **kwargs: dict 115 Additional parameters to be passed to the fit method 116 of the base learner. For example, `sample_weight`. 
117 Returns 118 ------- 119 A fitted object 120 """ 121 122 if isinstance(X, np.ndarray): 123 X = pd.DataFrame(X) 124 125 # init layer 126 self.stacked_obj = CustomRegressor( 127 obj=self.stacked_obj, 128 n_hidden_features=self.n_hidden_features, 129 activation_name=self.activation_name, 130 a=self.a, 131 nodes_sim=self.nodes_sim, 132 bias=self.bias, 133 dropout=self.dropout, 134 direct_link=self.direct_link, 135 n_clusters=self.n_clusters, 136 cluster_encode=self.cluster_encode, 137 type_clust=self.type_clust, 138 type_scaling=self.type_scaling, 139 col_sample=self.col_sample, 140 row_sample=self.row_sample, 141 seed=self.seed, 142 backend=self.backend, 143 ) 144 145 if self.verbose > 0: 146 iterator = tqdm(range(self.n_layers - 1)) 147 else: 148 iterator = range(self.n_layers - 1) 149 150 for _ in iterator: 151 self.stacked_obj = deepcopy( 152 CustomRegressor( 153 obj=self.stacked_obj, 154 n_hidden_features=self.n_hidden_features, 155 activation_name=self.activation_name, 156 a=self.a, 157 nodes_sim=self.nodes_sim, 158 bias=self.bias, 159 dropout=self.dropout, 160 direct_link=self.direct_link, 161 n_clusters=self.n_clusters, 162 cluster_encode=self.cluster_encode, 163 type_clust=self.type_clust, 164 type_scaling=self.type_scaling, 165 col_sample=self.col_sample, 166 row_sample=self.row_sample, 167 seed=self.seed, 168 backend=self.backend, 169 ) 170 ) 171 172 self.stacked_obj.fit(X, y, **kwargs) 173 174 if self.level is not None: 175 self.stacked_obj = PredictionInterval( 176 obj=self.stacked_obj, method=self.pi_method, level=self.level 177 ) 178 179 if hasattr(self.stacked_obj, "clustering_obj_"): 180 self.clustering_obj_ = self.stacked_obj.clustering_obj_ 181 182 if hasattr(self.stacked_obj, "coef_"): 183 self.coef_ = self.stacked_obj.coef_ 184 185 if hasattr(self.stacked_obj, "scaler_"): 186 self.scaler_ = self.stacked_obj.scaler_ 187 188 if hasattr(self.stacked_obj, "nn_scaler_"): 189 self.nn_scaler_ = self.stacked_obj.nn_scaler_ 190 191 if hasattr(self.stacked_obj, "clustering_scaler_"): 192 self.clustering_scaler_ = self.stacked_obj.clustering_scaler_ 193 194 return self 195 196 def partial_fit(self, X, y, **kwargs): 197 """Fit Regression algorithms to X and y. 198 Parameters 199 ---------- 200 X : array-like, 201 Training vectors, where rows is the number of samples 202 and columns is the number of features. 203 y : array-like, 204 Training vectors, where rows is the number of samples 205 and columns is the number of features. 206 **kwargs: dict 207 Additional parameters to be passed to the fit method 208 of the base learner. For example, `sample_weight`. 209 Returns 210 ------- 211 A fitted object 212 """ 213 assert hasattr(self, "stacked_obj"), "model must be fitted first" 214 current_obj = self.stacked_obj 215 for _ in range(self.n_layers): 216 try: 217 input_X = current_obj.obj.cook_test_set(X) 218 current_obj.obj.partial_fit(input_X, y, **kwargs) 219 try: 220 current_obj = current_obj.obj 221 except AttributeError: 222 pass 223 except ValueError as e: 224 print(e) 225 pass 226 return self 227 228 def predict(self, X, **kwargs): 229 if self.level is not None: 230 return self.stacked_obj.predict(X, return_pi=True) 231 return self.stacked_obj.predict(X, **kwargs) 232 233 def score(self, X, y, scoring=None): 234 return self.stacked_obj.score(X, y, scoring)
Deep Regressor
Parameters:
obj: an object
A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification
verbose : int, optional (default=0)
Monitor progress when fitting.
n_layers: int (default=2)
Number of layers. `n_layers = 1` is a simple `CustomRegressor`
All the other parameters are nnetsauce `CustomRegressor`'s
Examples:
```python
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.linear_model import RidgeCV
data = load_diabetes()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
obj = RidgeCV()
clf = ns.DeepRegressor(obj)
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))
```
104 def fit(self, X, y, **kwargs): 105 """Fit Regression algorithms to X and y. 106 Parameters 107 ---------- 108 X : array-like, 109 Training vectors, where rows is the number of samples 110 and columns is the number of features. 111 y : array-like, 112 Training vectors, where rows is the number of samples 113 and columns is the number of features. 114 **kwargs: dict 115 Additional parameters to be passed to the fit method 116 of the base learner. For example, `sample_weight`. 117 Returns 118 ------- 119 A fitted object 120 """ 121 122 if isinstance(X, np.ndarray): 123 X = pd.DataFrame(X) 124 125 # init layer 126 self.stacked_obj = CustomRegressor( 127 obj=self.stacked_obj, 128 n_hidden_features=self.n_hidden_features, 129 activation_name=self.activation_name, 130 a=self.a, 131 nodes_sim=self.nodes_sim, 132 bias=self.bias, 133 dropout=self.dropout, 134 direct_link=self.direct_link, 135 n_clusters=self.n_clusters, 136 cluster_encode=self.cluster_encode, 137 type_clust=self.type_clust, 138 type_scaling=self.type_scaling, 139 col_sample=self.col_sample, 140 row_sample=self.row_sample, 141 seed=self.seed, 142 backend=self.backend, 143 ) 144 145 if self.verbose > 0: 146 iterator = tqdm(range(self.n_layers - 1)) 147 else: 148 iterator = range(self.n_layers - 1) 149 150 for _ in iterator: 151 self.stacked_obj = deepcopy( 152 CustomRegressor( 153 obj=self.stacked_obj, 154 n_hidden_features=self.n_hidden_features, 155 activation_name=self.activation_name, 156 a=self.a, 157 nodes_sim=self.nodes_sim, 158 bias=self.bias, 159 dropout=self.dropout, 160 direct_link=self.direct_link, 161 n_clusters=self.n_clusters, 162 cluster_encode=self.cluster_encode, 163 type_clust=self.type_clust, 164 type_scaling=self.type_scaling, 165 col_sample=self.col_sample, 166 row_sample=self.row_sample, 167 seed=self.seed, 168 backend=self.backend, 169 ) 170 ) 171 172 self.stacked_obj.fit(X, y, **kwargs) 173 174 if self.level is not None: 175 self.stacked_obj = PredictionInterval( 176 obj=self.stacked_obj, method=self.pi_method, level=self.level 177 ) 178 179 if hasattr(self.stacked_obj, "clustering_obj_"): 180 self.clustering_obj_ = self.stacked_obj.clustering_obj_ 181 182 if hasattr(self.stacked_obj, "coef_"): 183 self.coef_ = self.stacked_obj.coef_ 184 185 if hasattr(self.stacked_obj, "scaler_"): 186 self.scaler_ = self.stacked_obj.scaler_ 187 188 if hasattr(self.stacked_obj, "nn_scaler_"): 189 self.nn_scaler_ = self.stacked_obj.nn_scaler_ 190 191 if hasattr(self.stacked_obj, "clustering_scaler_"): 192 self.clustering_scaler_ = self.stacked_obj.clustering_scaler_ 193 194 return self
Fit Regression algorithms to X and y.
Parameters
X : array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
y : array-like,
Target values, of length n_samples.
**kwargs: dict
Additional parameters to be passed to the fit method
of the base learner. For example, `sample_weight`.
Returns
A fitted object
228 def predict(self, X, **kwargs): 229 if self.level is not None: 230 return self.stacked_obj.predict(X, return_pi=True) 231 return self.stacked_obj.predict(X, **kwargs)
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
level: int
Level of confidence (default = 95)
method: str
'splitconformal' or 'localconformal' conformal
prediction (used if you specify `return_pi = True`)
**kwargs: additional parameters
`return_pi = True` for conformal prediction,
with `method` in ('splitconformal', 'localconformal'),
or `return_std = True` for `self.obj` in
(`sklearn.linear_model.BayesianRidge`,
`sklearn.linear_model.ARDRegression`,
`sklearn.gaussian_process.GaussianProcessRegressor`)
Returns:
model predictions:
an array if uncertainty quantification is not requested,
or a tuple otherwise: (mean, standard deviation,
lower and upper prediction interval) if `return_std = True`,
or prediction intervals (and simulations, when available)
if `return_pi = True`
Compute the score of the model.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method
Returns:
score: float
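Since `level` and `pi_method` are constructor arguments, conformal prediction intervals can be requested directly; a minimal sketch:

```python
# Minimal sketch: DeepRegressor with split-conformal prediction intervals,
# using the `level` and `pi_method` constructor arguments described above.
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.linear_model import RidgeCV
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123
)

reg = ns.DeepRegressor(RidgeCV(), n_layers=2, level=95, pi_method="splitconformal")
reg.fit(X_train, y_train)
preds = reg.predict(X_test)   # intervals are returned because `level` is set
```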
11class DeepMTS(MTS): 12 """Univariate and multivariate time series (DeepMTS) forecasting with Quasi-Randomized networks (Work in progress) 13 14 Parameters: 15 16 obj: object. 17 any object containing a method fit (obj.fit()) and a method predict 18 (obj.predict()). 19 20 n_layers: int. 21 number of layers in the neural network. 22 23 n_hidden_features: int. 24 number of nodes in the hidden layer. 25 26 activation_name: str. 27 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'. 28 29 a: float. 30 hyperparameter for 'prelu' or 'elu' activation function. 31 32 nodes_sim: str. 33 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 34 'uniform'. 35 36 bias: boolean. 37 indicates if the hidden layer contains a bias term (True) or not 38 (False). 39 40 dropout: float. 41 regularization parameter; (random) percentage of nodes dropped out 42 of the training. 43 44 direct_link: boolean. 45 indicates if the original predictors are included (True) in model's fitting or not (False). 46 47 n_clusters: int. 48 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering). 49 50 cluster_encode: bool. 51 defines how the variable containing clusters is treated (default is one-hot) 52 if `False`, then labels are used, without one-hot encoding. 53 54 type_clust: str. 55 type of clustering method: currently k-means ('kmeans') or Gaussian 56 Mixture Model ('gmm'). 57 58 type_scaling: a tuple of 3 strings. 59 scaling methods for inputs, hidden layer, and clustering respectively 60 (and when relevant). 61 Currently available: standardization ('std') or MinMax scaling ('minmax'). 62 63 lags: int. 64 number of lags used for each time series. 65 66 type_pi: str. 67 type of prediction interval; currently: 68 - "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case 69 - "kde": based on Kernel Density Estimation of in-sample residuals 70 - "bootstrap": based on independent bootstrap of in-sample residuals 71 - "block-bootstrap": based on basic block bootstrap of in-sample residuals 72 - "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals 73 - "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals 74 - "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals 75 - "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals 76 - "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals 77 - "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals 78 79 block_size: int. 80 size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap"). 81 Default is round(3.15*(n_residuals^1/3)) 82 83 replications: int. 84 number of replications (if needed, for predictive simulation). Default is 'None'. 85 86 kernel: str. 87 the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'. 88 89 agg: str. 90 either "mean" or "median" for simulation of bootstrap aggregating 91 92 seed: int. 93 reproducibility seed for nodes_sim=='uniform' or predictive simulation. 94 95 backend: str. 96 "cpu" or "gpu" or "tpu". 97 98 verbose: int. 
99 0: not printing; 1: printing 100 101 show_progress: bool. 102 True: progress bar when fitting each series; False: no progress bar when fitting each series 103 104 Attributes: 105 106 fit_objs_: dict 107 objects adjusted to each individual time series 108 109 y_: {array-like} 110 DeepMTS responses (most recent observations first) 111 112 X_: {array-like} 113 DeepMTS lags 114 115 xreg_: {array-like} 116 external regressors 117 118 y_means_: dict 119 a dictionary of each series mean values 120 121 preds_: {array-like} 122 successive model predictions 123 124 preds_std_: {array-like} 125 standard deviation around the predictions 126 127 return_std_: boolean 128 return uncertainty or not (set in predict) 129 130 df_: data frame 131 the input data frame, in case a data.frame is provided to `fit` 132 133 Examples: 134 135 Example 1: 136 137 ```python 138 import nnetsauce as ns 139 import numpy as np 140 from sklearn import linear_model 141 np.random.seed(123) 142 143 M = np.random.rand(10, 3) 144 M[:,0] = 10*M[:,0] 145 M[:,2] = 25*M[:,2] 146 print(M) 147 148 # Adjust Bayesian Ridge 149 regr4 = linear_model.BayesianRidge() 150 obj_DeepMTS = ns.DeepMTS(regr4, lags = 1, n_hidden_features=5) 151 obj_DeepMTS.fit(M) 152 print(obj_DeepMTS.predict()) 153 154 # with credible intervals 155 print(obj_DeepMTS.predict(return_std=True, level=80)) 156 157 print(obj_DeepMTS.predict(return_std=True, level=95)) 158 ``` 159 160 Example 2: 161 162 ```python 163 import nnetsauce as ns 164 import numpy as np 165 from sklearn import linear_model 166 167 dataset = { 168 'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'], 169 'series1' : [34, 30, 35.6, 33.3, 38.1], 170 'series2' : [4, 5.5, 5.6, 6.3, 5.1], 171 'series3' : [100, 100.5, 100.6, 100.2, 100.1]} 172 df = pd.DataFrame(dataset).set_index('date') 173 print(df) 174 175 # Adjust Bayesian Ridge 176 regr5 = linear_model.BayesianRidge() 177 obj_DeepMTS = ns.DeepMTS(regr5, lags = 1, n_hidden_features=5) 178 obj_DeepMTS.fit(df) 179 print(obj_DeepMTS.predict()) 180 181 # with credible intervals 182 print(obj_DeepMTS.predict(return_std=True, level=80)) 183 184 print(obj_DeepMTS.predict(return_std=True, level=95)) 185 ``` 186 187 """ 188 189 # construct the object ----- 190 191 def __init__( 192 self, 193 obj, 194 n_layers=3, 195 n_hidden_features=5, 196 activation_name="relu", 197 a=0.01, 198 nodes_sim="sobol", 199 bias=True, 200 dropout=0, 201 direct_link=True, 202 n_clusters=2, 203 cluster_encode=True, 204 type_clust="kmeans", 205 type_scaling=("std", "std", "std"), 206 lags=1, 207 type_pi="kde", 208 block_size=None, 209 replications=None, 210 kernel=None, 211 agg="mean", 212 seed=123, 213 backend="cpu", 214 verbose=0, 215 show_progress=True, 216 ): 217 assert int(lags) == lags, "parameter 'lags' should be an integer" 218 assert n_layers >= 1, "must have n_layers >= 1" 219 self.n_layers = int(n_layers) 220 221 if self.n_layers > 1: 222 223 for _ in range(self.n_layers - 1): 224 obj = CustomRegressor( 225 obj=deepcopy(obj), 226 n_hidden_features=n_hidden_features, 227 activation_name=activation_name, 228 a=a, 229 nodes_sim=nodes_sim, 230 bias=bias, 231 dropout=dropout, 232 direct_link=direct_link, 233 n_clusters=n_clusters, 234 cluster_encode=cluster_encode, 235 type_clust=type_clust, 236 type_scaling=type_scaling, 237 seed=seed, 238 backend=backend, 239 ) 240 241 self.obj = deepcopy(obj) 242 super().__init__( 243 obj=self.obj, 244 n_hidden_features=n_hidden_features, 245 activation_name=activation_name, 246 a=a, 247 nodes_sim=nodes_sim, 
248 bias=bias, 249 dropout=dropout, 250 direct_link=direct_link, 251 n_clusters=n_clusters, 252 cluster_encode=cluster_encode, 253 type_clust=type_clust, 254 type_scaling=type_scaling, 255 lags=lags, 256 type_pi=type_pi, 257 block_size=block_size, 258 replications=replications, 259 kernel=kernel, 260 agg=agg, 261 seed=seed, 262 backend=backend, 263 verbose=verbose, 264 show_progress=show_progress, 265 )
Univariate and multivariate time series (DeepMTS) forecasting with Quasi-Randomized networks (Work in progress)
Parameters:
obj: object.
any object containing a method fit (obj.fit()) and a method predict
(obj.predict()).
n_layers: int.
number of layers in the neural network.
n_hidden_features: int.
number of nodes in the hidden layer.
activation_name: str.
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'.
a: float.
hyperparameter for 'prelu' or 'elu' activation function.
nodes_sim: str.
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'.
bias: boolean.
indicates if the hidden layer contains a bias term (True) or not
(False).
dropout: float.
regularization parameter; (random) percentage of nodes dropped out
of the training.
direct_link: boolean.
indicates if the original predictors are included (True) in model's fitting or not (False).
n_clusters: int.
number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering).
cluster_encode: bool.
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding.
type_clust: str.
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm').
type_scaling: a tuple of 3 strings.
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax').
lags: int.
number of lags used for each time series.
type_pi: str.
type of prediction interval; currently:
- "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case
- "kde": based on Kernel Density Estimation of in-sample residuals
- "bootstrap": based on independent bootstrap of in-sample residuals
- "block-bootstrap": based on basic block bootstrap of in-sample residuals
- "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals
- "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals
- "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals
- "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals
- "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals
- "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals
block_size: int.
size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap").
Default is round(3.15*(n_residuals^1/3))
replications: int.
number of replications (if needed, for predictive simulation). Default is 'None'.
kernel: str.
the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'.
agg: str.
either "mean" or "median" for simulation of bootstrap aggregating
seed: int.
reproducibility seed for nodes_sim=='uniform' or predictive simulation.
backend: str.
"cpu" or "gpu" or "tpu".
verbose: int.
0: not printing; 1: printing
show_progress: bool.
True: progress bar when fitting each series; False: no progress bar when fitting each series
Attributes:
fit_objs_: dict
objects adjusted to each individual time series
y_: {array-like}
DeepMTS responses (most recent observations first)
X_: {array-like}
DeepMTS lags
xreg_: {array-like}
external regressors
y_means_: dict
a dictionary of each series mean values
preds_: {array-like}
successive model predictions
preds_std_: {array-like}
standard deviation around the predictions
return_std_: boolean
return uncertainty or not (set in predict)
df_: data frame
the input data frame, in case a data.frame is provided to `fit`
Examples:
Example 1:
```python
import nnetsauce as ns
import numpy as np
from sklearn import linear_model
np.random.seed(123)

M = np.random.rand(10, 3)
M[:,0] = 10*M[:,0]
M[:,2] = 25*M[:,2]
print(M)

# Adjust Bayesian Ridge
regr4 = linear_model.BayesianRidge()
obj_DeepMTS = ns.DeepMTS(regr4, lags = 1, n_hidden_features=5)
obj_DeepMTS.fit(M)
print(obj_DeepMTS.predict())

# with credible intervals
print(obj_DeepMTS.predict(return_std=True, level=80))
print(obj_DeepMTS.predict(return_std=True, level=95))
```
Example 2:
```python
import nnetsauce as ns
import numpy as np
import pandas as pd
from sklearn import linear_model

dataset = {
    'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
    'series1' : [34, 30, 35.6, 33.3, 38.1],
    'series2' : [4, 5.5, 5.6, 6.3, 5.1],
    'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
df = pd.DataFrame(dataset).set_index('date')
print(df)

# Adjust Bayesian Ridge
regr5 = linear_model.BayesianRidge()
obj_DeepMTS = ns.DeepMTS(regr5, lags = 1, n_hidden_features=5)
obj_DeepMTS.fit(df)
print(obj_DeepMTS.predict())

# with credible intervals
print(obj_DeepMTS.predict(return_std=True, level=80))
print(obj_DeepMTS.predict(return_std=True, level=95))
```
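A short, hedged sketch of simulation-based forecasting follows; the values of `type_pi`, `replications` and `kernel` are illustrative:

```python
# Hedged sketch: DeepMTS with KDE-based predictive simulation of in-sample
# residuals; parameter values below are illustrative.
import numpy as np
import nnetsauce as ns
from sklearn import linear_model

np.random.seed(123)
M = np.random.rand(50, 3)

obj_DeepMTS = ns.DeepMTS(
    linear_model.BayesianRidge(),
    n_layers=2, lags=2,
    type_pi="kde", replications=100, kernel="gaussian",
)
obj_DeepMTS.fit(M)
print(obj_DeepMTS.predict())   # simulation-based forecasts and intervals
```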
6class Downloader: 7 """Download datasets from data sources (R-universe for now)""" 8 9 def __init__(self): 10 self.pkgname = None 11 self.dataset = None 12 self.source = None 13 self.url = None 14 self.request = None 15 16 def download( 17 self, 18 pkgname="MASS", 19 dataset="Boston", 20 source="https://cran.r-universe.dev/", 21 **kwargs 22 ): 23 """Download datasets from data sources (R-universe for now) 24 25 Examples: 26 27 ```python 28 import nnetsauce as ns 29 30 downloader = ns.Downloader() 31 df = downloader.download(pkgname="MASS", dataset="Boston") 32 ``` 33 34 """ 35 self.pkgname = pkgname 36 self.dataset = dataset 37 self.source = source 38 self.url = source + pkgname + "/data/" + dataset + "/json" 39 self.request = requests.get(self.url) 40 return pd.DataFrame(self.request.json(), **kwargs)
Download datasets from data sources (R-universe for now)
16 def download( 17 self, 18 pkgname="MASS", 19 dataset="Boston", 20 source="https://cran.r-universe.dev/", 21 **kwargs 22 ): 23 """Download datasets from data sources (R-universe for now) 24 25 Examples: 26 27 ```python 28 import nnetsauce as ns 29 30 downloader = ns.Downloader() 31 df = downloader.download(pkgname="MASS", dataset="Boston") 32 ``` 33 34 """ 35 self.pkgname = pkgname 36 self.dataset = dataset 37 self.source = source 38 self.url = source + pkgname + "/data/" + dataset + "/json" 39 self.request = requests.get(self.url) 40 return pd.DataFrame(self.request.json(), **kwargs)
Download datasets from data sources (R-universe for now)
Examples:
```python
import nnetsauce as ns

downloader = ns.Downloader()
df = downloader.download(pkgname="MASS", dataset="Boston")
```
23class ElasticNet2Regressor(Ridge2, RegressorMixin): 24 """Enhanced Elastic Net with dual regularization paths, JAX support, and coordinate descent. 25 26 Features: 27 - Separate L1/L2 ratios for direct (lambda1/l1_ratio1) and hidden (lambda2/l1_ratio2) paths 28 - JAX acceleration for GPU/TPU when backend != 'cpu' 29 - Choice of optimization methods (L-BFGS-B or coordinate descent) 30 31 Parameters: 32 n_hidden_features: int 33 Number of nodes in the hidden layer 34 activation_name: str 35 Activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 36 a: float 37 Hyperparameter for 'prelu' or 'elu' activation 38 nodes_sim: str 39 Node simulation type: 'sobol', 'hammersley', 'halton', 'uniform' 40 bias: bool 41 Whether to include bias term in hidden layer 42 dropout: float 43 Dropout rate (regularization) 44 n_clusters: int 45 Number of clusters (0 for no clustering) 46 cluster_encode: bool 47 Whether to one-hot encode clusters 48 type_clust: str 49 Clustering method: 'kmeans' or 'gmm' 50 type_scaling: tuple 51 Scaling methods for (inputs, hidden layer, clusters) 52 lambda1: float 53 Regularization strength for direct connections 54 lambda2: float 55 Regularization strength for hidden layer 56 l1_ratio1: float 57 L1 ratio (0-1) for direct connections 58 l1_ratio2: float 59 L1 ratio (0-1) for hidden layer 60 max_iter: int 61 Maximum optimization iterations 62 tol: float 63 Optimization tolerance 64 solver: str 65 Optimization method: 'lbfgs' or 'cd' (coordinate descent) 66 seed: int 67 Random seed 68 backend: str 69 'cpu', 'gpu', or 'tpu' 70 """ 71 72 def __init__( 73 self, 74 n_hidden_features=5, 75 activation_name="relu", 76 a=0.01, 77 nodes_sim="sobol", 78 bias=True, 79 dropout=0, 80 n_clusters=2, 81 cluster_encode=True, 82 type_clust="kmeans", 83 type_scaling=("std", "std", "std"), 84 lambda1=0.1, 85 lambda2=0.1, 86 l1_ratio1=0.5, 87 l1_ratio2=0.5, 88 max_iter=1000, 89 tol=1e-4, 90 solver="lbfgs", 91 seed=123, 92 backend="cpu", 93 ): 94 super().__init__( 95 n_hidden_features=n_hidden_features, 96 activation_name=activation_name, 97 a=a, 98 nodes_sim=nodes_sim, 99 bias=bias, 100 dropout=dropout, 101 n_clusters=n_clusters, 102 cluster_encode=cluster_encode, 103 type_clust=type_clust, 104 type_scaling=type_scaling, 105 lambda1=lambda1, 106 lambda2=lambda2, 107 seed=seed, 108 backend=backend, 109 ) 110 111 self.l1_ratio1 = l1_ratio1 112 self.l1_ratio2 = l1_ratio2 113 self.max_iter = max_iter 114 self.tol = tol 115 self.solver = solver 116 self.type_fit = "regression" 117 118 # Initialize JAX-related attributes 119 self._jax_initialized = False 120 self._init_jax_functions() 121 122 def _init_jax_functions(self): 123 """Initialize JAX functions if backend is not CPU and JAX is available""" 124 if self.backend != "cpu" and JAX_AVAILABLE and not self._jax_initialized: 125 # JIT compile key functions 126 self._jax_elastic_net_penalty = jit(self._jax_penalty) 127 self._jax_objective = jit(self._jax_obj) 128 self._jax_grad = jit(grad(self._jax_obj)) 129 self._jax_initialized = True 130 131 def _jax_penalty(self, beta, n_direct): 132 """JAX version of elastic net penalty""" 133 beta_direct = beta[:n_direct] 134 beta_hidden = beta[n_direct:] 135 136 l1_1 = self.lambda1 * self.l1_ratio1 * jnp.sum(jnp.abs(beta_direct)) 137 l2_1 = 0.5 * self.lambda1 * (1-self.l1_ratio1) * jnp.sum(beta_direct**2) 138 l1_2 = self.lambda2 * self.l1_ratio2 * jnp.sum(jnp.abs(beta_hidden)) 139 l2_2 = 0.5 * self.lambda2 * (1-self.l1_ratio2) * jnp.sum(beta_hidden**2) 140 141 return l1_1 + l2_1 + l1_2 + l2_2 142 143 def 
_jax_obj(self, beta, X, y, n_direct): 144 """JAX version of objective function""" 145 residuals = y - jnp.dot(X, beta) 146 mse = jnp.mean(residuals**2) 147 penalty = self._jax_penalty(beta, n_direct) 148 return 0.5 * mse + penalty 149 150 def _numpy_penalty(self, beta, n_direct): 151 """NumPy version of elastic net penalty""" 152 beta_direct = beta[:n_direct] 153 beta_hidden = beta[n_direct:] 154 155 l1_1 = self.lambda1 * self.l1_ratio1 * np.sum(np.abs(beta_direct)) 156 l2_1 = 0.5 * self.lambda1 * (1-self.l1_ratio1) * np.sum(beta_direct**2) 157 l1_2 = self.lambda2 * self.l1_ratio2 * np.sum(np.abs(beta_hidden)) 158 l2_2 = 0.5 * self.lambda2 * (1-self.l1_ratio2) * np.sum(beta_hidden**2) 159 160 return l1_1 + l2_1 + l1_2 + l2_2 161 162 def _numpy_obj(self, beta, X, y, n_direct): 163 """NumPy version of objective function""" 164 residuals = y - np.dot(X, beta) 165 mse = np.mean(residuals**2) 166 penalty = self._numpy_penalty(beta, n_direct) 167 return 0.5 * mse + penalty 168 169 def _soft_threshold(self, x, threshold): 170 """Soft thresholding operator for coordinate descent""" 171 return np.sign(x) * np.maximum(np.abs(x) - threshold, 0) 172 173 def _coordinate_descent(self, X, y, n_direct): 174 """Coordinate descent optimization""" 175 n_samples, n_features = X.shape 176 beta = np.zeros(n_features) 177 XtX = X.T @ X 178 Xty = X.T @ y 179 diag_XtX = np.diag(XtX) 180 181 for _ in range(self.max_iter): 182 beta_old = beta.copy() 183 184 for j in range(n_features): 185 # Compute partial residual 186 X_j = X[:, j] 187 r = y - X @ beta + X_j * beta[j] 188 189 # Compute unregularized update 190 update = X_j @ r / (diag_XtX[j] + 1e-10) 191 192 # Apply appropriate regularization 193 if j < n_direct: # Direct connection 194 lambda_ = self.lambda1 195 l1_ratio = self.l1_ratio1 196 else: # Hidden layer connection 197 lambda_ = self.lambda2 198 l1_ratio = self.l1_ratio2 199 200 # Apply soft thresholding for L1 and shrinkage for L2 201 beta[j] = self._soft_threshold(update, lambda_ * l1_ratio) 202 beta[j] /= (1 + lambda_ * (1 - l1_ratio)) 203 204 # Check convergence 205 if np.max(np.abs(beta - beta_old)) < self.tol: 206 break 207 208 return beta 209 210 def fit(self, X, y, **kwargs): 211 """Fit model with selected optimization method""" 212 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 213 n_X, p_X = X.shape 214 n_Z, p_Z = scaled_Z.shape 215 216 if self.n_clusters > 0: 217 n_direct = p_X + (self.n_clusters if self.cluster_encode else 1) 218 else: 219 n_direct = p_X 220 221 X_ = scaled_Z[:, 0:n_direct] 222 Phi_X_ = scaled_Z[:, n_direct:p_Z] 223 all_features = np.hstack([X_, Phi_X_]) 224 225 # Convert to JAX arrays if using GPU/TPU 226 if self.backend != "cpu" and JAX_AVAILABLE: 227 all_features = jnp.array(all_features) 228 centered_y = jnp.array(centered_y) 229 beta_init = jnp.zeros(all_features.shape[1]) 230 231 if self.solver == "lbfgs": 232 res = minimize( 233 fun=self._jax_obj, 234 x0=beta_init, 235 args=(all_features, centered_y, n_direct), 236 method='L-BFGS-B', 237 jac=self._jax_grad, 238 options={'maxiter': self.max_iter, 'gtol': self.tol} 239 ) 240 self.beta_ = np.array(res.x) 241 else: 242 # Fall back to NumPy for coordinate descent 243 self.beta_ = self._coordinate_descent( 244 np.array(all_features), 245 np.array(centered_y), 246 n_direct 247 ) 248 else: 249 # NumPy backend 250 beta_init = np.zeros(all_features.shape[1]) 251 252 if self.solver == "cd": 253 self.beta_ = self._coordinate_descent( 254 all_features, 255 centered_y, 256 n_direct 257 ) 258 else: 259 res = 
minimize( 260 fun=self._numpy_obj, 261 x0=beta_init, 262 args=(all_features, centered_y, n_direct), 263 method='L-BFGS-B', 264 options={'maxiter': self.max_iter, 'gtol': self.tol} 265 ) 266 self.beta_ = res.x 267 268 self.y_mean_ = np.mean(y) 269 return self 270 271 def predict(self, X, **kwargs): 272 """Predict using fitted model""" 273 if len(X.shape) == 1: 274 n_features = X.shape[0] 275 new_X = mo.rbind( 276 x=X.reshape(1, n_features), 277 y=np.ones(n_features).reshape(1, n_features), 278 backend=self.backend, 279 ) 280 return ( 281 self.y_mean_ 282 + mo.safe_sparse_dot( 283 a=self.cook_test_set(new_X, **kwargs), 284 b=self.beta_, 285 backend=self.backend, 286 ) 287 )[0] 288 289 return self.y_mean_ + mo.safe_sparse_dot( 290 a=self.cook_test_set(X, **kwargs), 291 b=self.beta_, 292 backend=self.backend, 293 )
Enhanced Elastic Net with dual regularization paths, JAX support, and coordinate descent.
Features:
- Separate L1/L2 ratios for direct (lambda1/l1_ratio1) and hidden (lambda2/l1_ratio2) paths
- JAX acceleration for GPU/TPU when backend != 'cpu'
- Choice of optimization methods (L-BFGS-B or coordinate descent)
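To make the dual-path penalty above concrete, here is a small NumPy sketch of the term added to the mean squared error; it mirrors the `_numpy_penalty` method shown in the source, with a hypothetical helper name and illustrative values.

```python
import numpy as np

def elastic_net_penalty(beta, n_direct, lambda1, l1_ratio1, lambda2, l1_ratio2):
    # Direct coefficients are penalized with (lambda1, l1_ratio1),
    # hidden-layer coefficients with (lambda2, l1_ratio2).
    beta_direct, beta_hidden = beta[:n_direct], beta[n_direct:]
    pen_direct = (lambda1 * l1_ratio1 * np.sum(np.abs(beta_direct))
                  + 0.5 * lambda1 * (1 - l1_ratio1) * np.sum(beta_direct**2))
    pen_hidden = (lambda2 * l1_ratio2 * np.sum(np.abs(beta_hidden))
                  + 0.5 * lambda2 * (1 - l1_ratio2) * np.sum(beta_hidden**2))
    return pen_direct + pen_hidden

beta = np.array([0.5, -1.2, 0.0, 2.0, -0.3])  # illustrative coefficients
print(elastic_net_penalty(beta, n_direct=2, lambda1=0.1, l1_ratio1=0.5,
                          lambda2=0.1, l1_ratio2=0.5))
```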
Parameters:
n_hidden_features: int
Number of nodes in the hidden layer
activation_name: str
Activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
Hyperparameter for 'prelu' or 'elu' activation
nodes_sim: str
Node simulation type: 'sobol', 'hammersley', 'halton', 'uniform'
bias: bool
Whether to include bias term in hidden layer
dropout: float
Dropout rate (regularization)
n_clusters: int
Number of clusters (0 for no clustering)
cluster_encode: bool
Whether to one-hot encode clusters
type_clust: str
Clustering method: 'kmeans' or 'gmm'
type_scaling: tuple
Scaling methods for (inputs, hidden layer, clusters)
lambda1: float
Regularization strength for direct connections
lambda2: float
Regularization strength for hidden layer
l1_ratio1: float
L1 ratio (0-1) for direct connections
l1_ratio2: float
L1 ratio (0-1) for hidden layer
max_iter: int
Maximum optimization iterations
tol: float
Optimization tolerance
solver: str
Optimization method: 'lbfgs' or 'cd' (coordinate descent)
seed: int
Random seed
backend: str
'cpu', 'gpu', or 'tpu'
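A minimal usage sketch for this class (the dataset and hyperparameter values are purely illustrative):

```python
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=123)

# "cd" selects the coordinate-descent solver; "lbfgs" is the default
reg = ns.ElasticNet2Regressor(n_hidden_features=10, lambda1=0.1, lambda2=0.1,
                              l1_ratio1=0.5, l1_ratio2=0.5, solver="cd",
                              seed=123)
reg.fit(X_train, y_train)
print(np.sqrt(np.mean((reg.predict(X_test) - y_test) ** 2)))  # test RMSE
```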
16class GLMClassifier(GLM, ClassifierMixin): 17 """Generalized 'linear' models using quasi-randomized networks (classification) 18 19 Parameters: 20 21 n_hidden_features: int 22 number of nodes in the hidden layer 23 24 lambda1: float 25 regularization parameter for GLM coefficients on original features 26 27 alpha1: float 28 controls compromize between l1 and l2 norm of GLM coefficients on original features 29 30 lambda2: float 31 regularization parameter for GLM coefficients on nonlinear features 32 33 alpha2: float 34 controls compromize between l1 and l2 norm of GLM coefficients on nonlinear features 35 36 activation_name: str 37 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 38 39 a: float 40 hyperparameter for 'prelu' or 'elu' activation function 41 42 nodes_sim: str 43 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 44 'uniform' 45 46 bias: boolean 47 indicates if the hidden layer contains a bias term (True) or not 48 (False) 49 50 dropout: float 51 regularization parameter; (random) percentage of nodes dropped out 52 of the training 53 54 direct_link: boolean 55 indicates if the original predictors are included (True) in model's 56 fitting or not (False) 57 58 n_clusters: int 59 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: 60 no clustering) 61 62 cluster_encode: bool 63 defines how the variable containing clusters is treated (default is one-hot) 64 if `False`, then labels are used, without one-hot encoding 65 66 type_clust: str 67 type of clustering method: currently k-means ('kmeans') or Gaussian 68 Mixture Model ('gmm') 69 70 type_scaling: a tuple of 3 strings 71 scaling methods for inputs, hidden layer, and clustering respectively 72 (and when relevant). 73 Currently available: standardization ('std') or MinMax scaling ('minmax') 74 75 optimizer: object 76 optimizer, from class nnetsauce.Optimizer 77 78 seed: int 79 reproducibility seed for nodes_sim=='uniform' 80 81 Attributes: 82 83 beta_: vector 84 regression coefficients 85 86 Examples: 87 88 See [https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_classification.py) 89 90 """ 91 92 # construct the object ----- 93 _estimator_type = "classifier" 94 95 def __init__( 96 self, 97 n_hidden_features=5, 98 lambda1=0.01, 99 alpha1=0.5, 100 lambda2=0.01, 101 alpha2=0.5, 102 family="expit", 103 activation_name="relu", 104 a=0.01, 105 nodes_sim="sobol", 106 bias=True, 107 dropout=0, 108 direct_link=True, 109 n_clusters=2, 110 cluster_encode=True, 111 type_clust="kmeans", 112 type_scaling=("std", "std", "std"), 113 optimizer=Optimizer(), 114 seed=123, 115 ): 116 super().__init__( 117 n_hidden_features=n_hidden_features, 118 lambda1=lambda1, 119 alpha1=alpha1, 120 lambda2=lambda2, 121 alpha2=alpha2, 122 activation_name=activation_name, 123 a=a, 124 nodes_sim=nodes_sim, 125 bias=bias, 126 dropout=dropout, 127 direct_link=direct_link, 128 n_clusters=n_clusters, 129 cluster_encode=cluster_encode, 130 type_clust=type_clust, 131 type_scaling=type_scaling, 132 optimizer=optimizer, 133 seed=seed, 134 ) 135 136 self.family = family 137 138 def logit_loss(self, Y, row_index, XB): 139 self.n_classes = Y.shape[1] # len(np.unique(y)) 140 # Y = mo.one_hot_encode2(y, self.n_classes) 141 # Y = self.optimizer.one_hot_encode(y, self.n_classes) 142 143 # max_double = 709.0 # only if softmax 144 # XB[XB > max_double] = max_double 145 XB[XB > 709.0] = 709.0 146 147 if row_index is None: 148 return 
-np.mean(np.sum(Y * XB, axis=1) - logsumexp(XB)) 149 150 return -np.mean(np.sum(Y[row_index, :] * XB, axis=1) - logsumexp(XB)) 151 152 def expit_erf_loss(self, Y, row_index, XB): 153 # self.n_classes = len(np.unique(y)) 154 # Y = mo.one_hot_encode2(y, self.n_classes) 155 # Y = self.optimizer.one_hot_encode(y, self.n_classes) 156 self.n_classes = Y.shape[1] 157 158 if row_index is None: 159 return -np.mean(np.sum(Y * XB, axis=1) - logsumexp(XB)) 160 161 return -np.mean(np.sum(Y[row_index, :] * XB, axis=1) - logsumexp(XB)) 162 163 def loss_func( 164 self, beta, group_index, X, Y, y, row_index=None, type_loss="logit", **kwargs 165 ): 166 res = { 167 "logit": self.logit_loss, 168 "expit": self.expit_erf_loss, 169 "erf": self.expit_erf_loss, 170 } 171 172 if row_index is None: 173 row_index = range(len(y)) 174 XB = self.compute_XB( 175 X, 176 beta=np.reshape(beta, (X.shape[1], self.n_classes), order="F"), 177 ) 178 179 return res[type_loss](Y, row_index, XB) + self.compute_penalty( 180 group_index=group_index, beta=beta 181 ) 182 183 XB = self.compute_XB( 184 X, 185 beta=np.reshape(beta, (X.shape[1], self.n_classes), order="F"), 186 row_index=row_index, 187 ) 188 189 return res[type_loss](Y, row_index, XB) + self.compute_penalty( 190 group_index=group_index, beta=beta 191 ) 192 193 def fit(self, X, y, **kwargs): 194 """Fit GLM model to training data (X, y). 195 196 Args: 197 198 X: {array-like}, shape = [n_samples, n_features] 199 Training vectors, where n_samples is the number 200 of samples and n_features is the number of features. 201 202 y: array-like, shape = [n_samples] 203 Target values. 204 205 **kwargs: additional parameters to be passed to 206 self.cook_training_set or self.obj.fit 207 208 Returns: 209 210 self: object 211 212 """ 213 214 assert mx.is_factor( 215 y 216 ), "y must contain only integers" # change is_factor and subsampling everywhere 217 218 self.classes_ = np.unique(y) # for compatibility with sklearn 219 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 220 221 self.beta_ = None 222 223 n, p = X.shape 224 225 self.group_index = n * X.shape[1] 226 227 self.n_classes = len(np.unique(y)) 228 229 output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 230 231 # Y = mo.one_hot_encode2(output_y, self.n_classes) 232 Y = self.optimizer.one_hot_encode(output_y, self.n_classes) 233 234 # initialization 235 beta_ = np.linalg.lstsq(scaled_Z, Y, rcond=None)[0] 236 237 # optimization 238 # fit(self, loss_func, response, x0, **kwargs): 239 # loss_func(self, beta, group_index, X, y, 240 # row_index=None, type_loss="gaussian", 241 # **kwargs) 242 self.optimizer.fit( 243 self.loss_func, 244 response=y, 245 x0=beta_.flatten(order="F"), 246 group_index=self.group_index, 247 X=scaled_Z, 248 Y=Y, 249 y=y, 250 type_loss=self.family, 251 ) 252 253 self.beta_ = self.optimizer.results[0] 254 self.classes_ = np.unique(y) 255 256 return self 257 258 def predict(self, X, **kwargs): 259 """Predict test data X. 260 261 Args: 262 263 X: {array-like}, shape = [n_samples, n_features] 264 Training vectors, where n_samples is the number 265 of samples and n_features is the number of features. 266 267 **kwargs: additional parameters to be passed to 268 self.cook_test_set 269 270 Returns: 271 272 model predictions: {array-like} 273 274 """ 275 276 return np.argmax(self.predict_proba(X, **kwargs), axis=1) 277 278 def predict_proba(self, X, **kwargs): 279 """Predict probabilities for test data X. 
280 281 Args: 282 283 X: {array-like}, shape = [n_samples, n_features] 284 Training vectors, where n_samples is the number 285 of samples and n_features is the number of features. 286 287 **kwargs: additional parameters to be passed to 288 self.cook_test_set 289 290 Returns: 291 292 probability estimates for test data: {array-like} 293 294 """ 295 if len(X.shape) == 1: 296 n_features = X.shape[0] 297 new_X = mo.rbind( 298 X.reshape(1, n_features), 299 np.ones(n_features).reshape(1, n_features), 300 ) 301 302 Z = self.cook_test_set(new_X, **kwargs) 303 304 else: 305 Z = self.cook_test_set(X, **kwargs) 306 307 ZB = mo.safe_sparse_dot( 308 Z, 309 self.beta_.reshape( 310 self.n_classes, 311 X.shape[1] + self.n_hidden_features + self.n_clusters, 312 ).T, 313 ) 314 315 if self.family == "logit": 316 exp_ZB = np.exp(ZB) 317 318 return exp_ZB / exp_ZB.sum(axis=1)[:, None] 319 320 if self.family == "expit": 321 exp_ZB = expit(ZB) 322 323 return exp_ZB / exp_ZB.sum(axis=1)[:, None] 324 325 if self.family == "erf": 326 exp_ZB = 0.5 * (1 + erf(ZB)) 327 328 return exp_ZB / exp_ZB.sum(axis=1)[:, None] 329 330 def score(self, X, y, scoring=None): 331 """Scoring function for classification. 332 333 Args: 334 335 X: {array-like}, shape = [n_samples, n_features] 336 Training vectors, where n_samples is the number 337 of samples and n_features is the number of features. 338 339 y: array-like, shape = [n_samples] 340 Target values. 341 342 scoring: str 343 scoring method (default is accuracy) 344 345 Returns: 346 347 score: float 348 """ 349 350 if scoring is None: 351 scoring = "accuracy" 352 353 if scoring == "accuracy": 354 return skm2.accuracy_score(y, self.predict(X)) 355 356 if scoring == "f1": 357 return skm2.f1_score(y, self.predict(X)) 358 359 if scoring == "precision": 360 return skm2.precision_score(y, self.predict(X)) 361 362 if scoring == "recall": 363 return skm2.recall_score(y, self.predict(X)) 364 365 if scoring == "roc_auc": 366 return skm2.roc_auc_score(y, self.predict(X)) 367 368 if scoring == "log_loss": 369 return skm2.log_loss(y, self.predict_proba(X)) 370 371 if scoring == "balanced_accuracy": 372 return skm2.balanced_accuracy_score(y, self.predict(X)) 373 374 if scoring == "average_precision": 375 return skm2.average_precision_score(y, self.predict(X)) 376 377 if scoring == "neg_brier_score": 378 return -skm2.brier_score_loss(y, self.predict_proba(X)) 379 380 if scoring == "neg_log_loss": 381 return -skm2.log_loss(y, self.predict_proba(X)) 382 383 @property 384 def _estimator_type(self): 385 return "classifier"
Generalized 'linear' models using quasi-randomized networks (classification)
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
lambda1: float
regularization parameter for GLM coefficients on original features
alpha1: float
controls the compromise between l1 and l2 norms of GLM coefficients on original features
lambda2: float
regularization parameter for GLM coefficients on nonlinear features
alpha2: float
controls the compromise between l1 and l2 norms of GLM coefficients on nonlinear features
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
optimizer: object
optimizer, from class nnetsauce.Optimizer
seed: int
reproducibility seed for nodes_sim=='uniform'
Attributes:
beta_: vector
regression coefficients
Examples:
See https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_classification.py
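The linked script is the reference example; a minimal, hedged sketch of the same pattern (optimizer settings are left at their defaults, and the hyperparameters are illustrative):

```python
import nnetsauce as ns
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)  # integer class labels, as required by fit()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=123)

clf = ns.GLMClassifier(n_hidden_features=5, family="expit", seed=123)
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))      # accuracy by default
print(clf.predict_proba(X_test)[:3])  # row-normalized class probabilities
```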
Fit GLM model to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
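As the source above shows, each family maps the linear scores `ZB` to nonnegative values and then normalizes each row to sum to one. A small NumPy sketch of the 'erf' branch, with made-up scores:

```python
import numpy as np
from scipy.special import erf

ZB = np.array([[0.3, -0.1, 1.2],
               [-0.5, 0.4, 0.1]])      # made-up linear scores, one row per sample
probs = 0.5 * (1 + erf(ZB))            # 'erf' family: map scores into (0, 1)
probs /= probs.sum(axis=1)[:, None]    # normalize rows into probabilities
print(probs.sum(axis=1))               # each row sums to 1
```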
Scoring function for classification.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method (default is accuracy)
Returns:
score: float
14class GLMRegressor(GLM, RegressorMixin): 15 """Generalized 'linear' models using quasi-randomized networks (regression) 16 17 Attributes: 18 19 n_hidden_features: int 20 number of nodes in the hidden layer 21 22 lambda1: float 23 regularization parameter for GLM coefficients on original features 24 25 alpha1: float 26 controls compromize between l1 and l2 norm of GLM coefficients on original features 27 28 lambda2: float 29 regularization parameter for GLM coefficients on nonlinear features 30 31 alpha2: float 32 controls compromize between l1 and l2 norm of GLM coefficients on nonlinear features 33 34 family: str 35 "gaussian", "laplace" or "poisson" (for now) 36 37 activation_name: str 38 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 39 40 a: float 41 hyperparameter for 'prelu' or 'elu' activation function 42 43 nodes_sim: str 44 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 45 'uniform' 46 47 bias: boolean 48 indicates if the hidden layer contains a bias term (True) or not 49 (False) 50 51 dropout: float 52 regularization parameter; (random) percentage of nodes dropped out 53 of the training 54 55 direct_link: boolean 56 indicates if the original predictors are included (True) in model's 57 fitting or not (False) 58 59 n_clusters: int 60 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: 61 no clustering) 62 63 cluster_encode: bool 64 defines how the variable containing clusters is treated (default is one-hot) 65 if `False`, then labels are used, without one-hot encoding 66 67 type_clust: str 68 type of clustering method: currently k-means ('kmeans') or Gaussian 69 Mixture Model ('gmm') 70 71 type_scaling: a tuple of 3 strings 72 scaling methods for inputs, hidden layer, and clustering respectively 73 (and when relevant). 
74 Currently available: standardization ('std') or MinMax scaling ('minmax') 75 76 optimizer: object 77 optimizer, from class nnetsauce.utils.Optimizer 78 79 seed: int 80 reproducibility seed for nodes_sim=='uniform' 81 82 Attributes: 83 84 beta_: vector 85 regression coefficients 86 87 Examples: 88 89 See [https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py) 90 91 """ 92 93 # construct the object ----- 94 95 def __init__( 96 self, 97 n_hidden_features=5, 98 lambda1=0.01, 99 alpha1=0.5, 100 lambda2=0.01, 101 alpha2=0.5, 102 family="gaussian", 103 activation_name="relu", 104 a=0.01, 105 nodes_sim="sobol", 106 bias=True, 107 dropout=0, 108 direct_link=True, 109 n_clusters=2, 110 cluster_encode=True, 111 type_clust="kmeans", 112 type_scaling=("std", "std", "std"), 113 optimizer=Optimizer(), 114 seed=123, 115 ): 116 super().__init__( 117 n_hidden_features=n_hidden_features, 118 lambda1=lambda1, 119 alpha1=alpha1, 120 lambda2=lambda2, 121 alpha2=alpha2, 122 activation_name=activation_name, 123 a=a, 124 nodes_sim=nodes_sim, 125 bias=bias, 126 dropout=dropout, 127 direct_link=direct_link, 128 n_clusters=n_clusters, 129 cluster_encode=cluster_encode, 130 type_clust=type_clust, 131 type_scaling=type_scaling, 132 optimizer=optimizer, 133 seed=seed, 134 ) 135 136 self.family = family 137 138 def gaussian_loss(self, y, row_index, XB): 139 return 0.5 * np.mean(np.square(y[row_index] - XB)) 140 141 def laplace_loss(self, y, row_index, XB): 142 return 0.5 * np.mean(np.abs(y[row_index] - XB)) 143 144 def poisson_loss(self, y, row_index, XB): 145 return -np.mean(y[row_index] * XB - np.exp(XB)) 146 147 def loss_func( 148 self, beta, group_index, X, y, row_index=None, type_loss="gaussian", **kwargs 149 ): 150 res = { 151 "gaussian": self.gaussian_loss, 152 "laplace": self.laplace_loss, 153 "poisson": self.poisson_loss, 154 } 155 156 if row_index is None: 157 row_index = range(len(y)) 158 XB = self.compute_XB(X, beta=beta) 159 160 return res[type_loss](y, row_index, XB) + self.compute_penalty( 161 group_index=group_index, beta=beta 162 ) 163 164 XB = self.compute_XB(X, beta=beta, row_index=row_index) 165 166 return res[type_loss](y, row_index, XB) + self.compute_penalty( 167 group_index=group_index, beta=beta 168 ) 169 170 def fit(self, X, y, **kwargs): 171 """Fit GLM model to training data (X, y). 172 173 Args: 174 175 X: {array-like}, shape = [n_samples, n_features] 176 Training vectors, where n_samples is the number 177 of samples and n_features is the number of features. 178 179 y: array-like, shape = [n_samples] 180 Target values. 
181 182 **kwargs: additional parameters to be passed to 183 self.cook_training_set or self.obj.fit 184 185 Returns: 186 187 self: object 188 189 """ 190 191 self.beta_ = None 192 193 self.n_iter = 0 194 195 n, self.group_index = X.shape 196 197 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 198 199 n_Z = scaled_Z.shape[0] 200 201 # initialization 202 beta_ = np.linalg.lstsq(scaled_Z, centered_y, rcond=None)[0] 203 204 # optimization 205 # fit(self, loss_func, response, x0, **kwargs): 206 # loss_func(self, beta, group_index, X, y, 207 # row_index=None, type_loss="gaussian", 208 # **kwargs) 209 self.optimizer.fit( 210 self.loss_func, 211 response=centered_y, 212 x0=beta_, 213 group_index=self.group_index, 214 X=scaled_Z, 215 y=centered_y, 216 type_loss=self.family, 217 **kwargs 218 ) 219 220 self.beta_ = self.optimizer.results[0] 221 222 return self 223 224 def predict(self, X, **kwargs): 225 """Predict test data X. 226 227 Args: 228 229 X: {array-like}, shape = [n_samples, n_features] 230 Training vectors, where n_samples is the number 231 of samples and n_features is the number of features. 232 233 **kwargs: additional parameters to be passed to 234 self.cook_test_set 235 236 Returns: 237 238 model predictions: {array-like} 239 240 """ 241 242 if len(X.shape) == 1: 243 n_features = X.shape[0] 244 new_X = mo.rbind( 245 X.reshape(1, n_features), 246 np.ones(n_features).reshape(1, n_features), 247 ) 248 249 return ( 250 self.y_mean_ + np.dot(self.cook_test_set(new_X, **kwargs), self.beta_) 251 )[0] 252 253 return self.y_mean_ + np.dot(self.cook_test_set(X, **kwargs), self.beta_) 254 255 def score(self, X, y, scoring=None): 256 """Compute the score of the model. 257 258 Parameters: 259 260 X: {array-like}, shape = [n_samples, n_features] 261 Training vectors, where n_samples is the number 262 of samples and n_features is the number of features. 263 264 y: array-like, shape = [n_samples] 265 Target values. 266 267 scoring: str 268 scoring method 269 270 Returns: 271 272 score: float 273 274 """ 275 276 if scoring is None: 277 return np.sqrt(np.mean((self.predict(X) - y) ** 2)) 278 279 return skm2.get_scorer(scoring)(self, X, y)
Generalized 'linear' models using quasi-randomized networks (regression)
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
lambda1: float
regularization parameter for GLM coefficients on original features
alpha1: float
controls the compromise between l1 and l2 norms of GLM coefficients on original features
lambda2: float
regularization parameter for GLM coefficients on nonlinear features
alpha2: float
controls the compromise between l1 and l2 norms of GLM coefficients on nonlinear features
family: str
"gaussian", "laplace" or "poisson" (for now)
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
optimizer: object
optimizer, from class nnetsauce.utils.Optimizer
seed: int
reproducibility seed for nodes_sim=='uniform'
Attributes:
beta_: vector
regression coefficients
Examples:
See https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py
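As for the classifier, the linked script is the reference example; a minimal hedged sketch (hyperparameters illustrative, default optimizer):

```python
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=123)

reg = ns.GLMRegressor(n_hidden_features=5, family="gaussian", seed=123)
reg.fit(X_train, y_train)
print(reg.score(X_test, y_test))  # RMSE when no scoring method is given
```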
Fit GLM model to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
Compute the score of the model.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method
Returns:
score: float
18class KernelRidge(BaseEstimator, RegressorMixin): 19 """ 20 Kernel Ridge Regression with optional GPU support, Matérn kernels, and automatic input standardization. 21 22 Parameters: 23 - alpha: float 24 Regularization parameter. 25 - kernel: str 26 Kernel type ("linear", "rbf", or "matern"). 27 - gamma: float 28 Kernel coefficient for "rbf". Ignored for other kernels. 29 - nu: float 30 Smoothness parameter for the Matérn kernel. Default is 1.5. 31 - length_scale: float 32 Length scale parameter for the Matérn kernel. Default is 1.0. 33 - backend: str 34 "cpu" or "gpu" (uses JAX if "gpu"). 35 """ 36 37 def __init__( 38 self, 39 alpha=1.0, 40 kernel="rbf", 41 gamma=None, 42 nu=1.5, 43 length_scale=1.0, 44 backend="cpu", 45 ): 46 self.alpha = alpha 47 self.alpha_ = alpha 48 self.kernel = kernel 49 self.gamma = gamma 50 self.nu = nu 51 self.length_scale = length_scale 52 self.backend = backend 53 self.scaler = StandardScaler() 54 55 if backend == "gpu" and not JAX_AVAILABLE: 56 raise ImportError( 57 "JAX is not installed. Please install JAX to use the GPU backend." 58 ) 59 60 def _linear_kernel(self, X, Y): 61 return jnp.dot(X, Y.T) if self.backend == "gpu" else np.dot(X, Y.T) 62 63 def _rbf_kernel(self, X, Y): 64 if self.gamma is None: 65 self.gamma = 1.0 / X.shape[1] 66 if self.backend == "gpu": 67 sq_dists = ( 68 jnp.sum(X**2, axis=1)[:, None] 69 + jnp.sum(Y**2, axis=1) 70 - 2 * jnp.dot(X, Y.T) 71 ) 72 return jnp.exp(-self.gamma * sq_dists) 73 else: 74 sq_dists = ( 75 np.sum(X**2, axis=1)[:, None] 76 + np.sum(Y**2, axis=1) 77 - 2 * np.dot(X, Y.T) 78 ) 79 return np.exp(-self.gamma * sq_dists) 80 81 def _matern_kernel(self, X, Y): 82 """ 83 Compute the Matérn kernel using JAX for GPU or NumPy for CPU. 84 85 Parameters: 86 - X: array-like, shape (n_samples_X, n_features) 87 - Y: array-like, shape (n_samples_Y, n_features) 88 89 Returns: 90 - Kernel matrix, shape (n_samples_X, n_samples_Y) 91 """ 92 if self.backend == "gpu": 93 # Compute pairwise distances 94 dists = jnp.sqrt(jnp.sum((X[:, None, :] - Y[None, :, :]) ** 2, axis=2)) 95 scaled_dists = jnp.sqrt(2 * self.nu) * dists / self.length_scale 96 97 # Matérn kernel formula 98 coeff = (2 ** (1 - self.nu)) / jnp.exp(gammaln(self.nu)) 99 matern_kernel = coeff * (scaled_dists**self.nu) * kv(self.nu, scaled_dists) 100 matern_kernel = jnp.where( 101 dists == 0, 1.0, matern_kernel 102 ) # Handle the case where distance is 0 103 return matern_kernel 104 else: 105 # Use NumPy for CPU 106 from scipy.special import ( 107 gammaln, 108 kv, 109 ) # Ensure scipy.special is used for CPU 110 111 dists = np.sqrt(np.sum((X[:, None, :] - Y[None, :, :]) ** 2, axis=2)) 112 scaled_dists = np.sqrt(2 * self.nu) * dists / self.length_scale 113 114 # Matérn kernel formula 115 coeff = (2 ** (1 - self.nu)) / np.exp(gammaln(self.nu)) 116 matern_kernel = coeff * (scaled_dists**self.nu) * kv(self.nu, scaled_dists) 117 matern_kernel = np.where( 118 dists == 0, 1.0, matern_kernel 119 ) # Handle the case where distance is 0 120 return matern_kernel 121 122 def _get_kernel(self, X, Y): 123 if self.kernel == "linear": 124 return self._linear_kernel(X, Y) 125 elif self.kernel == "rbf": 126 return self._rbf_kernel(X, Y) 127 elif self.kernel == "matern": 128 return self._matern_kernel(X, Y) 129 else: 130 raise ValueError(f"Unsupported kernel: {self.kernel}") 131 132 def fit(self, X, y): 133 """ 134 Fit the Kernel Ridge Regression model. 135 136 Parameters: 137 - X: array-like, shape (n_samples, n_features) 138 Training data. 
139 - y: array-like, shape (n_samples,) 140 Target values. 141 """ 142 # Standardize the inputs 143 X = self.scaler.fit_transform(X) 144 self.X_fit_ = X 145 146 # Center the response 147 self.y_mean_ = np.mean(y) 148 y_centered = y - self.y_mean_ 149 150 n_samples = X.shape[0] 151 152 # Compute the kernel matrix 153 K = self._get_kernel(X, X) 154 self.K_ = K 155 self.y_fit_ = y_centered 156 157 if isinstance(self.alpha, (list, np.ndarray)): 158 # If alpha is a list or array, compute LOOE for each alpha 159 self.alphas_ = self.alpha # Store the list of alphas 160 self.dual_coefs_ = [] # Store dual coefficients for each alpha 161 self.looe_ = [] # Store LOOE for each alpha 162 163 for alpha in self.alpha: 164 G = K + alpha * np.eye(n_samples) 165 G_inv = np.linalg.inv(G) 166 diag_G_inv = np.diag(G_inv) 167 dual_coef = np.linalg.solve(G, y_centered) 168 looe = np.sum((dual_coef / diag_G_inv) ** 2) # Compute LOOE 169 self.dual_coefs_.append(dual_coef) 170 self.looe_.append(looe) 171 172 # Select the best alpha based on the smallest LOOE 173 best_index = np.argmin(self.looe_) 174 self.alpha_ = self.alpha[best_index] 175 self.dual_coef_ = self.dual_coefs_[best_index] 176 else: 177 # If alpha is a single value, proceed as usual 178 if self.backend == "gpu": 179 self.dual_coef_ = jnp.linalg.solve( 180 K + self.alpha * jnp.eye(n_samples), y_centered 181 ) 182 else: 183 self.dual_coef_ = np.linalg.solve( 184 K + self.alpha * np.eye(n_samples), y_centered 185 ) 186 187 return self 188 189 def predict(self, X, probs=False): 190 """ 191 Predict using the Kernel Ridge Regression model. 192 193 Parameters: 194 - X: array-like, shape (n_samples, n_features) 195 Test data. 196 197 Returns: 198 - Predicted values, shape (n_samples,). 199 """ 200 # Standardize the inputs 201 X = self.scaler.transform(X) 202 K = self._get_kernel(X, self.X_fit_) 203 if self.backend == "gpu": 204 preds = jnp.dot(K, self.dual_coef_) + self.y_mean_ 205 if probs: 206 # Compute similarity to self.X_fit_ 207 similarities = jnp.dot( 208 preds, self.X_fit_.T 209 ) # Shape: (n_samples, n_fit_) 210 # Apply softmax to get probabilities 211 return jaxsoftmax(similarities, axis=1) 212 return preds 213 else: 214 preds = np.dot(K, self.dual_coef_) + self.y_mean_ 215 if probs: 216 # Compute similarity to self.X_fit_ 217 similarities = np.dot( 218 preds, self.X_fit_.T 219 ) # Shape: (n_samples, n_fit_) 220 # Apply softmax to get probabilities 221 return softmax(similarities, axis=1) 222 return preds 223 224 def partial_fit(self, X, y): 225 """ 226 Incrementally fit the Kernel Ridge Regression model with new data using a recursive approach. 227 228 Parameters: 229 - X: array-like, shape (n_samples, n_features) 230 New training data. 231 - y: array-like, shape (n_samples,) 232 New target values. 233 234 Returns: 235 - self: object 236 The updated model. 
237 """ 238 # Standardize the inputs 239 X = ( 240 self.scaler.fit_transform(X) 241 if not hasattr(self, "X_fit_") 242 else self.scaler.transform(X) 243 ) 244 245 if not hasattr(self, "X_fit_"): 246 # Initialize with the first batch of data 247 self.X_fit_ = X 248 249 # Center the response 250 self.y_mean_ = np.mean(y) 251 y_centered = y - self.y_mean_ 252 self.y_fit_ = y_centered 253 254 n_samples = X.shape[0] 255 256 # Compute the kernel matrix for the initial data 257 self.K_ = self._get_kernel(X, X) 258 259 # Initialize dual coefficients for each alpha 260 if isinstance(self.alpha, (list, np.ndarray)): 261 self.dual_coefs_ = [np.zeros(n_samples) for _ in self.alpha] 262 else: 263 self.dual_coef_ = np.zeros(n_samples) 264 else: 265 # Incrementally update with new data 266 y_centered = y - self.y_mean_ # Center the new batch of responses 267 for x_new, y_new in zip(X, y_centered): 268 x_new = x_new.reshape(1, -1) # Ensure x_new is 2D 269 k_new = self._get_kernel(self.X_fit_, x_new).flatten() 270 271 # Compute the kernel value for the new data point 272 k_self = self._get_kernel(x_new, x_new).item() 273 274 if isinstance(self.alpha, (list, np.ndarray)): 275 # Update dual coefficients for each alpha 276 for idx, alpha in enumerate(self.alpha): 277 gamma_new = 1 / (k_self + alpha) 278 residual = y_new - np.dot(self.dual_coefs_[idx], k_new) 279 self.dual_coefs_[idx] = np.append( 280 self.dual_coefs_[idx], gamma_new * residual 281 ) 282 else: 283 # Update dual coefficients for a single alpha 284 gamma_new = 1 / (k_self + self.alpha) 285 residual = y_new - np.dot(self.dual_coef_, k_new) 286 self.dual_coef_ = np.append(self.dual_coef_, gamma_new * residual) 287 288 # Update the kernel matrix 289 self.K_ = np.block( 290 [[self.K_, k_new[:, None]], [k_new[None, :], np.array([[k_self]])]] 291 ) 292 293 # Update the stored data 294 self.X_fit_ = np.vstack([self.X_fit_, x_new]) 295 self.y_fit_ = np.append(self.y_fit_, y_new) 296 297 # Select the best alpha based on LOOE after the batch 298 if isinstance(self.alpha, (list, np.ndarray)): 299 self.looe_ = [] 300 for idx, alpha in enumerate(self.alpha): 301 G = self.K_ + alpha * np.eye(self.K_.shape[0]) 302 G_inv = np.linalg.inv(G) 303 diag_G_inv = np.diag(G_inv) 304 looe = np.sum((self.dual_coefs_[idx] / diag_G_inv) ** 2) 305 self.looe_.append(looe) 306 307 # Select the best alpha 308 best_index = np.argmin(self.looe_) 309 self.alpha_ = self.alpha[best_index] 310 self.dual_coef_ = self.dual_coefs_[best_index] 311 312 return self
Kernel Ridge Regression with optional GPU support, Matérn kernels, and automatic input standardization.
Parameters:
- alpha: float or list of floats Regularization parameter; when a list is given, the value with the smallest leave-one-out error is selected during fitting.
- kernel: str Kernel type ("linear", "rbf", or "matern").
- gamma: float Kernel coefficient for "rbf". Ignored for other kernels.
- nu: float Smoothness parameter for the Matérn kernel. Default is 1.5.
- length_scale: float Length scale parameter for the Matérn kernel. Default is 1.0.
- backend: str "cpu" or "gpu" (uses JAX if "gpu").
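A minimal usage sketch (kernel choice and regularization strength are illustrative):

```python
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=123)

krr = ns.KernelRidge(alpha=1.0, kernel="rbf")  # gamma defaults to 1 / n_features
krr.fit(X_train, y_train)
preds = krr.predict(X_test)
print(np.sqrt(np.mean((preds - y_test) ** 2)))  # test RMSE
```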
Fit the Kernel Ridge Regression model.
Parameters:
- X: array-like, shape (n_samples, n_features) Training data.
- y: array-like, shape (n_samples,) Target values.
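As the fit source above shows, `alpha` may also be a list of candidate values, in which case the value with the smallest leave-one-out error is retained; a hedged sketch of that pattern:

```python
import nnetsauce as ns
from sklearn.datasets import load_diabetes

X, y = load_diabetes(return_X_y=True)

# With a list of alphas, fit() computes a leave-one-out criterion for each
# candidate and keeps the best one (alpha_, dual_coef_).
krr = ns.KernelRidge(alpha=[0.01, 0.1, 1.0, 10.0], kernel="rbf")
krr.fit(X, y)
print(krr.alpha_)  # selected regularization value
print(krr.looe_)   # leave-one-out errors, one per candidate alpha
```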
Predict using the Kernel Ridge Regression model.
Parameters:
- X: array-like, shape (n_samples, n_features) Test data.
Returns:
- Predicted values, shape (n_samples,).
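The source also includes a `partial_fit` method for incremental updates; a hedged sketch feeding data in two batches (synthetic data, illustrative settings):

```python
import nnetsauce as ns
import numpy as np

rng = np.random.default_rng(123)
X = rng.normal(size=(100, 4))
y = X @ np.array([1.0, -2.0, 0.5, 0.0]) + rng.normal(scale=0.1, size=100)

krr = ns.KernelRidge(alpha=0.1, kernel="rbf")
krr.partial_fit(X[:50], y[:50])  # first batch initializes kernel matrix and coefficients
krr.partial_fit(X[50:], y[50:])  # later batches are absorbed point by point
print(krr.predict(X[:5]))
```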
757class LazyClassifier(LazyDeepClassifier): 758 """ 759 Fitting -- almost -- all the classification algorithms with 760 nnetsauce's CustomClassifier and returning their scores (no layers). 761 762 Parameters: 763 764 verbose: int, optional (default=0) 765 Any positive number for verbosity. 766 767 ignore_warnings: bool, optional (default=True) 768 When set to True, the warning related to algorigms that are not able to run are ignored. 769 770 custom_metric: function, optional (default=None) 771 When function is provided, models are evaluated based on the custom evaluation metric provided. 772 773 predictions: bool, optional (default=False) 774 When set to True, the predictions of all the models models are returned as dataframe. 775 776 sort_by: string, optional (default='Accuracy') 777 Sort models by a metric. Available options are 'Accuracy', 'Balanced Accuracy', 'ROC AUC', 'F1 Score' 778 or a custom metric identified by its name and provided by custom_metric. 779 780 random_state: int, optional (default=42) 781 Reproducibiility seed. 782 783 estimators: list, optional (default='all') 784 list of Estimators names or just 'all' (default='all') 785 786 preprocess: bool 787 preprocessing is done when set to True 788 789 n_jobs : int, when possible, run in parallel 790 For now, only used by individual models that support it. 791 792 All the other parameters are the same as CustomClassifier's. 793 794 Attributes: 795 796 models_: dict-object 797 Returns a dictionary with each model pipeline as value 798 with key as name of models. 799 800 best_model_: object 801 Returns the best model pipeline based on the sort_by metric. 802 803 Examples: 804 805 import nnetsauce as ns 806 import numpy as np 807 from sklearn import datasets 808 from sklearn.utils import shuffle 809 810 dataset = datasets.load_iris() 811 X = dataset.data 812 y = dataset.target 813 X, y = shuffle(X, y, random_state=123) 814 X = X.astype(np.float32) 815 y = y.astype(np.float32) 816 X_train, X_test = X[:100], X[100:] 817 y_train, y_test = y[:100], y[100:] 818 819 clf = ns.LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None) 820 models, predictions = clf.fit(X_train, X_test, y_train, y_test) 821 model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test) 822 print(models) 823 824 """ 825 826 def __init__( 827 self, 828 verbose=0, 829 ignore_warnings=True, 830 custom_metric=None, 831 predictions=False, 832 sort_by="Accuracy", 833 random_state=42, 834 estimators="all", 835 preprocess=False, 836 n_jobs=None, 837 # CustomClassifier attributes 838 obj=None, 839 n_hidden_features=5, 840 activation_name="relu", 841 a=0.01, 842 nodes_sim="sobol", 843 bias=True, 844 dropout=0, 845 direct_link=True, 846 n_clusters=2, 847 cluster_encode=True, 848 type_clust="kmeans", 849 type_scaling=("std", "std", "std"), 850 col_sample=1, 851 row_sample=1, 852 seed=123, 853 backend="cpu", 854 ): 855 super().__init__( 856 verbose=verbose, 857 ignore_warnings=ignore_warnings, 858 custom_metric=custom_metric, 859 predictions=predictions, 860 sort_by=sort_by, 861 random_state=random_state, 862 estimators=estimators, 863 preprocess=preprocess, 864 n_jobs=n_jobs, 865 n_layers=1, 866 obj=obj, 867 n_hidden_features=n_hidden_features, 868 activation_name=activation_name, 869 a=a, 870 nodes_sim=nodes_sim, 871 bias=bias, 872 dropout=dropout, 873 direct_link=direct_link, 874 n_clusters=n_clusters, 875 cluster_encode=cluster_encode, 876 type_clust=type_clust, 877 type_scaling=type_scaling, 878 col_sample=col_sample, 879 row_sample=row_sample, 
880 seed=seed, 881 backend=backend, 882 )
Fitting -- almost -- all the classification algorithms with nnetsauce's CustomClassifier and returning their scores (no layers).
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, warnings related to algorithms that are not able to run are ignored.
custom_metric: function, optional (default=None)
When function is provided, models are evaluated based on the custom evaluation metric provided.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models are returned as a dataframe.
sort_by: string, optional (default='Accuracy')
Sort models by a metric. Available options are 'Accuracy', 'Balanced Accuracy', 'ROC AUC', 'F1 Score'
or a custom metric identified by its name and provided by custom_metric.
random_state: int, optional (default=42)
Reproducibility seed.
estimators: list, optional (default='all')
list of estimator names, or just 'all' (default='all')
preprocess: bool
preprocessing is done when set to True
n_jobs : int, when possible, run in parallel
For now, only used by individual models that support it.
All the other parameters are the same as CustomClassifier's.
Attributes:
models_: dict-object
Returns a dictionary with each model pipeline as value
with key as name of models.
best_model_: object
Returns the best model pipeline based on the sort_by metric.
Examples:
import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle
dataset = datasets.load_iris()
X = dataset.data
y = dataset.target
X, y = shuffle(X, y, random_state=123)
X = X.astype(np.float32)
y = y.astype(np.float32)
X_train, X_test = X[:100], X[100:]
y_train, y_test = y[:100], y[100:]
clf = ns.LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)
model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test)
print(models)
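Building on the example above, and assuming `fit` populates the attributes documented earlier (`models_`, `best_model_`), the fitted pipelines can be inspected and reused:

```python
# Hedged continuation of the example above.
print(list(clf.models_.keys())[:5])  # names of the fitted model pipelines
best = clf.best_model_               # best pipeline according to sort_by
print(best.predict(X_test[:5]))      # it can be reused like any estimator
```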
646class LazyRegressor(LazyDeepRegressor): 647 """ 648 Fitting -- almost -- all the regression algorithms with 649 nnetsauce's CustomRegressor and returning their scores. 650 651 Parameters: 652 653 verbose: int, optional (default=0) 654 Any positive number for verbosity. 655 656 ignore_warnings: bool, optional (default=True) 657 When set to True, the warning related to algorigms that are not able to run are ignored. 658 659 custom_metric: function, optional (default=None) 660 When function is provided, models are evaluated based on the custom evaluation metric provided. 661 662 predictions: bool, optional (default=False) 663 When set to True, the predictions of all the models models are returned as dataframe. 664 665 sort_by: string, optional (default='RMSE') 666 Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken' and 'Custom Metric'. 667 or a custom metric identified by its name and provided by custom_metric. 668 669 random_state: int, optional (default=42) 670 Reproducibiility seed. 671 672 estimators: list, optional (default='all') 673 list of Estimators names or just 'all' (default='all') 674 675 preprocess: bool 676 preprocessing is done when set to True 677 678 n_jobs : int, when possible, run in parallel 679 For now, only used by individual models that support it. 680 681 All the other parameters are the same as CustomRegressor's. 682 683 Attributes: 684 685 models_: dict-object 686 Returns a dictionary with each model pipeline as value 687 with key as name of models. 688 689 best_model_: object 690 Returns the best model pipeline based on the sort_by metric. 691 692 Examples: 693 694 import nnetsauce as ns 695 import numpy as np 696 from sklearn import datasets 697 from sklearn.utils import shuffle 698 699 diabetes = datasets.load_diabetes() 700 X, y = shuffle(diabetes.data, diabetes.target, random_state=13) 701 X = X.astype(np.float32) 702 703 offset = int(X.shape[0] * 0.9) 704 X_train, y_train = X[:offset], y[:offset] 705 X_test, y_test = X[offset:], y[offset:] 706 707 reg = ns.LazyRegressor(verbose=0, ignore_warnings=False, 708 custom_metric=None) 709 models, predictions = reg.fit(X_train, X_test, y_train, y_test) 710 print(models) 711 712 """ 713 714 def __init__( 715 self, 716 verbose=0, 717 ignore_warnings=True, 718 custom_metric=None, 719 predictions=False, 720 sort_by="RMSE", 721 random_state=42, 722 estimators="all", 723 preprocess=False, 724 n_jobs=None, 725 # CustomRegressor attributes 726 obj=None, 727 n_hidden_features=5, 728 activation_name="relu", 729 a=0.01, 730 nodes_sim="sobol", 731 bias=True, 732 dropout=0, 733 direct_link=True, 734 n_clusters=2, 735 cluster_encode=True, 736 type_clust="kmeans", 737 type_scaling=("std", "std", "std"), 738 col_sample=1, 739 row_sample=1, 740 seed=123, 741 backend="cpu", 742 ): 743 super().__init__( 744 verbose=verbose, 745 ignore_warnings=ignore_warnings, 746 custom_metric=custom_metric, 747 predictions=predictions, 748 sort_by=sort_by, 749 random_state=random_state, 750 estimators=estimators, 751 preprocess=preprocess, 752 n_jobs=n_jobs, 753 n_layers=1, 754 obj=obj, 755 n_hidden_features=n_hidden_features, 756 activation_name=activation_name, 757 a=a, 758 nodes_sim=nodes_sim, 759 bias=bias, 760 dropout=dropout, 761 direct_link=direct_link, 762 n_clusters=n_clusters, 763 cluster_encode=cluster_encode, 764 type_clust=type_clust, 765 type_scaling=type_scaling, 766 col_sample=col_sample, 767 row_sample=row_sample, 768 seed=seed, 769 backend=backend, 770 )
Fitting -- almost -- all the regression algorithms with nnetsauce's CustomRegressor and returning their scores.
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, warnings related to algorithms that fail to run are ignored.
custom_metric: function, optional (default=None)
When a function is provided, models are also evaluated based on this custom metric.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models are returned as a data frame.
sort_by: string, optional (default='RMSE')
Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken' and 'Custom Metric' (a custom metric identified by its name and provided through custom_metric).
random_state: int, optional (default=42)
Reproducibility seed.
estimators: list, optional (default='all')
list of estimator names, or just 'all' (default='all')
preprocess: bool
preprocessing is done when set to True
n_jobs: int, optional (default=None)
Number of jobs to run in parallel, when possible. For now, only used by individual models that support it.
All the other parameters are the same as CustomRegressor's.
Attributes:
models_: dict-object
Dictionary with model names as keys and the corresponding fitted pipelines as values.
best_model_: object
Returns the best model pipeline based on the sort_by metric.
Examples:
import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle
diabetes = datasets.load_diabetes()
X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
X = X.astype(np.float32)
offset = int(X.shape[0] * 0.9)
X_train, y_train = X[:offset], y[:offset]
X_test, y_test = X[offset:], y[offset:]
reg = ns.LazyRegressor(verbose=0, ignore_warnings=False,
custom_metric=None, predictions=True)  # predictions=True so that fit returns (scores, predictions)
models, predictions = reg.fit(X_train, X_test, y_train, y_test)
print(models)
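Beyond the example above, the custom_metric and predictions arguments can be combined. The sketch below is not part of the original docstring: it uses a hypothetical median-absolute-error function as the custom metric and requests per-model predictions, so that fit returns both the leaderboard and the predictions.

```python
import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle

diabetes = datasets.load_diabetes()
X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
X = X.astype(np.float32)
offset = int(X.shape[0] * 0.9)
X_train, y_train = X[:offset], y[:offset]
X_test, y_test = X[offset:], y[offset:]

# hypothetical custom metric: any callable (y_true, y_pred) -> scalar
def median_absolute_error(y_true, y_pred):
    return float(np.median(np.abs(y_true - y_pred)))

reg = ns.LazyRegressor(verbose=0, ignore_warnings=True,
                       predictions=True,  # fit then returns (scores, predictions)
                       custom_metric=median_absolute_error)
scores, predictions = reg.fit(X_train, X_test, y_train, y_test)
print(scores.head())    # the leaderboard gains a "Custom metric" column
print(reg.best_model_)  # pipeline ranked first by the sort_by metric (RMSE by default)
```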
94class LazyDeepClassifier(Custom, ClassifierMixin): 95 """ 96 97 Fitting -- almost -- all the classification algorithms with layers of 98 nnetsauce's CustomClassifier and returning their scores. 99 100 Parameters: 101 102 verbose: int, optional (default=0) 103 Any positive number for verbosity. 104 105 ignore_warnings: bool, optional (default=True) 106 When set to True, the warning related to algorigms that are not 107 able to run are ignored. 108 109 custom_metric: function, optional (default=None) 110 When function is provided, models are evaluated based on the custom 111 evaluation metric provided. 112 113 predictions: bool, optional (default=False) 114 When set to True, the predictions of all the models models are 115 returned as data frame. 116 117 sort_by: string, optional (default='Accuracy') 118 Sort models by a metric. Available options are 'Accuracy', 119 'Balanced Accuracy', 'ROC AUC', 'F1 Score' or a custom metric 120 identified by its name and provided by custom_metric. 121 122 random_state: int, optional (default=42) 123 Reproducibiility seed. 124 125 estimators: list, optional (default='all') 126 list of Estimators names or just 'all' for > 90 classifiers 127 (default='all') 128 129 preprocess: bool, preprocessing is done when set to True 130 131 n_jobs: int, when possible, run in parallel 132 For now, only used by individual models that support it. 133 134 n_layers: int, optional (default=3) 135 Number of layers of CustomClassifiers to be used. 136 137 All the other parameters are the same as CustomClassifier's. 138 139 Attributes: 140 141 models_: dict-object 142 Returns a dictionary with each model pipeline as value 143 with key as name of models. 144 145 best_model_: object 146 Returns the best model pipeline. 147 148 Examples 149 150 ```python 151 import nnetsauce as ns 152 from sklearn.datasets import load_breast_cancer 153 from sklearn.model_selection import train_test_split 154 data = load_breast_cancer() 155 X = data.data 156 y= data.target 157 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, 158 random_state=123) 159 clf = ns.LazyDeepClassifier(verbose=0, ignore_warnings=True, custom_metric=None) 160 models, predictions = clf.fit(X_train, X_test, y_train, y_test) 161 model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test) 162 print(models) 163 ``` 164 165 """ 166 167 def __init__( 168 self, 169 verbose=0, 170 ignore_warnings=True, 171 custom_metric=None, 172 predictions=False, 173 sort_by="Accuracy", 174 random_state=42, 175 estimators="all", 176 preprocess=False, 177 n_jobs=None, 178 # Defining depth 179 n_layers=3, 180 # CustomClassifier attributes 181 obj=None, 182 n_hidden_features=5, 183 activation_name="relu", 184 a=0.01, 185 nodes_sim="sobol", 186 bias=True, 187 dropout=0, 188 direct_link=True, 189 n_clusters=2, 190 cluster_encode=True, 191 type_clust="kmeans", 192 type_scaling=("std", "std", "std"), 193 col_sample=1, 194 row_sample=1, 195 seed=123, 196 backend="cpu", 197 ): 198 self.verbose = verbose 199 self.ignore_warnings = ignore_warnings 200 self.custom_metric = custom_metric 201 self.predictions = predictions 202 self.sort_by = sort_by 203 self.models_ = {} 204 self.best_model_ = None 205 self.random_state = random_state 206 self.estimators = estimators 207 self.preprocess = preprocess 208 self.n_layers = n_layers - 1 209 self.n_jobs = n_jobs 210 super().__init__( 211 obj=obj, 212 n_hidden_features=n_hidden_features, 213 activation_name=activation_name, 214 a=a, 215 nodes_sim=nodes_sim, 216 bias=bias, 217 
dropout=dropout, 218 direct_link=direct_link, 219 n_clusters=n_clusters, 220 cluster_encode=cluster_encode, 221 type_clust=type_clust, 222 type_scaling=type_scaling, 223 col_sample=col_sample, 224 row_sample=row_sample, 225 seed=seed, 226 backend=backend, 227 ) 228 229 def fit(self, X_train, X_test, y_train, y_test): 230 """Fit classifiers to X_train and y_train, predict and score on X_test, 231 y_test. 232 233 Parameters: 234 235 X_train: array-like, 236 Training vectors, where rows is the number of samples 237 and columns is the number of features. 238 239 X_test: array-like, 240 Testing vectors, where rows is the number of samples 241 and columns is the number of features. 242 243 y_train: array-like, 244 Training vectors, where rows is the number of samples 245 and columns is the number of features. 246 247 y_test: array-like, 248 Testing vectors, where rows is the number of samples 249 and columns is the number of features. 250 251 Returns: 252 253 scores: Pandas DataFrame 254 Returns metrics of all the models in a Pandas DataFrame. 255 256 predictions: Pandas DataFrame 257 Returns predictions of all the models in a Pandas DataFrame. 258 """ 259 Accuracy = [] 260 B_Accuracy = [] 261 ROC_AUC = [] 262 F1 = [] 263 names = [] 264 TIME = [] 265 predictions = {} 266 267 if self.custom_metric is not None: 268 CUSTOM_METRIC = [] 269 270 if isinstance(X_train, np.ndarray): 271 X_train = pd.DataFrame(X_train) 272 X_test = pd.DataFrame(X_test) 273 274 numeric_features = X_train.select_dtypes(include=[np.number]).columns 275 categorical_features = X_train.select_dtypes(include=["object"]).columns 276 277 categorical_low, categorical_high = get_card_split( 278 X_train, categorical_features 279 ) 280 281 if self.preprocess is True: 282 preprocessor = ColumnTransformer( 283 transformers=[ 284 ("numeric", numeric_transformer, numeric_features), 285 ( 286 "categorical_low", 287 categorical_transformer_low, 288 categorical_low, 289 ), 290 ( 291 "categorical_high", 292 categorical_transformer_high, 293 categorical_high, 294 ), 295 ] 296 ) 297 298 # baseline models 299 try: 300 baseline_names = ["RandomForestClassifier", "XGBClassifier"] 301 baseline_models = [RandomForestClassifier(), xgb.XGBClassifier()] 302 except Exception as exception: 303 baseline_names = ["RandomForestClassifier"] 304 baseline_models = [RandomForestClassifier()] 305 306 for name, model in zip(baseline_names, baseline_models): 307 start = time.time() 308 try: 309 model.fit(X_train, y_train) 310 self.models_[name] = model 311 y_pred = model.predict(X_test) 312 accuracy = accuracy_score(y_test, y_pred, normalize=True) 313 b_accuracy = balanced_accuracy_score(y_test, y_pred) 314 f1 = f1_score(y_test, y_pred, average="weighted") 315 try: 316 roc_auc = roc_auc_score(y_test, y_pred) 317 except Exception as exception: 318 roc_auc = None 319 if self.ignore_warnings is False: 320 print("ROC AUC couldn't be calculated for " + name) 321 print(exception) 322 names.append(name) 323 Accuracy.append(accuracy) 324 B_Accuracy.append(b_accuracy) 325 ROC_AUC.append(roc_auc) 326 F1.append(f1) 327 TIME.append(time.time() - start) 328 if self.custom_metric is not None: 329 custom_metric = self.custom_metric(y_test, y_pred) 330 CUSTOM_METRIC.append(custom_metric) 331 if self.verbose > 0: 332 if self.custom_metric is not None: 333 print( 334 { 335 "Model": name, 336 "Accuracy": accuracy, 337 "Balanced Accuracy": b_accuracy, 338 "ROC AUC": roc_auc, 339 "F1 Score": f1, 340 self.custom_metric.__name__: custom_metric, 341 "Time taken": time.time() - start, 
342 } 343 ) 344 else: 345 print( 346 { 347 "Model": name, 348 "Accuracy": accuracy, 349 "Balanced Accuracy": b_accuracy, 350 "ROC AUC": roc_auc, 351 "F1 Score": f1, 352 "Time taken": time.time() - start, 353 } 354 ) 355 if self.predictions: 356 predictions[name] = y_pred 357 except Exception as exception: 358 if self.ignore_warnings is False: 359 print(name + " model failed to execute") 360 print(exception) 361 362 if self.estimators == "all": 363 self.classifiers = [ 364 item 365 for sublist in [ 366 DEEPCLASSIFIERS, 367 DEEPMULTITASKCLASSIFIERS, 368 DEEPSIMPLEMULTITASKCLASSIFIERS, 369 ] 370 for item in sublist 371 ] 372 else: 373 self.classifiers = ( 374 [ 375 ("DeepCustomClassifier(" + est[0] + ")", est[1]) 376 for est in all_estimators() 377 if ( 378 issubclass(est[1], ClassifierMixin) 379 and (est[0] in self.estimators) 380 ) 381 ] 382 + [ 383 ( 384 "DeepMultitaskClassifier(" + est[0] + ")", 385 partial(MultitaskClassifier, obj=est[1]()), 386 ) 387 for est in all_estimators() 388 if ( 389 issubclass(est[1], RegressorMixin) 390 and (est[0] in self.estimators) 391 ) 392 ] 393 + [ 394 ( 395 "DeepSimpleMultitaskClassifier(" + est[0] + ")", 396 partial(SimpleMultitaskClassifier, obj=est[1]()), 397 ) 398 for est in all_estimators() 399 if ( 400 issubclass(est[1], RegressorMixin) 401 and (est[0] in self.estimators) 402 ) 403 ] 404 ) 405 406 if self.preprocess is True: 407 408 for name, model in tqdm(self.classifiers): # do parallel exec 409 410 other_args = {} # use this trick for `random_state` too --> refactor 411 try: 412 if ( 413 "n_jobs" in model().get_params().keys() 414 and name.find("LogisticRegression") == -1 415 ): 416 other_args["n_jobs"] = self.n_jobs 417 except Exception: 418 pass 419 420 start = time.time() 421 422 try: 423 if "random_state" in model().get_params().keys(): 424 layer_clf = CustomClassifier( 425 obj=model(random_state=self.random_state), 426 n_hidden_features=self.n_hidden_features, 427 activation_name=self.activation_name, 428 a=self.a, 429 nodes_sim=self.nodes_sim, 430 bias=self.bias, 431 dropout=self.dropout, 432 direct_link=self.direct_link, 433 n_clusters=self.n_clusters, 434 cluster_encode=self.cluster_encode, 435 type_clust=self.type_clust, 436 type_scaling=self.type_scaling, 437 col_sample=self.col_sample, 438 row_sample=self.row_sample, 439 seed=self.seed, 440 backend=self.backend, 441 cv_calibration=None, 442 ) 443 444 else: 445 layer_clf = CustomClassifier( 446 obj=model(), 447 n_hidden_features=self.n_hidden_features, 448 activation_name=self.activation_name, 449 a=self.a, 450 nodes_sim=self.nodes_sim, 451 bias=self.bias, 452 dropout=self.dropout, 453 direct_link=self.direct_link, 454 n_clusters=self.n_clusters, 455 cluster_encode=self.cluster_encode, 456 type_clust=self.type_clust, 457 type_scaling=self.type_scaling, 458 col_sample=self.col_sample, 459 row_sample=self.row_sample, 460 seed=self.seed, 461 backend=self.backend, 462 cv_calibration=None, 463 ) 464 465 layer_clf.fit(X_train, y_train) 466 467 for _ in range(self.n_layers): 468 layer_clf = deepcopy( 469 CustomClassifier( 470 obj=layer_clf, 471 n_hidden_features=self.n_hidden_features, 472 activation_name=self.activation_name, 473 a=self.a, 474 nodes_sim=self.nodes_sim, 475 bias=self.bias, 476 dropout=self.dropout, 477 direct_link=self.direct_link, 478 n_clusters=self.n_clusters, 479 cluster_encode=self.cluster_encode, 480 type_clust=self.type_clust, 481 type_scaling=self.type_scaling, 482 col_sample=self.col_sample, 483 row_sample=self.row_sample, 484 seed=self.seed, 485 
backend=self.backend, 486 cv_calibration=None, 487 ) 488 ) 489 490 pipe = Pipeline( 491 [ 492 ("preprocessor", preprocessor), 493 ("classifier", layer_clf), 494 ] 495 ) 496 497 pipe.fit(X_train, y_train) 498 self.models_[name] = pipe 499 y_pred = pipe.predict(X_test) 500 accuracy = accuracy_score(y_test, y_pred, normalize=True) 501 b_accuracy = balanced_accuracy_score(y_test, y_pred) 502 f1 = f1_score(y_test, y_pred, average="weighted") 503 try: 504 roc_auc = roc_auc_score(y_test, y_pred) 505 except Exception as exception: 506 roc_auc = None 507 if self.ignore_warnings is False: 508 print("ROC AUC couldn't be calculated for " + name) 509 print(exception) 510 names.append(name) 511 Accuracy.append(accuracy) 512 B_Accuracy.append(b_accuracy) 513 ROC_AUC.append(roc_auc) 514 F1.append(f1) 515 TIME.append(time.time() - start) 516 if self.custom_metric is not None: 517 custom_metric = self.custom_metric(y_test, y_pred) 518 CUSTOM_METRIC.append(custom_metric) 519 if self.verbose > 0: 520 if self.custom_metric is not None: 521 print( 522 { 523 "Model": name, 524 "Accuracy": accuracy, 525 "Balanced Accuracy": b_accuracy, 526 "ROC AUC": roc_auc, 527 "F1 Score": f1, 528 self.custom_metric.__name__: custom_metric, 529 "Time taken": time.time() - start, 530 } 531 ) 532 else: 533 print( 534 { 535 "Model": name, 536 "Accuracy": accuracy, 537 "Balanced Accuracy": b_accuracy, 538 "ROC AUC": roc_auc, 539 "F1 Score": f1, 540 "Time taken": time.time() - start, 541 } 542 ) 543 if self.predictions: 544 predictions[name] = y_pred 545 except Exception as exception: 546 if self.ignore_warnings is False: 547 print(name + " model failed to execute") 548 print(exception) 549 550 else: # no preprocessing 551 552 for name, model in tqdm(self.classifiers): # do parallel exec 553 start = time.time() 554 try: 555 if "random_state" in model().get_params().keys(): 556 layer_clf = CustomClassifier( 557 obj=model(random_state=self.random_state), 558 n_hidden_features=self.n_hidden_features, 559 activation_name=self.activation_name, 560 a=self.a, 561 nodes_sim=self.nodes_sim, 562 bias=self.bias, 563 dropout=self.dropout, 564 direct_link=self.direct_link, 565 n_clusters=self.n_clusters, 566 cluster_encode=self.cluster_encode, 567 type_clust=self.type_clust, 568 type_scaling=self.type_scaling, 569 col_sample=self.col_sample, 570 row_sample=self.row_sample, 571 seed=self.seed, 572 backend=self.backend, 573 cv_calibration=None, 574 ) 575 576 else: 577 layer_clf = CustomClassifier( 578 obj=model(), 579 n_hidden_features=self.n_hidden_features, 580 activation_name=self.activation_name, 581 a=self.a, 582 nodes_sim=self.nodes_sim, 583 bias=self.bias, 584 dropout=self.dropout, 585 direct_link=self.direct_link, 586 n_clusters=self.n_clusters, 587 cluster_encode=self.cluster_encode, 588 type_clust=self.type_clust, 589 type_scaling=self.type_scaling, 590 col_sample=self.col_sample, 591 row_sample=self.row_sample, 592 seed=self.seed, 593 backend=self.backend, 594 cv_calibration=None, 595 ) 596 597 layer_clf.fit(X_train, y_train) 598 599 for _ in range(self.n_layers): 600 layer_clf = deepcopy( 601 CustomClassifier( 602 obj=layer_clf, 603 n_hidden_features=self.n_hidden_features, 604 activation_name=self.activation_name, 605 a=self.a, 606 nodes_sim=self.nodes_sim, 607 bias=self.bias, 608 dropout=self.dropout, 609 direct_link=self.direct_link, 610 n_clusters=self.n_clusters, 611 cluster_encode=self.cluster_encode, 612 type_clust=self.type_clust, 613 type_scaling=self.type_scaling, 614 col_sample=self.col_sample, 615 
row_sample=self.row_sample, 616 seed=self.seed, 617 backend=self.backend, 618 cv_calibration=None, 619 ) 620 ) 621 622 # layer_clf.fit(X_train, y_train) 623 624 layer_clf.fit(X_train, y_train) 625 626 self.models_[name] = layer_clf 627 y_pred = layer_clf.predict(X_test) 628 accuracy = accuracy_score(y_test, y_pred, normalize=True) 629 b_accuracy = balanced_accuracy_score(y_test, y_pred) 630 f1 = f1_score(y_test, y_pred, average="weighted") 631 try: 632 roc_auc = roc_auc_score(y_test, y_pred) 633 except Exception as exception: 634 roc_auc = None 635 if self.ignore_warnings is False: 636 print("ROC AUC couldn't be calculated for " + name) 637 print(exception) 638 names.append(name) 639 Accuracy.append(accuracy) 640 B_Accuracy.append(b_accuracy) 641 ROC_AUC.append(roc_auc) 642 F1.append(f1) 643 TIME.append(time.time() - start) 644 if self.custom_metric is not None: 645 custom_metric = self.custom_metric(y_test, y_pred) 646 CUSTOM_METRIC.append(custom_metric) 647 if self.verbose > 0: 648 if self.custom_metric is not None: 649 print( 650 { 651 "Model": name, 652 "Accuracy": accuracy, 653 "Balanced Accuracy": b_accuracy, 654 "ROC AUC": roc_auc, 655 "F1 Score": f1, 656 self.custom_metric.__name__: custom_metric, 657 "Time taken": time.time() - start, 658 } 659 ) 660 else: 661 print( 662 { 663 "Model": name, 664 "Accuracy": accuracy, 665 "Balanced Accuracy": b_accuracy, 666 "ROC AUC": roc_auc, 667 "F1 Score": f1, 668 "Time taken": time.time() - start, 669 } 670 ) 671 if self.predictions: 672 predictions[name] = y_pred 673 except Exception as exception: 674 if self.ignore_warnings is False: 675 print(name + " model failed to execute") 676 print(exception) 677 678 if self.custom_metric is None: 679 scores = pd.DataFrame( 680 { 681 "Model": names, 682 "Accuracy": Accuracy, 683 "Balanced Accuracy": B_Accuracy, 684 "ROC AUC": ROC_AUC, 685 "F1 Score": F1, 686 "Time Taken": TIME, 687 } 688 ) 689 else: 690 scores = pd.DataFrame( 691 { 692 "Model": names, 693 "Accuracy": Accuracy, 694 "Balanced Accuracy": B_Accuracy, 695 "ROC AUC": ROC_AUC, 696 "F1 Score": F1, 697 "Custom metric": CUSTOM_METRIC, 698 "Time Taken": TIME, 699 } 700 ) 701 scores = scores.sort_values(by=self.sort_by, ascending=False).set_index("Model") 702 703 self.best_model_ = self.models_[scores.index[0]] 704 705 if self.predictions is True: 706 707 return scores, predictions 708 709 return scores 710 711 def get_best_model(self): 712 """ 713 This function returns the best model pipeline based on the sort_by metric. 714 715 Returns: 716 717 best_model: object, 718 Returns the best model pipeline based on the sort_by metric. 719 720 """ 721 return self.best_model_ 722 723 def provide_models(self, X_train, X_test, y_train, y_test): 724 """Returns all the model objects trained. If fit hasn't been called yet, 725 then it's called to return the models. 726 727 Parameters: 728 729 X_train: array-like, 730 Training vectors, where rows is the number of samples 731 and columns is the number of features. 732 733 X_test: array-like, 734 Testing vectors, where rows is the number of samples 735 and columns is the number of features. 736 737 y_train: array-like, 738 Training vectors, where rows is the number of samples 739 and columns is the number of features. 740 741 y_test: array-like, 742 Testing vectors, where rows is the number of samples 743 and columns is the number of features. 744 745 Returns: 746 747 models: dict-object, 748 Returns a dictionary with each model's pipeline as value 749 and key = name of the model. 
750 """ 751 if len(self.models_.keys()) == 0: 752 self.fit(X_train, X_test, y_train, y_test) 753 754 return self.models_
Fitting -- almost -- all the classification algorithms with layers of nnetsauce's CustomClassifier and returning their scores.
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, warnings related to algorithms that fail to run are ignored.
custom_metric: function, optional (default=None)
When a function is provided, models are also evaluated based on this custom metric.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models are returned as a data frame.
sort_by: string, optional (default='Accuracy')
Sort models by a metric. Available options are 'Accuracy',
'Balanced Accuracy', 'ROC AUC', 'F1 Score' or a custom metric
identified by its name and provided by custom_metric.
random_state: int, optional (default=42)
Reproducibility seed.
estimators: list, optional (default='all')
list of estimator names, or just 'all' for > 90 classifiers
(default='all')
preprocess: bool, preprocessing is done when set to True
n_jobs: int, optional (default=None)
Number of jobs to run in parallel, when possible. For now, only used by individual models that support it.
n_layers: int, optional (default=3)
Number of layers of CustomClassifiers to be used.
All the other parameters are the same as CustomClassifier's.
Attributes:
models_: dict-object
Dictionary with model names as keys and the corresponding fitted pipelines as values.
best_model_: object
Returns the best model pipeline.
Examples:
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
data = load_breast_cancer()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2,
random_state=123)
clf = ns.LazyDeepClassifier(verbose=0, ignore_warnings=True, custom_metric=None, predictions=True)  # predictions=True so that fit returns (scores, predictions)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)
model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test)
print(models)
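As a complement, the sketch below is not from the original docstring: it restricts the search to a handful of base learners through the estimators argument and stacks two CustomClassifier layers. The estimator names are ordinary scikit-learn class names used purely for illustration.

```python
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=123)

# only try two base learners instead of the full catalogue, with 2 layers
clf = ns.LazyDeepClassifier(verbose=0, ignore_warnings=True,
                            n_layers=2,
                            estimators=["LogisticRegression",
                                        "ExtraTreesClassifier"])
models = clf.fit(X_train, X_test, y_train, y_test)  # scores DataFrame only (predictions=False)
print(models)
print(clf.get_best_model())  # best pipeline according to sort_by ("Accuracy" by default)
```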
def fit(self, X_train, X_test, y_train, y_test)
Fit classifiers to X_train and y_train, predict and score on X_test, y_test.
Parameters:
X_train: array-like,
Training vectors, where each row is a sample and each column a feature.
X_test: array-like,
Testing vectors, where each row is a sample and each column a feature.
y_train: array-like,
Training target values, one per training sample.
y_test: array-like,
Testing target values, one per testing sample.
Returns:
scores: Pandas DataFrame
Returns metrics of all the models in a Pandas DataFrame.
predictions: Pandas DataFrame
Returns predictions of all the models in a Pandas DataFrame (only returned when predictions=True).
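For illustration, here is a minimal sketch, not part of the original docstring: the returned scores DataFrame is indexed by model name and can be re-sorted or filtered like any pandas DataFrame, and with predictions=True fit also returns the per-model test-set predictions.

```python
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=123)

clf = ns.LazyDeepClassifier(verbose=0, ignore_warnings=True, predictions=True)
scores, predictions = clf.fit(X_train, X_test, y_train, y_test)

print(scores.head())                                            # ranked by "Accuracy" by default
print(scores.sort_values("F1 Score", ascending=False).head(3))  # re-rank by another column
# `predictions` holds each model's test-set predictions, keyed by model name
```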
def provide_models(self, X_train, X_test, y_train, y_test)
Returns all the model objects trained. If fit hasn't been called yet, then it's called to return the models.
Parameters:
X_train: array-like, Training vectors, where each row is a sample and each column a feature.
X_test: array-like, Testing vectors, where each row is a sample and each column a feature.
y_train: array-like, Training target values, one per training sample.
y_test: array-like, Testing target values, one per testing sample.
Returns:
models: dict-object,
Dictionary with model names as keys and the corresponding fitted pipelines as values.
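A short usage sketch, not part of the original docstring and assuming the breast-cancer split used above: provide_models fits everything if needed and returns the dictionary, from which a single fitted pipeline can be pulled out and reused.

```python
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=123)

clf = ns.LazyDeepClassifier(verbose=0, ignore_warnings=True)
model_dictionary = clf.provide_models(X_train, X_test, y_train, y_test)  # fits if needed

print(list(model_dictionary.keys())[:5])  # names of the fitted models
# the RandomForestClassifier baseline is fitted first, so it is normally present
rf = model_dictionary.get("RandomForestClassifier")
if rf is not None:
    print(rf.predict(X_test)[:10])
```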
90class LazyDeepRegressor(Custom, RegressorMixin): 91 """ 92 Fitting -- almost -- all the regression algorithms with layers of 93 nnetsauce's CustomRegressor and returning their scores. 94 95 Parameters: 96 97 verbose: int, optional (default=0) 98 Any positive number for verbosity. 99 100 ignore_warnings: bool, optional (default=True) 101 When set to True, the warning related to algorigms that are not able to run are ignored. 102 103 custom_metric: function, optional (default=None) 104 When function is provided, models are evaluated based on the custom evaluation metric provided. 105 106 predictions: bool, optional (default=False) 107 When set to True, the predictions of all the models models are returned as dataframe. 108 109 sort_by: string, optional (default='RMSE') 110 Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken' and 'Custom Metric'. 111 or a custom metric identified by its name and provided by custom_metric. 112 113 random_state: int, optional (default=42) 114 Reproducibiility seed. 115 116 estimators: list, optional (default='all') 117 list of Estimators names or just 'all' (default='all') 118 119 preprocess: bool 120 preprocessing is done when set to True 121 122 n_jobs : int, when possible, run in parallel 123 For now, only used by individual models that support it. 124 125 n_layers: int, optional (default=3) 126 Number of layers of CustomRegressors to be used. 127 128 All the other parameters are the same as CustomRegressor's. 129 130 Attributes: 131 132 models_: dict-object 133 Returns a dictionary with each model pipeline as value 134 with key as name of models. 135 136 best_model_: object 137 Returns the best model pipeline based on the sort_by metric. 138 139 Examples: 140 141 import nnetsauce as ns 142 import numpy as np 143 from sklearn import datasets 144 from sklearn.utils import shuffle 145 146 diabetes = datasets.load_diabetes() 147 X, y = shuffle(diabetes.data, diabetes.target, random_state=13) 148 X = X.astype(np.float32) 149 150 offset = int(X.shape[0] * 0.9) 151 X_train, y_train = X[:offset], y[:offset] 152 X_test, y_test = X[offset:], y[offset:] 153 154 reg = ns.LazyDeepRegressor(verbose=0, ignore_warnings=False, custom_metric=None) 155 models, predictions = reg.fit(X_train, X_test, y_train, y_test) 156 print(models) 157 158 """ 159 160 def __init__( 161 self, 162 verbose=0, 163 ignore_warnings=True, 164 custom_metric=None, 165 predictions=False, 166 sort_by="RMSE", 167 random_state=42, 168 estimators="all", 169 preprocess=False, 170 n_jobs=None, 171 # Defining depth 172 n_layers=3, 173 # CustomRegressor attributes 174 obj=None, 175 n_hidden_features=5, 176 activation_name="relu", 177 a=0.01, 178 nodes_sim="sobol", 179 bias=True, 180 dropout=0, 181 direct_link=True, 182 n_clusters=2, 183 cluster_encode=True, 184 type_clust="kmeans", 185 type_scaling=("std", "std", "std"), 186 col_sample=1, 187 row_sample=1, 188 seed=123, 189 backend="cpu", 190 ): 191 self.verbose = verbose 192 self.ignore_warnings = ignore_warnings 193 self.custom_metric = custom_metric 194 self.predictions = predictions 195 self.sort_by = sort_by 196 self.models_ = {} 197 self.best_model_ = None 198 self.random_state = random_state 199 self.estimators = estimators 200 self.preprocess = preprocess 201 self.n_layers = n_layers - 1 202 self.n_jobs = n_jobs 203 super().__init__( 204 obj=obj, 205 n_hidden_features=n_hidden_features, 206 activation_name=activation_name, 207 a=a, 208 nodes_sim=nodes_sim, 209 bias=bias, 210 dropout=dropout, 211 
direct_link=direct_link, 212 n_clusters=n_clusters, 213 cluster_encode=cluster_encode, 214 type_clust=type_clust, 215 type_scaling=type_scaling, 216 col_sample=col_sample, 217 row_sample=row_sample, 218 seed=seed, 219 backend=backend, 220 ) 221 222 def fit(self, X_train, X_test, y_train, y_test): 223 """Fit Regression algorithms to X_train and y_train, predict and score on X_test, y_test. 224 225 Parameters: 226 227 X_train : array-like, 228 Training vectors, where rows is the number of samples 229 and columns is the number of features. 230 231 X_test : array-like, 232 Testing vectors, where rows is the number of samples 233 and columns is the number of features. 234 235 y_train : array-like, 236 Training vectors, where rows is the number of samples 237 and columns is the number of features. 238 239 y_test : array-like, 240 Testing vectors, where rows is the number of samples 241 and columns is the number of features. 242 243 Returns: 244 ------- 245 scores: Pandas DataFrame 246 Returns metrics of all the models in a Pandas DataFrame. 247 248 predictions : Pandas DataFrame 249 Returns predictions of all the models in a Pandas DataFrame. 250 251 """ 252 R2 = [] 253 ADJR2 = [] 254 RMSE = [] 255 # WIN = [] 256 names = [] 257 TIME = [] 258 predictions = {} 259 260 if self.custom_metric: 261 CUSTOM_METRIC = [] 262 263 if isinstance(X_train, np.ndarray): 264 X_train = pd.DataFrame(X_train) 265 X_test = pd.DataFrame(X_test) 266 267 numeric_features = X_train.select_dtypes(include=[np.number]).columns 268 categorical_features = X_train.select_dtypes(include=["object"]).columns 269 270 categorical_low, categorical_high = get_card_split( 271 X_train, categorical_features 272 ) 273 274 if self.preprocess is True: 275 preprocessor = ColumnTransformer( 276 transformers=[ 277 ("numeric", numeric_transformer, numeric_features), 278 ( 279 "categorical_low", 280 categorical_transformer_low, 281 categorical_low, 282 ), 283 ( 284 "categorical_high", 285 categorical_transformer_high, 286 categorical_high, 287 ), 288 ] 289 ) 290 291 # base models 292 try: 293 baseline_names = ["RandomForestRegressor", "XGBRegressor"] 294 baseline_models = [RandomForestRegressor(), xgb.XGBRegressor()] 295 except Exception as exception: 296 baseline_names = ["RandomForestRegressor"] 297 baseline_models = [RandomForestRegressor()] 298 299 for name, model in zip(baseline_names, baseline_models): 300 start = time.time() 301 try: 302 model.fit(X_train, y_train) 303 self.models_[name] = model 304 y_pred = model.predict(X_test) 305 r_squared = r2_score(y_test, y_pred) 306 adj_rsquared = adjusted_rsquared( 307 r_squared, X_test.shape[0], X_test.shape[1] 308 ) 309 rmse = np.sqrt(np.mean((y_test - y_pred) ** 2)) 310 311 names.append(name) 312 R2.append(r_squared) 313 ADJR2.append(adj_rsquared) 314 RMSE.append(rmse) 315 TIME.append(time.time() - start) 316 317 if self.custom_metric: 318 custom_metric = self.custom_metric(y_test, y_pred) 319 CUSTOM_METRIC.append(custom_metric) 320 321 if self.verbose > 0: 322 scores_verbose = { 323 "Model": name, 324 "R-Squared": r_squared, 325 "Adjusted R-Squared": adj_rsquared, 326 "RMSE": rmse, 327 "Time taken": time.time() - start, 328 } 329 330 if self.custom_metric: 331 scores_verbose[self.custom_metric.__name__] = custom_metric 332 333 print(scores_verbose) 334 if self.predictions: 335 predictions[name] = y_pred 336 except Exception as exception: 337 if self.ignore_warnings is False: 338 print(name + " model failed to execute") 339 print(exception) 340 341 if self.estimators == "all": 342 
self.regressors = DEEPREGRESSORS 343 else: 344 self.regressors = [ 345 ("DeepCustomRegressor(" + est[0] + ")", est[1]) 346 for est in all_estimators() 347 if (issubclass(est[1], RegressorMixin) and (est[0] in self.estimators)) 348 ] 349 350 if self.preprocess is True: 351 352 for name, model in tqdm(self.regressors): # do parallel exec 353 start = time.time() 354 try: 355 if "random_state" in model().get_params().keys(): 356 layer_regr = CustomRegressor( 357 obj=model(random_state=self.random_state), 358 n_hidden_features=self.n_hidden_features, 359 activation_name=self.activation_name, 360 a=self.a, 361 nodes_sim=self.nodes_sim, 362 bias=self.bias, 363 dropout=self.dropout, 364 direct_link=self.direct_link, 365 n_clusters=self.n_clusters, 366 cluster_encode=self.cluster_encode, 367 type_clust=self.type_clust, 368 type_scaling=self.type_scaling, 369 col_sample=self.col_sample, 370 row_sample=self.row_sample, 371 seed=self.seed, 372 backend=self.backend, 373 ) 374 else: 375 layer_regr = CustomRegressor( 376 obj=model(), 377 n_hidden_features=self.n_hidden_features, 378 activation_name=self.activation_name, 379 a=self.a, 380 nodes_sim=self.nodes_sim, 381 bias=self.bias, 382 dropout=self.dropout, 383 direct_link=self.direct_link, 384 n_clusters=self.n_clusters, 385 cluster_encode=self.cluster_encode, 386 type_clust=self.type_clust, 387 type_scaling=self.type_scaling, 388 col_sample=self.col_sample, 389 row_sample=self.row_sample, 390 seed=self.seed, 391 backend=self.backend, 392 ) 393 394 for _ in range(self.n_layers): 395 layer_regr = deepcopy( 396 CustomRegressor( 397 obj=layer_regr, 398 n_hidden_features=self.n_hidden_features, 399 activation_name=self.activation_name, 400 a=self.a, 401 nodes_sim=self.nodes_sim, 402 bias=self.bias, 403 dropout=self.dropout, 404 direct_link=self.direct_link, 405 n_clusters=self.n_clusters, 406 cluster_encode=self.cluster_encode, 407 type_clust=self.type_clust, 408 type_scaling=self.type_scaling, 409 col_sample=self.col_sample, 410 row_sample=self.row_sample, 411 seed=self.seed, 412 backend=self.backend, 413 ) 414 ) 415 416 layer_regr.fit(X_train, y_train) 417 418 pipe = Pipeline( 419 steps=[ 420 ("preprocessor", preprocessor), 421 ("regressor", layer_regr), 422 ] 423 ) 424 425 pipe.fit(X_train, y_train) 426 427 self.models_[name] = pipe 428 y_pred = pipe.predict(X_test) 429 r_squared = r2_score(y_test, y_pred) 430 adj_rsquared = adjusted_rsquared( 431 r_squared, X_test.shape[0], X_test.shape[1] 432 ) 433 rmse = np.sqrt(np.mean((y_test - y_pred) ** 2)) 434 435 names.append(name) 436 R2.append(r_squared) 437 ADJR2.append(adj_rsquared) 438 RMSE.append(rmse) 439 TIME.append(time.time() - start) 440 441 if self.custom_metric: 442 custom_metric = self.custom_metric(y_test, y_pred) 443 CUSTOM_METRIC.append(custom_metric) 444 445 if self.verbose > 0: 446 scores_verbose = { 447 "Model": name, 448 "R-Squared": r_squared, 449 "Adjusted R-Squared": adj_rsquared, 450 "RMSE": rmse, 451 "Time taken": time.time() - start, 452 } 453 454 if self.custom_metric: 455 scores_verbose[self.custom_metric.__name__] = custom_metric 456 457 print(scores_verbose) 458 if self.predictions: 459 predictions[name] = y_pred 460 except Exception as exception: 461 if self.ignore_warnings is False: 462 print(name + " model failed to execute") 463 print(exception) 464 465 else: # no preprocessing 466 467 for name, model in tqdm(self.regressors): # do parallel exec 468 start = time.time() 469 try: 470 if "random_state" in model().get_params().keys(): 471 layer_regr = CustomRegressor( 472 
obj=model(random_state=self.random_state), 473 n_hidden_features=self.n_hidden_features, 474 activation_name=self.activation_name, 475 a=self.a, 476 nodes_sim=self.nodes_sim, 477 bias=self.bias, 478 dropout=self.dropout, 479 direct_link=self.direct_link, 480 n_clusters=self.n_clusters, 481 cluster_encode=self.cluster_encode, 482 type_clust=self.type_clust, 483 type_scaling=self.type_scaling, 484 col_sample=self.col_sample, 485 row_sample=self.row_sample, 486 seed=self.seed, 487 backend=self.backend, 488 ) 489 else: 490 layer_regr = CustomRegressor( 491 obj=model(), 492 n_hidden_features=self.n_hidden_features, 493 activation_name=self.activation_name, 494 a=self.a, 495 nodes_sim=self.nodes_sim, 496 bias=self.bias, 497 dropout=self.dropout, 498 direct_link=self.direct_link, 499 n_clusters=self.n_clusters, 500 cluster_encode=self.cluster_encode, 501 type_clust=self.type_clust, 502 type_scaling=self.type_scaling, 503 col_sample=self.col_sample, 504 row_sample=self.row_sample, 505 seed=self.seed, 506 backend=self.backend, 507 ) 508 509 layer_regr.fit(X_train, y_train) 510 511 for _ in range(self.n_layers): 512 layer_regr = deepcopy( 513 CustomRegressor( 514 obj=layer_regr, 515 n_hidden_features=self.n_hidden_features, 516 activation_name=self.activation_name, 517 a=self.a, 518 nodes_sim=self.nodes_sim, 519 bias=self.bias, 520 dropout=self.dropout, 521 direct_link=self.direct_link, 522 n_clusters=self.n_clusters, 523 cluster_encode=self.cluster_encode, 524 type_clust=self.type_clust, 525 type_scaling=self.type_scaling, 526 col_sample=self.col_sample, 527 row_sample=self.row_sample, 528 seed=self.seed, 529 backend=self.backend, 530 ) 531 ) 532 533 # layer_regr.fit(X_train, y_train) 534 535 layer_regr.fit(X_train, y_train) 536 537 self.models_[name] = layer_regr 538 y_pred = layer_regr.predict(X_test) 539 540 r_squared = r2_score(y_test, y_pred) 541 adj_rsquared = adjusted_rsquared( 542 r_squared, X_test.shape[0], X_test.shape[1] 543 ) 544 rmse = np.sqrt(np.mean((y_test - y_pred) ** 2)) 545 546 names.append(name) 547 R2.append(r_squared) 548 ADJR2.append(adj_rsquared) 549 RMSE.append(rmse) 550 TIME.append(time.time() - start) 551 552 if self.custom_metric: 553 custom_metric = self.custom_metric(y_test, y_pred) 554 CUSTOM_METRIC.append(custom_metric) 555 556 if self.verbose > 0: 557 scores_verbose = { 558 "Model": name, 559 "R-Squared": r_squared, 560 "Adjusted R-Squared": adj_rsquared, 561 "RMSE": rmse, 562 "Time taken": time.time() - start, 563 } 564 565 if self.custom_metric: 566 scores_verbose[self.custom_metric.__name__] = custom_metric 567 568 print(scores_verbose) 569 if self.predictions: 570 predictions[name] = y_pred 571 except Exception as exception: 572 if self.ignore_warnings is False: 573 print(name + " model failed to execute") 574 print(exception) 575 576 scores = { 577 "Model": names, 578 "Adjusted R-Squared": ADJR2, 579 "R-Squared": R2, 580 "RMSE": RMSE, 581 "Time Taken": TIME, 582 } 583 584 if self.custom_metric: 585 scores["Custom metric"] = CUSTOM_METRIC 586 587 scores = pd.DataFrame(scores) 588 scores = scores.sort_values(by=self.sort_by, ascending=True).set_index("Model") 589 590 self.best_model_ = self.models_[scores.index[0]] 591 592 if self.predictions is True: 593 594 return scores, predictions 595 596 return scores 597 598 def get_best_model(self): 599 """ 600 This function returns the best model pipeline based on the sort_by metric. 601 602 Returns: 603 604 best_model: object, 605 Returns the best model pipeline based on the sort_by metric. 
606 607 """ 608 return self.best_model_ 609 610 def provide_models(self, X_train, X_test, y_train, y_test): 611 """ 612 This function returns all the model objects trained in fit function. 613 If fit is not called already, then we call fit and then return the models. 614 615 Parameters: 616 617 X_train : array-like, 618 Training vectors, where rows is the number of samples 619 and columns is the number of features. 620 621 X_test : array-like, 622 Testing vectors, where rows is the number of samples 623 and columns is the number of features. 624 625 y_train : array-like, 626 Training vectors, where rows is the number of samples 627 and columns is the number of features. 628 629 y_test : array-like, 630 Testing vectors, where rows is the number of samples 631 and columns is the number of features. 632 633 Returns: 634 635 models: dict-object, 636 Returns a dictionary with each model pipeline as value 637 with key as name of models. 638 639 """ 640 if len(self.models_.keys()) == 0: 641 self.fit(X_train, X_test, y_train, y_test) 642 643 return self.models_
Fitting -- almost -- all the regression algorithms with layers of nnetsauce's CustomRegressor and returning their scores.
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, warnings related to algorithms that fail to run are ignored.
custom_metric: function, optional (default=None)
When a function is provided, models are also evaluated based on this custom metric.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models are returned as a data frame.
sort_by: string, optional (default='RMSE')
Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken' and 'Custom Metric' (a custom metric identified by its name and provided through custom_metric).
random_state: int, optional (default=42)
Reproducibility seed.
estimators: list, optional (default='all')
list of estimator names, or just 'all' (default='all')
preprocess: bool
preprocessing is done when set to True
n_jobs: int, optional (default=None)
Number of jobs to run in parallel, when possible. For now, only used by individual models that support it.
n_layers: int, optional (default=3)
Number of layers of CustomRegressors to be used.
All the other parameters are the same as CustomRegressor's.
Attributes:
models_: dict-object
Dictionary with model names as keys and the corresponding fitted pipelines as values.
best_model_: object
Returns the best model pipeline based on the sort_by metric.
Examples:
import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle
diabetes = datasets.load_diabetes()
X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
X = X.astype(np.float32)
offset = int(X.shape[0] * 0.9)
X_train, y_train = X[:offset], y[:offset]
X_test, y_test = X[offset:], y[offset:]
reg = ns.LazyDeepRegressor(verbose=0, ignore_warnings=False, custom_metric=None, predictions=True)  # predictions=True so that fit returns (scores, predictions)
models, predictions = reg.fit(X_train, X_test, y_train, y_test)
print(models)
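As a complement, the following sketch is not part of the original docstring: it restricts LazyDeepRegressor to two base learners and two stacked CustomRegressor layers. The estimator names are ordinary scikit-learn class names used purely for illustration.

```python
import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle

diabetes = datasets.load_diabetes()
X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
X = X.astype(np.float32)
offset = int(X.shape[0] * 0.9)
X_train, y_train = X[:offset], y[:offset]
X_test, y_test = X[offset:], y[offset:]

# only try two base learners, each wrapped in 2 layers of CustomRegressor
reg = ns.LazyDeepRegressor(verbose=0, ignore_warnings=True,
                           n_layers=2,
                           estimators=["Ridge", "ElasticNet"])
models = reg.fit(X_train, X_test, y_train, y_test)  # scores DataFrame, sorted by RMSE
print(models)

best = reg.get_best_model()      # lowest-RMSE pipeline
print(best.predict(X_test)[:5])
```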
def fit(self, X_train, X_test, y_train, y_test)
Fit regression algorithms to X_train and y_train, then predict and score on X_test and y_test.
Parameters:
X_train : array-like,
Training vectors, where rows are samples and columns are features.
X_test : array-like,
Testing vectors, where rows are samples and columns are features.
y_train : array-like,
Training target values (one value per training sample).
y_test : array-like,
Testing target values (one value per testing sample).
Returns:
scores : Pandas DataFrame
Metrics of all the models, one row per model.
predictions : Pandas DataFrame
Predictions of all the models (only returned when predictions=True).
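To make the fit/score workflow concrete, here is a minimal usage sketch. The dataset, train/test split, estimator subset and constructor values are illustrative assumptions; only the `fit(X_train, X_test, y_train, y_test)` signature and the returned scores/predictions come from the code above.

```python
# Minimal sketch (assumed hyperparameters): benchmark a few deep CustomRegressor-based
# models on a tabular dataset and inspect the per-model metrics.
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

reg = ns.LazyDeepRegressor(
    verbose=0,
    ignore_warnings=True,
    predictions=True,              # also return a DataFrame of predictions
    n_layers=2,                    # assumed constructor argument (mirrors self.n_layers in fit)
    estimators=["Ridge", "ElasticNet", "RandomForestRegressor"],  # assumed subset of sklearn names
)
scores, predictions = reg.fit(X_train, X_test, y_train, y_test)
print(scores.head())               # one row of metrics per fitted model
```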
def provide_models(self, X_train, X_test, y_train, y_test):
    """
    This function returns all the model objects trained in fit function.
    If fit is not called already, then we call fit and then return the models.

    Parameters:

        X_train : array-like,
            Training vectors, where rows is the number of samples
            and columns is the number of features.

        X_test : array-like,
            Testing vectors, where rows is the number of samples
            and columns is the number of features.

        y_train : array-like,
            Training vectors, where rows is the number of samples
            and columns is the number of features.

        y_test : array-like,
            Testing vectors, where rows is the number of samples
            and columns is the number of features.

    Returns:

        models: dict-object,
            Returns a dictionary with each model pipeline as value
            with key as name of models.

    """
    if len(self.models_.keys()) == 0:
        self.fit(X_train, X_test, y_train, y_test)

    return self.models_
Returns all the model objects trained by the fit method. If fit has not been called yet, it is called first and the fitted models are then returned.
Parameters:
X_train : array-like,
Training vectors, where rows are samples and columns are features.
X_test : array-like,
Testing vectors, where rows are samples and columns are features.
y_train : array-like,
Training target values (one value per training sample).
y_test : array-like,
Testing target values (one value per testing sample).
Returns:
models : dict,
A dictionary with model names as keys and the corresponding fitted
model pipelines as values.
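Continuing the sketch above, provide_models retrieves the fitted pipelines by name, and best_model_ (set at the end of fit) holds the top-ranked one. The lookup pattern shown in the comments follows the `DeepCustomRegressor(<estimator>)` naming used in fit.

```python
# Retrieve the dictionary of fitted pipelines (fit is triggered first if needed).
models = reg.provide_models(X_train, X_test, y_train, y_test)
print(list(models.keys())[:5])     # model names, e.g. "DeepCustomRegressor(Ridge)"

best = reg.best_model_             # pipeline ranked first by the sort_by metric
print(best.predict(X_test)[:5])
```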
988class LazyMTS(LazyDeepMTS): 989 """ 990 Fitting -- almost -- all the regression algorithms to multivariate time series 991 and returning their scores (no layers). 992 993 Parameters: 994 995 verbose: int, optional (default=0) 996 Any positive number for verbosity. 997 998 ignore_warnings: bool, optional (default=True) 999 When set to True, the warning related to algorigms that are not 1000 able to run are ignored. 1001 1002 custom_metric: function, optional (default=None) 1003 When function is provided, models are evaluated based on the custom 1004 evaluation metric provided. 1005 1006 predictions: bool, optional (default=False) 1007 When set to True, the predictions of all the models models are returned as dataframe. 1008 1009 sort_by: string, optional (default='RMSE') 1010 Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE', 1011 'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and 1012 provided by custom_metric. 1013 1014 random_state: int, optional (default=42) 1015 Reproducibiility seed. 1016 1017 estimators: list, optional (default='all') 1018 list of Estimators (regression algorithms) names or just 'all' (default='all') 1019 1020 preprocess: bool, preprocessing is done when set to True 1021 1022 h: int, optional (default=None) 1023 Number of steps ahead to predict (when used, must be > 0 and < X_test.shape[0]). 1024 1025 All the other parameters are the same as MTS's. 1026 1027 Attributes: 1028 1029 models_: dict-object 1030 Returns a dictionary with each model pipeline as value 1031 with key as name of models. 1032 1033 best_model_: object 1034 Returns the best model pipeline based on the sort_by metric. 1035 1036 Examples: 1037 1038 See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict 1039 1040 """ 1041 1042 def __init__( 1043 self, 1044 verbose=0, 1045 ignore_warnings=True, 1046 custom_metric=None, 1047 predictions=False, 1048 sort_by=None, # leave it as is 1049 random_state=42, 1050 estimators="all", 1051 preprocess=False, 1052 h=None, 1053 # MTS attributes 1054 obj=None, 1055 n_hidden_features=5, 1056 activation_name="relu", 1057 a=0.01, 1058 nodes_sim="sobol", 1059 bias=True, 1060 dropout=0, 1061 direct_link=True, 1062 n_clusters=2, 1063 cluster_encode=True, 1064 type_clust="kmeans", 1065 type_scaling=("std", "std", "std"), 1066 lags=15, 1067 type_pi="scp2-kde", 1068 block_size=None, 1069 replications=None, 1070 kernel=None, 1071 agg="mean", 1072 seed=123, 1073 backend="cpu", 1074 show_progress=False, 1075 ): 1076 super().__init__( 1077 verbose=verbose, 1078 ignore_warnings=ignore_warnings, 1079 custom_metric=custom_metric, 1080 predictions=predictions, 1081 sort_by=sort_by, 1082 random_state=random_state, 1083 estimators=estimators, 1084 preprocess=preprocess, 1085 n_layers=1, 1086 h=h, 1087 obj=obj, 1088 n_hidden_features=n_hidden_features, 1089 activation_name=activation_name, 1090 a=a, 1091 nodes_sim=nodes_sim, 1092 bias=bias, 1093 dropout=dropout, 1094 direct_link=direct_link, 1095 n_clusters=n_clusters, 1096 cluster_encode=cluster_encode, 1097 type_clust=type_clust, 1098 type_scaling=type_scaling, 1099 lags=lags, 1100 type_pi=type_pi, 1101 block_size=block_size, 1102 replications=replications, 1103 kernel=kernel, 1104 agg=agg, 1105 seed=seed, 1106 backend=backend, 1107 show_progress=show_progress, 1108 )
Fits (almost) all the regression algorithms to multivariate time series and returns their scores (single layer: this class fixes n_layers to 1).
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, warnings related to algorithms that are not
able to run are ignored.
custom_metric: function, optional (default=None)
When a function is provided, models are also evaluated with this custom
evaluation metric.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models are returned as a Pandas DataFrame.
sort_by: string, optional (default=None)
Metric used to sort the models. Available options include 'RMSE', 'MAE', 'MPL' and,
when prediction intervals are computed, 'WINKLERSCORE' and 'COVERAGE', or a custom
metric provided through custom_metric. When left to None, models are sorted by 'RMSE',
or by 'WINKLERSCORE' when replications is not None or type_pi == 'gaussian'.
random_state: int, optional (default=42)
Reproducibility seed.
estimators: list, optional (default='all')
List of estimator (regression algorithm) names, or 'all' to try every available regressor.
preprocess: bool, optional (default=False)
When set to True, a preprocessing ColumnTransformer (numeric and categorical pipelines) is applied to the inputs.
h: int, optional (default=None)
Number of steps ahead to predict (when used, must be > 0 and <= X_test.shape[0]).
All the other parameters are the same as MTS's.
Attributes:
models_: dict-object
Returns a dictionary with each model pipeline as value
with key as name of models.
best_model_: object
Returns the best model pipeline based on the sort_by metric.
Examples:
See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict
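A minimal usage sketch for LazyMTS; the synthetic two-series data, the 100/20 split, and the lags/type_pi values are assumptions made for illustration (only the fit(X_train, X_test) signature comes from the class itself).

```python
# Minimal sketch (assumed data and hyperparameters): benchmark single-layer MTS
# models on a toy bivariate time series.
import numpy as np
import pandas as pd
import nnetsauce as ns

rng = np.random.default_rng(42)
df = pd.DataFrame({
    "series1": np.cumsum(rng.normal(size=120)),
    "series2": np.cumsum(rng.normal(size=120)),
})
train, test = df.iloc[:100, :], df.iloc[100:, :]

mts_bench = ns.LazyMTS(verbose=0, ignore_warnings=True, lags=10, type_pi="gaussian")
scores = mts_bench.fit(train, test)   # no targets: X_test sets the forecast horizon
print(scores.head())                  # sorted by WINKLERSCORE since intervals are computed
```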
104class LazyDeepMTS(MTS): 105 """ 106 107 Fitting -- almost -- all the regression algorithms with layers of 108 nnetsauce's CustomRegressor to multivariate time series 109 and returning their scores. 110 111 Parameters: 112 113 verbose: int, optional (default=0) 114 Any positive number for verbosity. 115 116 ignore_warnings: bool, optional (default=True) 117 When set to True, the warning related to algorigms that are not 118 able to run are ignored. 119 120 custom_metric: function, optional (default=None) 121 When function is provided, models are evaluated based on the custom 122 evaluation metric provided. 123 124 predictions: bool, optional (default=False) 125 When set to True, the predictions of all the models models are returned as dataframe. 126 127 sort_by: string, optional (default='RMSE') 128 Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE', 129 'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and 130 provided by custom_metric. 131 132 random_state: int, optional (default=42) 133 Reproducibiility seed. 134 135 estimators: list, optional (default='all') 136 list of Estimators (regression algorithms) names or just 'all' (default='all') 137 138 preprocess: bool, preprocessing is done when set to True 139 140 n_layers: int, optional (default=1) 141 Number of layers in the network. When set to 1, the model is equivalent to a MTS. 142 143 h: int, optional (default=None) 144 Number of steps ahead to predict (when used, must be > 0 and < X_test.shape[0]). 145 146 All the other parameters are the same as MTS's. 147 148 Attributes: 149 150 models_: dict-object 151 Returns a dictionary with each model pipeline as value 152 with key as name of models. 153 154 best_model_: object 155 Returns the best model pipeline based on the sort_by metric. 
156 157 Examples: 158 159 See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict 160 161 """ 162 163 def __init__( 164 self, 165 verbose=0, 166 ignore_warnings=True, 167 custom_metric=None, 168 predictions=False, 169 sort_by=None, # leave it as is 170 random_state=42, 171 estimators="all", 172 preprocess=False, 173 n_layers=1, 174 h=None, 175 # MTS attributes 176 obj=None, 177 n_hidden_features=5, 178 activation_name="relu", 179 a=0.01, 180 nodes_sim="sobol", 181 bias=True, 182 dropout=0, 183 direct_link=True, 184 n_clusters=2, 185 cluster_encode=True, 186 type_clust="kmeans", 187 type_scaling=("std", "std", "std"), 188 lags=15, 189 type_pi="scp2-kde", 190 block_size=None, 191 replications=None, 192 kernel=None, 193 agg="mean", 194 seed=123, 195 backend="cpu", 196 show_progress=False, 197 ): 198 self.verbose = verbose 199 self.ignore_warnings = ignore_warnings 200 self.custom_metric = custom_metric 201 self.predictions = predictions 202 self.sort_by = sort_by 203 self.models_ = {} 204 self.best_model_ = None 205 self.random_state = random_state 206 self.estimators = estimators 207 self.preprocess = preprocess 208 self.n_layers = n_layers 209 self.h = h 210 super().__init__( 211 obj=obj, 212 n_hidden_features=n_hidden_features, 213 activation_name=activation_name, 214 a=a, 215 nodes_sim=nodes_sim, 216 bias=bias, 217 dropout=dropout, 218 direct_link=direct_link, 219 n_clusters=n_clusters, 220 cluster_encode=cluster_encode, 221 type_clust=type_clust, 222 type_scaling=type_scaling, 223 seed=seed, 224 backend=backend, 225 lags=lags, 226 type_pi=type_pi, 227 block_size=block_size, 228 replications=replications, 229 kernel=kernel, 230 agg=agg, 231 verbose=verbose, 232 show_progress=show_progress, 233 ) 234 if self.replications is not None or self.type_pi == "gaussian": 235 if self.sort_by is None: 236 self.sort_by = "WINKLERSCORE" 237 else: 238 if self.sort_by is None: 239 self.sort_by = "RMSE" 240 241 def fit(self, X_train, X_test, xreg=None, per_series=False, **kwargs): 242 """Fit Regression algorithms to X_train, predict and score on X_test. 243 244 Parameters: 245 246 X_train: array-like or data frame, 247 Training vectors, where rows is the number of samples 248 and columns is the number of features. 249 250 X_test: array-like or data frame, 251 Testing vectors, where rows is the number of samples 252 and columns is the number of features. 253 254 xreg: array-like, optional (default=None) 255 Additional (external) regressors to be passed to self.obj 256 xreg must be in 'increasing' order (most recent observations last) 257 258 per_series: bool, optional (default=False) 259 When set to True, the metrics are computed series by series. 260 261 **kwargs: dict, optional (default=None) 262 Additional parameters to be passed to `fit` method of `obj`. 263 264 Returns: 265 266 scores: Pandas DataFrame 267 Returns metrics of all the models in a Pandas DataFrame. 268 269 predictions: Pandas DataFrame 270 Returns predictions of all the models in a Pandas DataFrame. 271 272 """ 273 R2 = [] 274 ADJR2 = [] 275 ME = [] 276 MPL = [] 277 RMSE = [] 278 MAE = [] 279 MPE = [] 280 MAPE = [] 281 WINKLERSCORE = [] 282 COVERAGE = [] 283 284 # WIN = [] 285 names = [] 286 TIME = [] 287 predictions = {} 288 289 if self.custom_metric is not None: 290 CUSTOM_METRIC = [] 291 292 if self.h is None: 293 assert X_test is not None, "If h is None, X_test must be provided." 
294 295 if isinstance(X_train, np.ndarray): 296 X_train = pd.DataFrame(X_train) 297 X_test = pd.DataFrame(X_test) 298 299 self.series_names = X_train.columns.tolist() 300 301 X_train = convert_df_to_numeric(X_train) 302 X_test = convert_df_to_numeric(X_test) 303 304 numeric_features = X_train.select_dtypes(include=[np.number]).columns 305 categorical_features = X_train.select_dtypes(include=["object"]).columns 306 307 categorical_low, categorical_high = get_card_split( 308 X_train, categorical_features 309 ) 310 311 if self.preprocess: 312 preprocessor = ColumnTransformer( 313 transformers=[ 314 ("numeric", numeric_transformer, numeric_features), 315 ( 316 "categorical_low", 317 categorical_transformer_low, 318 categorical_low, 319 ), 320 ( 321 "categorical_high", 322 categorical_transformer_high, 323 categorical_high, 324 ), 325 ] 326 ) 327 328 # baselines (Classical MTS) ---- 329 for i, name in enumerate(["ARIMA", "ETS", "Theta", "VAR", "VECM"]): 330 try: 331 start = time.time() 332 regr = ClassicalMTS(model=name) 333 regr.fit(X_train, **kwargs) 334 self.models_[name] = regr 335 if self.h is None: 336 X_pred = regr.predict(h=X_test.shape[0], **kwargs) 337 else: 338 assert self.h > 0, "h must be > 0" 339 X_pred = regr.predict(h=self.h, **kwargs) 340 try: 341 X_test = X_test[0 : self.h, :] 342 except Exception as e: 343 X_test = X_test.iloc[0 : self.h, :] 344 345 rmse = mean_errors( 346 actual=X_test, 347 pred=X_pred, 348 scoring="root_mean_squared_error", 349 per_series=per_series, 350 ) 351 mae = mean_errors( 352 actual=X_test, 353 pred=X_pred, 354 scoring="mean_absolute_error", 355 per_series=per_series, 356 ) 357 mpl = mean_errors( 358 actual=X_test, 359 pred=X_pred, 360 scoring="mean_pinball_loss", 361 per_series=per_series, 362 ) 363 except Exception: 364 365 continue 366 367 names.append(name) 368 RMSE.append(rmse) 369 MAE.append(mae) 370 MPL.append(mpl) 371 372 if self.custom_metric is not None: 373 try: 374 if self.h is None: 375 custom_metric = self.custom_metric(X_test, X_pred) 376 else: 377 custom_metric = self.custom_metric(X_test_h, X_pred) 378 CUSTOM_METRIC.append(custom_metric) 379 except Exception as e: 380 custom_metric = np.iinfo(np.float32).max 381 CUSTOM_METRIC.append(np.iinfo(np.float32).max) 382 383 if (self.replications is not None) or (self.type_pi == "gaussian"): 384 if per_series == False: 385 winklerscore = winkler_score(obj=X_pred, actual=X_test, level=95) 386 coveragecalc = coverage(X_pred, X_test, level=95) 387 else: 388 winklerscore = winkler_score( 389 obj=X_pred, actual=X_test, level=95, per_series=True 390 ) 391 coveragecalc = coverage(X_pred, X_test, level=95, per_series=True) 392 WINKLERSCORE.append(winklerscore) 393 COVERAGE.append(coveragecalc) 394 TIME.append(time.time() - start) 395 396 if self.estimators == "all": 397 if self.n_layers <= 1: 398 self.regressors = REGRESSORSMTS 399 else: 400 self.regressors = DEEPREGRESSORSMTS 401 else: 402 if self.n_layers <= 1: 403 self.regressors = [ 404 ("MTS(" + est[0] + ")", est[1]) 405 for est in all_estimators() 406 if ( 407 issubclass(est[1], RegressorMixin) 408 and (est[0] in self.estimators) 409 ) 410 ] 411 else: # self.n_layers > 1 412 self.regressors = [ 413 ("DeepMTS(" + est[0] + ")", est[1]) 414 for est in all_estimators() 415 if ( 416 issubclass(est[1], RegressorMixin) 417 and (est[0] in self.estimators) 418 ) 419 ] 420 421 if self.preprocess is True: 422 for name, model in tqdm(self.regressors): # do parallel exec 423 start = time.time() 424 try: 425 if "random_state" in model().get_params().keys(): 
426 pipe = Pipeline( 427 steps=[ 428 ("preprocessor", preprocessor), 429 ( 430 "regressor", 431 DeepMTS( 432 obj=model( 433 random_state=self.random_state, 434 **kwargs, 435 ), 436 n_layers=self.n_layers, 437 n_hidden_features=self.n_hidden_features, 438 activation_name=self.activation_name, 439 a=self.a, 440 nodes_sim=self.nodes_sim, 441 bias=self.bias, 442 dropout=self.dropout, 443 direct_link=self.direct_link, 444 n_clusters=self.n_clusters, 445 cluster_encode=self.cluster_encode, 446 type_clust=self.type_clust, 447 type_scaling=self.type_scaling, 448 lags=self.lags, 449 type_pi=self.type_pi, 450 block_size=self.block_size, 451 replications=self.replications, 452 kernel=self.kernel, 453 agg=self.agg, 454 seed=self.seed, 455 backend=self.backend, 456 show_progress=self.show_progress, 457 ), 458 ), 459 ] 460 ) 461 else: # "random_state" in model().get_params().keys() 462 pipe = Pipeline( 463 steps=[ 464 ("preprocessor", preprocessor), 465 ( 466 "regressor", 467 DeepMTS( 468 obj=model(**kwargs), 469 n_layers=self.n_layers, 470 n_hidden_features=self.n_hidden_features, 471 activation_name=self.activation_name, 472 a=self.a, 473 nodes_sim=self.nodes_sim, 474 bias=self.bias, 475 dropout=self.dropout, 476 direct_link=self.direct_link, 477 n_clusters=self.n_clusters, 478 cluster_encode=self.cluster_encode, 479 type_clust=self.type_clust, 480 type_scaling=self.type_scaling, 481 lags=self.lags, 482 type_pi=self.type_pi, 483 block_size=self.block_size, 484 replications=self.replications, 485 kernel=self.kernel, 486 agg=self.agg, 487 seed=self.seed, 488 backend=self.backend, 489 show_progress=self.show_progress, 490 ), 491 ), 492 ] 493 ) 494 495 pipe.fit(X_train, **kwargs) 496 # pipe.fit(X_train, xreg=xreg) 497 498 self.models_[name] = pipe 499 500 if self.h is None: 501 X_pred = pipe["regressor"].predict(h=self.h, **kwargs) 502 else: 503 assert self.h > 0, "h must be > 0" 504 X_pred = pipe["regressor"].predict(h=self.h, **kwargs) 505 506 if (self.replications is not None) or (self.type_pi == "gaussian"): 507 rmse = mean_errors( 508 actual=X_test, 509 pred=X_pred, 510 scoring="root_mean_squared_error", 511 per_series=per_series, 512 ) 513 mae = mean_errors( 514 actual=X_test, 515 pred=X_pred, 516 scoring="mean_absolute_error", 517 per_series=per_series, 518 ) 519 mpl = mean_errors( 520 actual=X_test, 521 pred=X_pred, 522 scoring="mean_pinball_loss", 523 per_series=per_series, 524 ) 525 winklerscore = winkler_score( 526 obj=X_pred, 527 actual=X_test, 528 level=95, 529 per_series=per_series, 530 ) 531 coveragecalc = coverage( 532 X_pred, X_test, level=95, per_series=per_series 533 ) 534 else: 535 rmse = mean_errors( 536 actual=X_test, 537 pred=X_pred, 538 scoring="root_mean_squared_error", 539 per_series=per_series, 540 ) 541 mae = mean_errors( 542 actual=X_test, 543 pred=X_pred, 544 scoring="mean_absolute_error", 545 per_series=per_series, 546 ) 547 mpl = mean_errors( 548 actual=X_test, 549 pred=X_pred, 550 scoring="mean_pinball_loss", 551 per_series=per_series, 552 ) 553 554 names.append(name) 555 RMSE.append(rmse) 556 MAE.append(mae) 557 MPL.append(mpl) 558 559 if (self.replications is not None) or (self.type_pi == "gaussian"): 560 WINKLERSCORE.append(winklerscore) 561 COVERAGE.append(coveragecalc) 562 TIME.append(time.time() - start) 563 564 if self.custom_metric is not None: 565 try: 566 custom_metric = self.custom_metric(X_test, X_pred) 567 CUSTOM_METRIC.append(custom_metric) 568 except Exception as e: 569 custom_metric = np.iinfo(np.float32).max 570 CUSTOM_METRIC.append(custom_metric) 571 572 
if self.verbose > 0: 573 if (self.replications is not None) or ( 574 self.type_pi == "gaussian" 575 ): 576 scores_verbose = { 577 "Model": name, 578 "RMSE": rmse, 579 "MAE": mae, 580 "MPL": mpl, 581 "WINKLERSCORE": winklerscore, 582 "COVERAGE": coveragecalc, 583 "Time taken": time.time() - start, 584 } 585 else: 586 scores_verbose = { 587 "Model": name, 588 "RMSE": rmse, 589 "MAE": mae, 590 "MPL": mpl, 591 "Time taken": time.time() - start, 592 } 593 594 if self.custom_metric is not None: 595 scores_verbose["Custom metric"] = custom_metric 596 597 if self.predictions: 598 predictions[name] = X_pred 599 except Exception as exception: 600 if self.ignore_warnings is False: 601 print(name + " model failed to execute") 602 print(exception) 603 604 else: # no preprocessing 605 606 for name, model in tqdm(self.regressors): # do parallel exec 607 start = time.time() 608 try: 609 if "random_state" in model().get_params().keys(): 610 pipe = DeepMTS( 611 obj=model(random_state=self.random_state, **kwargs), 612 n_layers=self.n_layers, 613 n_hidden_features=self.n_hidden_features, 614 activation_name=self.activation_name, 615 a=self.a, 616 nodes_sim=self.nodes_sim, 617 bias=self.bias, 618 dropout=self.dropout, 619 direct_link=self.direct_link, 620 n_clusters=self.n_clusters, 621 cluster_encode=self.cluster_encode, 622 type_clust=self.type_clust, 623 type_scaling=self.type_scaling, 624 lags=self.lags, 625 type_pi=self.type_pi, 626 block_size=self.block_size, 627 replications=self.replications, 628 kernel=self.kernel, 629 agg=self.agg, 630 seed=self.seed, 631 backend=self.backend, 632 show_progress=self.show_progress, 633 ) 634 else: 635 pipe = DeepMTS( 636 obj=model(**kwargs), 637 n_layers=self.n_layers, 638 n_hidden_features=self.n_hidden_features, 639 activation_name=self.activation_name, 640 a=self.a, 641 nodes_sim=self.nodes_sim, 642 bias=self.bias, 643 dropout=self.dropout, 644 direct_link=self.direct_link, 645 n_clusters=self.n_clusters, 646 cluster_encode=self.cluster_encode, 647 type_clust=self.type_clust, 648 type_scaling=self.type_scaling, 649 lags=self.lags, 650 type_pi=self.type_pi, 651 block_size=self.block_size, 652 replications=self.replications, 653 kernel=self.kernel, 654 agg=self.agg, 655 seed=self.seed, 656 backend=self.backend, 657 show_progress=self.show_progress, 658 ) 659 660 pipe.fit(X_train, xreg, **kwargs) 661 # pipe.fit(X_train, xreg=xreg) # DO xreg like in `ahead` 662 663 self.models_[name] = pipe 664 665 if self.preprocess is True: 666 if self.h is None: 667 X_pred = pipe["regressor"].predict( 668 h=X_test.shape[0], **kwargs 669 ) 670 else: 671 assert ( 672 self.h > 0 and self.h <= X_test.shape[0] 673 ), "h must be > 0 and < X_test.shape[0]" 674 X_pred = pipe["regressor"].predict(h=self.h, **kwargs) 675 676 else: 677 678 if self.h is None: 679 X_pred = pipe.predict( 680 h=X_test.shape[0], 681 **kwargs, 682 # X_pred = pipe.predict(h=X_test.shape[0], new_xreg=new_xreg) ## DO xreg like in `ahead` 683 ) 684 else: 685 assert ( 686 self.h > 0 and self.h <= X_test.shape[0] 687 ), "h must be > 0 and < X_test.shape[0]" 688 X_pred = pipe.predict(h=self.h, **kwargs) 689 690 if self.h is None: 691 if (self.replications is not None) or ( 692 self.type_pi == "gaussian" 693 ): 694 rmse = mean_errors( 695 actual=X_test, 696 pred=X_pred.mean, 697 scoring="root_mean_squared_error", 698 per_series=per_series, 699 ) 700 mae = mean_errors( 701 actual=X_test, 702 pred=X_pred.mean, 703 scoring="mean_absolute_error", 704 per_series=per_series, 705 ) 706 mpl = mean_errors( 707 actual=X_test, 708 
pred=X_pred.mean, 709 scoring="mean_pinball_loss", 710 per_series=per_series, 711 ) 712 winklerscore = winkler_score( 713 obj=X_pred, 714 actual=X_test, 715 level=95, 716 per_series=per_series, 717 ) 718 coveragecalc = coverage( 719 X_pred, X_test, level=95, per_series=per_series 720 ) 721 else: # no prediction interval 722 rmse = mean_errors( 723 actual=X_test, 724 pred=X_pred, 725 scoring="root_mean_squared_error", 726 per_series=per_series, 727 ) 728 mae = mean_errors( 729 actual=X_test, 730 pred=X_pred, 731 scoring="mean_absolute_error", 732 per_series=per_series, 733 ) 734 mpl = mean_errors( 735 actual=X_test, 736 pred=X_pred, 737 scoring="mean_pinball_loss", 738 per_series=per_series, 739 ) 740 else: # self.h is not None 741 if (self.replications is not None) or ( 742 self.type_pi == "gaussian" 743 ): 744 745 if isinstance(X_test, pd.DataFrame): 746 X_test_h = X_test.iloc[0 : self.h, :] 747 rmse = mean_errors( 748 actual=X_test_h, 749 pred=X_pred, 750 scoring="root_mean_squared_error", 751 per_series=per_series, 752 ) 753 mae = mean_errors( 754 actual=X_test_h, 755 pred=X_pred, 756 scoring="mean_absolute_error", 757 per_series=per_series, 758 ) 759 mpl = mean_errors( 760 actual=X_test_h, 761 pred=X_pred, 762 scoring="mean_pinball_loss", 763 per_series=per_series, 764 ) 765 winklerscore = winkler_score( 766 obj=X_pred, 767 actual=X_test_h, 768 level=95, 769 per_series=per_series, 770 ) 771 coveragecalc = coverage( 772 X_pred, 773 X_test_h, 774 level=95, 775 per_series=per_series, 776 ) 777 else: 778 X_test_h = X_test[0 : self.h, :] 779 rmse = mean_errors( 780 actual=X_test_h, 781 pred=X_pred, 782 scoring="root_mean_squared_error", 783 per_series=per_series, 784 ) 785 mae = mean_errors( 786 actual=X_test_h, 787 pred=X_pred, 788 scoring="mean_absolute_error", 789 per_series=per_series, 790 ) 791 mpl = mean_errors( 792 actual=X_test_h, 793 pred=X_pred, 794 scoring="mean_pinball_loss", 795 per_series=per_series, 796 ) 797 winklerscore = winkler_score( 798 obj=X_pred, 799 actual=X_test_h, 800 level=95, 801 per_series=per_series, 802 ) 803 coveragecalc = coverage( 804 X_pred, 805 X_test_h, 806 level=95, 807 per_series=per_series, 808 ) 809 else: # no prediction interval 810 811 if isinstance(X_test, pd.DataFrame): 812 X_test_h = X_test.iloc[0 : self.h, :] 813 rmse = mean_errors( 814 actual=X_test_h, 815 pred=X_pred, 816 scoring="root_mean_squared_error", 817 per_series=per_series, 818 ) 819 mae = mean_errors( 820 actual=X_test_h, 821 pred=X_pred, 822 scoring="mean_absolute_error", 823 per_series=per_series, 824 ) 825 mpl = mean_errors( 826 actual=X_test_h, 827 pred=X_pred, 828 scoring="mean_pinball_loss", 829 per_series=per_series, 830 ) 831 else: 832 X_test_h = X_test[0 : self.h, :] 833 rmse = mean_errors( 834 actual=X_test_h, 835 pred=X_pred, 836 scoring="root_mean_squared_error", 837 per_series=per_series, 838 ) 839 mae = mean_errors( 840 actual=X_test_h, 841 pred=X_pred, 842 scoring="mean_absolute_error", 843 per_series=per_series, 844 ) 845 846 names.append(name) 847 RMSE.append(rmse) 848 MAE.append(mae) 849 MPL.append(mpl) 850 if (self.replications is not None) or (self.type_pi == "gaussian"): 851 WINKLERSCORE.append(winklerscore) 852 COVERAGE.append(coveragecalc) 853 TIME.append(time.time() - start) 854 855 if self.custom_metric is not None: 856 try: 857 if self.h is None: 858 custom_metric = self.custom_metric(X_test, X_pred) 859 else: 860 custom_metric = self.custom_metric(X_test_h, X_pred) 861 CUSTOM_METRIC.append(custom_metric) 862 except Exception as e: 863 custom_metric = 
np.iinfo(np.float32).max 864 CUSTOM_METRIC.append(np.iinfo(np.float32).max) 865 866 if self.verbose > 0: 867 if (self.replications is not None) or ( 868 self.type_pi == "gaussian" 869 ): 870 scores_verbose = { 871 "Model": name, 872 "RMSE": rmse, 873 "MAE": mae, 874 "MPL": mpl, 875 "WINKLERSCORE": winklerscore, 876 "COVERAGE": coveragecalc, 877 "Time taken": time.time() - start, 878 } 879 else: 880 scores_verbose = { 881 "Model": name, 882 "RMSE": rmse, 883 "MAE": mae, 884 "MPL": mpl, 885 "Time taken": time.time() - start, 886 } 887 888 if self.custom_metric is not None: 889 scores_verbose["Custom metric"] = custom_metric 890 891 if self.predictions: 892 predictions[name] = X_pred 893 894 except Exception as exception: 895 if self.ignore_warnings is False: 896 print(name + " model failed to execute") 897 print(exception) 898 899 if (self.replications is not None) or (self.type_pi == "gaussian"): 900 scores = { 901 "Model": names, 902 "RMSE": RMSE, 903 "MAE": MAE, 904 "MPL": MPL, 905 "WINKLERSCORE": WINKLERSCORE, 906 "COVERAGE": COVERAGE, 907 "Time Taken": TIME, 908 } 909 else: 910 scores = { 911 "Model": names, 912 "RMSE": RMSE, 913 "MAE": MAE, 914 "MPL": MPL, 915 "Time Taken": TIME, 916 } 917 918 if self.custom_metric is not None: 919 scores["Custom metric"] = CUSTOM_METRIC 920 921 if per_series: 922 scores = dict_to_dataframe_series(scores, self.series_names) 923 else: 924 scores = pd.DataFrame(scores) 925 926 try: # case per_series, can't be sorted 927 scores = scores.sort_values(by=self.sort_by, ascending=True).set_index( 928 "Model" 929 ) 930 931 self.best_model_ = self.models_[scores.index[0]] 932 except Exception as e: 933 pass 934 935 if self.predictions is True: 936 937 return scores, predictions 938 939 return scores 940 941 def get_best_model(self): 942 """ 943 This function returns the best model pipeline based on the sort_by metric. 944 945 Returns: 946 947 best_model: object, 948 Returns the best model pipeline based on the sort_by metric. 949 950 """ 951 return self.best_model_ 952 953 def provide_models(self, X_train, X_test): 954 """ 955 This function returns all the model objects trained in fit function. 956 If fit is not called already, then we call fit and then return the models. 957 958 Parameters: 959 960 X_train : array-like, 961 Training vectors, where rows is the number of samples 962 and columns is the number of features. 963 964 X_test : array-like, 965 Testing vectors, where rows is the number of samples 966 and columns is the number of features. 967 968 Returns: 969 970 models: dict-object, 971 Returns a dictionary with each model pipeline as value 972 with key as name of models. 973 974 """ 975 if self.h is None: 976 if len(self.models_.keys()) == 0: 977 self.fit(X_train, X_test) 978 else: 979 if len(self.models_.keys()) == 0: 980 if isinstance(X_test, pd.DataFrame): 981 self.fit(X_train, X_test.iloc[0 : self.h, :]) 982 else: 983 self.fit(X_train, X_test[0 : self.h, :]) 984 985 return self.models_
Fits (almost) all the regression algorithms, each wrapped in layers of nnetsauce's CustomRegressor, to multivariate time series and returns their scores.
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, warnings related to algorithms that are not
able to run are ignored.
custom_metric: function, optional (default=None)
When a function is provided, models are also evaluated with this custom
evaluation metric.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models are returned as a Pandas DataFrame.
sort_by: string, optional (default=None)
Metric used to sort the models. Available options include 'RMSE', 'MAE', 'MPL' and,
when prediction intervals are computed, 'WINKLERSCORE' and 'COVERAGE', or a custom
metric provided through custom_metric. When left to None, models are sorted by 'RMSE',
or by 'WINKLERSCORE' when replications is not None or type_pi == 'gaussian'.
random_state: int, optional (default=42)
Reproducibility seed.
estimators: list, optional (default='all')
List of estimator (regression algorithm) names, or 'all' to try every available regressor.
preprocess: bool, optional (default=False)
When set to True, a preprocessing ColumnTransformer (numeric and categorical pipelines) is applied to the inputs.
n_layers: int, optional (default=1)
Number of CustomRegressor layers in the network. When set to 1, the model is equivalent to an MTS.
h: int, optional (default=None)
Number of steps ahead to predict (when used, must be > 0 and <= X_test.shape[0]).
All the other parameters are the same as MTS's.
Attributes:
models_: dict-object
Returns a dictionary with each model pipeline as value
with key as name of models.
best_model_: object
Returns the best model pipeline based on the sort_by metric.
Examples:
See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict
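A sketch of LazyDeepMTS with more than one layer, reusing the train/test frames from the LazyMTS sketch above; the estimator subset (scikit-learn names as listed by sklearn.utils.all_estimators) and the hyperparameter values are assumptions.

```python
# Two-layer deep MTS benchmark restricted to a few scikit-learn regressors;
# values are illustrative, not prescriptive.
import nnetsauce as ns

deep_bench = ns.LazyDeepMTS(
    verbose=0,
    ignore_warnings=True,
    n_layers=2,
    lags=10,
    type_pi="gaussian",
    estimators=["Ridge", "ElasticNet", "RandomForestRegressor"],
)
scores = deep_bench.fit(train, test)
print(scores.head())
```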
def fit(self, X_train, X_test, xreg=None, per_series=False, **kwargs)
Fit regression algorithms to X_train, then predict and score on X_test.
Parameters:
X_train: array-like or data frame,
Training vectors, where rows are observations and columns are features (series).
X_test: array-like or data frame,
Testing vectors, where rows are observations and columns are features (series).
xreg: array-like, optional (default=None)
Additional (external) regressors to be passed to self.obj
xreg must be in 'increasing' order (most recent observations last)
per_series: bool, optional (default=False)
When set to True, the metrics are computed series by series.
**kwargs: dict, optional (default=None)
Additional parameters to be passed to `fit` method of `obj`.
Returns:
scores: Pandas DataFrame
Returns metrics of all the models in a Pandas DataFrame.
predictions: Pandas DataFrame
Returns predictions of all the models in a Pandas DataFrame.
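To make the h and per_series options concrete, here is a sketch under the same assumptions as above (synthetic train/test frames, illustrative hyperparameters); it scores on a 5-step horizon only and reports the metrics series by series.

```python
# Score on the first 5 test rows only (h=5) and compute the metrics per series.
# Constructor values are illustrative assumptions.
bench_h = ns.LazyDeepMTS(verbose=0, ignore_warnings=True, n_layers=1,
                         lags=10, type_pi="gaussian", h=5)
scores_per_series = bench_h.fit(train, test, per_series=True)
print(scores_per_series.head())
```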
def provide_models(self, X_train, X_test)
This function returns all the model objects trained by the `fit` method. If `fit` has not been called yet, it is called first, and the fitted models are then returned.
Parameters:
X_train : array-like,
Training vectors, where the rows are the samples
and the columns are the features.
X_test : array-like,
Testing vectors, where the rows are the samples
and the columns are the features.
Returns:
models: dict-object,
a dictionary whose keys are model names and whose values
are the corresponding fitted pipelines.
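Continuing the sketch above, `provide_models` gives access to each fitted pipeline by model name; indexing `scores` by model name is possible because `fit` sorts the score table and sets "Model" as its index.

```python
# retrieve the fitted pipelines; if fit() has not been called yet,
# provide_models() calls it first on (X_train, X_test)
models = regr_mts.provide_models(X_train, X_test)

print(list(models.keys())[:5])      # names of the fitted models
best_name = scores.index[0]         # scores is sorted and indexed by model name
best_pipeline = models[best_name]   # pipeline of the top-ranked model
```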
18class MLARCH(MTS): 19 """Machine Learning with ARCH effects for time series forecasting 20 21 Parameters: 22 23 model_mean: object of class nnetsauce.MTS 24 Model for mean prediction (default: None, uses obj) 25 26 model_sigma: object of class nnetsauce.MTS 27 Model for residuals volatility prediction (default: None, uses obj) 28 29 model_residuals: object of class nnetsauce.MTS 30 Model for residuals prediction (default: None, uses obj) 31 32 Examples: 33 34 See examples/mlarch.py 35 36 """ 37 def __init__( 38 self, 39 model_mean, 40 model_sigma, 41 model_residuals 42 ): 43 assert isinstance(model_mean, MTS), "model_mean must be an object of class nnetsauce.MTS" 44 assert isinstance(model_sigma, MTS), "model_sigma must be an object of class nnetsauce.MTS" 45 assert isinstance(model_residuals, MTS), "model_residuals must be an object of class nnetsauce.MTS" 46 assert model_sigma.type_pi.startswith("scp") and model_sigma.replications is not None, \ 47 "for now, the models must be conformalized, i.e type_pi must start with 'scp' and replications must be an integer" 48 assert model_residuals.type_pi.startswith("scp") and model_residuals.replications is not None, \ 49 "for now, the models must be conformalized, i.e type_pi must start with 'scp' and replications must be an integer" 50 51 self.model_mean = model_mean 52 self.model_sigma = model_sigma 53 self.model_residuals = model_residuals 54 55 self.mean_residuals_ = None 56 self.mean_residuals_wilcoxon_test_ = None 57 self.mean_residuals_kpss_test_ = None 58 self.standardized_residuals_ = None 59 60 61 def fit(self, y): 62 """Fit the MLARCH model to the time series data. 63 64 Parameters 65 ---------- 66 y : array-like of shape (n_samples,) 67 The target time series to be fitted. 68 69 Returns 70 ------- 71 self : object 72 Returns self. 73 74 Notes 75 ----- 76 This method: 77 78 1. Fits the mean model to the time series 79 2. Performs statistical tests on the residuals (Wilcoxon and KPSS) 80 3. Fits the volatility model to the squared residuals 81 4. Computes standardized residuals 82 5. 
Fits the residuals model to the standardized residuals 83 """ 84 n = len(y) 85 self.model_mean.fit(y.reshape(-1, 1)) 86 # Wilcoxon signed-rank test on residuals (mean = 0) 87 self.mean_residuals_wilcoxon_test_ = stats.wilcoxon(self.model_mean.residuals_) 88 # KPSS test for stationarity on residuals 89 self.mean_residuals_kpss_test_ = kpss(self.model_mean.residuals_, regression='c') 90 self.model_sigma.fit(np.log(self.model_mean.residuals_.reshape(-1, 1)**2)) 91 # n//2 here because the model is conformalized 92 fitted_sigma = self.model_sigma.residuals_ + np.log(self.model_mean.residuals_**2)[(n//2):,:] 93 # standardized residuals 94 self.standardized_residuals_ = self.model_mean.residuals_[(n//2):,:]/np.sqrt(np.exp(fitted_sigma)) 95 self.model_residuals.fit(self.standardized_residuals_.reshape(-1, 1)) 96 97 # Calculate AIC 98 # Get predictions from all models 99 mean_pred = self.model_mean.predict(h=0).values.ravel() 100 sigma_pred = self.model_sigma.predict(h=0).values.ravel() 101 z_pred = self.model_residuals.predict(h=0).values.ravel() 102 103 # Calculate combined predictions 104 combined_pred = mean_pred + z_pred * np.sqrt(np.exp(sigma_pred)) 105 106 # Calculate SSE using the last half of the data (matching standardized_residuals_) 107 y_actual = y[(n//2):].ravel() 108 self.sse_ = np.sum((y_actual - combined_pred) ** 2) 109 110 # Calculate number of parameters (sum of parameters from all three models) 111 n_params = (self.model_mean.n_hidden_features + 1 + # mean model 112 self.model_sigma.n_hidden_features + 1 + # sigma model 113 self.model_residuals.n_hidden_features + 1) # residuals model 114 115 # Calculate AIC 116 n_samples = len(y_actual) 117 self.aic_ = n_samples * np.log(self.sse_/n_samples) + 2 * n_params 118 119 return self 120 121 122 def predict(self, h=5, level=95): 123 """Predict (probabilistic) future values of the time series. 124 125 Parameters 126 ---------- 127 h : int, default=5 128 The forecast horizon. 129 level : int, default=95 130 The confidence level for prediction intervals. 131 132 Returns 133 ------- 134 DescribeResult : namedtuple 135 A named tuple containing: 136 137 - mean : array-like of shape (h,) 138 The mean forecast. 139 - sims : array-like of shape (h, n_replications) 140 The simulated forecasts. 141 - lower : array-like of shape (h,) 142 The lower bound of the prediction interval. 143 - upper : array-like of shape (h,) 144 The upper bound of the prediction interval. 145 146 Notes 147 ----- 148 This method: 149 1. Generates mean forecasts using the mean model 150 2. Generates standardized residual forecasts using the residuals model 151 3. Generates volatility forecasts using the sigma model 152 4. Combines these forecasts to generate the final predictions 153 5. Computes prediction intervals at the specified confidence level 154 """ 155 DescribeResult = namedtuple( 156 "DescribeResult", ("mean", "sims", "lower", "upper") 157 ) 158 mean_forecast = self.model_mean.predict(h=h).values.ravel() 159 preds_z = self.model_residuals.predict(h=h) 160 preds_sigma = self.model_sigma.predict(h=h) 161 sims_z = preds_z.sims 162 sims_sigma = preds_sigma.sims 163 164 f = [] 165 for i in range(len(sims_z)): 166 f.append(mean_forecast + sims_z[i].values.ravel()*np.sqrt(np.exp(sims_sigma[i].values.ravel()))) 167 168 f = np.asarray(f).T 169 mean_f = np.mean(f, axis=1) 170 alpha = 1 - level/100 171 lower_bound = np.quantile(f, alpha/2, axis=1) 172 upper_bound = np.quantile(f, 1-alpha/2, axis=1) 173 174 return DescribeResult(mean_f, f, 175 lower_bound, upper_bound)
Machine Learning with ARCH effects for time series forecasting
Parameters:
model_mean: object of class nnetsauce.MTS
Model for the conditional mean prediction.
model_sigma: object of class nnetsauce.MTS
Model for the residuals' volatility prediction.
model_residuals: object of class nnetsauce.MTS
Model for the standardized residuals prediction.
Examples:
See examples/mlarch.py
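As a complement to examples/mlarch.py, here is a hedged sketch of how the three sub-models can be wired together. The `Ridge` base learner and the hyperparameters are illustrative assumptions; the only hard requirements, enforced by the assertions in `__init__`, are that `model_sigma` and `model_residuals` are conformalized (`type_pi` starting with 'scp') with an integer number of `replications`.

```python
import numpy as np
import nnetsauce as ns
from sklearn.linear_model import Ridge

np.random.seed(123)
y = np.cumsum(np.random.randn(200))   # a toy univariate series

# the volatility and residuals models must be conformalized MTS objects
model_mean = ns.MTS(Ridge(), lags=1)
model_sigma = ns.MTS(Ridge(), lags=1, type_pi="scp-kde", replications=100)
model_z = ns.MTS(Ridge(), lags=1, type_pi="scp-kde", replications=100)

obj = ns.MLARCH(model_mean=model_mean,
                model_sigma=model_sigma,
                model_residuals=model_z)

# obj.fit(y) followed by obj.predict(h=10, level=95) returns a namedtuple
# with fields mean, sims, lower, upper (see the fit/predict docstrings below)
```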
61 def fit(self, y): 62 """Fit the MLARCH model to the time series data. 63 64 Parameters 65 ---------- 66 y : array-like of shape (n_samples,) 67 The target time series to be fitted. 68 69 Returns 70 ------- 71 self : object 72 Returns self. 73 74 Notes 75 ----- 76 This method: 77 78 1. Fits the mean model to the time series 79 2. Performs statistical tests on the residuals (Wilcoxon and KPSS) 80 3. Fits the volatility model to the squared residuals 81 4. Computes standardized residuals 82 5. Fits the residuals model to the standardized residuals 83 """ 84 n = len(y) 85 self.model_mean.fit(y.reshape(-1, 1)) 86 # Wilcoxon signed-rank test on residuals (mean = 0) 87 self.mean_residuals_wilcoxon_test_ = stats.wilcoxon(self.model_mean.residuals_) 88 # KPSS test for stationarity on residuals 89 self.mean_residuals_kpss_test_ = kpss(self.model_mean.residuals_, regression='c') 90 self.model_sigma.fit(np.log(self.model_mean.residuals_.reshape(-1, 1)**2)) 91 # n//2 here because the model is conformalized 92 fitted_sigma = self.model_sigma.residuals_ + np.log(self.model_mean.residuals_**2)[(n//2):,:] 93 # standardized residuals 94 self.standardized_residuals_ = self.model_mean.residuals_[(n//2):,:]/np.sqrt(np.exp(fitted_sigma)) 95 self.model_residuals.fit(self.standardized_residuals_.reshape(-1, 1)) 96 97 # Calculate AIC 98 # Get predictions from all models 99 mean_pred = self.model_mean.predict(h=0).values.ravel() 100 sigma_pred = self.model_sigma.predict(h=0).values.ravel() 101 z_pred = self.model_residuals.predict(h=0).values.ravel() 102 103 # Calculate combined predictions 104 combined_pred = mean_pred + z_pred * np.sqrt(np.exp(sigma_pred)) 105 106 # Calculate SSE using the last half of the data (matching standardized_residuals_) 107 y_actual = y[(n//2):].ravel() 108 self.sse_ = np.sum((y_actual - combined_pred) ** 2) 109 110 # Calculate number of parameters (sum of parameters from all three models) 111 n_params = (self.model_mean.n_hidden_features + 1 + # mean model 112 self.model_sigma.n_hidden_features + 1 + # sigma model 113 self.model_residuals.n_hidden_features + 1) # residuals model 114 115 # Calculate AIC 116 n_samples = len(y_actual) 117 self.aic_ = n_samples * np.log(self.sse_/n_samples) + 2 * n_params 118 119 return self
Fit the MLARCH model to the time series data.
Parameters
y : array-like of shape (n_samples,)
The target time series to be fitted.
Returns
self : object
Returns self.
Notes
This method:
- Fits the mean model to the time series
- Performs statistical tests on the residuals (Wilcoxon and KPSS)
- Fits the volatility model to the squared residuals
- Computes standardized residuals
- Fits the residuals model to the standardized residuals
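The volatility and standardization steps in the notes above reduce to fitting the sigma model on the log squared residuals and rescaling. The snippet below restates that arithmetic with toy arrays; it is not the library's API, and the constant `fitted_log_var` is only a stand-in for the sigma model's fitted values.

```python
import numpy as np

rng = np.random.default_rng(123)
residuals = rng.normal(scale=0.5, size=(100, 1))      # toy mean-model residuals

log_sq = np.log(residuals ** 2)                       # target of the volatility model
fitted_log_var = np.full_like(log_sq, log_sq.mean())  # stand-in for its fitted values
sigma = np.sqrt(np.exp(fitted_log_var))               # volatility estimate
z = residuals / sigma                                  # standardized residuals
```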
122 def predict(self, h=5, level=95): 123 """Predict (probabilistic) future values of the time series. 124 125 Parameters 126 ---------- 127 h : int, default=5 128 The forecast horizon. 129 level : int, default=95 130 The confidence level for prediction intervals. 131 132 Returns 133 ------- 134 DescribeResult : namedtuple 135 A named tuple containing: 136 137 - mean : array-like of shape (h,) 138 The mean forecast. 139 - sims : array-like of shape (h, n_replications) 140 The simulated forecasts. 141 - lower : array-like of shape (h,) 142 The lower bound of the prediction interval. 143 - upper : array-like of shape (h,) 144 The upper bound of the prediction interval. 145 146 Notes 147 ----- 148 This method: 149 1. Generates mean forecasts using the mean model 150 2. Generates standardized residual forecasts using the residuals model 151 3. Generates volatility forecasts using the sigma model 152 4. Combines these forecasts to generate the final predictions 153 5. Computes prediction intervals at the specified confidence level 154 """ 155 DescribeResult = namedtuple( 156 "DescribeResult", ("mean", "sims", "lower", "upper") 157 ) 158 mean_forecast = self.model_mean.predict(h=h).values.ravel() 159 preds_z = self.model_residuals.predict(h=h) 160 preds_sigma = self.model_sigma.predict(h=h) 161 sims_z = preds_z.sims 162 sims_sigma = preds_sigma.sims 163 164 f = [] 165 for i in range(len(sims_z)): 166 f.append(mean_forecast + sims_z[i].values.ravel()*np.sqrt(np.exp(sims_sigma[i].values.ravel()))) 167 168 f = np.asarray(f).T 169 mean_f = np.mean(f, axis=1) 170 alpha = 1 - level/100 171 lower_bound = np.quantile(f, alpha/2, axis=1) 172 upper_bound = np.quantile(f, 1-alpha/2, axis=1) 173 174 return DescribeResult(mean_f, f, 175 lower_bound, upper_bound)
Predict (probabilistic) future values of the time series.
Parameters
h : int, default=5
The forecast horizon.
level : int, default=95
The confidence level for prediction intervals.
Returns
DescribeResult : namedtuple
A named tuple containing:
- mean : array-like of shape (h,)
The mean forecast.
- sims : array-like of shape (h, n_replications)
The simulated forecasts.
- lower : array-like of shape (h,)
The lower bound of the prediction interval.
- upper : array-like of shape (h,)
The upper bound of the prediction interval.
Notes
This method:
- Generates mean forecasts using the mean model
- Generates standardized residual forecasts using the residuals model
- Generates volatility forecasts using the sigma model
- Combines these forecasts to generate the final predictions
- Computes prediction intervals at the specified confidence level
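The combination step mirrors the loop in the source above: each simulated path is the mean forecast plus a standardized-residual simulation scaled by the simulated volatility, and the bounds are empirical quantiles across paths. A standalone restatement with toy stand-ins for the three sub-models' outputs:

```python
import numpy as np

rng = np.random.default_rng(123)
h, n_reps, level = 5, 250, 95

# toy stand-ins for the three sub-models' forecasts over the horizon
mean_forecast = np.linspace(1.0, 1.4, h)                    # conditional mean forecast
sims_z = [rng.standard_normal(h) for _ in range(n_reps)]    # standardized residual sims
sims_sigma = [np.full(h, -2.0) for _ in range(n_reps)]      # log-variance sims

# each simulated path: mean + z * sqrt(exp(log-variance)), as in the loop above
f = np.column_stack([mean_forecast + z * np.sqrt(np.exp(s))
                     for z, s in zip(sims_z, sims_sigma)])

alpha = 1 - level / 100
mean_f = f.mean(axis=1)                        # point forecast (average over sims)
lower = np.quantile(f, alpha / 2, axis=1)      # lower prediction bound
upper = np.quantile(f, 1 - alpha / 2, axis=1)  # upper prediction bound
```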
6class MedianVotingRegressor(VotingRegressor): 7 def predict(self, X): 8 """ 9 Predict using the median of the base regressors' predictions. 10 11 Parameters: 12 X (array-like): Feature matrix for predictions. 13 14 Returns: 15 y_pred (array): Median of predictions from the base regressors. 16 """ 17 predictions = np.asarray( 18 [regressor.predict(X) for regressor in self.estimators_] 19 ) 20 return np.median(predictions, axis=0)
Prediction voting regressor for unfitted estimators.
A voting regressor is an ensemble meta-estimator that fits several base regressors, each on the whole dataset. Then it averages the individual predictions to form a final prediction.
Read more in the scikit-learn User Guide (voting regressor section).
*New in version 0.21.*
Parameters
estimators : list of (str, estimator) tuples
Invoking the fit method on the VotingRegressor will fit clones of those original estimators that will be stored in the class attribute self.estimators_. An estimator can be set to 'drop' using set_params().
*Changed in version 0.21:* 'drop' is accepted. Using None was deprecated in 0.22 and support was removed in 0.24.
weights : array-like of shape (n_regressors,), default=None
Sequence of weights (float or int) to weight the occurrences of predicted values before averaging. Uses uniform weights if None.
n_jobs : int, default=None
The number of jobs to run in parallel for fit. None means 1 unless in a joblib.parallel_backend context. -1 means using all processors. See the scikit-learn glossary entry on n_jobs for more details.
verbose : bool, default=False
If True, the time elapsed while fitting will be printed as it is completed.
*New in version 0.23.*
Attributes
estimators_ : list of regressors
The collection of fitted sub-estimators as defined in estimators that are not 'drop'.
named_estimators_ : sklearn.utils.Bunch
Attribute to access any fitted sub-estimators by name.
*New in version 0.20.*
n_features_in_ : int
Number of features seen during fit. Only defined if the underlying regressor exposes such an attribute when fit.
*New in version 0.24.*
feature_names_in_ : ndarray of shape (n_features_in_,)
Names of features seen during fit. Only defined if the underlying estimators expose such an attribute when fit.
*New in version 1.0.*
See Also
VotingClassifier : Soft Voting/Majority Rule classifier.
Examples
>>> import numpy as np
>>> from sklearn.linear_model import LinearRegression
>>> from sklearn.ensemble import RandomForestRegressor
>>> from sklearn.ensemble import VotingRegressor
>>> from sklearn.neighbors import KNeighborsRegressor
>>> r1 = LinearRegression()
>>> r2 = RandomForestRegressor(n_estimators=10, random_state=1)
>>> r3 = KNeighborsRegressor()
>>> X = np.array([[1, 1], [2, 4], [3, 9], [4, 16], [5, 25], [6, 36]])
>>> y = np.array([2, 6, 12, 20, 30, 42])
>>> er = VotingRegressor([('lr', r1), ('rf', r2), ('r3', r3)])
>>> print(er.fit(X, y).predict(X))
[ 6.8... 8.4... 12.5... 17.8... 26... 34...]
In the following example, we drop the 'lr' estimator with set_params() and fit the remaining two estimators:
>>> er = er.set_params(lr='drop')
>>> er = er.fit(X, y)
>>> len(er.estimators_)
2
7 def predict(self, X): 8 """ 9 Predict using the median of the base regressors' predictions. 10 11 Parameters: 12 X (array-like): Feature matrix for predictions. 13 14 Returns: 15 y_pred (array): Median of predictions from the base regressors. 16 """ 17 predictions = np.asarray( 18 [regressor.predict(X) for regressor in self.estimators_] 19 ) 20 return np.median(predictions, axis=0)
Predict using the median of the base regressors' predictions.
Parameters:
X (array-like): Feature matrix for predictions.
Returns:
y_pred (array): Median of predictions from the base regressors.
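Since MedianVotingRegressor only overrides `predict`, it is fitted exactly like scikit-learn's VotingRegressor. Reusing the toy data from the docstring above (and assuming the class is exposed at the top level of the package):

```python
import numpy as np
import nnetsauce as ns
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor

X = np.array([[1, 1], [2, 4], [3, 9], [4, 16], [5, 25], [6, 36]])
y = np.array([2, 6, 12, 20, 30, 42])

er = ns.MedianVotingRegressor([
    ("lr", LinearRegression()),
    ("rf", RandomForestRegressor(n_estimators=10, random_state=1)),
    ("knn", KNeighborsRegressor()),
])
er.fit(X, y)                # inherited from VotingRegressor
print(er.predict(X))        # element-wise median of the three base predictions
```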
28class MTS(Base): 29 """Univariate and multivariate time series (MTS) forecasting with Quasi-Randomized networks 30 31 Parameters: 32 33 obj: object. 34 any object containing a method fit (obj.fit()) and a method predict 35 (obj.predict()). 36 37 n_hidden_features: int. 38 number of nodes in the hidden layer. 39 40 activation_name: str. 41 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'. 42 43 a: float. 44 hyperparameter for 'prelu' or 'elu' activation function. 45 46 nodes_sim: str. 47 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 48 'uniform'. 49 50 bias: boolean. 51 indicates if the hidden layer contains a bias term (True) or not 52 (False). 53 54 dropout: float. 55 regularization parameter; (random) percentage of nodes dropped out 56 of the training. 57 58 direct_link: boolean. 59 indicates if the original predictors are included (True) in model's fitting or not (False). 60 61 n_clusters: int. 62 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering). 63 64 cluster_encode: bool. 65 defines how the variable containing clusters is treated (default is one-hot) 66 if `False`, then labels are used, without one-hot encoding. 67 68 type_clust: str. 69 type of clustering method: currently k-means ('kmeans') or Gaussian 70 Mixture Model ('gmm'). 71 72 type_scaling: a tuple of 3 strings. 73 scaling methods for inputs, hidden layer, and clustering respectively 74 (and when relevant). 75 Currently available: standardization ('std') or MinMax scaling ('minmax'). 76 77 lags: int. 78 number of lags used for each time series. 79 If string, lags must be one of 'AIC', 'AICc', or 'BIC'. 80 81 type_pi: str. 82 type of prediction interval; currently: 83 - "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case 84 - "kde": based on Kernel Density Estimation of in-sample residuals 85 - "bootstrap": based on independent bootstrap of in-sample residuals 86 - "block-bootstrap": based on basic block bootstrap of in-sample residuals 87 - "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals 88 - "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals 89 - "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals 90 - "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals 91 - "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals 92 - "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals 93 - based on copulas of in-sample residuals: 'vine-tll', 'vine-bb1', 'vine-bb6', 'vine-bb7', 'vine-bb8', 'vine-clayton', 94 'vine-frank', 'vine-gaussian', 'vine-gumbel', 'vine-indep', 'vine-joe', 'vine-student' 95 - 'scp-vine-tll', 'scp-vine-bb1', 'scp-vine-bb6', 'scp-vine-bb7', 'scp-vine-bb8', 'scp-vine-clayton', 96 'scp-vine-frank', 'scp-vine-gaussian', 'scp-vine-gumbel', 'scp-vine-indep', 'scp-vine-joe', 'scp-vine-student' 97 - 'scp2-vine-tll', 'scp2-vine-bb1', 'scp2-vine-bb6', 'scp2-vine-bb7', 'scp2-vine-bb8', 'scp2-vine-clayton', 98 'scp2-vine-frank', 'scp2-vine-gaussian', 'scp2-vine-gumbel', 'scp2-vine-indep', 'scp2-vine-joe', 'scp2-vine-student' 99 100 block_size: int. 
101 size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap"). 102 Default is round(3.15*(n_residuals^1/3)) 103 104 replications: int. 105 number of replications (if needed, for predictive simulation). Default is 'None'. 106 107 kernel: str. 108 the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'. 109 110 agg: str. 111 either "mean" or "median" for simulation of bootstrap aggregating 112 113 seed: int. 114 reproducibility seed for nodes_sim=='uniform' or predictive simulation. 115 116 backend: str. 117 "cpu" or "gpu" or "tpu". 118 119 verbose: int. 120 0: not printing; 1: printing 121 122 show_progress: bool. 123 True: progress bar when fitting each series; False: no progress bar when fitting each series 124 125 Attributes: 126 127 fit_objs_: dict 128 objects adjusted to each individual time series 129 130 y_: {array-like} 131 MTS responses (most recent observations first) 132 133 X_: {array-like} 134 MTS lags 135 136 xreg_: {array-like} 137 external regressors 138 139 y_means_: dict 140 a dictionary of each series mean values 141 142 preds_: {array-like} 143 successive model predictions 144 145 preds_std_: {array-like} 146 standard deviation around the predictions for Bayesian base learners (`obj`) 147 148 gaussian_preds_std_: {array-like} 149 standard deviation around the predictions for `type_pi='gaussian'` 150 151 return_std_: boolean 152 return uncertainty or not (set in predict) 153 154 df_: data frame 155 the input data frame, in case a data.frame is provided to `fit` 156 157 n_obs_: int 158 number of time series observations (number of rows for multivariate) 159 160 level_: int 161 level of confidence for prediction intervals (default is 95) 162 163 residuals_: {array-like} 164 in-sample residuals (for `type_pi` not conformal prediction) or calibrated residuals 165 (for `type_pi` in conformal prediction) 166 167 residuals_sims_: tuple of {array-like} 168 simulations of in-sample residuals (for `type_pi` not conformal prediction) or 169 calibrated residuals (for `type_pi` in conformal prediction) 170 171 kde_: A scikit-learn object, see https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KernelDensity.html 172 173 residuals_std_dev_: residuals standard deviation 174 175 Examples: 176 177 Example 1: 178 179 ```python 180 import nnetsauce as ns 181 import numpy as np 182 from sklearn import linear_model 183 np.random.seed(123) 184 185 M = np.random.rand(10, 3) 186 M[:,0] = 10*M[:,0] 187 M[:,2] = 25*M[:,2] 188 print(M) 189 190 # Adjust Bayesian Ridge 191 regr4 = linear_model.BayesianRidge() 192 obj_MTS = ns.MTS(regr4, lags = 1, n_hidden_features=5) 193 obj_MTS.fit(M) 194 print(obj_MTS.predict()) 195 196 # with credible intervals 197 print(obj_MTS.predict(return_std=True, level=80)) 198 199 print(obj_MTS.predict(return_std=True, level=95)) 200 ``` 201 202 Example 2: 203 204 ```python 205 import nnetsauce as ns 206 import numpy as np 207 from sklearn import linear_model 208 209 dataset = { 210 'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'], 211 'series1' : [34, 30, 35.6, 33.3, 38.1], 212 'series2' : [4, 5.5, 5.6, 6.3, 5.1], 213 'series3' : [100, 100.5, 100.6, 100.2, 100.1]} 214 df = pd.DataFrame(dataset).set_index('date') 215 print(df) 216 217 # Adjust Bayesian Ridge 218 regr5 = linear_model.BayesianRidge() 219 obj_MTS = ns.MTS(regr5, lags = 1, n_hidden_features=5) 220 obj_MTS.fit(df) 221 print(obj_MTS.predict()) 222 223 # with 
credible intervals 224 print(obj_MTS.predict(return_std=True, level=80)) 225 226 print(obj_MTS.predict(return_std=True, level=95)) 227 ``` 228 """ 229 230 # construct the object ----- 231 232 def __init__( 233 self, 234 obj, 235 n_hidden_features=5, 236 activation_name="relu", 237 a=0.01, 238 nodes_sim="sobol", 239 bias=True, 240 dropout=0, 241 direct_link=True, 242 n_clusters=2, 243 cluster_encode=True, 244 type_clust="kmeans", 245 type_scaling=("std", "std", "std"), 246 lags=1, 247 type_pi="kde", 248 block_size=None, 249 replications=None, 250 kernel="gaussian", 251 agg="mean", 252 seed=123, 253 backend="cpu", 254 verbose=0, 255 show_progress=True, 256 ): 257 258 super().__init__( 259 n_hidden_features=n_hidden_features, 260 activation_name=activation_name, 261 a=a, 262 nodes_sim=nodes_sim, 263 bias=bias, 264 dropout=dropout, 265 direct_link=direct_link, 266 n_clusters=n_clusters, 267 cluster_encode=cluster_encode, 268 type_clust=type_clust, 269 type_scaling=type_scaling, 270 seed=seed, 271 backend=backend, 272 ) 273 274 # Add validation for lags parameter 275 if isinstance(lags, str): 276 assert lags in ( 277 "AIC", 278 "AICc", 279 "BIC", 280 ), "if string, lags must be one of 'AIC', 'AICc', or 'BIC'" 281 else: 282 assert int(lags) == lags, "if numeric, lags parameter should be an integer" 283 284 self.obj = obj 285 self.n_series = None 286 self.lags = lags 287 self.type_pi = type_pi 288 self.block_size = block_size 289 self.replications = replications 290 self.kernel = kernel 291 self.agg = agg 292 self.verbose = verbose 293 self.show_progress = show_progress 294 self.series_names = None 295 self.input_dates = None 296 self.fit_objs_ = {} 297 self.y_ = None # MTS responses (most recent observations first) 298 self.X_ = None # MTS lags 299 self.xreg_ = None 300 self.y_means_ = {} 301 self.mean_ = None 302 self.median_ = None 303 self.upper_ = None 304 self.lower_ = None 305 self.output_dates_ = None 306 self.preds_std_ = [] 307 self.gaussian_preds_std_ = None 308 self.alpha_ = None 309 self.return_std_ = None 310 self.df_ = None 311 self.residuals_ = [] 312 self.abs_calib_residuals_ = None 313 self.calib_residuals_quantile_ = None 314 self.residuals_sims_ = None 315 self.kde_ = None 316 self.sims_ = None 317 self.residuals_std_dev_ = None 318 self.n_obs_ = None 319 self.level_ = None 320 self.init_n_series_ = None 321 322 def fit(self, X, xreg=None, **kwargs): 323 """Fit MTS model to training data X, with optional regressors xreg 324 325 Parameters: 326 327 X: {array-like}, shape = [n_samples, n_features] 328 Training time series, where n_samples is the number 329 of samples and n_features is the number of features; 330 X must be in increasing order (most recent observations last) 331 332 xreg: {array-like}, shape = [n_samples, n_features_xreg] 333 Additional (external) regressors to be passed to self.obj 334 xreg must be in 'increasing' order (most recent observations last) 335 336 **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity) 337 338 Returns: 339 340 self: object 341 """ 342 343 self.init_n_series_ = X.shape[1] 344 345 # Automatic lag selection if requested 346 if isinstance(self.lags, str): 347 max_lags = min(25, X.shape[0] // 4) 348 best_ic = float("inf") 349 best_lags = 1 350 351 if self.verbose: 352 print(f"\nSelecting optimal number of lags using {self.lags}...") 353 iterator = tqdm(range(1, max_lags + 1)) 354 else: 355 iterator = range(1, max_lags + 1) 356 357 for lag in iterator: 358 # 
Convert DataFrame to numpy array before reversing 359 if isinstance(X, pd.DataFrame): 360 X_values = X.values[::-1] 361 else: 362 X_values = X[::-1] 363 364 # Try current lag value 365 if self.init_n_series_ > 1: 366 mts_input = ts.create_train_inputs(X_values, lag) 367 else: 368 mts_input = ts.create_train_inputs(X_values.reshape(-1, 1), lag) 369 370 # Cook training set and fit model 371 dummy_y, scaled_Z = self.cook_training_set( 372 y=np.ones(mts_input[0].shape[0]), X=mts_input[1] 373 ) 374 residuals_ = [] 375 376 for i in range(self.init_n_series_): 377 y_mean = np.mean(mts_input[0][:, i]) 378 centered_y_i = mts_input[0][:, i] - y_mean 379 self.obj.fit(X=scaled_Z, y=centered_y_i) 380 residuals_.append( 381 (centered_y_i - self.obj.predict(scaled_Z)).tolist() 382 ) 383 384 self.residuals_ = np.asarray(residuals_).T 385 ic = self._compute_information_criterion( 386 curr_lags=lag, criterion=self.lags 387 ) 388 389 if self.verbose: 390 print(f"Trying lags={lag}, {self.lags}={ic:.2f}") 391 392 if ic < best_ic: 393 best_ic = ic 394 best_lags = lag 395 396 if self.verbose: 397 print(f"\nSelected {best_lags} lags with {self.lags}={best_ic:.2f}") 398 399 self.lags = best_lags 400 401 self.input_dates = None 402 self.df_ = None 403 404 if isinstance(X, pd.DataFrame) is False: 405 # input data set is a numpy array 406 if xreg is None: 407 X = pd.DataFrame(X) 408 self.series_names = ["series" + str(i) for i in range(X.shape[1])] 409 else: 410 # xreg is not None 411 X = mo.cbind(X, xreg) 412 self.xreg_ = xreg 413 414 else: # input data set is a DataFrame with column names 415 416 X_index = None 417 if X.index is not None: 418 X_index = X.index 419 if xreg is None: 420 X = copy.deepcopy(mo.convert_df_to_numeric(X)) 421 else: 422 X = copy.deepcopy(mo.cbind(mo.convert_df_to_numeric(X), xreg)) 423 self.xreg_ = xreg 424 if X_index is not None: 425 X.index = X_index 426 self.series_names = X.columns.tolist() 427 428 if isinstance(X, pd.DataFrame): 429 if self.df_ is None: 430 self.df_ = X 431 X = X.values 432 else: 433 input_dates_prev = pd.DatetimeIndex(self.df_.index.values) 434 frequency = pd.infer_freq(input_dates_prev) 435 self.df_ = pd.concat([self.df_, X], axis=0) 436 self.input_dates = pd.date_range( 437 start=input_dates_prev[0], 438 periods=len(input_dates_prev) + X.shape[0], 439 freq=frequency, 440 ).values.tolist() 441 self.df_.index = self.input_dates 442 X = self.df_.values 443 self.df_.columns = self.series_names 444 else: 445 if self.df_ is None: 446 self.df_ = pd.DataFrame(X, columns=self.series_names) 447 else: 448 self.df_ = pd.concat( 449 [self.df_, pd.DataFrame(X, columns=self.series_names)], 450 axis=0, 451 ) 452 453 self.input_dates = ts.compute_input_dates(self.df_) 454 455 try: 456 # multivariate time series 457 n, p = X.shape 458 except: 459 # univariate time series 460 n = X.shape[0] 461 p = 1 462 self.n_obs_ = n 463 464 rep_1_n = np.repeat(1, n) 465 466 self.y_ = None 467 self.X_ = None 468 self.n_series = p 469 self.fit_objs_.clear() 470 self.y_means_.clear() 471 residuals_ = [] 472 self.residuals_ = None 473 self.residuals_sims_ = None 474 self.kde_ = None 475 self.sims_ = None 476 self.scaled_Z_ = None 477 self.centered_y_is_ = [] 478 479 if self.init_n_series_ > 1: 480 # multivariate time series 481 mts_input = ts.create_train_inputs(X[::-1], self.lags) 482 else: 483 # univariate time series 484 mts_input = ts.create_train_inputs(X.reshape(-1, 1)[::-1], self.lags) 485 486 self.y_ = mts_input[0] 487 488 self.X_ = mts_input[1] 489 490 dummy_y, scaled_Z = 
self.cook_training_set(y=rep_1_n, X=self.X_) 491 492 self.scaled_Z_ = scaled_Z 493 494 # loop on all the time series and adjust self.obj.fit 495 if self.verbose > 0: 496 print( 497 f"\n Adjusting {type(self.obj).__name__} to multivariate time series... \n" 498 ) 499 500 if self.show_progress is True: 501 iterator = tqdm(range(self.init_n_series_)) 502 else: 503 iterator = range(self.init_n_series_) 504 505 if self.type_pi in ( 506 "gaussian", 507 "kde", 508 "bootstrap", 509 "block-bootstrap", 510 ) or self.type_pi.startswith("vine"): 511 for i in iterator: 512 y_mean = np.mean(self.y_[:, i]) 513 self.y_means_[i] = y_mean 514 centered_y_i = self.y_[:, i] - y_mean 515 self.centered_y_is_.append(centered_y_i) 516 self.obj.fit(X=scaled_Z, y=centered_y_i) 517 self.fit_objs_[i] = deepcopy(self.obj) 518 residuals_.append( 519 (centered_y_i - self.fit_objs_[i].predict(scaled_Z)).tolist() 520 ) 521 522 if self.type_pi.startswith("scp"): 523 # split conformal prediction 524 for i in iterator: 525 n_y = self.y_.shape[0] 526 n_y_half = n_y // 2 527 first_half_idx = range(0, n_y_half) 528 second_half_idx = range(n_y_half, n_y) 529 y_mean_temp = np.mean(self.y_[first_half_idx, i]) 530 centered_y_i_temp = self.y_[first_half_idx, i] - y_mean_temp 531 self.obj.fit(X=scaled_Z[first_half_idx, :], y=centered_y_i_temp) 532 # calibrated residuals actually 533 residuals_.append( 534 ( 535 self.y_[second_half_idx, i] 536 - (y_mean_temp + self.obj.predict(scaled_Z[second_half_idx, :])) 537 ).tolist() 538 ) 539 # fit on the second half 540 y_mean = np.mean(self.y_[second_half_idx, i]) 541 self.y_means_[i] = y_mean 542 centered_y_i = self.y_[second_half_idx, i] - y_mean 543 self.obj.fit(X=scaled_Z[second_half_idx, :], y=centered_y_i) 544 self.fit_objs_[i] = deepcopy(self.obj) 545 546 self.residuals_ = np.asarray(residuals_).T 547 548 if self.type_pi == "gaussian": 549 self.gaussian_preds_std_ = np.std(self.residuals_, axis=0) 550 551 if self.type_pi.startswith("scp2"): 552 # Calculate mean and standard deviation for each column 553 data_mean = np.mean(self.residuals_, axis=0) 554 self.residuals_std_dev_ = np.std(self.residuals_, axis=0) 555 # Center and scale the array using broadcasting 556 self.residuals_ = ( 557 self.residuals_ - data_mean[np.newaxis, :] 558 ) / self.residuals_std_dev_[np.newaxis, :] 559 560 if self.replications != None and "kde" in self.type_pi: 561 if self.verbose > 0: 562 print(f"\n Simulate residuals using {self.kernel} kernel... 
\n") 563 assert self.kernel in ( 564 "gaussian", 565 "tophat", 566 ), "currently, 'kernel' must be either 'gaussian' or 'tophat'" 567 kernel_bandwidths = {"bandwidth": np.logspace(-6, 6, 150)} 568 grid = GridSearchCV( 569 KernelDensity(kernel=self.kernel, **kwargs), 570 param_grid=kernel_bandwidths, 571 ) 572 grid.fit(self.residuals_) 573 574 if self.verbose > 0: 575 print( 576 f"\n Best parameters for {self.kernel} kernel: {grid.best_params_} \n" 577 ) 578 579 self.kde_ = grid.best_estimator_ 580 581 return self 582 583 def partial_fit(self, X, xreg=None, **kwargs): 584 """Update the model with new observations X, with optional regressors xreg 585 586 Parameters: 587 588 X: {array-like}, shape = [n_samples, n_features] 589 Training time series, where n_samples is the number 590 of samples and n_features is the number of features; 591 X must be in increasing order (most recent observations last) 592 593 xreg: {array-like}, shape = [n_samples, n_features_xreg] 594 Additional (external) regressors to be passed to self.obj 595 xreg must be in 'increasing' order (most recent observations last) 596 597 **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity) 598 599 Returns: 600 601 self: object 602 """ 603 604 assert self.df_ is not None, "fit() must be called before partial_fit()" 605 606 if (isinstance(X, pd.DataFrame) is False) and isinstance(X, pd.Series) is False: 607 if len(X.shape) == 1: 608 X = X.reshape(1, -1) 609 610 return self.fit(X, xreg, **kwargs) 611 612 else: 613 if len(X.shape) == 1: 614 X = pd.DataFrame(X.values.reshape(1, -1), columns=self.df_.columns) 615 616 return self.fit(X, xreg, **kwargs) 617 618 def predict(self, h=5, level=95, **kwargs): 619 """Forecast all the time series, h steps ahead""" 620 621 self.output_dates_, frequency = ts.compute_output_dates(self.df_, h) 622 623 self.level_ = level 624 625 self.return_std_ = False # do not remove (/!\) 626 627 self.mean_ = None # do not remove (/!\) 628 629 self.mean_ = deepcopy(self.y_) # do not remove (/!\) 630 631 self.lower_ = None # do not remove (/!\) 632 633 self.upper_ = None # do not remove (/!\) 634 635 self.sims_ = None # do not remove (/!\) 636 637 y_means_ = np.asarray([self.y_means_[i] for i in range(self.init_n_series_)]) 638 639 n_features = self.init_n_series_ * self.lags 640 641 self.alpha_ = 100 - level 642 643 pi_multiplier = norm.ppf(1 - self.alpha_ / 200) 644 645 if "return_std" in kwargs: # bayesian forecasting 646 self.return_std_ = True 647 self.preds_std_ = [] 648 DescribeResult = namedtuple( 649 "DescribeResult", ("mean", "lower", "upper") 650 ) # to be updated 651 652 if "return_pi" in kwargs: # split conformal, without simulation 653 mean_pi_ = [] 654 lower_pi_ = [] 655 upper_pi_ = [] 656 median_pi_ = [] 657 DescribeResult = namedtuple( 658 "DescribeResult", ("mean", "lower", "upper") 659 ) # to be updated 660 661 if self.kde_ != None and "kde" in self.type_pi: # kde 662 target_cols = self.df_.columns[ 663 : self.init_n_series_ 664 ] # Get target column names 665 if self.verbose == 1: 666 self.residuals_sims_ = tuple( 667 self.kde_.sample( 668 n_samples=h, random_state=self.seed + 100 * i 669 ) # Keep full sample 670 for i in tqdm(range(self.replications)) 671 ) 672 elif self.verbose == 0: 673 self.residuals_sims_ = tuple( 674 self.kde_.sample( 675 n_samples=h, random_state=self.seed + 100 * i 676 ) # Keep full sample 677 for i in range(self.replications) 678 ) 679 680 # Convert to DataFrames after sampling 681 
self.residuals_sims_ = tuple( 682 pd.DataFrame( 683 sim, # Keep all columns 684 columns=target_cols, # Use original target column names 685 index=self.output_dates_, 686 ) 687 for sim in self.residuals_sims_ 688 ) 689 690 if self.type_pi in ("bootstrap", "scp-bootstrap", "scp2-bootstrap"): 691 assert self.replications is not None and isinstance( 692 self.replications, int 693 ), "'replications' must be provided and be an integer" 694 if self.verbose == 1: 695 self.residuals_sims_ = tuple( 696 ts.bootstrap( 697 self.residuals_, 698 h=h, 699 block_size=None, 700 seed=self.seed + 100 * i, 701 ) 702 for i in tqdm(range(self.replications)) 703 ) 704 elif self.verbose == 0: 705 self.residuals_sims_ = tuple( 706 ts.bootstrap( 707 self.residuals_, 708 h=h, 709 block_size=None, 710 seed=self.seed + 100 * i, 711 ) 712 for i in range(self.replications) 713 ) 714 715 if self.type_pi in ( 716 "block-bootstrap", 717 "scp-block-bootstrap", 718 "scp2-block-bootstrap", 719 ): 720 if self.block_size is None: 721 self.block_size = int( 722 np.ceil(3.15 * (self.residuals_.shape[0] ** (1 / 3))) 723 ) 724 725 assert self.replications is not None and isinstance( 726 self.replications, int 727 ), "'replications' must be provided and be an integer" 728 if self.verbose == 1: 729 self.residuals_sims_ = tuple( 730 ts.bootstrap( 731 self.residuals_, 732 h=h, 733 block_size=self.block_size, 734 seed=self.seed + 100 * i, 735 ) 736 for i in tqdm(range(self.replications)) 737 ) 738 elif self.verbose == 0: 739 self.residuals_sims_ = tuple( 740 ts.bootstrap( 741 self.residuals_, 742 h=h, 743 block_size=self.block_size, 744 seed=self.seed + 100 * i, 745 ) 746 for i in range(self.replications) 747 ) 748 749 if "vine" in self.type_pi: 750 if self.verbose == 1: 751 self.residuals_sims_ = tuple( 752 vinecopula_sample( 753 x=self.residuals_, 754 n_samples=h, 755 method=self.type_pi, 756 random_state=self.seed + 100 * i, 757 ) 758 for i in tqdm(range(self.replications)) 759 ) 760 elif self.verbose == 0: 761 self.residuals_sims_ = tuple( 762 vinecopula_sample( 763 x=self.residuals_, 764 n_samples=h, 765 method=self.type_pi, 766 random_state=self.seed + 100 * i, 767 ) 768 for i in range(self.replications) 769 ) 770 771 mean_ = deepcopy(self.mean_) 772 773 for i in range(h): 774 775 new_obs = ts.reformat_response(mean_, self.lags) 776 new_X = new_obs.reshape(1, -1) 777 cooked_new_X = self.cook_test_set(new_X, **kwargs) 778 779 if "return_std" in kwargs: 780 self.preds_std_.append( 781 [ 782 np.asarray( 783 self.fit_objs_[i].predict(cooked_new_X, return_std=True)[1] 784 ).item() 785 for i in range(self.n_series) 786 ] 787 ) 788 789 if "return_pi" in kwargs: 790 for i in range(self.n_series): 791 preds_pi = self.fit_objs_[i].predict(cooked_new_X, **kwargs) 792 mean_pi_.append(preds_pi.mean[0]) 793 lower_pi_.append(preds_pi.lower[0]) 794 upper_pi_.append(preds_pi.upper[0]) 795 796 predicted_cooked_new_X = np.asarray( 797 [ 798 np.asarray(self.fit_objs_[i].predict(cooked_new_X)).item() 799 for i in range(self.init_n_series_) 800 ] 801 ) 802 803 preds = np.asarray(y_means_ + predicted_cooked_new_X) 804 805 # Create full row with both predictions and external regressors 806 if self.xreg_ is not None and "xreg" in kwargs: 807 next_xreg = kwargs["xreg"].iloc[i : i + 1].values.flatten() 808 full_row = np.concatenate([preds, next_xreg]) 809 else: 810 full_row = preds 811 812 # Create a new row with same number of columns as mean_ 813 new_row = np.zeros((1, mean_.shape[1])) 814 new_row[0, : full_row.shape[0]] = full_row 815 816 # Maintain the 
full dimensionality by using vstack instead of rbind 817 mean_ = np.vstack([new_row, mean_[:-1]]) 818 819 # Final output should only include the target columns 820 self.mean_ = pd.DataFrame( 821 mean_[0:h, : self.init_n_series_][::-1], 822 columns=self.df_.columns[: self.init_n_series_], 823 index=self.output_dates_, 824 ) 825 826 # function's return ---------------------------------------------------------------------- 827 if ( 828 (("return_std" not in kwargs) and ("return_pi" not in kwargs)) 829 and (self.type_pi not in ("gaussian", "scp")) 830 ) or ("vine" in self.type_pi): 831 832 if self.replications is None: 833 return self.mean_.iloc[:, : self.init_n_series_] 834 835 # if "return_std" not in kwargs and self.replications is not None 836 meanf = [] 837 medianf = [] 838 lower = [] 839 upper = [] 840 841 if "scp2" in self.type_pi: 842 843 if self.verbose == 1: 844 self.sims_ = tuple( 845 ( 846 self.mean_ 847 + self.residuals_sims_[i] 848 * self.residuals_std_dev_[np.newaxis, :] 849 for i in tqdm(range(self.replications)) 850 ) 851 ) 852 elif self.verbose == 0: 853 self.sims_ = tuple( 854 ( 855 self.mean_ 856 + self.residuals_sims_[i] 857 * self.residuals_std_dev_[np.newaxis, :] 858 for i in range(self.replications) 859 ) 860 ) 861 else: 862 863 if self.verbose == 1: 864 self.sims_ = tuple( 865 ( 866 self.mean_ + self.residuals_sims_[i] 867 for i in tqdm(range(self.replications)) 868 ) 869 ) 870 elif self.verbose == 0: 871 self.sims_ = tuple( 872 ( 873 self.mean_ + self.residuals_sims_[i] 874 for i in range(self.replications) 875 ) 876 ) 877 878 DescribeResult = namedtuple( 879 "DescribeResult", ("mean", "sims", "lower", "upper") 880 ) 881 for ix in range(self.init_n_series_): 882 sims_ix = getsims(self.sims_, ix) 883 if self.agg == "mean": 884 meanf.append(np.mean(sims_ix, axis=1)) 885 else: 886 medianf.append(np.median(sims_ix, axis=1)) 887 lower.append(np.quantile(sims_ix, q=self.alpha_ / 200, axis=1)) 888 upper.append(np.quantile(sims_ix, q=1 - self.alpha_ / 200, axis=1)) 889 self.mean_ = pd.DataFrame( 890 np.asarray(meanf).T, 891 columns=self.series_names[: self.init_n_series_], # self.df_.columns, 892 index=self.output_dates_, 893 ) 894 895 self.lower_ = pd.DataFrame( 896 np.asarray(lower).T, 897 columns=self.series_names[: self.init_n_series_], # self.df_.columns, 898 index=self.output_dates_, 899 ) 900 901 self.upper_ = pd.DataFrame( 902 np.asarray(upper).T, 903 columns=self.series_names[: self.init_n_series_], # self.df_.columns, 904 index=self.output_dates_, 905 ) 906 907 try: 908 self.median_ = pd.DataFrame( 909 np.asarray(medianf).T, 910 columns=self.series_names[ 911 : self.init_n_series_ 912 ], # self.df_.columns, 913 index=self.output_dates_, 914 ) 915 except Exception as e: 916 pass 917 918 return DescribeResult(self.mean_, self.sims_, self.lower_, self.upper_) 919 920 if ( 921 (("return_std" in kwargs) or ("return_pi" in kwargs)) 922 and (self.type_pi not in ("gaussian", "scp")) 923 ) or "vine" in self.type_pi: 924 DescribeResult = namedtuple("DescribeResult", ("mean", "lower", "upper")) 925 926 self.mean_ = pd.DataFrame( 927 np.asarray(self.mean_), 928 columns=self.series_names, # self.df_.columns, 929 index=self.output_dates_, 930 ) 931 932 if "return_std" in kwargs: 933 934 self.preds_std_ = np.asarray(self.preds_std_) 935 936 self.lower_ = pd.DataFrame( 937 self.mean_.values - pi_multiplier * self.preds_std_, 938 columns=self.series_names, # self.df_.columns, 939 index=self.output_dates_, 940 ) 941 942 self.upper_ = pd.DataFrame( 943 self.mean_.values + 
pi_multiplier * self.preds_std_, 944 columns=self.series_names, # self.df_.columns, 945 index=self.output_dates_, 946 ) 947 948 if "return_pi" in kwargs: 949 950 self.lower_ = pd.DataFrame( 951 np.asarray(lower_pi_).reshape(h, self.n_series) 952 + y_means_[np.newaxis, :], 953 columns=self.series_names, # self.df_.columns, 954 index=self.output_dates_, 955 ) 956 957 self.upper_ = pd.DataFrame( 958 np.asarray(upper_pi_).reshape(h, self.n_series) 959 + y_means_[np.newaxis, :], 960 columns=self.series_names, # self.df_.columns, 961 index=self.output_dates_, 962 ) 963 964 res = DescribeResult(self.mean_, self.lower_, self.upper_) 965 966 if self.xreg_ is not None: 967 if len(self.xreg_.shape) > 1: 968 res2 = mx.tuple_map( 969 res, 970 lambda x: mo.delete_last_columns( 971 x, num_columns=self.xreg_.shape[1] 972 ), 973 ) 974 else: 975 res2 = mx.tuple_map( 976 res, lambda x: mo.delete_last_columns(x, num_columns=1) 977 ) 978 return DescribeResult(res2[0], res2[1], res2[2]) 979 980 return res 981 982 if self.type_pi == "gaussian": 983 984 DescribeResult = namedtuple("DescribeResult", ("mean", "lower", "upper")) 985 986 self.mean_ = pd.DataFrame( 987 np.asarray(self.mean_), 988 columns=self.series_names, # self.df_.columns, 989 index=self.output_dates_, 990 ) 991 992 self.lower_ = pd.DataFrame( 993 self.mean_.values - pi_multiplier * self.gaussian_preds_std_, 994 columns=self.series_names, # self.df_.columns, 995 index=self.output_dates_, 996 ) 997 998 self.upper_ = pd.DataFrame( 999 self.mean_.values + pi_multiplier * self.gaussian_preds_std_, 1000 columns=self.series_names, # self.df_.columns, 1001 index=self.output_dates_, 1002 ) 1003 1004 res = DescribeResult(self.mean_, self.lower_, self.upper_) 1005 1006 if self.xreg_ is not None: 1007 if len(self.xreg_.shape) > 1: 1008 res2 = mx.tuple_map( 1009 res, 1010 lambda x: mo.delete_last_columns( 1011 x, num_columns=self.xreg_.shape[1] 1012 ), 1013 ) 1014 else: 1015 res2 = mx.tuple_map( 1016 res, lambda x: mo.delete_last_columns(x, num_columns=1) 1017 ) 1018 return DescribeResult(res2[0], res2[1], res2[2]) 1019 1020 return res 1021 1022 # After prediction loop, ensure sims only contain target columns 1023 if self.sims_ is not None: 1024 if self.verbose == 1: 1025 self.sims_ = tuple( 1026 sim[:h,] # Only keep target columns and h rows 1027 for sim in tqdm(self.sims_) 1028 ) 1029 elif self.verbose == 0: 1030 self.sims_ = tuple( 1031 sim[:h,] # Only keep target columns and h rows 1032 for sim in self.sims_ 1033 ) 1034 1035 # Convert numpy arrays to DataFrames with proper columns 1036 self.sims_ = tuple( 1037 pd.DataFrame( 1038 sim, 1039 columns=self.df_.columns[: self.init_n_series_], 1040 index=self.output_dates_, 1041 ) 1042 for sim in self.sims_ 1043 ) 1044 1045 if self.type_pi in ("kde", "bootstrap", "block-bootstrap", "vine-copula"): 1046 if self.xreg_ is not None: 1047 # Use getsimsxreg when external regressors are present 1048 target_cols = self.df_.columns[: self.init_n_series_] 1049 self.sims_ = getsimsxreg(self.sims_, self.output_dates_, target_cols) 1050 else: 1051 # Use original getsims for backward compatibility 1052 self.sims_ = getsims(self.sims_) 1053 1054 def score(self, X, training_index, testing_index, scoring=None, **kwargs): 1055 """Train on training_index, score on testing_index.""" 1056 1057 assert ( 1058 bool(set(training_index).intersection(set(testing_index))) == False 1059 ), "Non-overlapping 'training_index' and 'testing_index' required" 1060 1061 # Dimensions 1062 try: 1063 # multivariate time series 1064 n, p = X.shape 1065 
except: 1066 # univariate time series 1067 n = X.shape[0] 1068 p = 1 1069 1070 # Training and testing sets 1071 if p > 1: 1072 X_train = X[training_index, :] 1073 X_test = X[testing_index, :] 1074 else: 1075 X_train = X[training_index] 1076 X_test = X[testing_index] 1077 1078 # Horizon 1079 h = len(testing_index) 1080 assert ( 1081 len(training_index) + h 1082 ) <= n, "Please check lengths of training and testing windows" 1083 1084 # Fit and predict 1085 self.fit(X_train, **kwargs) 1086 preds = self.predict(h=h, **kwargs) 1087 1088 if scoring is None: 1089 scoring = "neg_root_mean_squared_error" 1090 1091 # check inputs 1092 assert scoring in ( 1093 "explained_variance", 1094 "neg_mean_absolute_error", 1095 "neg_mean_squared_error", 1096 "neg_root_mean_squared_error", 1097 "neg_mean_squared_log_error", 1098 "neg_median_absolute_error", 1099 "r2", 1100 ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \ 1101 'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \ 1102 'neg_median_absolute_error', 'r2')" 1103 1104 scoring_options = { 1105 "explained_variance": skm2.explained_variance_score, 1106 "neg_mean_absolute_error": skm2.mean_absolute_error, 1107 "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2), 1108 "neg_root_mean_squared_error": lambda x, y: np.sqrt(np.mean((x - y) ** 2)), 1109 "neg_mean_squared_log_error": skm2.mean_squared_log_error, 1110 "neg_median_absolute_error": skm2.median_absolute_error, 1111 "r2": skm2.r2_score, 1112 } 1113 1114 return scoring_options[scoring](X_test, preds) 1115 1116 def plot(self, series=None, type_axis="dates", type_plot="pi"): 1117 """Plot time series forecast 1118 1119 Parameters: 1120 1121 series: {integer} or {string} 1122 series index or name 1123 1124 """ 1125 1126 assert all( 1127 [ 1128 self.mean_ is not None, 1129 self.lower_ is not None, 1130 self.upper_ is not None, 1131 self.output_dates_ is not None, 1132 ] 1133 ), "model forecasting must be obtained first (with predict)" 1134 1135 if series is None: 1136 # assert ( 1137 # self.init_n_series_ == 1 1138 # ), "please specify series index or name (n_series > 1)" 1139 series = 0 1140 1141 if isinstance(series, str): 1142 assert ( 1143 series in self.series_names 1144 ), f"series {series} doesn't exist in the input dataset" 1145 series_idx = self.df_.columns.get_loc(series) 1146 else: 1147 assert isinstance(series, int) and ( 1148 0 <= series < self.n_series 1149 ), f"check series index (< {self.n_series})" 1150 series_idx = series 1151 1152 y_all = list(self.df_.iloc[:, series_idx]) + list( 1153 self.mean_.iloc[:, series_idx] 1154 ) 1155 y_test = list(self.mean_.iloc[:, series_idx]) 1156 n_points_all = len(y_all) 1157 n_points_train = self.df_.shape[0] 1158 1159 if type_axis == "numeric": 1160 x_all = [i for i in range(n_points_all)] 1161 x_test = [i for i in range(n_points_train, n_points_all)] 1162 1163 if type_axis == "dates": # use dates 1164 x_all = np.concatenate( 1165 (self.input_dates.values, self.output_dates_.values), axis=None 1166 ) 1167 x_test = self.output_dates_.values 1168 1169 if type_plot == "pi": 1170 fig, ax = plt.subplots() 1171 ax.plot(x_all, y_all, "-") 1172 ax.plot(x_test, y_test, "-", color="orange") 1173 ax.fill_between( 1174 x_test, 1175 self.lower_.iloc[:, series_idx], 1176 self.upper_.iloc[:, series_idx], 1177 alpha=0.2, 1178 color="orange", 1179 ) 1180 if self.replications is None: 1181 if self.n_series > 1: 1182 plt.title( 1183 f"prediction intervals for {series}", 1184 loc="left", 1185 
fontsize=12, 1186 fontweight=0, 1187 color="black", 1188 ) 1189 else: 1190 plt.title( 1191 f"prediction intervals for input time series", 1192 loc="left", 1193 fontsize=12, 1194 fontweight=0, 1195 color="black", 1196 ) 1197 plt.show() 1198 else: # self.replications is not None 1199 if self.n_series > 1: 1200 plt.title( 1201 f"prediction intervals for {self.replications} simulations of {series}", 1202 loc="left", 1203 fontsize=12, 1204 fontweight=0, 1205 color="black", 1206 ) 1207 else: 1208 plt.title( 1209 f"prediction intervals for {self.replications} simulations of input time series", 1210 loc="left", 1211 fontsize=12, 1212 fontweight=0, 1213 color="black", 1214 ) 1215 plt.show() 1216 1217 if type_plot == "spaghetti": 1218 palette = plt.get_cmap("Set1") 1219 sims_ix = getsims(self.sims_, series_idx) 1220 plt.plot(x_all, y_all, "-") 1221 for col_ix in range( 1222 sims_ix.shape[1] 1223 ): # avoid this when there are thousands of simulations 1224 plt.plot( 1225 x_test, 1226 sims_ix[:, col_ix], 1227 "-", 1228 color=palette(col_ix), 1229 linewidth=1, 1230 alpha=0.9, 1231 ) 1232 plt.plot(x_all, y_all, "-", color="black") 1233 plt.plot(x_test, y_test, "-", color="blue") 1234 # Add titles 1235 if self.n_series > 1: 1236 plt.title( 1237 f"{self.replications} simulations of {series}", 1238 loc="left", 1239 fontsize=12, 1240 fontweight=0, 1241 color="black", 1242 ) 1243 else: 1244 plt.title( 1245 f"{self.replications} simulations of input time series", 1246 loc="left", 1247 fontsize=12, 1248 fontweight=0, 1249 color="black", 1250 ) 1251 plt.xlabel("Time") 1252 plt.ylabel("Values") 1253 # Show the graph 1254 plt.show() 1255 1256 def cross_val_score( 1257 self, 1258 X, 1259 scoring="root_mean_squared_error", 1260 n_jobs=None, 1261 verbose=0, 1262 xreg=None, 1263 initial_window=5, 1264 horizon=3, 1265 fixed_window=False, 1266 show_progress=True, 1267 level=95, 1268 **kwargs, 1269 ): 1270 """Evaluate a score by time series cross-validation. 1271 1272 Parameters: 1273 1274 X: {array-like, sparse matrix} of shape (n_samples, n_features) 1275 The data to fit. 1276 1277 scoring: str or a function 1278 A str in ('root_mean_squared_error', 'mean_squared_error', 'mean_error', 1279 'mean_absolute_error', 'mean_error', 'mean_percentage_error', 1280 'mean_absolute_percentage_error', 'winkler_score', 'coverage') 1281 Or a function defined as 'coverage' and 'winkler_score' in `utils.timeseries` 1282 1283 n_jobs: int, default=None 1284 Number of jobs to run in parallel. 1285 1286 verbose: int, default=0 1287 The verbosity level. 1288 1289 xreg: array-like, optional (default=None) 1290 Additional (external) regressors to be passed to `fit` 1291 xreg must be in 'increasing' order (most recent observations last) 1292 1293 initial_window: int 1294 initial number of consecutive values in each training set sample 1295 1296 horizon: int 1297 number of consecutive values in test set sample 1298 1299 fixed_window: boolean 1300 if False, all training samples start at index 0, and the training 1301 window's size is increasing. 
1302 if True, the training window's size is fixed, and the window is 1303 rolling forward 1304 1305 show_progress: boolean 1306 if True, a progress bar is printed 1307 1308 **kwargs: dict 1309 additional parameters to be passed to `fit` and `predict` 1310 1311 Returns: 1312 1313 A tuple: descriptive statistics or errors and raw errors 1314 1315 """ 1316 tscv = TimeSeriesSplit() 1317 1318 tscv_obj = tscv.split( 1319 X, 1320 initial_window=initial_window, 1321 horizon=horizon, 1322 fixed_window=fixed_window, 1323 ) 1324 1325 if isinstance(scoring, str): 1326 1327 assert scoring in ( 1328 "root_mean_squared_error", 1329 "mean_squared_error", 1330 "mean_error", 1331 "mean_absolute_error", 1332 "mean_percentage_error", 1333 "mean_absolute_percentage_error", 1334 "winkler_score", 1335 "coverage", 1336 ), "must have scoring in ('root_mean_squared_error', 'mean_squared_error', 'mean_error', 'mean_absolute_error', 'mean_error', 'mean_percentage_error', 'mean_absolute_percentage_error', 'winkler_score', 'coverage')" 1337 1338 def err_func(X_test, X_pred, scoring): 1339 if (self.replications is not None) or ( 1340 self.type_pi == "gaussian" 1341 ): # probabilistic 1342 if scoring == "winkler_score": 1343 return winkler_score(X_pred, X_test, level=level) 1344 elif scoring == "coverage": 1345 return coverage(X_pred, X_test, level=level) 1346 else: 1347 return mean_errors( 1348 pred=X_pred.mean, actual=X_test, scoring=scoring 1349 ) 1350 else: # not probabilistic 1351 return mean_errors(pred=X_pred, actual=X_test, scoring=scoring) 1352 1353 else: # isinstance(scoring, str) = False 1354 1355 err_func = scoring 1356 1357 errors = [] 1358 1359 train_indices = [] 1360 1361 test_indices = [] 1362 1363 for train_index, test_index in tscv_obj: 1364 train_indices.append(train_index) 1365 test_indices.append(test_index) 1366 1367 if show_progress is True: 1368 iterator = tqdm(zip(train_indices, test_indices), total=len(train_indices)) 1369 else: 1370 iterator = zip(train_indices, test_indices) 1371 1372 for train_index, test_index in iterator: 1373 1374 if verbose == 1: 1375 print(f"TRAIN: {train_index}") 1376 print(f"TEST: {test_index}") 1377 1378 if isinstance(X, pd.DataFrame): 1379 self.fit(X.iloc[train_index, :], xreg=xreg, **kwargs) 1380 X_test = X.iloc[test_index, :] 1381 else: 1382 self.fit(X[train_index, :], xreg=xreg, **kwargs) 1383 X_test = X[test_index, :] 1384 X_pred = self.predict(h=int(len(test_index)), level=level, **kwargs) 1385 1386 errors.append(err_func(X_test, X_pred, scoring)) 1387 1388 res = np.asarray(errors) 1389 1390 return res, describe(res) 1391 1392 def _compute_information_criterion(self, curr_lags, criterion="AIC"): 1393 """Compute information criterion using existing residuals 1394 1395 Parameters 1396 ---------- 1397 curr_lags : int 1398 Current number of lags being evaluated 1399 criterion : str 1400 One of 'AIC', 'AICc', or 'BIC' 1401 1402 Returns 1403 ------- 1404 float 1405 Information criterion value or inf if parameters exceed observations 1406 """ 1407 # Get dimensions 1408 n_obs = self.residuals_.shape[0] 1409 n_features = int(self.init_n_series_ * curr_lags) 1410 n_hidden = int(self.n_hidden_features) 1411 1412 # Calculate number of parameters 1413 term1 = int(n_features * n_hidden) 1414 term2 = int(n_hidden * self.init_n_series_) 1415 n_params = term1 + term2 1416 1417 # Check if we have enough observations for the number of parameters 1418 if n_obs <= n_params + 1: 1419 return float("inf") # Return infinity if too many parameters 1420 1421 # Compute RSS using existing 
residuals 1422 rss = np.sum(self.residuals_**2) 1423 1424 # Compute criterion 1425 if criterion == "AIC": 1426 ic = n_obs * np.log(rss / n_obs) + 2 * n_params 1427 elif criterion == "AICc": 1428 ic = n_obs * np.log(rss / n_obs) + 2 * n_params * ( 1429 n_obs / (n_obs - n_params - 1) 1430 ) 1431 else: # BIC 1432 ic = n_obs * np.log(rss / n_obs) + n_params * np.log(n_obs) 1433 1434 return ic
Univariate and multivariate time series (MTS) forecasting with Quasi-Randomized networks
Parameters:
obj: object.
any object containing a method fit (obj.fit()) and a method predict
(obj.predict()).
n_hidden_features: int.
number of nodes in the hidden layer.
activation_name: str.
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'.
a: float.
hyperparameter for 'prelu' or 'elu' activation function.
nodes_sim: str.
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'.
bias: boolean.
indicates if the hidden layer contains a bias term (True) or not
(False).
dropout: float.
regularization parameter; (random) percentage of nodes dropped out
of the training.
direct_link: boolean.
indicates if the original predictors are included (True) in model's fitting or not (False).
n_clusters: int.
number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering).
cluster_encode: bool.
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding.
type_clust: str.
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm').
type_scaling: a tuple of 3 strings.
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax').
lags: int or str.
number of lags used for each time series.
If a string, it must be one of 'AIC', 'AICc', or 'BIC': the number of lags is then selected automatically by minimizing that criterion.
type_pi: str.
type of prediction interval; currently:
- "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case
- "kde": based on Kernel Density Estimation of in-sample residuals
- "bootstrap": based on independent bootstrap of in-sample residuals
- "block-bootstrap": based on basic block bootstrap of in-sample residuals
- "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals
- "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals
- "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals
- "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals
- "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals
- "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals
- based on copulas of in-sample residuals: 'vine-tll', 'vine-bb1', 'vine-bb6', 'vine-bb7', 'vine-bb8', 'vine-clayton',
'vine-frank', 'vine-gaussian', 'vine-gumbel', 'vine-indep', 'vine-joe', 'vine-student'
- 'scp-vine-tll', 'scp-vine-bb1', 'scp-vine-bb6', 'scp-vine-bb7', 'scp-vine-bb8', 'scp-vine-clayton',
'scp-vine-frank', 'scp-vine-gaussian', 'scp-vine-gumbel', 'scp-vine-indep', 'scp-vine-joe', 'scp-vine-student'
- 'scp2-vine-tll', 'scp2-vine-bb1', 'scp2-vine-bb6', 'scp2-vine-bb7', 'scp2-vine-bb8', 'scp2-vine-clayton',
'scp2-vine-frank', 'scp2-vine-gaussian', 'scp2-vine-gumbel', 'scp2-vine-indep', 'scp2-vine-joe', 'scp2-vine-student'
block_size: int.
size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap").
Default is ceil(3.15 * n_residuals^(1/3)), computed from the number of in-sample residuals.
replications: int.
number of replications (if needed, for predictive simulation). Default is None.
kernel: str.
the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'.
agg: str.
either "mean" or "median" for simulation of bootstrap aggregating
seed: int.
reproducibility seed for nodes_sim=='uniform' or predictive simulation.
backend: str.
"cpu" or "gpu" or "tpu".
verbose: int.
0: not printing; 1: printing
show_progress: bool.
if True, a progress bar is displayed while fitting each series; if False, no progress bar is displayed
Attributes:
fit_objs_: dict
objects adjusted to each individual time series
y_: {array-like}
MTS responses (most recent observations first)
X_: {array-like}
MTS lags
xreg_: {array-like}
external regressors
y_means_: dict
a dictionary of each series mean values
preds_: {array-like}
successive model predictions
preds_std_: {array-like}
standard deviation around the predictions for Bayesian base learners (`obj`)
gaussian_preds_std_: {array-like}
standard deviation around the predictions for `type_pi='gaussian'`
return_std_: boolean
return uncertainty or not (set in predict)
df_: data frame
the input data frame, in case a DataFrame is provided to `fit`
n_obs_: int
number of time series observations (number of rows for multivariate)
level_: int
level of confidence for prediction intervals (default is 95)
residuals_: {array-like}
in-sample residuals (for non-conformal `type_pi`) or calibrated residuals
(for conformal `type_pi`, i.e. `type_pi` starting with 'scp')
residuals_sims_: tuple of {array-like}
simulations of in-sample residuals (for non-conformal `type_pi`) or of
calibrated residuals (for conformal `type_pi`)
kde_: A scikit-learn object, see https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KernelDensity.html
residuals_std_dev_: residuals standard deviation
Examples:
Example 1:
import nnetsauce as ns
import numpy as np
from sklearn import linear_model
np.random.seed(123)
M = np.random.rand(10, 3)
M[:,0] = 10*M[:,0]
M[:,2] = 25*M[:,2]
print(M)
# Adjust Bayesian Ridge
regr4 = linear_model.BayesianRidge()
obj_MTS = ns.MTS(regr4, lags = 1, n_hidden_features=5)
obj_MTS.fit(M)
print(obj_MTS.predict())
# with credible intervals
print(obj_MTS.predict(return_std=True, level=80))
print(obj_MTS.predict(return_std=True, level=95))
Example 2:
import nnetsauce as ns
import numpy as np
from sklearn import linear_model
import pandas as pd
dataset = {
'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
'series1' : [34, 30, 35.6, 33.3, 38.1],
'series2' : [4, 5.5, 5.6, 6.3, 5.1],
'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
df = pd.DataFrame(dataset).set_index('date')
print(df)
# Adjust Bayesian Ridge
regr5 = linear_model.BayesianRidge()
obj_MTS = ns.MTS(regr5, lags = 1, n_hidden_features=5)
obj_MTS.fit(df)
print(obj_MTS.predict())
# with credible intervals
print(obj_MTS.predict(return_std=True, level=80))
print(obj_MTS.predict(return_std=True, level=95))
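A further sketch illustrating the `type_pi` and `replications` parameters described above (the base learner, sizes and hyperparameters are illustrative, not tuned). With `replications` set and a simulation-based `type_pi`, `predict` returns point forecasts together with simulated paths and interval bounds:

```python
import nnetsauce as ns
import numpy as np
from sklearn.linear_model import Ridge

np.random.seed(123)
M = np.random.rand(50, 3)  # 50 observations of 3 series

# Ridge base learner, KDE-simulated prediction intervals
obj_MTS = ns.MTS(Ridge(), lags=2, n_hidden_features=5,
                 type_pi="kde", replications=100, kernel="gaussian")
obj_MTS.fit(M)

res = obj_MTS.predict(h=5, level=95)
print(res.mean)   # point forecasts
print(res.lower)  # lower bound of the 95% prediction interval
print(res.upper)  # upper bound of the 95% prediction interval
# res.sims holds the 100 simulated forecast paths
```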
322 def fit(self, X, xreg=None, **kwargs): 323 """Fit MTS model to training data X, with optional regressors xreg 324 325 Parameters: 326 327 X: {array-like}, shape = [n_samples, n_features] 328 Training time series, where n_samples is the number 329 of samples and n_features is the number of features; 330 X must be in increasing order (most recent observations last) 331 332 xreg: {array-like}, shape = [n_samples, n_features_xreg] 333 Additional (external) regressors to be passed to self.obj 334 xreg must be in 'increasing' order (most recent observations last) 335 336 **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity) 337 338 Returns: 339 340 self: object 341 """ 342 343 self.init_n_series_ = X.shape[1] 344 345 # Automatic lag selection if requested 346 if isinstance(self.lags, str): 347 max_lags = min(25, X.shape[0] // 4) 348 best_ic = float("inf") 349 best_lags = 1 350 351 if self.verbose: 352 print(f"\nSelecting optimal number of lags using {self.lags}...") 353 iterator = tqdm(range(1, max_lags + 1)) 354 else: 355 iterator = range(1, max_lags + 1) 356 357 for lag in iterator: 358 # Convert DataFrame to numpy array before reversing 359 if isinstance(X, pd.DataFrame): 360 X_values = X.values[::-1] 361 else: 362 X_values = X[::-1] 363 364 # Try current lag value 365 if self.init_n_series_ > 1: 366 mts_input = ts.create_train_inputs(X_values, lag) 367 else: 368 mts_input = ts.create_train_inputs(X_values.reshape(-1, 1), lag) 369 370 # Cook training set and fit model 371 dummy_y, scaled_Z = self.cook_training_set( 372 y=np.ones(mts_input[0].shape[0]), X=mts_input[1] 373 ) 374 residuals_ = [] 375 376 for i in range(self.init_n_series_): 377 y_mean = np.mean(mts_input[0][:, i]) 378 centered_y_i = mts_input[0][:, i] - y_mean 379 self.obj.fit(X=scaled_Z, y=centered_y_i) 380 residuals_.append( 381 (centered_y_i - self.obj.predict(scaled_Z)).tolist() 382 ) 383 384 self.residuals_ = np.asarray(residuals_).T 385 ic = self._compute_information_criterion( 386 curr_lags=lag, criterion=self.lags 387 ) 388 389 if self.verbose: 390 print(f"Trying lags={lag}, {self.lags}={ic:.2f}") 391 392 if ic < best_ic: 393 best_ic = ic 394 best_lags = lag 395 396 if self.verbose: 397 print(f"\nSelected {best_lags} lags with {self.lags}={best_ic:.2f}") 398 399 self.lags = best_lags 400 401 self.input_dates = None 402 self.df_ = None 403 404 if isinstance(X, pd.DataFrame) is False: 405 # input data set is a numpy array 406 if xreg is None: 407 X = pd.DataFrame(X) 408 self.series_names = ["series" + str(i) for i in range(X.shape[1])] 409 else: 410 # xreg is not None 411 X = mo.cbind(X, xreg) 412 self.xreg_ = xreg 413 414 else: # input data set is a DataFrame with column names 415 416 X_index = None 417 if X.index is not None: 418 X_index = X.index 419 if xreg is None: 420 X = copy.deepcopy(mo.convert_df_to_numeric(X)) 421 else: 422 X = copy.deepcopy(mo.cbind(mo.convert_df_to_numeric(X), xreg)) 423 self.xreg_ = xreg 424 if X_index is not None: 425 X.index = X_index 426 self.series_names = X.columns.tolist() 427 428 if isinstance(X, pd.DataFrame): 429 if self.df_ is None: 430 self.df_ = X 431 X = X.values 432 else: 433 input_dates_prev = pd.DatetimeIndex(self.df_.index.values) 434 frequency = pd.infer_freq(input_dates_prev) 435 self.df_ = pd.concat([self.df_, X], axis=0) 436 self.input_dates = pd.date_range( 437 start=input_dates_prev[0], 438 periods=len(input_dates_prev) + X.shape[0], 439 freq=frequency, 440 ).values.tolist() 441 
self.df_.index = self.input_dates 442 X = self.df_.values 443 self.df_.columns = self.series_names 444 else: 445 if self.df_ is None: 446 self.df_ = pd.DataFrame(X, columns=self.series_names) 447 else: 448 self.df_ = pd.concat( 449 [self.df_, pd.DataFrame(X, columns=self.series_names)], 450 axis=0, 451 ) 452 453 self.input_dates = ts.compute_input_dates(self.df_) 454 455 try: 456 # multivariate time series 457 n, p = X.shape 458 except: 459 # univariate time series 460 n = X.shape[0] 461 p = 1 462 self.n_obs_ = n 463 464 rep_1_n = np.repeat(1, n) 465 466 self.y_ = None 467 self.X_ = None 468 self.n_series = p 469 self.fit_objs_.clear() 470 self.y_means_.clear() 471 residuals_ = [] 472 self.residuals_ = None 473 self.residuals_sims_ = None 474 self.kde_ = None 475 self.sims_ = None 476 self.scaled_Z_ = None 477 self.centered_y_is_ = [] 478 479 if self.init_n_series_ > 1: 480 # multivariate time series 481 mts_input = ts.create_train_inputs(X[::-1], self.lags) 482 else: 483 # univariate time series 484 mts_input = ts.create_train_inputs(X.reshape(-1, 1)[::-1], self.lags) 485 486 self.y_ = mts_input[0] 487 488 self.X_ = mts_input[1] 489 490 dummy_y, scaled_Z = self.cook_training_set(y=rep_1_n, X=self.X_) 491 492 self.scaled_Z_ = scaled_Z 493 494 # loop on all the time series and adjust self.obj.fit 495 if self.verbose > 0: 496 print( 497 f"\n Adjusting {type(self.obj).__name__} to multivariate time series... \n" 498 ) 499 500 if self.show_progress is True: 501 iterator = tqdm(range(self.init_n_series_)) 502 else: 503 iterator = range(self.init_n_series_) 504 505 if self.type_pi in ( 506 "gaussian", 507 "kde", 508 "bootstrap", 509 "block-bootstrap", 510 ) or self.type_pi.startswith("vine"): 511 for i in iterator: 512 y_mean = np.mean(self.y_[:, i]) 513 self.y_means_[i] = y_mean 514 centered_y_i = self.y_[:, i] - y_mean 515 self.centered_y_is_.append(centered_y_i) 516 self.obj.fit(X=scaled_Z, y=centered_y_i) 517 self.fit_objs_[i] = deepcopy(self.obj) 518 residuals_.append( 519 (centered_y_i - self.fit_objs_[i].predict(scaled_Z)).tolist() 520 ) 521 522 if self.type_pi.startswith("scp"): 523 # split conformal prediction 524 for i in iterator: 525 n_y = self.y_.shape[0] 526 n_y_half = n_y // 2 527 first_half_idx = range(0, n_y_half) 528 second_half_idx = range(n_y_half, n_y) 529 y_mean_temp = np.mean(self.y_[first_half_idx, i]) 530 centered_y_i_temp = self.y_[first_half_idx, i] - y_mean_temp 531 self.obj.fit(X=scaled_Z[first_half_idx, :], y=centered_y_i_temp) 532 # calibrated residuals actually 533 residuals_.append( 534 ( 535 self.y_[second_half_idx, i] 536 - (y_mean_temp + self.obj.predict(scaled_Z[second_half_idx, :])) 537 ).tolist() 538 ) 539 # fit on the second half 540 y_mean = np.mean(self.y_[second_half_idx, i]) 541 self.y_means_[i] = y_mean 542 centered_y_i = self.y_[second_half_idx, i] - y_mean 543 self.obj.fit(X=scaled_Z[second_half_idx, :], y=centered_y_i) 544 self.fit_objs_[i] = deepcopy(self.obj) 545 546 self.residuals_ = np.asarray(residuals_).T 547 548 if self.type_pi == "gaussian": 549 self.gaussian_preds_std_ = np.std(self.residuals_, axis=0) 550 551 if self.type_pi.startswith("scp2"): 552 # Calculate mean and standard deviation for each column 553 data_mean = np.mean(self.residuals_, axis=0) 554 self.residuals_std_dev_ = np.std(self.residuals_, axis=0) 555 # Center and scale the array using broadcasting 556 self.residuals_ = ( 557 self.residuals_ - data_mean[np.newaxis, :] 558 ) / self.residuals_std_dev_[np.newaxis, :] 559 560 if self.replications != None and "kde" in 
self.type_pi: 561 if self.verbose > 0: 562 print(f"\n Simulate residuals using {self.kernel} kernel... \n") 563 assert self.kernel in ( 564 "gaussian", 565 "tophat", 566 ), "currently, 'kernel' must be either 'gaussian' or 'tophat'" 567 kernel_bandwidths = {"bandwidth": np.logspace(-6, 6, 150)} 568 grid = GridSearchCV( 569 KernelDensity(kernel=self.kernel, **kwargs), 570 param_grid=kernel_bandwidths, 571 ) 572 grid.fit(self.residuals_) 573 574 if self.verbose > 0: 575 print( 576 f"\n Best parameters for {self.kernel} kernel: {grid.best_params_} \n" 577 ) 578 579 self.kde_ = grid.best_estimator_ 580 581 return self
Fit MTS model to training data X, with optional regressors xreg
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training time series, where n_samples is the number of samples and n_features is the number of features; X must be in increasing order (most recent observations last)
xreg: {array-like}, shape = [n_samples, n_features_xreg]
Additional (external) regressors to be passed to self.obj; xreg must be in increasing order (most recent observations last)
**kwargs: additional parameters to be passed to kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)
Returns:
self: object
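A minimal sketch of the automatic lag selection documented for `lags` (base learner and sizes are illustrative); after fitting, `lags` holds the selected integer:

```python
import nnetsauce as ns
import numpy as np
from sklearn.linear_model import Ridge

np.random.seed(123)
M = np.random.rand(60, 2)

# lags="AIC" triggers the lag search implemented in fit (see source above)
obj_MTS = ns.MTS(Ridge(), lags="AIC", n_hidden_features=3, verbose=1)
obj_MTS.fit(M)
print(obj_MTS.lags)  # selected number of lags
```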
618 def predict(self, h=5, level=95, **kwargs): 619 """Forecast all the time series, h steps ahead""" 620 621 self.output_dates_, frequency = ts.compute_output_dates(self.df_, h) 622 623 self.level_ = level 624 625 self.return_std_ = False # do not remove (/!\) 626 627 self.mean_ = None # do not remove (/!\) 628 629 self.mean_ = deepcopy(self.y_) # do not remove (/!\) 630 631 self.lower_ = None # do not remove (/!\) 632 633 self.upper_ = None # do not remove (/!\) 634 635 self.sims_ = None # do not remove (/!\) 636 637 y_means_ = np.asarray([self.y_means_[i] for i in range(self.init_n_series_)]) 638 639 n_features = self.init_n_series_ * self.lags 640 641 self.alpha_ = 100 - level 642 643 pi_multiplier = norm.ppf(1 - self.alpha_ / 200) 644 645 if "return_std" in kwargs: # bayesian forecasting 646 self.return_std_ = True 647 self.preds_std_ = [] 648 DescribeResult = namedtuple( 649 "DescribeResult", ("mean", "lower", "upper") 650 ) # to be updated 651 652 if "return_pi" in kwargs: # split conformal, without simulation 653 mean_pi_ = [] 654 lower_pi_ = [] 655 upper_pi_ = [] 656 median_pi_ = [] 657 DescribeResult = namedtuple( 658 "DescribeResult", ("mean", "lower", "upper") 659 ) # to be updated 660 661 if self.kde_ != None and "kde" in self.type_pi: # kde 662 target_cols = self.df_.columns[ 663 : self.init_n_series_ 664 ] # Get target column names 665 if self.verbose == 1: 666 self.residuals_sims_ = tuple( 667 self.kde_.sample( 668 n_samples=h, random_state=self.seed + 100 * i 669 ) # Keep full sample 670 for i in tqdm(range(self.replications)) 671 ) 672 elif self.verbose == 0: 673 self.residuals_sims_ = tuple( 674 self.kde_.sample( 675 n_samples=h, random_state=self.seed + 100 * i 676 ) # Keep full sample 677 for i in range(self.replications) 678 ) 679 680 # Convert to DataFrames after sampling 681 self.residuals_sims_ = tuple( 682 pd.DataFrame( 683 sim, # Keep all columns 684 columns=target_cols, # Use original target column names 685 index=self.output_dates_, 686 ) 687 for sim in self.residuals_sims_ 688 ) 689 690 if self.type_pi in ("bootstrap", "scp-bootstrap", "scp2-bootstrap"): 691 assert self.replications is not None and isinstance( 692 self.replications, int 693 ), "'replications' must be provided and be an integer" 694 if self.verbose == 1: 695 self.residuals_sims_ = tuple( 696 ts.bootstrap( 697 self.residuals_, 698 h=h, 699 block_size=None, 700 seed=self.seed + 100 * i, 701 ) 702 for i in tqdm(range(self.replications)) 703 ) 704 elif self.verbose == 0: 705 self.residuals_sims_ = tuple( 706 ts.bootstrap( 707 self.residuals_, 708 h=h, 709 block_size=None, 710 seed=self.seed + 100 * i, 711 ) 712 for i in range(self.replications) 713 ) 714 715 if self.type_pi in ( 716 "block-bootstrap", 717 "scp-block-bootstrap", 718 "scp2-block-bootstrap", 719 ): 720 if self.block_size is None: 721 self.block_size = int( 722 np.ceil(3.15 * (self.residuals_.shape[0] ** (1 / 3))) 723 ) 724 725 assert self.replications is not None and isinstance( 726 self.replications, int 727 ), "'replications' must be provided and be an integer" 728 if self.verbose == 1: 729 self.residuals_sims_ = tuple( 730 ts.bootstrap( 731 self.residuals_, 732 h=h, 733 block_size=self.block_size, 734 seed=self.seed + 100 * i, 735 ) 736 for i in tqdm(range(self.replications)) 737 ) 738 elif self.verbose == 0: 739 self.residuals_sims_ = tuple( 740 ts.bootstrap( 741 self.residuals_, 742 h=h, 743 block_size=self.block_size, 744 seed=self.seed + 100 * i, 745 ) 746 for i in range(self.replications) 747 ) 748 749 if "vine" in 
self.type_pi: 750 if self.verbose == 1: 751 self.residuals_sims_ = tuple( 752 vinecopula_sample( 753 x=self.residuals_, 754 n_samples=h, 755 method=self.type_pi, 756 random_state=self.seed + 100 * i, 757 ) 758 for i in tqdm(range(self.replications)) 759 ) 760 elif self.verbose == 0: 761 self.residuals_sims_ = tuple( 762 vinecopula_sample( 763 x=self.residuals_, 764 n_samples=h, 765 method=self.type_pi, 766 random_state=self.seed + 100 * i, 767 ) 768 for i in range(self.replications) 769 ) 770 771 mean_ = deepcopy(self.mean_) 772 773 for i in range(h): 774 775 new_obs = ts.reformat_response(mean_, self.lags) 776 new_X = new_obs.reshape(1, -1) 777 cooked_new_X = self.cook_test_set(new_X, **kwargs) 778 779 if "return_std" in kwargs: 780 self.preds_std_.append( 781 [ 782 np.asarray( 783 self.fit_objs_[i].predict(cooked_new_X, return_std=True)[1] 784 ).item() 785 for i in range(self.n_series) 786 ] 787 ) 788 789 if "return_pi" in kwargs: 790 for i in range(self.n_series): 791 preds_pi = self.fit_objs_[i].predict(cooked_new_X, **kwargs) 792 mean_pi_.append(preds_pi.mean[0]) 793 lower_pi_.append(preds_pi.lower[0]) 794 upper_pi_.append(preds_pi.upper[0]) 795 796 predicted_cooked_new_X = np.asarray( 797 [ 798 np.asarray(self.fit_objs_[i].predict(cooked_new_X)).item() 799 for i in range(self.init_n_series_) 800 ] 801 ) 802 803 preds = np.asarray(y_means_ + predicted_cooked_new_X) 804 805 # Create full row with both predictions and external regressors 806 if self.xreg_ is not None and "xreg" in kwargs: 807 next_xreg = kwargs["xreg"].iloc[i : i + 1].values.flatten() 808 full_row = np.concatenate([preds, next_xreg]) 809 else: 810 full_row = preds 811 812 # Create a new row with same number of columns as mean_ 813 new_row = np.zeros((1, mean_.shape[1])) 814 new_row[0, : full_row.shape[0]] = full_row 815 816 # Maintain the full dimensionality by using vstack instead of rbind 817 mean_ = np.vstack([new_row, mean_[:-1]]) 818 819 # Final output should only include the target columns 820 self.mean_ = pd.DataFrame( 821 mean_[0:h, : self.init_n_series_][::-1], 822 columns=self.df_.columns[: self.init_n_series_], 823 index=self.output_dates_, 824 ) 825 826 # function's return ---------------------------------------------------------------------- 827 if ( 828 (("return_std" not in kwargs) and ("return_pi" not in kwargs)) 829 and (self.type_pi not in ("gaussian", "scp")) 830 ) or ("vine" in self.type_pi): 831 832 if self.replications is None: 833 return self.mean_.iloc[:, : self.init_n_series_] 834 835 # if "return_std" not in kwargs and self.replications is not None 836 meanf = [] 837 medianf = [] 838 lower = [] 839 upper = [] 840 841 if "scp2" in self.type_pi: 842 843 if self.verbose == 1: 844 self.sims_ = tuple( 845 ( 846 self.mean_ 847 + self.residuals_sims_[i] 848 * self.residuals_std_dev_[np.newaxis, :] 849 for i in tqdm(range(self.replications)) 850 ) 851 ) 852 elif self.verbose == 0: 853 self.sims_ = tuple( 854 ( 855 self.mean_ 856 + self.residuals_sims_[i] 857 * self.residuals_std_dev_[np.newaxis, :] 858 for i in range(self.replications) 859 ) 860 ) 861 else: 862 863 if self.verbose == 1: 864 self.sims_ = tuple( 865 ( 866 self.mean_ + self.residuals_sims_[i] 867 for i in tqdm(range(self.replications)) 868 ) 869 ) 870 elif self.verbose == 0: 871 self.sims_ = tuple( 872 ( 873 self.mean_ + self.residuals_sims_[i] 874 for i in range(self.replications) 875 ) 876 ) 877 878 DescribeResult = namedtuple( 879 "DescribeResult", ("mean", "sims", "lower", "upper") 880 ) 881 for ix in range(self.init_n_series_): 882 
sims_ix = getsims(self.sims_, ix) 883 if self.agg == "mean": 884 meanf.append(np.mean(sims_ix, axis=1)) 885 else: 886 medianf.append(np.median(sims_ix, axis=1)) 887 lower.append(np.quantile(sims_ix, q=self.alpha_ / 200, axis=1)) 888 upper.append(np.quantile(sims_ix, q=1 - self.alpha_ / 200, axis=1)) 889 self.mean_ = pd.DataFrame( 890 np.asarray(meanf).T, 891 columns=self.series_names[: self.init_n_series_], # self.df_.columns, 892 index=self.output_dates_, 893 ) 894 895 self.lower_ = pd.DataFrame( 896 np.asarray(lower).T, 897 columns=self.series_names[: self.init_n_series_], # self.df_.columns, 898 index=self.output_dates_, 899 ) 900 901 self.upper_ = pd.DataFrame( 902 np.asarray(upper).T, 903 columns=self.series_names[: self.init_n_series_], # self.df_.columns, 904 index=self.output_dates_, 905 ) 906 907 try: 908 self.median_ = pd.DataFrame( 909 np.asarray(medianf).T, 910 columns=self.series_names[ 911 : self.init_n_series_ 912 ], # self.df_.columns, 913 index=self.output_dates_, 914 ) 915 except Exception as e: 916 pass 917 918 return DescribeResult(self.mean_, self.sims_, self.lower_, self.upper_) 919 920 if ( 921 (("return_std" in kwargs) or ("return_pi" in kwargs)) 922 and (self.type_pi not in ("gaussian", "scp")) 923 ) or "vine" in self.type_pi: 924 DescribeResult = namedtuple("DescribeResult", ("mean", "lower", "upper")) 925 926 self.mean_ = pd.DataFrame( 927 np.asarray(self.mean_), 928 columns=self.series_names, # self.df_.columns, 929 index=self.output_dates_, 930 ) 931 932 if "return_std" in kwargs: 933 934 self.preds_std_ = np.asarray(self.preds_std_) 935 936 self.lower_ = pd.DataFrame( 937 self.mean_.values - pi_multiplier * self.preds_std_, 938 columns=self.series_names, # self.df_.columns, 939 index=self.output_dates_, 940 ) 941 942 self.upper_ = pd.DataFrame( 943 self.mean_.values + pi_multiplier * self.preds_std_, 944 columns=self.series_names, # self.df_.columns, 945 index=self.output_dates_, 946 ) 947 948 if "return_pi" in kwargs: 949 950 self.lower_ = pd.DataFrame( 951 np.asarray(lower_pi_).reshape(h, self.n_series) 952 + y_means_[np.newaxis, :], 953 columns=self.series_names, # self.df_.columns, 954 index=self.output_dates_, 955 ) 956 957 self.upper_ = pd.DataFrame( 958 np.asarray(upper_pi_).reshape(h, self.n_series) 959 + y_means_[np.newaxis, :], 960 columns=self.series_names, # self.df_.columns, 961 index=self.output_dates_, 962 ) 963 964 res = DescribeResult(self.mean_, self.lower_, self.upper_) 965 966 if self.xreg_ is not None: 967 if len(self.xreg_.shape) > 1: 968 res2 = mx.tuple_map( 969 res, 970 lambda x: mo.delete_last_columns( 971 x, num_columns=self.xreg_.shape[1] 972 ), 973 ) 974 else: 975 res2 = mx.tuple_map( 976 res, lambda x: mo.delete_last_columns(x, num_columns=1) 977 ) 978 return DescribeResult(res2[0], res2[1], res2[2]) 979 980 return res 981 982 if self.type_pi == "gaussian": 983 984 DescribeResult = namedtuple("DescribeResult", ("mean", "lower", "upper")) 985 986 self.mean_ = pd.DataFrame( 987 np.asarray(self.mean_), 988 columns=self.series_names, # self.df_.columns, 989 index=self.output_dates_, 990 ) 991 992 self.lower_ = pd.DataFrame( 993 self.mean_.values - pi_multiplier * self.gaussian_preds_std_, 994 columns=self.series_names, # self.df_.columns, 995 index=self.output_dates_, 996 ) 997 998 self.upper_ = pd.DataFrame( 999 self.mean_.values + pi_multiplier * self.gaussian_preds_std_, 1000 columns=self.series_names, # self.df_.columns, 1001 index=self.output_dates_, 1002 ) 1003 1004 res = DescribeResult(self.mean_, self.lower_, self.upper_) 1005 
1006 if self.xreg_ is not None: 1007 if len(self.xreg_.shape) > 1: 1008 res2 = mx.tuple_map( 1009 res, 1010 lambda x: mo.delete_last_columns( 1011 x, num_columns=self.xreg_.shape[1] 1012 ), 1013 ) 1014 else: 1015 res2 = mx.tuple_map( 1016 res, lambda x: mo.delete_last_columns(x, num_columns=1) 1017 ) 1018 return DescribeResult(res2[0], res2[1], res2[2]) 1019 1020 return res 1021 1022 # After prediction loop, ensure sims only contain target columns 1023 if self.sims_ is not None: 1024 if self.verbose == 1: 1025 self.sims_ = tuple( 1026 sim[:h,] # Only keep target columns and h rows 1027 for sim in tqdm(self.sims_) 1028 ) 1029 elif self.verbose == 0: 1030 self.sims_ = tuple( 1031 sim[:h,] # Only keep target columns and h rows 1032 for sim in self.sims_ 1033 ) 1034 1035 # Convert numpy arrays to DataFrames with proper columns 1036 self.sims_ = tuple( 1037 pd.DataFrame( 1038 sim, 1039 columns=self.df_.columns[: self.init_n_series_], 1040 index=self.output_dates_, 1041 ) 1042 for sim in self.sims_ 1043 ) 1044 1045 if self.type_pi in ("kde", "bootstrap", "block-bootstrap", "vine-copula"): 1046 if self.xreg_ is not None: 1047 # Use getsimsxreg when external regressors are present 1048 target_cols = self.df_.columns[: self.init_n_series_] 1049 self.sims_ = getsimsxreg(self.sims_, self.output_dates_, target_cols) 1050 else: 1051 # Use original getsims for backward compatibility 1052 self.sims_ = getsims(self.sims_)
Forecast all the time series, h steps ahead
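A hedged sketch of the simplest interval path, `type_pi="gaussian"`, for which `predict` returns a namedtuple of mean, lower and upper DataFrames indexed by the inferred output dates (sizes and hyperparameters are illustrative):

```python
import nnetsauce as ns
import numpy as np
from sklearn.linear_model import Ridge

np.random.seed(123)
M = np.random.rand(40, 2)

obj_MTS = ns.MTS(Ridge(), lags=1, n_hidden_features=5, type_pi="gaussian")
obj_MTS.fit(M)

res = obj_MTS.predict(h=4, level=95)
print(res.mean)   # point forecasts
print(res.lower)  # mean - z * in-sample residual standard deviation
print(res.upper)  # mean + z * in-sample residual standard deviation
```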
1054 def score(self, X, training_index, testing_index, scoring=None, **kwargs): 1055 """Train on training_index, score on testing_index.""" 1056 1057 assert ( 1058 bool(set(training_index).intersection(set(testing_index))) == False 1059 ), "Non-overlapping 'training_index' and 'testing_index' required" 1060 1061 # Dimensions 1062 try: 1063 # multivariate time series 1064 n, p = X.shape 1065 except: 1066 # univariate time series 1067 n = X.shape[0] 1068 p = 1 1069 1070 # Training and testing sets 1071 if p > 1: 1072 X_train = X[training_index, :] 1073 X_test = X[testing_index, :] 1074 else: 1075 X_train = X[training_index] 1076 X_test = X[testing_index] 1077 1078 # Horizon 1079 h = len(testing_index) 1080 assert ( 1081 len(training_index) + h 1082 ) <= n, "Please check lengths of training and testing windows" 1083 1084 # Fit and predict 1085 self.fit(X_train, **kwargs) 1086 preds = self.predict(h=h, **kwargs) 1087 1088 if scoring is None: 1089 scoring = "neg_root_mean_squared_error" 1090 1091 # check inputs 1092 assert scoring in ( 1093 "explained_variance", 1094 "neg_mean_absolute_error", 1095 "neg_mean_squared_error", 1096 "neg_root_mean_squared_error", 1097 "neg_mean_squared_log_error", 1098 "neg_median_absolute_error", 1099 "r2", 1100 ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \ 1101 'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \ 1102 'neg_median_absolute_error', 'r2')" 1103 1104 scoring_options = { 1105 "explained_variance": skm2.explained_variance_score, 1106 "neg_mean_absolute_error": skm2.mean_absolute_error, 1107 "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2), 1108 "neg_root_mean_squared_error": lambda x, y: np.sqrt(np.mean((x - y) ** 2)), 1109 "neg_mean_squared_log_error": skm2.mean_squared_log_error, 1110 "neg_median_absolute_error": skm2.median_absolute_error, 1111 "r2": skm2.r2_score, 1112 } 1113 1114 return scoring_options[scoring](X_test, preds)
Train on training_index, score on testing_index.
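A minimal usage sketch for `score`, assuming non-overlapping, contiguous training and testing indices as required by the assertions in the source (data and hyperparameters are illustrative):

```python
import nnetsauce as ns
import numpy as np
from sklearn.linear_model import Ridge

np.random.seed(123)
M = np.random.rand(30, 2)

obj_MTS = ns.MTS(Ridge(), lags=1, n_hidden_features=5)

# train on the first 25 rows, forecast and score the last 5
training_index = np.arange(0, 25)
testing_index = np.arange(25, 30)
print(obj_MTS.score(M, training_index, testing_index,
                    scoring="neg_root_mean_squared_error"))
```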
16class MultitaskClassifier(Base, ClassifierMixin): 17 """Multitask Classification model based on regression models, with shared covariates 18 19 Parameters: 20 21 obj: object 22 any object (must be a regression model) containing a method fit (obj.fit()) 23 and a method predict (obj.predict()) 24 25 n_hidden_features: int 26 number of nodes in the hidden layer 27 28 activation_name: str 29 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 30 31 a: float 32 hyperparameter for 'prelu' or 'elu' activation function 33 34 nodes_sim: str 35 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 36 'uniform' 37 38 bias: boolean 39 indicates if the hidden layer contains a bias term (True) or not 40 (False) 41 42 dropout: float 43 regularization parameter; (random) percentage of nodes dropped out 44 of the training 45 46 direct_link: boolean 47 indicates if the original predictors are included (True) in model's 48 fitting or not (False) 49 50 n_clusters: int 51 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: 52 no clustering) 53 54 cluster_encode: bool 55 defines how the variable containing clusters is treated (default is one-hot) 56 if `False`, then labels are used, without one-hot encoding 57 58 type_clust: str 59 type of clustering method: currently k-means ('kmeans') or Gaussian 60 Mixture Model ('gmm') 61 62 type_scaling: a tuple of 3 strings 63 scaling methods for inputs, hidden layer, and clustering respectively 64 (and when relevant). 65 Currently available: standardization ('std') or MinMax scaling ('minmax') 66 67 col_sample: float 68 percentage of covariates randomly chosen for training 69 70 row_sample: float 71 percentage of rows chosen for training, by stratified bootstrapping 72 73 seed: int 74 reproducibility seed for nodes_sim=='uniform' 75 76 backend: str 77 "cpu" or "gpu" or "tpu" 78 79 Attributes: 80 81 fit_objs_: dict 82 objects adjusted to each individual time series 83 84 n_classes_: int 85 number of classes for the classifier 86 87 Examples: 88 89 See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/mtask_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/mtask_classification.py) 90 91 ```python 92 import nnetsauce as ns 93 import numpy as np 94 from sklearn.datasets import load_breast_cancer 95 from sklearn.linear_model import LinearRegression 96 from sklearn.model_selection import train_test_split 97 from sklearn import metrics 98 from time import time 99 100 breast_cancer = load_breast_cancer() 101 Z = breast_cancer.data 102 t = breast_cancer.target 103 104 X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2, 105 random_state=123+2*10) 106 107 # Linear Regression is used 108 regr = LinearRegression() 109 fit_obj = ns.MultitaskClassifier(regr, n_hidden_features=5, 110 n_clusters=2, type_clust="gmm") 111 112 start = time() 113 fit_obj.fit(X_train, y_train) 114 print(f"Elapsed {time() - start}") 115 116 print(fit_obj.score(X_test, y_test)) 117 print(fit_obj.score(X_test, y_test, scoring="roc_auc")) 118 119 start = time() 120 preds = fit_obj.predict(X_test) 121 print(f"Elapsed {time() - start}") 122 print(metrics.classification_report(preds, y_test)) 123 ``` 124 125 """ 126 127 # construct the object ----- 128 _estimator_type = "classifier" 129 130 def __init__( 131 self, 132 obj, 133 n_hidden_features=5, 134 activation_name="relu", 135 a=0.01, 136 nodes_sim="sobol", 137 bias=True, 138 dropout=0, 139 direct_link=True, 140 n_clusters=2, 141 cluster_encode=True, 142 
type_clust="kmeans", 143 type_scaling=("std", "std", "std"), 144 col_sample=1, 145 row_sample=1, 146 seed=123, 147 backend="cpu", 148 ): 149 super().__init__( 150 n_hidden_features=n_hidden_features, 151 activation_name=activation_name, 152 a=a, 153 nodes_sim=nodes_sim, 154 bias=bias, 155 dropout=dropout, 156 direct_link=direct_link, 157 n_clusters=n_clusters, 158 cluster_encode=cluster_encode, 159 type_clust=type_clust, 160 type_scaling=type_scaling, 161 col_sample=col_sample, 162 row_sample=row_sample, 163 seed=seed, 164 backend=backend, 165 ) 166 167 self.type_fit = "classification" 168 self.obj = obj 169 self.fit_objs_ = {} 170 171 def fit(self, X, y, sample_weight=None, **kwargs): 172 """Fit MultitaskClassifier to training data (X, y). 173 174 Args: 175 176 X: {array-like}, shape = [n_samples, n_features] 177 Training vectors, where n_samples is the number 178 of samples and n_features is the number of features. 179 180 y: array-like, shape = [n_samples] 181 Target values. 182 183 **kwargs: additional parameters to be passed to 184 self.cook_training_set or self.obj.fit 185 186 Returns: 187 188 self: object 189 190 """ 191 192 assert mx.is_factor(y), "y must contain only integers" 193 194 output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 195 196 self.classes_ = np.unique(y) # for compatibility with sklearn 197 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 198 199 # multitask response 200 Y = mo.one_hot_encode2(output_y, self.n_classes_) 201 202 # if sample_weight is None: 203 for i in range(self.n_classes_): 204 self.fit_objs_[i] = deepcopy(self.obj.fit(scaled_Z, Y[:, i], **kwargs)) 205 206 self.classes_ = np.unique(y) 207 return self 208 209 def predict(self, X, **kwargs): 210 """Predict test data X. 211 212 Args: 213 214 X: {array-like}, shape = [n_samples, n_features] 215 Training vectors, where n_samples is the number 216 of samples and n_features is the number of features. 217 218 **kwargs: additional parameters to be passed to 219 self.cook_test_set 220 221 Returns: 222 223 model predictions: {array-like} 224 225 """ 226 return np.argmax(self.predict_proba(X, **kwargs), axis=1) 227 228 def predict_proba(self, X, **kwargs): 229 """Predict probabilities for test data X. 230 231 Args: 232 233 X: {array-like}, shape = [n_samples, n_features] 234 Training vectors, where n_samples is the number 235 of samples and n_features is the number of features. 236 237 **kwargs: additional parameters to be passed to 238 self.cook_test_set 239 240 Returns: 241 242 probability estimates for test data: {array-like} 243 244 """ 245 246 shape_X = X.shape 247 248 probs = np.zeros((shape_X[0], self.n_classes_)) 249 250 if len(shape_X) == 1: 251 n_features = shape_X[0] 252 253 new_X = mo.rbind( 254 X.reshape(1, n_features), 255 np.ones(n_features).reshape(1, n_features), 256 ) 257 258 Z = self.cook_test_set(new_X, **kwargs) 259 260 # loop on all the classes 261 for i in range(self.n_classes_): 262 probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0] 263 264 else: 265 Z = self.cook_test_set(X, **kwargs) 266 267 # loop on all the classes 268 for i in range(self.n_classes_): 269 probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs) 270 271 expit_raw_probs = expit(probs) 272 273 return expit_raw_probs / expit_raw_probs.sum(axis=1)[:, None] 274 275 def decision_function(self, X, **kwargs): 276 """Compute the decision function of X. 277 278 Parameters: 279 X: {array-like}, shape = [n_samples, n_features] 280 Samples to compute decision function for. 
281 282 **kwargs: additional parameters to be passed to 283 self.cook_test_set 284 285 Returns: 286 array-like of shape (n_samples,) or (n_samples, n_classes) 287 Decision function of the input samples. The order of outputs is the same 288 as that of the classes passed to fit. 289 """ 290 if not hasattr(self.obj, "decision_function"): 291 # If base classifier doesn't have decision_function, use predict_proba 292 proba = self.predict_proba(X, **kwargs) 293 if proba.shape[1] == 2: 294 return proba[:, 1] # For binary classification 295 return proba # For multiclass 296 297 if len(X.shape) == 1: 298 n_features = X.shape[0] 299 new_X = mo.rbind( 300 X.reshape(1, n_features), 301 np.ones(n_features).reshape(1, n_features), 302 ) 303 304 return ( 305 self.obj.decision_function( 306 self.cook_test_set(new_X, **kwargs), **kwargs 307 ) 308 )[0] 309 310 return self.obj.decision_function(self.cook_test_set(X, **kwargs), **kwargs) 311 312 @property 313 def _estimator_type(self): 314 return "classifier"
Multitask Classification model based on regression models, with shared covariates
Parameters:
obj: object
any object (must be a regression model) containing a method fit (obj.fit())
and a method predict (obj.predict())
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
fit_objs_: dict
fitted objects, one per class (a separate regression task for each class)
n_classes_: int
number of classes for the classifier
Examples:
See also https://github.com/Techtonique/nnetsauce/blob/master/examples/mtask_classification.py
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time
breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
random_state=123+2*10)
# Linear Regression is used
regr = LinearRegression()
fit_obj = ns.MultitaskClassifier(regr, n_hidden_features=5,
n_clusters=2, type_clust="gmm")
start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")
print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
171 def fit(self, X, y, sample_weight=None, **kwargs): 172 """Fit MultitaskClassifier to training data (X, y). 173 174 Args: 175 176 X: {array-like}, shape = [n_samples, n_features] 177 Training vectors, where n_samples is the number 178 of samples and n_features is the number of features. 179 180 y: array-like, shape = [n_samples] 181 Target values. 182 183 **kwargs: additional parameters to be passed to 184 self.cook_training_set or self.obj.fit 185 186 Returns: 187 188 self: object 189 190 """ 191 192 assert mx.is_factor(y), "y must contain only integers" 193 194 output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 195 196 self.classes_ = np.unique(y) # for compatibility with sklearn 197 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 198 199 # multitask response 200 Y = mo.one_hot_encode2(output_y, self.n_classes_) 201 202 # if sample_weight is None: 203 for i in range(self.n_classes_): 204 self.fit_objs_[i] = deepcopy(self.obj.fit(scaled_Z, Y[:, i], **kwargs)) 205 206 self.classes_ = np.unique(y) 207 return self
Fit MultitaskClassifier to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
209 def predict(self, X, **kwargs): 210 """Predict test data X. 211 212 Args: 213 214 X: {array-like}, shape = [n_samples, n_features] 215 Training vectors, where n_samples is the number 216 of samples and n_features is the number of features. 217 218 **kwargs: additional parameters to be passed to 219 self.cook_test_set 220 221 Returns: 222 223 model predictions: {array-like} 224 225 """ 226 return np.argmax(self.predict_proba(X, **kwargs), axis=1)
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
228 def predict_proba(self, X, **kwargs): 229 """Predict probabilities for test data X. 230 231 Args: 232 233 X: {array-like}, shape = [n_samples, n_features] 234 Training vectors, where n_samples is the number 235 of samples and n_features is the number of features. 236 237 **kwargs: additional parameters to be passed to 238 self.cook_test_set 239 240 Returns: 241 242 probability estimates for test data: {array-like} 243 244 """ 245 246 shape_X = X.shape 247 248 probs = np.zeros((shape_X[0], self.n_classes_)) 249 250 if len(shape_X) == 1: 251 n_features = shape_X[0] 252 253 new_X = mo.rbind( 254 X.reshape(1, n_features), 255 np.ones(n_features).reshape(1, n_features), 256 ) 257 258 Z = self.cook_test_set(new_X, **kwargs) 259 260 # loop on all the classes 261 for i in range(self.n_classes_): 262 probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0] 263 264 else: 265 Z = self.cook_test_set(X, **kwargs) 266 267 # loop on all the classes 268 for i in range(self.n_classes_): 269 probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs) 270 271 expit_raw_probs = expit(probs) 272 273 return expit_raw_probs / expit_raw_probs.sum(axis=1)[:, None]
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
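Conceptually, `predict_proba` turns one regression per class into probabilities: the raw per-class outputs are passed through the sigmoid (`expit`) and then normalized row-wise. The standalone sketch below reproduces only that idea with plain scikit-learn regressions; it deliberately omits the hidden-layer and clustering features that `cook_test_set` adds in the actual class:

```python
import numpy as np
from scipy.special import expit
from sklearn.datasets import load_iris
from sklearn.linear_model import LinearRegression

X, y = load_iris(return_X_y=True)
n_classes = len(np.unique(y))

# one regression task per class, on a one-hot (0/1) response
fitted = [LinearRegression().fit(X, (y == i).astype(float)) for i in range(n_classes)]

raw = np.column_stack([m.predict(X) for m in fitted])  # one column per class
probs = expit(raw)                                      # squash to (0, 1)
probs /= probs.sum(axis=1, keepdims=True)               # rows sum to 1
print(probs[:3])
print(probs.argmax(axis=1)[:3], y[:3])
```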
98class NeuralNetRegressor(BaseEstimator, RegressorMixin): 99 """ 100 (Pretrained) Neural Network Regressor. 101 102 Parameters: 103 104 hidden_layer_sizes : tuple, default=(100,) 105 The number of neurons in each hidden layer. 106 max_iter : int, default=100 107 The maximum number of iterations to train the model. 108 learning_rate : float, default=0.01 109 The learning rate for the optimizer. 110 l1_ratio : float, default=0.5 111 The ratio of L1 regularization. 112 alpha : float, default=1e-6 113 The regularization parameter. 114 activation_name : str, default="relu" 115 The activation function to use. 116 dropout : float, default=0.0 117 The dropout rate. 118 random_state : int, default=None 119 The random state for the random number generator. 120 weights : list, default=None 121 The weights to initialize the model with. 122 123 Attributes: 124 125 weights : list 126 The weights of the model. 127 params : list 128 The parameters of the model. 129 scaler_ : sklearn.preprocessing.StandardScaler 130 The scaler used to standardize the input features. 131 y_mean_ : float 132 The mean of the target variable. 133 134 Methods: 135 136 fit(X, y) 137 Fit the model to the data. 138 predict(X) 139 Predict the target variable. 140 get_weights() 141 Get the weights of the model. 142 set_weights(weights) 143 Set the weights of the model. 144 """ 145 146 def __init__( 147 self, 148 hidden_layer_sizes=None, 149 max_iter=100, 150 learning_rate=0.01, 151 l1_ratio=0.5, 152 alpha=1e-6, 153 activation_name="relu", 154 dropout=0, 155 weights=None, 156 random_state=None, 157 ): 158 if weights is None and hidden_layer_sizes is None: 159 hidden_layer_sizes = (100,) # default value if neither is provided 160 self.hidden_layer_sizes = hidden_layer_sizes 161 self.max_iter = max_iter 162 self.learning_rate = learning_rate 163 self.l1_ratio = l1_ratio 164 self.alpha = alpha 165 self.activation_name = activation_name 166 self.dropout = dropout 167 self.weights = weights 168 self.random_state = random_state 169 self.params = None 170 self.scaler_ = StandardScaler() 171 self.y_mean_ = None 172 173 def _validate_weights(self, input_dim): 174 """Validate that weights dimensions are coherent.""" 175 if not self.weights: 176 return False 177 178 try: 179 # Check each layer's weights and biases 180 prev_dim = input_dim 181 for W, b in self.weights: 182 # Check weight matrix dimensions 183 if W.shape[0] != prev_dim: 184 raise ValueError( 185 f"Weight matrix input dimension {W.shape[0]} does not match, previous layer output dimension {prev_dim}" 186 ) 187 # Check bias dimension matches weight matrix output 188 if W.shape[1] != b.shape[0]: 189 raise ValueError( 190 f"Bias dimension {b.shape[0]} does not match weight matrix, output dimension {W.shape[1]}" 191 ) 192 prev_dim = W.shape[1] 193 194 # Check final output dimension is 1 for regression 195 if prev_dim != 1: 196 raise ValueError( 197 f"Final layer output dimension {prev_dim} must be 1 for regression" 198 ) 199 200 return True 201 except (AttributeError, IndexError): 202 raise ValueError( 203 "Weights format is invalid. 
Expected list of (weight, bias) tuples" 204 ) 205 206 def fit(self, X, y): 207 # Standardize the input features 208 X = self.scaler_.fit_transform(X) 209 # Ensure y is 2D for consistency 210 y = y.reshape(-1, 1) 211 self.y_mean_ = jnp.mean(y) 212 y = y - self.y_mean_ 213 # Validate or initialize weights 214 if self.weights is not None: 215 if self._validate_weights(X.shape[1]): 216 self.params = self.weights 217 else: 218 if self.hidden_layer_sizes is None: 219 raise ValueError( 220 "Either weights or hidden_layer_sizes must be provided" 221 ) 222 self.params = initialize_params( 223 X.shape[1], self.hidden_layer_sizes, self.random_state 224 ) 225 loss_fn = partial(loss, l1_ratio=self.l1_ratio, alpha=self.alpha) 226 grad_loss = jit(grad(loss_fn)) # compiled gradient evaluation function 227 perex_grads = jit( 228 vmap(grad_loss, in_axes=(None, 0, 0)) 229 ) # fast per-example grads 230 # Training loop 231 for _ in range(self.max_iter): 232 grads = perex_grads(self.params, X, y) 233 # Average gradients across examples 234 grads = jax.tree_map(lambda g: jnp.mean(g, axis=0), grads) 235 # Update parameters 236 self.params = [ 237 (W - self.learning_rate * dW, b - self.learning_rate * db) 238 for (W, b), (dW, db) in zip(self.params, grads) 239 ] 240 # Store final weights 241 self.weights = self.params 242 return self 243 244 def get_weights(self): 245 """Return the current weights of the model.""" 246 if self.weights is None: 247 raise ValueError("No weights available. Model has not been fitted yet.") 248 return self.weights 249 250 def set_weights(self, weights): 251 """Set the weights of the model manually.""" 252 self.weights = weights 253 self.params = weights 254 255 def predict(self, X): 256 X = self.scaler_.transform(X) 257 if self.params is None: 258 raise ValueError("Model has not been fitted yet.") 259 predictions = predict_internal( 260 self.params, 261 X, 262 activation_func=self.activation_name, 263 dropout=self.dropout, 264 seed=self.random_state, 265 ) 266 return predictions.reshape(-1) + self.y_mean_
(Pretrained) Neural Network Regressor.
Parameters:
hidden_layer_sizes : tuple, default=(100,)
The number of neurons in each hidden layer.
max_iter : int, default=100
The maximum number of iterations to train the model.
learning_rate : float, default=0.01
The learning rate for the optimizer.
l1_ratio : float, default=0.5
The ratio of L1 regularization.
alpha : float, default=1e-6
The regularization parameter.
activation_name : str, default="relu"
The activation function to use.
dropout : float, default=0.0
The dropout rate.
random_state : int, default=None
The random state for the random number generator.
weights : list, default=None
The weights to initialize the model with.
Attributes:
weights : list
The weights of the model.
params : list
The parameters of the model.
scaler_ : sklearn.preprocessing.StandardScaler
The scaler used to standardize the input features.
y_mean_ : float
The mean of the target variable.
Methods:
fit(X, y)
Fit the model to the data.
predict(X)
Predict the target variable.
get_weights()
Get the weights of the model.
set_weights(weights)
Set the weights of the model.
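A minimal usage sketch (hyperparameters are illustrative), including the "pretrained" pattern suggested by `get_weights`/`set_weights`: weights taken from one fitted model can seed another through the `weights` argument, so training resumes from those values instead of a random initialization:

```python
import nnetsauce as ns
import numpy as np
from sklearn.datasets import make_regression

X, y = make_regression(n_samples=200, n_features=5, noise=0.1, random_state=123)

regr = ns.NeuralNetRegressor(hidden_layer_sizes=(20, 10), max_iter=200,
                             learning_rate=0.01, random_state=123)
regr.fit(X, y)
print(regr.predict(X[:5]))

# warm-start a second model from the trained weights
warm = ns.NeuralNetRegressor(weights=regr.get_weights(), random_state=123)
warm.fit(X, y)  # continues from the provided weights
print(warm.predict(X[:5]))
```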
206 def fit(self, X, y): 207 # Standardize the input features 208 X = self.scaler_.fit_transform(X) 209 # Ensure y is 2D for consistency 210 y = y.reshape(-1, 1) 211 self.y_mean_ = jnp.mean(y) 212 y = y - self.y_mean_ 213 # Validate or initialize weights 214 if self.weights is not None: 215 if self._validate_weights(X.shape[1]): 216 self.params = self.weights 217 else: 218 if self.hidden_layer_sizes is None: 219 raise ValueError( 220 "Either weights or hidden_layer_sizes must be provided" 221 ) 222 self.params = initialize_params( 223 X.shape[1], self.hidden_layer_sizes, self.random_state 224 ) 225 loss_fn = partial(loss, l1_ratio=self.l1_ratio, alpha=self.alpha) 226 grad_loss = jit(grad(loss_fn)) # compiled gradient evaluation function 227 perex_grads = jit( 228 vmap(grad_loss, in_axes=(None, 0, 0)) 229 ) # fast per-example grads 230 # Training loop 231 for _ in range(self.max_iter): 232 grads = perex_grads(self.params, X, y) 233 # Average gradients across examples 234 grads = jax.tree_map(lambda g: jnp.mean(g, axis=0), grads) 235 # Update parameters 236 self.params = [ 237 (W - self.learning_rate * dW, b - self.learning_rate * db) 238 for (W, b), (dW, db) in zip(self.params, grads) 239 ] 240 # Store final weights 241 self.weights = self.params 242 return self
255 def predict(self, X): 256 X = self.scaler_.transform(X) 257 if self.params is None: 258 raise ValueError("Model has not been fitted yet.") 259 predictions = predict_internal( 260 self.params, 261 X, 262 activation_func=self.activation_name, 263 dropout=self.dropout, 264 seed=self.random_state, 265 ) 266 return predictions.reshape(-1) + self.y_mean_
10class NeuralNetClassifier(BaseEstimator, ClassifierMixin): 11 """ 12 (Pretrained) Neural Network Classifier. 13 14 Parameters: 15 16 hidden_layer_sizes : tuple, default=(100,) 17 The number of neurons in each hidden layer. 18 max_iter : int, default=100 19 The maximum number of iterations to train the model. 20 learning_rate : float, default=0.01 21 The learning rate for the optimizer. 22 l1_ratio : float, default=0.5 23 The ratio of L1 regularization. 24 alpha : float, default=1e-6 25 The regularization parameter. 26 activation_name : str, default="relu" 27 The activation function to use. 28 dropout : float, default=0.0 29 The dropout rate. 30 random_state : int, default=None 31 The random state for the random number generator. 32 weights : list, default=None 33 The weights to initialize the model with. 34 35 Attributes: 36 37 weights : list 38 The weights of the model. 39 params : list 40 The parameters of the model. 41 scaler_ : sklearn.preprocessing.StandardScaler 42 The scaler used to standardize the input features. 43 y_mean_ : float 44 The mean of the target variable. 45 46 Methods: 47 48 fit(X, y) 49 Fit the model to the data. 50 predict(X) 51 Predict the target variable. 52 predict_proba(X) 53 Predict the probability of the target variable. 54 get_weights() 55 Get the weights of the model. 56 set_weights(weights) 57 Set the weights of the model. 58 """ 59 _estimator_type = "classifier" 60 61 def __init__( 62 self, 63 hidden_layer_sizes=(100,), 64 max_iter=100, 65 learning_rate=0.01, 66 weights=None, 67 l1_ratio=0.5, 68 alpha=1e-6, 69 activation_name="relu", 70 dropout=0.0, 71 random_state=None, 72 ): 73 self.hidden_layer_sizes = hidden_layer_sizes 74 self.max_iter = max_iter 75 self.learning_rate = learning_rate 76 self.weights = weights 77 self.l1_ratio = l1_ratio 78 self.alpha = alpha 79 self.activation_name = activation_name 80 self.dropout = dropout 81 self.random_state = random_state 82 self.regr = None 83 84 def fit(self, X, y): 85 """Fit the model to the data. 86 87 Parameters: 88 89 X: {array-like}, shape = [n_samples, n_features] 90 Training vectors, where n_samples is the number of samples and 91 n_features is the number of features. 92 y: array-like, shape = [n_samples] 93 Target values. 94 """ 95 regressor = NeuralNetRegressor( 96 hidden_layer_sizes=self.hidden_layer_sizes, 97 max_iter=self.max_iter, 98 learning_rate=self.learning_rate, 99 weights=self.weights, 100 l1_ratio=self.l1_ratio, 101 alpha=self.alpha, 102 activation_name=self.activation_name, 103 dropout=self.dropout, 104 random_state=self.random_state, 105 ) 106 self.regr = SimpleMultitaskClassifier(regressor) 107 self.regr.fit(X, y) 108 self.classes_ = np.unique(y) 109 self.n_classes_ = len(self.classes_) 110 self.n_tasks_ = 1 111 self.n_features_in_ = X.shape[1] 112 self.n_outputs_ = 1 113 self.n_samples_fit_ = X.shape[0] 114 self.n_samples_test_ = X.shape[0] 115 self.n_features_out_ = 1 116 self.n_outputs_ = 1 117 self.n_features_in_ = X.shape[1] 118 self.n_features_out_ = 1 119 self.n_outputs_ = 1 120 return self 121 122 def predict_proba(self, X): 123 """Predict the probability of the target variable. 124 125 Parameters: 126 127 X: {array-like}, shape = [n_samples, n_features] 128 Training vectors, where n_samples is the number of samples and 129 n_features is the number of features. 130 """ 131 return self.regr.predict_proba(X) 132 133 def predict(self, X): 134 """Predict the target variable. 
135 136 Parameters: 137 138 X: {array-like}, shape = [n_samples, n_features] 139 Training vectors, where n_samples is the number of samples and 140 n_features is the number of features. 141 """ 142 return self.regr.predict(X) 143 144 @property 145 def _estimator_type(self): 146 return "classifier"
(Pretrained) Neural Network Classifier.
Parameters:
hidden_layer_sizes : tuple, default=(100,)
The number of neurons in each hidden layer.
max_iter : int, default=100
The maximum number of iterations to train the model.
learning_rate : float, default=0.01
The learning rate for the optimizer.
l1_ratio : float, default=0.5
The ratio of L1 regularization.
alpha : float, default=1e-6
The regularization parameter.
activation_name : str, default="relu"
The activation function to use.
dropout : float, default=0.0
The dropout rate.
random_state : int, default=None
The random state for the random number generator.
weights : list, default=None
The weights to initialize the model with.
Attributes:
weights : list
The weights of the model.
params : list
The parameters of the model.
scaler_ : sklearn.preprocessing.StandardScaler
The scaler used to standardize the input features.
y_mean_ : float
The mean of the target variable.
Methods:
fit(X, y)
Fit the model to the data.
predict(X)
Predict the target variable.
predict_proba(X)
Predict the probability of the target variable.
get_weights()
Get the weights of the model.
set_weights(weights)
Set the weights of the model.
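A minimal usage sketch (hyperparameters are illustrative), mirroring the classification examples used elsewhere in the package:

```python
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn import metrics

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=123)

clf = ns.NeuralNetClassifier(hidden_layer_sizes=(50,), max_iter=100,
                             learning_rate=0.01, random_state=123)
clf.fit(X_train, y_train)
print(clf.predict_proba(X_test)[:3])
print(metrics.accuracy_score(y_test, clf.predict(X_test)))
```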
84 def fit(self, X, y): 85 """Fit the model to the data. 86 87 Parameters: 88 89 X: {array-like}, shape = [n_samples, n_features] 90 Training vectors, where n_samples is the number of samples and 91 n_features is the number of features. 92 y: array-like, shape = [n_samples] 93 Target values. 94 """ 95 regressor = NeuralNetRegressor( 96 hidden_layer_sizes=self.hidden_layer_sizes, 97 max_iter=self.max_iter, 98 learning_rate=self.learning_rate, 99 weights=self.weights, 100 l1_ratio=self.l1_ratio, 101 alpha=self.alpha, 102 activation_name=self.activation_name, 103 dropout=self.dropout, 104 random_state=self.random_state, 105 ) 106 self.regr = SimpleMultitaskClassifier(regressor) 107 self.regr.fit(X, y) 108 self.classes_ = np.unique(y) 109 self.n_classes_ = len(self.classes_) 110 self.n_tasks_ = 1 111 self.n_features_in_ = X.shape[1] 112 self.n_outputs_ = 1 113 self.n_samples_fit_ = X.shape[0] 114 self.n_samples_test_ = X.shape[0] 115 self.n_features_out_ = 1 116 self.n_outputs_ = 1 117 self.n_features_in_ = X.shape[1] 118 self.n_features_out_ = 1 119 self.n_outputs_ = 1 120 return self
Fit the model to the data.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
122 def predict_proba(self, X): 123 """Predict the probability of the target variable. 124 125 Parameters: 126 127 X: {array-like}, shape = [n_samples, n_features] 128 Training vectors, where n_samples is the number of samples and 129 n_features is the number of features. 130 """ 131 return self.regr.predict_proba(X)
Predict the probability of the target variable.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
133 def predict(self, X): 134 """Predict the target variable. 135 136 Parameters: 137 138 X: {array-like}, shape = [n_samples, n_features] 139 Training vectors, where n_samples is the number of samples and 140 n_features is the number of features. 141 """ 142 return self.regr.predict(X)
Predict the target variable.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
19class PredictionInterval(BaseEstimator, RegressorMixin): 20 """Class PredictionInterval: Obtain prediction intervals. 21 22 Attributes: 23 24 obj: an object; 25 fitted object containing methods `fit` and `predict` 26 27 method: a string; 28 method for constructing the prediction intervals. 29 Currently "splitconformal" (default) and "localconformal" 30 31 level: a float; 32 Confidence level for prediction intervals. Default is 95, 33 equivalent to a miscoverage error of 5 (%) 34 35 replications: an integer; 36 Number of replications for simulated conformal (default is `None`), 37 for type_pi = "bootstrap" or "kde" 38 39 type_pi: a string; 40 type of prediction interval: currently `None` 41 (split conformal without simulation), "kde" or "bootstrap" 42 43 type_split: a string; 44 "random" (random split of data) or "sequential" (sequential split of data) 45 46 seed: an integer; 47 Reproducibility of fit (there's a random split between fitting and calibration data) 48 """ 49 50 def __init__( 51 self, 52 obj, 53 method="splitconformal", 54 level=95, 55 type_pi=None, 56 type_split="random", 57 replications=None, 58 kernel=None, 59 agg="mean", 60 seed=123, 61 ): 62 63 self.obj = obj 64 self.method = method 65 self.level = level 66 self.type_pi = type_pi 67 self.type_split = type_split 68 self.replications = replications 69 self.kernel = kernel 70 self.agg = agg 71 self.seed = seed 72 self.alpha_ = 1 - self.level / 100 73 self.quantile_ = None 74 self.icp_ = None 75 self.calibrated_residuals_ = None 76 self.scaled_calibrated_residuals_ = None 77 self.calibrated_residuals_scaler_ = None 78 self.kde_ = None 79 self.aic_ = None 80 self.aicc_ = None 81 self.bic_ = None 82 self.sse_ = None 83 84 def fit(self, X, y, sample_weight=None, **kwargs): 85 """Fit the `method` to training data (X, y). 86 87 Args: 88 89 X: array-like, shape = [n_samples, n_features]; 90 Training set vectors, where n_samples is the number 91 of samples and n_features is the number of features. 92 93 y: array-like, shape = [n_samples, ]; Target values. 94 95 sample_weight: array-like, shape = [n_samples] 96 Sample weights. 
97 98 """ 99 100 if self.type_split == "random": 101 102 X_train, X_calibration, y_train, y_calibration = train_test_split( 103 X, y, test_size=0.5, random_state=self.seed 104 ) 105 106 elif self.type_split == "sequential": 107 108 n_x = X.shape[0] 109 n_x_half = n_x // 2 110 first_half_idx = range(0, n_x_half) 111 second_half_idx = range(n_x_half, n_x) 112 X_train = X[first_half_idx, :] 113 X_calibration = X[second_half_idx, :] 114 y_train = y[first_half_idx] 115 y_calibration = y[second_half_idx] 116 117 if self.method == "splitconformal": 118 119 self.obj.fit(X_train, y_train) 120 preds_calibration = self.obj.predict(X_calibration) 121 self.calibrated_residuals_ = y_calibration - preds_calibration 122 absolute_residuals = np.abs(self.calibrated_residuals_) 123 self.calibrated_residuals_scaler_ = StandardScaler( 124 with_mean=True, with_std=True 125 ) 126 self.scaled_calibrated_residuals_ = ( 127 self.calibrated_residuals_scaler_.fit_transform( 128 self.calibrated_residuals_.reshape(-1, 1) 129 ).ravel() 130 ) 131 try: 132 # numpy version >= 1.22 133 self.quantile_ = np.quantile( 134 a=absolute_residuals, q=self.level / 100, method="higher" 135 ) 136 except Exception: 137 # numpy version < 1.22 138 self.quantile_ = np.quantile( 139 a=absolute_residuals, 140 q=self.level / 100, 141 interpolation="higher", 142 ) 143 144 if self.method == "localconformal": 145 146 mad_estimator = ExtraTreesRegressor() 147 normalizer = RegressorNormalizer(self.obj, mad_estimator, AbsErrorErrFunc()) 148 nc = RegressorNc(self.obj, AbsErrorErrFunc(), normalizer) 149 self.icp_ = IcpRegressor(nc) 150 self.icp_.fit(X_train, y_train) 151 self.icp_.calibrate(X_calibration, y_calibration) 152 153 # Calculate AIC 154 # Get predictions 155 preds = self.obj.predict(X_calibration) 156 157 # Calculate SSE 158 self.sse_ = np.sum((y_calibration - preds) ** 2) 159 160 # Get number of parameters from the base model 161 n_params = getattr(self.obj, 'n_hidden_features', 0) + X_calibration.shape[1] 162 163 # Calculate AIC 164 n_samples = len(y_calibration) 165 temp = n_samples * np.log(self.sse_/n_samples) 166 self.aic_ = temp + 2 * n_params 167 self.bic_ = temp + np.log(n_samples) * n_params 168 169 return self 170 171 def predict(self, X, return_pi=False): 172 """Obtain predictions and prediction intervals 173 174 Args: 175 176 X: array-like, shape = [n_samples, n_features]; 177 Testing set vectors, where n_samples is the number 178 of samples and n_features is the number of features. 179 180 return_pi: boolean 181 Whether the prediction interval is returned or not. 182 Default is False, for compatibility with other _estimators_. 183 If True, a tuple containing the predictions + lower and upper 184 bounds is returned. 
185 186 """ 187 188 if self.method == "splitconformal": 189 pred = self.obj.predict(X) 190 191 if self.method == "localconformal": 192 pred = self.icp_.predict(X) 193 194 if self.method == "splitconformal": 195 196 if ( 197 self.replications is None and self.type_pi is None 198 ): # type_pi is not used here, no bootstrap or kde 199 200 if return_pi: 201 202 DescribeResult = namedtuple( 203 "DescribeResult", ("mean", "lower", "upper") 204 ) 205 return DescribeResult( 206 pred, pred - self.quantile_, pred + self.quantile_ 207 ) 208 209 else: 210 211 return pred 212 213 else: # self.method == "splitconformal" and if self.replications is not None, type_pi must be used 214 215 if self.type_pi is None: 216 self.type_pi = "kde" 217 raise Warning("type_pi must be set, setting to 'kde'") 218 219 if self.replications is None: 220 self.replications = 100 221 raise Warning("replications must be set, setting to 100") 222 223 assert self.type_pi in ( 224 "bootstrap", 225 "kde", 226 ), "`self.type_pi` must be in ('bootstrap', 'kde')" 227 228 if self.type_pi == "bootstrap": 229 np.random.seed(self.seed) 230 self.residuals_sims_ = np.asarray( 231 [ 232 np.random.choice( 233 a=self.scaled_calibrated_residuals_, 234 size=X.shape[0], 235 ) 236 for _ in range(self.replications) 237 ] 238 ).T 239 self.sims_ = np.asarray( 240 [ 241 pred 242 + self.calibrated_residuals_scaler_.scale_[0] 243 * self.residuals_sims_[:, i].ravel() 244 for i in range(self.replications) 245 ] 246 ).T 247 elif self.type_pi == "kde": 248 self.kde_ = gaussian_kde(dataset=self.scaled_calibrated_residuals_) 249 self.sims_ = np.asarray( 250 [ 251 pred 252 + self.calibrated_residuals_scaler_.scale_[0] 253 * self.kde_.resample( 254 size=X.shape[0], seed=self.seed + i 255 ).ravel() 256 for i in range(self.replications) 257 ] 258 ).T 259 260 self.mean_ = np.mean(self.sims_, axis=1) 261 self.lower_ = np.quantile(self.sims_, q=self.alpha_ / 200, axis=1) 262 self.upper_ = np.quantile(self.sims_, q=1 - self.alpha_ / 200, axis=1) 263 264 DescribeResult = namedtuple( 265 "DescribeResult", ("mean", "sims", "lower", "upper") 266 ) 267 268 return DescribeResult(self.mean_, self.sims_, self.lower_, self.upper_) 269 270 if self.method == "localconformal": 271 272 if self.replications is None: 273 274 if return_pi: 275 276 predictions_bounds = self.icp_.predict( 277 X, significance=1 - self.level 278 ) 279 DescribeResult = namedtuple( 280 "DescribeResult", ("mean", "lower", "upper") 281 ) 282 return DescribeResult( 283 pred, predictions_bounds[:, 0], predictions_bounds[:, 1] 284 ) 285 286 else: 287 288 return pred 289 290 else: # (self.method == "localconformal") and if self.replications is not None 291 292 raise NotImplementedError( 293 "When self.method == 'localconformal', there are no simulations" 294 )
Class PredictionInterval: Obtain prediction intervals.
Attributes:
obj: an object;
fitted object containing methods `fit` and `predict`
method: a string;
method for constructing the prediction intervals.
Currently "splitconformal" (default) and "localconformal"
level: a float;
Confidence level for prediction intervals. Default is 95,
equivalent to a miscoverage error of 5 (%)
replications: an integer;
Number of replications for simulated conformal (default is `None`),
for type_pi = "bootstrap" or "kde"
type_pi: a string;
type of prediction interval: currently `None`
(split conformal without simulation), "kde" or "bootstrap"
type_split: a string;
"random" (random split of data) or "sequential" (sequential split of data)
seed: an integer;
Reproducibility of fit (there's a random split between fitting and calibration data)
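A minimal split-conformal sketch (not from the original docs; the base learner and settings are illustrative):

```python
import nnetsauce as ns
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split

X, y = fetch_california_housing(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=123)

pi = ns.PredictionInterval(obj=Ridge(), method="splitconformal", level=95)
pi.fit(X_train, y_train)
preds, lower, upper = pi.predict(X_test, return_pi=True)
print(np.mean((y_test >= lower) & (y_test <= upper)))  # empirical coverage
```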
84 def fit(self, X, y, sample_weight=None, **kwargs): 85 """Fit the `method` to training data (X, y). 86 87 Args: 88 89 X: array-like, shape = [n_samples, n_features]; 90 Training set vectors, where n_samples is the number 91 of samples and n_features is the number of features. 92 93 y: array-like, shape = [n_samples, ]; Target values. 94 95 sample_weight: array-like, shape = [n_samples] 96 Sample weights. 97 98 """ 99 100 if self.type_split == "random": 101 102 X_train, X_calibration, y_train, y_calibration = train_test_split( 103 X, y, test_size=0.5, random_state=self.seed 104 ) 105 106 elif self.type_split == "sequential": 107 108 n_x = X.shape[0] 109 n_x_half = n_x // 2 110 first_half_idx = range(0, n_x_half) 111 second_half_idx = range(n_x_half, n_x) 112 X_train = X[first_half_idx, :] 113 X_calibration = X[second_half_idx, :] 114 y_train = y[first_half_idx] 115 y_calibration = y[second_half_idx] 116 117 if self.method == "splitconformal": 118 119 self.obj.fit(X_train, y_train) 120 preds_calibration = self.obj.predict(X_calibration) 121 self.calibrated_residuals_ = y_calibration - preds_calibration 122 absolute_residuals = np.abs(self.calibrated_residuals_) 123 self.calibrated_residuals_scaler_ = StandardScaler( 124 with_mean=True, with_std=True 125 ) 126 self.scaled_calibrated_residuals_ = ( 127 self.calibrated_residuals_scaler_.fit_transform( 128 self.calibrated_residuals_.reshape(-1, 1) 129 ).ravel() 130 ) 131 try: 132 # numpy version >= 1.22 133 self.quantile_ = np.quantile( 134 a=absolute_residuals, q=self.level / 100, method="higher" 135 ) 136 except Exception: 137 # numpy version < 1.22 138 self.quantile_ = np.quantile( 139 a=absolute_residuals, 140 q=self.level / 100, 141 interpolation="higher", 142 ) 143 144 if self.method == "localconformal": 145 146 mad_estimator = ExtraTreesRegressor() 147 normalizer = RegressorNormalizer(self.obj, mad_estimator, AbsErrorErrFunc()) 148 nc = RegressorNc(self.obj, AbsErrorErrFunc(), normalizer) 149 self.icp_ = IcpRegressor(nc) 150 self.icp_.fit(X_train, y_train) 151 self.icp_.calibrate(X_calibration, y_calibration) 152 153 # Calculate AIC 154 # Get predictions 155 preds = self.obj.predict(X_calibration) 156 157 # Calculate SSE 158 self.sse_ = np.sum((y_calibration - preds) ** 2) 159 160 # Get number of parameters from the base model 161 n_params = getattr(self.obj, 'n_hidden_features', 0) + X_calibration.shape[1] 162 163 # Calculate AIC 164 n_samples = len(y_calibration) 165 temp = n_samples * np.log(self.sse_/n_samples) 166 self.aic_ = temp + 2 * n_params 167 self.bic_ = temp + np.log(n_samples) * n_params 168 169 return self
Fit the `method` to training data (X, y).
Args:
X: array-like, shape = [n_samples, n_features];
Training set vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples, ]; Target values.
sample_weight: array-like, shape = [n_samples]
Sample weights.
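For time-ordered data, `type_split="sequential"` calibrates on the second half of the sample instead of a random split (a short sketch, reusing the illustrative setup above):

```python
import nnetsauce as ns
from sklearn.linear_model import Ridge

# assumes X_train, y_train from the sketch above
pi_seq = ns.PredictionInterval(obj=Ridge(), method="splitconformal",
                               type_split="sequential", level=95)
pi_seq.fit(X_train, y_train)  # first half trains, second half calibrates
```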
171 def predict(self, X, return_pi=False): 172 """Obtain predictions and prediction intervals 173 174 Args: 175 176 X: array-like, shape = [n_samples, n_features]; 177 Testing set vectors, where n_samples is the number 178 of samples and n_features is the number of features. 179 180 return_pi: boolean 181 Whether the prediction interval is returned or not. 182 Default is False, for compatibility with other _estimators_. 183 If True, a tuple containing the predictions + lower and upper 184 bounds is returned. 185 186 """ 187 188 if self.method == "splitconformal": 189 pred = self.obj.predict(X) 190 191 if self.method == "localconformal": 192 pred = self.icp_.predict(X) 193 194 if self.method == "splitconformal": 195 196 if ( 197 self.replications is None and self.type_pi is None 198 ): # type_pi is not used here, no bootstrap or kde 199 200 if return_pi: 201 202 DescribeResult = namedtuple( 203 "DescribeResult", ("mean", "lower", "upper") 204 ) 205 return DescribeResult( 206 pred, pred - self.quantile_, pred + self.quantile_ 207 ) 208 209 else: 210 211 return pred 212 213 else: # self.method == "splitconformal" and if self.replications is not None, type_pi must be used 214 215 if self.type_pi is None: 216 self.type_pi = "kde" 217 raise Warning("type_pi must be set, setting to 'kde'") 218 219 if self.replications is None: 220 self.replications = 100 221 raise Warning("replications must be set, setting to 100") 222 223 assert self.type_pi in ( 224 "bootstrap", 225 "kde", 226 ), "`self.type_pi` must be in ('bootstrap', 'kde')" 227 228 if self.type_pi == "bootstrap": 229 np.random.seed(self.seed) 230 self.residuals_sims_ = np.asarray( 231 [ 232 np.random.choice( 233 a=self.scaled_calibrated_residuals_, 234 size=X.shape[0], 235 ) 236 for _ in range(self.replications) 237 ] 238 ).T 239 self.sims_ = np.asarray( 240 [ 241 pred 242 + self.calibrated_residuals_scaler_.scale_[0] 243 * self.residuals_sims_[:, i].ravel() 244 for i in range(self.replications) 245 ] 246 ).T 247 elif self.type_pi == "kde": 248 self.kde_ = gaussian_kde(dataset=self.scaled_calibrated_residuals_) 249 self.sims_ = np.asarray( 250 [ 251 pred 252 + self.calibrated_residuals_scaler_.scale_[0] 253 * self.kde_.resample( 254 size=X.shape[0], seed=self.seed + i 255 ).ravel() 256 for i in range(self.replications) 257 ] 258 ).T 259 260 self.mean_ = np.mean(self.sims_, axis=1) 261 self.lower_ = np.quantile(self.sims_, q=self.alpha_ / 200, axis=1) 262 self.upper_ = np.quantile(self.sims_, q=1 - self.alpha_ / 200, axis=1) 263 264 DescribeResult = namedtuple( 265 "DescribeResult", ("mean", "sims", "lower", "upper") 266 ) 267 268 return DescribeResult(self.mean_, self.sims_, self.lower_, self.upper_) 269 270 if self.method == "localconformal": 271 272 if self.replications is None: 273 274 if return_pi: 275 276 predictions_bounds = self.icp_.predict( 277 X, significance=1 - self.level 278 ) 279 DescribeResult = namedtuple( 280 "DescribeResult", ("mean", "lower", "upper") 281 ) 282 return DescribeResult( 283 pred, predictions_bounds[:, 0], predictions_bounds[:, 1] 284 ) 285 286 else: 287 288 return pred 289 290 else: # (self.method == "localconformal") and if self.replications is not None 291 292 raise NotImplementedError( 293 "When self.method == 'localconformal', there are no simulations" 294 )
Obtain predictions and prediction intervals
Args:
X: array-like, shape = [n_samples, n_features];
Testing set vectors, where n_samples is the number
of samples and n_features is the number of features.
return_pi: boolean
Whether the prediction interval is returned or not.
Default is False, for compatibility with other _estimators_.
If True, a tuple containing the predictions + lower and upper
bounds is returned.
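When `replications` and `type_pi` are set, `predict` also returns simulated paths alongside the bounds (a sketch continuing the example above; settings are illustrative):

```python
import nnetsauce as ns
from sklearn.linear_model import Ridge

# assumes X_train, y_train, X_test from the sketch above
pi_kde = ns.PredictionInterval(obj=Ridge(), method="splitconformal",
                               type_pi="kde", replications=250, level=95)
pi_kde.fit(X_train, y_train)
res = pi_kde.predict(X_test)
print(res.sims.shape)                # (n_test_samples, 250) simulated paths
print(res.lower[:3], res.upper[:3])  # simulation-based bounds
```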
18class SimpleMultitaskClassifier(Base, ClassifierMixin): 19 """Multitask Classification model based on regression models, with shared covariates 20 21 Parameters: 22 23 obj: object 24 any object (must be a regression model) containing a method fit (obj.fit()) 25 and a method predict (obj.predict()) 26 27 seed: int 28 reproducibility seed 29 30 Attributes: 31 32 fit_objs_: dict 33 objects adjusted to each individual time series 34 35 n_classes_: int 36 number of classes for the classifier 37 38 Examples: 39 40 ```python 41 import nnetsauce as ns 42 import numpy as np 43 from sklearn.datasets import load_breast_cancer 44 from sklearn.linear_model import LinearRegression 45 from sklearn.model_selection import train_test_split 46 from sklearn import metrics 47 from time import time 48 49 breast_cancer = load_breast_cancer() 50 Z = breast_cancer.data 51 t = breast_cancer.target 52 53 X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2, 54 random_state=123+2*10) 55 56 # Linear Regression is used 57 regr = LinearRegression() 58 fit_obj = ns.SimpleMultitaskClassifier(regr) 59 60 start = time() 61 fit_obj.fit(X_train, y_train) 62 print(f"Elapsed {time() - start}") 63 64 print(fit_obj.score(X_test, y_test)) 65 print(fit_obj.score(X_test, y_test, scoring="roc_auc")) 66 67 start = time() 68 preds = fit_obj.predict(X_test) 69 print(f"Elapsed {time() - start}") 70 print(metrics.classification_report(preds, y_test)) 71 ``` 72 73 """ 74 75 # construct the object ----- 76 _estimator_type = "classifier" 77 78 def __init__( 79 self, 80 obj, 81 ): 82 self.type_fit = "classification" 83 self.obj = obj 84 self.fit_objs_ = {} 85 self.X_scaler_ = StandardScaler() 86 self.scaled_X_ = None 87 88 def fit(self, X, y, sample_weight=None, **kwargs): 89 """Fit SimpleMultitaskClassifier to training data (X, y). 90 91 Args: 92 93 X: {array-like}, shape = [n_samples, n_features] 94 Training vectors, where n_samples is the number 95 of samples and n_features is the number of features. 96 97 y: array-like, shape = [n_samples] 98 Target values. 99 100 **kwargs: additional parameters to be passed to 101 self.cook_training_set or self.obj.fit 102 103 Returns: 104 105 self: object 106 107 """ 108 109 assert mx.is_factor(y), "y must contain only integers" 110 111 self.classes_ = np.unique(y) # for compatibility with sklearn 112 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 113 114 self.scaled_X_ = self.X_scaler_.fit_transform(X) 115 116 # multitask response 117 Y = mo.one_hot_encode2(y, self.n_classes_) 118 119 try: 120 for i in range(self.n_classes_): 121 self.fit_objs_[i] = deepcopy( 122 self.obj.fit(self.scaled_X_, Y[:, i], sample_weight=sample_weight, **kwargs) 123 ) 124 except Exception as e: 125 for i in range(self.n_classes_): 126 self.fit_objs_[i] = deepcopy( 127 self.obj.fit(self.scaled_X_, Y[:, i], **kwargs) 128 ) 129 return self 130 131 def predict(self, X, **kwargs): 132 """Predict test data X. 133 134 Args: 135 136 X: {array-like}, shape = [n_samples, n_features] 137 Training vectors, where n_samples is the number 138 of samples and n_features is the number of features. 139 140 **kwargs: additional parameters 141 142 Returns: 143 144 model predictions: {array-like} 145 146 """ 147 return np.argmax(self.predict_proba(X, **kwargs), axis=1) 148 149 def predict_proba(self, X, **kwargs): 150 """Predict probabilities for test data X. 
151 152 Args: 153 154 X: {array-like}, shape = [n_samples, n_features] 155 Training vectors, where n_samples is the number 156 of samples and n_features is the number of features. 157 158 **kwargs: additional parameters 159 160 Returns: 161 162 probability estimates for test data: {array-like} 163 164 """ 165 166 shape_X = X.shape 167 168 probs = np.zeros((shape_X[0], self.n_classes_)) 169 170 if len(shape_X) == 1: # one example 171 172 n_features = shape_X[0] 173 174 new_X = mo.rbind( 175 X.reshape(1, n_features), 176 np.ones(n_features).reshape(1, n_features), 177 ) 178 179 Z = self.X_scaler_.transform(new_X, **kwargs) 180 181 # Fallback to standard model 182 for i in range(self.n_classes_): 183 probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0] 184 185 else: # multiple rows 186 187 Z = self.X_scaler_.transform(X, **kwargs) 188 189 # Fallback to standard model 190 for i in range(self.n_classes_): 191 probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs) 192 193 expit_raw_probs = expit(probs) 194 195 # Add small epsilon to avoid division by zero 196 row_sums = expit_raw_probs.sum(axis=1)[:, None] 197 row_sums[row_sums < 1e-10] = 1e-10 198 199 return expit_raw_probs / row_sums 200 201 def decision_function(self, X, **kwargs): 202 """Compute the decision function of X. 203 204 Parameters: 205 X: {array-like}, shape = [n_samples, n_features] 206 Samples to compute decision function for. 207 208 **kwargs: additional parameters to be passed to 209 self.cook_test_set 210 211 Returns: 212 array-like of shape (n_samples,) or (n_samples, n_classes) 213 Decision function of the input samples. The order of outputs is the same 214 as that of the classes passed to fit. 215 """ 216 if not hasattr(self.obj, "decision_function"): 217 # If base classifier doesn't have decision_function, use predict_proba 218 proba = self.predict_proba(X, **kwargs) 219 if proba.shape[1] == 2: 220 return proba[:, 1] # For binary classification 221 return proba # For multiclass 222 223 if len(X.shape) == 1: 224 n_features = X.shape[0] 225 new_X = mo.rbind( 226 X.reshape(1, n_features), 227 np.ones(n_features).reshape(1, n_features), 228 ) 229 230 return ( 231 self.obj.decision_function( 232 self.cook_test_set(new_X, **kwargs), **kwargs 233 ) 234 )[0] 235 236 return self.obj.decision_function(self.cook_test_set(X, **kwargs), **kwargs) 237 238 @property 239 def _estimator_type(self): 240 return "classifier"
Multitask Classification model based on regression models, with shared covariates
Parameters:
obj: object
any object (must be a regression model) containing a method fit (obj.fit())
and a method predict (obj.predict())
seed: int
reproducibility seed
Attributes:
fit_objs_: dict
regression objects fitted to each individual class (one per class)
n_classes_: int
number of classes for the classifier
Examples:
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time
breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
random_state=123+2*10)
# Linear Regression is used
regr = LinearRegression()
fit_obj = ns.SimpleMultitaskClassifier(regr)
start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")
print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
88 def fit(self, X, y, sample_weight=None, **kwargs): 89 """Fit SimpleMultitaskClassifier to training data (X, y). 90 91 Args: 92 93 X: {array-like}, shape = [n_samples, n_features] 94 Training vectors, where n_samples is the number 95 of samples and n_features is the number of features. 96 97 y: array-like, shape = [n_samples] 98 Target values. 99 100 **kwargs: additional parameters to be passed to 101 self.cook_training_set or self.obj.fit 102 103 Returns: 104 105 self: object 106 107 """ 108 109 assert mx.is_factor(y), "y must contain only integers" 110 111 self.classes_ = np.unique(y) # for compatibility with sklearn 112 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 113 114 self.scaled_X_ = self.X_scaler_.fit_transform(X) 115 116 # multitask response 117 Y = mo.one_hot_encode2(y, self.n_classes_) 118 119 try: 120 for i in range(self.n_classes_): 121 self.fit_objs_[i] = deepcopy( 122 self.obj.fit(self.scaled_X_, Y[:, i], sample_weight=sample_weight, **kwargs) 123 ) 124 except Exception as e: 125 for i in range(self.n_classes_): 126 self.fit_objs_[i] = deepcopy( 127 self.obj.fit(self.scaled_X_, Y[:, i], **kwargs) 128 ) 129 return self
Fit SimpleMultitaskClassifier to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
131 def predict(self, X, **kwargs): 132 """Predict test data X. 133 134 Args: 135 136 X: {array-like}, shape = [n_samples, n_features] 137 Training vectors, where n_samples is the number 138 of samples and n_features is the number of features. 139 140 **kwargs: additional parameters 141 142 Returns: 143 144 model predictions: {array-like} 145 146 """ 147 return np.argmax(self.predict_proba(X, **kwargs), axis=1)
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters
Returns:
model predictions: {array-like}
149 def predict_proba(self, X, **kwargs): 150 """Predict probabilities for test data X. 151 152 Args: 153 154 X: {array-like}, shape = [n_samples, n_features] 155 Training vectors, where n_samples is the number 156 of samples and n_features is the number of features. 157 158 **kwargs: additional parameters 159 160 Returns: 161 162 probability estimates for test data: {array-like} 163 164 """ 165 166 shape_X = X.shape 167 168 probs = np.zeros((shape_X[0], self.n_classes_)) 169 170 if len(shape_X) == 1: # one example 171 172 n_features = shape_X[0] 173 174 new_X = mo.rbind( 175 X.reshape(1, n_features), 176 np.ones(n_features).reshape(1, n_features), 177 ) 178 179 Z = self.X_scaler_.transform(new_X, **kwargs) 180 181 # Fallback to standard model 182 for i in range(self.n_classes_): 183 probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0] 184 185 else: # multiple rows 186 187 Z = self.X_scaler_.transform(X, **kwargs) 188 189 # Fallback to standard model 190 for i in range(self.n_classes_): 191 probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs) 192 193 expit_raw_probs = expit(probs) 194 195 # Add small epsilon to avoid division by zero 196 row_sums = expit_raw_probs.sum(axis=1)[:, None] 197 row_sums[row_sums < 1e-10] = 1e-10 198 199 return expit_raw_probs / row_sums
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters
Returns:
probability estimates for test data: {array-like}
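Probabilities come from one regression per class, passed through a sigmoid and renormalized row-wise (a short check, continuing the example above):

```python
# assumes fit_obj and X_test from the breast cancer example above
probs = fit_obj.predict_proba(X_test)
print(probs.shape)            # (n_test_samples, n_classes)
print(probs.sum(axis=1)[:5])  # each row sums to 1
```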
9class Optimizer: 10 """Optimizer class 11 12 Attributes: 13 14 type_optim: str 15 type of optimizer, (currently) either 'sgd' (stochastic minibatch gradient descent) 16 or 'scd' (stochastic minibatch coordinate descent) 17 18 num_iters: int 19 number of iterations of the optimizer 20 21 learning_rate: float 22 step size 23 24 batch_prop: float 25 proportion of the initial data used at each optimization step 26 27 learning_method: str 28 "poly" - learning rate decreasing as a polynomial function 29 of # of iterations (default) 30 "exp" - learning rate decreasing as an exponential function 31 of # of iterations 32 "momentum" - gradient descent using momentum 33 34 randomization: str 35 type of randomization applied at each step 36 "strat" - stratified subsampling (default) 37 "shuffle" - random subsampling 38 39 mass: float 40 mass on velocity, for `method` == "momentum" 41 42 decay: float 43 coefficient of decrease of the learning rate for 44 `method` == "poly" and `method` == "exp" 45 46 tolerance: float 47 early stopping parameter (convergence of loss function) 48 49 verbose: int 50 controls verbosity of gradient descent 51 0 - nothing is printed 52 1 - a progress bar is printed 53 2 - successive loss function values are printed 54 55 """ 56 57 # construct the object ----- 58 59 def __init__( 60 self, 61 type_optim="sgd", 62 num_iters=100, 63 learning_rate=0.01, 64 batch_prop=1.0, 65 learning_method="momentum", 66 randomization="strat", 67 mass=0.9, 68 decay=0.1, 69 tolerance=1e-3, 70 verbose=1, 71 ): 72 self.type_optim = type_optim 73 self.num_iters = num_iters 74 self.learning_rate = learning_rate 75 self.batch_prop = batch_prop 76 self.learning_method = learning_method 77 self.randomization = randomization 78 self.mass = mass 79 self.decay = decay 80 self.tolerance = tolerance 81 self.verbose = verbose 82 self.opt = None 83 84 def fit(self, loss_func, response, x0, **kwargs): 85 """Fit GLM model to training data (X, y). 86 87 Args: 88 89 loss_func: loss function 90 91 response: array-like, shape = [n_samples] 92 target variable (used for subsampling) 93 94 x0: array-like, shape = [n_features] 95 initial value provided to the optimizer 96 97 **kwargs: additional parameters to be passed to 98 loss function 99 100 Returns: 101 102 self: object 103 104 """ 105 106 if self.type_optim == "scd": 107 self.results = scd( 108 loss_func, 109 response=response, 110 x=x0, 111 num_iters=self.num_iters, 112 batch_prop=self.batch_prop, 113 learning_rate=self.learning_rate, 114 learning_method=self.learning_method, 115 mass=self.mass, 116 decay=self.decay, 117 randomization=self.randomization, 118 tolerance=self.tolerance, 119 verbose=self.verbose, 120 **kwargs 121 ) 122 123 if self.type_optim == "sgd": 124 self.results = sgd( 125 loss_func, 126 response=response, 127 x=x0, 128 num_iters=self.num_iters, 129 batch_prop=self.batch_prop, 130 learning_rate=self.learning_rate, 131 learning_method=self.learning_method, 132 mass=self.mass, 133 decay=self.decay, 134 randomization=self.randomization, 135 tolerance=self.tolerance, 136 verbose=self.verbose, 137 **kwargs 138 ) 139 140 return self 141 142 def one_hot_encode(self, y, n_classes): 143 return one_hot_encode(y, n_classes)
Optimizer class
Attributes:
type_optim: str
type of optimizer, (currently) either 'sgd' (stochastic minibatch gradient descent)
or 'scd' (stochastic minibatch coordinate descent)
num_iters: int
number of iterations of the optimizer
learning_rate: float
step size
batch_prop: float
proportion of the initial data used at each optimization step
learning_method: str
"poly" - learning rate decreasing as a polynomial function
of # of iterations (default)
"exp" - learning rate decreasing as an exponential function
of # of iterations
"momentum" - gradient descent using momentum
randomization: str
type of randomization applied at each step
"strat" - stratified subsampling (default)
"shuffle" - random subsampling
mass: float
mass on velocity, for `learning_method` == "momentum"
decay: float
coefficient of decrease of the learning rate for
`learning_method` == "poly" and `learning_method` == "exp"
tolerance: float
early stopping parameter (convergence of loss function)
verbose: int
controls verbosity of gradient descent
0 - nothing is printed
1 - a progress bar is printed
2 - successive loss function values are printed
84 def fit(self, loss_func, response, x0, **kwargs): 85 """Fit GLM model to training data (X, y). 86 87 Args: 88 89 loss_func: loss function 90 91 response: array-like, shape = [n_samples] 92 target variable (used for subsampling) 93 94 x0: array-like, shape = [n_features] 95 initial value provided to the optimizer 96 97 **kwargs: additional parameters to be passed to 98 loss function 99 100 Returns: 101 102 self: object 103 104 """ 105 106 if self.type_optim == "scd": 107 self.results = scd( 108 loss_func, 109 response=response, 110 x=x0, 111 num_iters=self.num_iters, 112 batch_prop=self.batch_prop, 113 learning_rate=self.learning_rate, 114 learning_method=self.learning_method, 115 mass=self.mass, 116 decay=self.decay, 117 randomization=self.randomization, 118 tolerance=self.tolerance, 119 verbose=self.verbose, 120 **kwargs 121 ) 122 123 if self.type_optim == "sgd": 124 self.results = sgd( 125 loss_func, 126 response=response, 127 x=x0, 128 num_iters=self.num_iters, 129 batch_prop=self.batch_prop, 130 learning_rate=self.learning_rate, 131 learning_method=self.learning_method, 132 mass=self.mass, 133 decay=self.decay, 134 randomization=self.randomization, 135 tolerance=self.tolerance, 136 verbose=self.verbose, 137 **kwargs 138 ) 139 140 return self
Run the optimizer on a loss function evaluated on training data (X, y) (used, e.g., when fitting the GLM models).
Args:
loss_func: loss function
response: array-like, shape = [n_samples]
target variable (used for subsampling)
x0: array-like, shape = [n_features]
initial value provided to the optimizer
**kwargs: additional parameters to be passed to
loss function
Returns:
self: object
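A construction sketch (hyperparameter values are illustrative; `fit` then expects a loss function, the response used for subsampling, and an initial parameter vector, as documented above):

```python
import nnetsauce as ns

# stochastic minibatch gradient descent with momentum on 80% minibatches,
# with early stopping on loss convergence (`tolerance`)
opt = ns.Optimizer(type_optim="sgd", num_iters=200, learning_rate=0.01,
                   batch_prop=0.8, learning_method="momentum",
                   mass=0.9, decay=0.1, tolerance=1e-3, verbose=1)
# opt.fit(loss_func, response=y, x0=x_init)  # see the signature above
```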
37class QuantileRegressor(BaseEstimator, RegressorMixin): 38 """ 39 Quantile Regressor. 40 41 Parameters: 42 43 obj: base model (regression model) 44 The base regressor from which to build a 45 quantile regressor. 46 47 level: int, default=95 48 The level of the quantiles to compute. 49 50 scoring: str, default="predictions" 51 The scoring to use for the optimization and constructing 52 prediction intervals (predictions, residuals, conformal, 53 studentized, conformal-studentized). 54 55 Attributes: 56 57 obj_ : base model (regression model) 58 The base regressor from which to build a 59 quantile regressor. 60 61 offset_multipliers_ : list 62 The multipliers for the offset. 63 64 scoring_residuals_ : list 65 The residuals for the scoring. 66 67 student_multiplier_ : float 68 The multiplier for the student. 69 70 """ 71 72 def __init__(self, obj, level=95, scoring="predictions"): 73 assert scoring in ( 74 "predictions", 75 "residuals", 76 "conformal", 77 "studentized", 78 "conformal-studentized", 79 ), "scoring must be 'predictions' or 'residuals'" 80 self.obj = obj 81 low_risk_level = (1 - level / 100) / 2 82 self.quantiles = [low_risk_level, 0.5, 1 - low_risk_level] 83 self.scoring = scoring 84 self.offset_multipliers_ = None 85 self.obj_ = None 86 self.scoring_residuals_ = None 87 self.student_multiplier_ = None 88 89 def _compute_quantile_loss(self, residuals, quantile): 90 """ 91 Compute the quantile loss for a given set of residuals and quantile. 92 """ 93 if not 0 < quantile < 1: 94 raise ValueError("Quantile should be between 0 and 1.") 95 loss = quantile * (residuals >= 0) + (quantile - 1) * (residuals < 0) 96 return np.mean(residuals * loss) 97 98 def _optimize_multiplier( 99 self, 100 y, 101 base_predictions, 102 prev_predictions, 103 scoring_residuals = None, 104 quantile = 0.5, 105 ): 106 """ 107 Optimize the multiplier for a given quantile. 108 """ 109 if not 0 < quantile < 1: 110 raise ValueError("Quantile should be between 0 and 1.") 111 112 n = len(y) 113 114 def objective(log_multiplier): 115 """ 116 Objective function for optimization. 
117 """ 118 # Convert to positive multiplier using exp 119 multiplier = np.exp(log_multiplier[0]) 120 if self.scoring == "predictions": 121 assert base_predictions is not None, "base_predictions must be not None" 122 # Calculate predictions 123 if prev_predictions is None: 124 # For first quantile, subtract from conditional expectation 125 predictions = base_predictions - multiplier * np.abs( 126 base_predictions 127 ) 128 else: 129 # For other quantiles, add to previous quantile 130 offset = multiplier * np.abs(base_predictions) 131 predictions = prev_predictions + offset 132 elif self.scoring in ("residuals", "conformal"): 133 assert ( 134 scoring_residuals is not None 135 ), "scoring_residuals must be not None" 136 # print("scoring_residuals", scoring_residuals) 137 # Calculate predictions 138 if prev_predictions is None: 139 # For first quantile, subtract from conditional expectation 140 predictions = base_predictions - multiplier * np.std( 141 scoring_residuals 142 )/np.sqrt(len(scoring_residuals)) 143 # print("predictions", predictions) 144 else: 145 # For other quantiles, add to previous quantile 146 offset = multiplier * np.std(scoring_residuals)/np.sqrt(len(scoring_residuals)) 147 predictions = prev_predictions + offset 148 elif self.scoring in ("studentized", "conformal-studentized"): 149 assert ( 150 scoring_residuals is not None 151 ), "scoring_residuals must be not None" 152 # Calculate predictions 153 if prev_predictions is None: 154 # For first quantile, subtract from conditional expectation 155 predictions = ( 156 base_predictions - multiplier * self.student_multiplier_ 157 ) 158 # print("predictions", predictions) 159 else: 160 # For other quantiles, add to previous quantile 161 offset = multiplier * self.student_multiplier_ 162 predictions = prev_predictions + offset 163 else: 164 raise ValueError("Invalid argument 'scoring'") 165 166 residuals = y - predictions 167 return self._compute_quantile_loss(residuals, quantile) 168 169 # Optimize in log space for numerical stability 170 # bounds = [(-10, 10)] # log space bounds 171 bounds = [(-100, 100)] # log space bounds 172 result = differential_evolution( 173 objective, 174 bounds, 175 # popsize=15, 176 # maxiter=100, 177 # tol=1e-4, 178 popsize=25, 179 maxiter=200, 180 tol=1e-6, 181 disp=False, 182 ) 183 184 return np.exp(result.x[0]) 185 186 187 def fit(self, X, y): 188 """Fit the model to the data. 189 190 Parameters: 191 192 X: {array-like}, shape = [n_samples, n_features] 193 Training vectors, where n_samples is the number of samples and 194 n_features is the number of features. 195 y: array-like, shape = [n_samples] 196 Target values. 
197 """ 198 self.obj_ = clone(self.obj) 199 200 if self.scoring in ("predictions", "residuals"): 201 202 self.obj_.fit(X, y) 203 base_predictions = self.obj_.predict(X) 204 scoring_residuals = y - base_predictions 205 self.scoring_residuals_ = scoring_residuals 206 207 elif self.scoring == "conformal": 208 209 X_train, X_calib, y_train, y_calib = train_test_split( 210 X, y, test_size=0.5, random_state=42 211 ) 212 self.obj_.fit(X_train, y_train) 213 scoring_residuals = y_calib - self.obj_.predict( 214 X_calib 215 ) # These are calibration predictions 216 self.scoring_residuals_ = scoring_residuals 217 # Update base_predictions to use training predictions for optimization 218 self.obj_.fit(X_calib, y_calib) 219 base_predictions = self.obj_.predict(X_calib) 220 221 elif self.scoring in ("studentized", "conformal-studentized"): 222 223 # Calculate student multiplier 224 if self.scoring == "conformal-studentized": 225 X_train, X_calib, y_train, y_calib = train_test_split( 226 X, y, test_size=0.5, random_state=42 227 ) 228 self.obj_.fit(X_train, y_train) 229 scoring_residuals = y_calib - self.obj_.predict(X_calib) 230 # Calculate studentized multiplier using calibration data 231 self.student_multiplier_ = np.std(y_calib, ddof=1) / np.sqrt( 232 len(y_calib) - 1 233 ) 234 self.obj_.fit(X_calib, y_calib) 235 base_predictions = self.obj_.predict(X_calib) 236 else: # regular studentized 237 self.obj_.fit(X, y) 238 base_predictions = self.obj_.predict(X) 239 scoring_residuals = y - base_predictions 240 self.student_multiplier_ = np.std(y, ddof=1) / np.sqrt(len(y) - 1) 241 242 # Initialize storage for multipliers 243 self.offset_multipliers_ = [] 244 # Keep track of current predictions for each quantile 245 current_predictions = None 246 247 # Fit each quantile sequentially 248 for i, quantile in enumerate(self.quantiles): 249 250 if self.scoring == "predictions": 251 252 multiplier = self._optimize_multiplier( 253 y=y, 254 base_predictions=base_predictions, 255 prev_predictions=current_predictions, 256 quantile=quantile, 257 ) 258 259 self.offset_multipliers_.append(multiplier) 260 261 # Update current predictions 262 if current_predictions is None: 263 # First quantile (lowest) 264 current_predictions = base_predictions - multiplier * np.abs( 265 base_predictions 266 ) 267 else: 268 # Subsequent quantiles 269 offset = multiplier * np.abs(base_predictions) 270 current_predictions = current_predictions + offset 271 272 elif self.scoring == "residuals": 273 274 multiplier = self._optimize_multiplier( 275 y=y, 276 base_predictions=base_predictions, 277 scoring_residuals=scoring_residuals, 278 prev_predictions=current_predictions, 279 quantile=quantile, 280 ) 281 282 self.offset_multipliers_.append(multiplier) 283 284 # Update current predictions 285 if current_predictions is None: 286 # First quantile (lowest) 287 current_predictions = base_predictions - multiplier * np.std( 288 scoring_residuals 289 )/np.sqrt(len(scoring_residuals)) 290 else: 291 # Subsequent quantiles 292 offset = multiplier * np.std(scoring_residuals)/np.sqrt(len(scoring_residuals)) 293 current_predictions = current_predictions + offset 294 295 elif self.scoring == "conformal": 296 297 multiplier = self._optimize_multiplier( 298 y=y_calib, 299 base_predictions=base_predictions, 300 scoring_residuals=scoring_residuals, 301 prev_predictions=current_predictions, 302 quantile=quantile, 303 ) 304 305 self.offset_multipliers_.append(multiplier) 306 307 # Update current predictions 308 if current_predictions is None: 309 # First quantile 
(lowest) 310 current_predictions = base_predictions - multiplier * np.std( 311 scoring_residuals 312 )/np.sqrt(len(scoring_residuals)) 313 else: 314 # Subsequent quantiles 315 offset = multiplier * np.std(scoring_residuals)/np.sqrt(len(scoring_residuals)) 316 current_predictions = current_predictions + offset 317 318 elif self.scoring in ("studentized", "conformal-studentized"): 319 320 multiplier = self._optimize_multiplier( 321 y=y_calib if self.scoring == "conformal-studentized" else y, 322 base_predictions=base_predictions, 323 scoring_residuals=scoring_residuals, 324 prev_predictions=current_predictions, 325 quantile=quantile, 326 ) 327 328 self.offset_multipliers_.append(multiplier) 329 330 # Update current predictions 331 if current_predictions is None: 332 current_predictions = ( 333 base_predictions - multiplier * self.student_multiplier_ 334 ) 335 else: 336 offset = multiplier * self.student_multiplier_ 337 current_predictions = current_predictions + offset 338 339 return self 340 341 342 def predict(self, X, return_pi=False): 343 """Predict the target variable. 344 345 Parameters: 346 347 X: {array-like}, shape = [n_samples, n_features] 348 Training vectors, where n_samples is the number of samples and 349 n_features is the number of features. 350 351 return_pi: bool, default=True 352 Whether to return the prediction intervals. 353 """ 354 if self.obj_ is None or self.offset_multipliers_ is None: 355 raise ValueError("Model not fitted yet.") 356 357 base_predictions = self.obj_.predict(X) 358 all_predictions = [] 359 360 if self.scoring == "predictions": 361 362 # Generate first quantile 363 current_predictions = base_predictions - self.offset_multipliers_[ 364 0 365 ] * np.abs(base_predictions) 366 all_predictions.append(current_predictions) 367 368 # Generate remaining quantiles 369 for multiplier in self.offset_multipliers_[1:]: 370 offset = multiplier * np.abs(base_predictions) 371 current_predictions = current_predictions + offset 372 all_predictions.append(current_predictions) 373 374 elif self.scoring in ("residuals", "conformal"): 375 376 # Generate first quantile 377 current_predictions = base_predictions - self.offset_multipliers_[ 378 0 379 ] * np.std(self.scoring_residuals_)/np.sqrt(len(self.scoring_residuals_)) 380 all_predictions.append(current_predictions) 381 382 # Generate remaining quantiles 383 for multiplier in self.offset_multipliers_[1:]: 384 offset = multiplier * np.std(self.scoring_residuals_)/np.sqrt(len(self.scoring_residuals_)) 385 current_predictions = current_predictions + offset 386 all_predictions.append(current_predictions) 387 388 elif self.scoring in ("studentized", "conformal-studentized"): 389 # Generate first quantile 390 current_predictions = ( 391 base_predictions 392 - self.offset_multipliers_[0] * self.student_multiplier_ 393 ) 394 all_predictions.append(current_predictions) 395 396 # Generate remaining quantiles 397 for multiplier in self.offset_multipliers_[1:]: 398 offset = multiplier * self.student_multiplier_ 399 current_predictions = current_predictions + offset 400 all_predictions.append(current_predictions) 401 402 if return_pi == False: 403 return np.asarray(all_predictions[1]) 404 405 DescribeResult = namedtuple( 406 "DecribeResult", ["mean", "lower", "upper", "median"] 407 ) 408 DescribeResult.mean = base_predictions 409 DescribeResult.lower = np.asarray(all_predictions[0]) 410 DescribeResult.median = np.asarray(all_predictions[1]) 411 DescribeResult.upper = np.asarray(all_predictions[2]) 412 413 return DescribeResult
Quantile Regressor.
Parameters:
obj: base model (regression model)
The base regressor from which to build a
quantile regressor.
level: int, default=95
The level of the quantiles to compute.
scoring: str, default="predictions"
The scoring to use for the optimization and constructing
prediction intervals (predictions, residuals, conformal,
studentized, conformal-studentized).
Attributes:
obj_ : base model (regression model)
The base regressor from which to build a
quantile regressor.
offset_multipliers_ : list
The multipliers for the offset.
scoring_residuals_ : list
The residuals for the scoring.
student_multiplier_ : float
The studentized scaling factor (standard error of the target), used when
`scoring` is "studentized" or "conformal-studentized".
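A minimal usage sketch (not from the original docs; the base learner is illustrative, and fitting may take a moment since each quantile offset is optimized with differential evolution):

```python
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=123)

qr = ns.QuantileRegressor(obj=Ridge(), level=95, scoring="residuals")
qr.fit(X_train, y_train)
res = qr.predict(X_test, return_pi=True)
print(np.mean((y_test >= res.lower) & (y_test <= res.upper)))  # coverage
```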
187 def fit(self, X, y): 188 """Fit the model to the data. 189 190 Parameters: 191 192 X: {array-like}, shape = [n_samples, n_features] 193 Training vectors, where n_samples is the number of samples and 194 n_features is the number of features. 195 y: array-like, shape = [n_samples] 196 Target values. 197 """ 198 self.obj_ = clone(self.obj) 199 200 if self.scoring in ("predictions", "residuals"): 201 202 self.obj_.fit(X, y) 203 base_predictions = self.obj_.predict(X) 204 scoring_residuals = y - base_predictions 205 self.scoring_residuals_ = scoring_residuals 206 207 elif self.scoring == "conformal": 208 209 X_train, X_calib, y_train, y_calib = train_test_split( 210 X, y, test_size=0.5, random_state=42 211 ) 212 self.obj_.fit(X_train, y_train) 213 scoring_residuals = y_calib - self.obj_.predict( 214 X_calib 215 ) # These are calibration predictions 216 self.scoring_residuals_ = scoring_residuals 217 # Update base_predictions to use training predictions for optimization 218 self.obj_.fit(X_calib, y_calib) 219 base_predictions = self.obj_.predict(X_calib) 220 221 elif self.scoring in ("studentized", "conformal-studentized"): 222 223 # Calculate student multiplier 224 if self.scoring == "conformal-studentized": 225 X_train, X_calib, y_train, y_calib = train_test_split( 226 X, y, test_size=0.5, random_state=42 227 ) 228 self.obj_.fit(X_train, y_train) 229 scoring_residuals = y_calib - self.obj_.predict(X_calib) 230 # Calculate studentized multiplier using calibration data 231 self.student_multiplier_ = np.std(y_calib, ddof=1) / np.sqrt( 232 len(y_calib) - 1 233 ) 234 self.obj_.fit(X_calib, y_calib) 235 base_predictions = self.obj_.predict(X_calib) 236 else: # regular studentized 237 self.obj_.fit(X, y) 238 base_predictions = self.obj_.predict(X) 239 scoring_residuals = y - base_predictions 240 self.student_multiplier_ = np.std(y, ddof=1) / np.sqrt(len(y) - 1) 241 242 # Initialize storage for multipliers 243 self.offset_multipliers_ = [] 244 # Keep track of current predictions for each quantile 245 current_predictions = None 246 247 # Fit each quantile sequentially 248 for i, quantile in enumerate(self.quantiles): 249 250 if self.scoring == "predictions": 251 252 multiplier = self._optimize_multiplier( 253 y=y, 254 base_predictions=base_predictions, 255 prev_predictions=current_predictions, 256 quantile=quantile, 257 ) 258 259 self.offset_multipliers_.append(multiplier) 260 261 # Update current predictions 262 if current_predictions is None: 263 # First quantile (lowest) 264 current_predictions = base_predictions - multiplier * np.abs( 265 base_predictions 266 ) 267 else: 268 # Subsequent quantiles 269 offset = multiplier * np.abs(base_predictions) 270 current_predictions = current_predictions + offset 271 272 elif self.scoring == "residuals": 273 274 multiplier = self._optimize_multiplier( 275 y=y, 276 base_predictions=base_predictions, 277 scoring_residuals=scoring_residuals, 278 prev_predictions=current_predictions, 279 quantile=quantile, 280 ) 281 282 self.offset_multipliers_.append(multiplier) 283 284 # Update current predictions 285 if current_predictions is None: 286 # First quantile (lowest) 287 current_predictions = base_predictions - multiplier * np.std( 288 scoring_residuals 289 )/np.sqrt(len(scoring_residuals)) 290 else: 291 # Subsequent quantiles 292 offset = multiplier * np.std(scoring_residuals)/np.sqrt(len(scoring_residuals)) 293 current_predictions = current_predictions + offset 294 295 elif self.scoring == "conformal": 296 297 multiplier = self._optimize_multiplier( 298 
y=y_calib, 299 base_predictions=base_predictions, 300 scoring_residuals=scoring_residuals, 301 prev_predictions=current_predictions, 302 quantile=quantile, 303 ) 304 305 self.offset_multipliers_.append(multiplier) 306 307 # Update current predictions 308 if current_predictions is None: 309 # First quantile (lowest) 310 current_predictions = base_predictions - multiplier * np.std( 311 scoring_residuals 312 )/np.sqrt(len(scoring_residuals)) 313 else: 314 # Subsequent quantiles 315 offset = multiplier * np.std(scoring_residuals)/np.sqrt(len(scoring_residuals)) 316 current_predictions = current_predictions + offset 317 318 elif self.scoring in ("studentized", "conformal-studentized"): 319 320 multiplier = self._optimize_multiplier( 321 y=y_calib if self.scoring == "conformal-studentized" else y, 322 base_predictions=base_predictions, 323 scoring_residuals=scoring_residuals, 324 prev_predictions=current_predictions, 325 quantile=quantile, 326 ) 327 328 self.offset_multipliers_.append(multiplier) 329 330 # Update current predictions 331 if current_predictions is None: 332 current_predictions = ( 333 base_predictions - multiplier * self.student_multiplier_ 334 ) 335 else: 336 offset = multiplier * self.student_multiplier_ 337 current_predictions = current_predictions + offset 338 339 return self
Fit the model to the data.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
342 def predict(self, X, return_pi=False): 343 """Predict the target variable. 344 345 Parameters: 346 347 X: {array-like}, shape = [n_samples, n_features] 348 Training vectors, where n_samples is the number of samples and 349 n_features is the number of features. 350 351 return_pi: bool, default=True 352 Whether to return the prediction intervals. 353 """ 354 if self.obj_ is None or self.offset_multipliers_ is None: 355 raise ValueError("Model not fitted yet.") 356 357 base_predictions = self.obj_.predict(X) 358 all_predictions = [] 359 360 if self.scoring == "predictions": 361 362 # Generate first quantile 363 current_predictions = base_predictions - self.offset_multipliers_[ 364 0 365 ] * np.abs(base_predictions) 366 all_predictions.append(current_predictions) 367 368 # Generate remaining quantiles 369 for multiplier in self.offset_multipliers_[1:]: 370 offset = multiplier * np.abs(base_predictions) 371 current_predictions = current_predictions + offset 372 all_predictions.append(current_predictions) 373 374 elif self.scoring in ("residuals", "conformal"): 375 376 # Generate first quantile 377 current_predictions = base_predictions - self.offset_multipliers_[ 378 0 379 ] * np.std(self.scoring_residuals_)/np.sqrt(len(self.scoring_residuals_)) 380 all_predictions.append(current_predictions) 381 382 # Generate remaining quantiles 383 for multiplier in self.offset_multipliers_[1:]: 384 offset = multiplier * np.std(self.scoring_residuals_)/np.sqrt(len(self.scoring_residuals_)) 385 current_predictions = current_predictions + offset 386 all_predictions.append(current_predictions) 387 388 elif self.scoring in ("studentized", "conformal-studentized"): 389 # Generate first quantile 390 current_predictions = ( 391 base_predictions 392 - self.offset_multipliers_[0] * self.student_multiplier_ 393 ) 394 all_predictions.append(current_predictions) 395 396 # Generate remaining quantiles 397 for multiplier in self.offset_multipliers_[1:]: 398 offset = multiplier * self.student_multiplier_ 399 current_predictions = current_predictions + offset 400 all_predictions.append(current_predictions) 401 402 if return_pi == False: 403 return np.asarray(all_predictions[1]) 404 405 DescribeResult = namedtuple( 406 "DecribeResult", ["mean", "lower", "upper", "median"] 407 ) 408 DescribeResult.mean = base_predictions 409 DescribeResult.lower = np.asarray(all_predictions[0]) 410 DescribeResult.median = np.asarray(all_predictions[1]) 411 DescribeResult.upper = np.asarray(all_predictions[2]) 412 413 return DescribeResult
Predict the target variable.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
return_pi: bool, default=False
Whether to return the prediction intervals.
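By default only the median (0.5 quantile) is returned; with `return_pi=True` the result also carries the lower and upper quantiles (continuing the sketch above):

```python
# assumes qr and X_test from the sketch above
median = qr.predict(X_test)               # 0.5 quantile only
res = qr.predict(X_test, return_pi=True)  # .mean, .lower, .median, .upper
print(res.lower[:3], res.median[:3], res.upper[:3])
```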
41class QuantileClassifier(BaseEstimator, ClassifierMixin): 42 """ 43 Quantile Classifier. 44 45 Parameters: 46 47 obj: base model (classification model) 48 The base classifier from which to build a 49 quantile classifier. 50 51 level: int, default=95 52 The level of the quantiles to compute. 53 54 scoring: str, default="predictions" 55 The scoring to use for the optimization and constructing 56 prediction intervals (predictions, residuals, conformal, 57 studentized, conformal-studentized). 58 59 Attributes: 60 61 obj_ : base model (classification model) 62 The base classifier from which to build a 63 quantile classifier. 64 65 offset_multipliers_ : list 66 The multipliers for the offset. 67 68 scoring_residuals_ : list 69 The residuals for the scoring. 70 71 student_multiplier_ : float 72 The multiplier for the student. 73 74 75 """ 76 77 def __init__(self, obj, level=95, scoring="predictions"): 78 assert scoring in ( 79 "predictions", 80 "residuals", 81 "conformal", 82 "studentized", 83 "conformal-studentized", 84 ), "scoring must be 'predictions' or 'residuals'" 85 self.obj = obj 86 quantileregressor = QuantileRegressor(self.obj) 87 quantileregressor.predict = partial(quantileregressor.predict, return_pi=False) 88 self.obj_ = SimpleMultitaskClassifier(quantileregressor) 89 90 def fit(self, X, y, **kwargs): 91 self.obj_.fit(X, y, **kwargs) 92 93 def predict(self, X, **kwargs): 94 return self.obj_.predict(X, **kwargs) 95 96 def predict_proba(self, X, **kwargs): 97 return self.obj_.predict_proba(X, **kwargs)
Quantile Classifier.
Parameters:
obj: base model (regression model)
The base regressor from which to build a
quantile classifier (it is wrapped in a QuantileRegressor and a
SimpleMultitaskClassifier internally).
level: int, default=95
The level of the quantiles to compute.
scoring: str, default="predictions"
The scoring to use for the optimization and constructing
prediction intervals (predictions, residuals, conformal,
studentized, conformal-studentized).
Attributes:
obj_ : object
The fitted internal classifier: a SimpleMultitaskClassifier wrapping a
QuantileRegressor built from `obj`.
offset_multipliers_ : list
The multipliers for the offset.
scoring_residuals_ : list
The residuals for the scoring.
student_multiplier_ : float
The multiplier for the student.
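A minimal usage sketch (not from the original docs): per the constructor above, the base model is wrapped in a `QuantileRegressor` and a `SimpleMultitaskClassifier`, so a regression model is passed as `obj`; the choice of `Ridge` below is illustrative:

```python
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split
from sklearn import metrics

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=123)

qclf = ns.QuantileClassifier(obj=Ridge())
qclf.fit(X_train, y_train)
print(metrics.accuracy_score(y_test, qclf.predict(X_test)))
```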
18class RandomBagRegressor(RandomBag, RegressorMixin): 19 """Randomized 'Bagging' Regression model 20 21 Parameters: 22 23 obj: object 24 any object containing a method fit (obj.fit()) and a method predict 25 (obj.predict()) 26 27 n_estimators: int 28 number of boosting iterations 29 30 n_hidden_features: int 31 number of nodes in the hidden layer 32 33 activation_name: str 34 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 35 36 a: float 37 hyperparameter for 'prelu' or 'elu' activation function 38 39 nodes_sim: str 40 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 41 'uniform' 42 43 bias: boolean 44 indicates if the hidden layer contains a bias term (True) or not 45 (False) 46 47 dropout: float 48 regularization parameter; (random) percentage of nodes dropped out 49 of the training 50 51 direct_link: boolean 52 indicates if the original predictors are included (True) in model''s 53 fitting or not (False) 54 55 n_clusters: int 56 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: 57 no clustering) 58 59 cluster_encode: bool 60 defines how the variable containing clusters is treated (default is one-hot) 61 if `False`, then labels are used, without one-hot encoding 62 63 type_clust: str 64 type of clustering method: currently k-means ('kmeans') or Gaussian 65 Mixture Model ('gmm') 66 67 type_scaling: a tuple of 3 strings 68 scaling methods for inputs, hidden layer, and clustering respectively 69 (and when relevant). 70 Currently available: standardization ('std') or MinMax scaling ('minmax') 71 72 col_sample: float 73 percentage of covariates randomly chosen for training 74 75 row_sample: float 76 percentage of rows chosen for training, by stratified bootstrapping 77 78 seed: int 79 reproducibility seed for nodes_sim=='uniform' 80 81 backend: str 82 "cpu" or "gpu" or "tpu" 83 84 Attributes: 85 86 voter_: dict 87 dictionary containing all the fitted base-learners 88 89 90 Examples: 91 92 ```python 93 import numpy as np 94 import nnetsauce as ns 95 from sklearn.datasets import fetch_california_housing 96 from sklearn.tree import DecisionTreeRegressor 97 from sklearn.model_selection import train_test_split 98 99 X, y = fetch_california_housing(return_X_y=True, as_frame=False) 100 101 # split data into training test and test set 102 X_train, X_test, y_train, y_test = train_test_split(X, y, 103 test_size=0.2, random_state=13) 104 105 # Requires further tuning 106 obj = DecisionTreeRegressor(max_depth=3, random_state=123) 107 obj2 = ns.RandomBagRegressor(obj=obj, direct_link=False, 108 n_estimators=50, 109 col_sample=0.9, row_sample=0.9, 110 dropout=0, n_clusters=0, verbose=1) 111 112 obj2.fit(X_train, y_train) 113 114 print(np.sqrt(obj2.score(X_test, y_test))) # RMSE 115 116 ``` 117 118 """ 119 120 # construct the object ----- 121 122 def __init__( 123 self, 124 obj, 125 n_estimators=10, 126 n_hidden_features=1, 127 activation_name="relu", 128 a=0.01, 129 nodes_sim="sobol", 130 bias=True, 131 dropout=0, 132 direct_link=False, 133 n_clusters=2, 134 cluster_encode=True, 135 type_clust="kmeans", 136 type_scaling=("std", "std", "std"), 137 col_sample=1, 138 row_sample=1, 139 n_jobs=None, 140 seed=123, 141 verbose=1, 142 backend="cpu", 143 ): 144 super().__init__( 145 obj=obj, 146 n_estimators=n_estimators, 147 n_hidden_features=n_hidden_features, 148 activation_name=activation_name, 149 a=a, 150 nodes_sim=nodes_sim, 151 bias=bias, 152 dropout=dropout, 153 direct_link=direct_link, 154 n_clusters=n_clusters, 155 cluster_encode=cluster_encode, 156 
type_clust=type_clust, 157 type_scaling=type_scaling, 158 col_sample=col_sample, 159 row_sample=row_sample, 160 seed=seed, 161 backend=backend, 162 ) 163 164 self.type_fit = "regression" 165 self.verbose = verbose 166 self.n_jobs = n_jobs 167 self.voter_ = {} 168 169 def fit(self, X, y, **kwargs): 170 """Fit Random 'Bagging' model to training data (X, y). 171 172 Args: 173 174 X: {array-like}, shape = [n_samples, n_features] 175 Training vectors, where n_samples is the number 176 of samples and n_features is the number of features. 177 178 y: array-like, shape = [n_samples] 179 Target values. 180 181 **kwargs: additional parameters to be passed to 182 self.cook_training_set or self.obj.fit 183 184 Returns: 185 186 self: object 187 188 """ 189 190 base_learner = CustomRegressor( 191 self.obj, 192 n_hidden_features=self.n_hidden_features, 193 activation_name=self.activation_name, 194 a=self.a, 195 nodes_sim=self.nodes_sim, 196 bias=self.bias, 197 dropout=self.dropout, 198 direct_link=self.direct_link, 199 n_clusters=self.n_clusters, 200 type_clust=self.type_clust, 201 type_scaling=self.type_scaling, 202 col_sample=self.col_sample, 203 row_sample=self.row_sample, 204 seed=self.seed, 205 ) 206 207 # 1 - Sequential training ----- 208 209 if self.n_jobs is None: 210 self.voter_ = rbagloop_regression( 211 base_learner, X, y, self.n_estimators, self.verbose, self.seed 212 ) 213 214 self.n_estimators = len(self.voter_) 215 216 return self 217 218 # 2 - Parallel training ----- 219 # buggy 220 # if self.n_jobs is not None: 221 def fit_estimators(m): 222 base_learner__ = deepcopy(base_learner) 223 base_learner__.set_params(seed=self.seed + m * 1000) 224 base_learner__.fit(X, y, **kwargs) 225 return base_learner__ 226 227 if self.verbose == 1: 228 voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")( 229 delayed(fit_estimators)(m) for m in tqdm(range(self.n_estimators)) 230 ) 231 else: 232 voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")( 233 delayed(fit_estimators)(m) for m in range(self.n_estimators) 234 ) 235 236 self.voter_ = {i: elt for i, elt in enumerate(voters_list)} 237 238 self.n_estimators = len(self.voter_) 239 240 return self 241 242 def predict(self, X, weights=None, **kwargs): 243 """Predict for test data X. 244 245 Args: 246 247 X: {array-like}, shape = [n_samples, n_features] 248 Training vectors, where n_samples is the number 249 of samples and n_features is the number of features. 250 251 **kwargs: additional parameters to be passed to 252 self.cook_test_set 253 254 Returns: 255 256 estimates for test data: {array-like} 257 258 """ 259 260 def calculate_preds(voter, weights=None): 261 ensemble_preds = 0 262 263 n_iter = len(voter) 264 265 assert n_iter > 0, "no estimator found in `RandomBag` ensemble" 266 267 if weights is None: 268 for idx, elt in voter.items(): 269 ensemble_preds += elt.predict(X) 270 271 return ensemble_preds / n_iter 272 273 # if weights is not None: 274 for idx, elt in voter.items(): 275 ensemble_preds += weights[idx] * elt.predict(X) 276 277 return ensemble_preds 278 279 # end calculate_preds ---- 280 281 if weights is None: 282 return calculate_preds(self.voter_) 283 284 # if weights is not None: 285 self.weights = weights 286 287 return calculate_preds(self.voter_, weights)
Randomized 'Bagging' Regression model
Parameters:
obj: object
any object containing a method fit (obj.fit()) and a method predict
(obj.predict())
n_estimators: int
number of bagging iterations (base learners in the ensemble)
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
voter_: dict
dictionary containing all the fitted base-learners
Examples:
import numpy as np
import nnetsauce as ns
from sklearn.datasets import fetch_california_housing
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
X, y = fetch_california_housing(return_X_y=True, as_frame=False)
# split data into training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y,
test_size=0.2, random_state=13)
# Requires further tuning
obj = DecisionTreeRegressor(max_depth=3, random_state=123)
obj2 = ns.RandomBagRegressor(obj=obj, direct_link=False,
n_estimators=50,
col_sample=0.9, row_sample=0.9,
dropout=0, n_clusters=0, verbose=1)
obj2.fit(X_train, y_train)
print(np.sqrt(obj2.score(X_test, y_test))) # RMSE
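The fitted base learners are kept in the `voter_` dictionary, so individual members of the ensemble can be inspected after fitting. A minimal sketch, continuing the example above (so `obj2` and `X_test` are assumed to already exist):

```python
# continuing the example above: obj2 has been fitted, X_test is available
print(len(obj2.voter_))  # number of base learners actually stored

# predictions of each individual base learner on a few test rows
for idx, learner in obj2.voter_.items():
    single_preds = learner.predict(X_test[:3])
    print(idx, single_preds)
```

Averaging these per-learner predictions over all members reproduces what `obj2.predict(X_test[:3])` returns when no `weights` are passed.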
169 def fit(self, X, y, **kwargs): 170 """Fit Random 'Bagging' model to training data (X, y). 171 172 Args: 173 174 X: {array-like}, shape = [n_samples, n_features] 175 Training vectors, where n_samples is the number 176 of samples and n_features is the number of features. 177 178 y: array-like, shape = [n_samples] 179 Target values. 180 181 **kwargs: additional parameters to be passed to 182 self.cook_training_set or self.obj.fit 183 184 Returns: 185 186 self: object 187 188 """ 189 190 base_learner = CustomRegressor( 191 self.obj, 192 n_hidden_features=self.n_hidden_features, 193 activation_name=self.activation_name, 194 a=self.a, 195 nodes_sim=self.nodes_sim, 196 bias=self.bias, 197 dropout=self.dropout, 198 direct_link=self.direct_link, 199 n_clusters=self.n_clusters, 200 type_clust=self.type_clust, 201 type_scaling=self.type_scaling, 202 col_sample=self.col_sample, 203 row_sample=self.row_sample, 204 seed=self.seed, 205 ) 206 207 # 1 - Sequential training ----- 208 209 if self.n_jobs is None: 210 self.voter_ = rbagloop_regression( 211 base_learner, X, y, self.n_estimators, self.verbose, self.seed 212 ) 213 214 self.n_estimators = len(self.voter_) 215 216 return self 217 218 # 2 - Parallel training ----- 219 # buggy 220 # if self.n_jobs is not None: 221 def fit_estimators(m): 222 base_learner__ = deepcopy(base_learner) 223 base_learner__.set_params(seed=self.seed + m * 1000) 224 base_learner__.fit(X, y, **kwargs) 225 return base_learner__ 226 227 if self.verbose == 1: 228 voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")( 229 delayed(fit_estimators)(m) for m in tqdm(range(self.n_estimators)) 230 ) 231 else: 232 voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")( 233 delayed(fit_estimators)(m) for m in range(self.n_estimators) 234 ) 235 236 self.voter_ = {i: elt for i, elt in enumerate(voters_list)} 237 238 self.n_estimators = len(self.voter_) 239 240 return self
Fit Random 'Bagging' model to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
242 def predict(self, X, weights=None, **kwargs): 243 """Predict for test data X. 244 245 Args: 246 247 X: {array-like}, shape = [n_samples, n_features] 248 Training vectors, where n_samples is the number 249 of samples and n_features is the number of features. 250 251 **kwargs: additional parameters to be passed to 252 self.cook_test_set 253 254 Returns: 255 256 estimates for test data: {array-like} 257 258 """ 259 260 def calculate_preds(voter, weights=None): 261 ensemble_preds = 0 262 263 n_iter = len(voter) 264 265 assert n_iter > 0, "no estimator found in `RandomBag` ensemble" 266 267 if weights is None: 268 for idx, elt in voter.items(): 269 ensemble_preds += elt.predict(X) 270 271 return ensemble_preds / n_iter 272 273 # if weights is not None: 274 for idx, elt in voter.items(): 275 ensemble_preds += weights[idx] * elt.predict(X) 276 277 return ensemble_preds 278 279 # end calculate_preds ---- 280 281 if weights is None: 282 return calculate_preds(self.voter_) 283 284 # if weights is not None: 285 self.weights = weights 286 287 return calculate_preds(self.voter_, weights)
Predict for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Test samples, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
estimates for test data: {array-like}
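`predict` also accepts an optional `weights` argument: when it is provided, the base learners' predictions are combined as a weighted sum (indexed by the keys of `voter_`) instead of a plain average, and the weights are used as given, without re-normalization. A minimal sketch, assuming the fitted `obj2` from the example above and that `voter_` is keyed by consecutive integers:

```python
import numpy as np

n = obj2.n_estimators
uniform_weights = np.repeat(1.0 / n, n)  # uniform weights reproduce the plain average

preds_mean = obj2.predict(X_test)
preds_weighted = obj2.predict(X_test, weights=uniform_weights)
print(np.allclose(preds_mean, preds_weighted))  # expected: True
```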
18class RandomBagClassifier(RandomBag, ClassifierMixin): 19 """Randomized 'Bagging' Classification model 20 21 Parameters: 22 23 obj: object 24 any object containing a method fit (obj.fit()) and a method predict 25 (obj.predict()) 26 27 n_estimators: int 28 number of boosting iterations 29 30 n_hidden_features: int 31 number of nodes in the hidden layer 32 33 activation_name: str 34 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 35 36 a: float 37 hyperparameter for 'prelu' or 'elu' activation function 38 39 nodes_sim: str 40 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 41 'uniform' 42 43 bias: boolean 44 indicates if the hidden layer contains a bias term (True) or not 45 (False) 46 47 dropout: float 48 regularization parameter; (random) percentage of nodes dropped out 49 of the training 50 51 direct_link: boolean 52 indicates if the original predictors are included (True) in model's 53 fitting or not (False) 54 55 n_clusters: int 56 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: 57 no clustering) 58 59 cluster_encode: bool 60 defines how the variable containing clusters is treated (default is one-hot) 61 if `False`, then labels are used, without one-hot encoding 62 63 type_clust: str 64 type of clustering method: currently k-means ('kmeans') or Gaussian 65 Mixture Model ('gmm') 66 67 type_scaling: a tuple of 3 strings 68 scaling methods for inputs, hidden layer, and clustering respectively 69 (and when relevant). 70 Currently available: standardization ('std') or MinMax scaling ('minmax') 71 72 col_sample: float 73 percentage of covariates randomly chosen for training 74 75 row_sample: float 76 percentage of rows chosen for training, by stratified bootstrapping 77 78 seed: int 79 reproducibility seed for nodes_sim=='uniform' 80 81 backend: str 82 "cpu" or "gpu" or "tpu" 83 84 Attributes: 85 86 voter_: dict 87 dictionary containing all the fitted base-learners 88 89 90 Examples: 91 92 See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/randombag_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/randombag_classification.py) 93 94 ```python 95 import nnetsauce as ns 96 from sklearn.datasets import load_breast_cancer 97 from sklearn.tree import DecisionTreeClassifier 98 from sklearn.model_selection import train_test_split 99 from sklearn import metrics 100 from time import time 101 102 103 breast_cancer = load_breast_cancer() 104 Z = breast_cancer.data 105 t = breast_cancer.target 106 np.random.seed(123) 107 X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2) 108 109 # decision tree 110 clf = DecisionTreeClassifier(max_depth=2, random_state=123) 111 fit_obj = ns.RandomBagClassifier(clf, n_hidden_features=2, 112 direct_link=True, 113 n_estimators=100, 114 col_sample=0.9, row_sample=0.9, 115 dropout=0.3, n_clusters=0, verbose=1) 116 117 start = time() 118 fit_obj.fit(X_train, y_train) 119 print(f"Elapsed {time() - start}") 120 121 print(fit_obj.score(X_test, y_test)) 122 print(fit_obj.score(X_test, y_test, scoring="roc_auc")) 123 124 start = time() 125 preds = fit_obj.predict(X_test) 126 print(f"Elapsed {time() - start}") 127 print(metrics.classification_report(preds, y_test)) 128 ``` 129 130 """ 131 132 # construct the object ----- 133 _estimator_type = "classifier" 134 135 def __init__( 136 self, 137 obj, 138 n_estimators=10, 139 n_hidden_features=1, 140 activation_name="relu", 141 a=0.01, 142 nodes_sim="sobol", 143 bias=True, 144 dropout=0, 145 direct_link=False, 
146 n_clusters=2, 147 cluster_encode=True, 148 type_clust="kmeans", 149 type_scaling=("std", "std", "std"), 150 col_sample=1, 151 row_sample=1, 152 n_jobs=None, 153 seed=123, 154 verbose=1, 155 backend="cpu", 156 ): 157 super().__init__( 158 obj=obj, 159 n_estimators=n_estimators, 160 n_hidden_features=n_hidden_features, 161 activation_name=activation_name, 162 a=a, 163 nodes_sim=nodes_sim, 164 bias=bias, 165 dropout=dropout, 166 direct_link=direct_link, 167 n_clusters=n_clusters, 168 cluster_encode=cluster_encode, 169 type_clust=type_clust, 170 type_scaling=type_scaling, 171 col_sample=col_sample, 172 row_sample=row_sample, 173 seed=seed, 174 backend=backend, 175 ) 176 177 self.type_fit = "classification" 178 self.verbose = verbose 179 self.n_jobs = n_jobs 180 self.voter_ = {} 181 182 def fit(self, X, y, **kwargs): 183 """Fit Random 'Bagging' model to training data (X, y). 184 185 Args: 186 187 X: {array-like}, shape = [n_samples, n_features] 188 Training vectors, where n_samples is the number 189 of samples and n_features is the number of features. 190 191 y: array-like, shape = [n_samples] 192 Target values. 193 194 **kwargs: additional parameters to be passed to 195 self.cook_training_set or self.obj.fit 196 197 Returns: 198 199 self: object 200 201 """ 202 203 assert mx.is_factor(y), "y must contain only integers" 204 205 self.n_classes_ = len(np.unique(y)) # for compatibility with sklearn 206 207 # training 208 self.n_classes = len(np.unique(y)) 209 210 base_learner = CustomClassifier( 211 self.obj, 212 n_hidden_features=self.n_hidden_features, 213 activation_name=self.activation_name, 214 a=self.a, 215 nodes_sim=self.nodes_sim, 216 bias=self.bias, 217 dropout=self.dropout, 218 direct_link=self.direct_link, 219 n_clusters=self.n_clusters, 220 type_clust=self.type_clust, 221 type_scaling=self.type_scaling, 222 col_sample=self.col_sample, 223 row_sample=self.row_sample, 224 seed=self.seed, 225 cv_calibration=None 226 ) 227 228 # 1 - Sequential training ----- 229 230 if self.n_jobs is None: 231 self.voter_ = rbagloop_classification( 232 base_learner, X, y, self.n_estimators, self.verbose, self.seed 233 ) 234 235 self.n_estimators = len(self.voter_) 236 237 return self 238 239 # 2 - Parallel training ----- 240 # buggy 241 # if self.n_jobs is not None: 242 def fit_estimators(m): 243 base_learner__ = deepcopy(base_learner) 244 base_learner__.set_params(seed=self.seed + m * 1000) 245 base_learner__.fit(X, y, **kwargs) 246 return base_learner__ 247 248 if self.verbose == 1: 249 voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")( 250 delayed(fit_estimators)(m) for m in tqdm(range(self.n_estimators)) 251 ) 252 else: 253 voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")( 254 delayed(fit_estimators)(m) for m in range(self.n_estimators) 255 ) 256 257 self.voter_ = {idx: elt for idx, elt in enumerate(voters_list)} 258 259 self.n_estimators = len(self.voter_) 260 self.classes_ = np.unique(y) 261 return self 262 263 def predict(self, X, weights=None, **kwargs): 264 """Predict test data X. 265 266 Args: 267 268 X: {array-like}, shape = [n_samples, n_features] 269 Training vectors, where n_samples is the number 270 of samples and n_features is the number of features. 
271 272 **kwargs: additional parameters to be passed to 273 self.cook_test_set 274 275 Returns: 276 277 model predictions: {array-like} 278 279 """ 280 return self.predict_proba(X, weights, **kwargs).argmax(axis=1) 281 282 def predict_proba(self, X, weights=None, **kwargs): 283 """Predict probabilities for test data X. 284 285 Args: 286 287 X: {array-like}, shape = [n_samples, n_features] 288 Training vectors, where n_samples is the number 289 of samples and n_features is the number of features. 290 291 **kwargs: additional parameters to be passed to 292 self.cook_test_set 293 294 Returns: 295 296 probability estimates for test data: {array-like} 297 298 """ 299 300 def calculate_probas(voter, weights=None, verbose=None): 301 ensemble_proba = 0 302 303 n_iter = len(voter) 304 305 assert n_iter > 0, "no estimator found in `RandomBag` ensemble" 306 307 if weights is None: 308 for idx, elt in voter.items(): 309 try: 310 ensemble_proba += elt.predict_proba(X) 311 312 # if verbose == 1: 313 # pbar.update(idx) 314 315 except: 316 continue 317 318 # if verbose == 1: 319 # pbar.update(n_iter) 320 321 return ensemble_proba / n_iter 322 323 # if weights is not None: 324 for idx, elt in voter.items(): 325 ensemble_proba += weights[idx] * elt.predict_proba(X) 326 327 # if verbose == 1: 328 # pbar.update(idx) 329 330 # if verbose == 1: 331 # pbar.update(n_iter) 332 333 return ensemble_proba 334 335 # end calculate_probas ---- 336 337 if self.n_jobs is None: 338 # if self.verbose == 1: 339 # pbar = Progbar(self.n_estimators) 340 341 if weights is None: 342 return calculate_probas(self.voter_, verbose=self.verbose) 343 344 # if weights is not None: 345 self.weights = weights 346 347 return calculate_probas(self.voter_, weights, verbose=self.verbose) 348 349 # if self.n_jobs is not None: 350 def predict_estimator(m): 351 try: 352 return self.voter_[m].predict_proba(X) 353 except: 354 pass 355 356 if self.verbose == 1: 357 preds = Parallel(n_jobs=self.n_jobs, prefer="threads")( 358 delayed(predict_estimator)(m) for m in tqdm(range(self.n_estimators)) 359 ) 360 361 else: 362 preds = Parallel(n_jobs=self.n_jobs, prefer="threads")( 363 delayed(predict_estimator)(m) for m in range(self.n_estimators) 364 ) 365 366 ensemble_proba = 0 367 368 if weights is None: 369 for i in range(self.n_estimators): 370 ensemble_proba += preds[i] 371 372 return ensemble_proba / self.n_estimators 373 374 for i in range(self.n_estimators): 375 ensemble_proba += weights[i] * preds[i] 376 377 return ensemble_proba 378 379 380 @property 381 def _estimator_type(self): 382 return "classifier"
Randomized 'Bagging' Classification model
Parameters:
obj: object
any object containing a method fit (obj.fit()) and a method predict
(obj.predict())
n_estimators: int
number of bagging iterations (base learners in the ensemble)
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
voter_: dict
dictionary containing all the fitted base-learners
Examples:
See also https://github.com/Techtonique/nnetsauce/blob/master/examples/randombag_classification.py
import numpy as np
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time
breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)
# decision tree
clf = DecisionTreeClassifier(max_depth=2, random_state=123)
fit_obj = ns.RandomBagClassifier(clf, n_hidden_features=2,
direct_link=True,
n_estimators=100,
col_sample=0.9, row_sample=0.9,
dropout=0.3, n_clusters=0, verbose=1)
start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")
print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
182 def fit(self, X, y, **kwargs): 183 """Fit Random 'Bagging' model to training data (X, y). 184 185 Args: 186 187 X: {array-like}, shape = [n_samples, n_features] 188 Training vectors, where n_samples is the number 189 of samples and n_features is the number of features. 190 191 y: array-like, shape = [n_samples] 192 Target values. 193 194 **kwargs: additional parameters to be passed to 195 self.cook_training_set or self.obj.fit 196 197 Returns: 198 199 self: object 200 201 """ 202 203 assert mx.is_factor(y), "y must contain only integers" 204 205 self.n_classes_ = len(np.unique(y)) # for compatibility with sklearn 206 207 # training 208 self.n_classes = len(np.unique(y)) 209 210 base_learner = CustomClassifier( 211 self.obj, 212 n_hidden_features=self.n_hidden_features, 213 activation_name=self.activation_name, 214 a=self.a, 215 nodes_sim=self.nodes_sim, 216 bias=self.bias, 217 dropout=self.dropout, 218 direct_link=self.direct_link, 219 n_clusters=self.n_clusters, 220 type_clust=self.type_clust, 221 type_scaling=self.type_scaling, 222 col_sample=self.col_sample, 223 row_sample=self.row_sample, 224 seed=self.seed, 225 cv_calibration=None 226 ) 227 228 # 1 - Sequential training ----- 229 230 if self.n_jobs is None: 231 self.voter_ = rbagloop_classification( 232 base_learner, X, y, self.n_estimators, self.verbose, self.seed 233 ) 234 235 self.n_estimators = len(self.voter_) 236 237 return self 238 239 # 2 - Parallel training ----- 240 # buggy 241 # if self.n_jobs is not None: 242 def fit_estimators(m): 243 base_learner__ = deepcopy(base_learner) 244 base_learner__.set_params(seed=self.seed + m * 1000) 245 base_learner__.fit(X, y, **kwargs) 246 return base_learner__ 247 248 if self.verbose == 1: 249 voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")( 250 delayed(fit_estimators)(m) for m in tqdm(range(self.n_estimators)) 251 ) 252 else: 253 voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")( 254 delayed(fit_estimators)(m) for m in range(self.n_estimators) 255 ) 256 257 self.voter_ = {idx: elt for idx, elt in enumerate(voters_list)} 258 259 self.n_estimators = len(self.voter_) 260 self.classes_ = np.unique(y) 261 return self
Fit Random 'Bagging' model to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
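`fit` asserts that `y` contains integer class labels (`mx.is_factor(y)`). If the labels are strings, they can be encoded first; a minimal sketch using scikit-learn's `LabelEncoder` (the labels below are hypothetical):

```python
import numpy as np
from sklearn.preprocessing import LabelEncoder

y_raw = np.array(["benign", "malignant", "benign"])  # hypothetical string labels
y = LabelEncoder().fit_transform(y_raw)              # integer-encoded labels, e.g. [0, 1, 0]
```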
263 def predict(self, X, weights=None, **kwargs): 264 """Predict test data X. 265 266 Args: 267 268 X: {array-like}, shape = [n_samples, n_features] 269 Training vectors, where n_samples is the number 270 of samples and n_features is the number of features. 271 272 **kwargs: additional parameters to be passed to 273 self.cook_test_set 274 275 Returns: 276 277 model predictions: {array-like} 278 279 """ 280 return self.predict_proba(X, weights, **kwargs).argmax(axis=1)
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Test samples, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
282 def predict_proba(self, X, weights=None, **kwargs): 283 """Predict probabilities for test data X. 284 285 Args: 286 287 X: {array-like}, shape = [n_samples, n_features] 288 Training vectors, where n_samples is the number 289 of samples and n_features is the number of features. 290 291 **kwargs: additional parameters to be passed to 292 self.cook_test_set 293 294 Returns: 295 296 probability estimates for test data: {array-like} 297 298 """ 299 300 def calculate_probas(voter, weights=None, verbose=None): 301 ensemble_proba = 0 302 303 n_iter = len(voter) 304 305 assert n_iter > 0, "no estimator found in `RandomBag` ensemble" 306 307 if weights is None: 308 for idx, elt in voter.items(): 309 try: 310 ensemble_proba += elt.predict_proba(X) 311 312 # if verbose == 1: 313 # pbar.update(idx) 314 315 except: 316 continue 317 318 # if verbose == 1: 319 # pbar.update(n_iter) 320 321 return ensemble_proba / n_iter 322 323 # if weights is not None: 324 for idx, elt in voter.items(): 325 ensemble_proba += weights[idx] * elt.predict_proba(X) 326 327 # if verbose == 1: 328 # pbar.update(idx) 329 330 # if verbose == 1: 331 # pbar.update(n_iter) 332 333 return ensemble_proba 334 335 # end calculate_probas ---- 336 337 if self.n_jobs is None: 338 # if self.verbose == 1: 339 # pbar = Progbar(self.n_estimators) 340 341 if weights is None: 342 return calculate_probas(self.voter_, verbose=self.verbose) 343 344 # if weights is not None: 345 self.weights = weights 346 347 return calculate_probas(self.voter_, weights, verbose=self.verbose) 348 349 # if self.n_jobs is not None: 350 def predict_estimator(m): 351 try: 352 return self.voter_[m].predict_proba(X) 353 except: 354 pass 355 356 if self.verbose == 1: 357 preds = Parallel(n_jobs=self.n_jobs, prefer="threads")( 358 delayed(predict_estimator)(m) for m in tqdm(range(self.n_estimators)) 359 ) 360 361 else: 362 preds = Parallel(n_jobs=self.n_jobs, prefer="threads")( 363 delayed(predict_estimator)(m) for m in range(self.n_estimators) 364 ) 365 366 ensemble_proba = 0 367 368 if weights is None: 369 for i in range(self.n_estimators): 370 ensemble_proba += preds[i] 371 372 return ensemble_proba / self.n_estimators 373 374 for i in range(self.n_estimators): 375 ensemble_proba += weights[i] * preds[i] 376 377 return ensemble_proba
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Test samples, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
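As in the regression case, the returned probabilities are the average of the base learners' `predict_proba` outputs (or a weighted sum when `weights` is given), and `predict` takes the argmax of these averaged probabilities. A minimal sketch, assuming the fitted `fit_obj` and `X_test` from the classification example above:

```python
import numpy as np

probs = fit_obj.predict_proba(X_test)
print(probs.shape)  # (n_samples, n_classes)

preds = fit_obj.predict(X_test)
print(np.array_equal(preds, probs.argmax(axis=1)))  # expected: True, by construction
```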
16class RegressorUpdater(BaseEstimator, RegressorMixin): 17 """ 18 Update a regression model with new observations 19 20 Parameters 21 ---------- 22 regr: object 23 A regression model with a coef_ attribute 24 alpha: float 25 Updating factor's exponent 26 27 Attributes 28 ---------- 29 n_obs_: int 30 Number of observations 31 coef_: np.ndarray 32 Coefficients of the model 33 updating_factor_: float 34 Updating factor 35 36 """ 37 38 def __init__(self, regr, alpha=0.5): 39 self.regr = regr 40 self.alpha = alpha 41 self.n_obs_ = None 42 self.coef_ = None 43 self.updating_factor_ = None 44 try: 45 self.coef_ = self.regr.coef_ 46 if isinstance(self.regr, Base): 47 self.n_obs_ = self.regr.scaler_.n_samples_seen_ 48 except AttributeError: 49 pass 50 51 def fit(self, X, y, **kwargs): 52 53 if isinstance(self.regr, CustomRegressor): # nnetsauce model not deep --- 54 if check_is_fitted(self.regr) == False: 55 self.regr.fit(X, y, **kwargs) 56 self.n_obs_ = X.shape[0] 57 if hasattr(self.regr, "coef_"): 58 self.coef_ = self.regr.coef_ 59 return self 60 self.n_obs_ = self.regr.scaler_.n_samples_seen_ 61 if hasattr(self.regr, "coef_"): 62 self.coef_ = self.regr.coef_ 63 return self 64 65 if ( 66 hasattr(self.regr, "coef_") == False 67 ): # sklearn model or CustomRegressor model --- 68 self.regr.fit(X, y) 69 self.n_obs_ = X.shape[0] 70 self.regr.fit(X, y) 71 if hasattr(self.regr, "stacked_obj"): 72 self.coef_ = self.regr.stacked_obj.coef_ 73 else: 74 self.coef_ = self.regr.coef_ 75 return self 76 self.n_obs_ = X.shape[0] 77 if hasattr(self.regr, "coef_"): 78 self.coef_ = self.regr.coef_ 79 return self 80 81 def predict(self, X): 82 # assert hasattr(self.regr, "coef_"), "model must have coef_ attribute" 83 return self.regr.predict(X) 84 85 def partial_fit(self, X, y): 86 87 assert hasattr( 88 self.regr, "coef_" 89 ), "model must be fitted first (i.e have 'coef_' attribute)" 90 assert ( 91 self.n_obs_ is not None 92 ), "model must be fitted first (i.e have 'n_obs_' attribute)" 93 94 if len(X.shape) == 1: 95 X = X.reshape(1, -1) 96 97 assert X.shape[0] == 1, "X must have one row" 98 99 self.updating_factor_ = self.n_obs_ ** (-self.alpha) 100 101 if isinstance(self.regr, Base): # nnetsauce model --- 102 103 newX = deepcopy(X) 104 105 if isinstance( 106 self.regr, CustomRegressor 107 ): # other nnetsauce model (CustomRegressor) --- 108 newX = self.regr.cook_test_set(X=X) 109 if isinstance(X, pd.DataFrame): 110 newx = newX.values.ravel() 111 else: 112 newx = newX.ravel() 113 114 else: # an sklearn model --- 115 116 if isinstance(X, pd.DataFrame): 117 newx = X.values.ravel() 118 else: 119 newx = X.ravel() 120 121 new_coef = self.regr.coef_ + self.updating_factor_ * np.dot( 122 newx, y - np.dot(newx, self.regr.coef_) 123 ) 124 self.regr.coef_ = _update_mean(self.regr.coef_, self.n_obs_, new_coef) 125 self.coef_ = deepcopy(self.regr.coef_) 126 self.n_obs_ += 1 127 return self
Update a regression model with new observations
Parameters
regr: object
A regression model with a coef_ attribute
alpha: float
Updating factor's exponent (the update step is scaled by n_obs_ ** (-alpha))
Attributes
n_obs_: int
Number of observations
coef_: np.ndarray
Coefficients of the model
updating_factor_: float
Updating factor
51 def fit(self, X, y, **kwargs): 52 53 if isinstance(self.regr, CustomRegressor): # nnetsauce model not deep --- 54 if check_is_fitted(self.regr) == False: 55 self.regr.fit(X, y, **kwargs) 56 self.n_obs_ = X.shape[0] 57 if hasattr(self.regr, "coef_"): 58 self.coef_ = self.regr.coef_ 59 return self 60 self.n_obs_ = self.regr.scaler_.n_samples_seen_ 61 if hasattr(self.regr, "coef_"): 62 self.coef_ = self.regr.coef_ 63 return self 64 65 if ( 66 hasattr(self.regr, "coef_") == False 67 ): # sklearn model or CustomRegressor model --- 68 self.regr.fit(X, y) 69 self.n_obs_ = X.shape[0] 70 self.regr.fit(X, y) 71 if hasattr(self.regr, "stacked_obj"): 72 self.coef_ = self.regr.stacked_obj.coef_ 73 else: 74 self.coef_ = self.regr.coef_ 75 return self 76 self.n_obs_ = X.shape[0] 77 if hasattr(self.regr, "coef_"): 78 self.coef_ = self.regr.coef_ 79 return self
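`RegressorUpdater` wraps a linear-type model (anything exposing `coef_`) and lets it absorb new observations one at a time: `partial_fit` takes a single row, computes the updating factor `n_obs_ ** (-alpha)` and applies a gradient-style correction based on the new observation's residual before averaging it into the current coefficients. A minimal usage sketch, under the assumption that a plain scikit-learn `LinearRegression` is an acceptable wrapped model (it exposes `coef_` after fitting):

```python
import numpy as np
import nnetsauce as ns
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression

X, y = make_regression(n_samples=120, n_features=5, noise=0.1, random_state=42)

updater = ns.RegressorUpdater(regr=LinearRegression(), alpha=0.5)
updater.fit(X[:100], y[:100])      # fits the wrapped model; records n_obs_ = 100

# stream the remaining observations one row at a time
for xi, yi in zip(X[100:], y[100:]):
    updater.partial_fit(xi, yi)    # xi is 1-D and is reshaped internally to one row

print(updater.n_obs_)              # 120 after the streaming updates
print(updater.coef_[:3])           # updated coefficients
```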
16class ClassifierUpdater(BaseEstimator, ClassifierMixin): 17 """ 18 Update a regression model with new observations 19 20 Parameters 21 ---------- 22 clf: object 23 A regression model with a coef_ attribute 24 alpha: float 25 Updating factor's exponent 26 27 Attributes 28 ---------- 29 n_obs_: int 30 Number of observations 31 coef_: np.ndarray 32 Coefficients of the model 33 updating_factor_: float 34 Updating factor 35 36 """ 37 _estimator_type = "classifier" 38 39 def __init__(self, clf, alpha=0.5): 40 self.clf = clf 41 self.alpha = alpha 42 self.n_obs_ = None 43 self.coef_ = None 44 self.updating_factor_ = None 45 try: 46 self.coef_ = self.clf.coef_ 47 if isinstance(self.clf, Base): 48 self.n_obs_ = self.clf.scaler_.n_samples_seen_ 49 except AttributeError: 50 pass 51 52 def fit(self, X, y, **kwargs): 53 54 raise NotImplementedError("fit method is not implemented for ClassifierUpdater") 55 56 if isinstance(self.clf, CustomClassifier): # nnetsauce model not deep --- 57 if check_is_fitted(self.clf) == False: 58 self.clf.fit(X, y, **kwargs) 59 self.n_obs_ = X.shape[0] 60 if hasattr(self.clf, "coef_"): 61 self.coef_ = self.clf.coef_ 62 return self 63 self.n_obs_ = self.clf.scaler_.n_samples_seen_ 64 if hasattr(self.clf, "coef_"): 65 self.coef_ = self.clf.coef_ 66 return self 67 68 if ( 69 hasattr(self.clf, "coef_") == False 70 ): # sklearn model or CustomClassifier model --- 71 self.clf.fit(X, y) 72 self.n_obs_ = X.shape[0] 73 self.clf.fit(X, y) 74 if hasattr(self.clf, "stacked_obj"): 75 self.coef_ = self.clf.stacked_obj.coef_ 76 else: 77 self.coef_ = self.clf.coef_ 78 return self 79 self.n_obs_ = X.shape[0] 80 if hasattr(self.clf, "coef_"): 81 self.coef_ = self.clf.coef_ 82 return self 83 84 def predict(self, X): 85 86 raise NotImplementedError( 87 "predict method is not implemented for ClassifierUpdater" 88 ) 89 # assert hasattr(self.clf, "coef_"), "model must have coef_ attribute" 90 return self.clf.predict(X) 91 92 def partial_fit(self, X, y): 93 94 raise NotImplementedError( 95 "partial_fit method is not implemented for ClassifierUpdater" 96 ) 97 98 assert hasattr( 99 self.clf, "coef_" 100 ), "model must be fitted first (i.e have 'coef_' attribute)" 101 assert ( 102 self.n_obs_ is not None 103 ), "model must be fitted first (i.e have 'n_obs_' attribute)" 104 105 if len(X.shape) == 1: 106 X = X.reshape(1, -1) 107 108 assert X.shape[0] == 1, "X must have one row" 109 110 self.updating_factor_ = self.n_obs_ ** (-self.alpha) 111 112 if isinstance(self.clf, Base): # nnetsauce model --- 113 114 newX = deepcopy(X) 115 116 if isinstance( 117 self.clf, CustomClassifier 118 ): # other nnetsauce model (CustomClassifier) --- 119 newX = self.clf.cook_test_set(X=X) 120 if isinstance(X, pd.DataFrame): 121 newx = newX.values.ravel() 122 else: 123 newx = newX.ravel() 124 125 else: # an sklearn model --- 126 127 if isinstance(X, pd.DataFrame): 128 newx = X.values.ravel() 129 else: 130 newx = X.ravel() 131 132 new_coef = self.clf.coef_ + self.updating_factor_ * np.dot( 133 newx, y - np.dot(newx, self.clf.coef_) 134 ) 135 self.clf.coef_ = _update_mean(self.clf.coef_, self.n_obs_, new_coef) 136 self.coef_ = deepcopy(self.clf.coef_) 137 self.n_obs_ += 1 138 return self
Update a classification model with new observations (note that `fit`, `predict`, and `partial_fit` currently raise `NotImplementedError`)
Parameters
clf: object
A classifier with a coef_ attribute
alpha: float
Updating factor's exponent
Attributes
n_obs_: int
Number of observations
coef_: np.ndarray
Coefficients of the model
updating_factor_: float
Updating factor
52 def fit(self, X, y, **kwargs): 53 54 raise NotImplementedError("fit method is not implemented for ClassifierUpdater") 55 56 if isinstance(self.clf, CustomClassifier): # nnetsauce model not deep --- 57 if check_is_fitted(self.clf) == False: 58 self.clf.fit(X, y, **kwargs) 59 self.n_obs_ = X.shape[0] 60 if hasattr(self.clf, "coef_"): 61 self.coef_ = self.clf.coef_ 62 return self 63 self.n_obs_ = self.clf.scaler_.n_samples_seen_ 64 if hasattr(self.clf, "coef_"): 65 self.coef_ = self.clf.coef_ 66 return self 67 68 if ( 69 hasattr(self.clf, "coef_") == False 70 ): # sklearn model or CustomClassifier model --- 71 self.clf.fit(X, y) 72 self.n_obs_ = X.shape[0] 73 self.clf.fit(X, y) 74 if hasattr(self.clf, "stacked_obj"): 75 self.coef_ = self.clf.stacked_obj.coef_ 76 else: 77 self.coef_ = self.clf.coef_ 78 return self 79 self.n_obs_ = X.shape[0] 80 if hasattr(self.clf, "coef_"): 81 self.coef_ = self.clf.coef_ 82 return self
24class RidgeRegressor(BaseEstimator, RegressorMixin): 25 """Ridge. 26 27 Attributes: 28 29 reg_lambda: float 30 regularization parameter. 31 32 backend: str 33 type of backend; must be in ('cpu', 'gpu', 'tpu') 34 35 """ 36 def __init__(self, reg_lambda=0.1, backend="cpu"): 37 assert backend in ( 38 "cpu", 39 "gpu", 40 "tpu", 41 ), "`backend` must be in ('cpu', 'gpu', 'tpu')" 42 43 sys_platform = platform.system() 44 45 if (sys_platform == "Windows") and (backend in ("gpu", "tpu")): 46 warnings.warn( 47 "No GPU/TPU computing on Windows yet, backend set to 'cpu'" 48 ) 49 backend = "cpu" 50 51 self.reg_lambda = reg_lambda 52 self.backend = backend 53 self.coef_ = None 54 55 def fit(self, X, y, **kwargs): 56 """Fit matrixops (classifier) to training data (X, y) 57 58 Args: 59 60 X: {array-like}, shape = [n_samples, n_features] 61 Training vectors, where n_samples is the number 62 of samples and n_features is the number of features. 63 64 y: array-like, shape = [n_samples] 65 Target values. 66 67 **kwargs: additional parameters to be passed to self.cook_training_set. 68 69 Returns: 70 71 self: object. 72 73 """ 74 self.ym, centered_y = mo.center_response(y) 75 self.xm = X.mean(axis=0) 76 self.xsd = X.std(axis=0) 77 self.xsd[self.xsd == 0] = 1 # avoid division by zero 78 X_ = (X - self.xm[None, :]) / self.xsd[None, :] 79 80 if self.backend == "cpu": 81 if len(centered_y.shape) <= 1: 82 eye_term = np.sqrt(self.reg_lambda) * np.eye(X.shape[1]) 83 X_ = np.row_stack((X_, eye_term)) 84 y_ = np.concatenate((centered_y, np.zeros(X.shape[1]))) 85 beta_info = get_beta(X_, y_) 86 self.coef_ = beta_info[0] 87 else: 88 try: 89 eye_term = np.sqrt(self.reg_lambda) * np.eye(X.shape[1]) 90 X_ = np.row_stack((X_, eye_term)) 91 y_ = np.row_stack( 92 ( 93 centered_y, 94 np.zeros((eye_term.shape[0], centered_y.shape[1])), 95 ) 96 ) 97 beta_info = get_beta(X_, y_) 98 self.coef_ = beta_info[0] 99 except Exception: 100 x = inv( 101 mo.crossprod(X_) + self.reg_lambda * np.eye(X_.shape[1]) 102 ) 103 hat_matrix = mo.tcrossprod(x, X_) 104 self.coef_ = mo.safe_sparse_dot(hat_matrix, centered_y) 105 return self 106 107 x = jinv(mo.crossprod(X_, backend=self.backend) 108 + self.reg_lambda * jnp.eye(X_.shape[1])) 109 110 hat_matrix = mo.tcrossprod(x, X_, backend=self.backend) 111 self.coef_ = mo.safe_sparse_dot(hat_matrix, centered_y, 112 backend=self.backend) 113 return self 114 115 def predict(self, X, **kwargs): 116 """Predict test data X. 117 118 Args: 119 120 X: {array-like}, shape = [n_samples, n_features] 121 Training vectors, where n_samples is the number 122 of samples and n_features is the number of features. 123 124 **kwargs: additional parameters to be passed to `predict_proba` 125 126 Returns: 127 128 model predictions: {array-like} 129 130 """ 131 X_ = (X - self.xm[None, :]) / self.xsd[None, :] 132 133 if self.backend == "cpu": 134 if isinstance(self.ym, float): 135 return self.ym + mo.safe_sparse_dot(X_, self.coef_) 136 return self.ym[None, :] + mo.safe_sparse_dot(X_, self.coef_) 137 138 # if self.backend in ("gpu", "tpu"): 139 if isinstance(self.ym, float): 140 return self.ym + mo.safe_sparse_dot( 141 X_, self.coef_, backend=self.backend 142 ) 143 return self.ym[None, :] + mo.safe_sparse_dot( 144 X_, self.coef_, backend=self.backend 145 )
Ridge regression.
Parameters:
reg_lambda: float
regularization parameter.
backend: str
type of backend; must be in ('cpu', 'gpu', 'tpu')
55 def fit(self, X, y, **kwargs): 56 """Fit matrixops (classifier) to training data (X, y) 57 58 Args: 59 60 X: {array-like}, shape = [n_samples, n_features] 61 Training vectors, where n_samples is the number 62 of samples and n_features is the number of features. 63 64 y: array-like, shape = [n_samples] 65 Target values. 66 67 **kwargs: additional parameters to be passed to self.cook_training_set. 68 69 Returns: 70 71 self: object. 72 73 """ 74 self.ym, centered_y = mo.center_response(y) 75 self.xm = X.mean(axis=0) 76 self.xsd = X.std(axis=0) 77 self.xsd[self.xsd == 0] = 1 # avoid division by zero 78 X_ = (X - self.xm[None, :]) / self.xsd[None, :] 79 80 if self.backend == "cpu": 81 if len(centered_y.shape) <= 1: 82 eye_term = np.sqrt(self.reg_lambda) * np.eye(X.shape[1]) 83 X_ = np.row_stack((X_, eye_term)) 84 y_ = np.concatenate((centered_y, np.zeros(X.shape[1]))) 85 beta_info = get_beta(X_, y_) 86 self.coef_ = beta_info[0] 87 else: 88 try: 89 eye_term = np.sqrt(self.reg_lambda) * np.eye(X.shape[1]) 90 X_ = np.row_stack((X_, eye_term)) 91 y_ = np.row_stack( 92 ( 93 centered_y, 94 np.zeros((eye_term.shape[0], centered_y.shape[1])), 95 ) 96 ) 97 beta_info = get_beta(X_, y_) 98 self.coef_ = beta_info[0] 99 except Exception: 100 x = inv( 101 mo.crossprod(X_) + self.reg_lambda * np.eye(X_.shape[1]) 102 ) 103 hat_matrix = mo.tcrossprod(x, X_) 104 self.coef_ = mo.safe_sparse_dot(hat_matrix, centered_y) 105 return self 106 107 x = jinv(mo.crossprod(X_, backend=self.backend) 108 + self.reg_lambda * jnp.eye(X_.shape[1])) 109 110 hat_matrix = mo.tcrossprod(x, X_, backend=self.backend) 111 self.coef_ = mo.safe_sparse_dot(hat_matrix, centered_y, 112 backend=self.backend) 113 return self
Fit the ridge regression model to training data (X, y)
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional keyword arguments (currently unused).
Returns:
self: object.
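On the CPU backend, and for a one-dimensional response, the `fit` code above solves the ridge problem through an augmented least-squares system: the standardized design matrix is stacked on top of sqrt(reg_lambda) * I and the centered response is padded with zeros, so an ordinary least-squares solve on the augmented data yields the ridge coefficients. A small, self-contained numpy check of that equivalence (independent of nnetsauce's internal `get_beta` helper):

```python
import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(50, 4))
y = rng.normal(size=50)
lam = 0.1

# standardize X and center y, as fit() does
Xs = (X - X.mean(axis=0)) / X.std(axis=0)
yc = y - y.mean()

# closed-form ridge solution
beta_closed = np.linalg.solve(Xs.T @ Xs + lam * np.eye(4), Xs.T @ yc)

# same solution via the augmented least-squares system
X_aug = np.vstack([Xs, np.sqrt(lam) * np.eye(4)])
y_aug = np.concatenate([yc, np.zeros(4)])
beta_aug, *_ = np.linalg.lstsq(X_aug, y_aug, rcond=None)

print(np.allclose(beta_closed, beta_aug))  # expected: True
```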
115 def predict(self, X, **kwargs): 116 """Predict test data X. 117 118 Args: 119 120 X: {array-like}, shape = [n_samples, n_features] 121 Training vectors, where n_samples is the number 122 of samples and n_features is the number of features. 123 124 **kwargs: additional parameters to be passed to `predict_proba` 125 126 Returns: 127 128 model predictions: {array-like} 129 130 """ 131 X_ = (X - self.xm[None, :]) / self.xsd[None, :] 132 133 if self.backend == "cpu": 134 if isinstance(self.ym, float): 135 return self.ym + mo.safe_sparse_dot(X_, self.coef_) 136 return self.ym[None, :] + mo.safe_sparse_dot(X_, self.coef_) 137 138 # if self.backend in ("gpu", "tpu"): 139 if isinstance(self.ym, float): 140 return self.ym + mo.safe_sparse_dot( 141 X_, self.coef_, backend=self.backend 142 ) 143 return self.ym[None, :] + mo.safe_sparse_dot( 144 X_, self.coef_, backend=self.backend 145 )
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Test samples, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional keyword arguments (currently unused)
Returns:
model predictions: {array-like}
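A minimal end-to-end usage sketch for `RidgeRegressor` (the regularization value is arbitrary and only illustrative):

```python
import numpy as np
import nnetsauce as ns
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

X, y = fetch_california_housing(return_X_y=True, as_frame=False)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=13)

reg = ns.RidgeRegressor(reg_lambda=0.1)
reg.fit(X_train, y_train)
preds = reg.predict(X_test)
print(np.sqrt(np.mean((preds - y_test) ** 2)))  # RMSE
```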
23class Ridge2Regressor(Ridge2, RegressorMixin): 24 """Ridge regression with 2 regularization parameters derived from class Ridge 25 26 Parameters: 27 28 n_hidden_features: int 29 number of nodes in the hidden layer 30 31 activation_name: str 32 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 33 34 a: float 35 hyperparameter for 'prelu' or 'elu' activation function 36 37 nodes_sim: str 38 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 39 'uniform' 40 41 bias: boolean 42 indicates if the hidden layer contains a bias term (True) or not 43 (False) 44 45 dropout: float 46 regularization parameter; (random) percentage of nodes dropped out 47 of the training 48 49 n_clusters: int 50 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: 51 no clustering) 52 53 cluster_encode: bool 54 defines how the variable containing clusters is treated (default is one-hot) 55 if `False`, then labels are used, without one-hot encoding 56 57 type_clust: str 58 type of clustering method: currently k-means ('kmeans') or Gaussian 59 Mixture Model ('gmm') 60 61 type_scaling: a tuple of 3 strings 62 scaling methods for inputs, hidden layer, and clustering respectively 63 (and when relevant). 64 Currently available: standardization ('std') or MinMax scaling ('minmax') 65 66 lambda1: float 67 regularization parameter on direct link 68 69 lambda2: float 70 regularization parameter on hidden layer 71 72 seed: int 73 reproducibility seed for nodes_sim=='uniform' 74 75 backend: str 76 'cpu' or 'gpu' or 'tpu' 77 78 Attributes: 79 80 beta_: {array-like} 81 regression coefficients 82 83 y_mean_: float 84 average response 85 86 """ 87 88 # construct the object ----- 89 90 def __init__( 91 self, 92 n_hidden_features=5, 93 activation_name="relu", 94 a=0.01, 95 nodes_sim="sobol", 96 bias=True, 97 dropout=0, 98 n_clusters=2, 99 cluster_encode=True, 100 type_clust="kmeans", 101 type_scaling=("std", "std", "std"), 102 lambda1=0.1, 103 lambda2=0.1, 104 seed=123, 105 backend="cpu", 106 ): 107 super().__init__( 108 n_hidden_features=n_hidden_features, 109 activation_name=activation_name, 110 a=a, 111 nodes_sim=nodes_sim, 112 bias=bias, 113 dropout=dropout, 114 n_clusters=n_clusters, 115 cluster_encode=cluster_encode, 116 type_clust=type_clust, 117 type_scaling=type_scaling, 118 lambda1=lambda1, 119 lambda2=lambda2, 120 seed=seed, 121 backend=backend, 122 ) 123 124 self.type_fit = "regression" 125 126 def fit(self, X, y, **kwargs): 127 """Fit Ridge model to training data (X, y). 128 129 Args: 130 131 X: {array-like}, shape = [n_samples, n_features] 132 Training vectors, where n_samples is the number 133 of samples and n_features is the number of features. 134 135 y: array-like, shape = [n_samples] 136 Target values. 
137 138 **kwargs: additional parameters to be passed to 139 self.cook_training_set or self.obj.fit 140 141 Returns: 142 143 self: object 144 145 """ 146 147 sys_platform = platform.system() 148 149 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 150 151 n_X, p_X = X.shape 152 n_Z, p_Z = scaled_Z.shape 153 154 if self.n_clusters > 0: 155 if self.encode_clusters == True: 156 n_features = p_X + self.n_clusters 157 else: 158 n_features = p_X + 1 159 else: 160 n_features = p_X 161 162 X_ = scaled_Z[:, 0:n_features] 163 Phi_X_ = scaled_Z[:, n_features:p_Z] 164 165 B = mo.crossprod(x=X_, backend=self.backend) + self.lambda1 * np.diag( 166 np.repeat(1, n_features) 167 ) 168 C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend) 169 D = mo.crossprod(x=Phi_X_, backend=self.backend) + self.lambda2 * np.diag( 170 np.repeat(1, Phi_X_.shape[1]) 171 ) 172 173 if sys_platform in ("Linux", "Darwin"): 174 B_inv = pinv(B) if self.backend == "cpu" else jpinv(B) 175 else: 176 B_inv = pinv(B) 177 178 W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend) 179 S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend) 180 181 if sys_platform in ("Linux", "Darwin"): 182 S_inv = pinv(S_mat) if self.backend == "cpu" else jpinv(S_mat) 183 else: 184 S_inv = pinv(S_mat) 185 186 Y = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend) 187 inv = mo.rbind( 188 mo.cbind( 189 x=B_inv + mo.crossprod(x=W, y=Y, backend=self.backend), 190 y=-np.transpose(Y), 191 backend=self.backend, 192 ), 193 mo.cbind(x=-Y, y=S_inv, backend=self.backend), 194 backend=self.backend, 195 ) 196 197 self.beta_ = mo.safe_sparse_dot( 198 a=inv, 199 b=mo.crossprod(x=scaled_Z, y=centered_y, backend=self.backend), 200 backend=self.backend, 201 ) 202 203 return self 204 205 def predict(self, X, **kwargs): 206 """Predict test data X. 207 208 Args: 209 210 X: {array-like}, shape = [n_samples, n_features] 211 Training vectors, where n_samples is the number 212 of samples and n_features is the number of features. 213 214 **kwargs: additional parameters to be passed to 215 self.cook_test_set 216 217 Returns: 218 219 model predictions: {array-like} 220 221 """ 222 223 if len(X.shape) == 1: 224 n_features = X.shape[0] 225 new_X = mo.rbind( 226 x=X.reshape(1, n_features), 227 y=np.ones(n_features).reshape(1, n_features), 228 backend=self.backend, 229 ) 230 231 return ( 232 self.y_mean_ 233 + mo.safe_sparse_dot( 234 a=self.cook_test_set(new_X, **kwargs), 235 b=self.beta_, 236 backend=self.backend, 237 ) 238 )[0] 239 240 return self.y_mean_ + mo.safe_sparse_dot( 241 a=self.cook_test_set(X, **kwargs), 242 b=self.beta_, 243 backend=self.backend, 244 )
Ridge regression with 2 regularization parameters, derived from class Ridge2
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
lambda1: float
regularization parameter on direct link
lambda2: float
regularization parameter on hidden layer
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
'cpu' or 'gpu' or 'tpu'
Attributes:
beta_: {array-like}
regression coefficients
y_mean_: float
average response
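A minimal usage sketch for `Ridge2Regressor`, with one penalty on the direct link (`lambda1`) and one on the hidden layer (`lambda2`); the hyperparameter values below are arbitrary and would normally be tuned:

```python
import numpy as np
import nnetsauce as ns
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

X, y = fetch_california_housing(return_X_y=True, as_frame=False)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=13)

reg = ns.Ridge2Regressor(n_hidden_features=10, n_clusters=2,
                         lambda1=0.1, lambda2=10.0)
reg.fit(X_train, y_train)
preds = reg.predict(X_test)
print(np.sqrt(np.mean((preds - y_test) ** 2)))  # RMSE
```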
126 def fit(self, X, y, **kwargs): 127 """Fit Ridge model to training data (X, y). 128 129 Args: 130 131 X: {array-like}, shape = [n_samples, n_features] 132 Training vectors, where n_samples is the number 133 of samples and n_features is the number of features. 134 135 y: array-like, shape = [n_samples] 136 Target values. 137 138 **kwargs: additional parameters to be passed to 139 self.cook_training_set or self.obj.fit 140 141 Returns: 142 143 self: object 144 145 """ 146 147 sys_platform = platform.system() 148 149 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 150 151 n_X, p_X = X.shape 152 n_Z, p_Z = scaled_Z.shape 153 154 if self.n_clusters > 0: 155 if self.encode_clusters == True: 156 n_features = p_X + self.n_clusters 157 else: 158 n_features = p_X + 1 159 else: 160 n_features = p_X 161 162 X_ = scaled_Z[:, 0:n_features] 163 Phi_X_ = scaled_Z[:, n_features:p_Z] 164 165 B = mo.crossprod(x=X_, backend=self.backend) + self.lambda1 * np.diag( 166 np.repeat(1, n_features) 167 ) 168 C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend) 169 D = mo.crossprod(x=Phi_X_, backend=self.backend) + self.lambda2 * np.diag( 170 np.repeat(1, Phi_X_.shape[1]) 171 ) 172 173 if sys_platform in ("Linux", "Darwin"): 174 B_inv = pinv(B) if self.backend == "cpu" else jpinv(B) 175 else: 176 B_inv = pinv(B) 177 178 W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend) 179 S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend) 180 181 if sys_platform in ("Linux", "Darwin"): 182 S_inv = pinv(S_mat) if self.backend == "cpu" else jpinv(S_mat) 183 else: 184 S_inv = pinv(S_mat) 185 186 Y = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend) 187 inv = mo.rbind( 188 mo.cbind( 189 x=B_inv + mo.crossprod(x=W, y=Y, backend=self.backend), 190 y=-np.transpose(Y), 191 backend=self.backend, 192 ), 193 mo.cbind(x=-Y, y=S_inv, backend=self.backend), 194 backend=self.backend, 195 ) 196 197 self.beta_ = mo.safe_sparse_dot( 198 a=inv, 199 b=mo.crossprod(x=scaled_Z, y=centered_y, backend=self.backend), 200 backend=self.backend, 201 ) 202 203 return self
Fit Ridge model to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
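Reading the `fit` implementation above: the penalized normal-equations matrix is split into a direct-link block (B, penalized by lambda1) and a hidden-layer block (D, penalized by lambda2), and its inverse is assembled blockwise through the Schur complement S = D - C B^(-1) C^T rather than by inverting one large matrix. A small numpy check of that block-inversion identity, with hypothetical stand-in matrices:

```python
import numpy as np

rng = np.random.default_rng(0)
Xd = rng.normal(size=(40, 3))   # stands in for the scaled direct-link columns
Phi = rng.normal(size=(40, 5))  # stands in for the scaled hidden-layer columns
lam1, lam2 = 0.1, 0.2

B = Xd.T @ Xd + lam1 * np.eye(3)
C = Phi.T @ Xd
D = Phi.T @ Phi + lam2 * np.eye(5)

# blockwise inverse via the Schur complement S = D - C B^{-1} C^T
B_inv = np.linalg.pinv(B)
W = C @ B_inv
S_inv = np.linalg.pinv(D - W @ C.T)
Y = S_inv @ W
blockwise = np.block([[B_inv + W.T @ Y, -Y.T],
                      [-Y,              S_inv]])

# direct inverse of the full penalized Gram matrix, for comparison
full = np.block([[B, C.T],
                 [C, D]])
print(np.allclose(blockwise, np.linalg.inv(full)))  # expected: True
```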
205 def predict(self, X, **kwargs): 206 """Predict test data X. 207 208 Args: 209 210 X: {array-like}, shape = [n_samples, n_features] 211 Training vectors, where n_samples is the number 212 of samples and n_features is the number of features. 213 214 **kwargs: additional parameters to be passed to 215 self.cook_test_set 216 217 Returns: 218 219 model predictions: {array-like} 220 221 """ 222 223 if len(X.shape) == 1: 224 n_features = X.shape[0] 225 new_X = mo.rbind( 226 x=X.reshape(1, n_features), 227 y=np.ones(n_features).reshape(1, n_features), 228 backend=self.backend, 229 ) 230 231 return ( 232 self.y_mean_ 233 + mo.safe_sparse_dot( 234 a=self.cook_test_set(new_X, **kwargs), 235 b=self.beta_, 236 backend=self.backend, 237 ) 238 )[0] 239 240 return self.y_mean_ + mo.safe_sparse_dot( 241 a=self.cook_test_set(X, **kwargs), 242 b=self.beta_, 243 backend=self.backend, 244 )
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Test samples, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
18class Ridge2Classifier(Ridge2, ClassifierMixin): 19 """Multinomial logit classification with 2 regularization parameters 20 21 Parameters: 22 23 n_hidden_features: int 24 number of nodes in the hidden layer 25 26 activation_name: str 27 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 28 29 a: float 30 hyperparameter for 'prelu' or 'elu' activation function 31 32 nodes_sim: str 33 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 34 'uniform' 35 36 bias: boolean 37 indicates if the hidden layer contains a bias term (True) or not 38 (False) 39 40 dropout: float 41 regularization parameter; (random) percentage of nodes dropped out 42 of the training 43 44 direct_link: boolean 45 indicates if the original predictors are included (True) in model's 46 fitting or not (False) 47 48 n_clusters: int 49 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: 50 no clustering) 51 52 cluster_encode: bool 53 defines how the variable containing clusters is treated (default is one-hot) 54 if `False`, then labels are used, without one-hot encoding 55 56 type_clust: str 57 type of clustering method: currently k-means ('kmeans') or Gaussian 58 Mixture Model ('gmm') 59 60 type_scaling: a tuple of 3 strings 61 scaling methods for inputs, hidden layer, and clustering respectively 62 (and when relevant). 63 Currently available: standardization ('std') or MinMax scaling ('minmax') 64 65 lambda1: float 66 regularization parameter on direct link 67 68 lambda2: float 69 regularization parameter on hidden layer 70 71 solver: str 72 optimization function "L-BFGS-B", "Newton-CG", 73 "trust-ncg", "L-BFGS-B-lstsq", "Newton-CG-lstsq", 74 "trust-ncg-lstsq" (see scipy.optimize.minimize) 75 When using "L-BFGS-B-lstsq", "Newton-CG-lstsq", or "trust-ncg-lstsq", 76 the initial value for the optimization is set to the least squares solution 77 78 seed: int 79 reproducibility seed for nodes_sim=='uniform' 80 81 backend: str 82 "cpu" or "gpu" or "tpu" 83 84 Attributes: 85 86 beta_: {array-like} 87 regression coefficients 88 89 classes_: {array-like} 90 unique classes in the target variable 91 92 minloglik_: float 93 minimum value of the negative log-likelihood 94 95 Examples: 96 97 See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/ridge_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/ridge_classification.py) 98 99 ```python 100 import nnetsauce as ns 101 import numpy as np 102 from sklearn.datasets import load_breast_cancer 103 from sklearn.model_selection import train_test_split 104 from time import time 105 106 107 breast_cancer = load_breast_cancer() 108 X = breast_cancer.data 109 y = breast_cancer.target 110 111 # split data into training test and test set 112 np.random.seed(123) 113 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) 114 115 # create the model with nnetsauce 116 fit_obj = ns.Ridge2Classifier(lambda1 = 6.90185578e+04, 117 lambda2 = 3.17392781e+02, 118 n_hidden_features=95, 119 n_clusters=2, 120 dropout = 3.62817383e-01, 121 type_clust = "gmm") 122 123 # fit the model on training set 124 start = time() 125 fit_obj.fit(X_train, y_train) 126 print(f"Elapsed {time() - start}") 127 128 # get the accuracy on test set 129 start = time() 130 print(fit_obj.score(X_test, y_test)) 131 print(f"Elapsed {time() - start}") 132 133 # get area under the curve on test set (auc) 134 print(fit_obj.score(X_test, y_test, scoring="roc_auc")) 135 ``` 136 137 138 """ 139 _estimator_type = "classifier" 140 141 # 
construct the object ----- 142 143 def __init__( 144 self, 145 n_hidden_features=5, 146 activation_name="relu", 147 a=0.01, 148 nodes_sim="sobol", 149 bias=True, 150 dropout=0, 151 direct_link=True, 152 n_clusters=2, 153 cluster_encode=True, 154 type_clust="kmeans", 155 type_scaling=("std", "std", "std"), 156 lambda1=0.1, 157 lambda2=0.1, 158 solver="L-BFGS-B", 159 seed=123, 160 backend="cpu", 161 ): 162 super().__init__( 163 n_hidden_features=n_hidden_features, 164 activation_name=activation_name, 165 a=a, 166 nodes_sim=nodes_sim, 167 bias=bias, 168 dropout=dropout, 169 direct_link=direct_link, 170 n_clusters=n_clusters, 171 cluster_encode=cluster_encode, 172 type_clust=type_clust, 173 type_scaling=type_scaling, 174 lambda1=lambda1, 175 lambda2=lambda2, 176 seed=seed, 177 backend=backend, 178 ) 179 180 self.type_fit = "classification" 181 self.solver = solver 182 self.beta_ = None 183 self.classes_ = None 184 self.minloglik_ = None 185 186 def loglik(self, X, Y, **kwargs): 187 """Log-likelihood for training data (X, Y). 188 189 Args: 190 191 X: {array-like}, shape = [n_samples, n_features] 192 Training vectors, where n_samples is the number 193 of samples and n_features is the number of features. 194 195 Y: array-like, shape = [n_samples] 196 One-hot encode target values. 197 198 **kwargs: additional parameters to be passed to 199 self.cook_training_set or self.obj.fit 200 201 Returns: 202 203 """ 204 205 def loglik_grad_hess(Y, X, B, XB, hessian=True, **kwargs): 206 # nobs, n_classes 207 n, K = Y.shape 208 209 # total number of covariates 210 p = X.shape[1] 211 212 # initial number of covariates 213 init_p = p - self.n_hidden_features 214 215 max_double = 709.0 216 XB[XB > max_double] = max_double 217 exp_XB = np.exp(XB) 218 probs = exp_XB / exp_XB.sum(axis=1)[:, None] 219 220 # gradient ----- 221 # (Y - p) -> (n, K) 222 # X -> (n, p) 223 # (K, n) %*% (n, p) -> (K, p) 224 if hessian is False: 225 grad = ( 226 -mo.safe_sparse_dot(a=(Y - probs).T, b=X, backend=self.backend) / n 227 ) 228 grad += self.lambda1 * B[0:init_p, :].sum(axis=0)[:, None] 229 grad += self.lambda2 * B[init_p:p, :].sum(axis=0)[:, None] 230 231 return grad.flatten() 232 233 # hessian ----- 234 if hessian is True: 235 Kp = K * p 236 hess = np.zeros((Kp, Kp), float) 237 for k1 in range(K): 238 x_index = range(k1 * p, (k1 + 1) * p) 239 for k2 in range(k1, K): 240 y_index = range(k2 * p, (k2 + 1) * p) 241 H_sub = ( 242 -mo.safe_sparse_dot( 243 a=X.T, 244 b=(probs[:, k1] * probs[:, k2])[:, None] * X, 245 backend=self.backend, 246 ) 247 / n 248 ) # do not store 249 hess[np.ix_(x_index, y_index)] = hess[ 250 np.ix_(y_index, x_index) 251 ] = H_sub 252 253 return hess + (self.lambda1 + self.lambda2) * np.identity(Kp) 254 255 # total number of covariates 256 p = X.shape[1] 257 258 # initial number of covariates 259 init_p = p - self.n_hidden_features 260 261 # log-likelihood (1st return) 262 def loglik_func(x): 263 # (p, K) 264 B = x.reshape(Y.shape[1], p).T 265 266 # (n, K) 267 XB = mo.safe_sparse_dot(X, B, backend=self.backend) 268 269 res = -(np.sum(Y * XB, axis=1) - logsumexp(XB)).mean() 270 271 res += ( 272 0.5 273 * self.lambda1 274 * mo.squared_norm(B[0:init_p, :], backend=self.backend) 275 ) 276 res += ( 277 0.5 278 * self.lambda2 279 * mo.squared_norm(B[init_p:p, :], backend=self.backend) 280 ) 281 282 return res 283 284 # gradient of log-likelihood 285 def grad_func(x): 286 # (p, K) 287 B = x.reshape(Y.shape[1], p).T 288 289 return loglik_grad_hess( 290 Y=Y, 291 X=X, 292 B=B, 293 XB=mo.safe_sparse_dot(X, B, 
backend=self.backend), 294 hessian=False, 295 **kwargs 296 ) 297 298 # hessian of log-likelihood 299 def hessian_func(x): 300 # (p, K) 301 B = x.reshape(Y.shape[1], p).T 302 303 return loglik_grad_hess( 304 Y=Y, 305 X=X, 306 B=B, 307 XB=mo.safe_sparse_dot(X, B, backend=self.backend), 308 hessian=True, 309 **kwargs 310 ) 311 312 return loglik_func, grad_func, hessian_func 313 314 # newton-cg 315 # L-BFGS-B 316 def fit(self, X, y, **kwargs): 317 """Fit Ridge model to training data (X, y). 318 319 for beta: regression coeffs (beta11, ..., beta1p, ..., betaK1, ..., betaKp) 320 for K classes and p covariates. 321 322 Args: 323 324 X: {array-like}, shape = [n_samples, n_features] 325 Training vectors, where n_samples is the number 326 of samples and n_features is the number of features. 327 328 y: array-like, shape = [n_samples] 329 Target values. 330 331 **kwargs: additional parameters to be passed to 332 self.cook_training_set or self.obj.fit 333 334 Returns: 335 336 self: object 337 338 """ 339 340 assert mx.is_factor(y), "y must contain only integers" 341 342 output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 343 344 self.n_classes = len(np.unique(y)) 345 self.classes_ = np.unique(y) # for compatibility with sklearn 346 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 347 348 Y = mo.one_hot_encode2(output_y, self.n_classes) 349 350 # optimize for beta, minimize self.loglik (maximize loglik) ----- 351 loglik_func, grad_func, hessian_func = self.loglik(X=scaled_Z, Y=Y) 352 353 if self.solver == "L-BFGS-B": 354 opt = minimize( 355 fun=loglik_func, 356 x0=np.zeros(scaled_Z.shape[1] * self.n_classes), 357 jac=grad_func, 358 method=self.solver, 359 ) 360 self.beta_ = opt.x 361 self.minloglik_ = opt.fun 362 363 if self.solver in ("Newton-CG", "trust-ncg"): 364 opt = minimize( 365 fun=loglik_func, 366 x0=np.zeros(scaled_Z.shape[1] * self.n_classes), 367 jac=grad_func, 368 hess=hessian_func, 369 method=self.solver, 370 ) 371 self.beta_ = opt.x 372 self.minloglik_ = opt.fun 373 374 if self.solver == "L-BFGS-B-lstsq": 375 opt = minimize( 376 fun=loglik_func, 377 x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten(order="F"), 378 jac=grad_func, 379 method="L-BFGS-B", 380 ) 381 self.beta_ = opt.x 382 self.minloglik_ = opt.fun 383 384 if self.solver in "Newton-CG-lstsq": 385 opt = minimize( 386 fun=loglik_func, 387 x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten(order="F"), 388 jac=grad_func, 389 hess=hessian_func, 390 method="Newton-CG", 391 ) 392 self.beta_ = opt.x 393 self.minloglik_ = opt.fun 394 395 if self.solver in "trust-ncg-lstsq": 396 opt = minimize( 397 fun=loglik_func, 398 x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten(order="F"), 399 jac=grad_func, 400 hess=hessian_func, 401 method="trust-ncg", 402 ) 403 self.beta_ = opt.x 404 self.minloglik_ = opt.fun 405 406 self.classes_ = np.unique(y) 407 408 return self 409 410 def predict(self, X, **kwargs): 411 """Predict test data X. 412 413 Args: 414 415 X: {array-like}, shape = [n_samples, n_features] 416 Training vectors, where n_samples is the number 417 of samples and n_features is the number of features. 418 419 **kwargs: additional parameters to be passed to 420 self.cook_test_set 421 422 Returns: 423 424 model predictions: {array-like} 425 """ 426 427 return np.argmax(self.predict_proba(X, **kwargs), axis=1) 428 429 def predict_proba(self, X, **kwargs): 430 """Predict probabilities for test data X. 
431 432 Args: 433 434 X: {array-like}, shape = [n_samples, n_features] 435 Training vectors, where n_samples is the number 436 of samples and n_features is the number of features. 437 438 **kwargs: additional parameters to be passed to 439 self.cook_test_set 440 441 Returns: 442 443 probability estimates for test data: {array-like} 444 445 """ 446 if len(X.shape) == 1: 447 n_features = X.shape[0] 448 new_X = mo.rbind( 449 X.reshape(1, n_features), 450 np.ones(n_features).reshape(1, n_features), 451 ) 452 453 Z = self.cook_test_set(new_X, **kwargs) 454 455 else: 456 Z = self.cook_test_set(X, **kwargs) 457 458 ZB = mo.safe_sparse_dot( 459 a=Z, 460 b=self.beta_.reshape( 461 self.n_classes, 462 X.shape[1] + self.n_hidden_features + self.n_clusters, 463 ).T, 464 backend=self.backend, 465 ) 466 467 exp_ZB = np.exp(ZB) 468 469 return exp_ZB / exp_ZB.sum(axis=1)[:, None] 470 471 @property 472 def _estimator_type(self): 473 return "classifier"
Multinomial logit classification with 2 regularization parameters
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
lambda1: float
regularization parameter on direct link
lambda2: float
regularization parameter on hidden layer
solver: str
optimization function "L-BFGS-B", "Newton-CG",
"trust-ncg", "L-BFGS-B-lstsq", "Newton-CG-lstsq",
"trust-ncg-lstsq" (see scipy.optimize.minimize)
When using "L-BFGS-B-lstsq", "Newton-CG-lstsq", or "trust-ncg-lstsq",
the initial value for the optimization is set to the least squares solution
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
beta_: {array-like}
regression coefficients
classes_: {array-like}
unique classes in the target variable
minloglik_: float
minimum value of the negative log-likelihood
Examples:
See also https://github.com/Techtonique/nnetsauce/blob/master/examples/ridge_classification.py
```python
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from time import time

breast_cancer = load_breast_cancer()
X = breast_cancer.data
y = breast_cancer.target

# split data into training set and test set
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# create the model with nnetsauce
fit_obj = ns.Ridge2Classifier(lambda1 = 6.90185578e+04,
                              lambda2 = 3.17392781e+02,
                              n_hidden_features=95,
                              n_clusters=2,
                              dropout = 3.62817383e-01,
                              type_clust = "gmm")

# fit the model on training set
start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")

# get the accuracy on test set
start = time()
print(fit_obj.score(X_test, y_test))
print(f"Elapsed {time() - start}")

# get area under the curve on test set (auc)
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
```
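For readers following the source above: `fit` minimizes the penalized multinomial negative log-likelihood built by `loglik`. A sketch of that objective, with $Z$ the augmented (scaled) feature matrix, $B$ the coefficient matrix, and $B_{\mathrm{direct}}$ / $B_{\mathrm{hidden}}$ its rows for the original and hidden features (notation is mine, inferred from the code):

$$
\ell(B) = -\frac{1}{n}\sum_{i=1}^{n}\left(\sum_{k=1}^{K} Y_{ik}\,(ZB)_{ik} - \log \sum_{k=1}^{K} e^{(ZB)_{ik}}\right) + \frac{\lambda_1}{2}\lVert B_{\mathrm{direct}}\rVert_F^2 + \frac{\lambda_2}{2}\lVert B_{\mathrm{hidden}}\rVert_F^2
$$

The `*-lstsq` solvers only change the starting point of this minimization (least squares instead of zeros); the objective itself is the same.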
316 def fit(self, X, y, **kwargs): 317 """Fit Ridge model to training data (X, y). 318 319 for beta: regression coeffs (beta11, ..., beta1p, ..., betaK1, ..., betaKp) 320 for K classes and p covariates. 321 322 Args: 323 324 X: {array-like}, shape = [n_samples, n_features] 325 Training vectors, where n_samples is the number 326 of samples and n_features is the number of features. 327 328 y: array-like, shape = [n_samples] 329 Target values. 330 331 **kwargs: additional parameters to be passed to 332 self.cook_training_set or self.obj.fit 333 334 Returns: 335 336 self: object 337 338 """ 339 340 assert mx.is_factor(y), "y must contain only integers" 341 342 output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 343 344 self.n_classes = len(np.unique(y)) 345 self.classes_ = np.unique(y) # for compatibility with sklearn 346 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 347 348 Y = mo.one_hot_encode2(output_y, self.n_classes) 349 350 # optimize for beta, minimize self.loglik (maximize loglik) ----- 351 loglik_func, grad_func, hessian_func = self.loglik(X=scaled_Z, Y=Y) 352 353 if self.solver == "L-BFGS-B": 354 opt = minimize( 355 fun=loglik_func, 356 x0=np.zeros(scaled_Z.shape[1] * self.n_classes), 357 jac=grad_func, 358 method=self.solver, 359 ) 360 self.beta_ = opt.x 361 self.minloglik_ = opt.fun 362 363 if self.solver in ("Newton-CG", "trust-ncg"): 364 opt = minimize( 365 fun=loglik_func, 366 x0=np.zeros(scaled_Z.shape[1] * self.n_classes), 367 jac=grad_func, 368 hess=hessian_func, 369 method=self.solver, 370 ) 371 self.beta_ = opt.x 372 self.minloglik_ = opt.fun 373 374 if self.solver == "L-BFGS-B-lstsq": 375 opt = minimize( 376 fun=loglik_func, 377 x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten(order="F"), 378 jac=grad_func, 379 method="L-BFGS-B", 380 ) 381 self.beta_ = opt.x 382 self.minloglik_ = opt.fun 383 384 if self.solver in "Newton-CG-lstsq": 385 opt = minimize( 386 fun=loglik_func, 387 x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten(order="F"), 388 jac=grad_func, 389 hess=hessian_func, 390 method="Newton-CG", 391 ) 392 self.beta_ = opt.x 393 self.minloglik_ = opt.fun 394 395 if self.solver in "trust-ncg-lstsq": 396 opt = minimize( 397 fun=loglik_func, 398 x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten(order="F"), 399 jac=grad_func, 400 hess=hessian_func, 401 method="trust-ncg", 402 ) 403 self.beta_ = opt.x 404 self.minloglik_ = opt.fun 405 406 self.classes_ = np.unique(y) 407 408 return self
Fit Ridge model to training data (X, y).
The coefficient vector beta is laid out as (beta_11, ..., beta_1p, ..., beta_K1, ..., beta_Kp) for K classes and p covariates.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
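A minimal sketch of the `solver` options in practice (illustrative dataset and hyperparameters, not tuned values): the `*-lstsq` variants differ from their plain counterparts only in starting the optimization from the least squares solution.

```python
import nnetsauce as ns
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

# warm-started variant: initial coefficients come from a least squares fit
clf = ns.Ridge2Classifier(n_hidden_features=10, solver="L-BFGS-B-lstsq", seed=123)
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))
```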
410 def predict(self, X, **kwargs): 411 """Predict test data X. 412 413 Args: 414 415 X: {array-like}, shape = [n_samples, n_features] 416 Training vectors, where n_samples is the number 417 of samples and n_features is the number of features. 418 419 **kwargs: additional parameters to be passed to 420 self.cook_test_set 421 422 Returns: 423 424 model predictions: {array-like} 425 """ 426 427 return np.argmax(self.predict_proba(X, **kwargs), axis=1)
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
429 def predict_proba(self, X, **kwargs): 430 """Predict probabilities for test data X. 431 432 Args: 433 434 X: {array-like}, shape = [n_samples, n_features] 435 Training vectors, where n_samples is the number 436 of samples and n_features is the number of features. 437 438 **kwargs: additional parameters to be passed to 439 self.cook_test_set 440 441 Returns: 442 443 probability estimates for test data: {array-like} 444 445 """ 446 if len(X.shape) == 1: 447 n_features = X.shape[0] 448 new_X = mo.rbind( 449 X.reshape(1, n_features), 450 np.ones(n_features).reshape(1, n_features), 451 ) 452 453 Z = self.cook_test_set(new_X, **kwargs) 454 455 else: 456 Z = self.cook_test_set(X, **kwargs) 457 458 ZB = mo.safe_sparse_dot( 459 a=Z, 460 b=self.beta_.reshape( 461 self.n_classes, 462 X.shape[1] + self.n_hidden_features + self.n_clusters, 463 ).T, 464 backend=self.backend, 465 ) 466 467 exp_ZB = np.exp(ZB) 468 469 return exp_ZB / exp_ZB.sum(axis=1)[:, None]
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
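How the two prediction methods relate, sketched with the fitted `clf` and `X_test` from the example above:

```python
import numpy as np

probs = clf.predict_proba(X_test)  # shape (n_samples, n_classes); rows sum to 1
preds = clf.predict(X_test)        # class indices, i.e. the argmax of each row
assert np.allclose(preds, np.argmax(probs, axis=1))
```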
23class Ridge2MultitaskClassifier(Ridge2, ClassifierMixin): 24 """Multitask Ridge classification with 2 regularization parameters 25 26 Parameters: 27 28 n_hidden_features: int 29 number of nodes in the hidden layer 30 31 activation_name: str 32 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 33 34 a: float 35 hyperparameter for 'prelu' or 'elu' activation function 36 37 nodes_sim: str 38 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 39 'uniform' 40 41 bias: boolean 42 indicates if the hidden layer contains a bias term (True) or not 43 (False) 44 45 dropout: float 46 regularization parameter; (random) percentage of nodes dropped out 47 of the training 48 49 n_clusters: int 50 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: 51 no clustering) 52 53 cluster_encode: bool 54 defines how the variable containing clusters is treated (default is one-hot) 55 if `False`, then labels are used, without one-hot encoding 56 57 type_clust: str 58 type of clustering method: currently k-means ('kmeans') or Gaussian 59 Mixture Model ('gmm') 60 61 type_scaling: a tuple of 3 strings 62 scaling methods for inputs, hidden layer, and clustering respectively 63 (and when relevant). 64 Currently available: standardization ('std') or MinMax scaling ('minmax') 65 66 lambda1: float 67 regularization parameter on direct link 68 69 lambda2: float 70 regularization parameter on hidden layer 71 72 seed: int 73 reproducibility seed for nodes_sim=='uniform' 74 75 backend: str 76 "cpu" or "gpu" or "tpu" 77 78 Attributes: 79 80 beta_: {array-like} 81 regression coefficients 82 83 Examples: 84 85 See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/ridgemtask_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/ridgemtask_classification.py) 86 87 ```python 88 import nnetsauce as ns 89 import numpy as np 90 from sklearn.datasets import load_breast_cancer 91 from sklearn.model_selection import train_test_split 92 from sklearn import metrics 93 from time import time 94 95 breast_cancer = load_breast_cancer() 96 Z = breast_cancer.data 97 t = breast_cancer.target 98 np.random.seed(123) 99 X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2) 100 101 fit_obj = ns.Ridge2MultitaskClassifier(n_hidden_features=int(9.83730469e+01), 102 dropout=4.31054687e-01, 103 n_clusters=int(1.71484375e+00), 104 lambda1=1.24023438e+01, lambda2=7.30263672e+03) 105 106 start = time() 107 fit_obj.fit(X_train, y_train) 108 print(f"Elapsed {time() - start}") 109 110 print(fit_obj.score(X_test, y_test)) 111 print(fit_obj.score(X_test, y_test, scoring="roc_auc")) 112 113 start = time() 114 preds = fit_obj.predict(X_test) 115 print(f"Elapsed {time() - start}") 116 print(metrics.classification_report(preds, y_test)) 117 ``` 118 119 """ 120 121 # construct the object ----- 122 _estimator_type = "classifier" 123 124 def __init__( 125 self, 126 n_hidden_features=5, 127 activation_name="relu", 128 a=0.01, 129 nodes_sim="sobol", 130 bias=True, 131 dropout=0, 132 n_clusters=2, 133 cluster_encode=True, 134 type_clust="kmeans", 135 type_scaling=("std", "std", "std"), 136 lambda1=0.1, 137 lambda2=0.1, 138 seed=123, 139 backend="cpu", 140 ): 141 super().__init__( 142 n_hidden_features=n_hidden_features, 143 activation_name=activation_name, 144 a=a, 145 nodes_sim=nodes_sim, 146 bias=bias, 147 dropout=dropout, 148 n_clusters=n_clusters, 149 cluster_encode=cluster_encode, 150 type_clust=type_clust, 151 type_scaling=type_scaling, 152 lambda1=lambda1, 153 
lambda2=lambda2, 154 seed=seed, 155 backend=backend, 156 ) 157 158 self.type_fit = "classification" 159 160 def fit(self, X, y, **kwargs): 161 """Fit Ridge model to training data (X, y). 162 163 Args: 164 165 X: {array-like}, shape = [n_samples, n_features] 166 Training vectors, where n_samples is the number 167 of samples and n_features is the number of features. 168 169 y: array-like, shape = [n_samples] 170 Target values. 171 172 **kwargs: additional parameters to be passed to 173 self.cook_training_set or self.obj.fit 174 175 Returns: 176 177 self: object 178 179 """ 180 181 sys_platform = platform.system() 182 183 assert mx.is_factor(y), "y must contain only integers" 184 185 self.classes_ = np.unique(y) # for compatibility with sklearn 186 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 187 188 output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 189 190 n_X, p_X = X.shape 191 n_Z, p_Z = scaled_Z.shape 192 193 self.n_classes = len(np.unique(y)) 194 195 # multitask response 196 Y = mo.one_hot_encode2(output_y, self.n_classes) 197 198 if self.n_clusters > 0: 199 if self.encode_clusters == True: 200 n_features = p_X + self.n_clusters 201 else: 202 n_features = p_X + 1 203 else: 204 n_features = p_X 205 206 X_ = scaled_Z[:, 0:n_features] 207 Phi_X_ = scaled_Z[:, n_features:p_Z] 208 209 B = mo.crossprod(x=X_, backend=self.backend) + self.lambda1 * np.diag( 210 np.repeat(1, X_.shape[1]) 211 ) 212 C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend) 213 D = mo.crossprod(x=Phi_X_, backend=self.backend) + self.lambda2 * np.diag( 214 np.repeat(1, Phi_X_.shape[1]) 215 ) 216 217 if sys_platform in ("Linux", "Darwin"): 218 B_inv = pinv(B) if self.backend == "cpu" else jpinv(B) 219 else: 220 B_inv = pinv(B) 221 222 W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend) 223 S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend) 224 225 if sys_platform in ("Linux", "Darwin"): 226 S_inv = pinv(S_mat) if self.backend == "cpu" else jpinv(S_mat) 227 else: 228 S_inv = pinv(S_mat) 229 230 Y2 = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend) 231 inv = mo.rbind( 232 mo.cbind( 233 x=B_inv + mo.crossprod(x=W, y=Y2, backend=self.backend), 234 y=-np.transpose(Y2), 235 backend=self.backend, 236 ), 237 mo.cbind(x=-Y2, y=S_inv, backend=self.backend), 238 backend=self.backend, 239 ) 240 241 self.beta_ = mo.safe_sparse_dot( 242 a=inv, 243 b=mo.crossprod(x=scaled_Z, y=Y, backend=self.backend), 244 backend=self.backend, 245 ) 246 self.classes_ = np.unique(y) 247 return self 248 249 def predict(self, X, **kwargs): 250 """Predict test data X. 251 252 Args: 253 254 X: {array-like}, shape = [n_samples, n_features] 255 Training vectors, where n_samples is the number 256 of samples and n_features is the number of features. 257 258 **kwargs: additional parameters to be passed to 259 self.cook_test_set 260 261 Returns: 262 263 model predictions: {array-like} 264 265 """ 266 267 return np.argmax(self.predict_proba(X, **kwargs), axis=1) 268 269 def predict_proba(self, X, **kwargs): 270 """Predict probabilities for test data X. 271 272 Args: 273 274 X: {array-like}, shape = [n_samples, n_features] 275 Training vectors, where n_samples is the number 276 of samples and n_features is the number of features. 
277 278 **kwargs: additional parameters to be passed to 279 self.cook_test_set 280 281 Returns: 282 283 probability estimates for test data: {array-like} 284 285 """ 286 287 if len(X.shape) == 1: 288 n_features = X.shape[0] 289 new_X = mo.rbind( 290 x=X.reshape(1, n_features), 291 y=np.ones(n_features).reshape(1, n_features), 292 backend=self.backend, 293 ) 294 295 Z = self.cook_test_set(new_X, **kwargs) 296 297 else: 298 Z = self.cook_test_set(X, **kwargs) 299 300 ZB = mo.safe_sparse_dot(a=Z, b=self.beta_, backend=self.backend) 301 302 exp_ZB = np.exp(ZB) 303 304 return exp_ZB / exp_ZB.sum(axis=1)[:, None] 305 306 def score(self, X, y, scoring=None): 307 """Scoring function for classification. 308 309 Args: 310 311 X: {array-like}, shape = [n_samples, n_features] 312 Training vectors, where n_samples is the number 313 of samples and n_features is the number of features. 314 315 y: array-like, shape = [n_samples] 316 Target values. 317 318 scoring: str 319 scoring method (default is accuracy) 320 321 Returns: 322 323 score: float 324 """ 325 326 if scoring is None: 327 scoring = "accuracy" 328 329 if scoring == "accuracy": 330 return skm2.accuracy_score(y, self.predict(X)) 331 332 if scoring == "f1": 333 return skm2.f1_score(y, self.predict(X)) 334 335 if scoring == "precision": 336 return skm2.precision_score(y, self.predict(X)) 337 338 if scoring == "recall": 339 return skm2.recall_score(y, self.predict(X)) 340 341 if scoring == "roc_auc": 342 return skm2.roc_auc_score(y, self.predict(X)) 343 344 if scoring == "log_loss": 345 return skm2.log_loss(y, self.predict_proba(X)) 346 347 if scoring == "balanced_accuracy": 348 return skm2.balanced_accuracy_score(y, self.predict(X)) 349 350 if scoring == "average_precision": 351 return skm2.average_precision_score(y, self.predict(X)) 352 353 if scoring == "neg_brier_score": 354 return -skm2.brier_score_loss(y, self.predict_proba(X)) 355 356 if scoring == "neg_log_loss": 357 return -skm2.log_loss(y, self.predict_proba(X)) 358 359 @property 360 def _estimator_type(self): 361 return "classifier"
Multitask Ridge classification with 2 regularization parameters
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
lambda1: float
regularization parameter on direct link
lambda2: float
regularization parameter on hidden layer
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
beta_: {array-like}
regression coefficients
Examples:
See also https://github.com/Techtonique/nnetsauce/blob/master/examples/ridgemtask_classification.py
```python
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time

breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)

fit_obj = ns.Ridge2MultitaskClassifier(n_hidden_features=int(9.83730469e+01),
                                       dropout=4.31054687e-01,
                                       n_clusters=int(1.71484375e+00),
                                       lambda1=1.24023438e+01, lambda2=7.30263672e+03)

start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")

print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))

start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
```
160 def fit(self, X, y, **kwargs): 161 """Fit Ridge model to training data (X, y). 162 163 Args: 164 165 X: {array-like}, shape = [n_samples, n_features] 166 Training vectors, where n_samples is the number 167 of samples and n_features is the number of features. 168 169 y: array-like, shape = [n_samples] 170 Target values. 171 172 **kwargs: additional parameters to be passed to 173 self.cook_training_set or self.obj.fit 174 175 Returns: 176 177 self: object 178 179 """ 180 181 sys_platform = platform.system() 182 183 assert mx.is_factor(y), "y must contain only integers" 184 185 self.classes_ = np.unique(y) # for compatibility with sklearn 186 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 187 188 output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 189 190 n_X, p_X = X.shape 191 n_Z, p_Z = scaled_Z.shape 192 193 self.n_classes = len(np.unique(y)) 194 195 # multitask response 196 Y = mo.one_hot_encode2(output_y, self.n_classes) 197 198 if self.n_clusters > 0: 199 if self.encode_clusters == True: 200 n_features = p_X + self.n_clusters 201 else: 202 n_features = p_X + 1 203 else: 204 n_features = p_X 205 206 X_ = scaled_Z[:, 0:n_features] 207 Phi_X_ = scaled_Z[:, n_features:p_Z] 208 209 B = mo.crossprod(x=X_, backend=self.backend) + self.lambda1 * np.diag( 210 np.repeat(1, X_.shape[1]) 211 ) 212 C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend) 213 D = mo.crossprod(x=Phi_X_, backend=self.backend) + self.lambda2 * np.diag( 214 np.repeat(1, Phi_X_.shape[1]) 215 ) 216 217 if sys_platform in ("Linux", "Darwin"): 218 B_inv = pinv(B) if self.backend == "cpu" else jpinv(B) 219 else: 220 B_inv = pinv(B) 221 222 W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend) 223 S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend) 224 225 if sys_platform in ("Linux", "Darwin"): 226 S_inv = pinv(S_mat) if self.backend == "cpu" else jpinv(S_mat) 227 else: 228 S_inv = pinv(S_mat) 229 230 Y2 = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend) 231 inv = mo.rbind( 232 mo.cbind( 233 x=B_inv + mo.crossprod(x=W, y=Y2, backend=self.backend), 234 y=-np.transpose(Y2), 235 backend=self.backend, 236 ), 237 mo.cbind(x=-Y2, y=S_inv, backend=self.backend), 238 backend=self.backend, 239 ) 240 241 self.beta_ = mo.safe_sparse_dot( 242 a=inv, 243 b=mo.crossprod(x=scaled_Z, y=Y, backend=self.backend), 244 backend=self.backend, 245 ) 246 self.classes_ = np.unique(y) 247 return self
Fit Ridge model to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
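Unlike `Ridge2Classifier`, there is no iterative optimization here: `fit` above solves a two-penalty ridge regression on the one-hot encoded targets in closed form, inverting the penalized Gram matrix blockwise through a Schur complement. A sketch of the solution (my notation, inferred from the source):

$$
\hat{B} = \begin{pmatrix} X^\top X + \lambda_1 I & X^\top \Phi \\ \Phi^\top X & \Phi^\top \Phi + \lambda_2 I \end{pmatrix}^{-1} Z^\top Y, \qquad Z = [\,X \;\; \Phi\,],
$$

where $X$ collects the scaled original features (plus the cluster encoding), $\Phi$ the hidden-layer features, $Y$ the one-hot encoded targets, and the block inverse uses $S = (\Phi^\top \Phi + \lambda_2 I) - \Phi^\top X\,(X^\top X + \lambda_1 I)^{-1} X^\top \Phi$. Class probabilities in `predict_proba` are then a softmax of $Z\hat{B}$.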
249 def predict(self, X, **kwargs): 250 """Predict test data X. 251 252 Args: 253 254 X: {array-like}, shape = [n_samples, n_features] 255 Training vectors, where n_samples is the number 256 of samples and n_features is the number of features. 257 258 **kwargs: additional parameters to be passed to 259 self.cook_test_set 260 261 Returns: 262 263 model predictions: {array-like} 264 265 """ 266 267 return np.argmax(self.predict_proba(X, **kwargs), axis=1)
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
269 def predict_proba(self, X, **kwargs): 270 """Predict probabilities for test data X. 271 272 Args: 273 274 X: {array-like}, shape = [n_samples, n_features] 275 Training vectors, where n_samples is the number 276 of samples and n_features is the number of features. 277 278 **kwargs: additional parameters to be passed to 279 self.cook_test_set 280 281 Returns: 282 283 probability estimates for test data: {array-like} 284 285 """ 286 287 if len(X.shape) == 1: 288 n_features = X.shape[0] 289 new_X = mo.rbind( 290 x=X.reshape(1, n_features), 291 y=np.ones(n_features).reshape(1, n_features), 292 backend=self.backend, 293 ) 294 295 Z = self.cook_test_set(new_X, **kwargs) 296 297 else: 298 Z = self.cook_test_set(X, **kwargs) 299 300 ZB = mo.safe_sparse_dot(a=Z, b=self.beta_, backend=self.backend) 301 302 exp_ZB = np.exp(ZB) 303 304 return exp_ZB / exp_ZB.sum(axis=1)[:, None]
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
306 def score(self, X, y, scoring=None): 307 """Scoring function for classification. 308 309 Args: 310 311 X: {array-like}, shape = [n_samples, n_features] 312 Training vectors, where n_samples is the number 313 of samples and n_features is the number of features. 314 315 y: array-like, shape = [n_samples] 316 Target values. 317 318 scoring: str 319 scoring method (default is accuracy) 320 321 Returns: 322 323 score: float 324 """ 325 326 if scoring is None: 327 scoring = "accuracy" 328 329 if scoring == "accuracy": 330 return skm2.accuracy_score(y, self.predict(X)) 331 332 if scoring == "f1": 333 return skm2.f1_score(y, self.predict(X)) 334 335 if scoring == "precision": 336 return skm2.precision_score(y, self.predict(X)) 337 338 if scoring == "recall": 339 return skm2.recall_score(y, self.predict(X)) 340 341 if scoring == "roc_auc": 342 return skm2.roc_auc_score(y, self.predict(X)) 343 344 if scoring == "log_loss": 345 return skm2.log_loss(y, self.predict_proba(X)) 346 347 if scoring == "balanced_accuracy": 348 return skm2.balanced_accuracy_score(y, self.predict(X)) 349 350 if scoring == "average_precision": 351 return skm2.average_precision_score(y, self.predict(X)) 352 353 if scoring == "neg_brier_score": 354 return -skm2.brier_score_loss(y, self.predict_proba(X)) 355 356 if scoring == "neg_log_loss": 357 return -skm2.log_loss(y, self.predict_proba(X))
Scoring function for classification.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method (default is accuracy)
Returns:
score: float
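A short usage sketch of the scoring options above, reusing `fit_obj`, `X_test`, and `y_test` from the Ridge2MultitaskClassifier example:

```python
print(fit_obj.score(X_test, y_test))                          # accuracy (default)
print(fit_obj.score(X_test, y_test, scoring="f1"))            # F1 score on predicted labels
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))       # AUC (binary target here)
print(fit_obj.score(X_test, y_test, scoring="neg_log_loss"))  # negative log-loss on predicted probabilities
```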
6class SubSampler: 7 """Subsampling class. 8 9 Attributes: 10 11 y: array-like, shape = [n_samples] 12 Target values. 13 14 row_sample: double 15 subsampling fraction 16 17 n_samples: int 18 subsampling by using the number of rows (supersedes row_sample) 19 20 seed: int 21 reproductibility seed 22 23 n_jobs: int 24 number of jobs to run in parallel 25 26 verbose: bool 27 print progress messages and bars 28 """ 29 30 def __init__( 31 self, 32 y, 33 row_sample=0.8, 34 n_samples=None, 35 seed=123, 36 n_jobs=None, 37 verbose=False, 38 ): 39 self.y = y 40 self.n_samples = n_samples 41 if self.n_samples is None: 42 assert ( 43 row_sample < 1 and row_sample >= 0 44 ), "'row_sample' must be provided, plus < 1 and >= 0" 45 self.row_sample = row_sample 46 else: 47 assert self.n_samples < len(y), "'n_samples' must be < len(y)" 48 self.row_sample = self.n_samples / len(y) 49 self.seed = seed 50 self.indices = None 51 self.n_jobs = n_jobs 52 self.verbose = verbose 53 54 def subsample(self): 55 """Returns indices of subsampled input data. 56 57 Examples: 58 59 <ul> 60 <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240105_subsampling.ipynb">20240105_subsampling.ipynb</a> </li> 61 <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240131_subsampling_nsamples.ipynb">20240131_subsampling_nsamples.ipynb</a> </li> 62 </ul> 63 64 """ 65 self.indices = dosubsample( 66 y=self.y, 67 row_sample=self.row_sample, 68 seed=self.seed, 69 n_jobs=self.n_jobs, 70 verbose=self.verbose, 71 ) 72 return self.indices
Subsampling class.
Attributes:
y: array-like, shape = [n_samples] Target values.
row_sample: double subsampling fraction
n_samples: int number of rows to keep in the subsample (supersedes row_sample)
seed: int reproducibility seed
n_jobs: int number of jobs to run in parallel
verbose: bool print progress messages and bars
54 def subsample(self): 55 """Returns indices of subsampled input data. 56 57 Examples: 58 59 <ul> 60 <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240105_subsampling.ipynb">20240105_subsampling.ipynb</a> </li> 61 <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240131_subsampling_nsamples.ipynb">20240131_subsampling_nsamples.ipynb</a> </li> 62 </ul> 63 64 """ 65 self.indices = dosubsample( 66 y=self.y, 67 row_sample=self.row_sample, 68 seed=self.seed, 69 n_jobs=self.n_jobs, 70 verbose=self.verbose, 71 ) 72 return self.indices
Returns indices of subsampled input data.
Examples:
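A minimal usage sketch (illustrative values; the notebooks linked in the source above give fuller examples):

```python
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer

X, y = load_breast_cancer(return_X_y=True)

# keep roughly half of the rows, sampled using the target y
sampler = ns.SubSampler(y=y, row_sample=0.5, seed=123)
idx = sampler.subsample()    # indices of the retained rows
X_sub, y_sub = X[idx, :], y[idx]
print(X_sub.shape, y_sub.shape)
```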