nnetsauce
```python
from .base.base import Base
from .base.baseRegressor import BaseRegressor
from .boosting.adaBoostClassifier import AdaBoostClassifier
from .custom.customClassifier import CustomClassifier
from .custom.customRegressor import CustomRegressor
from .datasets import Downloader
from .deep.deepClassifier import DeepClassifier
from .deep.deepRegressor import DeepRegressor
from .deep.deepMTS import DeepMTS
from .glm.glmClassifier import GLMClassifier
from .glm.glmRegressor import GLMRegressor
from .kernel.kernel import KernelRidge
from .lazypredict.lazydeepClassifier import LazyDeepClassifier, LazyClassifier
from .lazypredict.lazydeepRegressor import LazyDeepRegressor, LazyRegressor
from .lazypredict.lazydeepMTS import LazyDeepMTS, LazyMTS
from .mts.mts import MTS
from .mts.mlarch import MLARCH
from .mts.classical import ClassicalMTS
from .multitask.multitaskClassifier import MultitaskClassifier
from .multitask.simplemultitaskClassifier import SimpleMultitaskClassifier
from .neuralnet.neuralnetregression import NeuralNetRegressor
from .neuralnet.neuralnetclassification import NeuralNetClassifier
from .optimizers.optimizer import Optimizer
from .predictioninterval import PredictionInterval
from .quantile.quantileregression import QuantileRegressor
from .quantile.quantileclassification import QuantileClassifier
from .randombag.randomBagClassifier import RandomBagClassifier
from .randombag.randomBagRegressor import RandomBagRegressor
from .ridge.ridge import RidgeRegressor
from .ridge2.ridge2Classifier import Ridge2Classifier
from .ridge2.ridge2Regressor import Ridge2Regressor
from .ridge2.ridge2MultitaskClassifier import Ridge2MultitaskClassifier
from .rvfl.bayesianrvflRegressor import BayesianRVFLRegressor
from .rvfl.bayesianrvfl2Regressor import BayesianRVFL2Regressor
from .sampling import SubSampler
from .updater import RegressorUpdater, ClassifierUpdater
from .votingregressor import MedianVotingRegressor

__all__ = [
    "AdaBoostClassifier",
    "Base",
    "BaseRegressor",
    "BayesianRVFLRegressor",
    "BayesianRVFL2Regressor",
    "ClassicalMTS",
    "CustomClassifier",
    "CustomRegressor",
    "DeepClassifier",
    "DeepRegressor",
    "DeepMTS",
    "Downloader",
    "GLMClassifier",
    "GLMRegressor",
    "KernelRidge",
    "LazyClassifier",
    "LazyRegressor",
    "LazyDeepClassifier",
    "LazyDeepRegressor",
    "LazyMTS",
    "LazyDeepMTS",
    "MLARCH",
    "MedianVotingRegressor",
    "MTS",
    "MultitaskClassifier",
    "NeuralNetRegressor",
    "NeuralNetClassifier",
    "PredictionInterval",
    "SimpleMultitaskClassifier",
    "Optimizer",
    "QuantileRegressor",
    "QuantileClassifier",
    "RandomBagRegressor",
    "RandomBagClassifier",
    "RegressorUpdater",
    "ClassifierUpdater",
    "RidgeRegressor",
    "Ridge2Regressor",
    "Ridge2Classifier",
    "Ridge2MultitaskClassifier",
    "SubSampler",
]
```
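Everything listed in `__all__` is importable from the package top level. A minimal sketch of that (the choice of `CustomRegressor` wrapping scikit-learn's `LinearRegression` is illustrative, not prescriptive):

```python
# Minimal sketch: the public API above is exposed at the package top level.
# CustomRegressor and LinearRegression are illustrative choices.
import nnetsauce as ns
from sklearn.linear_model import LinearRegression

print("AdaBoostClassifier" in ns.__all__)  # True

reg = ns.CustomRegressor(LinearRegression(), n_hidden_features=5, seed=123)
print(type(reg).__name__)  # CustomRegressor
```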
```python
class AdaBoostClassifier(Boosting, ClassifierMixin):
    """AdaBoost Classification (SAMME) model class derived from class Boosting.

    Parameters, attributes and an example are documented below.
    """

    # construct the object -----

    def __init__(
        self,
        obj,
        n_estimators=10,
        learning_rate=0.1,
        n_hidden_features=1,
        reg_lambda=0,
        reg_alpha=0.5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=False,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        seed=123,
        verbose=1,
        method="SAMME",
        backend="cpu",
    ):
        self.type_fit = "classification"
        self.verbose = verbose
        self.method = method
        self.reg_lambda = reg_lambda
        self.reg_alpha = reg_alpha

        super().__init__(
            obj=obj,
            n_estimators=n_estimators,
            learning_rate=learning_rate,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )

        self.alpha_ = []
        self.base_learners_ = dict.fromkeys(range(n_estimators))
```
AdaBoost Classification (SAMME) model class derived from class Boosting
Parameters:
obj: object
any object containing a method fit (obj.fit()) and a method predict
(obj.predict())
n_estimators: int
number of boosting iterations
learning_rate: float
learning rate of the boosting procedure
n_hidden_features: int
number of nodes in the hidden layer
reg_lambda: float
regularization parameter for weights
reg_alpha: float
controls the compromise between the l1 and l2 norms of the weights
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
during training
direct_link: boolean
indicates if the original predictors are included (True) in the model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot);
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform'
verbose: int
0 for no output, 1 for a progress bar (default is 1)
method: str
type of AdaBoost method: 'SAMME' (discrete) or 'SAMME.R' (real)
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
alpha_: list
AdaBoost coefficients alpha_m
base_learners_: dict
a dictionary containing the base learners
Examples:
See also https://github.com/Techtonique/nnetsauce/blob/master/examples/adaboost_classification.py
```python
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time

breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)

# SAMME.R
clf = LogisticRegression(solver='liblinear', multi_class='ovr',
                         random_state=123)
fit_obj = ns.AdaBoostClassifier(clf,
                                n_hidden_features=int(11.22338867),
                                direct_link=True,
                                n_estimators=250, learning_rate=0.01126343,
                                col_sample=0.72684326, row_sample=0.86429443,
                                dropout=0.63078613, n_clusters=2,
                                type_clust="gmm",
                                verbose=1, seed=123,
                                method="SAMME.R")

start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")

start = time()
print(fit_obj.score(X_test, y_test))
print(f"Elapsed {time() - start}")

preds = fit_obj.predict(X_test)

print(metrics.classification_report(preds, y_test))
```
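The discrete SAMME variant only differs by the `method` argument. A shorter sketch, reusing the imports, data split and base learner `clf` from the example above (the hyperparameters here are illustrative, not tuned):

```python
# Discrete SAMME variant (illustrative settings; reuses clf, X_train, y_train from above)
fit_obj_samme = ns.AdaBoostClassifier(clf,
                                      n_hidden_features=11,
                                      direct_link=True,
                                      n_estimators=100, learning_rate=0.1,
                                      n_clusters=2, type_clust="gmm",
                                      verbose=1, seed=123,
                                      method="SAMME")
fit_obj_samme.fit(X_train, y_train)
print(fit_obj_samme.score(X_test, y_test))
```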
```python
def fit(self, X, y, sample_weight=None, **kwargs):
    """Fit the boosting model to training data (X, y) (parameters documented below)."""

    assert mx.is_factor(y), "y must contain only integers"

    assert self.method in (
        "SAMME",
        "SAMME.R",
    ), "`method` must be either 'SAMME' or 'SAMME.R'"

    assert (self.reg_lambda <= 1) & (
        self.reg_lambda >= 0
    ), "must have self.reg_lambda <= 1 & self.reg_lambda >= 0"

    assert (self.reg_alpha <= 1) & (
        self.reg_alpha >= 0
    ), "must have self.reg_alpha <= 1 & self.reg_alpha >= 0"

    # training
    n, p = X.shape
    self.n_classes = len(np.unique(y))
    self.classes_ = np.unique(y)  # for compatibility with sklearn
    self.n_classes_ = len(self.classes_)  # for compatibility with sklearn

    if sample_weight is None:
        w_m = np.repeat(1.0 / n, n)
    else:
        w_m = np.asarray(sample_weight)

    base_learner = CustomClassifier(
        self.obj,
        n_hidden_features=self.n_hidden_features,
        activation_name=self.activation_name,
        a=self.a,
        nodes_sim=self.nodes_sim,
        bias=self.bias,
        dropout=self.dropout,
        direct_link=self.direct_link,
        n_clusters=self.n_clusters,
        type_clust=self.type_clust,
        type_scaling=self.type_scaling,
        col_sample=self.col_sample,
        row_sample=self.row_sample,
        seed=self.seed,
    )

    if self.verbose == 1:
        pbar = Progbar(self.n_estimators)

    if self.method == "SAMME":
        err_m = 1e6
        err_bound = 1 - 1 / self.n_classes
        self.alpha_.append(1.0)
        x_range_n = range(n)

        for m in range(self.n_estimators):
            preds = base_learner.fit(
                X, y, sample_weight=w_m.ravel(), **kwargs
            ).predict(X)

            self.base_learners_.update({m: deepcopy(base_learner)})

            cond = [y[i] != preds[i] for i in x_range_n]

            err_m = max(
                sum([elt[0] * elt[1] for elt in zip(cond, w_m)]),
                2.220446049250313e-16,
            )  # sum(w_m) == 1

            if self.reg_lambda > 0:
                err_m += self.reg_lambda * (
                    (1 - self.reg_alpha) * 0.5 * sum([x**2 for x in w_m])
                    + self.reg_alpha * sum([abs(x) for x in w_m])
                )

            err_m = min(err_m, err_bound)

            alpha_m = self.learning_rate * log(
                (self.n_classes - 1) * (1 - err_m) / err_m
            )

            self.alpha_.append(alpha_m)

            w_m_temp = [exp(alpha_m * cond[i]) for i in x_range_n]

            sum_w_m = sum(w_m_temp)

            w_m = np.asarray([w_m_temp[i] / sum_w_m for i in x_range_n])

            base_learner.set_params(seed=self.seed + (m + 1) * 1000)

            if self.verbose == 1:
                pbar.update(m)

        if self.verbose == 1:
            pbar.update(self.n_estimators)

        self.n_estimators = len(self.base_learners_)
        self.classes_ = np.unique(y)

        return self

    if self.method == "SAMME.R":
        Y = mo.one_hot_encode2(y, self.n_classes)

        if sample_weight is None:
            w_m = np.repeat(1.0 / n, n)  # (N, 1)
        else:
            w_m = np.asarray(sample_weight)

        for m in range(self.n_estimators):
            probs = base_learner.fit(
                X, y, sample_weight=w_m.ravel(), **kwargs
            ).predict_proba(X)

            np.clip(a=probs, a_min=2.220446049250313e-16, a_max=1.0, out=probs)

            self.base_learners_.update({m: deepcopy(base_learner)})

            w_m *= np.exp(
                -1.0
                * self.learning_rate
                * (1.0 - 1.0 / self.n_classes)
                * xlogy(Y, probs).sum(axis=1)
            )

            w_m /= np.sum(w_m)

            base_learner.set_params(seed=self.seed + (m + 1) * 1000)

            if self.verbose == 1:
                pbar.update(m)

        if self.verbose == 1:
            pbar.update(self.n_estimators)

        self.n_estimators = len(self.base_learners_)
        self.classes_ = np.unique(y)

        return self
```
Fit Boosting model to training data (X, y).
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
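In the notation of the `fit` source above, the discrete SAMME branch computes, at stage $m$ (with $K$ classes, base learner $h_m$, learning rate $\nu$, and observation weights $w^{(m)}$ summing to one):

$$
\mathrm{err}_m = \sum_{i=1}^{n} w_i^{(m)}\,\mathbf{1}\{y_i \neq h_m(x_i)\}, \qquad
\alpha_m = \nu \,\log\!\left(\frac{(K-1)\,(1-\mathrm{err}_m)}{\mathrm{err}_m}\right),
$$

$$
w_i^{(m+1)} = \frac{\exp\!\big(\alpha_m\,\mathbf{1}\{y_i \neq h_m(x_i)\}\big)}{\sum_{j=1}^{n}\exp\!\big(\alpha_m\,\mathbf{1}\{y_j \neq h_m(x_j)\}\big)}.
$$

When `reg_lambda > 0`, an elastic-net-style penalty $\lambda\big((1-\alpha)\,\tfrac{1}{2}\lVert w^{(m)}\rVert_2^2 + \alpha\,\lVert w^{(m)}\rVert_1\big)$ (with $\lambda$ = `reg_lambda`, $\alpha$ = `reg_alpha`) is added to $\mathrm{err}_m$, which is then capped at $1 - 1/K$. For SAMME.R, the weights are instead updated multiplicatively from the clipped class probabilities, $w_i^{(m+1)} \propto w_i^{(m)} \exp\!\big(-\nu\,(1 - \tfrac{1}{K}) \sum_{k} Y_{ik}\,\log p_{m,k}(x_i)\big)$, then renormalized.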
```python
def predict(self, X, **kwargs):
    """Predict test data X (parameters documented below)."""
    return self.predict_proba(X, **kwargs).argmax(axis=1)
```
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
```python
def predict_proba(self, X, **kwargs):
    """Predict class probabilities for test data X (parameters documented below)."""

    if self.method == "SAMME":
        ensemble_learner = np.zeros((X.shape[0], self.n_classes))

        for idx, base_learner in self.base_learners_.items():
            preds = base_learner.predict(X, **kwargs)

            ensemble_learner += self.alpha_[idx] * mo.one_hot_encode2(
                preds, self.n_classes
            )

        expit_ensemble_learner = expit(ensemble_learner)

        sum_ensemble = expit_ensemble_learner.sum(axis=1)

        return expit_ensemble_learner / sum_ensemble[:, None]

    # self.method == "SAMME.R"
    ensemble_learner = 0

    for idx, base_learner in self.base_learners_.items():
        probs = base_learner.predict_proba(X, **kwargs)

        np.clip(a=probs, a_min=2.220446049250313e-16, a_max=1.0, out=probs)

        log_preds_proba = np.log(probs)

        ensemble_learner += log_preds_proba - log_preds_proba.mean(axis=1)[:, None]

    ensemble_learner *= self.n_classes - 1

    expit_ensemble_learner = expit(ensemble_learner)

    sum_ensemble = expit_ensemble_learner.sum(axis=1)

    return expit_ensemble_learner / sum_ensemble[:, None]
```
Predict probabilities for test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
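The two aggregation rules implemented in the source above can be summarized as follows, with per-class scores $F_k(x)$ and $\sigma$ the logistic function (`expit`):

$$
\text{SAMME:}\;\; F_k(x) = \sum_{m} \alpha_m\,\mathbf{1}\{h_m(x) = k\}, \qquad
\text{SAMME.R:}\;\; F_k(x) = (K-1)\sum_{m}\Big(\log p_{m,k}(x) - \tfrac{1}{K}\sum_{k'}\log p_{m,k'}(x)\Big),
$$

$$
\hat{p}_k(x) = \frac{\sigma\big(F_k(x)\big)}{\sum_{k'}\sigma\big(F_{k'}(x)\big)}, \qquad
\hat{y}(x) = \operatorname*{arg\,max}_k \,\hat{p}_k(x).
$$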
```python
class Base(BaseEstimator):
    """Base model from which all the other classes inherit.

    This class contains the most important data preprocessing/feature
    engineering methods (parameters documented below).
    """

    # construct the object -----

    def __init__(
        self,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        seed=123,
        backend="cpu",
    ):
        # input checks -----

        sys_platform = platform.system()

        if (sys_platform == "Windows") and (backend in ("gpu", "tpu")):
            warnings.warn("No GPU/TPU computing on Windows yet, backend set to 'cpu'")
            backend = "cpu"

        assert activation_name in (
            "relu",
            "tanh",
            "sigmoid",
            "prelu",
            "elu",
        ), "'activation_name' must be in ('relu', 'tanh', 'sigmoid', 'prelu', 'elu')"

        assert nodes_sim in (
            "sobol",
            "hammersley",
            "uniform",
            "halton",
        ), "'nodes_sim' must be in ('sobol', 'hammersley', 'uniform', 'halton')"

        assert type_clust in (
            "kmeans",
            "gmm",
        ), "'type_clust' must be in ('kmeans', 'gmm')"

        assert (len(type_scaling) == 3) & all(
            type_scaling[i] in ("minmax", "std", "robust", "maxabs")
            for i in range(len(type_scaling))
        ), "'type_scaling' must have length 3, and available scaling methods are 'minmax' scaling, standardization ('std'), robust scaling ('robust') and max absolute ('maxabs')"

        assert (col_sample >= 0) & (
            col_sample <= 1
        ), "'col_sample' must be comprised between 0 and 1 (both included)"

        assert backend in (
            "cpu",
            "gpu",
            "tpu",
        ), "must have 'backend' in ('cpu', 'gpu', 'tpu')"

        self.n_hidden_features = n_hidden_features
        self.activation_name = activation_name
        self.a = a
        self.nodes_sim = nodes_sim
        self.bias = bias
        self.seed = seed
        self.backend = backend
        self.dropout = dropout
        self.direct_link = direct_link
        self.cluster_encode = cluster_encode
        self.type_clust = type_clust
        self.type_scaling = type_scaling
        self.col_sample = col_sample
        self.row_sample = row_sample
        self.n_clusters = n_clusters
        if isinstance(self, RegressorMixin):
            self.type_fit = "regression"
        elif isinstance(self, ClassifierMixin):
            self.type_fit = "classification"
        self.subsampler_ = None
        self.index_col_ = None
        self.index_row_ = True
        self.clustering_obj_ = None
        self.clustering_scaler_ = None
        self.nn_scaler_ = None
        self.scaler_ = None
        self.encoder_ = None
        self.W_ = None
        self.X_ = None
        self.y_ = None
        self.y_mean_ = None
        self.beta_ = None

        # activation function -----
        if sys_platform in ("Linux", "Darwin"):
            activation_options = {
                "relu": ac.relu if (self.backend == "cpu") else jnn.relu,
                "tanh": np.tanh if (self.backend == "cpu") else jnp.tanh,
                "sigmoid": (ac.sigmoid if (self.backend == "cpu") else jnn.sigmoid),
                "prelu": partial(ac.prelu, a=a),
                "elu": (
                    partial(ac.elu, a=a)
                    if (self.backend == "cpu")
                    else partial(jnn.elu, a=a)
                ),
            }
        else:  # on Windows currently, no JAX
            activation_options = {
                "relu": (ac.relu if (self.backend == "cpu") else NotImplementedError),
                "tanh": (np.tanh if (self.backend == "cpu") else NotImplementedError),
                "sigmoid": (
                    ac.sigmoid if (self.backend == "cpu") else NotImplementedError
                ),
                "prelu": partial(ac.prelu, a=a),
                "elu": (
                    partial(ac.elu, a=a)
                    if (self.backend == "cpu")
                    else NotImplementedError
                ),
            }
        self.activation_func = activation_options[activation_name]

    def cross_val_score(
        self,
        X,
        y,
        cv=5,
        scoring="accuracy",
        random_state=42,
        n_jobs=-1,
        epsilon=0.5,
        penalized=True,
        objective="abs",
        **kwargs
    ):
        """Penalized cross-validation score for a model.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features

            y: array-like, shape = [n_samples]
                Target values

            cv: int
                Number of folds

            scoring: str
                Scoring metric

            random_state: int
                Random state

            n_jobs: int
                Number of jobs to run in parallel

            epsilon: float
                Penalty parameter

            penalized: bool
                Whether to obtain a penalized cross-validation score or not

            objective: str
                'abs': minimize the absolute difference between cross-validation score and validation score
                'relative': minimize the relative difference between cross-validation score and validation score

        Returns:

            A namedtuple with the following fields:
                - cv_score: float
                    cross-validation score
                - val_score: float
                    validation score
                - penalized_score: float
                    penalized cross-validation score:
                    |cv_score - val_score| + epsilon*(1/val_score + 1/cv_score) when objective == 'abs',
                    |cv_score/val_score - 1| + epsilon*(1/val_score + 1/cv_score) when objective == 'relative'.
                    If a higher scoring metric is better, minimize the result;
                    if a lower scoring metric is better, maximize it.
        """
        if scoring == "accuracy":
            scoring_func = accuracy_score
        elif scoring == "balanced_accuracy":
            scoring_func = balanced_accuracy_score
        elif scoring == "f1":
            scoring_func = f1_score
        elif scoring == "roc_auc":
            scoring_func = roc_auc_score
        elif scoring == "r2":
            scoring_func = r2_score
        elif scoring == "mse":
            scoring_func = mean_squared_error
        elif scoring == "mae":
            scoring_func = mean_absolute_error
        elif scoring == "mape":
            scoring_func = mean_absolute_percentage_error
        elif scoring == "rmse":

            def scoring_func(y_true, y_pred):
                return np.sqrt(mean_squared_error(y_true, y_pred))

        X_train, X_val, y_train, y_val = train_test_split(
            X, y, test_size=0.2, random_state=random_state
        )

        res = cross_val_score(
            self, X_train, y_train, cv=cv, scoring=scoring, n_jobs=n_jobs
        )  # cross-validation error

        if penalized is False:
            return res

        DescribeResult = namedtuple(
            "DescribeResult", ["cv_score", "val_score", "penalized_score"]
        )

        numerator = res.mean()

        # Evaluate on the (cv+1)-th fold
        preds_val = self.fit(X_train, y_train).predict(X_val)
        try:
            denominator = scoring(y_val, preds_val)  # validation error
        except Exception:
            denominator = scoring_func(y_val, preds_val)

        # if higher is better
        if objective == "abs":
            penalized_score = np.abs(numerator - denominator) + epsilon * (
                1 / denominator + 1 / numerator
            )
        elif objective == "relative":
            ratio = numerator / denominator
            penalized_score = np.abs(ratio - 1) + epsilon * (
                1 / denominator + 1 / numerator
            )

        return DescribeResult(
            cv_score=numerator,
            val_score=denominator,
            penalized_score=penalized_score,
        )
```
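A usage sketch for the penalized cross-validation score defined above; the estimator (a `CustomClassifier` wrapping scikit-learn's `LogisticRegression`) and the breast-cancer data are illustrative choices, not part of the method's contract:

```python
# Sketch: penalized cross-validation score, inherited from Base by all estimators.
# The estimator and dataset below are illustrative.
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression

X, y = load_breast_cancer(return_X_y=True)
clf = ns.CustomClassifier(LogisticRegression(solver="liblinear"),
                          n_hidden_features=5, seed=123)
res = clf.cross_val_score(X, y, cv=5, scoring="accuracy",
                          epsilon=0.5, penalized=True, objective="abs")
print(res.cv_score, res.val_score, res.penalized_score)
```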
Base model from which all the other classes inherit.
This class contains the most important data preprocessing/feature engineering methods.
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or
not (False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
during training
direct_link: boolean
indicates if the original features are included (True) in the model's
fitting or not (False)
n_clusters: int
number of clusters for type_clust='kmeans' or type_clust='gmm'
clustering (could be 0: no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot);
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std'), MinMax scaling ('minmax'), robust scaling ('robust'), or max-absolute scaling ('maxabs')
col_sample: float
percentage of features randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform', clustering and dropout
backend: str
"cpu" or "gpu" or "tpu"
```python
def encode_clusters(self, X=None, predict=False, scaler=None, **kwargs):
    """Create new covariates with k-means or GMM clustering (parameters documented below)."""

    np.random.seed(self.seed)

    if X is None:
        X = self.X_

    if isinstance(X, pd.DataFrame):
        X = copy.deepcopy(X.values.astype(float))

    if len(X.shape) == 1:
        X = X.reshape(1, -1)

    if predict is False:  # encode training set

        # scale input data before clustering
        self.clustering_scaler_, scaled_X = mo.scale_covariates(
            X, choice=self.type_scaling[2], scaler=self.clustering_scaler_
        )

        self.clustering_obj_, X_clustered = mo.cluster_covariates(
            scaled_X,
            self.n_clusters,
            self.seed,
            type_clust=self.type_clust,
            **kwargs
        )

        if self.cluster_encode == True:
            return mo.one_hot_encode(X_clustered, self.n_clusters).astype(
                np.float16
            )

        return X_clustered.astype(np.float16)

    # if predict == True, encode test set
    X_clustered = self.clustering_obj_.predict(self.clustering_scaler_.transform(X))

    if self.cluster_encode == True:
        return mo.one_hot_encode(X_clustered, self.n_clusters).astype(np.float16)

    return X_clustered.astype(np.float16)
```
Create new covariates with k-means or GMM clustering
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
predict: boolean
is False on training set and True on test set
scaler: {object} of class StandardScaler, MinMaxScaler, RobustScaler or MaxAbsScaler
if scaler has already been fitted on training data (online training), it can be passed here
**kwargs:
additional parameters to be passed to the
clustering method
Returns:
Clusters' matrix, one-hot encoded: {array-like}
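When `n_clusters > 0`, the cluster memberships produced here are appended to the covariates, one-hot encoded by default (`cluster_encode=True`). A small illustrative sketch using scikit-learn's `KMeans` directly (not the library's internal code):

```python
import numpy as np
from sklearn.cluster import KMeans

X = np.random.default_rng(0).normal(size=(8, 3))
labels = KMeans(n_clusters=2, n_init=10, random_state=123).fit_predict(X)
one_hot = np.eye(2)[labels]                   # what cluster_encode=True produces
X_augmented = np.column_stack([X, one_hot])   # covariates + cluster indicators
print(X_augmented.shape)                      # (8, 5)
```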
```python
def create_layer(self, scaled_X, W=None):
    """Create the hidden layer (parameters documented below)."""

    n_features = scaled_X.shape[1]

    if self.bias is False:  # no bias term in the hidden layer
        if W is None:
            if self.nodes_sim == "sobol":
                self.W_ = generate_sobol(
                    n_dims=n_features,
                    n_points=self.n_hidden_features,
                    seed=self.seed,
                )
            elif self.nodes_sim == "hammersley":
                self.W_ = generate_hammersley(
                    n_dims=n_features,
                    n_points=self.n_hidden_features,
                    seed=self.seed,
                )
            elif self.nodes_sim == "uniform":
                self.W_ = generate_uniform(
                    n_dims=n_features,
                    n_points=self.n_hidden_features,
                    seed=self.seed,
                )
            else:
                self.W_ = generate_halton(
                    n_dims=n_features,
                    n_points=self.n_hidden_features,
                    seed=self.seed,
                )

            assert (
                scaled_X.shape[1] == self.W_.shape[0]
            ), "check dimensions of covariates X and matrix W"

            return mo.dropout(
                x=self.activation_func(
                    mo.safe_sparse_dot(a=scaled_X, b=self.W_, backend=self.backend)
                ),
                drop_prob=self.dropout,
                seed=self.seed,
            )

        # W is not None
        assert (
            scaled_X.shape[1] == W.shape[0]
        ), "check dimensions of covariates X and matrix W"

        return mo.dropout(
            x=self.activation_func(
                mo.safe_sparse_dot(a=scaled_X, b=W, backend=self.backend)
            ),
            drop_prob=self.dropout,
            seed=self.seed,
        )

    # with bias term in the hidden layer
    if W is None:
        n_features_1 = n_features + 1

        if self.nodes_sim == "sobol":
            self.W_ = generate_sobol(
                n_dims=n_features_1,
                n_points=self.n_hidden_features,
                seed=self.seed,
            )
        elif self.nodes_sim == "hammersley":
            self.W_ = generate_hammersley(
                n_dims=n_features_1,
                n_points=self.n_hidden_features,
                seed=self.seed,
            )
        elif self.nodes_sim == "uniform":
            self.W_ = generate_uniform(
                n_dims=n_features_1,
                n_points=self.n_hidden_features,
                seed=self.seed,
            )
        else:
            self.W_ = generate_halton(
                n_dims=n_features_1,
                n_points=self.n_hidden_features,
                seed=self.seed,
            )

        return mo.dropout(
            x=self.activation_func(
                mo.safe_sparse_dot(
                    a=mo.cbind(
                        np.ones(scaled_X.shape[0]),
                        scaled_X,
                        backend=self.backend,
                    ),
                    b=self.W_,
                    backend=self.backend,
                )
            ),
            drop_prob=self.dropout,
            seed=self.seed,
        )

    # W is not None
    return mo.dropout(
        x=self.activation_func(
            mo.safe_sparse_dot(
                a=mo.cbind(
                    np.ones(scaled_X.shape[0]),
                    scaled_X,
                    backend=self.backend,
                ),
                b=W,
                backend=self.backend,
            )
        ),
        drop_prob=self.dropout,
        seed=self.seed,
    )
```
Create hidden layer.
Parameters:
scaled_X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features
W: {array-like}, shape = [n_features, hidden_features]
if provided, constructs the hidden layer with W; otherwise computed internally
Returns:
Hidden layer matrix: {array-like}
```python
def cook_training_set(self, y=None, X=None, W=None, **kwargs):
    """Create new hidden features for the training set and center the response (parameters documented below)."""

    # either X and y are stored or not
    if self.n_hidden_features > 0:  # has a hidden layer
        assert (
            len(self.type_scaling) >= 2
        ), "must have len(self.type_scaling) >= 2 when self.n_hidden_features > 0"

    if X is None:

        if self.col_sample == 1:
            input_X = self.X_
        else:
            n_features = self.X_.shape[1]
            new_n_features = int(np.ceil(n_features * self.col_sample))
            assert (
                new_n_features >= 1
            ), "check class attribute 'col_sample' and the number of covariates provided for X"
            np.random.seed(self.seed)
            index_col = np.random.choice(
                range(n_features), size=new_n_features, replace=False
            )
            self.index_col_ = index_col
            input_X = self.X_[:, self.index_col_]

    else:  # X is not None # keep X vs self.X_

        if isinstance(X, pd.DataFrame):
            X = copy.deepcopy(X.values.astype(float))

        if self.col_sample == 1:
            input_X = X
        else:
            n_features = X.shape[1]
            new_n_features = int(np.ceil(n_features * self.col_sample))
            assert (
                new_n_features >= 1
            ), "check class attribute 'col_sample' and the number of covariates provided for X"
            np.random.seed(self.seed)
            index_col = np.random.choice(
                range(n_features), size=new_n_features, replace=False
            )
            self.index_col_ = index_col
            input_X = X[:, self.index_col_]

    if self.n_clusters <= 0:
        # data without any clustering -----

        if self.n_hidden_features > 0:  # with hidden layer

            self.nn_scaler_, scaled_X = mo.scale_covariates(
                input_X, choice=self.type_scaling[1], scaler=self.nn_scaler_
            )
            Phi_X = (
                self.create_layer(scaled_X)
                if W is None
                else self.create_layer(scaled_X, W=W)
            )
            Z = (
                mo.cbind(input_X, Phi_X, backend=self.backend)
                if self.direct_link is True
                else Phi_X
            )
            self.scaler_, scaled_Z = mo.scale_covariates(
                Z, choice=self.type_scaling[0], scaler=self.scaler_
            )
        else:  # no hidden layer
            Z = input_X
            self.scaler_, scaled_Z = mo.scale_covariates(
                Z, choice=self.type_scaling[0], scaler=self.scaler_
            )

    else:

        # data with clustering -----

        augmented_X = mo.cbind(
            input_X,
            self.encode_clusters(input_X, **kwargs),
            backend=self.backend,
        )

        if self.n_hidden_features > 0:  # with hidden layer

            self.nn_scaler_, scaled_X = mo.scale_covariates(
                augmented_X,
                choice=self.type_scaling[1],
                scaler=self.nn_scaler_,
            )
            Phi_X = (
                self.create_layer(scaled_X)
                if W is None
                else self.create_layer(scaled_X, W=W)
            )
            Z = (
                mo.cbind(augmented_X, Phi_X, backend=self.backend)
                if self.direct_link is True
                else Phi_X
            )
            self.scaler_, scaled_Z = mo.scale_covariates(
                Z, choice=self.type_scaling[0], scaler=self.scaler_
            )
        else:  # no hidden layer
            Z = augmented_X
            self.scaler_, scaled_Z = mo.scale_covariates(
                Z, choice=self.type_scaling[0], scaler=self.scaler_
            )

    # Returning model inputs -----
    if mx.is_factor(y) is False:  # regression
        # center y
        if y is None:
            self.y_mean_, centered_y = mo.center_response(self.y_)
        else:
            self.y_mean_, centered_y = mo.center_response(y)

        # y is subsampled
        if self.row_sample < 1:
            n, p = Z.shape

            self.subsampler_ = (
                SubSampler(y=self.y_, row_sample=self.row_sample, seed=self.seed)
                if y is None
                else SubSampler(y=y, row_sample=self.row_sample, seed=self.seed)
            )

            self.index_row_ = self.subsampler_.subsample()

            n_row_sample = len(self.index_row_)
            # regression
            return (
                centered_y[self.index_row_].reshape(n_row_sample),
                self.scaler_.transform(
                    Z[self.index_row_, :].reshape(n_row_sample, p)
                ),
            )
        # y is not subsampled
        # regression
        return (centered_y, self.scaler_.transform(Z))

    # classification
    # y is subsampled
    if self.row_sample < 1:
        n, p = Z.shape

        self.subsampler_ = (
            SubSampler(y=self.y_, row_sample=self.row_sample, seed=self.seed)
            if y is None
            else SubSampler(y=y, row_sample=self.row_sample, seed=self.seed)
        )

        self.index_row_ = self.subsampler_.subsample()

        n_row_sample = len(self.index_row_)
        # classification
        return (
            y[self.index_row_].reshape(n_row_sample),
            self.scaler_.transform(Z[self.index_row_, :].reshape(n_row_sample, p)),
        )
    # y is not subsampled
    # classification
    return (y, self.scaler_.transform(Z))
```
Create new hidden features for the training set via the hidden layer, and center the response.
Parameters:
y: array-like, shape = [n_samples]
Target values
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features
W: {array-like}, shape = [n_features, hidden_features]
if provided, constructs the hidden layer via W
Returns:
(centered response, direct link + hidden layer matrix): {tuple}
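For illustration only, here is a minimal sketch of calling `cook_training_set` directly on a `BaseRegressor` (which derives from `Base`); in normal use it is invoked internally by `fit`. The dataset and hyperparameters below are illustrative assumptions, not taken from the library's documentation.

```python
import nnetsauce as ns
from sklearn.datasets import load_diabetes

X, y = load_diabetes(return_X_y=True)

# BaseRegressor derives from Base, so it exposes cook_training_set
reg = ns.BaseRegressor(n_hidden_features=10, n_clusters=2, seed=123)

# returns the centered response and the scaled design matrix
# [original features | cluster encoding | hidden layer]
centered_y, scaled_Z = reg.cook_training_set(y=y, X=X)
print(centered_y.shape, scaled_Z.shape)
```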
640 def cook_test_set(self, X, **kwargs): 641 """Transform data from test set, with hidden layer. 642 643 Parameters: 644 645 X: {array-like}, shape = [n_samples, n_features] 646 Training vectors, where n_samples is the number 647 of samples and n_features is the number of features 648 649 **kwargs: additional parameters to be passed to self.encode_cluster 650 651 Returns: 652 653 Transformed test set : {array-like} 654 """ 655 656 if isinstance(X, pd.DataFrame): 657 X = copy.deepcopy(X.values.astype(float)) 658 659 if len(X.shape) == 1: 660 X = X.reshape(1, -1) 661 662 if ( 663 self.n_clusters == 0 664 ): # data without clustering: self.n_clusters is None ----- 665 if self.n_hidden_features > 0: 666 # if hidden layer 667 scaled_X = ( 668 self.nn_scaler_.transform(X) 669 if (self.col_sample == 1) 670 else self.nn_scaler_.transform(X[:, self.index_col_]) 671 ) 672 Phi_X = self.create_layer(scaled_X, self.W_) 673 if self.direct_link == True: 674 return self.scaler_.transform( 675 mo.cbind(scaled_X, Phi_X, backend=self.backend) 676 ) 677 # when self.direct_link == False 678 return self.scaler_.transform(Phi_X) 679 # if no hidden layer # self.n_hidden_features == 0 680 return self.scaler_.transform(X) 681 682 # data with clustering: self.n_clusters > 0 ----- 683 if self.col_sample == 1: 684 predicted_clusters = self.encode_clusters(X=X, predict=True, **kwargs) 685 augmented_X = mo.cbind(X, predicted_clusters, backend=self.backend) 686 else: 687 predicted_clusters = self.encode_clusters( 688 X=X[:, self.index_col_], predict=True, **kwargs 689 ) 690 augmented_X = mo.cbind( 691 X[:, self.index_col_], predicted_clusters, backend=self.backend 692 ) 693 694 if self.n_hidden_features > 0: # if hidden layer 695 scaled_X = self.nn_scaler_.transform(augmented_X) 696 Phi_X = self.create_layer(scaled_X, self.W_) 697 if self.direct_link == True: 698 return self.scaler_.transform( 699 mo.cbind(augmented_X, Phi_X, backend=self.backend) 700 ) 701 return self.scaler_.transform(Phi_X) 702 703 # if no hidden layer 704 return self.scaler_.transform(augmented_X)
Transform test set data through the hidden layer.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features
**kwargs: additional parameters to be passed to self.encode_clusters
Returns:
Transformed test set : {array-like}
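Correspondingly, a minimal sketch of `cook_test_set`, which applies the clustering, hidden layer and scaling learned on the training set to new observations; `predict` calls it internally. Again, the dataset and settings are illustrative assumptions.

```python
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123)

reg = ns.BaseRegressor(n_hidden_features=10, n_clusters=2, seed=123)
reg.fit(X_train, y_train)  # fit calls cook_training_set internally

# transform held-out observations with the already-fitted scalers, clusters and weights
Z_test = reg.cook_test_set(X_test)
print(Z_test.shape)  # same number of columns as the training design matrix
```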
15class BaseRegressor(Base, RegressorMixin): 16 """Random Vector Functional Link Network regression without shrinkage 17 18 Parameters: 19 20 n_hidden_features: int 21 number of nodes in the hidden layer 22 23 activation_name: str 24 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 25 26 a: float 27 hyperparameter for 'prelu' or 'elu' activation function 28 29 nodes_sim: str 30 type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton', 31 'uniform' 32 33 bias: boolean 34 indicates if the hidden layer contains a bias term (True) or 35 not (False) 36 37 dropout: float 38 regularization parameter; (random) percentage of nodes dropped out 39 of the training 40 41 direct_link: boolean 42 indicates if the original features are included (True) in model's 43 fitting or not (False) 44 45 n_clusters: int 46 number of clusters for type_clust='kmeans' or type_clust='gmm' 47 clustering (could be 0: no clustering) 48 49 cluster_encode: bool 50 defines how the variable containing clusters is treated (default is one-hot); 51 if `False`, then labels are used, without one-hot encoding 52 53 type_clust: str 54 type of clustering method: currently k-means ('kmeans') or Gaussian 55 Mixture Model ('gmm') 56 57 type_scaling: a tuple of 3 strings 58 scaling methods for inputs, hidden layer, and clustering respectively 59 (and when relevant). 60 Currently available: standardization ('std') or MinMax scaling ('minmax') 61 62 col_sample: float 63 percentage of features randomly chosen for training 64 65 row_sample: float 66 percentage of rows chosen for training, by stratified bootstrapping 67 68 seed: int 69 reproducibility seed for nodes_sim=='uniform', clustering and dropout 70 71 backend: str 72 "cpu" or "gpu" or "tpu" 73 74 Attributes: 75 76 beta_: vector 77 regression coefficients 78 79 GCV_: float 80 Generalized Cross-Validation error 81 82 """ 83 84 # construct the object ----- 85 86 def __init__( 87 self, 88 n_hidden_features=5, 89 activation_name="relu", 90 a=0.01, 91 nodes_sim="sobol", 92 bias=True, 93 dropout=0, 94 direct_link=True, 95 n_clusters=2, 96 cluster_encode=True, 97 type_clust="kmeans", 98 type_scaling=("std", "std", "std"), 99 col_sample=1, 100 row_sample=1, 101 seed=123, 102 backend="cpu", 103 ): 104 super().__init__( 105 n_hidden_features=n_hidden_features, 106 activation_name=activation_name, 107 a=a, 108 nodes_sim=nodes_sim, 109 bias=bias, 110 dropout=dropout, 111 direct_link=direct_link, 112 n_clusters=n_clusters, 113 cluster_encode=cluster_encode, 114 type_clust=type_clust, 115 type_scaling=type_scaling, 116 col_sample=col_sample, 117 row_sample=row_sample, 118 seed=seed, 119 backend=backend, 120 ) 121 122 def fit(self, X, y, **kwargs): 123 """Fit BaseRegressor to training data (X, y) 124 125 Parameters: 126 127 X: {array-like}, shape = [n_samples, n_features] 128 Training vectors, where n_samples is the number 129 of samples and n_features is the number of features 130 131 y: array-like, shape = [n_samples] 132 Target values 133 134 **kwargs: additional parameters to be passed to self.cook_training_set 135 136 Returns: 137 138 self: object 139 """ 140 141 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 142 143 fit_obj = lmf.beta_Sigma_hat(X=scaled_Z, y=centered_y, backend=self.backend) 144 145 self.beta_ = fit_obj["beta_hat"] 146 147 self.GCV_ = fit_obj["GCV"] 148 149 return self 150 151 def predict(self, X, **kwargs): 152 """Predict test data X. 
153 154 Parameters: 155 156 X: {array-like}, shape = [n_samples, n_features] 157 Training vectors, where n_samples is the number 158 of samples and n_features is the number of features 159 160 **kwargs: additional parameters to be passed to self.cook_test_set 161 162 Returns: 163 164 model predictions: {array-like} 165 """ 166 167 if len(X.shape) == 1: 168 n_features = X.shape[0] 169 new_X = mo.rbind( 170 X.reshape(1, n_features), 171 np.ones(n_features).reshape(1, n_features), 172 ) 173 174 return ( 175 self.y_mean_ 176 + mo.safe_sparse_dot( 177 a=self.cook_test_set(new_X, **kwargs), 178 b=self.beta_, 179 backend=self.backend, 180 ) 181 )[0] 182 183 return self.y_mean_ + mo.safe_sparse_dot( 184 a=self.cook_test_set(X, **kwargs), 185 b=self.beta_, 186 backend=self.backend, 187 )
Random Vector Functional Link Network regression without shrinkage
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or
not (False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original features are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for type_clust='kmeans' or type_clust='gmm'
clustering (could be 0: no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot);
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of features randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform', clustering and dropout
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
beta_: vector
regression coefficients
GCV_: float
Generalized Cross-Validation error
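The class docstring above has no Examples section; the following is a hedged sketch of a typical fit/predict round trip, using scikit-learn's diabetes dataset and illustrative hyperparameters.

```python
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

reg = ns.BaseRegressor(n_hidden_features=50, dropout=0.1, n_clusters=2, seed=123)
reg.fit(X_train, y_train)

preds = reg.predict(X_test)
print("RMSE:", np.sqrt(mean_squared_error(y_test, preds)))
print("GCV:", reg.GCV_)  # generalized cross-validation error stored at fit time
```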
122 def fit(self, X, y, **kwargs): 123 """Fit BaseRegressor to training data (X, y) 124 125 Parameters: 126 127 X: {array-like}, shape = [n_samples, n_features] 128 Training vectors, where n_samples is the number 129 of samples and n_features is the number of features 130 131 y: array-like, shape = [n_samples] 132 Target values 133 134 **kwargs: additional parameters to be passed to self.cook_training_set 135 136 Returns: 137 138 self: object 139 """ 140 141 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 142 143 fit_obj = lmf.beta_Sigma_hat(X=scaled_Z, y=centered_y, backend=self.backend) 144 145 self.beta_ = fit_obj["beta_hat"] 146 147 self.GCV_ = fit_obj["GCV"] 148 149 return self
Fit BaseRegressor to training data (X, y)
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features
y: array-like, shape = [n_samples]
Target values
**kwargs: additional parameters to be passed to self.cook_training_set
Returns:
self: object
151 def predict(self, X, **kwargs): 152 """Predict test data X. 153 154 Parameters: 155 156 X: {array-like}, shape = [n_samples, n_features] 157 Training vectors, where n_samples is the number 158 of samples and n_features is the number of features 159 160 **kwargs: additional parameters to be passed to self.cook_test_set 161 162 Returns: 163 164 model predictions: {array-like} 165 """ 166 167 if len(X.shape) == 1: 168 n_features = X.shape[0] 169 new_X = mo.rbind( 170 X.reshape(1, n_features), 171 np.ones(n_features).reshape(1, n_features), 172 ) 173 174 return ( 175 self.y_mean_ 176 + mo.safe_sparse_dot( 177 a=self.cook_test_set(new_X, **kwargs), 178 b=self.beta_, 179 backend=self.backend, 180 ) 181 )[0] 182 183 return self.y_mean_ + mo.safe_sparse_dot( 184 a=self.cook_test_set(X, **kwargs), 185 b=self.beta_, 186 backend=self.backend, 187 )
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features
**kwargs: additional parameters to be passed to self.cook_test_set
Returns:
model predictions: {array-like}
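As the source above shows, a 1-D input to `predict` is treated as a single observation (it is stacked with a dummy row internally and only the first prediction is returned). A short sketch, under the same illustrative assumptions as above:

```python
import nnetsauce as ns
from sklearn.datasets import load_diabetes

X, y = load_diabetes(return_X_y=True)
reg = ns.BaseRegressor(n_hidden_features=10, seed=123).fit(X, y)

print(reg.predict(X[:3, :]))  # 2-D input -> array of predictions
print(reg.predict(X[0, :]))   # 1-D input -> a single prediction
```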
15class BayesianRVFLRegressor(Base, RegressorMixin): 16 """Bayesian Random Vector Functional Link Network regression with one prior 17 18 Parameters: 19 20 n_hidden_features: int 21 number of nodes in the hidden layer 22 23 activation_name: str 24 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 25 26 a: float 27 hyperparameter for 'prelu' or 'elu' activation function 28 29 nodes_sim: str 30 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform' 31 32 bias: boolean 33 indicates if the hidden layer contains a bias term (True) or not (False) 34 35 dropout: float 36 regularization parameter; (random) percentage of nodes dropped out 37 of the training 38 39 direct_link: boolean 40 indicates if the original features are included (True) in model''s fitting or not (False) 41 42 n_clusters: int 43 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering) 44 45 cluster_encode: bool 46 defines how the variable containing clusters is treated (default is one-hot) 47 if `False`, then labels are used, without one-hot encoding 48 49 type_clust: str 50 type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm') 51 52 type_scaling: a tuple of 3 strings 53 scaling methods for inputs, hidden layer, and clustering respectively 54 (and when relevant). 55 Currently available: standardization ('std') or MinMax scaling ('minmax') 56 57 seed: int 58 reproducibility seed for nodes_sim=='uniform' 59 60 s: float 61 std. dev. of regression parameters in Bayesian Ridge Regression 62 63 sigma: float 64 std. dev. of residuals in Bayesian Ridge Regression 65 66 return_std: boolean 67 if True, uncertainty around predictions is evaluated 68 69 backend: str 70 "cpu" or "gpu" or "tpu" 71 72 Attributes: 73 74 beta_: array-like 75 regression''s coefficients 76 77 Sigma_: array-like 78 covariance of the distribution of fitted parameters 79 80 GCV_: float 81 Generalized cross-validation error 82 83 y_mean_: float 84 average response 85 86 Examples: 87 88 ```python 89 TBD 90 ``` 91 92 """ 93 94 # construct the object ----- 95 96 def __init__( 97 self, 98 n_hidden_features=5, 99 activation_name="relu", 100 a=0.01, 101 nodes_sim="sobol", 102 bias=True, 103 dropout=0, 104 direct_link=True, 105 n_clusters=2, 106 cluster_encode=True, 107 type_clust="kmeans", 108 type_scaling=("std", "std", "std"), 109 seed=123, 110 s=0.1, 111 sigma=0.05, 112 return_std=True, 113 backend="cpu", 114 ): 115 super().__init__( 116 n_hidden_features=n_hidden_features, 117 activation_name=activation_name, 118 a=a, 119 nodes_sim=nodes_sim, 120 bias=bias, 121 dropout=dropout, 122 direct_link=direct_link, 123 n_clusters=n_clusters, 124 cluster_encode=cluster_encode, 125 type_clust=type_clust, 126 type_scaling=type_scaling, 127 seed=seed, 128 backend=backend, 129 ) 130 self.s = s 131 self.sigma = sigma 132 self.beta_ = None 133 self.Sigma_ = None 134 self.GCV_ = None 135 self.return_std = return_std 136 137 def fit(self, X, y, **kwargs): 138 """Fit BayesianRVFLRegressor to training data (X, y). 139 140 Parameters: 141 142 X: {array-like}, shape = [n_samples, n_features] 143 Training vectors, where n_samples is the number 144 of samples and n_features is the number of features. 145 146 y: array-like, shape = [n_samples] 147 Target values. 
148 149 **kwargs: additional parameters to be passed to 150 self.cook_training_set 151 152 Returns: 153 154 self: object 155 156 """ 157 158 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 159 160 fit_obj = lmf.beta_Sigma_hat_rvfl( 161 X=scaled_Z, 162 y=centered_y, 163 s=self.s, 164 sigma=self.sigma, 165 fit_intercept=False, 166 return_cov=self.return_std, 167 backend=self.backend, 168 ) 169 170 self.beta_ = fit_obj["beta_hat"] 171 172 if self.return_std == True: 173 self.Sigma_ = fit_obj["Sigma_hat"] 174 175 self.GCV_ = fit_obj["GCV"] 176 177 return self 178 179 def predict(self, X, return_std=False, **kwargs): 180 """Predict test data X. 181 182 Parameters: 183 184 X: {array-like}, shape = [n_samples, n_features] 185 Training vectors, where n_samples is the number 186 of samples and n_features is the number of features. 187 188 return_std: {boolean}, standard dev. is returned or not 189 190 **kwargs: additional parameters to be passed to 191 self.cook_test_set 192 193 Returns: 194 195 model predictions: {array-like} 196 197 """ 198 199 if len(X.shape) == 1: # one observation in the test set only 200 n_features = X.shape[0] 201 new_X = mo.rbind( 202 x=X.reshape(1, n_features), 203 y=np.ones(n_features).reshape(1, n_features), 204 backend=self.backend, 205 ) 206 207 self.return_std = return_std 208 209 if self.return_std == False: 210 if len(X.shape) == 1: 211 return ( 212 self.y_mean_ 213 + mo.safe_sparse_dot( 214 a=self.cook_test_set(new_X, **kwargs), 215 b=self.beta_, 216 backend=self.backend, 217 ) 218 )[0] 219 220 return self.y_mean_ + mo.safe_sparse_dot( 221 a=self.cook_test_set(X, **kwargs), 222 b=self.beta_, 223 backend=self.backend, 224 ) 225 226 else: # confidence interval required for preds? 227 if len(X.shape) == 1: 228 Z = self.cook_test_set(new_X, **kwargs) 229 230 pred_obj = lmf.beta_Sigma_hat_rvfl( 231 s=self.s, 232 sigma=self.sigma, 233 X_star=Z, 234 return_cov=True, 235 beta_hat_=self.beta_, 236 Sigma_hat_=self.Sigma_, 237 backend=self.backend, 238 ) 239 240 return ( 241 self.y_mean_ + pred_obj["preds"][0], 242 pred_obj["preds_std"][0], 243 ) 244 245 Z = self.cook_test_set(X, **kwargs) 246 247 pred_obj = lmf.beta_Sigma_hat_rvfl( 248 s=self.s, 249 sigma=self.sigma, 250 X_star=Z, 251 return_cov=True, 252 beta_hat_=self.beta_, 253 Sigma_hat_=self.Sigma_, 254 backend=self.backend, 255 ) 256 257 return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])
Bayesian Random Vector Functional Link Network regression with one prior
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not (False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original features are included (True) in the model's fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
seed: int
reproducibility seed for nodes_sim=='uniform'
s: float
std. dev. of regression parameters in Bayesian Ridge Regression
sigma: float
std. dev. of residuals in Bayesian Ridge Regression
return_std: boolean
if True, uncertainty around predictions is evaluated
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
beta_: array-like
regression coefficients
Sigma_: array-like
covariance of the distribution of fitted parameters
GCV_: float
Generalized cross-validation error
y_mean_: float
average response
Examples:
TBD
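Since the Examples section is still marked TBD, here is a hedged sketch (scikit-learn's diabetes dataset; hyperparameters are illustrative, not recommendations):

```python
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

reg = ns.BayesianRVFLRegressor(n_hidden_features=25, s=0.1, sigma=0.05, seed=123)
reg.fit(X_train, y_train)

print(reg.predict(X_test)[:5])  # point predictions
print(reg.GCV_)                 # generalized cross-validation error
```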
137 def fit(self, X, y, **kwargs): 138 """Fit BayesianRVFLRegressor to training data (X, y). 139 140 Parameters: 141 142 X: {array-like}, shape = [n_samples, n_features] 143 Training vectors, where n_samples is the number 144 of samples and n_features is the number of features. 145 146 y: array-like, shape = [n_samples] 147 Target values. 148 149 **kwargs: additional parameters to be passed to 150 self.cook_training_set 151 152 Returns: 153 154 self: object 155 156 """ 157 158 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 159 160 fit_obj = lmf.beta_Sigma_hat_rvfl( 161 X=scaled_Z, 162 y=centered_y, 163 s=self.s, 164 sigma=self.sigma, 165 fit_intercept=False, 166 return_cov=self.return_std, 167 backend=self.backend, 168 ) 169 170 self.beta_ = fit_obj["beta_hat"] 171 172 if self.return_std == True: 173 self.Sigma_ = fit_obj["Sigma_hat"] 174 175 self.GCV_ = fit_obj["GCV"] 176 177 return self
Fit BayesianRVFLRegressor to training data (X, y).
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set
Returns:
self: object
179 def predict(self, X, return_std=False, **kwargs): 180 """Predict test data X. 181 182 Parameters: 183 184 X: {array-like}, shape = [n_samples, n_features] 185 Training vectors, where n_samples is the number 186 of samples and n_features is the number of features. 187 188 return_std: {boolean}, standard dev. is returned or not 189 190 **kwargs: additional parameters to be passed to 191 self.cook_test_set 192 193 Returns: 194 195 model predictions: {array-like} 196 197 """ 198 199 if len(X.shape) == 1: # one observation in the test set only 200 n_features = X.shape[0] 201 new_X = mo.rbind( 202 x=X.reshape(1, n_features), 203 y=np.ones(n_features).reshape(1, n_features), 204 backend=self.backend, 205 ) 206 207 self.return_std = return_std 208 209 if self.return_std == False: 210 if len(X.shape) == 1: 211 return ( 212 self.y_mean_ 213 + mo.safe_sparse_dot( 214 a=self.cook_test_set(new_X, **kwargs), 215 b=self.beta_, 216 backend=self.backend, 217 ) 218 )[0] 219 220 return self.y_mean_ + mo.safe_sparse_dot( 221 a=self.cook_test_set(X, **kwargs), 222 b=self.beta_, 223 backend=self.backend, 224 ) 225 226 else: # confidence interval required for preds? 227 if len(X.shape) == 1: 228 Z = self.cook_test_set(new_X, **kwargs) 229 230 pred_obj = lmf.beta_Sigma_hat_rvfl( 231 s=self.s, 232 sigma=self.sigma, 233 X_star=Z, 234 return_cov=True, 235 beta_hat_=self.beta_, 236 Sigma_hat_=self.Sigma_, 237 backend=self.backend, 238 ) 239 240 return ( 241 self.y_mean_ + pred_obj["preds"][0], 242 pred_obj["preds_std"][0], 243 ) 244 245 Z = self.cook_test_set(X, **kwargs) 246 247 pred_obj = lmf.beta_Sigma_hat_rvfl( 248 s=self.s, 249 sigma=self.sigma, 250 X_star=Z, 251 return_cov=True, 252 beta_hat_=self.beta_, 253 Sigma_hat_=self.Sigma_, 254 backend=self.backend, 255 ) 256 257 return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
return_std: boolean
if True, the standard deviation of the predictions is returned as well
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
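With `return_std=True`, `predict` returns a tuple (mean, standard deviation), which can be turned into rough Gaussian prediction intervals. A hedged sketch (the 1.96 multiplier and the train/test split are illustrative assumptions):

```python
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_diabetes

X, y = load_diabetes(return_X_y=True)
reg = ns.BayesianRVFLRegressor(n_hidden_features=25, seed=123).fit(X[:350], y[:350])

mean, std = reg.predict(X[350:], return_std=True)
lower, upper = mean - 1.96 * std, mean + 1.96 * std  # rough 95% band
print(np.mean((y[350:] >= lower) & (y[350:] <= upper)))  # empirical coverage
```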
15class BayesianRVFL2Regressor(Base, RegressorMixin): 16 """Bayesian Random Vector Functional Link Network regression with two priors 17 18 Parameters: 19 20 n_hidden_features: int 21 number of nodes in the hidden layer 22 23 activation_name: str 24 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 25 26 a: float 27 hyperparameter for 'prelu' or 'elu' activation function 28 29 nodes_sim: str 30 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform' 31 32 bias: boolean 33 indicates if the hidden layer contains a bias term (True) or not (False) 34 35 dropout: float 36 regularization parameter; (random) percentage of nodes dropped out 37 of the training 38 39 direct_link: boolean 40 indicates if the original features are included (True) in model''s fitting or not (False) 41 42 n_clusters: int 43 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering) 44 45 cluster_encode: bool 46 defines how the variable containing clusters is treated (default is one-hot) 47 if `False`, then labels are used, without one-hot encoding 48 49 type_clust: str 50 type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm') 51 52 type_scaling: a tuple of 3 strings 53 scaling methods for inputs, hidden layer, and clustering respectively 54 (and when relevant). 55 Currently available: standardization ('std') or MinMax scaling ('minmax') 56 57 seed: int 58 reproducibility seed for nodes_sim=='uniform' 59 60 s1: float 61 std. dev. of init. regression parameters in Bayesian Ridge Regression 62 63 s2: float 64 std. dev. of augmented regression parameters in Bayesian Ridge Regression 65 66 sigma: float 67 std. dev. of residuals in Bayesian Ridge Regression 68 69 return_std: boolean 70 if True, uncertainty around predictions is evaluated 71 72 backend: str 73 "cpu" or "gpu" or "tpu" 74 75 Attributes: 76 77 beta_: array-like 78 regression''s coefficients 79 80 Sigma_: array-like 81 covariance of the distribution of fitted parameters 82 83 GCV_: float 84 Generalized cross-validation error 85 86 y_mean_: float 87 average response 88 89 Examples: 90 91 ```python 92 TBD 93 ``` 94 95 """ 96 97 # construct the object ----- 98 99 def __init__( 100 self, 101 n_hidden_features=5, 102 activation_name="relu", 103 a=0.01, 104 nodes_sim="sobol", 105 bias=True, 106 dropout=0, 107 direct_link=True, 108 n_clusters=0, 109 cluster_encode=True, 110 type_clust="kmeans", 111 type_scaling=("std", "std", "std"), 112 seed=123, 113 s1=0.1, 114 s2=0.1, 115 sigma=0.05, 116 return_std=True, 117 backend="cpu", 118 ): 119 super().__init__( 120 n_hidden_features=n_hidden_features, 121 activation_name=activation_name, 122 a=a, 123 nodes_sim=nodes_sim, 124 bias=bias, 125 dropout=dropout, 126 direct_link=direct_link, 127 n_clusters=n_clusters, 128 cluster_encode=cluster_encode, 129 type_clust=type_clust, 130 type_scaling=type_scaling, 131 seed=seed, 132 backend=backend, 133 ) 134 135 self.s1 = s1 136 self.s2 = s2 137 self.sigma = sigma 138 self.beta_ = None 139 self.Sigma_ = None 140 self.GCV_ = None 141 self.return_std = return_std 142 143 def fit(self, X, y, **kwargs): 144 """Fit BayesianRVFL2Regressor to training data (X, y) 145 146 Parameters: 147 148 X: {array-like}, shape = [n_samples, n_features] 149 Training vectors, where n_samples is the number 150 of samples and n_features is the number of features 151 152 y: array-like, shape = [n_samples] 153 Target values 154 155 **kwargs: additional parameters to be passed to 156 self.cook_training_set 157 158 Returns: 
159 160 self: object 161 162 """ 163 164 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 165 166 n, p = X.shape 167 q = self.n_hidden_features 168 169 if self.direct_link == True: 170 r = p + self.n_clusters 171 172 block11 = (self.s1**2) * np.eye(r) 173 block12 = np.zeros((r, q)) 174 block21 = np.zeros((q, r)) 175 block22 = (self.s2**2) * np.eye(q) 176 177 Sigma_prior = mo.rbind( 178 x=mo.cbind(x=block11, y=block12, backend=self.backend), 179 y=mo.cbind(x=block21, y=block22, backend=self.backend), 180 backend=self.backend, 181 ) 182 183 else: 184 Sigma_prior = (self.s2**2) * np.eye(q) 185 186 fit_obj = lmf.beta_Sigma_hat_rvfl2( 187 X=scaled_Z, 188 y=centered_y, 189 Sigma=Sigma_prior, 190 sigma=self.sigma, 191 fit_intercept=False, 192 return_cov=self.return_std, 193 backend=self.backend, 194 ) 195 196 self.beta_ = fit_obj["beta_hat"] 197 198 if self.return_std == True: 199 self.Sigma_ = fit_obj["Sigma_hat"] 200 201 self.GCV_ = fit_obj["GCV"] 202 203 return self 204 205 def predict(self, X, return_std=False, **kwargs): 206 """Predict test data X. 207 208 Parameters: 209 210 X: {array-like}, shape = [n_samples, n_features] 211 Training vectors, where n_samples is the number 212 of samples and n_features is the number of features. 213 214 return_std: {boolean}, standard dev. is returned or not 215 216 **kwargs: additional parameters to be passed to 217 self.cook_test_set 218 219 Returns: 220 221 model predictions: {array-like} 222 223 """ 224 225 if len(X.shape) == 1: # one observation in the test set only 226 n_features = X.shape[0] 227 new_X = mo.rbind( 228 x=X.reshape(1, n_features), 229 y=np.ones(n_features).reshape(1, n_features), 230 backend=self.backend, 231 ) 232 233 self.return_std = return_std 234 235 if self.return_std == False: 236 if len(X.shape) == 1: 237 return ( 238 self.y_mean_ 239 + mo.safe_sparse_dot( 240 self.cook_test_set(new_X, **kwargs), 241 self.beta_, 242 backend=self.backend, 243 ) 244 )[0] 245 246 return self.y_mean_ + mo.safe_sparse_dot( 247 self.cook_test_set(X, **kwargs), 248 self.beta_, 249 backend=self.backend, 250 ) 251 252 else: # confidence interval required for preds? 253 if len(X.shape) == 1: 254 Z = self.cook_test_set(new_X, **kwargs) 255 256 pred_obj = lmf.beta_Sigma_hat_rvfl2( 257 X_star=Z, 258 return_cov=self.return_std, 259 beta_hat_=self.beta_, 260 Sigma_hat_=self.Sigma_, 261 backend=self.backend, 262 ) 263 264 return ( 265 self.y_mean_ + pred_obj["preds"][0], 266 pred_obj["preds_std"][0], 267 ) 268 269 Z = self.cook_test_set(X, **kwargs) 270 271 pred_obj = lmf.beta_Sigma_hat_rvfl2( 272 X_star=Z, 273 return_cov=self.return_std, 274 beta_hat_=self.beta_, 275 Sigma_hat_=self.Sigma_, 276 backend=self.backend, 277 ) 278 279 return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])
Bayesian Random Vector Functional Link Network regression with two priors
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not (False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original features are included (True) in the model's fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
seed: int
reproducibility seed for nodes_sim=='uniform'
s1: float
std. dev. of init. regression parameters in Bayesian Ridge Regression
s2: float
std. dev. of augmented regression parameters in Bayesian Ridge Regression
sigma: float
std. dev. of residuals in Bayesian Ridge Regression
return_std: boolean
if True, uncertainty around predictions is evaluated
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
beta_: array-like
regression coefficients
Sigma_: array-like
covariance of the distribution of fitted parameters
GCV_: float
Generalized cross-validation error
y_mean_: float
average response
Examples:
TBD
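The Examples section is also marked TBD here; a hedged sketch along the same lines, showing the two prior standard deviations s1 (original features) and s2 (hidden-layer features):

```python
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

# s1: prior std. dev. on original-feature coefficients, s2: on hidden-layer coefficients
reg = ns.BayesianRVFL2Regressor(n_hidden_features=25, s1=0.1, s2=0.2, sigma=0.05, seed=123)
reg.fit(X_train, y_train)

mean, std = reg.predict(X_test, return_std=True)
print(mean[:5], std[:5])
```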
143 def fit(self, X, y, **kwargs): 144 """Fit BayesianRVFL2Regressor to training data (X, y) 145 146 Parameters: 147 148 X: {array-like}, shape = [n_samples, n_features] 149 Training vectors, where n_samples is the number 150 of samples and n_features is the number of features 151 152 y: array-like, shape = [n_samples] 153 Target values 154 155 **kwargs: additional parameters to be passed to 156 self.cook_training_set 157 158 Returns: 159 160 self: object 161 162 """ 163 164 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 165 166 n, p = X.shape 167 q = self.n_hidden_features 168 169 if self.direct_link == True: 170 r = p + self.n_clusters 171 172 block11 = (self.s1**2) * np.eye(r) 173 block12 = np.zeros((r, q)) 174 block21 = np.zeros((q, r)) 175 block22 = (self.s2**2) * np.eye(q) 176 177 Sigma_prior = mo.rbind( 178 x=mo.cbind(x=block11, y=block12, backend=self.backend), 179 y=mo.cbind(x=block21, y=block22, backend=self.backend), 180 backend=self.backend, 181 ) 182 183 else: 184 Sigma_prior = (self.s2**2) * np.eye(q) 185 186 fit_obj = lmf.beta_Sigma_hat_rvfl2( 187 X=scaled_Z, 188 y=centered_y, 189 Sigma=Sigma_prior, 190 sigma=self.sigma, 191 fit_intercept=False, 192 return_cov=self.return_std, 193 backend=self.backend, 194 ) 195 196 self.beta_ = fit_obj["beta_hat"] 197 198 if self.return_std == True: 199 self.Sigma_ = fit_obj["Sigma_hat"] 200 201 self.GCV_ = fit_obj["GCV"] 202 203 return self
Fit BayesianRVFL2Regressor to training data (X, y)
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features
y: array-like, shape = [n_samples]
Target values
**kwargs: additional parameters to be passed to
self.cook_training_set
Returns:
self: object
205 def predict(self, X, return_std=False, **kwargs): 206 """Predict test data X. 207 208 Parameters: 209 210 X: {array-like}, shape = [n_samples, n_features] 211 Training vectors, where n_samples is the number 212 of samples and n_features is the number of features. 213 214 return_std: {boolean}, standard dev. is returned or not 215 216 **kwargs: additional parameters to be passed to 217 self.cook_test_set 218 219 Returns: 220 221 model predictions: {array-like} 222 223 """ 224 225 if len(X.shape) == 1: # one observation in the test set only 226 n_features = X.shape[0] 227 new_X = mo.rbind( 228 x=X.reshape(1, n_features), 229 y=np.ones(n_features).reshape(1, n_features), 230 backend=self.backend, 231 ) 232 233 self.return_std = return_std 234 235 if self.return_std == False: 236 if len(X.shape) == 1: 237 return ( 238 self.y_mean_ 239 + mo.safe_sparse_dot( 240 self.cook_test_set(new_X, **kwargs), 241 self.beta_, 242 backend=self.backend, 243 ) 244 )[0] 245 246 return self.y_mean_ + mo.safe_sparse_dot( 247 self.cook_test_set(X, **kwargs), 248 self.beta_, 249 backend=self.backend, 250 ) 251 252 else: # confidence interval required for preds? 253 if len(X.shape) == 1: 254 Z = self.cook_test_set(new_X, **kwargs) 255 256 pred_obj = lmf.beta_Sigma_hat_rvfl2( 257 X_star=Z, 258 return_cov=self.return_std, 259 beta_hat_=self.beta_, 260 Sigma_hat_=self.Sigma_, 261 backend=self.backend, 262 ) 263 264 return ( 265 self.y_mean_ + pred_obj["preds"][0], 266 pred_obj["preds_std"][0], 267 ) 268 269 Z = self.cook_test_set(X, **kwargs) 270 271 pred_obj = lmf.beta_Sigma_hat_rvfl2( 272 X_star=Z, 273 return_cov=self.return_std, 274 beta_hat_=self.beta_, 275 Sigma_hat_=self.Sigma_, 276 backend=self.backend, 277 ) 278 279 return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
return_std: boolean
if True, the standard deviation of the predictions is returned as well
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
42class ClassicalMTS(Base): 43 """Multivariate time series (FactorMTS) forecasting with Factor models 44 45 Parameters: 46 47 model: type of model: str. 48 currently, 'VAR', 'VECM', 'ARIMA', 'ETS', 'Theta' 49 50 Attributes: 51 52 df_: data frame 53 the input data frame, in case a data.frame is provided to `fit` 54 55 level_: int 56 level of confidence for prediction intervals (default is 95) 57 58 Examples: 59 See examples/classical_mts_timeseries.py 60 """ 61 62 # construct the object ----- 63 64 def __init__(self, model="VAR"): 65 66 self.model = model 67 if self.model == "VAR": 68 self.obj = VAR 69 elif self.model == "VECM": 70 self.obj = VECM 71 elif self.model == "ARIMA": 72 self.obj = ARIMA 73 elif self.model == "ETS": 74 self.obj = ExponentialSmoothing 75 elif self.model == "Theta": 76 self.obj = ThetaModel 77 else: 78 raise ValueError("model not recognized") 79 self.n_series = None 80 self.replications = None 81 self.mean_ = None 82 self.upper_ = None 83 self.lower_ = None 84 self.output_dates_ = None 85 self.alpha_ = None 86 self.df_ = None 87 self.residuals_ = [] 88 self.sims_ = None 89 self.level_ = None 90 91 def fit(self, X, **kwargs): 92 """Fit FactorMTS model to training data X, with optional regressors xreg 93 94 Parameters: 95 96 X: {array-like}, shape = [n_samples, n_features] 97 Training time series, where n_samples is the number 98 of samples and n_features is the number of features; 99 X must be in increasing order (most recent observations last) 100 101 **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity) 102 103 Returns: 104 105 self: object 106 """ 107 108 try: 109 self.n_series = X.shape[1] 110 except Exception: 111 self.n_series = 1 112 113 if (isinstance(X, pd.DataFrame) is False) and isinstance( 114 X, pd.Series 115 ) is False: # input data set is a numpy array 116 117 X = pd.DataFrame(X) 118 if self.n_series > 1: 119 self.series_names = ["series" + str(i) for i in range(X.shape[1])] 120 else: 121 self.series_names = "series0" 122 123 else: # input data set is a DataFrame or Series with column names 124 125 X_index = None 126 if X.index is not None and len(X.shape) > 1: 127 X_index = X.index 128 X = copy.deepcopy(mo.convert_df_to_numeric(X)) 129 if X_index is not None: 130 try: 131 X.index = X_index 132 except Exception: 133 pass 134 if isinstance(X, pd.DataFrame): 135 self.series_names = X.columns.tolist() 136 else: 137 self.series_names = X.name 138 139 if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series): 140 self.df_ = X 141 X = X.values 142 self.df_.columns = self.series_names 143 self.input_dates = ts.compute_input_dates(self.df_) 144 else: 145 self.df_ = pd.DataFrame(X, columns=self.series_names) 146 147 if self.model == "Theta": 148 self.obj = self.obj(self.df_, **kwargs).fit() 149 else: 150 self.obj = self.obj(X, **kwargs).fit(**kwargs) 151 152 return self 153 154 def predict(self, h=5, level=95, **kwargs): 155 """Forecast all the time series, h steps ahead 156 157 Parameters: 158 159 h: {integer} 160 Forecasting horizon 161 162 **kwargs: additional parameters to be passed to 163 self.cook_test_set 164 165 Returns: 166 167 model predictions for horizon = h: {array-like} 168 169 """ 170 171 self.output_dates_, frequency = ts.compute_output_dates(self.df_, h) 172 173 self.level_ = level 174 175 self.lower_ = None # do not remove (/!\) 176 177 self.upper_ = None # do not remove (/!\) 178 179 self.sims_ = None # do not remove (/!\) 180 181 self.level_ = level 182 183 
self.alpha_ = 100 - level 184 185 pi_multiplier = norm.ppf(1 - self.alpha_ / 200) 186 187 # Named tuple for forecast results 188 DescribeResult = namedtuple("DescribeResult", ("mean", "lower", "upper")) 189 190 if self.model == "VAR": 191 mean_forecast, lower_bound, upper_bound = self.obj.forecast_interval( 192 self.obj.endog, steps=h, alpha=self.alpha_ / 100, **kwargs 193 ) 194 195 elif self.model == "VECM": 196 forecast_result = self.obj.predict(steps=h) 197 mean_forecast = forecast_result 198 lower_bound, upper_bound = self._compute_confidence_intervals( 199 forecast_result, alpha=self.alpha_ / 100, **kwargs 200 ) 201 202 elif self.model == "ARIMA": 203 forecast_result = self.obj.get_forecast(steps=h) 204 mean_forecast = forecast_result.predicted_mean 205 lower_bound = forecast_result.conf_int()[:, 0] 206 upper_bound = forecast_result.conf_int()[:, 1] 207 208 elif self.model == "ETS": 209 forecast_result = self.obj.forecast(steps=h) 210 residuals = self.obj.resid 211 std_errors = np.std(residuals) 212 mean_forecast = forecast_result 213 lower_bound = forecast_result - pi_multiplier * std_errors 214 upper_bound = forecast_result + pi_multiplier * std_errors 215 216 elif self.model == "Theta": 217 try: 218 mean_forecast = self.obj.forecast(steps=h).values 219 forecast_result = self.obj.prediction_intervals( 220 steps=h, alpha=self.alpha_ / 100, **kwargs 221 ) 222 lower_bound = forecast_result["lower"].values 223 upper_bound = forecast_result["upper"].values 224 except Exception: 225 mean_forecast = self.obj.forecast(steps=h) 226 forecast_result = self.obj.prediction_intervals( 227 steps=h, alpha=self.alpha_ / 100, **kwargs 228 ) 229 lower_bound = forecast_result["lower"] 230 upper_bound = forecast_result["upper"] 231 232 else: 233 234 raise ValueError("model not recognized") 235 236 try: 237 self.mean_ = pd.DataFrame( 238 mean_forecast, 239 columns=self.series_names, 240 index=self.output_dates_, 241 ) 242 self.lower_ = pd.DataFrame( 243 lower_bound, columns=self.series_names, index=self.output_dates_ 244 ) 245 self.upper_ = pd.DataFrame( 246 upper_bound, columns=self.series_names, index=self.output_dates_ 247 ) 248 except Exception: 249 self.mean_ = pd.Series( 250 mean_forecast, name=self.series_names, index=self.output_dates_ 251 ) 252 self.lower_ = pd.Series( 253 lower_bound, name=self.series_names, index=self.output_dates_ 254 ) 255 self.upper_ = pd.Series( 256 upper_bound, name=self.series_names, index=self.output_dates_ 257 ) 258 259 return DescribeResult(mean=self.mean_, lower=self.lower_, upper=self.upper_) 260 261 def _compute_confidence_intervals(self, forecast_result, alpha): 262 """ 263 Compute confidence intervals for VECM forecasts. 264 Uses the covariance of residuals to approximate the confidence intervals. 
265 """ 266 residuals = self.obj.resid 267 cov_matrix = np.cov(residuals.T) # Covariance matrix of residuals 268 std_errors = np.sqrt(np.diag(cov_matrix)) # Standard errors 269 270 z_value = norm.ppf(1 - alpha / 2) # Z-score for the given alpha level 271 lower_bound = forecast_result - z_value * std_errors 272 upper_bound = forecast_result + z_value * std_errors 273 274 return lower_bound, upper_bound 275 276 def score(self, X, training_index, testing_index, scoring=None, **kwargs): 277 """Train on training_index, score on testing_index.""" 278 279 assert ( 280 bool(set(training_index).intersection(set(testing_index))) == False 281 ), "Non-overlapping 'training_index' and 'testing_index' required" 282 283 # Dimensions 284 try: 285 # multivariate time series 286 n, p = X.shape 287 except: 288 # univariate time series 289 n = X.shape[0] 290 p = 1 291 292 # Training and testing sets 293 if p > 1: 294 X_train = X[training_index, :] 295 X_test = X[testing_index, :] 296 else: 297 X_train = X[training_index] 298 X_test = X[testing_index] 299 300 # Horizon 301 h = len(testing_index) 302 assert ( 303 len(training_index) + h 304 ) <= n, "Please check lengths of training and testing windows" 305 306 # Fit and predict 307 self.fit(X_train, **kwargs) 308 preds = self.predict(h=h, **kwargs) 309 310 if scoring is None: 311 scoring = "neg_root_mean_squared_error" 312 313 # check inputs 314 assert scoring in ( 315 "explained_variance", 316 "neg_mean_absolute_error", 317 "neg_mean_squared_error", 318 "neg_root_mean_squared_error", 319 "neg_mean_squared_log_error", 320 "neg_median_absolute_error", 321 "r2", 322 ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \ 323 'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \ 324 'neg_median_absolute_error', 'r2')" 325 326 scoring_options = { 327 "explained_variance": skm2.explained_variance_score, 328 "neg_mean_absolute_error": skm2.mean_absolute_error, 329 "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2), 330 "neg_root_mean_squared_error": lambda x, y: np.sqrt(np.mean((x - y) ** 2)), 331 "neg_mean_squared_log_error": skm2.mean_squared_log_error, 332 "neg_median_absolute_error": skm2.median_absolute_error, 333 "r2": skm2.r2_score, 334 } 335 336 # if p > 1: 337 # return tuple( 338 # [ 339 # scoring_options[scoring]( 340 # X_test[:, i], preds[:, i]#, **kwargs 341 # ) 342 # for i in range(p) 343 # ] 344 # ) 345 # else: 346 return scoring_options[scoring](X_test, preds) 347 348 def plot(self, series=None, type_axis="dates", type_plot="pi"): 349 """Plot time series forecast 350 351 Parameters: 352 353 series: {integer} or {string} 354 series index or name 355 356 """ 357 358 assert all( 359 [ 360 self.mean_ is not None, 361 self.lower_ is not None, 362 self.upper_ is not None, 363 self.output_dates_ is not None, 364 ] 365 ), "model forecasting must be obtained first (with predict)" 366 367 if series is None: 368 assert ( 369 self.n_series == 1 370 ), "please specify series index or name (n_series > 1)" 371 series = 0 372 373 if isinstance(series, str): 374 assert ( 375 series in self.series_names 376 ), f"series {series} doesn't exist in the input dataset" 377 series_idx = self.df_.columns.get_loc(series) 378 else: 379 assert isinstance(series, int) and ( 380 0 <= series < self.n_series 381 ), f"check series index (< {self.n_series})" 382 series_idx = series 383 384 if isinstance(self.df_, pd.DataFrame): 385 y_all = list(self.df_.iloc[:, series_idx]) + list( 386 self.mean_.iloc[:, series_idx] 387 
) 388 y_test = list(self.mean_.iloc[:, series_idx]) 389 else: 390 y_all = list(self.df_.values) + list(self.mean_.values) 391 y_test = list(self.mean_.values) 392 n_points_all = len(y_all) 393 n_points_train = self.df_.shape[0] 394 395 if type_axis == "numeric": 396 x_all = [i for i in range(n_points_all)] 397 x_test = [i for i in range(n_points_train, n_points_all)] 398 399 if type_axis == "dates": # use dates 400 x_all = np.concatenate( 401 (self.input_dates.values, self.output_dates_.values), axis=None 402 ) 403 x_test = self.output_dates_.values 404 405 if type_plot == "pi": 406 fig, ax = plt.subplots() 407 ax.plot(x_all, y_all, "-") 408 ax.plot(x_test, y_test, "-", color="orange") 409 try: 410 ax.fill_between( 411 x_test, 412 self.lower_.iloc[:, series_idx], 413 self.upper_.iloc[:, series_idx], 414 alpha=0.2, 415 color="orange", 416 ) 417 except Exception: 418 ax.fill_between( 419 x_test, 420 self.lower_.values, 421 self.upper_.values, 422 alpha=0.2, 423 color="orange", 424 ) 425 if self.replications is None: 426 if self.n_series > 1: 427 plt.title( 428 f"prediction intervals for {series}", 429 loc="left", 430 fontsize=12, 431 fontweight=0, 432 color="black", 433 ) 434 else: 435 plt.title( 436 f"prediction intervals for input time series", 437 loc="left", 438 fontsize=12, 439 fontweight=0, 440 color="black", 441 ) 442 plt.show() 443 else: # self.replications is not None 444 if self.n_series > 1: 445 plt.title( 446 f"prediction intervals for {self.replications} simulations of {series}", 447 loc="left", 448 fontsize=12, 449 fontweight=0, 450 color="black", 451 ) 452 else: 453 plt.title( 454 f"prediction intervals for {self.replications} simulations of input time series", 455 loc="left", 456 fontsize=12, 457 fontweight=0, 458 color="black", 459 ) 460 plt.show() 461 462 if type_plot == "spaghetti": 463 palette = plt.get_cmap("Set1") 464 sims_ix = getsims(self.sims_, series_idx) 465 plt.plot(x_all, y_all, "-") 466 for col_ix in range( 467 sims_ix.shape[1] 468 ): # avoid this when there are thousands of simulations 469 plt.plot( 470 x_test, 471 sims_ix[:, col_ix], 472 "-", 473 color=palette(col_ix), 474 linewidth=1, 475 alpha=0.9, 476 ) 477 plt.plot(x_all, y_all, "-", color="black") 478 plt.plot(x_test, y_test, "-", color="blue") 479 # Add titles 480 if self.n_series > 1: 481 plt.title( 482 f"{self.replications} simulations of {series}", 483 loc="left", 484 fontsize=12, 485 fontweight=0, 486 color="black", 487 ) 488 else: 489 plt.title( 490 f"{self.replications} simulations of input time series", 491 loc="left", 492 fontsize=12, 493 fontweight=0, 494 color="black", 495 ) 496 plt.xlabel("Time") 497 plt.ylabel("Values") 498 # Show the graph 499 plt.show() 500 501 def cross_val_score( 502 self, 503 X, 504 scoring="root_mean_squared_error", 505 n_jobs=None, 506 verbose=0, 507 xreg=None, 508 initial_window=5, 509 horizon=3, 510 fixed_window=False, 511 show_progress=True, 512 level=95, 513 **kwargs, 514 ): 515 """Evaluate a score by time series cross-validation. 516 517 Parameters: 518 519 X: {array-like, sparse matrix} of shape (n_samples, n_features) 520 The data to fit. 521 522 scoring: str or a function 523 A str in ('root_mean_squared_error', 'mean_squared_error', 'mean_error', 524 'mean_absolute_error', 'mean_error', 'mean_percentage_error', 525 'mean_absolute_percentage_error', 'winkler_score', 'coverage') 526 Or a function defined as 'coverage' and 'winkler_score' in `utils.timeseries` 527 528 n_jobs: int, default=None 529 Number of jobs to run in parallel. 
530 531 verbose: int, default=0 532 The verbosity level. 533 534 xreg: array-like, optional (default=None) 535 Additional (external) regressors to be passed to `fit` 536 xreg must be in 'increasing' order (most recent observations last) 537 538 initial_window: int 539 initial number of consecutive values in each training set sample 540 541 horizon: int 542 number of consecutive values in test set sample 543 544 fixed_window: boolean 545 if False, all training samples start at index 0, and the training 546 window's size is increasing. 547 if True, the training window's size is fixed, and the window is 548 rolling forward 549 550 show_progress: boolean 551 if True, a progress bar is printed 552 553 **kwargs: dict 554 additional parameters to be passed to `fit` and `predict` 555 556 Returns: 557 558 A tuple: descriptive statistics or errors and raw errors 559 560 """ 561 tscv = TimeSeriesSplit() 562 563 tscv_obj = tscv.split( 564 X, 565 initial_window=initial_window, 566 horizon=horizon, 567 fixed_window=fixed_window, 568 ) 569 570 if isinstance(scoring, str): 571 572 assert scoring in ( 573 "root_mean_squared_error", 574 "mean_squared_error", 575 "mean_error", 576 "mean_absolute_error", 577 "mean_percentage_error", 578 "mean_absolute_percentage_error", 579 "winkler_score", 580 "coverage", 581 ), "must have scoring in ('root_mean_squared_error', 'mean_squared_error', 'mean_error', 'mean_absolute_error', 'mean_error', 'mean_percentage_error', 'mean_absolute_percentage_error', 'winkler_score', 'coverage')" 582 583 def err_func(X_test, X_pred, scoring): 584 if (self.replications is not None) or ( 585 self.type_pi == "gaussian" 586 ): # probabilistic 587 if scoring == "winkler_score": 588 return winkler_score(X_pred, X_test, level=level) 589 elif scoring == "coverage": 590 return coverage(X_pred, X_test, level=level) 591 else: 592 return mean_errors( 593 pred=X_pred.mean, actual=X_test, scoring=scoring 594 ) 595 else: # not probabilistic 596 return mean_errors(pred=X_pred, actual=X_test, scoring=scoring) 597 598 else: # isinstance(scoring, str) = False 599 600 err_func = scoring 601 602 errors = [] 603 604 train_indices = [] 605 606 test_indices = [] 607 608 for train_index, test_index in tscv_obj: 609 train_indices.append(train_index) 610 test_indices.append(test_index) 611 612 if show_progress is True: 613 iterator = tqdm(zip(train_indices, test_indices), total=len(train_indices)) 614 else: 615 iterator = zip(train_indices, test_indices) 616 617 for train_index, test_index in iterator: 618 619 if verbose == 1: 620 print(f"TRAIN: {train_index}") 621 print(f"TEST: {test_index}") 622 623 if isinstance(X, pd.DataFrame): 624 self.fit(X.iloc[train_index, :], xreg=xreg, **kwargs) 625 X_test = X.iloc[test_index, :] 626 else: 627 self.fit(X[train_index, :], xreg=xreg, **kwargs) 628 X_test = X[test_index, :] 629 X_pred = self.predict(h=int(len(test_index)), level=level, **kwargs) 630 631 errors.append(err_func(X_test, X_pred, scoring)) 632 633 res = np.asarray(errors) 634 635 return res, describe(res)
Multivariate time series forecasting with classical statistical models
Parameters:
model: str
type of model; currently 'VAR', 'VECM', 'ARIMA', 'ETS', 'Theta'
Attributes:
df_: data frame
the input data frame, in case a DataFrame is provided to `fit`
level_: int
level of confidence for prediction intervals (default is 95)
Examples:
See examples/classical_mts_timeseries.py
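Besides the script referenced above, a hedged sketch with a small simulated bivariate series (data, column names and frequency are illustrative assumptions) fitting a VAR model and forecasting 5 steps ahead:

```python
import nnetsauce as ns
import numpy as np
import pandas as pd

# simulated bivariate series indexed by dates (most recent observations last)
rng = np.random.default_rng(123)
dates = pd.date_range("2020-01-31", periods=100, freq="M")
df = pd.DataFrame(rng.normal(size=(100, 2)).cumsum(axis=0),
                  columns=["series1", "series2"], index=dates)

obj = ns.ClassicalMTS(model="VAR")
obj.fit(df)
res = obj.predict(h=5, level=95)  # named tuple with fields mean, lower, upper
print(res.mean)
print(res.lower)
print(res.upper)
```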
91 def fit(self, X, **kwargs): 92 """Fit FactorMTS model to training data X, with optional regressors xreg 93 94 Parameters: 95 96 X: {array-like}, shape = [n_samples, n_features] 97 Training time series, where n_samples is the number 98 of samples and n_features is the number of features; 99 X must be in increasing order (most recent observations last) 100 101 **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity) 102 103 Returns: 104 105 self: object 106 """ 107 108 try: 109 self.n_series = X.shape[1] 110 except Exception: 111 self.n_series = 1 112 113 if (isinstance(X, pd.DataFrame) is False) and isinstance( 114 X, pd.Series 115 ) is False: # input data set is a numpy array 116 117 X = pd.DataFrame(X) 118 if self.n_series > 1: 119 self.series_names = ["series" + str(i) for i in range(X.shape[1])] 120 else: 121 self.series_names = "series0" 122 123 else: # input data set is a DataFrame or Series with column names 124 125 X_index = None 126 if X.index is not None and len(X.shape) > 1: 127 X_index = X.index 128 X = copy.deepcopy(mo.convert_df_to_numeric(X)) 129 if X_index is not None: 130 try: 131 X.index = X_index 132 except Exception: 133 pass 134 if isinstance(X, pd.DataFrame): 135 self.series_names = X.columns.tolist() 136 else: 137 self.series_names = X.name 138 139 if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series): 140 self.df_ = X 141 X = X.values 142 self.df_.columns = self.series_names 143 self.input_dates = ts.compute_input_dates(self.df_) 144 else: 145 self.df_ = pd.DataFrame(X, columns=self.series_names) 146 147 if self.model == "Theta": 148 self.obj = self.obj(self.df_, **kwargs).fit() 149 else: 150 self.obj = self.obj(X, **kwargs).fit(**kwargs) 151 152 return self
Fit ClassicalMTS model to training data X
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training time series, where n_samples is the number of samples and n_features is the number of features; X must be in increasing order (most recent observations last)
**kwargs: additional parameters passed to the underlying statsmodels model and its `fit` method
Returns:
self: object
154 def predict(self, h=5, level=95, **kwargs): 155 """Forecast all the time series, h steps ahead 156 157 Parameters: 158 159 h: {integer} 160 Forecasting horizon 161 162 **kwargs: additional parameters to be passed to 163 self.cook_test_set 164 165 Returns: 166 167 model predictions for horizon = h: {array-like} 168 169 """ 170 171 self.output_dates_, frequency = ts.compute_output_dates(self.df_, h) 172 173 self.level_ = level 174 175 self.lower_ = None # do not remove (/!\) 176 177 self.upper_ = None # do not remove (/!\) 178 179 self.sims_ = None # do not remove (/!\) 180 181 self.level_ = level 182 183 self.alpha_ = 100 - level 184 185 pi_multiplier = norm.ppf(1 - self.alpha_ / 200) 186 187 # Named tuple for forecast results 188 DescribeResult = namedtuple("DescribeResult", ("mean", "lower", "upper")) 189 190 if self.model == "VAR": 191 mean_forecast, lower_bound, upper_bound = self.obj.forecast_interval( 192 self.obj.endog, steps=h, alpha=self.alpha_ / 100, **kwargs 193 ) 194 195 elif self.model == "VECM": 196 forecast_result = self.obj.predict(steps=h) 197 mean_forecast = forecast_result 198 lower_bound, upper_bound = self._compute_confidence_intervals( 199 forecast_result, alpha=self.alpha_ / 100, **kwargs 200 ) 201 202 elif self.model == "ARIMA": 203 forecast_result = self.obj.get_forecast(steps=h) 204 mean_forecast = forecast_result.predicted_mean 205 lower_bound = forecast_result.conf_int()[:, 0] 206 upper_bound = forecast_result.conf_int()[:, 1] 207 208 elif self.model == "ETS": 209 forecast_result = self.obj.forecast(steps=h) 210 residuals = self.obj.resid 211 std_errors = np.std(residuals) 212 mean_forecast = forecast_result 213 lower_bound = forecast_result - pi_multiplier * std_errors 214 upper_bound = forecast_result + pi_multiplier * std_errors 215 216 elif self.model == "Theta": 217 try: 218 mean_forecast = self.obj.forecast(steps=h).values 219 forecast_result = self.obj.prediction_intervals( 220 steps=h, alpha=self.alpha_ / 100, **kwargs 221 ) 222 lower_bound = forecast_result["lower"].values 223 upper_bound = forecast_result["upper"].values 224 except Exception: 225 mean_forecast = self.obj.forecast(steps=h) 226 forecast_result = self.obj.prediction_intervals( 227 steps=h, alpha=self.alpha_ / 100, **kwargs 228 ) 229 lower_bound = forecast_result["lower"] 230 upper_bound = forecast_result["upper"] 231 232 else: 233 234 raise ValueError("model not recognized") 235 236 try: 237 self.mean_ = pd.DataFrame( 238 mean_forecast, 239 columns=self.series_names, 240 index=self.output_dates_, 241 ) 242 self.lower_ = pd.DataFrame( 243 lower_bound, columns=self.series_names, index=self.output_dates_ 244 ) 245 self.upper_ = pd.DataFrame( 246 upper_bound, columns=self.series_names, index=self.output_dates_ 247 ) 248 except Exception: 249 self.mean_ = pd.Series( 250 mean_forecast, name=self.series_names, index=self.output_dates_ 251 ) 252 self.lower_ = pd.Series( 253 lower_bound, name=self.series_names, index=self.output_dates_ 254 ) 255 self.upper_ = pd.Series( 256 upper_bound, name=self.series_names, index=self.output_dates_ 257 ) 258 259 return DescribeResult(mean=self.mean_, lower=self.lower_, upper=self.upper_)
Forecast all the time series, h steps ahead
Parameters:
h: {integer} Forecasting horizon
**kwargs: additional parameters to be passed to the underlying model's forecasting method
Returns:
model predictions for horizon = h: {array-like}
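Continuing the hypothetical `obj` above, a sketch of consuming the named tuple returned by `predict`; the field names (mean, lower, upper) and the `output_dates_`, `mean_`, `lower_`, `upper_` attributes all appear in the listing above.

```python
# forecast 12 steps ahead with 95% prediction intervals
res = obj.predict(h=12, level=95)

# res is DescribeResult(mean, lower, upper); each field is a DataFrame
# (multivariate case) or a Series (univariate case) indexed by obj.output_dates_
print(res.mean.head())
print(res.lower.head())
print(res.upper.head())

# the same objects are kept as attributes after the call
assert res.mean.equals(obj.mean_)
```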
276 def score(self, X, training_index, testing_index, scoring=None, **kwargs): 277 """Train on training_index, score on testing_index.""" 278 279 assert ( 280 bool(set(training_index).intersection(set(testing_index))) == False 281 ), "Non-overlapping 'training_index' and 'testing_index' required" 282 283 # Dimensions 284 try: 285 # multivariate time series 286 n, p = X.shape 287 except: 288 # univariate time series 289 n = X.shape[0] 290 p = 1 291 292 # Training and testing sets 293 if p > 1: 294 X_train = X[training_index, :] 295 X_test = X[testing_index, :] 296 else: 297 X_train = X[training_index] 298 X_test = X[testing_index] 299 300 # Horizon 301 h = len(testing_index) 302 assert ( 303 len(training_index) + h 304 ) <= n, "Please check lengths of training and testing windows" 305 306 # Fit and predict 307 self.fit(X_train, **kwargs) 308 preds = self.predict(h=h, **kwargs) 309 310 if scoring is None: 311 scoring = "neg_root_mean_squared_error" 312 313 # check inputs 314 assert scoring in ( 315 "explained_variance", 316 "neg_mean_absolute_error", 317 "neg_mean_squared_error", 318 "neg_root_mean_squared_error", 319 "neg_mean_squared_log_error", 320 "neg_median_absolute_error", 321 "r2", 322 ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \ 323 'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \ 324 'neg_median_absolute_error', 'r2')" 325 326 scoring_options = { 327 "explained_variance": skm2.explained_variance_score, 328 "neg_mean_absolute_error": skm2.mean_absolute_error, 329 "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2), 330 "neg_root_mean_squared_error": lambda x, y: np.sqrt(np.mean((x - y) ** 2)), 331 "neg_mean_squared_log_error": skm2.mean_squared_log_error, 332 "neg_median_absolute_error": skm2.median_absolute_error, 333 "r2": skm2.r2_score, 334 } 335 336 # if p > 1: 337 # return tuple( 338 # [ 339 # scoring_options[scoring]( 340 # X_test[:, i], preds[:, i]#, **kwargs 341 # ) 342 # for i in range(p) 343 # ] 344 # ) 345 # else: 346 return scoring_options[scoring](X_test, preds)
Train on training_index, score on testing_index.
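A hedged sketch of setting up the non-overlapping training and testing windows expected by `score`, reusing the synthetic `df` from the first sketch and assuming array input is handled by `fit` as the docstring states; per the listing, the two windows must not overlap, their combined length must not exceed the number of rows, and the default scoring is root mean squared error on the hold-out window.

```python
X = df.values                            # plain array, as in the docstring
n = X.shape[0]

training_index = np.arange(0, n - 12)    # everything but the last year
testing_index = np.arange(n - 12, n)     # last 12 observations held out

# the two windows must not overlap and must fit within the series
assert not set(training_index) & set(testing_index)
assert len(training_index) + len(testing_index) <= n

# fits on the training window, forecasts h = 12 steps, scores the hold-out window
rmse = obj.score(X, training_index, testing_index)
print(rmse)
```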
16class CustomClassifier(Custom, ClassifierMixin): 17 """Custom Classification model 18 19 Attributes: 20 21 obj: object 22 any object containing a method fit (obj.fit()) and a method predict 23 (obj.predict()) 24 25 n_hidden_features: int 26 number of nodes in the hidden layer 27 28 activation_name: str 29 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 30 31 a: float 32 hyperparameter for 'prelu' or 'elu' activation function 33 34 nodes_sim: str 35 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 36 'uniform' 37 38 bias: boolean 39 indicates if the hidden layer contains a bias term (True) or not 40 (False) 41 42 dropout: float 43 regularization parameter; (random) percentage of nodes dropped out 44 of the training 45 46 direct_link: boolean 47 indicates if the original predictors are included (True) in model''s 48 fitting or not (False) 49 50 n_clusters: int 51 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: 52 no clustering) 53 54 cluster_encode: bool 55 defines how the variable containing clusters is treated (default is one-hot) 56 if `False`, then labels are used, without one-hot encoding 57 58 type_clust: str 59 type of clustering method: currently k-means ('kmeans') or Gaussian 60 Mixture Model ('gmm') 61 62 type_scaling: a tuple of 3 strings 63 scaling methods for inputs, hidden layer, and clustering respectively 64 (and when relevant). 65 Currently available: standardization ('std') or MinMax scaling ('minmax') 66 67 col_sample: float 68 percentage of covariates randomly chosen for training 69 70 row_sample: float 71 percentage of rows chosen for training, by stratified bootstrapping 72 73 cv_calibration: int, cross-validation generator, or iterable, default=2 74 Determines the cross-validation splitting strategy. Same as 75 `sklearn.calibration.CalibratedClassifierCV` 76 77 calibration_method: str 78 {‘sigmoid’, ‘isotonic’}, default=’sigmoid’ 79 The method to use for calibration. 
Same as 80 `sklearn.calibration.CalibratedClassifierCV` 81 82 seed: int 83 reproducibility seed for nodes_sim=='uniform' 84 85 backend: str 86 "cpu" or "gpu" or "tpu" 87 88 Examples: 89 90 Note: it's better to use the `DeepClassifier` or `LazyDeepClassifier` classes directly 91 92 ```python 93 import nnetsauce as ns 94 from sklearn.ensemble import RandomForestClassifier 95 from sklearn.model_selection import train_test_split 96 from sklearn.datasets import load_digits 97 from time import time 98 99 digits = load_digits() 100 X = digits.data 101 y = digits.target 102 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, 103 random_state=123) 104 105 # layer 1 (base layer) ---- 106 layer1_regr = RandomForestClassifier(n_estimators=10, random_state=123) 107 108 start = time() 109 110 layer1_regr.fit(X_train, y_train) 111 112 # Accuracy in layer 1 113 print(layer1_regr.score(X_test, y_test)) 114 115 # layer 2 using layer 1 ---- 116 layer2_regr = ns.CustomClassifier(obj = layer1_regr, n_hidden_features=5, 117 direct_link=True, bias=True, 118 nodes_sim='uniform', activation_name='relu', 119 n_clusters=2, seed=123) 120 layer2_regr.fit(X_train, y_train) 121 122 # Accuracy in layer 2 123 print(layer2_regr.score(X_test, y_test)) 124 125 # layer 3 using layer 2 ---- 126 layer3_regr = ns.CustomClassifier(obj = layer2_regr, n_hidden_features=10, 127 direct_link=True, bias=True, dropout=0.7, 128 nodes_sim='uniform', activation_name='relu', 129 n_clusters=2, seed=123) 130 layer3_regr.fit(X_train, y_train) 131 132 # Accuracy in layer 3 133 print(layer3_regr.score(X_test, y_test)) 134 135 print(f"Elapsed {time() - start}") 136 ``` 137 138 """ 139 140 # construct the object ----- 141 142 def __init__( 143 self, 144 obj, 145 n_hidden_features=5, 146 activation_name="relu", 147 a=0.01, 148 nodes_sim="sobol", 149 bias=True, 150 dropout=0, 151 direct_link=True, 152 n_clusters=2, 153 cluster_encode=True, 154 type_clust="kmeans", 155 type_scaling=("std", "std", "std"), 156 col_sample=1, 157 row_sample=1, 158 cv_calibration=2, 159 calibration_method="sigmoid", 160 seed=123, 161 backend="cpu", 162 ): 163 super().__init__( 164 obj=obj, 165 n_hidden_features=n_hidden_features, 166 activation_name=activation_name, 167 a=a, 168 nodes_sim=nodes_sim, 169 bias=bias, 170 dropout=dropout, 171 direct_link=direct_link, 172 n_clusters=n_clusters, 173 cluster_encode=cluster_encode, 174 type_clust=type_clust, 175 type_scaling=type_scaling, 176 col_sample=col_sample, 177 row_sample=row_sample, 178 seed=seed, 179 backend=backend, 180 ) 181 self.coef_ = None 182 self.intercept_ = None 183 self.type_fit = "classification" 184 self.cv_calibration = cv_calibration 185 self.calibration_method = calibration_method 186 self._estimator_type = "classifier" # Explicitly mark as classifier 187 188 def __sklearn_clone__(self): 189 """Create a clone of the estimator. 190 191 This is required for scikit-learn's calibration system to work properly. 
192 """ 193 # Create a new instance with the same parameters 194 clone = CustomClassifier( 195 obj=self.obj, 196 n_hidden_features=self.n_hidden_features, 197 activation_name=self.activation_name, 198 a=self.a, 199 nodes_sim=self.nodes_sim, 200 bias=self.bias, 201 dropout=self.dropout, 202 direct_link=self.direct_link, 203 n_clusters=self.n_clusters, 204 cluster_encode=self.cluster_encode, 205 type_clust=self.type_clust, 206 type_scaling=self.type_scaling, 207 col_sample=self.col_sample, 208 row_sample=self.row_sample, 209 cv_calibration=self.cv_calibration, 210 calibration_method=self.calibration_method, 211 seed=self.seed, 212 backend=self.backend 213 ) 214 return clone 215 216 def fit(self, X, y, sample_weight=None, **kwargs): 217 """Fit custom model to training data (X, y). 218 219 Parameters: 220 221 X: {array-like}, shape = [n_samples, n_features] 222 Training vectors, where n_samples is the number 223 of samples and n_features is the number of features. 224 225 y: array-like, shape = [n_samples] 226 Target values. 227 228 sample_weight: array-like, shape = [n_samples] 229 Sample weights. 230 231 **kwargs: additional parameters to be passed to 232 self.cook_training_set or self.obj.fit 233 234 Returns: 235 236 self: object 237 """ 238 239 if len(X.shape) == 1: 240 if isinstance(X, pd.DataFrame): 241 X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns) 242 else: 243 X = X.reshape(1, -1) 244 245 output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 246 self.classes_ = np.unique(y) 247 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 248 249 # Wrap in CalibratedClassifierCV if needed 250 if self.cv_calibration is not None: 251 self.obj = CalibratedClassifierCV( 252 self.obj, 253 cv=self.cv_calibration, 254 method=self.calibration_method 255 ) 256 257 # if sample_weights, else: (must use self.row_index) 258 if sample_weight is not None: 259 self.obj.fit( 260 scaled_Z, 261 output_y, 262 sample_weight=sample_weight[self.index_row_].ravel(), 263 **kwargs 264 ) 265 return self 266 267 # if sample_weight is None: 268 self.obj.fit(scaled_Z, output_y, **kwargs) 269 self.classes_ = np.unique(y) # for compatibility with sklearn 270 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 271 272 if hasattr(self.obj, "coef_"): 273 self.coef_ = self.obj.coef_ 274 275 if hasattr(self.obj, "intercept_"): 276 self.intercept_ = self.obj.intercept_ 277 278 return self 279 280 def partial_fit(self, X, y, sample_weight=None, **kwargs): 281 """Partial fit custom model to training data (X, y). 282 283 Parameters: 284 285 X: {array-like}, shape = [n_samples, n_features] 286 Subset of training vectors, where n_samples is the number 287 of samples and n_features is the number of features. 288 289 y: array-like, shape = [n_samples] 290 Subset of target values. 291 292 sample_weight: array-like, shape = [n_samples] 293 Sample weights. 
294 295 **kwargs: additional parameters to be passed to 296 self.cook_training_set or self.obj.fit 297 298 Returns: 299 300 self: object 301 """ 302 303 if len(X.shape) == 1: 304 if isinstance(X, pd.DataFrame): 305 X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns) 306 else: 307 X = X.reshape(1, -1) 308 y = np.array([y], dtype=np.integer) 309 310 output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 311 self.n_classes_ = len(np.unique(y)) # for compatibility with sklearn 312 313 # if sample_weights, else: (must use self.row_index) 314 if sample_weight is not None: 315 try: 316 self.obj.partial_fit( 317 scaled_Z, 318 output_y, 319 sample_weight=sample_weight[self.index_row_].ravel(), 320 # **kwargs 321 ) 322 except: 323 NotImplementedError 324 325 return self 326 327 # if sample_weight is None: 328 #try: 329 self.obj.partial_fit(scaled_Z, output_y) 330 #except: 331 # raise NotImplementedError 332 333 self.classes_ = np.unique(y) # for compatibility with sklearn 334 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 335 336 return self 337 338 def predict(self, X, **kwargs): 339 """Predict test data X. 340 341 Parameters: 342 343 X: {array-like}, shape = [n_samples, n_features] 344 Training vectors, where n_samples is the number 345 of samples and n_features is the number of features. 346 347 **kwargs: additional parameters to be passed to 348 self.cook_test_set 349 350 Returns: 351 352 model predictions: {array-like} 353 """ 354 355 if len(X.shape) == 1: 356 n_features = X.shape[0] 357 new_X = mo.rbind( 358 X.reshape(1, n_features), 359 np.ones(n_features).reshape(1, n_features), 360 ) 361 362 return (self.obj.predict(self.cook_test_set(new_X, **kwargs), **kwargs))[0] 363 364 return self.obj.predict(self.cook_test_set(X, **kwargs), **kwargs) 365 366 def predict_proba(self, X, **kwargs): 367 """Predict probabilities for test data X. 368 369 Args: 370 371 X: {array-like}, shape = [n_samples, n_features] 372 Training vectors, where n_samples is the number 373 of samples and n_features is the number of features. 374 375 **kwargs: additional parameters to be passed to 376 self.cook_test_set 377 378 Returns: 379 380 probability estimates for test data: {array-like} 381 """ 382 383 if len(X.shape) == 1: 384 n_features = X.shape[0] 385 new_X = mo.rbind( 386 X.reshape(1, n_features), 387 np.ones(n_features).reshape(1, n_features), 388 ) 389 return ( 390 self.obj.predict_proba(self.cook_test_set(new_X, **kwargs), **kwargs) 391 )[0] 392 return self.obj.predict_proba(self.cook_test_set(X, **kwargs), **kwargs) 393 394 def decision_function(self, X, **kwargs): 395 """Compute the decision function of X. 396 397 Parameters: 398 X: {array-like}, shape = [n_samples, n_features] 399 Samples to compute decision function for. 400 401 **kwargs: additional parameters to be passed to 402 self.cook_test_set 403 404 Returns: 405 array-like of shape (n_samples,) or (n_samples, n_classes) 406 Decision function of the input samples. The order of outputs is the same 407 as that of the classes passed to fit. 
408 """ 409 if not hasattr(self.obj, "decision_function"): 410 # If base classifier doesn't have decision_function, use predict_proba 411 proba = self.predict_proba(X, **kwargs) 412 if proba.shape[1] == 2: 413 return proba[:, 1] # For binary classification 414 return proba # For multiclass 415 416 if len(X.shape) == 1: 417 n_features = X.shape[0] 418 new_X = mo.rbind( 419 X.reshape(1, n_features), 420 np.ones(n_features).reshape(1, n_features), 421 ) 422 423 return ( 424 self.obj.decision_function( 425 self.cook_test_set(new_X, **kwargs), **kwargs 426 ) 427 )[0] 428 429 return self.obj.decision_function(self.cook_test_set(X, **kwargs), **kwargs) 430 431 def score(self, X, y, scoring=None): 432 """Scoring function for classification. 433 434 Args: 435 436 X: {array-like}, shape = [n_samples, n_features] 437 Training vectors, where n_samples is the number 438 of samples and n_features is the number of features. 439 440 y: array-like, shape = [n_samples] 441 Target values. 442 443 scoring: str 444 scoring method (default is accuracy) 445 446 Returns: 447 448 score: float 449 """ 450 451 if scoring is None: 452 scoring = "accuracy" 453 454 if scoring == "accuracy": 455 return skm2.accuracy_score(y, self.predict(X)) 456 457 if scoring == "f1": 458 return skm2.f1_score(y, self.predict(X)) 459 460 if scoring == "precision": 461 return skm2.precision_score(y, self.predict(X)) 462 463 if scoring == "recall": 464 return skm2.recall_score(y, self.predict(X)) 465 466 if scoring == "roc_auc": 467 return skm2.roc_auc_score(y, self.predict(X)) 468 469 if scoring == "log_loss": 470 return skm2.log_loss(y, self.predict_proba(X)) 471 472 if scoring == "balanced_accuracy": 473 return skm2.balanced_accuracy_score(y, self.predict(X)) 474 475 if scoring == "average_precision": 476 return skm2.average_precision_score(y, self.predict(X)) 477 478 if scoring == "neg_brier_score": 479 return -skm2.brier_score_loss(y, self.predict_proba(X)) 480 481 if scoring == "neg_log_loss": 482 return -skm2.log_loss(y, self.predict_proba(X))
Custom Classification model
Parameters:
obj: object
any object containing a method fit (obj.fit()) and a method predict
(obj.predict())
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
cv_calibration: int, cross-validation generator, or iterable, default=2
Determines the cross-validation splitting strategy. Same as
`sklearn.calibration.CalibratedClassifierCV`
calibration_method: str
{'sigmoid', 'isotonic'}, default='sigmoid'
The method to use for calibration. Same as
`sklearn.calibration.CalibratedClassifierCV`
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu" or "gpu" or "tpu"
Examples:
Note: it's better to use the `DeepClassifier` or `LazyDeepClassifier` classes directly
import nnetsauce as ns
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_digits
from time import time
digits = load_digits()
X = digits.data
y = digits.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
random_state=123)
# layer 1 (base layer) ----
layer1_regr = RandomForestClassifier(n_estimators=10, random_state=123)
start = time()
layer1_regr.fit(X_train, y_train)
# Accuracy in layer 1
print(layer1_regr.score(X_test, y_test))
# layer 2 using layer 1 ----
layer2_regr = ns.CustomClassifier(obj = layer1_regr, n_hidden_features=5,
direct_link=True, bias=True,
nodes_sim='uniform', activation_name='relu',
n_clusters=2, seed=123)
layer2_regr.fit(X_train, y_train)
# Accuracy in layer 2
print(layer2_regr.score(X_test, y_test))
# layer 3 using layer 2 ----
layer3_regr = ns.CustomClassifier(obj = layer2_regr, n_hidden_features=10,
direct_link=True, bias=True, dropout=0.7,
nodes_sim='uniform', activation_name='relu',
n_clusters=2, seed=123)
layer3_regr.fit(X_train, y_train)
# Accuracy in layer 3
print(layer3_regr.score(X_test, y_test))
print(f"Elapsed {time() - start}")
216 def fit(self, X, y, sample_weight=None, **kwargs): 217 """Fit custom model to training data (X, y). 218 219 Parameters: 220 221 X: {array-like}, shape = [n_samples, n_features] 222 Training vectors, where n_samples is the number 223 of samples and n_features is the number of features. 224 225 y: array-like, shape = [n_samples] 226 Target values. 227 228 sample_weight: array-like, shape = [n_samples] 229 Sample weights. 230 231 **kwargs: additional parameters to be passed to 232 self.cook_training_set or self.obj.fit 233 234 Returns: 235 236 self: object 237 """ 238 239 if len(X.shape) == 1: 240 if isinstance(X, pd.DataFrame): 241 X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns) 242 else: 243 X = X.reshape(1, -1) 244 245 output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 246 self.classes_ = np.unique(y) 247 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 248 249 # Wrap in CalibratedClassifierCV if needed 250 if self.cv_calibration is not None: 251 self.obj = CalibratedClassifierCV( 252 self.obj, 253 cv=self.cv_calibration, 254 method=self.calibration_method 255 ) 256 257 # if sample_weights, else: (must use self.row_index) 258 if sample_weight is not None: 259 self.obj.fit( 260 scaled_Z, 261 output_y, 262 sample_weight=sample_weight[self.index_row_].ravel(), 263 **kwargs 264 ) 265 return self 266 267 # if sample_weight is None: 268 self.obj.fit(scaled_Z, output_y, **kwargs) 269 self.classes_ = np.unique(y) # for compatibility with sklearn 270 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 271 272 if hasattr(self.obj, "coef_"): 273 self.coef_ = self.obj.coef_ 274 275 if hasattr(self.obj, "intercept_"): 276 self.intercept_ = self.obj.intercept_ 277 278 return self
Fit custom model to training data (X, y).
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
sample_weight: array-like, shape = [n_samples]
Sample weights.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
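A hedged sketch of calling `fit` with the optional `sample_weight` argument documented above, using a scikit-learn base learner. As the fit code shows, when `cv_calibration` is not `None` the base learner is wrapped in `CalibratedClassifierCV` before fitting; the weights here are uniform and only illustrate the keyword, and `row_sample=0.9` is an assumption meant to exercise the subsampled-rows path.

```python
import numpy as np
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=123)

clf = ns.CustomClassifier(obj=LogisticRegression(max_iter=1000),
                          n_hidden_features=5, n_clusters=2,
                          row_sample=0.9, seed=123)

# uniform sample weights, just to illustrate the keyword
w = np.ones(X_train.shape[0])
clf.fit(X_train, y_train, sample_weight=w)

print(clf.score(X_test, y_test))  # accuracy by default
```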
338 def predict(self, X, **kwargs): 339 """Predict test data X. 340 341 Parameters: 342 343 X: {array-like}, shape = [n_samples, n_features] 344 Training vectors, where n_samples is the number 345 of samples and n_features is the number of features. 346 347 **kwargs: additional parameters to be passed to 348 self.cook_test_set 349 350 Returns: 351 352 model predictions: {array-like} 353 """ 354 355 if len(X.shape) == 1: 356 n_features = X.shape[0] 357 new_X = mo.rbind( 358 X.reshape(1, n_features), 359 np.ones(n_features).reshape(1, n_features), 360 ) 361 362 return (self.obj.predict(self.cook_test_set(new_X, **kwargs), **kwargs))[0] 363 364 return self.obj.predict(self.cook_test_set(X, **kwargs), **kwargs)
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
366 def predict_proba(self, X, **kwargs): 367 """Predict probabilities for test data X. 368 369 Args: 370 371 X: {array-like}, shape = [n_samples, n_features] 372 Training vectors, where n_samples is the number 373 of samples and n_features is the number of features. 374 375 **kwargs: additional parameters to be passed to 376 self.cook_test_set 377 378 Returns: 379 380 probability estimates for test data: {array-like} 381 """ 382 383 if len(X.shape) == 1: 384 n_features = X.shape[0] 385 new_X = mo.rbind( 386 X.reshape(1, n_features), 387 np.ones(n_features).reshape(1, n_features), 388 ) 389 return ( 390 self.obj.predict_proba(self.cook_test_set(new_X, **kwargs), **kwargs) 391 )[0] 392 return self.obj.predict_proba(self.cook_test_set(X, **kwargs), **kwargs)
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
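Continuing the hypothetical `clf` from the previous sketch, labels and probabilities are retrieved as with any scikit-learn classifier; each row of the `predict_proba` output sums to 1.

```python
labels = clf.predict(X_test)
proba = clf.predict_proba(X_test)

print(labels[:5])
print(proba[:5])
print(proba.shape)         # (n_samples, n_classes)
print(proba.sum(axis=1))   # each row sums to 1 (up to rounding)
```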
431 def score(self, X, y, scoring=None): 432 """Scoring function for classification. 433 434 Args: 435 436 X: {array-like}, shape = [n_samples, n_features] 437 Training vectors, where n_samples is the number 438 of samples and n_features is the number of features. 439 440 y: array-like, shape = [n_samples] 441 Target values. 442 443 scoring: str 444 scoring method (default is accuracy) 445 446 Returns: 447 448 score: float 449 """ 450 451 if scoring is None: 452 scoring = "accuracy" 453 454 if scoring == "accuracy": 455 return skm2.accuracy_score(y, self.predict(X)) 456 457 if scoring == "f1": 458 return skm2.f1_score(y, self.predict(X)) 459 460 if scoring == "precision": 461 return skm2.precision_score(y, self.predict(X)) 462 463 if scoring == "recall": 464 return skm2.recall_score(y, self.predict(X)) 465 466 if scoring == "roc_auc": 467 return skm2.roc_auc_score(y, self.predict(X)) 468 469 if scoring == "log_loss": 470 return skm2.log_loss(y, self.predict_proba(X)) 471 472 if scoring == "balanced_accuracy": 473 return skm2.balanced_accuracy_score(y, self.predict(X)) 474 475 if scoring == "average_precision": 476 return skm2.average_precision_score(y, self.predict(X)) 477 478 if scoring == "neg_brier_score": 479 return -skm2.brier_score_loss(y, self.predict_proba(X)) 480 481 if scoring == "neg_log_loss": 482 return -skm2.log_loss(y, self.predict_proba(X))
Scoring function for classification.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method (default is accuracy)
Returns:
score: float
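A short sketch of the scoring options listed above, still using the hypothetical `clf`; `'accuracy'` is the default, and probability-based metrics such as `'log_loss'` use `predict_proba` internally.

```python
print(clf.score(X_test, y_test))                               # accuracy (default)
print(clf.score(X_test, y_test, scoring="balanced_accuracy"))
print(clf.score(X_test, y_test, scoring="log_loss"))           # uses predict_proba
```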
18class CustomRegressor(Custom, RegressorMixin): 19 """Custom Regression model 20 21 This class is used to 'augment' any regression model with transformed features. 22 23 Parameters: 24 25 obj: object 26 any object containing a method fit (obj.fit()) and a method predict 27 (obj.predict()) 28 29 n_hidden_features: int 30 number of nodes in the hidden layer 31 32 activation_name: str 33 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 34 35 a: float 36 hyperparameter for 'prelu' or 'elu' activation function 37 38 nodes_sim: str 39 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 40 'uniform' 41 42 bias: boolean 43 indicates if the hidden layer contains a bias term (True) or not 44 (False) 45 46 dropout: float 47 regularization parameter; (random) percentage of nodes dropped out 48 of the training 49 50 direct_link: boolean 51 indicates if the original predictors are included (True) in model's 52 fitting or not (False) 53 54 n_clusters: int 55 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: 56 no clustering) 57 58 cluster_encode: bool 59 defines how the variable containing clusters is treated (default is one-hot) 60 if `False`, then labels are used, without one-hot encoding 61 62 type_clust: str 63 type of clustering method: currently k-means ('kmeans') or Gaussian 64 Mixture Model ('gmm') 65 66 type_scaling: a tuple of 3 strings 67 scaling methods for inputs, hidden layer, and clustering respectively 68 (and when relevant). 69 Currently available: standardization ('std') or MinMax scaling ('minmax') 70 71 type_pi: str. 72 type of prediction interval; currently `None` (split or local 73 conformal without simulation), "kde" or "bootstrap" (simulated split 74 conformal). 75 76 replications: int. 77 number of replications (if needed) for predictive simulation. 78 Used only in `self.predict`, for `self.kernel` in ('gaussian', 79 'tophat') and `self.type_pi = 'kde'`. Default is `None`. 80 81 kernel: str. 82 the kernel to use for kernel density estimation (used for predictive 83 simulation in `self.predict`, with `method='splitconformal'` and 84 `type_pi = 'kde'`). Currently, either 'gaussian' or 'tophat'. 85 86 type_split: str. 87 Type of splitting for conformal prediction. 
None (default), or 88 "random" (random split of data) or "sequential" (sequential split of data) 89 90 col_sample: float 91 percentage of covariates randomly chosen for training 92 93 row_sample: float 94 percentage of rows chosen for training, by stratified bootstrapping 95 96 level: float 97 confidence level for prediction intervals 98 99 pi_method: str 100 method for prediction intervals: 'splitconformal' or 'localconformal' 101 102 seed: int 103 reproducibility seed for nodes_sim=='uniform' 104 105 type_fit: str 106 'regression' 107 108 backend: str 109 "cpu" or "gpu" or "tpu" 110 111 Examples: 112 113 See [https://thierrymoudiki.github.io/blog/2024/03/18/python/conformal-and-bayesian-regression](https://thierrymoudiki.github.io/blog/2024/03/18/python/conformal-and-bayesian-regression) 114 115 """ 116 117 # construct the object ----- 118 119 def __init__( 120 self, 121 obj, 122 n_hidden_features=5, 123 activation_name="relu", 124 a=0.01, 125 nodes_sim="sobol", 126 bias=True, 127 dropout=0, 128 direct_link=True, 129 n_clusters=2, 130 cluster_encode=True, 131 type_clust="kmeans", 132 type_scaling=("std", "std", "std"), 133 type_pi=None, 134 replications=None, 135 kernel=None, 136 type_split=None, 137 col_sample=1, 138 row_sample=1, 139 level=None, 140 pi_method=None, 141 seed=123, 142 backend="cpu", 143 ): 144 super().__init__( 145 obj=obj, 146 n_hidden_features=n_hidden_features, 147 activation_name=activation_name, 148 a=a, 149 nodes_sim=nodes_sim, 150 bias=bias, 151 dropout=dropout, 152 direct_link=direct_link, 153 n_clusters=n_clusters, 154 cluster_encode=cluster_encode, 155 type_clust=type_clust, 156 type_scaling=type_scaling, 157 col_sample=col_sample, 158 row_sample=row_sample, 159 seed=seed, 160 backend=backend, 161 ) 162 163 self.type_fit = "regression" 164 self.type_pi = type_pi 165 self.replications = replications 166 self.kernel = kernel 167 self.type_split = type_split 168 self.level = level 169 self.pi_method = pi_method 170 self.coef_ = None 171 self.intercept_ = None 172 self.X_ = None 173 self.y_ = None 174 self.aic_ = None 175 self.aicc_ = None 176 self.bic_ = None 177 178 def fit(self, X, y, sample_weight=None, **kwargs): 179 """Fit custom model to training data (X, y). 180 181 Parameters: 182 183 X: {array-like}, shape = [n_samples, n_features] 184 Training vectors, where n_samples is the number 185 of samples and n_features is the number of features. 186 187 y: array-like, shape = [n_samples] 188 Target values. 189 190 sample_weight: array-like, shape = [n_samples] 191 Sample weights. 
192 193 **kwargs: additional parameters to be passed to 194 self.cook_training_set or self.obj.fit 195 196 Returns: 197 198 self: object 199 200 """ 201 202 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 203 204 if self.level is not None: 205 self.obj = PredictionInterval( 206 obj=self.obj, method=self.pi_method, level=self.level 207 ) 208 209 # if sample_weights, else: (must use self.row_index) 210 if sample_weight is not None: 211 self.obj.fit( 212 scaled_Z, 213 centered_y, 214 sample_weight=sample_weight[self.index_row_].ravel(), 215 **kwargs 216 ) 217 218 return self 219 220 self.obj.fit(scaled_Z, centered_y, **kwargs) 221 222 self.X_ = X 223 224 self.y_ = y 225 226 # Compute SSE 227 y_pred = self.predict(X) 228 self.sse_ = np.sum((y - y_pred) ** 2) 229 230 # Get number of parameters 231 n_params = self.n_hidden_features + X.shape[1] # hidden features + original features 232 if self.n_clusters > 0: 233 n_params += self.n_clusters # add clusters if used 234 235 # Compute information criteria 236 n_samples = X.shape[0] 237 temp = n_samples * np.log(self.sse_/n_samples) 238 self.aic_ = temp + 2 * n_params 239 self.aicc_ = self.aic_ + (2 * n_params * (n_params + 1))/(n_samples - n_params - 1) 240 self.bic_ = temp + n_params * np.log(n_samples) 241 242 if hasattr(self.obj, "coef_"): 243 self.coef_ = self.obj.coef_ 244 245 if hasattr(self.obj, "intercept_"): 246 self.intercept_ = self.obj.intercept_ 247 248 return self 249 250 def partial_fit(self, X, y, **kwargs): 251 """Partial fit custom model to training data (X, y). 252 253 Parameters: 254 255 X: {array-like}, shape = [n_samples, n_features] 256 Subset of training vectors, where n_samples is the number 257 of samples and n_features is the number of features. 258 259 y: array-like, shape = [n_samples] 260 Subset of target values. 261 262 **kwargs: additional parameters to be passed to 263 self.cook_training_set or self.obj.fit 264 265 Returns: 266 267 self: object 268 269 """ 270 271 if len(X.shape) == 1: 272 if isinstance(X, pd.DataFrame): 273 X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns) 274 else: 275 X = X.reshape(1, -1) 276 y = np.array([y]) 277 278 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 279 280 self.obj.partial_fit(scaled_Z, centered_y, **kwargs) 281 282 self.X_ = X 283 284 self.y_ = y 285 286 return self 287 288 def predict(self, X, level=95, method=None, **kwargs): 289 """Predict test data X. 290 291 Parameters: 292 293 X: {array-like}, shape = [n_samples, n_features] 294 Training vectors, where n_samples is the number 295 of samples and n_features is the number of features. 
296 297 level: int 298 Level of confidence (default = 95) 299 300 method: str 301 `None`, or 'splitconformal', 'localconformal' 302 prediction (if you specify `return_pi = True`) 303 304 **kwargs: additional parameters 305 `return_pi = True` for conformal prediction, 306 with `method` in ('splitconformal', 'localconformal') 307 or `return_std = True` for `self.obj` in 308 (`sklearn.linear_model.BayesianRidge`, 309 `sklearn.linear_model.ARDRegressor`, 310 `sklearn.gaussian_process.GaussianProcessRegressor`)` 311 312 Returns: 313 314 model predictions: 315 an array if uncertainty quantification is not requested, 316 or a tuple if with prediction intervals and simulations 317 if `return_std = True` (mean, standard deviation, 318 lower and upper prediction interval) or `return_pi = True` 319 () 320 321 """ 322 323 if "return_std" in kwargs: 324 325 alpha = 100 - level 326 pi_multiplier = norm.ppf(1 - alpha / 200) 327 328 if len(X.shape) == 1: 329 330 n_features = X.shape[0] 331 new_X = mo.rbind( 332 X.reshape(1, n_features), 333 np.ones(n_features).reshape(1, n_features), 334 ) 335 336 mean_, std_ = self.obj.predict( 337 self.cook_test_set(new_X, **kwargs), return_std=True 338 )[0] 339 340 preds = self.y_mean_ + mean_ 341 lower = self.y_mean_ + (mean_ - pi_multiplier * std_) 342 upper = self.y_mean_ + (mean_ + pi_multiplier * std_) 343 344 DescribeResults = namedtuple( 345 "DescribeResults", ["mean", "std", "lower", "upper"] 346 ) 347 348 return DescribeResults(preds, std_, lower, upper) 349 350 # len(X.shape) > 1 351 mean_, std_ = self.obj.predict( 352 self.cook_test_set(X, **kwargs), return_std=True 353 ) 354 355 preds = self.y_mean_ + mean_ 356 lower = self.y_mean_ + (mean_ - pi_multiplier * std_) 357 upper = self.y_mean_ + (mean_ + pi_multiplier * std_) 358 359 DescribeResults = namedtuple( 360 "DescribeResults", ["mean", "std", "lower", "upper"] 361 ) 362 363 return DescribeResults(preds, std_, lower, upper) 364 365 if "return_pi" in kwargs: 366 assert method in ( 367 "splitconformal", 368 "localconformal", 369 ), "method must be in ('splitconformal', 'localconformal')" 370 self.pi = PredictionInterval( 371 obj=self, 372 method=method, 373 level=level, 374 type_pi=self.type_pi, 375 replications=self.replications, 376 kernel=self.kernel, 377 ) 378 379 if len(self.X_.shape) == 1: 380 if isinstance(X, pd.DataFrame): 381 self.X_ = pd.DataFrame( 382 self.X_.values.reshape(1, -1), columns=self.X_.columns 383 ) 384 else: 385 self.X_ = self.X_.reshape(1, -1) 386 self.y_ = np.array([self.y_]) 387 388 self.pi.fit(self.X_, self.y_) 389 # self.X_ = None # consumes memory to keep, dangerous to delete (side effect) 390 # self.y_ = None # consumes memory to keep, dangerous to delete (side effect) 391 preds = self.pi.predict(X, return_pi=True) 392 return preds 393 394 # "return_std" not in kwargs 395 if len(X.shape) == 1: 396 397 n_features = X.shape[0] 398 new_X = mo.rbind( 399 X.reshape(1, n_features), 400 np.ones(n_features).reshape(1, n_features), 401 ) 402 403 return ( 404 self.y_mean_ 405 + self.obj.predict(self.cook_test_set(new_X, **kwargs), **kwargs) 406 )[0] 407 408 # len(X.shape) > 1 409 return self.y_mean_ + self.obj.predict( 410 self.cook_test_set(X, **kwargs), **kwargs 411 ) 412 413 def score(self, X, y, scoring=None): 414 """Compute the score of the model. 415 416 Parameters: 417 418 X: {array-like}, shape = [n_samples, n_features] 419 Training vectors, where n_samples is the number 420 of samples and n_features is the number of features. 
421 422 y: array-like, shape = [n_samples] 423 Target values. 424 425 scoring: str 426 scoring method 427 428 Returns: 429 430 score: float 431 432 """ 433 434 if scoring is None: 435 return np.sqrt(np.mean((self.predict(X) - y) ** 2)) 436 437 return skm2.get_scorer(scoring)(self, X, y)
Custom Regression model
This class is used to 'augment' any regression model with transformed features.
Parameters:
obj: object
any object containing a method fit (obj.fit()) and a method predict
(obj.predict())
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
type_pi: str.
type of prediction interval; currently `None` (split or local
conformal without simulation), "kde" or "bootstrap" (simulated split
conformal).
replications: int.
number of replications (if needed) for predictive simulation.
Used only in `self.predict`, for `self.kernel` in ('gaussian',
'tophat') and `self.type_pi = 'kde'`. Default is `None`.
kernel: str.
the kernel to use for kernel density estimation (used for predictive
simulation in `self.predict`, with `method='splitconformal'` and
`type_pi = 'kde'`). Currently, either 'gaussian' or 'tophat'.
type_split: str.
Type of splitting for conformal prediction. None (default), or
"random" (random split of data) or "sequential" (sequential split of data)
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
level: float
confidence level for prediction intervals
pi_method: str
method for prediction intervals: 'splitconformal' or 'localconformal'
seed: int
reproducibility seed for nodes_sim=='uniform'
type_fit: str
'regression'
backend: str
"cpu" or "gpu" or "tpu"
Examples:
See https://thierrymoudiki.github.io/blog/2024/03/18/python/conformal-and-bayesian-regression
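Since the Examples section only links to the blog post above, here is a minimal hedged sketch of the basic workflow (fit, point prediction, default RMSE score) with a scikit-learn base regressor; the diabetes dataset is purely illustrative.

```python
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=123)

reg = ns.CustomRegressor(obj=Ridge(), n_hidden_features=5,
                         n_clusters=2, seed=123)
reg.fit(X_train, y_train)

print(reg.predict(X_test)[:5])     # point predictions
print(reg.score(X_test, y_test))   # RMSE when scoring is None
print(reg.aic_, reg.bic_)          # information criteria computed at fit time
```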
178 def fit(self, X, y, sample_weight=None, **kwargs): 179 """Fit custom model to training data (X, y). 180 181 Parameters: 182 183 X: {array-like}, shape = [n_samples, n_features] 184 Training vectors, where n_samples is the number 185 of samples and n_features is the number of features. 186 187 y: array-like, shape = [n_samples] 188 Target values. 189 190 sample_weight: array-like, shape = [n_samples] 191 Sample weights. 192 193 **kwargs: additional parameters to be passed to 194 self.cook_training_set or self.obj.fit 195 196 Returns: 197 198 self: object 199 200 """ 201 202 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 203 204 if self.level is not None: 205 self.obj = PredictionInterval( 206 obj=self.obj, method=self.pi_method, level=self.level 207 ) 208 209 # if sample_weights, else: (must use self.row_index) 210 if sample_weight is not None: 211 self.obj.fit( 212 scaled_Z, 213 centered_y, 214 sample_weight=sample_weight[self.index_row_].ravel(), 215 **kwargs 216 ) 217 218 return self 219 220 self.obj.fit(scaled_Z, centered_y, **kwargs) 221 222 self.X_ = X 223 224 self.y_ = y 225 226 # Compute SSE 227 y_pred = self.predict(X) 228 self.sse_ = np.sum((y - y_pred) ** 2) 229 230 # Get number of parameters 231 n_params = self.n_hidden_features + X.shape[1] # hidden features + original features 232 if self.n_clusters > 0: 233 n_params += self.n_clusters # add clusters if used 234 235 # Compute information criteria 236 n_samples = X.shape[0] 237 temp = n_samples * np.log(self.sse_/n_samples) 238 self.aic_ = temp + 2 * n_params 239 self.aicc_ = self.aic_ + (2 * n_params * (n_params + 1))/(n_samples - n_params - 1) 240 self.bic_ = temp + n_params * np.log(n_samples) 241 242 if hasattr(self.obj, "coef_"): 243 self.coef_ = self.obj.coef_ 244 245 if hasattr(self.obj, "intercept_"): 246 self.intercept_ = self.obj.intercept_ 247 248 return self
Fit custom model to training data (X, y).
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
sample_weight: array-like, shape = [n_samples]
Sample weights.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
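As the fit listing above shows, `fit` also mirrors the base learner's `coef_` and `intercept_` when they exist, and stores `sse_`, `aic_`, `aicc_` and `bic_`. A short hedged illustration, reusing the hypothetical split from the previous sketch.

```python
from sklearn.linear_model import Ridge

reg_lin = ns.CustomRegressor(obj=Ridge(), n_hidden_features=3, seed=123)
reg_lin.fit(X_train, y_train)

print(reg_lin.coef_.shape)   # copied from the fitted Ridge on the augmented features
print(reg_lin.intercept_)
print(reg_lin.sse_, reg_lin.aicc_)
```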
288 def predict(self, X, level=95, method=None, **kwargs): 289 """Predict test data X. 290 291 Parameters: 292 293 X: {array-like}, shape = [n_samples, n_features] 294 Training vectors, where n_samples is the number 295 of samples and n_features is the number of features. 296 297 level: int 298 Level of confidence (default = 95) 299 300 method: str 301 `None`, or 'splitconformal', 'localconformal' 302 prediction (if you specify `return_pi = True`) 303 304 **kwargs: additional parameters 305 `return_pi = True` for conformal prediction, 306 with `method` in ('splitconformal', 'localconformal') 307 or `return_std = True` for `self.obj` in 308 (`sklearn.linear_model.BayesianRidge`, 309 `sklearn.linear_model.ARDRegressor`, 310 `sklearn.gaussian_process.GaussianProcessRegressor`)` 311 312 Returns: 313 314 model predictions: 315 an array if uncertainty quantification is not requested, 316 or a tuple if with prediction intervals and simulations 317 if `return_std = True` (mean, standard deviation, 318 lower and upper prediction interval) or `return_pi = True` 319 () 320 321 """ 322 323 if "return_std" in kwargs: 324 325 alpha = 100 - level 326 pi_multiplier = norm.ppf(1 - alpha / 200) 327 328 if len(X.shape) == 1: 329 330 n_features = X.shape[0] 331 new_X = mo.rbind( 332 X.reshape(1, n_features), 333 np.ones(n_features).reshape(1, n_features), 334 ) 335 336 mean_, std_ = self.obj.predict( 337 self.cook_test_set(new_X, **kwargs), return_std=True 338 )[0] 339 340 preds = self.y_mean_ + mean_ 341 lower = self.y_mean_ + (mean_ - pi_multiplier * std_) 342 upper = self.y_mean_ + (mean_ + pi_multiplier * std_) 343 344 DescribeResults = namedtuple( 345 "DescribeResults", ["mean", "std", "lower", "upper"] 346 ) 347 348 return DescribeResults(preds, std_, lower, upper) 349 350 # len(X.shape) > 1 351 mean_, std_ = self.obj.predict( 352 self.cook_test_set(X, **kwargs), return_std=True 353 ) 354 355 preds = self.y_mean_ + mean_ 356 lower = self.y_mean_ + (mean_ - pi_multiplier * std_) 357 upper = self.y_mean_ + (mean_ + pi_multiplier * std_) 358 359 DescribeResults = namedtuple( 360 "DescribeResults", ["mean", "std", "lower", "upper"] 361 ) 362 363 return DescribeResults(preds, std_, lower, upper) 364 365 if "return_pi" in kwargs: 366 assert method in ( 367 "splitconformal", 368 "localconformal", 369 ), "method must be in ('splitconformal', 'localconformal')" 370 self.pi = PredictionInterval( 371 obj=self, 372 method=method, 373 level=level, 374 type_pi=self.type_pi, 375 replications=self.replications, 376 kernel=self.kernel, 377 ) 378 379 if len(self.X_.shape) == 1: 380 if isinstance(X, pd.DataFrame): 381 self.X_ = pd.DataFrame( 382 self.X_.values.reshape(1, -1), columns=self.X_.columns 383 ) 384 else: 385 self.X_ = self.X_.reshape(1, -1) 386 self.y_ = np.array([self.y_]) 387 388 self.pi.fit(self.X_, self.y_) 389 # self.X_ = None # consumes memory to keep, dangerous to delete (side effect) 390 # self.y_ = None # consumes memory to keep, dangerous to delete (side effect) 391 preds = self.pi.predict(X, return_pi=True) 392 return preds 393 394 # "return_std" not in kwargs 395 if len(X.shape) == 1: 396 397 n_features = X.shape[0] 398 new_X = mo.rbind( 399 X.reshape(1, n_features), 400 np.ones(n_features).reshape(1, n_features), 401 ) 402 403 return ( 404 self.y_mean_ 405 + self.obj.predict(self.cook_test_set(new_X, **kwargs), **kwargs) 406 )[0] 407 408 # len(X.shape) > 1 409 return self.y_mean_ + self.obj.predict( 410 self.cook_test_set(X, **kwargs), **kwargs 411 )
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
level: int
Level of confidence (default = 95)
method: str
`None`, or 'splitconformal', 'localconformal'
prediction (if you specify `return_pi = True`)
**kwargs: additional parameters
`return_pi = True` for conformal prediction,
with `method` in ('splitconformal', 'localconformal')
or `return_std = True` for `self.obj` in
(`sklearn.linear_model.BayesianRidge`,
`sklearn.linear_model.ARDRegression`,
`sklearn.gaussian_process.GaussianProcessRegressor`)
Returns:
model predictions:
an array if uncertainty quantification is not requested,
or a named tuple otherwise: with `return_std = True`,
(mean, standard deviation, lower and upper prediction interval);
with `return_pi = True`, conformal prediction intervals
(and simulations, when `type_pi` is 'kde' or 'bootstrap')
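Two hedged sketches of the uncertainty-quantification paths described above, continuing the hypothetical split from the earlier sketch: `return_std=True` only makes sense when the base learner's `predict` supports `return_std` (e.g. `BayesianRidge`), while `return_pi=True` requires `method` in ('splitconformal', 'localconformal') and works with any base learner exposing fit/predict.

```python
import nnetsauce as ns
from sklearn.linear_model import BayesianRidge

# Gaussian-style intervals from a base learner that supports return_std
reg_bayes = ns.CustomRegressor(obj=BayesianRidge(), n_hidden_features=5, seed=123)
reg_bayes.fit(X_train, y_train)
res = reg_bayes.predict(X_test, level=95, return_std=True)
print(res.mean[:5])   # DescribeResults fields: mean, std, lower, upper
print(res.lower[:5])
print(res.upper[:5])

# split-conformal prediction intervals
reg_conf = ns.CustomRegressor(obj=BayesianRidge(), n_hidden_features=5, seed=123)
reg_conf.fit(X_train, y_train)
res_pi = reg_conf.predict(X_test, level=95, method="splitconformal", return_pi=True)
# res_pi bundles point predictions and conformal bounds (see PredictionInterval)
```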
413 def score(self, X, y, scoring=None): 414 """Compute the score of the model. 415 416 Parameters: 417 418 X: {array-like}, shape = [n_samples, n_features] 419 Training vectors, where n_samples is the number 420 of samples and n_features is the number of features. 421 422 y: array-like, shape = [n_samples] 423 Target values. 424 425 scoring: str 426 scoring method 427 428 Returns: 429 430 score: float 431 432 """ 433 434 if scoring is None: 435 return np.sqrt(np.mean((self.predict(X) - y) ** 2)) 436 437 return skm2.get_scorer(scoring)(self, X, y)
Compute the score of the model.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method
Returns:
score: float
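A short note on the scoring behavior shown above: with `scoring=None` the method returns the RMSE directly, while any scikit-learn scorer name is dispatched through `sklearn.metrics.get_scorer`, so the usual sign conventions of scorers apply.

```python
print(reg.score(X_test, y_test))                                  # RMSE (scoring=None)
print(reg.score(X_test, y_test, scoring="r2"))                    # via get_scorer
print(reg.score(X_test, y_test, scoring="neg_mean_absolute_error"))
```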
35class DeepClassifier(CustomClassifier, ClassifierMixin): 36 """ 37 Deep Classifier 38 39 Parameters: 40 41 obj: an object 42 A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification 43 44 n_layers: int (default=3) 45 Number of layers. `n_layers = 1` is a simple `CustomClassifier` 46 47 verbose : int, optional (default=0) 48 Monitor progress when fitting. 49 50 All the other parameters are nnetsauce `CustomClassifier`'s 51 52 Examples: 53 54 ```python 55 import nnetsauce as ns 56 from sklearn.datasets import load_breast_cancer 57 from sklearn.model_selection import train_test_split 58 from sklearn.linear_model import LogisticRegressionCV 59 data = load_breast_cancer() 60 X = data.data 61 y= data.target 62 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123) 63 obj = LogisticRegressionCV() 64 clf = ns.DeepClassifier(obj) 65 clf.fit(X_train, y_train) 66 print(clf.score(clf.predict(X_test), y_test)) 67 ``` 68 """ 69 70 def __init__( 71 self, 72 obj, 73 # Defining depth 74 n_layers=3, 75 verbose=0, 76 # CustomClassifier attributes 77 n_hidden_features=5, 78 activation_name="relu", 79 a=0.01, 80 nodes_sim="sobol", 81 bias=True, 82 dropout=0, 83 direct_link=True, 84 n_clusters=2, 85 cluster_encode=True, 86 type_clust="kmeans", 87 type_scaling=("std", "std", "std"), 88 col_sample=1, 89 row_sample=1, 90 cv_calibration=2, 91 calibration_method="sigmoid", 92 seed=123, 93 backend="cpu", 94 ): 95 super().__init__( 96 obj=obj, 97 n_hidden_features=n_hidden_features, 98 activation_name=activation_name, 99 a=a, 100 nodes_sim=nodes_sim, 101 bias=bias, 102 dropout=dropout, 103 direct_link=direct_link, 104 n_clusters=n_clusters, 105 cluster_encode=cluster_encode, 106 type_clust=type_clust, 107 type_scaling=type_scaling, 108 col_sample=col_sample, 109 row_sample=row_sample, 110 seed=seed, 111 backend=backend, 112 ) 113 self.coef_ = None 114 self.intercept_ = None 115 self.type_fit = "classification" 116 self.cv_calibration = cv_calibration 117 self.calibration_method = calibration_method 118 119 # Only wrap in CalibratedClassifierCV if not already wrapped 120 # if not isinstance(obj, CalibratedClassifierCV): 121 # self.obj = CalibratedClassifierCV( 122 # self.obj, 123 # cv=self.cv_calibration, 124 # method=self.calibration_method 125 # ) 126 # else: 127 self.coef_ = None 128 self.intercept_ = None 129 self.type_fit = "classification" 130 self.cv_calibration = cv_calibration 131 self.calibration_method = calibration_method 132 self.obj = obj 133 self._estimator_type = "classifier" # Add this line to explicitly mark as classifier 134 135 assert n_layers >= 1, "must have n_layers >= 1" 136 self.stacked_obj = obj 137 self.verbose = verbose 138 self.n_layers = n_layers 139 self.classes_ = None 140 self.n_classes_ = None 141 142 def fit(self, X, y, **kwargs): 143 """Fit Classification algorithms to X and y. 144 Parameters 145 ---------- 146 X : array-like, 147 Training vectors, where rows is the number of samples 148 and columns is the number of features. 149 y : array-like, 150 Training vectors, where rows is the number of samples 151 and columns is the number of features. 152 **kwargs: dict 153 Additional parameters to be passed to the fit method 154 of the base learner. For example, `sample_weight`. 
155 156 Returns 157 ------- 158 A fitted object 159 """ 160 161 self.classes_ = np.unique(y) 162 self.n_classes_ = len( 163 self.classes_ 164 ) # for compatibility with scikit-learn 165 166 if isinstance(X, np.ndarray): 167 X = pd.DataFrame(X) 168 169 # init layer 170 self.stacked_obj = CustomClassifier( 171 obj=self.stacked_obj, 172 n_hidden_features=self.n_hidden_features, 173 activation_name=self.activation_name, 174 a=self.a, 175 nodes_sim=self.nodes_sim, 176 bias=self.bias, 177 dropout=self.dropout, 178 direct_link=self.direct_link, 179 n_clusters=self.n_clusters, 180 cluster_encode=self.cluster_encode, 181 type_clust=self.type_clust, 182 type_scaling=self.type_scaling, 183 col_sample=self.col_sample, 184 row_sample=self.row_sample, 185 cv_calibration=None, 186 calibration_method=None, 187 seed=self.seed, 188 backend=self.backend, 189 ) 190 191 if self.verbose > 0: 192 iterator = tqdm(range(self.n_layers - 1)) 193 else: 194 iterator = range(self.n_layers - 1) 195 196 for _ in iterator: 197 self.stacked_obj = deepcopy( 198 CustomClassifier( 199 obj=self.stacked_obj, 200 n_hidden_features=self.n_hidden_features, 201 activation_name=self.activation_name, 202 a=self.a, 203 nodes_sim=self.nodes_sim, 204 bias=self.bias, 205 dropout=self.dropout, 206 direct_link=self.direct_link, 207 n_clusters=self.n_clusters, 208 cluster_encode=self.cluster_encode, 209 type_clust=self.type_clust, 210 type_scaling=self.type_scaling, 211 col_sample=self.col_sample, 212 row_sample=self.row_sample, 213 cv_calibration=None, 214 calibration_method=None, 215 seed=self.seed, 216 backend=self.backend, 217 ) 218 ) 219 self.stacked_obj.fit(X, y, **kwargs) 220 221 return self 222 223 def partial_fit(self, X, y, **kwargs): 224 """Fit Regression algorithms to X and y. 225 Parameters 226 ---------- 227 X : array-like, 228 Training vectors, where rows is the number of samples 229 and columns is the number of features. 230 y : array-like, 231 Training vectors, where rows is the number of samples 232 and columns is the number of features. 233 **kwargs: dict 234 Additional parameters to be passed to the fit method 235 of the base learner. For example, `sample_weight`. 236 Returns 237 ------- 238 A fitted object 239 """ 240 assert hasattr(self, "stacked_obj"), "model must be fitted first" 241 current_obj = self.stacked_obj 242 for _ in range(self.n_layers): 243 try: 244 input_X = current_obj.obj.cook_test_set(X) 245 current_obj.obj.partial_fit(input_X, y, **kwargs) 246 try: 247 current_obj = current_obj.obj 248 except AttributeError: 249 pass 250 except ValueError: 251 pass 252 return self 253 254 def predict(self, X): 255 return self.stacked_obj.predict(X) 256 257 def predict_proba(self, X): 258 return self.stacked_obj.predict_proba(X) 259 260 def score(self, X, y, scoring=None): 261 return self.stacked_obj.score(X, y, scoring) 262 263 def cross_val_optim( 264 self, 265 X_train, 266 y_train, 267 X_test=None, 268 y_test=None, 269 scoring="accuracy", 270 surrogate_obj=None, 271 cv=5, 272 n_jobs=None, 273 n_init=10, 274 n_iter=190, 275 abs_tol=1e-3, 276 verbose=2, 277 seed=123, 278 **kwargs, 279 ): 280 """Cross-validation function and hyperparameters' search 281 282 Parameters: 283 284 X_train: array-like, 285 Training vectors, where rows is the number of samples 286 and columns is the number of features. 287 288 y_train: array-like, 289 Training vectors, where rows is the number of samples 290 and columns is the number of features. 
291 292 X_test: array-like, 293 Testing vectors, where rows is the number of samples 294 and columns is the number of features. 295 296 y_test: array-like, 297 Testing vectors, where rows is the number of samples 298 and columns is the number of features. 299 300 scoring: str 301 scoring metric; see https://scikit-learn.org/stable/modules/model_evaluation.html#the-scoring-parameter-defining-model-evaluation-rules 302 303 surrogate_obj: an object; 304 An ML model for estimating the uncertainty around the objective function 305 306 cv: int; 307 number of cross-validation folds 308 309 n_jobs: int; 310 number of jobs for parallel execution 311 312 n_init: an integer; 313 number of points in the initial setting, when `x_init` and `y_init` are not provided 314 315 n_iter: an integer; 316 number of iterations of the minimization algorithm 317 318 abs_tol: a float; 319 tolerance for convergence of the optimizer (early stopping based on acquisition function) 320 321 verbose: int 322 controls verbosity 323 324 seed: int 325 reproducibility seed 326 327 **kwargs: dict 328 additional parameters to be passed to the estimator 329 330 Examples: 331 332 ```python 333 ``` 334 """ 335 336 num_to_activation_name = {1: "relu", 2: "sigmoid", 3: "tanh"} 337 num_to_nodes_sim = {1: "sobol", 2: "uniform", 3: "hammersley"} 338 num_to_type_clust = {1: "kmeans", 2: "gmm"} 339 340 def deepclassifier_cv( 341 X_train, 342 y_train, 343 # Defining depth 344 n_layers=3, 345 # CustomClassifier attributes 346 n_hidden_features=5, 347 activation_name="relu", 348 nodes_sim="sobol", 349 dropout=0, 350 n_clusters=2, 351 type_clust="kmeans", 352 cv=5, 353 n_jobs=None, 354 scoring="accuracy", 355 seed=123, 356 ): 357 self.set_params( 358 **{ 359 "n_layers": n_layers, 360 # CustomClassifier attributes 361 "n_hidden_features": n_hidden_features, 362 "activation_name": activation_name, 363 "nodes_sim": nodes_sim, 364 "dropout": dropout, 365 "n_clusters": n_clusters, 366 "type_clust": type_clust, 367 **kwargs, 368 } 369 ) 370 return -cross_val_score( 371 estimator=self, 372 X=X_train, 373 y=y_train, 374 scoring=scoring, 375 cv=cv, 376 n_jobs=n_jobs, 377 verbose=0, 378 ).mean() 379 380 # objective function for hyperparams tuning 381 def crossval_objective(xx): 382 return deepclassifier_cv( 383 X_train=X_train, 384 y_train=y_train, 385 # Defining depth 386 n_layers=int(np.ceil(xx[0])), 387 # CustomClassifier attributes 388 n_hidden_features=int(np.ceil(xx[1])), 389 activation_name=num_to_activation_name[np.ceil(xx[2])], 390 nodes_sim=num_to_nodes_sim[int(np.ceil(xx[3]))], 391 dropout=xx[4], 392 n_clusters=int(np.ceil(xx[5])), 393 type_clust=num_to_type_clust[int(np.ceil(xx[6]))], 394 cv=cv, 395 n_jobs=n_jobs, 396 scoring=scoring, 397 seed=seed, 398 ) 399 400 if surrogate_obj is None: 401 gp_opt = gp.GPOpt( 402 objective_func=crossval_objective, 403 lower_bound=np.array([0, 3, 0, 0, 0.0, 0, 0]), 404 upper_bound=np.array([5, 100, 3, 3, 0.4, 5, 2]), 405 params_names=[ 406 "n_layers", 407 # CustomClassifier attributes 408 "n_hidden_features", 409 "activation_name", 410 "nodes_sim", 411 "dropout", 412 "n_clusters", 413 "type_clust", 414 ], 415 method="bayesian", 416 n_init=n_init, 417 n_iter=n_iter, 418 seed=seed, 419 ) 420 else: 421 gp_opt = gp.GPOpt( 422 objective_func=crossval_objective, 423 lower_bound=np.array([0, 3, 0, 0, 0.0, 0, 0]), 424 upper_bound=np.array([5, 100, 3, 3, 0.4, 5, 2]), 425 params_names=[ 426 "n_layers", 427 # CustomClassifier attributes 428 "n_hidden_features", 429 "activation_name", 430 "nodes_sim", 431 "dropout", 
432 "n_clusters", 433 "type_clust", 434 ], 435 acquisition="ucb", 436 method="splitconformal", 437 surrogate_obj=ns.PredictionInterval( 438 obj=surrogate_obj, method="splitconformal" 439 ), 440 n_init=n_init, 441 n_iter=n_iter, 442 seed=seed, 443 ) 444 445 res = gp_opt.optimize(verbose=verbose, abs_tol=abs_tol) 446 res.best_params["n_layers"] = int(np.ceil(res.best_params["n_layers"])) 447 res.best_params["n_hidden_features"] = int( 448 np.ceil(res.best_params["n_hidden_features"]) 449 ) 450 res.best_params["activation_name"] = num_to_activation_name[ 451 np.ceil(res.best_params["activation_name"]) 452 ] 453 res.best_params["nodes_sim"] = num_to_nodes_sim[ 454 int(np.ceil(res.best_params["nodes_sim"])) 455 ] 456 res.best_params["dropout"] = res.best_params["dropout"] 457 res.best_params["n_clusters"] = int(np.ceil(res.best_params["n_clusters"])) 458 res.best_params["type_clust"] = num_to_type_clust[ 459 int(np.ceil(res.best_params["type_clust"])) 460 ] 461 462 # out-of-sample error 463 if X_test is not None and y_test is not None: 464 self.set_params(**res.best_params, verbose=0, seed=seed) 465 preds = self.fit(X_train, y_train).predict(X_test) 466 # check error on y_test 467 oos_err = getattr(metrics, scoring + "_score")(y_true=y_test, y_pred=preds) 468 result = namedtuple("result", res._fields + ("test_" + scoring,)) 469 return result(*res, oos_err) 470 else: 471 return res 472 473 def lazy_cross_val_optim( 474 self, 475 X_train, 476 y_train, 477 X_test=None, 478 y_test=None, 479 scoring="accuracy", 480 surrogate_objs=None, 481 customize=False, 482 cv=5, 483 n_jobs=None, 484 n_init=10, 485 n_iter=190, 486 abs_tol=1e-3, 487 verbose=1, 488 seed=123, 489 ): 490 """Automated Cross-validation function and hyperparameters' search using multiple surrogates 491 492 Parameters: 493 494 X_train: array-like, 495 Training vectors, where rows is the number of samples 496 and columns is the number of features. 497 498 y_train: array-like, 499 Training vectors, where rows is the number of samples 500 and columns is the number of features. 501 502 X_test: array-like, 503 Testing vectors, where rows is the number of samples 504 and columns is the number of features. 505 506 y_test: array-like, 507 Testing vectors, where rows is the number of samples 508 and columns is the number of features. 
509 510 scoring: str 511 scoring metric; see https://scikit-learn.org/stable/modules/model_evaluation.html#the-scoring-parameter-defining-model-evaluation-rules 512 513 surrogate_objs: object names as a list of strings; 514 ML models for estimating the uncertainty around the objective function 515 516 customize: boolean 517 if True, the surrogate is transformed into a quasi-randomized network (default is False) 518 519 cv: int; 520 number of cross-validation folds 521 522 n_jobs: int; 523 number of jobs for parallel execution 524 525 n_init: an integer; 526 number of points in the initial setting, when `x_init` and `y_init` are not provided 527 528 n_iter: an integer; 529 number of iterations of the minimization algorithm 530 531 abs_tol: a float; 532 tolerance for convergence of the optimizer (early stopping based on acquisition function) 533 534 verbose: int 535 controls verbosity 536 537 seed: int 538 reproducibility seed 539 540 Examples: 541 542 ```python 543 ``` 544 """ 545 546 removed_regressors = [ 547 "TheilSenRegressor", 548 "ARDRegression", 549 "CCA", 550 "GaussianProcessRegressor", 551 "GradientBoostingRegressor", 552 "HistGradientBoostingRegressor", 553 "IsotonicRegression", 554 "MultiOutputRegressor", 555 "MultiTaskElasticNet", 556 "MultiTaskElasticNetCV", 557 "MultiTaskLasso", 558 "MultiTaskLassoCV", 559 "OrthogonalMatchingPursuit", 560 "OrthogonalMatchingPursuitCV", 561 "PLSCanonical", 562 "PLSRegression", 563 "RadiusNeighborsRegressor", 564 "RegressorChain", 565 "StackingRegressor", 566 "VotingRegressor", 567 ] 568 569 results = [] 570 571 for est in all_estimators(): 572 573 if surrogate_objs is None: 574 575 if issubclass(est[1], RegressorMixin) and ( 576 est[0] not in removed_regressors 577 ): 578 try: 579 if customize == True: 580 surr_obj = ns.CustomClassifier(obj=est[1]()) 581 else: 582 surr_obj = est[1]() 583 res = self.cross_val_optim( 584 X_train=X_train, 585 y_train=y_train, 586 X_test=X_test, 587 y_test=y_test, 588 surrogate_obj=surr_obj, 589 cv=cv, 590 n_jobs=n_jobs, 591 scoring=scoring, 592 n_init=n_init, 593 n_iter=n_iter, 594 abs_tol=abs_tol, 595 verbose=verbose, 596 seed=seed, 597 ) 598 if customize == True: 599 results.append((f"CustomClassifier({est[0]})", res)) 600 else: 601 results.append((est[0], res)) 602 except: 603 pass 604 605 else: 606 607 if ( 608 issubclass(est[1], RegressorMixin) 609 and (est[0] not in removed_regressors) 610 and est[0] in surrogate_objs 611 ): 612 try: 613 if customize == True: 614 surr_obj = ns.CustomClassifier(obj=est[1]()) 615 else: 616 surr_obj = est[1]() 617 res = self.cross_val_optim( 618 X_train=X_train, 619 y_train=y_train, 620 X_test=X_test, 621 y_test=y_test, 622 surrogate_obj=surr_obj, 623 cv=cv, 624 n_jobs=n_jobs, 625 scoring=scoring, 626 n_init=n_init, 627 n_iter=n_iter, 628 abs_tol=abs_tol, 629 verbose=verbose, 630 seed=seed, 631 ) 632 if customize == True: 633 results.append((f"CustomClassifier({est[0]})", res)) 634 else: 635 results.append((est[0], res)) 636 except: 637 pass 638 639 return results
Deep Classifier
Parameters:
obj: an object
A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification
n_layers: int (default=3)
Number of layers. `n_layers = 1` is a simple `CustomClassifier`
verbose : int, optional (default=0)
Monitor progress when fitting.
All the other parameters are nnetsauce `CustomClassifier`'s
Examples:
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegressionCV
data = load_breast_cancer()
X = data.data
y= data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
obj = LogisticRegressionCV()
clf = ns.DeepClassifier(obj)
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))
142 def fit(self, X, y, **kwargs): 143 """Fit Classification algorithms to X and y. 144 Parameters 145 ---------- 146 X : array-like, 147 Training vectors, where rows is the number of samples 148 and columns is the number of features. 149 y : array-like, 150 Training vectors, where rows is the number of samples 151 and columns is the number of features. 152 **kwargs: dict 153 Additional parameters to be passed to the fit method 154 of the base learner. For example, `sample_weight`. 155 156 Returns 157 ------- 158 A fitted object 159 """ 160 161 self.classes_ = np.unique(y) 162 self.n_classes_ = len( 163 self.classes_ 164 ) # for compatibility with scikit-learn 165 166 if isinstance(X, np.ndarray): 167 X = pd.DataFrame(X) 168 169 # init layer 170 self.stacked_obj = CustomClassifier( 171 obj=self.stacked_obj, 172 n_hidden_features=self.n_hidden_features, 173 activation_name=self.activation_name, 174 a=self.a, 175 nodes_sim=self.nodes_sim, 176 bias=self.bias, 177 dropout=self.dropout, 178 direct_link=self.direct_link, 179 n_clusters=self.n_clusters, 180 cluster_encode=self.cluster_encode, 181 type_clust=self.type_clust, 182 type_scaling=self.type_scaling, 183 col_sample=self.col_sample, 184 row_sample=self.row_sample, 185 cv_calibration=None, 186 calibration_method=None, 187 seed=self.seed, 188 backend=self.backend, 189 ) 190 191 if self.verbose > 0: 192 iterator = tqdm(range(self.n_layers - 1)) 193 else: 194 iterator = range(self.n_layers - 1) 195 196 for _ in iterator: 197 self.stacked_obj = deepcopy( 198 CustomClassifier( 199 obj=self.stacked_obj, 200 n_hidden_features=self.n_hidden_features, 201 activation_name=self.activation_name, 202 a=self.a, 203 nodes_sim=self.nodes_sim, 204 bias=self.bias, 205 dropout=self.dropout, 206 direct_link=self.direct_link, 207 n_clusters=self.n_clusters, 208 cluster_encode=self.cluster_encode, 209 type_clust=self.type_clust, 210 type_scaling=self.type_scaling, 211 col_sample=self.col_sample, 212 row_sample=self.row_sample, 213 cv_calibration=None, 214 calibration_method=None, 215 seed=self.seed, 216 backend=self.backend, 217 ) 218 ) 219 self.stacked_obj.fit(X, y, **kwargs) 220 221 return self
Fit Classification algorithms to X and y.
Parameters
X : array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
y : array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
**kwargs: dict
Additional parameters to be passed to the fit method
of the base learner. For example, `sample_weight`.
Returns
A fitted object
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
Scoring function for classification.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method (default is accuracy)
Returns:
score: float
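Tying the `predict`, `predict_proba`, and `score` descriptions above together, here is a minimal sketch, assuming the fitted `clf` from the class example above and held-out `X_test`, `y_test`:

```python
preds = clf.predict(X_test)        # predicted class labels
probs = clf.predict_proba(X_test)  # one column of probabilities per class
acc = clf.score(X_test, y_test)    # accuracy by default
print(preds[:5], probs[:5], acc)
```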
13class DeepRegressor(CustomRegressor, RegressorMixin): 14 """ 15 Deep Regressor 16 17 Parameters: 18 19 obj: an object 20 A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification 21 22 verbose : int, optional (default=0) 23 Monitor progress when fitting. 24 25 n_layers: int (default=2) 26 Number of layers. `n_layers = 1` is a simple `CustomRegressor` 27 28 All the other parameters are nnetsauce `CustomRegressor`'s 29 30 Examples: 31 32 ```python 33 import nnetsauce as ns 34 from sklearn.datasets import load_diabetes 35 from sklearn.model_selection import train_test_split 36 from sklearn.linear_model import RidgeCV 37 data = load_diabetes() 38 X = data.data 39 y= data.target 40 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123) 41 obj = RidgeCV() 42 clf = ns.DeepRegressor(obj) 43 clf.fit(X_train, y_train) 44 print(clf.score(clf.predict(X_test), y_test)) 45 ``` 46 47 """ 48 49 def __init__( 50 self, 51 obj, 52 # Defining depth 53 n_layers=2, 54 verbose=0, 55 # CustomRegressor attributes 56 n_hidden_features=5, 57 activation_name="relu", 58 a=0.01, 59 nodes_sim="sobol", 60 bias=True, 61 dropout=0, 62 direct_link=True, 63 n_clusters=2, 64 cluster_encode=True, 65 type_clust="kmeans", 66 type_scaling=("std", "std", "std"), 67 col_sample=1, 68 row_sample=1, 69 level=None, 70 pi_method="splitconformal", 71 seed=123, 72 backend="cpu", 73 ): 74 super().__init__( 75 obj=obj, 76 n_hidden_features=n_hidden_features, 77 activation_name=activation_name, 78 a=a, 79 nodes_sim=nodes_sim, 80 bias=bias, 81 dropout=dropout, 82 direct_link=direct_link, 83 n_clusters=n_clusters, 84 cluster_encode=cluster_encode, 85 type_clust=type_clust, 86 type_scaling=type_scaling, 87 col_sample=col_sample, 88 row_sample=row_sample, 89 level=level, 90 pi_method=pi_method, 91 seed=seed, 92 backend=backend, 93 ) 94 95 assert n_layers >= 1, "must have n_layers >= 1" 96 97 self.stacked_obj = deepcopy(obj) 98 self.verbose = verbose 99 self.n_layers = n_layers 100 self.level = level 101 self.pi_method = pi_method 102 self.coef_ = None 103 104 def fit(self, X, y, **kwargs): 105 """Fit Regression algorithms to X and y. 106 Parameters 107 ---------- 108 X : array-like, 109 Training vectors, where rows is the number of samples 110 and columns is the number of features. 111 y : array-like, 112 Training vectors, where rows is the number of samples 113 and columns is the number of features. 114 **kwargs: dict 115 Additional parameters to be passed to the fit method 116 of the base learner. For example, `sample_weight`. 
117 Returns 118 ------- 119 A fitted object 120 """ 121 122 if isinstance(X, np.ndarray): 123 X = pd.DataFrame(X) 124 125 # init layer 126 self.stacked_obj = CustomRegressor( 127 obj=self.stacked_obj, 128 n_hidden_features=self.n_hidden_features, 129 activation_name=self.activation_name, 130 a=self.a, 131 nodes_sim=self.nodes_sim, 132 bias=self.bias, 133 dropout=self.dropout, 134 direct_link=self.direct_link, 135 n_clusters=self.n_clusters, 136 cluster_encode=self.cluster_encode, 137 type_clust=self.type_clust, 138 type_scaling=self.type_scaling, 139 col_sample=self.col_sample, 140 row_sample=self.row_sample, 141 seed=self.seed, 142 backend=self.backend, 143 ) 144 145 if self.verbose > 0: 146 iterator = tqdm(range(self.n_layers - 1)) 147 else: 148 iterator = range(self.n_layers - 1) 149 150 for _ in iterator: 151 self.stacked_obj = deepcopy( 152 CustomRegressor( 153 obj=self.stacked_obj, 154 n_hidden_features=self.n_hidden_features, 155 activation_name=self.activation_name, 156 a=self.a, 157 nodes_sim=self.nodes_sim, 158 bias=self.bias, 159 dropout=self.dropout, 160 direct_link=self.direct_link, 161 n_clusters=self.n_clusters, 162 cluster_encode=self.cluster_encode, 163 type_clust=self.type_clust, 164 type_scaling=self.type_scaling, 165 col_sample=self.col_sample, 166 row_sample=self.row_sample, 167 seed=self.seed, 168 backend=self.backend, 169 ) 170 ) 171 172 self.stacked_obj.fit(X, y, **kwargs) 173 174 if self.level is not None: 175 self.stacked_obj = PredictionInterval( 176 obj=self.stacked_obj, method=self.pi_method, level=self.level 177 ) 178 179 if hasattr(self.stacked_obj, "clustering_obj_"): 180 self.clustering_obj_ = self.stacked_obj.clustering_obj_ 181 182 if hasattr(self.stacked_obj, "coef_"): 183 self.coef_ = self.stacked_obj.coef_ 184 185 if hasattr(self.stacked_obj, "scaler_"): 186 self.scaler_ = self.stacked_obj.scaler_ 187 188 if hasattr(self.stacked_obj, "nn_scaler_"): 189 self.nn_scaler_ = self.stacked_obj.nn_scaler_ 190 191 if hasattr(self.stacked_obj, "clustering_scaler_"): 192 self.clustering_scaler_ = self.stacked_obj.clustering_scaler_ 193 194 return self 195 196 def partial_fit(self, X, y, **kwargs): 197 """Fit Regression algorithms to X and y. 198 Parameters 199 ---------- 200 X : array-like, 201 Training vectors, where rows is the number of samples 202 and columns is the number of features. 203 y : array-like, 204 Training vectors, where rows is the number of samples 205 and columns is the number of features. 206 **kwargs: dict 207 Additional parameters to be passed to the fit method 208 of the base learner. For example, `sample_weight`. 209 Returns 210 ------- 211 A fitted object 212 """ 213 assert hasattr(self, "stacked_obj"), "model must be fitted first" 214 current_obj = self.stacked_obj 215 for _ in range(self.n_layers): 216 try: 217 input_X = current_obj.obj.cook_test_set(X) 218 current_obj.obj.partial_fit(input_X, y, **kwargs) 219 try: 220 current_obj = current_obj.obj 221 except AttributeError: 222 pass 223 except ValueError as e: 224 print(e) 225 pass 226 return self 227 228 def predict(self, X, **kwargs): 229 if self.level is not None: 230 return self.stacked_obj.predict(X, return_pi=True) 231 return self.stacked_obj.predict(X, **kwargs) 232 233 def score(self, X, y, scoring=None): 234 return self.stacked_obj.score(X, y, scoring)
Deep Regressor
Parameters:
obj: an object
A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification
verbose : int, optional (default=0)
Monitor progress when fitting.
n_layers: int (default=2)
Number of layers. `n_layers = 1` is a simple `CustomRegressor`
All the other parameters are nnetsauce `CustomRegressor`'s
Examples:
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.linear_model import RidgeCV
data = load_diabetes()
X = data.data
y= data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
obj = RidgeCV()
clf = ns.DeepRegressor(obj)
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))
104 def fit(self, X, y, **kwargs): 105 """Fit Regression algorithms to X and y. 106 Parameters 107 ---------- 108 X : array-like, 109 Training vectors, where rows is the number of samples 110 and columns is the number of features. 111 y : array-like, 112 Training vectors, where rows is the number of samples 113 and columns is the number of features. 114 **kwargs: dict 115 Additional parameters to be passed to the fit method 116 of the base learner. For example, `sample_weight`. 117 Returns 118 ------- 119 A fitted object 120 """ 121 122 if isinstance(X, np.ndarray): 123 X = pd.DataFrame(X) 124 125 # init layer 126 self.stacked_obj = CustomRegressor( 127 obj=self.stacked_obj, 128 n_hidden_features=self.n_hidden_features, 129 activation_name=self.activation_name, 130 a=self.a, 131 nodes_sim=self.nodes_sim, 132 bias=self.bias, 133 dropout=self.dropout, 134 direct_link=self.direct_link, 135 n_clusters=self.n_clusters, 136 cluster_encode=self.cluster_encode, 137 type_clust=self.type_clust, 138 type_scaling=self.type_scaling, 139 col_sample=self.col_sample, 140 row_sample=self.row_sample, 141 seed=self.seed, 142 backend=self.backend, 143 ) 144 145 if self.verbose > 0: 146 iterator = tqdm(range(self.n_layers - 1)) 147 else: 148 iterator = range(self.n_layers - 1) 149 150 for _ in iterator: 151 self.stacked_obj = deepcopy( 152 CustomRegressor( 153 obj=self.stacked_obj, 154 n_hidden_features=self.n_hidden_features, 155 activation_name=self.activation_name, 156 a=self.a, 157 nodes_sim=self.nodes_sim, 158 bias=self.bias, 159 dropout=self.dropout, 160 direct_link=self.direct_link, 161 n_clusters=self.n_clusters, 162 cluster_encode=self.cluster_encode, 163 type_clust=self.type_clust, 164 type_scaling=self.type_scaling, 165 col_sample=self.col_sample, 166 row_sample=self.row_sample, 167 seed=self.seed, 168 backend=self.backend, 169 ) 170 ) 171 172 self.stacked_obj.fit(X, y, **kwargs) 173 174 if self.level is not None: 175 self.stacked_obj = PredictionInterval( 176 obj=self.stacked_obj, method=self.pi_method, level=self.level 177 ) 178 179 if hasattr(self.stacked_obj, "clustering_obj_"): 180 self.clustering_obj_ = self.stacked_obj.clustering_obj_ 181 182 if hasattr(self.stacked_obj, "coef_"): 183 self.coef_ = self.stacked_obj.coef_ 184 185 if hasattr(self.stacked_obj, "scaler_"): 186 self.scaler_ = self.stacked_obj.scaler_ 187 188 if hasattr(self.stacked_obj, "nn_scaler_"): 189 self.nn_scaler_ = self.stacked_obj.nn_scaler_ 190 191 if hasattr(self.stacked_obj, "clustering_scaler_"): 192 self.clustering_scaler_ = self.stacked_obj.clustering_scaler_ 193 194 return self
Fit Regression algorithms to X and y.
Parameters
X : array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
y : array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
**kwargs: dict
Additional parameters to be passed to the fit method
of the base learner. For example, `sample_weight`.
Returns
A fitted object
228 def predict(self, X, **kwargs): 229 if self.level is not None: 230 return self.stacked_obj.predict(X, return_pi=True) 231 return self.stacked_obj.predict(X, **kwargs)
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
level: int
Level of confidence (default = 95)
method: str
`None`, or 'splitconformal' or 'localconformal' conformal
prediction (used if you specify `return_pi = True`)
**kwargs: additional parameters
`return_pi = True` for conformal prediction,
with `method` in ('splitconformal', 'localconformal')
or `return_std = True` for `self.obj` in
(`sklearn.linear_model.BayesianRidge`,
`sklearn.linear_model.ARDRegression`,
`sklearn.gaussian_process.GaussianProcessRegressor`)
Returns:
model predictions:
an array if uncertainty quantification is not requested,
or a tuple with prediction intervals and simulations
if `return_std = True` (mean, standard deviation,
lower and upper prediction interval) or `return_pi = True`
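As a minimal sketch of the uncertainty-quantification path described above, the example below uses the constructor's `level` and `pi_method` arguments (shown in the source earlier); the data set and confidence level are illustrative assumptions:

```python
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.linear_model import RidgeCV
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123
)

# when `level` is set, predict() internally uses return_pi=True and
# returns prediction intervals at the requested confidence level
reg = ns.DeepRegressor(RidgeCV(), n_layers=2, level=95, pi_method="splitconformal")
reg.fit(X_train, y_train)
print(reg.predict(X_test))
```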
Compute the score of the model.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method
Returns:
score: float
11class DeepMTS(MTS): 12 """Univariate and multivariate time series (DeepMTS) forecasting with Quasi-Randomized networks (Work in progress) 13 14 Parameters: 15 16 obj: object. 17 any object containing a method fit (obj.fit()) and a method predict 18 (obj.predict()). 19 20 n_layers: int. 21 number of layers in the neural network. 22 23 n_hidden_features: int. 24 number of nodes in the hidden layer. 25 26 activation_name: str. 27 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'. 28 29 a: float. 30 hyperparameter for 'prelu' or 'elu' activation function. 31 32 nodes_sim: str. 33 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 34 'uniform'. 35 36 bias: boolean. 37 indicates if the hidden layer contains a bias term (True) or not 38 (False). 39 40 dropout: float. 41 regularization parameter; (random) percentage of nodes dropped out 42 of the training. 43 44 direct_link: boolean. 45 indicates if the original predictors are included (True) in model's fitting or not (False). 46 47 n_clusters: int. 48 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering). 49 50 cluster_encode: bool. 51 defines how the variable containing clusters is treated (default is one-hot) 52 if `False`, then labels are used, without one-hot encoding. 53 54 type_clust: str. 55 type of clustering method: currently k-means ('kmeans') or Gaussian 56 Mixture Model ('gmm'). 57 58 type_scaling: a tuple of 3 strings. 59 scaling methods for inputs, hidden layer, and clustering respectively 60 (and when relevant). 61 Currently available: standardization ('std') or MinMax scaling ('minmax'). 62 63 lags: int. 64 number of lags used for each time series. 65 66 type_pi: str. 67 type of prediction interval; currently: 68 - "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case 69 - "kde": based on Kernel Density Estimation of in-sample residuals 70 - "bootstrap": based on independent bootstrap of in-sample residuals 71 - "block-bootstrap": based on basic block bootstrap of in-sample residuals 72 - "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals 73 - "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals 74 - "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals 75 - "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals 76 - "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals 77 - "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals 78 79 block_size: int. 80 size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap"). 81 Default is round(3.15*(n_residuals^1/3)) 82 83 replications: int. 84 number of replications (if needed, for predictive simulation). Default is 'None'. 85 86 kernel: str. 87 the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'. 88 89 agg: str. 90 either "mean" or "median" for simulation of bootstrap aggregating 91 92 seed: int. 93 reproducibility seed for nodes_sim=='uniform' or predictive simulation. 94 95 backend: str. 96 "cpu" or "gpu" or "tpu". 97 98 verbose: int. 
99 0: not printing; 1: printing 100 101 show_progress: bool. 102 True: progress bar when fitting each series; False: no progress bar when fitting each series 103 104 Attributes: 105 106 fit_objs_: dict 107 objects adjusted to each individual time series 108 109 y_: {array-like} 110 DeepMTS responses (most recent observations first) 111 112 X_: {array-like} 113 DeepMTS lags 114 115 xreg_: {array-like} 116 external regressors 117 118 y_means_: dict 119 a dictionary of each series mean values 120 121 preds_: {array-like} 122 successive model predictions 123 124 preds_std_: {array-like} 125 standard deviation around the predictions 126 127 return_std_: boolean 128 return uncertainty or not (set in predict) 129 130 df_: data frame 131 the input data frame, in case a data.frame is provided to `fit` 132 133 Examples: 134 135 Example 1: 136 137 ```python 138 import nnetsauce as ns 139 import numpy as np 140 from sklearn import linear_model 141 np.random.seed(123) 142 143 M = np.random.rand(10, 3) 144 M[:,0] = 10*M[:,0] 145 M[:,2] = 25*M[:,2] 146 print(M) 147 148 # Adjust Bayesian Ridge 149 regr4 = linear_model.BayesianRidge() 150 obj_DeepMTS = ns.DeepMTS(regr4, lags = 1, n_hidden_features=5) 151 obj_DeepMTS.fit(M) 152 print(obj_DeepMTS.predict()) 153 154 # with credible intervals 155 print(obj_DeepMTS.predict(return_std=True, level=80)) 156 157 print(obj_DeepMTS.predict(return_std=True, level=95)) 158 ``` 159 160 Example 2: 161 162 ```python 163 import nnetsauce as ns 164 import numpy as np 165 from sklearn import linear_model 166 167 dataset = { 168 'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'], 169 'series1' : [34, 30, 35.6, 33.3, 38.1], 170 'series2' : [4, 5.5, 5.6, 6.3, 5.1], 171 'series3' : [100, 100.5, 100.6, 100.2, 100.1]} 172 df = pd.DataFrame(dataset).set_index('date') 173 print(df) 174 175 # Adjust Bayesian Ridge 176 regr5 = linear_model.BayesianRidge() 177 obj_DeepMTS = ns.DeepMTS(regr5, lags = 1, n_hidden_features=5) 178 obj_DeepMTS.fit(df) 179 print(obj_DeepMTS.predict()) 180 181 # with credible intervals 182 print(obj_DeepMTS.predict(return_std=True, level=80)) 183 184 print(obj_DeepMTS.predict(return_std=True, level=95)) 185 ``` 186 187 """ 188 189 # construct the object ----- 190 191 def __init__( 192 self, 193 obj, 194 n_layers=3, 195 n_hidden_features=5, 196 activation_name="relu", 197 a=0.01, 198 nodes_sim="sobol", 199 bias=True, 200 dropout=0, 201 direct_link=True, 202 n_clusters=2, 203 cluster_encode=True, 204 type_clust="kmeans", 205 type_scaling=("std", "std", "std"), 206 lags=1, 207 type_pi="kde", 208 block_size=None, 209 replications=None, 210 kernel=None, 211 agg="mean", 212 seed=123, 213 backend="cpu", 214 verbose=0, 215 show_progress=True, 216 ): 217 assert int(lags) == lags, "parameter 'lags' should be an integer" 218 assert n_layers >= 1, "must have n_layers >= 1" 219 self.n_layers = int(n_layers) 220 221 if self.n_layers > 1: 222 223 for _ in range(self.n_layers - 1): 224 obj = CustomRegressor( 225 obj=deepcopy(obj), 226 n_hidden_features=n_hidden_features, 227 activation_name=activation_name, 228 a=a, 229 nodes_sim=nodes_sim, 230 bias=bias, 231 dropout=dropout, 232 direct_link=direct_link, 233 n_clusters=n_clusters, 234 cluster_encode=cluster_encode, 235 type_clust=type_clust, 236 type_scaling=type_scaling, 237 seed=seed, 238 backend=backend, 239 ) 240 241 self.obj = deepcopy(obj) 242 super().__init__( 243 obj=self.obj, 244 n_hidden_features=n_hidden_features, 245 activation_name=activation_name, 246 a=a, 247 nodes_sim=nodes_sim, 
248 bias=bias, 249 dropout=dropout, 250 direct_link=direct_link, 251 n_clusters=n_clusters, 252 cluster_encode=cluster_encode, 253 type_clust=type_clust, 254 type_scaling=type_scaling, 255 lags=lags, 256 type_pi=type_pi, 257 block_size=block_size, 258 replications=replications, 259 kernel=kernel, 260 agg=agg, 261 seed=seed, 262 backend=backend, 263 verbose=verbose, 264 show_progress=show_progress, 265 )
Univariate and multivariate time series (DeepMTS) forecasting with Quasi-Randomized networks (Work in progress)
Parameters:
obj: object.
any object containing a method fit (obj.fit()) and a method predict
(obj.predict()).
n_layers: int.
number of layers in the neural network.
n_hidden_features: int.
number of nodes in the hidden layer.
activation_name: str.
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'.
a: float.
hyperparameter for 'prelu' or 'elu' activation function.
nodes_sim: str.
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'.
bias: boolean.
indicates if the hidden layer contains a bias term (True) or not
(False).
dropout: float.
regularization parameter; (random) percentage of nodes dropped out
of the training.
direct_link: boolean.
indicates if the original predictors are included (True) in model's fitting or not (False).
n_clusters: int.
number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering).
cluster_encode: bool.
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding.
type_clust: str.
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm').
type_scaling: a tuple of 3 strings.
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax').
lags: int.
number of lags used for each time series.
type_pi: str.
type of prediction interval; currently:
- "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case
- "kde": based on Kernel Density Estimation of in-sample residuals
- "bootstrap": based on independent bootstrap of in-sample residuals
- "block-bootstrap": based on basic block bootstrap of in-sample residuals
- "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals
- "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals
- "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals
- "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals
- "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals
- "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals
block_size: int.
size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap").
Default is round(3.15*(n_residuals^(1/3)))
replications: int.
number of replications (if needed, for predictive simulation). Default is 'None'.
kernel: str.
the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'.
agg: str.
either "mean" or "median" for simulation of bootstrap aggregating
seed: int.
reproducibility seed for nodes_sim=='uniform' or predictive simulation.
backend: str.
"cpu" or "gpu" or "tpu".
verbose: int.
0: not printing; 1: printing
show_progress: bool.
True: progress bar when fitting each series; False: no progress bar when fitting each series
Attributes:
fit_objs_: dict
objects adjusted to each individual time series
y_: {array-like}
DeepMTS responses (most recent observations first)
X_: {array-like}
DeepMTS lags
xreg_: {array-like}
external regressors
y_means_: dict
a dictionary of each series mean values
preds_: {array-like}
successive model predictions
preds_std_: {array-like}
standard deviation around the predictions
return_std_: boolean
return uncertainty or not (set in predict)
df_: data frame
the input data frame, in case a data.frame is provided to `fit`
Examples:
Example 1:
import nnetsauce as ns
import numpy as np
from sklearn import linear_model
np.random.seed(123)
M = np.random.rand(10, 3)
M[:,0] = 10*M[:,0]
M[:,2] = 25*M[:,2]
print(M)
# Adjust Bayesian Ridge
regr4 = linear_model.BayesianRidge()
obj_DeepMTS = ns.DeepMTS(regr4, lags = 1, n_hidden_features=5)
obj_DeepMTS.fit(M)
print(obj_DeepMTS.predict())
# with credible intervals
print(obj_DeepMTS.predict(return_std=True, level=80))
print(obj_DeepMTS.predict(return_std=True, level=95))
Example 2:
import nnetsauce as ns
import numpy as np
import pandas as pd
from sklearn import linear_model
dataset = {
'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
'series1' : [34, 30, 35.6, 33.3, 38.1],
'series2' : [4, 5.5, 5.6, 6.3, 5.1],
'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
df = pd.DataFrame(dataset).set_index('date')
print(df)
# Adjust Bayesian Ridge
regr5 = linear_model.BayesianRidge()
obj_DeepMTS = ns.DeepMTS(regr5, lags = 1, n_hidden_features=5)
obj_DeepMTS.fit(df)
print(obj_DeepMTS.predict())
# with credible intervals
print(obj_DeepMTS.predict(return_std=True, level=80))
print(obj_DeepMTS.predict(return_std=True, level=95))
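For the simulation-based interval types listed under `type_pi` above, here is a minimal hypothetical third example; the values of `lags`, `replications`, and `block_size` are arbitrary choices for illustration:

```python
import nnetsauce as ns
import numpy as np
from sklearn import linear_model

np.random.seed(123)
M = np.random.rand(50, 3)

regr = linear_model.BayesianRidge()
obj_DeepMTS = ns.DeepMTS(
    regr,
    n_layers=2,
    lags=2,
    n_hidden_features=5,
    type_pi="block-bootstrap",  # intervals from block bootstrap of in-sample residuals
    replications=100,
    block_size=5,
)
obj_DeepMTS.fit(M)
print(obj_DeepMTS.predict())
```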
6class Downloader: 7 """Download datasets from data sources (R-universe for now)""" 8 9 def __init__(self): 10 self.pkgname = None 11 self.dataset = None 12 self.source = None 13 self.url = None 14 self.request = None 15 16 def download( 17 self, 18 pkgname="MASS", 19 dataset="Boston", 20 source="https://cran.r-universe.dev/", 21 **kwargs 22 ): 23 """Download datasets from data sources (R-universe for now) 24 25 Examples: 26 27 ```python 28 import nnetsauce as ns 29 30 downloader = ns.Downloader() 31 df = downloader.download(pkgname="MASS", dataset="Boston") 32 ``` 33 34 """ 35 self.pkgname = pkgname 36 self.dataset = dataset 37 self.source = source 38 self.url = source + pkgname + "/data/" + dataset + "/json" 39 self.request = requests.get(self.url) 40 return pd.DataFrame(self.request.json(), **kwargs)
Download datasets from data sources (R-universe for now)
16 def download( 17 self, 18 pkgname="MASS", 19 dataset="Boston", 20 source="https://cran.r-universe.dev/", 21 **kwargs 22 ): 23 """Download datasets from data sources (R-universe for now) 24 25 Examples: 26 27 ```python 28 import nnetsauce as ns 29 30 downloader = ns.Downloader() 31 df = downloader.download(pkgname="MASS", dataset="Boston") 32 ``` 33 34 """ 35 self.pkgname = pkgname 36 self.dataset = dataset 37 self.source = source 38 self.url = source + pkgname + "/data/" + dataset + "/json" 39 self.request = requests.get(self.url) 40 return pd.DataFrame(self.request.json(), **kwargs)
Download datasets from data sources (R-universe for now)
Examples:
import nnetsauce as ns
downloader = ns.Downloader()
df = downloader.download(pkgname="MASS", dataset="Boston")
16class GLMClassifier(GLM, ClassifierMixin): 17 """Generalized 'linear' models using quasi-randomized networks (classification) 18 19 Parameters: 20 21 n_hidden_features: int 22 number of nodes in the hidden layer 23 24 lambda1: float 25 regularization parameter for GLM coefficients on original features 26 27 alpha1: float 28 controls compromize between l1 and l2 norm of GLM coefficients on original features 29 30 lambda2: float 31 regularization parameter for GLM coefficients on nonlinear features 32 33 alpha2: float 34 controls compromize between l1 and l2 norm of GLM coefficients on nonlinear features 35 36 activation_name: str 37 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 38 39 a: float 40 hyperparameter for 'prelu' or 'elu' activation function 41 42 nodes_sim: str 43 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 44 'uniform' 45 46 bias: boolean 47 indicates if the hidden layer contains a bias term (True) or not 48 (False) 49 50 dropout: float 51 regularization parameter; (random) percentage of nodes dropped out 52 of the training 53 54 direct_link: boolean 55 indicates if the original predictors are included (True) in model's 56 fitting or not (False) 57 58 n_clusters: int 59 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: 60 no clustering) 61 62 cluster_encode: bool 63 defines how the variable containing clusters is treated (default is one-hot) 64 if `False`, then labels are used, without one-hot encoding 65 66 type_clust: str 67 type of clustering method: currently k-means ('kmeans') or Gaussian 68 Mixture Model ('gmm') 69 70 type_scaling: a tuple of 3 strings 71 scaling methods for inputs, hidden layer, and clustering respectively 72 (and when relevant). 73 Currently available: standardization ('std') or MinMax scaling ('minmax') 74 75 optimizer: object 76 optimizer, from class nnetsauce.Optimizer 77 78 seed: int 79 reproducibility seed for nodes_sim=='uniform' 80 81 Attributes: 82 83 beta_: vector 84 regression coefficients 85 86 Examples: 87 88 See [https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_classification.py) 89 90 """ 91 92 # construct the object ----- 93 94 def __init__( 95 self, 96 n_hidden_features=5, 97 lambda1=0.01, 98 alpha1=0.5, 99 lambda2=0.01, 100 alpha2=0.5, 101 family="expit", 102 activation_name="relu", 103 a=0.01, 104 nodes_sim="sobol", 105 bias=True, 106 dropout=0, 107 direct_link=True, 108 n_clusters=2, 109 cluster_encode=True, 110 type_clust="kmeans", 111 type_scaling=("std", "std", "std"), 112 optimizer=Optimizer(), 113 seed=123, 114 ): 115 super().__init__( 116 n_hidden_features=n_hidden_features, 117 lambda1=lambda1, 118 alpha1=alpha1, 119 lambda2=lambda2, 120 alpha2=alpha2, 121 activation_name=activation_name, 122 a=a, 123 nodes_sim=nodes_sim, 124 bias=bias, 125 dropout=dropout, 126 direct_link=direct_link, 127 n_clusters=n_clusters, 128 cluster_encode=cluster_encode, 129 type_clust=type_clust, 130 type_scaling=type_scaling, 131 optimizer=optimizer, 132 seed=seed, 133 ) 134 135 self.family = family 136 137 def logit_loss(self, Y, row_index, XB): 138 self.n_classes = Y.shape[1] # len(np.unique(y)) 139 # Y = mo.one_hot_encode2(y, self.n_classes) 140 # Y = self.optimizer.one_hot_encode(y, self.n_classes) 141 142 # max_double = 709.0 # only if softmax 143 # XB[XB > max_double] = max_double 144 XB[XB > 709.0] = 709.0 145 146 if row_index is None: 147 return -np.mean(np.sum(Y * XB, axis=1) - 
logsumexp(XB)) 148 149 return -np.mean(np.sum(Y[row_index, :] * XB, axis=1) - logsumexp(XB)) 150 151 def expit_erf_loss(self, Y, row_index, XB): 152 # self.n_classes = len(np.unique(y)) 153 # Y = mo.one_hot_encode2(y, self.n_classes) 154 # Y = self.optimizer.one_hot_encode(y, self.n_classes) 155 self.n_classes = Y.shape[1] 156 157 if row_index is None: 158 return -np.mean(np.sum(Y * XB, axis=1) - logsumexp(XB)) 159 160 return -np.mean(np.sum(Y[row_index, :] * XB, axis=1) - logsumexp(XB)) 161 162 def loss_func( 163 self, beta, group_index, X, Y, y, row_index=None, type_loss="logit", **kwargs 164 ): 165 res = { 166 "logit": self.logit_loss, 167 "expit": self.expit_erf_loss, 168 "erf": self.expit_erf_loss, 169 } 170 171 if row_index is None: 172 row_index = range(len(y)) 173 XB = self.compute_XB( 174 X, 175 beta=np.reshape(beta, (X.shape[1], self.n_classes), order="F"), 176 ) 177 178 return res[type_loss](Y, row_index, XB) + self.compute_penalty( 179 group_index=group_index, beta=beta 180 ) 181 182 XB = self.compute_XB( 183 X, 184 beta=np.reshape(beta, (X.shape[1], self.n_classes), order="F"), 185 row_index=row_index, 186 ) 187 188 return res[type_loss](Y, row_index, XB) + self.compute_penalty( 189 group_index=group_index, beta=beta 190 ) 191 192 def fit(self, X, y, **kwargs): 193 """Fit GLM model to training data (X, y). 194 195 Args: 196 197 X: {array-like}, shape = [n_samples, n_features] 198 Training vectors, where n_samples is the number 199 of samples and n_features is the number of features. 200 201 y: array-like, shape = [n_samples] 202 Target values. 203 204 **kwargs: additional parameters to be passed to 205 self.cook_training_set or self.obj.fit 206 207 Returns: 208 209 self: object 210 211 """ 212 213 assert mx.is_factor( 214 y 215 ), "y must contain only integers" # change is_factor and subsampling everywhere 216 217 self.classes_ = np.unique(y) # for compatibility with sklearn 218 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 219 220 self.beta_ = None 221 222 n, p = X.shape 223 224 self.group_index = n * X.shape[1] 225 226 self.n_classes = len(np.unique(y)) 227 228 output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 229 230 # Y = mo.one_hot_encode2(output_y, self.n_classes) 231 Y = self.optimizer.one_hot_encode(output_y, self.n_classes) 232 233 # initialization 234 beta_ = np.linalg.lstsq(scaled_Z, Y, rcond=None)[0] 235 236 # optimization 237 # fit(self, loss_func, response, x0, **kwargs): 238 # loss_func(self, beta, group_index, X, y, 239 # row_index=None, type_loss="gaussian", 240 # **kwargs) 241 self.optimizer.fit( 242 self.loss_func, 243 response=y, 244 x0=beta_.flatten(order="F"), 245 group_index=self.group_index, 246 X=scaled_Z, 247 Y=Y, 248 y=y, 249 type_loss=self.family, 250 ) 251 252 self.beta_ = self.optimizer.results[0] 253 self.classes_ = np.unique(y) 254 255 return self 256 257 def predict(self, X, **kwargs): 258 """Predict test data X. 259 260 Args: 261 262 X: {array-like}, shape = [n_samples, n_features] 263 Training vectors, where n_samples is the number 264 of samples and n_features is the number of features. 265 266 **kwargs: additional parameters to be passed to 267 self.cook_test_set 268 269 Returns: 270 271 model predictions: {array-like} 272 273 """ 274 275 return np.argmax(self.predict_proba(X, **kwargs), axis=1) 276 277 def predict_proba(self, X, **kwargs): 278 """Predict probabilities for test data X. 
279 280 Args: 281 282 X: {array-like}, shape = [n_samples, n_features] 283 Training vectors, where n_samples is the number 284 of samples and n_features is the number of features. 285 286 **kwargs: additional parameters to be passed to 287 self.cook_test_set 288 289 Returns: 290 291 probability estimates for test data: {array-like} 292 293 """ 294 if len(X.shape) == 1: 295 n_features = X.shape[0] 296 new_X = mo.rbind( 297 X.reshape(1, n_features), 298 np.ones(n_features).reshape(1, n_features), 299 ) 300 301 Z = self.cook_test_set(new_X, **kwargs) 302 303 else: 304 Z = self.cook_test_set(X, **kwargs) 305 306 ZB = mo.safe_sparse_dot( 307 Z, 308 self.beta_.reshape( 309 self.n_classes, 310 X.shape[1] + self.n_hidden_features + self.n_clusters, 311 ).T, 312 ) 313 314 if self.family == "logit": 315 exp_ZB = np.exp(ZB) 316 317 return exp_ZB / exp_ZB.sum(axis=1)[:, None] 318 319 if self.family == "expit": 320 exp_ZB = expit(ZB) 321 322 return exp_ZB / exp_ZB.sum(axis=1)[:, None] 323 324 if self.family == "erf": 325 exp_ZB = 0.5 * (1 + erf(ZB)) 326 327 return exp_ZB / exp_ZB.sum(axis=1)[:, None] 328 329 def score(self, X, y, scoring=None): 330 """Scoring function for classification. 331 332 Args: 333 334 X: {array-like}, shape = [n_samples, n_features] 335 Training vectors, where n_samples is the number 336 of samples and n_features is the number of features. 337 338 y: array-like, shape = [n_samples] 339 Target values. 340 341 scoring: str 342 scoring method (default is accuracy) 343 344 Returns: 345 346 score: float 347 """ 348 349 if scoring is None: 350 scoring = "accuracy" 351 352 if scoring == "accuracy": 353 return skm2.accuracy_score(y, self.predict(X)) 354 355 if scoring == "f1": 356 return skm2.f1_score(y, self.predict(X)) 357 358 if scoring == "precision": 359 return skm2.precision_score(y, self.predict(X)) 360 361 if scoring == "recall": 362 return skm2.recall_score(y, self.predict(X)) 363 364 if scoring == "roc_auc": 365 return skm2.roc_auc_score(y, self.predict(X)) 366 367 if scoring == "log_loss": 368 return skm2.log_loss(y, self.predict_proba(X)) 369 370 if scoring == "balanced_accuracy": 371 return skm2.balanced_accuracy_score(y, self.predict(X)) 372 373 if scoring == "average_precision": 374 return skm2.average_precision_score(y, self.predict(X)) 375 376 if scoring == "neg_brier_score": 377 return -skm2.brier_score_loss(y, self.predict_proba(X)) 378 379 if scoring == "neg_log_loss": 380 return -skm2.log_loss(y, self.predict_proba(X))
Generalized 'linear' models using quasi-randomized networks (classification)
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
lambda1: float
regularization parameter for GLM coefficients on original features
alpha1: float
controls compromise between l1 and l2 norm of GLM coefficients on original features
lambda2: float
regularization parameter for GLM coefficients on nonlinear features
alpha2: float
controls compromise between l1 and l2 norm of GLM coefficients on nonlinear features
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
optimizer: object
optimizer, from class nnetsauce.Optimizer
seed: int
reproducibility seed for nodes_sim=='uniform'
Attributes:
beta_: vector
regression coefficients
Examples:
See https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_classification.py
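Since the `Examples` entry above only links to a script, here is a minimal hypothetical sketch of `GLMClassifier`; the hyperparameter values are illustrative, and the linked script remains the reference:

```python
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123
)

# quasi-randomized network GLM with the default 'expit' family and optimizer
clf = ns.GLMClassifier(n_hidden_features=5, lambda1=0.01, lambda2=0.01)
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))      # accuracy by default
print(clf.predict_proba(X_test)[:5])  # class probability estimates
```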
192 def fit(self, X, y, **kwargs): 193 """Fit GLM model to training data (X, y). 194 195 Args: 196 197 X: {array-like}, shape = [n_samples, n_features] 198 Training vectors, where n_samples is the number 199 of samples and n_features is the number of features. 200 201 y: array-like, shape = [n_samples] 202 Target values. 203 204 **kwargs: additional parameters to be passed to 205 self.cook_training_set or self.obj.fit 206 207 Returns: 208 209 self: object 210 211 """ 212 213 assert mx.is_factor( 214 y 215 ), "y must contain only integers" # change is_factor and subsampling everywhere 216 217 self.classes_ = np.unique(y) # for compatibility with sklearn 218 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 219 220 self.beta_ = None 221 222 n, p = X.shape 223 224 self.group_index = n * X.shape[1] 225 226 self.n_classes = len(np.unique(y)) 227 228 output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 229 230 # Y = mo.one_hot_encode2(output_y, self.n_classes) 231 Y = self.optimizer.one_hot_encode(output_y, self.n_classes) 232 233 # initialization 234 beta_ = np.linalg.lstsq(scaled_Z, Y, rcond=None)[0] 235 236 # optimization 237 # fit(self, loss_func, response, x0, **kwargs): 238 # loss_func(self, beta, group_index, X, y, 239 # row_index=None, type_loss="gaussian", 240 # **kwargs) 241 self.optimizer.fit( 242 self.loss_func, 243 response=y, 244 x0=beta_.flatten(order="F"), 245 group_index=self.group_index, 246 X=scaled_Z, 247 Y=Y, 248 y=y, 249 type_loss=self.family, 250 ) 251 252 self.beta_ = self.optimizer.results[0] 253 self.classes_ = np.unique(y) 254 255 return self
Fit GLM model to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
257 def predict(self, X, **kwargs): 258 """Predict test data X. 259 260 Args: 261 262 X: {array-like}, shape = [n_samples, n_features] 263 Training vectors, where n_samples is the number 264 of samples and n_features is the number of features. 265 266 **kwargs: additional parameters to be passed to 267 self.cook_test_set 268 269 Returns: 270 271 model predictions: {array-like} 272 273 """ 274 275 return np.argmax(self.predict_proba(X, **kwargs), axis=1)
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
277 def predict_proba(self, X, **kwargs): 278 """Predict probabilities for test data X. 279 280 Args: 281 282 X: {array-like}, shape = [n_samples, n_features] 283 Training vectors, where n_samples is the number 284 of samples and n_features is the number of features. 285 286 **kwargs: additional parameters to be passed to 287 self.cook_test_set 288 289 Returns: 290 291 probability estimates for test data: {array-like} 292 293 """ 294 if len(X.shape) == 1: 295 n_features = X.shape[0] 296 new_X = mo.rbind( 297 X.reshape(1, n_features), 298 np.ones(n_features).reshape(1, n_features), 299 ) 300 301 Z = self.cook_test_set(new_X, **kwargs) 302 303 else: 304 Z = self.cook_test_set(X, **kwargs) 305 306 ZB = mo.safe_sparse_dot( 307 Z, 308 self.beta_.reshape( 309 self.n_classes, 310 X.shape[1] + self.n_hidden_features + self.n_clusters, 311 ).T, 312 ) 313 314 if self.family == "logit": 315 exp_ZB = np.exp(ZB) 316 317 return exp_ZB / exp_ZB.sum(axis=1)[:, None] 318 319 if self.family == "expit": 320 exp_ZB = expit(ZB) 321 322 return exp_ZB / exp_ZB.sum(axis=1)[:, None] 323 324 if self.family == "erf": 325 exp_ZB = 0.5 * (1 + erf(ZB)) 326 327 return exp_ZB / exp_ZB.sum(axis=1)[:, None]
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
329 def score(self, X, y, scoring=None): 330 """Scoring function for classification. 331 332 Args: 333 334 X: {array-like}, shape = [n_samples, n_features] 335 Training vectors, where n_samples is the number 336 of samples and n_features is the number of features. 337 338 y: array-like, shape = [n_samples] 339 Target values. 340 341 scoring: str 342 scoring method (default is accuracy) 343 344 Returns: 345 346 score: float 347 """ 348 349 if scoring is None: 350 scoring = "accuracy" 351 352 if scoring == "accuracy": 353 return skm2.accuracy_score(y, self.predict(X)) 354 355 if scoring == "f1": 356 return skm2.f1_score(y, self.predict(X)) 357 358 if scoring == "precision": 359 return skm2.precision_score(y, self.predict(X)) 360 361 if scoring == "recall": 362 return skm2.recall_score(y, self.predict(X)) 363 364 if scoring == "roc_auc": 365 return skm2.roc_auc_score(y, self.predict(X)) 366 367 if scoring == "log_loss": 368 return skm2.log_loss(y, self.predict_proba(X)) 369 370 if scoring == "balanced_accuracy": 371 return skm2.balanced_accuracy_score(y, self.predict(X)) 372 373 if scoring == "average_precision": 374 return skm2.average_precision_score(y, self.predict(X)) 375 376 if scoring == "neg_brier_score": 377 return -skm2.brier_score_loss(y, self.predict_proba(X)) 378 379 if scoring == "neg_log_loss": 380 return -skm2.log_loss(y, self.predict_proba(X))
Scoring function for classification.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method (default is accuracy)
Returns:
score: float
14class GLMRegressor(GLM, RegressorMixin): 15 """Generalized 'linear' models using quasi-randomized networks (regression) 16 17 Attributes: 18 19 n_hidden_features: int 20 number of nodes in the hidden layer 21 22 lambda1: float 23 regularization parameter for GLM coefficients on original features 24 25 alpha1: float 26 controls compromize between l1 and l2 norm of GLM coefficients on original features 27 28 lambda2: float 29 regularization parameter for GLM coefficients on nonlinear features 30 31 alpha2: float 32 controls compromize between l1 and l2 norm of GLM coefficients on nonlinear features 33 34 family: str 35 "gaussian", "laplace" or "poisson" (for now) 36 37 activation_name: str 38 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 39 40 a: float 41 hyperparameter for 'prelu' or 'elu' activation function 42 43 nodes_sim: str 44 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 45 'uniform' 46 47 bias: boolean 48 indicates if the hidden layer contains a bias term (True) or not 49 (False) 50 51 dropout: float 52 regularization parameter; (random) percentage of nodes dropped out 53 of the training 54 55 direct_link: boolean 56 indicates if the original predictors are included (True) in model's 57 fitting or not (False) 58 59 n_clusters: int 60 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: 61 no clustering) 62 63 cluster_encode: bool 64 defines how the variable containing clusters is treated (default is one-hot) 65 if `False`, then labels are used, without one-hot encoding 66 67 type_clust: str 68 type of clustering method: currently k-means ('kmeans') or Gaussian 69 Mixture Model ('gmm') 70 71 type_scaling: a tuple of 3 strings 72 scaling methods for inputs, hidden layer, and clustering respectively 73 (and when relevant). 
74 Currently available: standardization ('std') or MinMax scaling ('minmax') 75 76 optimizer: object 77 optimizer, from class nnetsauce.utils.Optimizer 78 79 seed: int 80 reproducibility seed for nodes_sim=='uniform' 81 82 Attributes: 83 84 beta_: vector 85 regression coefficients 86 87 Examples: 88 89 See [https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py) 90 91 """ 92 93 # construct the object ----- 94 95 def __init__( 96 self, 97 n_hidden_features=5, 98 lambda1=0.01, 99 alpha1=0.5, 100 lambda2=0.01, 101 alpha2=0.5, 102 family="gaussian", 103 activation_name="relu", 104 a=0.01, 105 nodes_sim="sobol", 106 bias=True, 107 dropout=0, 108 direct_link=True, 109 n_clusters=2, 110 cluster_encode=True, 111 type_clust="kmeans", 112 type_scaling=("std", "std", "std"), 113 optimizer=Optimizer(), 114 seed=123, 115 ): 116 super().__init__( 117 n_hidden_features=n_hidden_features, 118 lambda1=lambda1, 119 alpha1=alpha1, 120 lambda2=lambda2, 121 alpha2=alpha2, 122 activation_name=activation_name, 123 a=a, 124 nodes_sim=nodes_sim, 125 bias=bias, 126 dropout=dropout, 127 direct_link=direct_link, 128 n_clusters=n_clusters, 129 cluster_encode=cluster_encode, 130 type_clust=type_clust, 131 type_scaling=type_scaling, 132 optimizer=optimizer, 133 seed=seed, 134 ) 135 136 self.family = family 137 138 def gaussian_loss(self, y, row_index, XB): 139 return 0.5 * np.mean(np.square(y[row_index] - XB)) 140 141 def laplace_loss(self, y, row_index, XB): 142 return 0.5 * np.mean(np.abs(y[row_index] - XB)) 143 144 def poisson_loss(self, y, row_index, XB): 145 return -np.mean(y[row_index] * XB - np.exp(XB)) 146 147 def loss_func( 148 self, beta, group_index, X, y, row_index=None, type_loss="gaussian", **kwargs 149 ): 150 res = { 151 "gaussian": self.gaussian_loss, 152 "laplace": self.laplace_loss, 153 "poisson": self.poisson_loss, 154 } 155 156 if row_index is None: 157 row_index = range(len(y)) 158 XB = self.compute_XB(X, beta=beta) 159 160 return res[type_loss](y, row_index, XB) + self.compute_penalty( 161 group_index=group_index, beta=beta 162 ) 163 164 XB = self.compute_XB(X, beta=beta, row_index=row_index) 165 166 return res[type_loss](y, row_index, XB) + self.compute_penalty( 167 group_index=group_index, beta=beta 168 ) 169 170 def fit(self, X, y, **kwargs): 171 """Fit GLM model to training data (X, y). 172 173 Args: 174 175 X: {array-like}, shape = [n_samples, n_features] 176 Training vectors, where n_samples is the number 177 of samples and n_features is the number of features. 178 179 y: array-like, shape = [n_samples] 180 Target values. 
181 182 **kwargs: additional parameters to be passed to 183 self.cook_training_set or self.obj.fit 184 185 Returns: 186 187 self: object 188 189 """ 190 191 self.beta_ = None 192 193 self.n_iter = 0 194 195 n, self.group_index = X.shape 196 197 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 198 199 n_Z = scaled_Z.shape[0] 200 201 # initialization 202 beta_ = np.linalg.lstsq(scaled_Z, centered_y, rcond=None)[0] 203 204 # optimization 205 # fit(self, loss_func, response, x0, **kwargs): 206 # loss_func(self, beta, group_index, X, y, 207 # row_index=None, type_loss="gaussian", 208 # **kwargs) 209 self.optimizer.fit( 210 self.loss_func, 211 response=centered_y, 212 x0=beta_, 213 group_index=self.group_index, 214 X=scaled_Z, 215 y=centered_y, 216 type_loss=self.family, 217 **kwargs 218 ) 219 220 self.beta_ = self.optimizer.results[0] 221 222 return self 223 224 def predict(self, X, **kwargs): 225 """Predict test data X. 226 227 Args: 228 229 X: {array-like}, shape = [n_samples, n_features] 230 Training vectors, where n_samples is the number 231 of samples and n_features is the number of features. 232 233 **kwargs: additional parameters to be passed to 234 self.cook_test_set 235 236 Returns: 237 238 model predictions: {array-like} 239 240 """ 241 242 if len(X.shape) == 1: 243 n_features = X.shape[0] 244 new_X = mo.rbind( 245 X.reshape(1, n_features), 246 np.ones(n_features).reshape(1, n_features), 247 ) 248 249 return ( 250 self.y_mean_ + np.dot(self.cook_test_set(new_X, **kwargs), self.beta_) 251 )[0] 252 253 return self.y_mean_ + np.dot(self.cook_test_set(X, **kwargs), self.beta_) 254 255 def score(self, X, y, scoring=None): 256 """Compute the score of the model. 257 258 Parameters: 259 260 X: {array-like}, shape = [n_samples, n_features] 261 Training vectors, where n_samples is the number 262 of samples and n_features is the number of features. 263 264 y: array-like, shape = [n_samples] 265 Target values. 266 267 scoring: str 268 scoring method 269 270 Returns: 271 272 score: float 273 274 """ 275 276 if scoring is None: 277 return np.sqrt(np.mean((self.predict(X) - y) ** 2)) 278 279 return skm2.get_scorer(scoring)(self, X, y)
Generalized 'linear' models using quasi-randomized networks (regression)
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
lambda1: float
regularization parameter for GLM coefficients on original features
alpha1: float
controls compromise between l1 and l2 norm of GLM coefficients on original features
lambda2: float
regularization parameter for GLM coefficients on nonlinear features
alpha2: float
controls the compromise between the l1 and l2 norms of GLM coefficients on nonlinear features
family: str
"gaussian", "laplace" or "poisson" (for now)
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
optimizer: object
optimizer, from class nnetsauce.utils.Optimizer
seed: int
reproducibility seed for nodes_sim=='uniform'
Attributes:
beta_: vector
regression coefficients
Examples:
See https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py
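For orientation, here is a minimal usage sketch based on the constructor and `fit`/`predict` listings above (the dataset, split, and parameter values are illustrative assumptions, not recommendations):

```python
import numpy as np
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

# Gaussian family, elastic-net-style penalties on original (lambda1/alpha1)
# and nonlinear (lambda2/alpha2) features
reg = ns.GLMRegressor(n_hidden_features=5,
                      lambda1=0.01, alpha1=0.5,
                      lambda2=0.01, alpha2=0.5,
                      family="gaussian", seed=123)
reg.fit(X_train, y_train)
preds = reg.predict(X_test)
print(np.sqrt(np.mean((preds - y_test) ** 2)))  # RMSE on the held-out split
```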
170 def fit(self, X, y, **kwargs): 171 """Fit GLM model to training data (X, y). 172 173 Args: 174 175 X: {array-like}, shape = [n_samples, n_features] 176 Training vectors, where n_samples is the number 177 of samples and n_features is the number of features. 178 179 y: array-like, shape = [n_samples] 180 Target values. 181 182 **kwargs: additional parameters to be passed to 183 self.cook_training_set or self.obj.fit 184 185 Returns: 186 187 self: object 188 189 """ 190 191 self.beta_ = None 192 193 self.n_iter = 0 194 195 n, self.group_index = X.shape 196 197 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 198 199 n_Z = scaled_Z.shape[0] 200 201 # initialization 202 beta_ = np.linalg.lstsq(scaled_Z, centered_y, rcond=None)[0] 203 204 # optimization 205 # fit(self, loss_func, response, x0, **kwargs): 206 # loss_func(self, beta, group_index, X, y, 207 # row_index=None, type_loss="gaussian", 208 # **kwargs) 209 self.optimizer.fit( 210 self.loss_func, 211 response=centered_y, 212 x0=beta_, 213 group_index=self.group_index, 214 X=scaled_Z, 215 y=centered_y, 216 type_loss=self.family, 217 **kwargs 218 ) 219 220 self.beta_ = self.optimizer.results[0] 221 222 return self
Fit GLM model to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
224 def predict(self, X, **kwargs): 225 """Predict test data X. 226 227 Args: 228 229 X: {array-like}, shape = [n_samples, n_features] 230 Training vectors, where n_samples is the number 231 of samples and n_features is the number of features. 232 233 **kwargs: additional parameters to be passed to 234 self.cook_test_set 235 236 Returns: 237 238 model predictions: {array-like} 239 240 """ 241 242 if len(X.shape) == 1: 243 n_features = X.shape[0] 244 new_X = mo.rbind( 245 X.reshape(1, n_features), 246 np.ones(n_features).reshape(1, n_features), 247 ) 248 249 return ( 250 self.y_mean_ + np.dot(self.cook_test_set(new_X, **kwargs), self.beta_) 251 )[0] 252 253 return self.y_mean_ + np.dot(self.cook_test_set(X, **kwargs), self.beta_)
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
New data to predict on, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
255 def score(self, X, y, scoring=None): 256 """Compute the score of the model. 257 258 Parameters: 259 260 X: {array-like}, shape = [n_samples, n_features] 261 Training vectors, where n_samples is the number 262 of samples and n_features is the number of features. 263 264 y: array-like, shape = [n_samples] 265 Target values. 266 267 scoring: str 268 scoring method 269 270 Returns: 271 272 score: float 273 274 """ 275 276 if scoring is None: 277 return np.sqrt(np.mean((self.predict(X) - y) ** 2)) 278 279 return skm2.get_scorer(scoring)(self, X, y)
Compute the score of the model.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method name (a scikit-learn scorer string); when None, the RMSE of the predictions on X is returned
Returns:
score: float
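Continuing the GLMRegressor sketch above, the two branches of `score` can be exercised as follows (the scorer name is a standard scikit-learn scorer string, used here purely for illustration):

```python
# default: root mean squared error of self.predict(X_test) against y_test
rmse = reg.score(X_test, y_test)

# any scikit-learn scorer string is dispatched to the corresponding scorer
neg_mae = reg.score(X_test, y_test, scoring="neg_mean_absolute_error")
```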
18class KernelRidge(BaseEstimator, RegressorMixin): 19 """ 20 Kernel Ridge Regression with optional GPU support, Matérn kernels, and automatic input standardization. 21 22 Parameters: 23 - alpha: float 24 Regularization parameter. 25 - kernel: str 26 Kernel type ("linear", "rbf", or "matern"). 27 - gamma: float 28 Kernel coefficient for "rbf". Ignored for other kernels. 29 - nu: float 30 Smoothness parameter for the Matérn kernel. Default is 1.5. 31 - length_scale: float 32 Length scale parameter for the Matérn kernel. Default is 1.0. 33 - backend: str 34 "cpu" or "gpu" (uses JAX if "gpu"). 35 """ 36 37 def __init__( 38 self, 39 alpha=1.0, 40 kernel="rbf", 41 gamma=None, 42 nu=1.5, 43 length_scale=1.0, 44 backend="cpu", 45 ): 46 self.alpha = alpha 47 self.alpha_ = alpha 48 self.kernel = kernel 49 self.gamma = gamma 50 self.nu = nu 51 self.length_scale = length_scale 52 self.backend = backend 53 self.scaler = StandardScaler() 54 55 if backend == "gpu" and not JAX_AVAILABLE: 56 raise ImportError( 57 "JAX is not installed. Please install JAX to use the GPU backend." 58 ) 59 60 def _linear_kernel(self, X, Y): 61 return jnp.dot(X, Y.T) if self.backend == "gpu" else np.dot(X, Y.T) 62 63 def _rbf_kernel(self, X, Y): 64 if self.gamma is None: 65 self.gamma = 1.0 / X.shape[1] 66 if self.backend == "gpu": 67 sq_dists = ( 68 jnp.sum(X**2, axis=1)[:, None] 69 + jnp.sum(Y**2, axis=1) 70 - 2 * jnp.dot(X, Y.T) 71 ) 72 return jnp.exp(-self.gamma * sq_dists) 73 else: 74 sq_dists = ( 75 np.sum(X**2, axis=1)[:, None] 76 + np.sum(Y**2, axis=1) 77 - 2 * np.dot(X, Y.T) 78 ) 79 return np.exp(-self.gamma * sq_dists) 80 81 def _matern_kernel(self, X, Y): 82 """ 83 Compute the Matérn kernel using JAX for GPU or NumPy for CPU. 84 85 Parameters: 86 - X: array-like, shape (n_samples_X, n_features) 87 - Y: array-like, shape (n_samples_Y, n_features) 88 89 Returns: 90 - Kernel matrix, shape (n_samples_X, n_samples_Y) 91 """ 92 if self.backend == "gpu": 93 # Compute pairwise distances 94 dists = jnp.sqrt(jnp.sum((X[:, None, :] - Y[None, :, :]) ** 2, axis=2)) 95 scaled_dists = jnp.sqrt(2 * self.nu) * dists / self.length_scale 96 97 # Matérn kernel formula 98 coeff = (2 ** (1 - self.nu)) / jnp.exp(gammaln(self.nu)) 99 matern_kernel = coeff * (scaled_dists**self.nu) * kv(self.nu, scaled_dists) 100 matern_kernel = jnp.where( 101 dists == 0, 1.0, matern_kernel 102 ) # Handle the case where distance is 0 103 return matern_kernel 104 else: 105 # Use NumPy for CPU 106 from scipy.special import ( 107 gammaln, 108 kv, 109 ) # Ensure scipy.special is used for CPU 110 111 dists = np.sqrt(np.sum((X[:, None, :] - Y[None, :, :]) ** 2, axis=2)) 112 scaled_dists = np.sqrt(2 * self.nu) * dists / self.length_scale 113 114 # Matérn kernel formula 115 coeff = (2 ** (1 - self.nu)) / np.exp(gammaln(self.nu)) 116 matern_kernel = coeff * (scaled_dists**self.nu) * kv(self.nu, scaled_dists) 117 matern_kernel = np.where( 118 dists == 0, 1.0, matern_kernel 119 ) # Handle the case where distance is 0 120 return matern_kernel 121 122 def _get_kernel(self, X, Y): 123 if self.kernel == "linear": 124 return self._linear_kernel(X, Y) 125 elif self.kernel == "rbf": 126 return self._rbf_kernel(X, Y) 127 elif self.kernel == "matern": 128 return self._matern_kernel(X, Y) 129 else: 130 raise ValueError(f"Unsupported kernel: {self.kernel}") 131 132 def fit(self, X, y): 133 """ 134 Fit the Kernel Ridge Regression model. 135 136 Parameters: 137 - X: array-like, shape (n_samples, n_features) 138 Training data. 
139 - y: array-like, shape (n_samples,) 140 Target values. 141 """ 142 # Standardize the inputs 143 X = self.scaler.fit_transform(X) 144 self.X_fit_ = X 145 146 # Center the response 147 self.y_mean_ = np.mean(y) 148 y_centered = y - self.y_mean_ 149 150 n_samples = X.shape[0] 151 152 # Compute the kernel matrix 153 K = self._get_kernel(X, X) 154 self.K_ = K 155 self.y_fit_ = y_centered 156 157 if isinstance(self.alpha, (list, np.ndarray)): 158 # If alpha is a list or array, compute LOOE for each alpha 159 self.alphas_ = self.alpha # Store the list of alphas 160 self.dual_coefs_ = [] # Store dual coefficients for each alpha 161 self.looe_ = [] # Store LOOE for each alpha 162 163 for alpha in self.alpha: 164 G = K + alpha * np.eye(n_samples) 165 G_inv = np.linalg.inv(G) 166 diag_G_inv = np.diag(G_inv) 167 dual_coef = np.linalg.solve(G, y_centered) 168 looe = np.sum((dual_coef / diag_G_inv) ** 2) # Compute LOOE 169 self.dual_coefs_.append(dual_coef) 170 self.looe_.append(looe) 171 172 # Select the best alpha based on the smallest LOOE 173 best_index = np.argmin(self.looe_) 174 self.alpha_ = self.alpha[best_index] 175 self.dual_coef_ = self.dual_coefs_[best_index] 176 else: 177 # If alpha is a single value, proceed as usual 178 if self.backend == "gpu": 179 self.dual_coef_ = jnp.linalg.solve( 180 K + self.alpha * jnp.eye(n_samples), y_centered 181 ) 182 else: 183 self.dual_coef_ = np.linalg.solve( 184 K + self.alpha * np.eye(n_samples), y_centered 185 ) 186 187 return self 188 189 def predict(self, X, probs=False): 190 """ 191 Predict using the Kernel Ridge Regression model. 192 193 Parameters: 194 - X: array-like, shape (n_samples, n_features) 195 Test data. 196 197 Returns: 198 - Predicted values, shape (n_samples,). 199 """ 200 # Standardize the inputs 201 X = self.scaler.transform(X) 202 K = self._get_kernel(X, self.X_fit_) 203 if self.backend == "gpu": 204 preds = jnp.dot(K, self.dual_coef_) + self.y_mean_ 205 if probs: 206 # Compute similarity to self.X_fit_ 207 similarities = jnp.dot( 208 preds, self.X_fit_.T 209 ) # Shape: (n_samples, n_fit_) 210 # Apply softmax to get probabilities 211 return jaxsoftmax(similarities, axis=1) 212 return preds 213 else: 214 preds = np.dot(K, self.dual_coef_) + self.y_mean_ 215 if probs: 216 # Compute similarity to self.X_fit_ 217 similarities = np.dot( 218 preds, self.X_fit_.T 219 ) # Shape: (n_samples, n_fit_) 220 # Apply softmax to get probabilities 221 return softmax(similarities, axis=1) 222 return preds 223 224 def partial_fit(self, X, y): 225 """ 226 Incrementally fit the Kernel Ridge Regression model with new data using a recursive approach. 227 228 Parameters: 229 - X: array-like, shape (n_samples, n_features) 230 New training data. 231 - y: array-like, shape (n_samples,) 232 New target values. 233 234 Returns: 235 - self: object 236 The updated model. 
237 """ 238 # Standardize the inputs 239 X = ( 240 self.scaler.fit_transform(X) 241 if not hasattr(self, "X_fit_") 242 else self.scaler.transform(X) 243 ) 244 245 if not hasattr(self, "X_fit_"): 246 # Initialize with the first batch of data 247 self.X_fit_ = X 248 249 # Center the response 250 self.y_mean_ = np.mean(y) 251 y_centered = y - self.y_mean_ 252 self.y_fit_ = y_centered 253 254 n_samples = X.shape[0] 255 256 # Compute the kernel matrix for the initial data 257 self.K_ = self._get_kernel(X, X) 258 259 # Initialize dual coefficients for each alpha 260 if isinstance(self.alpha, (list, np.ndarray)): 261 self.dual_coefs_ = [np.zeros(n_samples) for _ in self.alpha] 262 else: 263 self.dual_coef_ = np.zeros(n_samples) 264 else: 265 # Incrementally update with new data 266 y_centered = y - self.y_mean_ # Center the new batch of responses 267 for x_new, y_new in zip(X, y_centered): 268 x_new = x_new.reshape(1, -1) # Ensure x_new is 2D 269 k_new = self._get_kernel(self.X_fit_, x_new).flatten() 270 271 # Compute the kernel value for the new data point 272 k_self = self._get_kernel(x_new, x_new).item() 273 274 if isinstance(self.alpha, (list, np.ndarray)): 275 # Update dual coefficients for each alpha 276 for idx, alpha in enumerate(self.alpha): 277 gamma_new = 1 / (k_self + alpha) 278 residual = y_new - np.dot(self.dual_coefs_[idx], k_new) 279 self.dual_coefs_[idx] = np.append( 280 self.dual_coefs_[idx], gamma_new * residual 281 ) 282 else: 283 # Update dual coefficients for a single alpha 284 gamma_new = 1 / (k_self + self.alpha) 285 residual = y_new - np.dot(self.dual_coef_, k_new) 286 self.dual_coef_ = np.append(self.dual_coef_, gamma_new * residual) 287 288 # Update the kernel matrix 289 self.K_ = np.block( 290 [[self.K_, k_new[:, None]], [k_new[None, :], np.array([[k_self]])]] 291 ) 292 293 # Update the stored data 294 self.X_fit_ = np.vstack([self.X_fit_, x_new]) 295 self.y_fit_ = np.append(self.y_fit_, y_new) 296 297 # Select the best alpha based on LOOE after the batch 298 if isinstance(self.alpha, (list, np.ndarray)): 299 self.looe_ = [] 300 for idx, alpha in enumerate(self.alpha): 301 G = self.K_ + alpha * np.eye(self.K_.shape[0]) 302 G_inv = np.linalg.inv(G) 303 diag_G_inv = np.diag(G_inv) 304 looe = np.sum((self.dual_coefs_[idx] / diag_G_inv) ** 2) 305 self.looe_.append(looe) 306 307 # Select the best alpha 308 best_index = np.argmin(self.looe_) 309 self.alpha_ = self.alpha[best_index] 310 self.dual_coef_ = self.dual_coefs_[best_index] 311 312 return self
Kernel Ridge Regression with optional GPU support, Matérn kernels, and automatic input standardization.
Parameters:
- alpha: float
  Regularization parameter.
- kernel: str
  Kernel type ("linear", "rbf", or "matern").
- gamma: float
  Kernel coefficient for "rbf". Ignored for other kernels.
- nu: float
  Smoothness parameter for the Matérn kernel. Default is 1.5.
- length_scale: float
  Length scale parameter for the Matérn kernel. Default is 1.0.
- backend: str
  "cpu" or "gpu" (uses JAX if "gpu").
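A minimal usage sketch, assuming the same diabetes split as in the GLM example above; passing a list of `alpha` values triggers the leave-one-out-error (LOOE) selection implemented in `fit`:

```python
import nnetsauce as ns

# single regularization value, RBF kernel on standardized inputs
krr = ns.KernelRidge(alpha=1.0, kernel="rbf").fit(X_train, y_train)
preds = krr.predict(X_test)

# a list of alphas: fit() computes a LOOE score per value and keeps the best one
krr_grid = ns.KernelRidge(alpha=[0.01, 0.1, 1.0, 10.0],
                          kernel="matern", nu=1.5).fit(X_train, y_train)
print(krr_grid.alpha_)  # alpha retained after LOOE selection
```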
132 def fit(self, X, y): 133 """ 134 Fit the Kernel Ridge Regression model. 135 136 Parameters: 137 - X: array-like, shape (n_samples, n_features) 138 Training data. 139 - y: array-like, shape (n_samples,) 140 Target values. 141 """ 142 # Standardize the inputs 143 X = self.scaler.fit_transform(X) 144 self.X_fit_ = X 145 146 # Center the response 147 self.y_mean_ = np.mean(y) 148 y_centered = y - self.y_mean_ 149 150 n_samples = X.shape[0] 151 152 # Compute the kernel matrix 153 K = self._get_kernel(X, X) 154 self.K_ = K 155 self.y_fit_ = y_centered 156 157 if isinstance(self.alpha, (list, np.ndarray)): 158 # If alpha is a list or array, compute LOOE for each alpha 159 self.alphas_ = self.alpha # Store the list of alphas 160 self.dual_coefs_ = [] # Store dual coefficients for each alpha 161 self.looe_ = [] # Store LOOE for each alpha 162 163 for alpha in self.alpha: 164 G = K + alpha * np.eye(n_samples) 165 G_inv = np.linalg.inv(G) 166 diag_G_inv = np.diag(G_inv) 167 dual_coef = np.linalg.solve(G, y_centered) 168 looe = np.sum((dual_coef / diag_G_inv) ** 2) # Compute LOOE 169 self.dual_coefs_.append(dual_coef) 170 self.looe_.append(looe) 171 172 # Select the best alpha based on the smallest LOOE 173 best_index = np.argmin(self.looe_) 174 self.alpha_ = self.alpha[best_index] 175 self.dual_coef_ = self.dual_coefs_[best_index] 176 else: 177 # If alpha is a single value, proceed as usual 178 if self.backend == "gpu": 179 self.dual_coef_ = jnp.linalg.solve( 180 K + self.alpha * jnp.eye(n_samples), y_centered 181 ) 182 else: 183 self.dual_coef_ = np.linalg.solve( 184 K + self.alpha * np.eye(n_samples), y_centered 185 ) 186 187 return self
Fit the Kernel Ridge Regression model.
Parameters:
- X: array-like, shape (n_samples, n_features)
  Training data.
- y: array-like, shape (n_samples,)
  Target values.
189 def predict(self, X, probs=False): 190 """ 191 Predict using the Kernel Ridge Regression model. 192 193 Parameters: 194 - X: array-like, shape (n_samples, n_features) 195 Test data. 196 197 Returns: 198 - Predicted values, shape (n_samples,). 199 """ 200 # Standardize the inputs 201 X = self.scaler.transform(X) 202 K = self._get_kernel(X, self.X_fit_) 203 if self.backend == "gpu": 204 preds = jnp.dot(K, self.dual_coef_) + self.y_mean_ 205 if probs: 206 # Compute similarity to self.X_fit_ 207 similarities = jnp.dot( 208 preds, self.X_fit_.T 209 ) # Shape: (n_samples, n_fit_) 210 # Apply softmax to get probabilities 211 return jaxsoftmax(similarities, axis=1) 212 return preds 213 else: 214 preds = np.dot(K, self.dual_coef_) + self.y_mean_ 215 if probs: 216 # Compute similarity to self.X_fit_ 217 similarities = np.dot( 218 preds, self.X_fit_.T 219 ) # Shape: (n_samples, n_fit_) 220 # Apply softmax to get probabilities 221 return softmax(similarities, axis=1) 222 return preds
Predict using the Kernel Ridge Regression model.
Parameters:
- X: array-like, shape (n_samples, n_features)
  Test data.
Returns:
- Predicted values, shape (n_samples,).
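The class also exposes a `partial_fit` method (see the listing above) for recursive, point-by-point updates of the dual coefficients. A small streaming sketch, continuing with the same arrays; note that, per the listing, the first batch only initializes the scaler, kernel matrix, and zero coefficients, while later batches perform the recursive updates:

```python
krr_online = ns.KernelRidge(alpha=1.0, kernel="rbf")
krr_online.partial_fit(X_train[:50], y_train[:50])        # initializes scaler, kernel matrix, zero dual coefficients
krr_online.partial_fit(X_train[50:100], y_train[50:100])  # per-point recursive updates of the dual coefficients
preds_online = krr_online.predict(X_test)
```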
757class LazyClassifier(LazyDeepClassifier): 758 """ 759 Fitting -- almost -- all the classification algorithms with 760 nnetsauce's CustomClassifier and returning their scores (no layers). 761 762 Parameters: 763 764 verbose: int, optional (default=0) 765 Any positive number for verbosity. 766 767 ignore_warnings: bool, optional (default=True) 768 When set to True, the warning related to algorigms that are not able to run are ignored. 769 770 custom_metric: function, optional (default=None) 771 When function is provided, models are evaluated based on the custom evaluation metric provided. 772 773 predictions: bool, optional (default=False) 774 When set to True, the predictions of all the models models are returned as dataframe. 775 776 sort_by: string, optional (default='Accuracy') 777 Sort models by a metric. Available options are 'Accuracy', 'Balanced Accuracy', 'ROC AUC', 'F1 Score' 778 or a custom metric identified by its name and provided by custom_metric. 779 780 random_state: int, optional (default=42) 781 Reproducibiility seed. 782 783 estimators: list, optional (default='all') 784 list of Estimators names or just 'all' (default='all') 785 786 preprocess: bool 787 preprocessing is done when set to True 788 789 n_jobs : int, when possible, run in parallel 790 For now, only used by individual models that support it. 791 792 All the other parameters are the same as CustomClassifier's. 793 794 Attributes: 795 796 models_: dict-object 797 Returns a dictionary with each model pipeline as value 798 with key as name of models. 799 800 best_model_: object 801 Returns the best model pipeline based on the sort_by metric. 802 803 Examples: 804 805 import nnetsauce as ns 806 import numpy as np 807 from sklearn import datasets 808 from sklearn.utils import shuffle 809 810 dataset = datasets.load_iris() 811 X = dataset.data 812 y = dataset.target 813 X, y = shuffle(X, y, random_state=123) 814 X = X.astype(np.float32) 815 y = y.astype(np.float32) 816 X_train, X_test = X[:100], X[100:] 817 y_train, y_test = y[:100], y[100:] 818 819 clf = ns.LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None) 820 models, predictions = clf.fit(X_train, X_test, y_train, y_test) 821 model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test) 822 print(models) 823 824 """ 825 826 def __init__( 827 self, 828 verbose=0, 829 ignore_warnings=True, 830 custom_metric=None, 831 predictions=False, 832 sort_by="Accuracy", 833 random_state=42, 834 estimators="all", 835 preprocess=False, 836 n_jobs=None, 837 # CustomClassifier attributes 838 obj=None, 839 n_hidden_features=5, 840 activation_name="relu", 841 a=0.01, 842 nodes_sim="sobol", 843 bias=True, 844 dropout=0, 845 direct_link=True, 846 n_clusters=2, 847 cluster_encode=True, 848 type_clust="kmeans", 849 type_scaling=("std", "std", "std"), 850 col_sample=1, 851 row_sample=1, 852 seed=123, 853 backend="cpu", 854 ): 855 super().__init__( 856 verbose=verbose, 857 ignore_warnings=ignore_warnings, 858 custom_metric=custom_metric, 859 predictions=predictions, 860 sort_by=sort_by, 861 random_state=random_state, 862 estimators=estimators, 863 preprocess=preprocess, 864 n_jobs=n_jobs, 865 n_layers=1, 866 obj=obj, 867 n_hidden_features=n_hidden_features, 868 activation_name=activation_name, 869 a=a, 870 nodes_sim=nodes_sim, 871 bias=bias, 872 dropout=dropout, 873 direct_link=direct_link, 874 n_clusters=n_clusters, 875 cluster_encode=cluster_encode, 876 type_clust=type_clust, 877 type_scaling=type_scaling, 878 col_sample=col_sample, 879 row_sample=row_sample, 
880 seed=seed, 881 backend=backend, 882 )
Fitting -- almost -- all the classification algorithms with nnetsauce's CustomClassifier and returning their scores (no layers).
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, warnings related to algorithms that are not able to run are ignored.
custom_metric: function, optional (default=None)
When function is provided, models are evaluated based on the custom evaluation metric provided.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models are returned as a data frame.
sort_by: string, optional (default='Accuracy')
Sort models by a metric. Available options are 'Accuracy', 'Balanced Accuracy', 'ROC AUC', 'F1 Score'
or a custom metric identified by its name and provided by custom_metric.
random_state: int, optional (default=42)
Reproducibility seed.
estimators: list, optional (default='all')
list of estimator names, or just 'all' (default='all')
preprocess: bool
preprocessing is done when set to True
n_jobs : int, when possible, run in parallel
For now, only used by individual models that support it.
All the other parameters are the same as CustomClassifier's.
Attributes:
models_: dict-object
Returns a dictionary with each model pipeline as value
and the model name as key.
best_model_: object
Returns the best model pipeline based on the sort_by metric.
Examples:
import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle
dataset = datasets.load_iris()
X = dataset.data
y = dataset.target
X, y = shuffle(X, y, random_state=123)
X = X.astype(np.float32)
y = y.astype(np.float32)
X_train, X_test = X[:100], X[100:]
y_train, y_test = y[:100], y[100:]
clf = ns.LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)
model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test)
print(models)
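As a follow-up sketch (illustrative only): a custom metric with signature `metric(y_true, y_pred)` can both be reported and drive the ranking; per the `fit` listing, the corresponding column in the scores frame is labelled 'Custom metric', and `predictions=True` makes `fit` also return the per-model predictions:

```python
from sklearn.metrics import cohen_kappa_score

clf_custom = ns.LazyClassifier(verbose=0, ignore_warnings=True,
                               custom_metric=cohen_kappa_score,
                               sort_by="Custom metric",
                               predictions=True)
scores, preds = clf_custom.fit(X_train, X_test, y_train, y_test)
best = clf_custom.get_best_model()  # pipeline ranked first by the sort_by column
```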
646class LazyRegressor(LazyDeepRegressor): 647 """ 648 Fitting -- almost -- all the regression algorithms with 649 nnetsauce's CustomRegressor and returning their scores. 650 651 Parameters: 652 653 verbose: int, optional (default=0) 654 Any positive number for verbosity. 655 656 ignore_warnings: bool, optional (default=True) 657 When set to True, the warning related to algorigms that are not able to run are ignored. 658 659 custom_metric: function, optional (default=None) 660 When function is provided, models are evaluated based on the custom evaluation metric provided. 661 662 predictions: bool, optional (default=False) 663 When set to True, the predictions of all the models models are returned as dataframe. 664 665 sort_by: string, optional (default='RMSE') 666 Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken' and 'Custom Metric'. 667 or a custom metric identified by its name and provided by custom_metric. 668 669 random_state: int, optional (default=42) 670 Reproducibiility seed. 671 672 estimators: list, optional (default='all') 673 list of Estimators names or just 'all' (default='all') 674 675 preprocess: bool 676 preprocessing is done when set to True 677 678 n_jobs : int, when possible, run in parallel 679 For now, only used by individual models that support it. 680 681 All the other parameters are the same as CustomRegressor's. 682 683 Attributes: 684 685 models_: dict-object 686 Returns a dictionary with each model pipeline as value 687 with key as name of models. 688 689 best_model_: object 690 Returns the best model pipeline based on the sort_by metric. 691 692 Examples: 693 694 import nnetsauce as ns 695 import numpy as np 696 from sklearn import datasets 697 from sklearn.utils import shuffle 698 699 diabetes = datasets.load_diabetes() 700 X, y = shuffle(diabetes.data, diabetes.target, random_state=13) 701 X = X.astype(np.float32) 702 703 offset = int(X.shape[0] * 0.9) 704 X_train, y_train = X[:offset], y[:offset] 705 X_test, y_test = X[offset:], y[offset:] 706 707 reg = ns.LazyRegressor(verbose=0, ignore_warnings=False, 708 custom_metric=None) 709 models, predictions = reg.fit(X_train, X_test, y_train, y_test) 710 print(models) 711 712 """ 713 714 def __init__( 715 self, 716 verbose=0, 717 ignore_warnings=True, 718 custom_metric=None, 719 predictions=False, 720 sort_by="RMSE", 721 random_state=42, 722 estimators="all", 723 preprocess=False, 724 n_jobs=None, 725 # CustomRegressor attributes 726 obj=None, 727 n_hidden_features=5, 728 activation_name="relu", 729 a=0.01, 730 nodes_sim="sobol", 731 bias=True, 732 dropout=0, 733 direct_link=True, 734 n_clusters=2, 735 cluster_encode=True, 736 type_clust="kmeans", 737 type_scaling=("std", "std", "std"), 738 col_sample=1, 739 row_sample=1, 740 seed=123, 741 backend="cpu", 742 ): 743 super().__init__( 744 verbose=verbose, 745 ignore_warnings=ignore_warnings, 746 custom_metric=custom_metric, 747 predictions=predictions, 748 sort_by=sort_by, 749 random_state=random_state, 750 estimators=estimators, 751 preprocess=preprocess, 752 n_jobs=n_jobs, 753 n_layers=1, 754 obj=obj, 755 n_hidden_features=n_hidden_features, 756 activation_name=activation_name, 757 a=a, 758 nodes_sim=nodes_sim, 759 bias=bias, 760 dropout=dropout, 761 direct_link=direct_link, 762 n_clusters=n_clusters, 763 cluster_encode=cluster_encode, 764 type_clust=type_clust, 765 type_scaling=type_scaling, 766 col_sample=col_sample, 767 row_sample=row_sample, 768 seed=seed, 769 backend=backend, 770 )
Fitting -- almost -- all the regression algorithms with nnetsauce's CustomRegressor and returning their scores.
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, warnings related to algorithms that are not able to run are ignored.
custom_metric: function, optional (default=None)
When function is provided, models are evaluated based on the custom evaluation metric provided.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models are returned as a data frame.
sort_by: string, optional (default='RMSE')
Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken',
or a custom metric identified by its name and provided by custom_metric.
random_state: int, optional (default=42)
Reproducibility seed.
estimators: list, optional (default='all')
list of estimator names, or just 'all' (default='all')
preprocess: bool
preprocessing is done when set to True
n_jobs : int, when possible, run in parallel
For now, only used by individual models that support it.
All the other parameters are the same as CustomRegressor's.
Attributes:
models_: dict-object
Returns a dictionary with each model pipeline as value
and the model name as key.
best_model_: object
Returns the best model pipeline based on the sort_by metric.
Examples:
import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle
diabetes = datasets.load_diabetes()
X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
X = X.astype(np.float32)
offset = int(X.shape[0] * 0.9)
X_train, y_train = X[:offset], y[:offset]
X_test, y_test = X[offset:], y[offset:]
reg = ns.LazyRegressor(verbose=0, ignore_warnings=False,
custom_metric=None)
models, predictions = reg.fit(X_train, X_test, y_train, y_test)
print(models)
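The sweep can also be restricted to a subset of scikit-learn regressors by name (a sketch, assuming the same name-based filtering shown in the classifier listing; the names below are standard scikit-learn estimators):

```python
reg_subset = ns.LazyRegressor(verbose=0, ignore_warnings=True,
                              estimators=["Ridge", "ElasticNet", "RandomForestRegressor"],
                              predictions=True)
scores, preds = reg_subset.fit(X_train, X_test, y_train, y_test)
print(scores.head())
```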
94class LazyDeepClassifier(Custom, ClassifierMixin): 95 """ 96 97 Fitting -- almost -- all the classification algorithms with layers of 98 nnetsauce's CustomClassifier and returning their scores. 99 100 Parameters: 101 102 verbose: int, optional (default=0) 103 Any positive number for verbosity. 104 105 ignore_warnings: bool, optional (default=True) 106 When set to True, the warning related to algorigms that are not 107 able to run are ignored. 108 109 custom_metric: function, optional (default=None) 110 When function is provided, models are evaluated based on the custom 111 evaluation metric provided. 112 113 predictions: bool, optional (default=False) 114 When set to True, the predictions of all the models models are 115 returned as data frame. 116 117 sort_by: string, optional (default='Accuracy') 118 Sort models by a metric. Available options are 'Accuracy', 119 'Balanced Accuracy', 'ROC AUC', 'F1 Score' or a custom metric 120 identified by its name and provided by custom_metric. 121 122 random_state: int, optional (default=42) 123 Reproducibiility seed. 124 125 estimators: list, optional (default='all') 126 list of Estimators names or just 'all' for > 90 classifiers 127 (default='all') 128 129 preprocess: bool, preprocessing is done when set to True 130 131 n_jobs: int, when possible, run in parallel 132 For now, only used by individual models that support it. 133 134 n_layers: int, optional (default=3) 135 Number of layers of CustomClassifiers to be used. 136 137 All the other parameters are the same as CustomClassifier's. 138 139 Attributes: 140 141 models_: dict-object 142 Returns a dictionary with each model pipeline as value 143 with key as name of models. 144 145 best_model_: object 146 Returns the best model pipeline. 147 148 Examples 149 150 ```python 151 import nnetsauce as ns 152 from sklearn.datasets import load_breast_cancer 153 from sklearn.model_selection import train_test_split 154 data = load_breast_cancer() 155 X = data.data 156 y= data.target 157 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, 158 random_state=123) 159 clf = ns.LazyDeepClassifier(verbose=0, ignore_warnings=True, custom_metric=None) 160 models, predictions = clf.fit(X_train, X_test, y_train, y_test) 161 model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test) 162 print(models) 163 ``` 164 165 """ 166 167 def __init__( 168 self, 169 verbose=0, 170 ignore_warnings=True, 171 custom_metric=None, 172 predictions=False, 173 sort_by="Accuracy", 174 random_state=42, 175 estimators="all", 176 preprocess=False, 177 n_jobs=None, 178 # Defining depth 179 n_layers=3, 180 # CustomClassifier attributes 181 obj=None, 182 n_hidden_features=5, 183 activation_name="relu", 184 a=0.01, 185 nodes_sim="sobol", 186 bias=True, 187 dropout=0, 188 direct_link=True, 189 n_clusters=2, 190 cluster_encode=True, 191 type_clust="kmeans", 192 type_scaling=("std", "std", "std"), 193 col_sample=1, 194 row_sample=1, 195 seed=123, 196 backend="cpu", 197 ): 198 self.verbose = verbose 199 self.ignore_warnings = ignore_warnings 200 self.custom_metric = custom_metric 201 self.predictions = predictions 202 self.sort_by = sort_by 203 self.models_ = {} 204 self.best_model_ = None 205 self.random_state = random_state 206 self.estimators = estimators 207 self.preprocess = preprocess 208 self.n_layers = n_layers - 1 209 self.n_jobs = n_jobs 210 super().__init__( 211 obj=obj, 212 n_hidden_features=n_hidden_features, 213 activation_name=activation_name, 214 a=a, 215 nodes_sim=nodes_sim, 216 bias=bias, 217 
dropout=dropout, 218 direct_link=direct_link, 219 n_clusters=n_clusters, 220 cluster_encode=cluster_encode, 221 type_clust=type_clust, 222 type_scaling=type_scaling, 223 col_sample=col_sample, 224 row_sample=row_sample, 225 seed=seed, 226 backend=backend, 227 ) 228 229 def fit(self, X_train, X_test, y_train, y_test): 230 """Fit classifiers to X_train and y_train, predict and score on X_test, 231 y_test. 232 233 Parameters: 234 235 X_train: array-like, 236 Training vectors, where rows is the number of samples 237 and columns is the number of features. 238 239 X_test: array-like, 240 Testing vectors, where rows is the number of samples 241 and columns is the number of features. 242 243 y_train: array-like, 244 Training vectors, where rows is the number of samples 245 and columns is the number of features. 246 247 y_test: array-like, 248 Testing vectors, where rows is the number of samples 249 and columns is the number of features. 250 251 Returns: 252 253 scores: Pandas DataFrame 254 Returns metrics of all the models in a Pandas DataFrame. 255 256 predictions: Pandas DataFrame 257 Returns predictions of all the models in a Pandas DataFrame. 258 """ 259 Accuracy = [] 260 B_Accuracy = [] 261 ROC_AUC = [] 262 F1 = [] 263 names = [] 264 TIME = [] 265 predictions = {} 266 267 if self.custom_metric is not None: 268 CUSTOM_METRIC = [] 269 270 if isinstance(X_train, np.ndarray): 271 X_train = pd.DataFrame(X_train) 272 X_test = pd.DataFrame(X_test) 273 274 numeric_features = X_train.select_dtypes(include=[np.number]).columns 275 categorical_features = X_train.select_dtypes(include=["object"]).columns 276 277 categorical_low, categorical_high = get_card_split( 278 X_train, categorical_features 279 ) 280 281 if self.preprocess is True: 282 preprocessor = ColumnTransformer( 283 transformers=[ 284 ("numeric", numeric_transformer, numeric_features), 285 ( 286 "categorical_low", 287 categorical_transformer_low, 288 categorical_low, 289 ), 290 ( 291 "categorical_high", 292 categorical_transformer_high, 293 categorical_high, 294 ), 295 ] 296 ) 297 298 # baseline models 299 try: 300 baseline_names = ["RandomForestClassifier", "XGBClassifier"] 301 baseline_models = [RandomForestClassifier(), xgb.XGBClassifier()] 302 except Exception as exception: 303 baseline_names = ["RandomForestClassifier"] 304 baseline_models = [RandomForestClassifier()] 305 306 for name, model in zip(baseline_names, baseline_models): 307 start = time.time() 308 try: 309 model.fit(X_train, y_train) 310 self.models_[name] = model 311 y_pred = model.predict(X_test) 312 accuracy = accuracy_score(y_test, y_pred, normalize=True) 313 b_accuracy = balanced_accuracy_score(y_test, y_pred) 314 f1 = f1_score(y_test, y_pred, average="weighted") 315 try: 316 roc_auc = roc_auc_score(y_test, y_pred) 317 except Exception as exception: 318 roc_auc = None 319 if self.ignore_warnings is False: 320 print("ROC AUC couldn't be calculated for " + name) 321 print(exception) 322 names.append(name) 323 Accuracy.append(accuracy) 324 B_Accuracy.append(b_accuracy) 325 ROC_AUC.append(roc_auc) 326 F1.append(f1) 327 TIME.append(time.time() - start) 328 if self.custom_metric is not None: 329 custom_metric = self.custom_metric(y_test, y_pred) 330 CUSTOM_METRIC.append(custom_metric) 331 if self.verbose > 0: 332 if self.custom_metric is not None: 333 print( 334 { 335 "Model": name, 336 "Accuracy": accuracy, 337 "Balanced Accuracy": b_accuracy, 338 "ROC AUC": roc_auc, 339 "F1 Score": f1, 340 self.custom_metric.__name__: custom_metric, 341 "Time taken": time.time() - start, 
342 } 343 ) 344 else: 345 print( 346 { 347 "Model": name, 348 "Accuracy": accuracy, 349 "Balanced Accuracy": b_accuracy, 350 "ROC AUC": roc_auc, 351 "F1 Score": f1, 352 "Time taken": time.time() - start, 353 } 354 ) 355 if self.predictions: 356 predictions[name] = y_pred 357 except Exception as exception: 358 if self.ignore_warnings is False: 359 print(name + " model failed to execute") 360 print(exception) 361 362 if self.estimators == "all": 363 self.classifiers = [ 364 item 365 for sublist in [ 366 DEEPCLASSIFIERS, 367 DEEPMULTITASKCLASSIFIERS, 368 DEEPSIMPLEMULTITASKCLASSIFIERS, 369 ] 370 for item in sublist 371 ] 372 else: 373 self.classifiers = ( 374 [ 375 ("DeepCustomClassifier(" + est[0] + ")", est[1]) 376 for est in all_estimators() 377 if ( 378 issubclass(est[1], ClassifierMixin) 379 and (est[0] in self.estimators) 380 ) 381 ] 382 + [ 383 ( 384 "DeepMultitaskClassifier(" + est[0] + ")", 385 partial(MultitaskClassifier, obj=est[1]()), 386 ) 387 for est in all_estimators() 388 if ( 389 issubclass(est[1], RegressorMixin) 390 and (est[0] in self.estimators) 391 ) 392 ] 393 + [ 394 ( 395 "DeepSimpleMultitaskClassifier(" + est[0] + ")", 396 partial(SimpleMultitaskClassifier, obj=est[1]()), 397 ) 398 for est in all_estimators() 399 if ( 400 issubclass(est[1], RegressorMixin) 401 and (est[0] in self.estimators) 402 ) 403 ] 404 ) 405 406 if self.preprocess is True: 407 408 for name, model in tqdm(self.classifiers): # do parallel exec 409 410 other_args = {} # use this trick for `random_state` too --> refactor 411 try: 412 if ( 413 "n_jobs" in model().get_params().keys() 414 and name.find("LogisticRegression") == -1 415 ): 416 other_args["n_jobs"] = self.n_jobs 417 except Exception: 418 pass 419 420 start = time.time() 421 422 try: 423 if "random_state" in model().get_params().keys(): 424 layer_clf = CustomClassifier( 425 obj=model(random_state=self.random_state), 426 n_hidden_features=self.n_hidden_features, 427 activation_name=self.activation_name, 428 a=self.a, 429 nodes_sim=self.nodes_sim, 430 bias=self.bias, 431 dropout=self.dropout, 432 direct_link=self.direct_link, 433 n_clusters=self.n_clusters, 434 cluster_encode=self.cluster_encode, 435 type_clust=self.type_clust, 436 type_scaling=self.type_scaling, 437 col_sample=self.col_sample, 438 row_sample=self.row_sample, 439 seed=self.seed, 440 backend=self.backend, 441 cv_calibration=None, 442 ) 443 444 else: 445 layer_clf = CustomClassifier( 446 obj=model(), 447 n_hidden_features=self.n_hidden_features, 448 activation_name=self.activation_name, 449 a=self.a, 450 nodes_sim=self.nodes_sim, 451 bias=self.bias, 452 dropout=self.dropout, 453 direct_link=self.direct_link, 454 n_clusters=self.n_clusters, 455 cluster_encode=self.cluster_encode, 456 type_clust=self.type_clust, 457 type_scaling=self.type_scaling, 458 col_sample=self.col_sample, 459 row_sample=self.row_sample, 460 seed=self.seed, 461 backend=self.backend, 462 cv_calibration=None, 463 ) 464 465 layer_clf.fit(X_train, y_train) 466 467 for _ in range(self.n_layers): 468 layer_clf = deepcopy( 469 CustomClassifier( 470 obj=layer_clf, 471 n_hidden_features=self.n_hidden_features, 472 activation_name=self.activation_name, 473 a=self.a, 474 nodes_sim=self.nodes_sim, 475 bias=self.bias, 476 dropout=self.dropout, 477 direct_link=self.direct_link, 478 n_clusters=self.n_clusters, 479 cluster_encode=self.cluster_encode, 480 type_clust=self.type_clust, 481 type_scaling=self.type_scaling, 482 col_sample=self.col_sample, 483 row_sample=self.row_sample, 484 seed=self.seed, 485 
backend=self.backend, 486 cv_calibration=None, 487 ) 488 ) 489 490 pipe = Pipeline( 491 [ 492 ("preprocessor", preprocessor), 493 ("classifier", layer_clf), 494 ] 495 ) 496 497 pipe.fit(X_train, y_train) 498 self.models_[name] = pipe 499 y_pred = pipe.predict(X_test) 500 accuracy = accuracy_score(y_test, y_pred, normalize=True) 501 b_accuracy = balanced_accuracy_score(y_test, y_pred) 502 f1 = f1_score(y_test, y_pred, average="weighted") 503 try: 504 roc_auc = roc_auc_score(y_test, y_pred) 505 except Exception as exception: 506 roc_auc = None 507 if self.ignore_warnings is False: 508 print("ROC AUC couldn't be calculated for " + name) 509 print(exception) 510 names.append(name) 511 Accuracy.append(accuracy) 512 B_Accuracy.append(b_accuracy) 513 ROC_AUC.append(roc_auc) 514 F1.append(f1) 515 TIME.append(time.time() - start) 516 if self.custom_metric is not None: 517 custom_metric = self.custom_metric(y_test, y_pred) 518 CUSTOM_METRIC.append(custom_metric) 519 if self.verbose > 0: 520 if self.custom_metric is not None: 521 print( 522 { 523 "Model": name, 524 "Accuracy": accuracy, 525 "Balanced Accuracy": b_accuracy, 526 "ROC AUC": roc_auc, 527 "F1 Score": f1, 528 self.custom_metric.__name__: custom_metric, 529 "Time taken": time.time() - start, 530 } 531 ) 532 else: 533 print( 534 { 535 "Model": name, 536 "Accuracy": accuracy, 537 "Balanced Accuracy": b_accuracy, 538 "ROC AUC": roc_auc, 539 "F1 Score": f1, 540 "Time taken": time.time() - start, 541 } 542 ) 543 if self.predictions: 544 predictions[name] = y_pred 545 except Exception as exception: 546 if self.ignore_warnings is False: 547 print(name + " model failed to execute") 548 print(exception) 549 550 else: # no preprocessing 551 552 for name, model in tqdm(self.classifiers): # do parallel exec 553 start = time.time() 554 try: 555 if "random_state" in model().get_params().keys(): 556 layer_clf = CustomClassifier( 557 obj=model(random_state=self.random_state), 558 n_hidden_features=self.n_hidden_features, 559 activation_name=self.activation_name, 560 a=self.a, 561 nodes_sim=self.nodes_sim, 562 bias=self.bias, 563 dropout=self.dropout, 564 direct_link=self.direct_link, 565 n_clusters=self.n_clusters, 566 cluster_encode=self.cluster_encode, 567 type_clust=self.type_clust, 568 type_scaling=self.type_scaling, 569 col_sample=self.col_sample, 570 row_sample=self.row_sample, 571 seed=self.seed, 572 backend=self.backend, 573 cv_calibration=None, 574 ) 575 576 else: 577 layer_clf = CustomClassifier( 578 obj=model(), 579 n_hidden_features=self.n_hidden_features, 580 activation_name=self.activation_name, 581 a=self.a, 582 nodes_sim=self.nodes_sim, 583 bias=self.bias, 584 dropout=self.dropout, 585 direct_link=self.direct_link, 586 n_clusters=self.n_clusters, 587 cluster_encode=self.cluster_encode, 588 type_clust=self.type_clust, 589 type_scaling=self.type_scaling, 590 col_sample=self.col_sample, 591 row_sample=self.row_sample, 592 seed=self.seed, 593 backend=self.backend, 594 cv_calibration=None, 595 ) 596 597 layer_clf.fit(X_train, y_train) 598 599 for _ in range(self.n_layers): 600 layer_clf = deepcopy( 601 CustomClassifier( 602 obj=layer_clf, 603 n_hidden_features=self.n_hidden_features, 604 activation_name=self.activation_name, 605 a=self.a, 606 nodes_sim=self.nodes_sim, 607 bias=self.bias, 608 dropout=self.dropout, 609 direct_link=self.direct_link, 610 n_clusters=self.n_clusters, 611 cluster_encode=self.cluster_encode, 612 type_clust=self.type_clust, 613 type_scaling=self.type_scaling, 614 col_sample=self.col_sample, 615 
row_sample=self.row_sample, 616 seed=self.seed, 617 backend=self.backend, 618 cv_calibration=None, 619 ) 620 ) 621 622 # layer_clf.fit(X_train, y_train) 623 624 layer_clf.fit(X_train, y_train) 625 626 self.models_[name] = layer_clf 627 y_pred = layer_clf.predict(X_test) 628 accuracy = accuracy_score(y_test, y_pred, normalize=True) 629 b_accuracy = balanced_accuracy_score(y_test, y_pred) 630 f1 = f1_score(y_test, y_pred, average="weighted") 631 try: 632 roc_auc = roc_auc_score(y_test, y_pred) 633 except Exception as exception: 634 roc_auc = None 635 if self.ignore_warnings is False: 636 print("ROC AUC couldn't be calculated for " + name) 637 print(exception) 638 names.append(name) 639 Accuracy.append(accuracy) 640 B_Accuracy.append(b_accuracy) 641 ROC_AUC.append(roc_auc) 642 F1.append(f1) 643 TIME.append(time.time() - start) 644 if self.custom_metric is not None: 645 custom_metric = self.custom_metric(y_test, y_pred) 646 CUSTOM_METRIC.append(custom_metric) 647 if self.verbose > 0: 648 if self.custom_metric is not None: 649 print( 650 { 651 "Model": name, 652 "Accuracy": accuracy, 653 "Balanced Accuracy": b_accuracy, 654 "ROC AUC": roc_auc, 655 "F1 Score": f1, 656 self.custom_metric.__name__: custom_metric, 657 "Time taken": time.time() - start, 658 } 659 ) 660 else: 661 print( 662 { 663 "Model": name, 664 "Accuracy": accuracy, 665 "Balanced Accuracy": b_accuracy, 666 "ROC AUC": roc_auc, 667 "F1 Score": f1, 668 "Time taken": time.time() - start, 669 } 670 ) 671 if self.predictions: 672 predictions[name] = y_pred 673 except Exception as exception: 674 if self.ignore_warnings is False: 675 print(name + " model failed to execute") 676 print(exception) 677 678 if self.custom_metric is None: 679 scores = pd.DataFrame( 680 { 681 "Model": names, 682 "Accuracy": Accuracy, 683 "Balanced Accuracy": B_Accuracy, 684 "ROC AUC": ROC_AUC, 685 "F1 Score": F1, 686 "Time Taken": TIME, 687 } 688 ) 689 else: 690 scores = pd.DataFrame( 691 { 692 "Model": names, 693 "Accuracy": Accuracy, 694 "Balanced Accuracy": B_Accuracy, 695 "ROC AUC": ROC_AUC, 696 "F1 Score": F1, 697 "Custom metric": CUSTOM_METRIC, 698 "Time Taken": TIME, 699 } 700 ) 701 scores = scores.sort_values(by=self.sort_by, ascending=False).set_index("Model") 702 703 self.best_model_ = self.models_[scores.index[0]] 704 705 if self.predictions is True: 706 707 return scores, predictions 708 709 return scores 710 711 def get_best_model(self): 712 """ 713 This function returns the best model pipeline based on the sort_by metric. 714 715 Returns: 716 717 best_model: object, 718 Returns the best model pipeline based on the sort_by metric. 719 720 """ 721 return self.best_model_ 722 723 def provide_models(self, X_train, X_test, y_train, y_test): 724 """Returns all the model objects trained. If fit hasn't been called yet, 725 then it's called to return the models. 726 727 Parameters: 728 729 X_train: array-like, 730 Training vectors, where rows is the number of samples 731 and columns is the number of features. 732 733 X_test: array-like, 734 Testing vectors, where rows is the number of samples 735 and columns is the number of features. 736 737 y_train: array-like, 738 Training vectors, where rows is the number of samples 739 and columns is the number of features. 740 741 y_test: array-like, 742 Testing vectors, where rows is the number of samples 743 and columns is the number of features. 744 745 Returns: 746 747 models: dict-object, 748 Returns a dictionary with each model's pipeline as value 749 and key = name of the model. 
750 """ 751 if len(self.models_.keys()) == 0: 752 self.fit(X_train, X_test, y_train, y_test) 753 754 return self.models_
Fitting -- almost -- all the classification algorithms with layers of nnetsauce's CustomClassifier and returning their scores.
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, warnings related to algorithms that are not
able to run are ignored.
custom_metric: function, optional (default=None)
When function is provided, models are evaluated based on the custom
evaluation metric provided.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models are
returned as a data frame.
sort_by: string, optional (default='Accuracy')
Sort models by a metric. Available options are 'Accuracy',
'Balanced Accuracy', 'ROC AUC', 'F1 Score' or a custom metric
identified by its name and provided by custom_metric.
random_state: int, optional (default=42)
Reproducibility seed.
estimators: list, optional (default='all')
list of estimator names, or just 'all' for > 90 classifiers
(default='all')
preprocess: bool, preprocessing is done when set to True
n_jobs: int, when possible, run in parallel
For now, only used by individual models that support it.
n_layers: int, optional (default=3)
Number of layers of CustomClassifiers to be used.
All the other parameters are the same as CustomClassifier's.
Attributes:
models_: dict-object
Returns a dictionary with each model pipeline as value
and the model name as key.
best_model_: object
Returns the best model pipeline.
Examples
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
data = load_breast_cancer()
X = data.data
y= data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2,
random_state=123)
clf = ns.LazyDeepClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)
model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test)
print(models)
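To vary the depth of the stacked CustomClassifier layers, `n_layers` can be set explicitly (a sketch with illustrative values; per the constructor listing, the stored depth corresponds to `n_layers - 1` extra wrapping layers on top of the base layer):

```python
clf_deep = ns.LazyDeepClassifier(n_layers=2, n_hidden_features=10,
                                 verbose=0, ignore_warnings=True,
                                 predictions=True)
scores, preds = clf_deep.fit(X_train, X_test, y_train, y_test)
print(scores.head())
```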
229 def fit(self, X_train, X_test, y_train, y_test): 230 """Fit classifiers to X_train and y_train, predict and score on X_test, 231 y_test. 232 233 Parameters: 234 235 X_train: array-like, 236 Training vectors, where rows is the number of samples 237 and columns is the number of features. 238 239 X_test: array-like, 240 Testing vectors, where rows is the number of samples 241 and columns is the number of features. 242 243 y_train: array-like, 244 Training vectors, where rows is the number of samples 245 and columns is the number of features. 246 247 y_test: array-like, 248 Testing vectors, where rows is the number of samples 249 and columns is the number of features. 250 251 Returns: 252 253 scores: Pandas DataFrame 254 Returns metrics of all the models in a Pandas DataFrame. 255 256 predictions: Pandas DataFrame 257 Returns predictions of all the models in a Pandas DataFrame. 258 """ 259 Accuracy = [] 260 B_Accuracy = [] 261 ROC_AUC = [] 262 F1 = [] 263 names = [] 264 TIME = [] 265 predictions = {} 266 267 if self.custom_metric is not None: 268 CUSTOM_METRIC = [] 269 270 if isinstance(X_train, np.ndarray): 271 X_train = pd.DataFrame(X_train) 272 X_test = pd.DataFrame(X_test) 273 274 numeric_features = X_train.select_dtypes(include=[np.number]).columns 275 categorical_features = X_train.select_dtypes(include=["object"]).columns 276 277 categorical_low, categorical_high = get_card_split( 278 X_train, categorical_features 279 ) 280 281 if self.preprocess is True: 282 preprocessor = ColumnTransformer( 283 transformers=[ 284 ("numeric", numeric_transformer, numeric_features), 285 ( 286 "categorical_low", 287 categorical_transformer_low, 288 categorical_low, 289 ), 290 ( 291 "categorical_high", 292 categorical_transformer_high, 293 categorical_high, 294 ), 295 ] 296 ) 297 298 # baseline models 299 try: 300 baseline_names = ["RandomForestClassifier", "XGBClassifier"] 301 baseline_models = [RandomForestClassifier(), xgb.XGBClassifier()] 302 except Exception as exception: 303 baseline_names = ["RandomForestClassifier"] 304 baseline_models = [RandomForestClassifier()] 305 306 for name, model in zip(baseline_names, baseline_models): 307 start = time.time() 308 try: 309 model.fit(X_train, y_train) 310 self.models_[name] = model 311 y_pred = model.predict(X_test) 312 accuracy = accuracy_score(y_test, y_pred, normalize=True) 313 b_accuracy = balanced_accuracy_score(y_test, y_pred) 314 f1 = f1_score(y_test, y_pred, average="weighted") 315 try: 316 roc_auc = roc_auc_score(y_test, y_pred) 317 except Exception as exception: 318 roc_auc = None 319 if self.ignore_warnings is False: 320 print("ROC AUC couldn't be calculated for " + name) 321 print(exception) 322 names.append(name) 323 Accuracy.append(accuracy) 324 B_Accuracy.append(b_accuracy) 325 ROC_AUC.append(roc_auc) 326 F1.append(f1) 327 TIME.append(time.time() - start) 328 if self.custom_metric is not None: 329 custom_metric = self.custom_metric(y_test, y_pred) 330 CUSTOM_METRIC.append(custom_metric) 331 if self.verbose > 0: 332 if self.custom_metric is not None: 333 print( 334 { 335 "Model": name, 336 "Accuracy": accuracy, 337 "Balanced Accuracy": b_accuracy, 338 "ROC AUC": roc_auc, 339 "F1 Score": f1, 340 self.custom_metric.__name__: custom_metric, 341 "Time taken": time.time() - start, 342 } 343 ) 344 else: 345 print( 346 { 347 "Model": name, 348 "Accuracy": accuracy, 349 "Balanced Accuracy": b_accuracy, 350 "ROC AUC": roc_auc, 351 "F1 Score": f1, 352 "Time taken": time.time() - start, 353 } 354 ) 355 if self.predictions: 356 predictions[name] = 
y_pred 357 except Exception as exception: 358 if self.ignore_warnings is False: 359 print(name + " model failed to execute") 360 print(exception) 361 362 if self.estimators == "all": 363 self.classifiers = [ 364 item 365 for sublist in [ 366 DEEPCLASSIFIERS, 367 DEEPMULTITASKCLASSIFIERS, 368 DEEPSIMPLEMULTITASKCLASSIFIERS, 369 ] 370 for item in sublist 371 ] 372 else: 373 self.classifiers = ( 374 [ 375 ("DeepCustomClassifier(" + est[0] + ")", est[1]) 376 for est in all_estimators() 377 if ( 378 issubclass(est[1], ClassifierMixin) 379 and (est[0] in self.estimators) 380 ) 381 ] 382 + [ 383 ( 384 "DeepMultitaskClassifier(" + est[0] + ")", 385 partial(MultitaskClassifier, obj=est[1]()), 386 ) 387 for est in all_estimators() 388 if ( 389 issubclass(est[1], RegressorMixin) 390 and (est[0] in self.estimators) 391 ) 392 ] 393 + [ 394 ( 395 "DeepSimpleMultitaskClassifier(" + est[0] + ")", 396 partial(SimpleMultitaskClassifier, obj=est[1]()), 397 ) 398 for est in all_estimators() 399 if ( 400 issubclass(est[1], RegressorMixin) 401 and (est[0] in self.estimators) 402 ) 403 ] 404 ) 405 406 if self.preprocess is True: 407 408 for name, model in tqdm(self.classifiers): # do parallel exec 409 410 other_args = {} # use this trick for `random_state` too --> refactor 411 try: 412 if ( 413 "n_jobs" in model().get_params().keys() 414 and name.find("LogisticRegression") == -1 415 ): 416 other_args["n_jobs"] = self.n_jobs 417 except Exception: 418 pass 419 420 start = time.time() 421 422 try: 423 if "random_state" in model().get_params().keys(): 424 layer_clf = CustomClassifier( 425 obj=model(random_state=self.random_state), 426 n_hidden_features=self.n_hidden_features, 427 activation_name=self.activation_name, 428 a=self.a, 429 nodes_sim=self.nodes_sim, 430 bias=self.bias, 431 dropout=self.dropout, 432 direct_link=self.direct_link, 433 n_clusters=self.n_clusters, 434 cluster_encode=self.cluster_encode, 435 type_clust=self.type_clust, 436 type_scaling=self.type_scaling, 437 col_sample=self.col_sample, 438 row_sample=self.row_sample, 439 seed=self.seed, 440 backend=self.backend, 441 cv_calibration=None, 442 ) 443 444 else: 445 layer_clf = CustomClassifier( 446 obj=model(), 447 n_hidden_features=self.n_hidden_features, 448 activation_name=self.activation_name, 449 a=self.a, 450 nodes_sim=self.nodes_sim, 451 bias=self.bias, 452 dropout=self.dropout, 453 direct_link=self.direct_link, 454 n_clusters=self.n_clusters, 455 cluster_encode=self.cluster_encode, 456 type_clust=self.type_clust, 457 type_scaling=self.type_scaling, 458 col_sample=self.col_sample, 459 row_sample=self.row_sample, 460 seed=self.seed, 461 backend=self.backend, 462 cv_calibration=None, 463 ) 464 465 layer_clf.fit(X_train, y_train) 466 467 for _ in range(self.n_layers): 468 layer_clf = deepcopy( 469 CustomClassifier( 470 obj=layer_clf, 471 n_hidden_features=self.n_hidden_features, 472 activation_name=self.activation_name, 473 a=self.a, 474 nodes_sim=self.nodes_sim, 475 bias=self.bias, 476 dropout=self.dropout, 477 direct_link=self.direct_link, 478 n_clusters=self.n_clusters, 479 cluster_encode=self.cluster_encode, 480 type_clust=self.type_clust, 481 type_scaling=self.type_scaling, 482 col_sample=self.col_sample, 483 row_sample=self.row_sample, 484 seed=self.seed, 485 backend=self.backend, 486 cv_calibration=None, 487 ) 488 ) 489 490 pipe = Pipeline( 491 [ 492 ("preprocessor", preprocessor), 493 ("classifier", layer_clf), 494 ] 495 ) 496 497 pipe.fit(X_train, y_train) 498 self.models_[name] = pipe 499 y_pred = pipe.predict(X_test) 500 accuracy = 
accuracy_score(y_test, y_pred, normalize=True) 501 b_accuracy = balanced_accuracy_score(y_test, y_pred) 502 f1 = f1_score(y_test, y_pred, average="weighted") 503 try: 504 roc_auc = roc_auc_score(y_test, y_pred) 505 except Exception as exception: 506 roc_auc = None 507 if self.ignore_warnings is False: 508 print("ROC AUC couldn't be calculated for " + name) 509 print(exception) 510 names.append(name) 511 Accuracy.append(accuracy) 512 B_Accuracy.append(b_accuracy) 513 ROC_AUC.append(roc_auc) 514 F1.append(f1) 515 TIME.append(time.time() - start) 516 if self.custom_metric is not None: 517 custom_metric = self.custom_metric(y_test, y_pred) 518 CUSTOM_METRIC.append(custom_metric) 519 if self.verbose > 0: 520 if self.custom_metric is not None: 521 print( 522 { 523 "Model": name, 524 "Accuracy": accuracy, 525 "Balanced Accuracy": b_accuracy, 526 "ROC AUC": roc_auc, 527 "F1 Score": f1, 528 self.custom_metric.__name__: custom_metric, 529 "Time taken": time.time() - start, 530 } 531 ) 532 else: 533 print( 534 { 535 "Model": name, 536 "Accuracy": accuracy, 537 "Balanced Accuracy": b_accuracy, 538 "ROC AUC": roc_auc, 539 "F1 Score": f1, 540 "Time taken": time.time() - start, 541 } 542 ) 543 if self.predictions: 544 predictions[name] = y_pred 545 except Exception as exception: 546 if self.ignore_warnings is False: 547 print(name + " model failed to execute") 548 print(exception) 549 550 else: # no preprocessing 551 552 for name, model in tqdm(self.classifiers): # do parallel exec 553 start = time.time() 554 try: 555 if "random_state" in model().get_params().keys(): 556 layer_clf = CustomClassifier( 557 obj=model(random_state=self.random_state), 558 n_hidden_features=self.n_hidden_features, 559 activation_name=self.activation_name, 560 a=self.a, 561 nodes_sim=self.nodes_sim, 562 bias=self.bias, 563 dropout=self.dropout, 564 direct_link=self.direct_link, 565 n_clusters=self.n_clusters, 566 cluster_encode=self.cluster_encode, 567 type_clust=self.type_clust, 568 type_scaling=self.type_scaling, 569 col_sample=self.col_sample, 570 row_sample=self.row_sample, 571 seed=self.seed, 572 backend=self.backend, 573 cv_calibration=None, 574 ) 575 576 else: 577 layer_clf = CustomClassifier( 578 obj=model(), 579 n_hidden_features=self.n_hidden_features, 580 activation_name=self.activation_name, 581 a=self.a, 582 nodes_sim=self.nodes_sim, 583 bias=self.bias, 584 dropout=self.dropout, 585 direct_link=self.direct_link, 586 n_clusters=self.n_clusters, 587 cluster_encode=self.cluster_encode, 588 type_clust=self.type_clust, 589 type_scaling=self.type_scaling, 590 col_sample=self.col_sample, 591 row_sample=self.row_sample, 592 seed=self.seed, 593 backend=self.backend, 594 cv_calibration=None, 595 ) 596 597 layer_clf.fit(X_train, y_train) 598 599 for _ in range(self.n_layers): 600 layer_clf = deepcopy( 601 CustomClassifier( 602 obj=layer_clf, 603 n_hidden_features=self.n_hidden_features, 604 activation_name=self.activation_name, 605 a=self.a, 606 nodes_sim=self.nodes_sim, 607 bias=self.bias, 608 dropout=self.dropout, 609 direct_link=self.direct_link, 610 n_clusters=self.n_clusters, 611 cluster_encode=self.cluster_encode, 612 type_clust=self.type_clust, 613 type_scaling=self.type_scaling, 614 col_sample=self.col_sample, 615 row_sample=self.row_sample, 616 seed=self.seed, 617 backend=self.backend, 618 cv_calibration=None, 619 ) 620 ) 621 622 # layer_clf.fit(X_train, y_train) 623 624 layer_clf.fit(X_train, y_train) 625 626 self.models_[name] = layer_clf 627 y_pred = layer_clf.predict(X_test) 628 accuracy = accuracy_score(y_test, 
y_pred, normalize=True) 629 b_accuracy = balanced_accuracy_score(y_test, y_pred) 630 f1 = f1_score(y_test, y_pred, average="weighted") 631 try: 632 roc_auc = roc_auc_score(y_test, y_pred) 633 except Exception as exception: 634 roc_auc = None 635 if self.ignore_warnings is False: 636 print("ROC AUC couldn't be calculated for " + name) 637 print(exception) 638 names.append(name) 639 Accuracy.append(accuracy) 640 B_Accuracy.append(b_accuracy) 641 ROC_AUC.append(roc_auc) 642 F1.append(f1) 643 TIME.append(time.time() - start) 644 if self.custom_metric is not None: 645 custom_metric = self.custom_metric(y_test, y_pred) 646 CUSTOM_METRIC.append(custom_metric) 647 if self.verbose > 0: 648 if self.custom_metric is not None: 649 print( 650 { 651 "Model": name, 652 "Accuracy": accuracy, 653 "Balanced Accuracy": b_accuracy, 654 "ROC AUC": roc_auc, 655 "F1 Score": f1, 656 self.custom_metric.__name__: custom_metric, 657 "Time taken": time.time() - start, 658 } 659 ) 660 else: 661 print( 662 { 663 "Model": name, 664 "Accuracy": accuracy, 665 "Balanced Accuracy": b_accuracy, 666 "ROC AUC": roc_auc, 667 "F1 Score": f1, 668 "Time taken": time.time() - start, 669 } 670 ) 671 if self.predictions: 672 predictions[name] = y_pred 673 except Exception as exception: 674 if self.ignore_warnings is False: 675 print(name + " model failed to execute") 676 print(exception) 677 678 if self.custom_metric is None: 679 scores = pd.DataFrame( 680 { 681 "Model": names, 682 "Accuracy": Accuracy, 683 "Balanced Accuracy": B_Accuracy, 684 "ROC AUC": ROC_AUC, 685 "F1 Score": F1, 686 "Time Taken": TIME, 687 } 688 ) 689 else: 690 scores = pd.DataFrame( 691 { 692 "Model": names, 693 "Accuracy": Accuracy, 694 "Balanced Accuracy": B_Accuracy, 695 "ROC AUC": ROC_AUC, 696 "F1 Score": F1, 697 "Custom metric": CUSTOM_METRIC, 698 "Time Taken": TIME, 699 } 700 ) 701 scores = scores.sort_values(by=self.sort_by, ascending=False).set_index("Model") 702 703 self.best_model_ = self.models_[scores.index[0]] 704 705 if self.predictions is True: 706 707 return scores, predictions 708 709 return scores
Fit classifiers to X_train and y_train, predict and score on X_test, y_test.
Parameters:
X_train: array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
X_test: array-like,
Testing vectors, where rows is the number of samples
and columns is the number of features.
y_train: array-like,
Training target values, with one value per training sample.
y_test: array-like,
Testing target values, with one value per testing sample.
Returns:
scores: Pandas DataFrame
Returns metrics of all the models in a Pandas DataFrame.
predictions: Pandas DataFrame
Returns predictions of all the models in a Pandas DataFrame.
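For concreteness, here is a minimal usage sketch of this fit signature (a sketch only: the dataset, split, and constructor arguments are illustrative assumptions, not recommendations):

```python
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

# illustrative data: any tabular classification dataset works
X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

# predictions=True so that fit returns (scores, predictions)
clf = ns.LazyDeepClassifier(verbose=0, ignore_warnings=True, predictions=True)
scores, predictions = clf.fit(X_train, X_test, y_train, y_test)
print(scores)  # one row of metrics per fitted model, sorted by `sort_by`
```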
723 def provide_models(self, X_train, X_test, y_train, y_test): 724 """Returns all the model objects trained. If fit hasn't been called yet, 725 then it's called to return the models. 726 727 Parameters: 728 729 X_train: array-like, 730 Training vectors, where rows is the number of samples 731 and columns is the number of features. 732 733 X_test: array-like, 734 Testing vectors, where rows is the number of samples 735 and columns is the number of features. 736 737 y_train: array-like, 738 Training vectors, where rows is the number of samples 739 and columns is the number of features. 740 741 y_test: array-like, 742 Testing vectors, where rows is the number of samples 743 and columns is the number of features. 744 745 Returns: 746 747 models: dict-object, 748 Returns a dictionary with each model's pipeline as value 749 and key = name of the model. 750 """ 751 if len(self.models_.keys()) == 0: 752 self.fit(X_train, X_test, y_train, y_test) 753 754 return self.models_
Returns all the model objects trained. If fit hasn't been called yet, then it's called to return the models.
Parameters:
X_train: array-like, Training vectors, where rows is the number of samples and columns is the number of features.
X_test: array-like, Testing vectors, where rows is the number of samples and columns is the number of features.
y_train: array-like, Training target values, with one value per training sample.
y_test: array-like, Testing target values, with one value per testing sample.
Returns:
models: dict-object,
Returns a dictionary with each model's pipeline as value
and key = name of the model.
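As a short illustration (reusing `clf` and the train/test split from the sketch above), provide_models returns the fitted models keyed by name; each value can be used directly for prediction:

```python
# calls fit internally if no model has been fitted yet
models = clf.provide_models(X_train, X_test, y_train, y_test)
print(list(models.keys()))           # model names, matching the scores index
first_model = next(iter(models.values()))
print(first_model.predict(X_test))   # each value is a fitted classifier
```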
90class LazyDeepRegressor(Custom, RegressorMixin): 91 """ 92 Fitting -- almost -- all the regression algorithms with layers of 93 nnetsauce's CustomRegressor and returning their scores. 94 95 Parameters: 96 97 verbose: int, optional (default=0) 98 Any positive number for verbosity. 99 100 ignore_warnings: bool, optional (default=True) 101 When set to True, the warning related to algorigms that are not able to run are ignored. 102 103 custom_metric: function, optional (default=None) 104 When function is provided, models are evaluated based on the custom evaluation metric provided. 105 106 predictions: bool, optional (default=False) 107 When set to True, the predictions of all the models models are returned as dataframe. 108 109 sort_by: string, optional (default='RMSE') 110 Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken' and 'Custom Metric'. 111 or a custom metric identified by its name and provided by custom_metric. 112 113 random_state: int, optional (default=42) 114 Reproducibiility seed. 115 116 estimators: list, optional (default='all') 117 list of Estimators names or just 'all' (default='all') 118 119 preprocess: bool 120 preprocessing is done when set to True 121 122 n_jobs : int, when possible, run in parallel 123 For now, only used by individual models that support it. 124 125 n_layers: int, optional (default=3) 126 Number of layers of CustomRegressors to be used. 127 128 All the other parameters are the same as CustomRegressor's. 129 130 Attributes: 131 132 models_: dict-object 133 Returns a dictionary with each model pipeline as value 134 with key as name of models. 135 136 best_model_: object 137 Returns the best model pipeline based on the sort_by metric. 138 139 Examples: 140 141 import nnetsauce as ns 142 import numpy as np 143 from sklearn import datasets 144 from sklearn.utils import shuffle 145 146 diabetes = datasets.load_diabetes() 147 X, y = shuffle(diabetes.data, diabetes.target, random_state=13) 148 X = X.astype(np.float32) 149 150 offset = int(X.shape[0] * 0.9) 151 X_train, y_train = X[:offset], y[:offset] 152 X_test, y_test = X[offset:], y[offset:] 153 154 reg = ns.LazyDeepRegressor(verbose=0, ignore_warnings=False, custom_metric=None) 155 models, predictions = reg.fit(X_train, X_test, y_train, y_test) 156 print(models) 157 158 """ 159 160 def __init__( 161 self, 162 verbose=0, 163 ignore_warnings=True, 164 custom_metric=None, 165 predictions=False, 166 sort_by="RMSE", 167 random_state=42, 168 estimators="all", 169 preprocess=False, 170 n_jobs=None, 171 # Defining depth 172 n_layers=3, 173 # CustomRegressor attributes 174 obj=None, 175 n_hidden_features=5, 176 activation_name="relu", 177 a=0.01, 178 nodes_sim="sobol", 179 bias=True, 180 dropout=0, 181 direct_link=True, 182 n_clusters=2, 183 cluster_encode=True, 184 type_clust="kmeans", 185 type_scaling=("std", "std", "std"), 186 col_sample=1, 187 row_sample=1, 188 seed=123, 189 backend="cpu", 190 ): 191 self.verbose = verbose 192 self.ignore_warnings = ignore_warnings 193 self.custom_metric = custom_metric 194 self.predictions = predictions 195 self.sort_by = sort_by 196 self.models_ = {} 197 self.best_model_ = None 198 self.random_state = random_state 199 self.estimators = estimators 200 self.preprocess = preprocess 201 self.n_layers = n_layers - 1 202 self.n_jobs = n_jobs 203 super().__init__( 204 obj=obj, 205 n_hidden_features=n_hidden_features, 206 activation_name=activation_name, 207 a=a, 208 nodes_sim=nodes_sim, 209 bias=bias, 210 dropout=dropout, 211 
direct_link=direct_link, 212 n_clusters=n_clusters, 213 cluster_encode=cluster_encode, 214 type_clust=type_clust, 215 type_scaling=type_scaling, 216 col_sample=col_sample, 217 row_sample=row_sample, 218 seed=seed, 219 backend=backend, 220 ) 221 222 def fit(self, X_train, X_test, y_train, y_test): 223 """Fit Regression algorithms to X_train and y_train, predict and score on X_test, y_test. 224 225 Parameters: 226 227 X_train : array-like, 228 Training vectors, where rows is the number of samples 229 and columns is the number of features. 230 231 X_test : array-like, 232 Testing vectors, where rows is the number of samples 233 and columns is the number of features. 234 235 y_train : array-like, 236 Training vectors, where rows is the number of samples 237 and columns is the number of features. 238 239 y_test : array-like, 240 Testing vectors, where rows is the number of samples 241 and columns is the number of features. 242 243 Returns: 244 ------- 245 scores: Pandas DataFrame 246 Returns metrics of all the models in a Pandas DataFrame. 247 248 predictions : Pandas DataFrame 249 Returns predictions of all the models in a Pandas DataFrame. 250 251 """ 252 R2 = [] 253 ADJR2 = [] 254 RMSE = [] 255 # WIN = [] 256 names = [] 257 TIME = [] 258 predictions = {} 259 260 if self.custom_metric: 261 CUSTOM_METRIC = [] 262 263 if isinstance(X_train, np.ndarray): 264 X_train = pd.DataFrame(X_train) 265 X_test = pd.DataFrame(X_test) 266 267 numeric_features = X_train.select_dtypes(include=[np.number]).columns 268 categorical_features = X_train.select_dtypes(include=["object"]).columns 269 270 categorical_low, categorical_high = get_card_split( 271 X_train, categorical_features 272 ) 273 274 if self.preprocess is True: 275 preprocessor = ColumnTransformer( 276 transformers=[ 277 ("numeric", numeric_transformer, numeric_features), 278 ( 279 "categorical_low", 280 categorical_transformer_low, 281 categorical_low, 282 ), 283 ( 284 "categorical_high", 285 categorical_transformer_high, 286 categorical_high, 287 ), 288 ] 289 ) 290 291 # base models 292 try: 293 baseline_names = ["RandomForestRegressor", "XGBRegressor"] 294 baseline_models = [RandomForestRegressor(), xgb.XGBRegressor()] 295 except Exception as exception: 296 baseline_names = ["RandomForestRegressor"] 297 baseline_models = [RandomForestRegressor()] 298 299 for name, model in zip(baseline_names, baseline_models): 300 start = time.time() 301 try: 302 model.fit(X_train, y_train) 303 self.models_[name] = model 304 y_pred = model.predict(X_test) 305 r_squared = r2_score(y_test, y_pred) 306 adj_rsquared = adjusted_rsquared( 307 r_squared, X_test.shape[0], X_test.shape[1] 308 ) 309 rmse = np.sqrt(np.mean((y_test - y_pred) ** 2)) 310 311 names.append(name) 312 R2.append(r_squared) 313 ADJR2.append(adj_rsquared) 314 RMSE.append(rmse) 315 TIME.append(time.time() - start) 316 317 if self.custom_metric: 318 custom_metric = self.custom_metric(y_test, y_pred) 319 CUSTOM_METRIC.append(custom_metric) 320 321 if self.verbose > 0: 322 scores_verbose = { 323 "Model": name, 324 "R-Squared": r_squared, 325 "Adjusted R-Squared": adj_rsquared, 326 "RMSE": rmse, 327 "Time taken": time.time() - start, 328 } 329 330 if self.custom_metric: 331 scores_verbose[self.custom_metric.__name__] = custom_metric 332 333 print(scores_verbose) 334 if self.predictions: 335 predictions[name] = y_pred 336 except Exception as exception: 337 if self.ignore_warnings is False: 338 print(name + " model failed to execute") 339 print(exception) 340 341 if self.estimators == "all": 342 
self.regressors = DEEPREGRESSORS 343 else: 344 self.regressors = [ 345 ("DeepCustomRegressor(" + est[0] + ")", est[1]) 346 for est in all_estimators() 347 if (issubclass(est[1], RegressorMixin) and (est[0] in self.estimators)) 348 ] 349 350 if self.preprocess is True: 351 352 for name, model in tqdm(self.regressors): # do parallel exec 353 start = time.time() 354 try: 355 if "random_state" in model().get_params().keys(): 356 layer_regr = CustomRegressor( 357 obj=model(random_state=self.random_state), 358 n_hidden_features=self.n_hidden_features, 359 activation_name=self.activation_name, 360 a=self.a, 361 nodes_sim=self.nodes_sim, 362 bias=self.bias, 363 dropout=self.dropout, 364 direct_link=self.direct_link, 365 n_clusters=self.n_clusters, 366 cluster_encode=self.cluster_encode, 367 type_clust=self.type_clust, 368 type_scaling=self.type_scaling, 369 col_sample=self.col_sample, 370 row_sample=self.row_sample, 371 seed=self.seed, 372 backend=self.backend, 373 ) 374 else: 375 layer_regr = CustomRegressor( 376 obj=model(), 377 n_hidden_features=self.n_hidden_features, 378 activation_name=self.activation_name, 379 a=self.a, 380 nodes_sim=self.nodes_sim, 381 bias=self.bias, 382 dropout=self.dropout, 383 direct_link=self.direct_link, 384 n_clusters=self.n_clusters, 385 cluster_encode=self.cluster_encode, 386 type_clust=self.type_clust, 387 type_scaling=self.type_scaling, 388 col_sample=self.col_sample, 389 row_sample=self.row_sample, 390 seed=self.seed, 391 backend=self.backend, 392 ) 393 394 for _ in range(self.n_layers): 395 layer_regr = deepcopy( 396 CustomRegressor( 397 obj=layer_regr, 398 n_hidden_features=self.n_hidden_features, 399 activation_name=self.activation_name, 400 a=self.a, 401 nodes_sim=self.nodes_sim, 402 bias=self.bias, 403 dropout=self.dropout, 404 direct_link=self.direct_link, 405 n_clusters=self.n_clusters, 406 cluster_encode=self.cluster_encode, 407 type_clust=self.type_clust, 408 type_scaling=self.type_scaling, 409 col_sample=self.col_sample, 410 row_sample=self.row_sample, 411 seed=self.seed, 412 backend=self.backend, 413 ) 414 ) 415 416 layer_regr.fit(X_train, y_train) 417 418 pipe = Pipeline( 419 steps=[ 420 ("preprocessor", preprocessor), 421 ("regressor", layer_regr), 422 ] 423 ) 424 425 pipe.fit(X_train, y_train) 426 427 self.models_[name] = pipe 428 y_pred = pipe.predict(X_test) 429 r_squared = r2_score(y_test, y_pred) 430 adj_rsquared = adjusted_rsquared( 431 r_squared, X_test.shape[0], X_test.shape[1] 432 ) 433 rmse = np.sqrt(np.mean((y_test - y_pred) ** 2)) 434 435 names.append(name) 436 R2.append(r_squared) 437 ADJR2.append(adj_rsquared) 438 RMSE.append(rmse) 439 TIME.append(time.time() - start) 440 441 if self.custom_metric: 442 custom_metric = self.custom_metric(y_test, y_pred) 443 CUSTOM_METRIC.append(custom_metric) 444 445 if self.verbose > 0: 446 scores_verbose = { 447 "Model": name, 448 "R-Squared": r_squared, 449 "Adjusted R-Squared": adj_rsquared, 450 "RMSE": rmse, 451 "Time taken": time.time() - start, 452 } 453 454 if self.custom_metric: 455 scores_verbose[self.custom_metric.__name__] = custom_metric 456 457 print(scores_verbose) 458 if self.predictions: 459 predictions[name] = y_pred 460 except Exception as exception: 461 if self.ignore_warnings is False: 462 print(name + " model failed to execute") 463 print(exception) 464 465 else: # no preprocessing 466 467 for name, model in tqdm(self.regressors): # do parallel exec 468 start = time.time() 469 try: 470 if "random_state" in model().get_params().keys(): 471 layer_regr = CustomRegressor( 472 
obj=model(random_state=self.random_state), 473 n_hidden_features=self.n_hidden_features, 474 activation_name=self.activation_name, 475 a=self.a, 476 nodes_sim=self.nodes_sim, 477 bias=self.bias, 478 dropout=self.dropout, 479 direct_link=self.direct_link, 480 n_clusters=self.n_clusters, 481 cluster_encode=self.cluster_encode, 482 type_clust=self.type_clust, 483 type_scaling=self.type_scaling, 484 col_sample=self.col_sample, 485 row_sample=self.row_sample, 486 seed=self.seed, 487 backend=self.backend, 488 ) 489 else: 490 layer_regr = CustomRegressor( 491 obj=model(), 492 n_hidden_features=self.n_hidden_features, 493 activation_name=self.activation_name, 494 a=self.a, 495 nodes_sim=self.nodes_sim, 496 bias=self.bias, 497 dropout=self.dropout, 498 direct_link=self.direct_link, 499 n_clusters=self.n_clusters, 500 cluster_encode=self.cluster_encode, 501 type_clust=self.type_clust, 502 type_scaling=self.type_scaling, 503 col_sample=self.col_sample, 504 row_sample=self.row_sample, 505 seed=self.seed, 506 backend=self.backend, 507 ) 508 509 layer_regr.fit(X_train, y_train) 510 511 for _ in range(self.n_layers): 512 layer_regr = deepcopy( 513 CustomRegressor( 514 obj=layer_regr, 515 n_hidden_features=self.n_hidden_features, 516 activation_name=self.activation_name, 517 a=self.a, 518 nodes_sim=self.nodes_sim, 519 bias=self.bias, 520 dropout=self.dropout, 521 direct_link=self.direct_link, 522 n_clusters=self.n_clusters, 523 cluster_encode=self.cluster_encode, 524 type_clust=self.type_clust, 525 type_scaling=self.type_scaling, 526 col_sample=self.col_sample, 527 row_sample=self.row_sample, 528 seed=self.seed, 529 backend=self.backend, 530 ) 531 ) 532 533 # layer_regr.fit(X_train, y_train) 534 535 layer_regr.fit(X_train, y_train) 536 537 self.models_[name] = layer_regr 538 y_pred = layer_regr.predict(X_test) 539 540 r_squared = r2_score(y_test, y_pred) 541 adj_rsquared = adjusted_rsquared( 542 r_squared, X_test.shape[0], X_test.shape[1] 543 ) 544 rmse = np.sqrt(np.mean((y_test - y_pred) ** 2)) 545 546 names.append(name) 547 R2.append(r_squared) 548 ADJR2.append(adj_rsquared) 549 RMSE.append(rmse) 550 TIME.append(time.time() - start) 551 552 if self.custom_metric: 553 custom_metric = self.custom_metric(y_test, y_pred) 554 CUSTOM_METRIC.append(custom_metric) 555 556 if self.verbose > 0: 557 scores_verbose = { 558 "Model": name, 559 "R-Squared": r_squared, 560 "Adjusted R-Squared": adj_rsquared, 561 "RMSE": rmse, 562 "Time taken": time.time() - start, 563 } 564 565 if self.custom_metric: 566 scores_verbose[self.custom_metric.__name__] = custom_metric 567 568 print(scores_verbose) 569 if self.predictions: 570 predictions[name] = y_pred 571 except Exception as exception: 572 if self.ignore_warnings is False: 573 print(name + " model failed to execute") 574 print(exception) 575 576 scores = { 577 "Model": names, 578 "Adjusted R-Squared": ADJR2, 579 "R-Squared": R2, 580 "RMSE": RMSE, 581 "Time Taken": TIME, 582 } 583 584 if self.custom_metric: 585 scores["Custom metric"] = CUSTOM_METRIC 586 587 scores = pd.DataFrame(scores) 588 scores = scores.sort_values(by=self.sort_by, ascending=True).set_index("Model") 589 590 self.best_model_ = self.models_[scores.index[0]] 591 592 if self.predictions is True: 593 594 return scores, predictions 595 596 return scores 597 598 def get_best_model(self): 599 """ 600 This function returns the best model pipeline based on the sort_by metric. 601 602 Returns: 603 604 best_model: object, 605 Returns the best model pipeline based on the sort_by metric. 
606 607 """ 608 return self.best_model_ 609 610 def provide_models(self, X_train, X_test, y_train, y_test): 611 """ 612 This function returns all the model objects trained in fit function. 613 If fit is not called already, then we call fit and then return the models. 614 615 Parameters: 616 617 X_train : array-like, 618 Training vectors, where rows is the number of samples 619 and columns is the number of features. 620 621 X_test : array-like, 622 Testing vectors, where rows is the number of samples 623 and columns is the number of features. 624 625 y_train : array-like, 626 Training vectors, where rows is the number of samples 627 and columns is the number of features. 628 629 y_test : array-like, 630 Testing vectors, where rows is the number of samples 631 and columns is the number of features. 632 633 Returns: 634 635 models: dict-object, 636 Returns a dictionary with each model pipeline as value 637 with key as name of models. 638 639 """ 640 if len(self.models_.keys()) == 0: 641 self.fit(X_train, X_test, y_train, y_test) 642 643 return self.models_
Fitting -- almost -- all the regression algorithms with layers of nnetsauce's CustomRegressor and returning their scores.
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, warnings related to algorithms that are not able to run are ignored.
custom_metric: function, optional (default=None)
When function is provided, models are evaluated based on the custom evaluation metric provided.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models are returned as a dataframe.
sort_by: string, optional (default='RMSE')
Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken',
or a custom metric identified by its name and provided by custom_metric.
random_state: int, optional (default=42)
Reproducibility seed.
estimators: list, optional (default='all')
list of estimator names, or just 'all' (default='all')
preprocess: bool
preprocessing is applied to the data when set to True
n_jobs: int, optional (default=None)
Number of jobs to run in parallel, when possible. For now, only used by individual models that support it.
n_layers: int, optional (default=3)
Number of layers of CustomRegressors to be used.
All the other parameters are the same as CustomRegressor's.
Attributes:
models_: dict-object
Returns a dictionary with each model pipeline as value
with key as name of models.
best_model_: object
Returns the best model pipeline based on the sort_by metric.
Examples:
import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle
diabetes = datasets.load_diabetes()
X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
X = X.astype(np.float32)
offset = int(X.shape[0] * 0.9)
X_train, y_train = X[:offset], y[:offset]
X_test, y_test = X[offset:], y[offset:]
reg = ns.LazyDeepRegressor(verbose=0, ignore_warnings=False, custom_metric=None, predictions=True)
models, predictions = reg.fit(X_train, X_test, y_train, y_test)
print(models)
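Continuing the example above, the top-ranked pipeline is exposed through best_model_ (or get_best_model(), shown in the source above); a small sketch:

```python
# after reg.fit(...) above
best = reg.get_best_model()      # same object as reg.best_model_
y_hat = best.predict(X_test)     # predictions from the top-ranked model
print(list(reg.models_.keys()))  # all fitted models, keyed by name
```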
222 def fit(self, X_train, X_test, y_train, y_test): 223 """Fit Regression algorithms to X_train and y_train, predict and score on X_test, y_test. 224 225 Parameters: 226 227 X_train : array-like, 228 Training vectors, where rows is the number of samples 229 and columns is the number of features. 230 231 X_test : array-like, 232 Testing vectors, where rows is the number of samples 233 and columns is the number of features. 234 235 y_train : array-like, 236 Training vectors, where rows is the number of samples 237 and columns is the number of features. 238 239 y_test : array-like, 240 Testing vectors, where rows is the number of samples 241 and columns is the number of features. 242 243 Returns: 244 ------- 245 scores: Pandas DataFrame 246 Returns metrics of all the models in a Pandas DataFrame. 247 248 predictions : Pandas DataFrame 249 Returns predictions of all the models in a Pandas DataFrame. 250 251 """ 252 R2 = [] 253 ADJR2 = [] 254 RMSE = [] 255 # WIN = [] 256 names = [] 257 TIME = [] 258 predictions = {} 259 260 if self.custom_metric: 261 CUSTOM_METRIC = [] 262 263 if isinstance(X_train, np.ndarray): 264 X_train = pd.DataFrame(X_train) 265 X_test = pd.DataFrame(X_test) 266 267 numeric_features = X_train.select_dtypes(include=[np.number]).columns 268 categorical_features = X_train.select_dtypes(include=["object"]).columns 269 270 categorical_low, categorical_high = get_card_split( 271 X_train, categorical_features 272 ) 273 274 if self.preprocess is True: 275 preprocessor = ColumnTransformer( 276 transformers=[ 277 ("numeric", numeric_transformer, numeric_features), 278 ( 279 "categorical_low", 280 categorical_transformer_low, 281 categorical_low, 282 ), 283 ( 284 "categorical_high", 285 categorical_transformer_high, 286 categorical_high, 287 ), 288 ] 289 ) 290 291 # base models 292 try: 293 baseline_names = ["RandomForestRegressor", "XGBRegressor"] 294 baseline_models = [RandomForestRegressor(), xgb.XGBRegressor()] 295 except Exception as exception: 296 baseline_names = ["RandomForestRegressor"] 297 baseline_models = [RandomForestRegressor()] 298 299 for name, model in zip(baseline_names, baseline_models): 300 start = time.time() 301 try: 302 model.fit(X_train, y_train) 303 self.models_[name] = model 304 y_pred = model.predict(X_test) 305 r_squared = r2_score(y_test, y_pred) 306 adj_rsquared = adjusted_rsquared( 307 r_squared, X_test.shape[0], X_test.shape[1] 308 ) 309 rmse = np.sqrt(np.mean((y_test - y_pred) ** 2)) 310 311 names.append(name) 312 R2.append(r_squared) 313 ADJR2.append(adj_rsquared) 314 RMSE.append(rmse) 315 TIME.append(time.time() - start) 316 317 if self.custom_metric: 318 custom_metric = self.custom_metric(y_test, y_pred) 319 CUSTOM_METRIC.append(custom_metric) 320 321 if self.verbose > 0: 322 scores_verbose = { 323 "Model": name, 324 "R-Squared": r_squared, 325 "Adjusted R-Squared": adj_rsquared, 326 "RMSE": rmse, 327 "Time taken": time.time() - start, 328 } 329 330 if self.custom_metric: 331 scores_verbose[self.custom_metric.__name__] = custom_metric 332 333 print(scores_verbose) 334 if self.predictions: 335 predictions[name] = y_pred 336 except Exception as exception: 337 if self.ignore_warnings is False: 338 print(name + " model failed to execute") 339 print(exception) 340 341 if self.estimators == "all": 342 self.regressors = DEEPREGRESSORS 343 else: 344 self.regressors = [ 345 ("DeepCustomRegressor(" + est[0] + ")", est[1]) 346 for est in all_estimators() 347 if (issubclass(est[1], RegressorMixin) and (est[0] in self.estimators)) 348 ] 349 350 if 
self.preprocess is True: 351 352 for name, model in tqdm(self.regressors): # do parallel exec 353 start = time.time() 354 try: 355 if "random_state" in model().get_params().keys(): 356 layer_regr = CustomRegressor( 357 obj=model(random_state=self.random_state), 358 n_hidden_features=self.n_hidden_features, 359 activation_name=self.activation_name, 360 a=self.a, 361 nodes_sim=self.nodes_sim, 362 bias=self.bias, 363 dropout=self.dropout, 364 direct_link=self.direct_link, 365 n_clusters=self.n_clusters, 366 cluster_encode=self.cluster_encode, 367 type_clust=self.type_clust, 368 type_scaling=self.type_scaling, 369 col_sample=self.col_sample, 370 row_sample=self.row_sample, 371 seed=self.seed, 372 backend=self.backend, 373 ) 374 else: 375 layer_regr = CustomRegressor( 376 obj=model(), 377 n_hidden_features=self.n_hidden_features, 378 activation_name=self.activation_name, 379 a=self.a, 380 nodes_sim=self.nodes_sim, 381 bias=self.bias, 382 dropout=self.dropout, 383 direct_link=self.direct_link, 384 n_clusters=self.n_clusters, 385 cluster_encode=self.cluster_encode, 386 type_clust=self.type_clust, 387 type_scaling=self.type_scaling, 388 col_sample=self.col_sample, 389 row_sample=self.row_sample, 390 seed=self.seed, 391 backend=self.backend, 392 ) 393 394 for _ in range(self.n_layers): 395 layer_regr = deepcopy( 396 CustomRegressor( 397 obj=layer_regr, 398 n_hidden_features=self.n_hidden_features, 399 activation_name=self.activation_name, 400 a=self.a, 401 nodes_sim=self.nodes_sim, 402 bias=self.bias, 403 dropout=self.dropout, 404 direct_link=self.direct_link, 405 n_clusters=self.n_clusters, 406 cluster_encode=self.cluster_encode, 407 type_clust=self.type_clust, 408 type_scaling=self.type_scaling, 409 col_sample=self.col_sample, 410 row_sample=self.row_sample, 411 seed=self.seed, 412 backend=self.backend, 413 ) 414 ) 415 416 layer_regr.fit(X_train, y_train) 417 418 pipe = Pipeline( 419 steps=[ 420 ("preprocessor", preprocessor), 421 ("regressor", layer_regr), 422 ] 423 ) 424 425 pipe.fit(X_train, y_train) 426 427 self.models_[name] = pipe 428 y_pred = pipe.predict(X_test) 429 r_squared = r2_score(y_test, y_pred) 430 adj_rsquared = adjusted_rsquared( 431 r_squared, X_test.shape[0], X_test.shape[1] 432 ) 433 rmse = np.sqrt(np.mean((y_test - y_pred) ** 2)) 434 435 names.append(name) 436 R2.append(r_squared) 437 ADJR2.append(adj_rsquared) 438 RMSE.append(rmse) 439 TIME.append(time.time() - start) 440 441 if self.custom_metric: 442 custom_metric = self.custom_metric(y_test, y_pred) 443 CUSTOM_METRIC.append(custom_metric) 444 445 if self.verbose > 0: 446 scores_verbose = { 447 "Model": name, 448 "R-Squared": r_squared, 449 "Adjusted R-Squared": adj_rsquared, 450 "RMSE": rmse, 451 "Time taken": time.time() - start, 452 } 453 454 if self.custom_metric: 455 scores_verbose[self.custom_metric.__name__] = custom_metric 456 457 print(scores_verbose) 458 if self.predictions: 459 predictions[name] = y_pred 460 except Exception as exception: 461 if self.ignore_warnings is False: 462 print(name + " model failed to execute") 463 print(exception) 464 465 else: # no preprocessing 466 467 for name, model in tqdm(self.regressors): # do parallel exec 468 start = time.time() 469 try: 470 if "random_state" in model().get_params().keys(): 471 layer_regr = CustomRegressor( 472 obj=model(random_state=self.random_state), 473 n_hidden_features=self.n_hidden_features, 474 activation_name=self.activation_name, 475 a=self.a, 476 nodes_sim=self.nodes_sim, 477 bias=self.bias, 478 dropout=self.dropout, 479 
direct_link=self.direct_link, 480 n_clusters=self.n_clusters, 481 cluster_encode=self.cluster_encode, 482 type_clust=self.type_clust, 483 type_scaling=self.type_scaling, 484 col_sample=self.col_sample, 485 row_sample=self.row_sample, 486 seed=self.seed, 487 backend=self.backend, 488 ) 489 else: 490 layer_regr = CustomRegressor( 491 obj=model(), 492 n_hidden_features=self.n_hidden_features, 493 activation_name=self.activation_name, 494 a=self.a, 495 nodes_sim=self.nodes_sim, 496 bias=self.bias, 497 dropout=self.dropout, 498 direct_link=self.direct_link, 499 n_clusters=self.n_clusters, 500 cluster_encode=self.cluster_encode, 501 type_clust=self.type_clust, 502 type_scaling=self.type_scaling, 503 col_sample=self.col_sample, 504 row_sample=self.row_sample, 505 seed=self.seed, 506 backend=self.backend, 507 ) 508 509 layer_regr.fit(X_train, y_train) 510 511 for _ in range(self.n_layers): 512 layer_regr = deepcopy( 513 CustomRegressor( 514 obj=layer_regr, 515 n_hidden_features=self.n_hidden_features, 516 activation_name=self.activation_name, 517 a=self.a, 518 nodes_sim=self.nodes_sim, 519 bias=self.bias, 520 dropout=self.dropout, 521 direct_link=self.direct_link, 522 n_clusters=self.n_clusters, 523 cluster_encode=self.cluster_encode, 524 type_clust=self.type_clust, 525 type_scaling=self.type_scaling, 526 col_sample=self.col_sample, 527 row_sample=self.row_sample, 528 seed=self.seed, 529 backend=self.backend, 530 ) 531 ) 532 533 # layer_regr.fit(X_train, y_train) 534 535 layer_regr.fit(X_train, y_train) 536 537 self.models_[name] = layer_regr 538 y_pred = layer_regr.predict(X_test) 539 540 r_squared = r2_score(y_test, y_pred) 541 adj_rsquared = adjusted_rsquared( 542 r_squared, X_test.shape[0], X_test.shape[1] 543 ) 544 rmse = np.sqrt(np.mean((y_test - y_pred) ** 2)) 545 546 names.append(name) 547 R2.append(r_squared) 548 ADJR2.append(adj_rsquared) 549 RMSE.append(rmse) 550 TIME.append(time.time() - start) 551 552 if self.custom_metric: 553 custom_metric = self.custom_metric(y_test, y_pred) 554 CUSTOM_METRIC.append(custom_metric) 555 556 if self.verbose > 0: 557 scores_verbose = { 558 "Model": name, 559 "R-Squared": r_squared, 560 "Adjusted R-Squared": adj_rsquared, 561 "RMSE": rmse, 562 "Time taken": time.time() - start, 563 } 564 565 if self.custom_metric: 566 scores_verbose[self.custom_metric.__name__] = custom_metric 567 568 print(scores_verbose) 569 if self.predictions: 570 predictions[name] = y_pred 571 except Exception as exception: 572 if self.ignore_warnings is False: 573 print(name + " model failed to execute") 574 print(exception) 575 576 scores = { 577 "Model": names, 578 "Adjusted R-Squared": ADJR2, 579 "R-Squared": R2, 580 "RMSE": RMSE, 581 "Time Taken": TIME, 582 } 583 584 if self.custom_metric: 585 scores["Custom metric"] = CUSTOM_METRIC 586 587 scores = pd.DataFrame(scores) 588 scores = scores.sort_values(by=self.sort_by, ascending=True).set_index("Model") 589 590 self.best_model_ = self.models_[scores.index[0]] 591 592 if self.predictions is True: 593 594 return scores, predictions 595 596 return scores
Fit Regression algorithms to X_train and y_train, predict and score on X_test, y_test.
Parameters:
X_train : array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
X_test : array-like,
Testing vectors, where rows is the number of samples
and columns is the number of features.
y_train : array-like,
Training target values, with one value per training sample.
y_test : array-like,
Testing target values, with one value per testing sample.
Returns:
scores: Pandas DataFrame Returns metrics of all the models in a Pandas DataFrame.
predictions : Pandas DataFrame Returns predictions of all the models in a Pandas DataFrame.
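To illustrate the custom_metric parameter described above, here is a hedged sketch reusing the diabetes split from the class example; mae is a hypothetical helper defined here, and "Custom metric" is the column name added to the scores table by the code shown above:

```python
import numpy as np
import nnetsauce as ns

def mae(y_true, y_pred):
    # hypothetical custom metric: mean absolute error
    return np.mean(np.abs(np.asarray(y_true) - np.asarray(y_pred)))

reg_custom = ns.LazyDeepRegressor(verbose=0, ignore_warnings=True, custom_metric=mae)
scores = reg_custom.fit(X_train, X_test, y_train, y_test)  # predictions=False, so scores only
print(scores[["RMSE", "Custom metric"]])
```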
610 def provide_models(self, X_train, X_test, y_train, y_test): 611 """ 612 This function returns all the model objects trained in fit function. 613 If fit is not called already, then we call fit and then return the models. 614 615 Parameters: 616 617 X_train : array-like, 618 Training vectors, where rows is the number of samples 619 and columns is the number of features. 620 621 X_test : array-like, 622 Testing vectors, where rows is the number of samples 623 and columns is the number of features. 624 625 y_train : array-like, 626 Training vectors, where rows is the number of samples 627 and columns is the number of features. 628 629 y_test : array-like, 630 Testing vectors, where rows is the number of samples 631 and columns is the number of features. 632 633 Returns: 634 635 models: dict-object, 636 Returns a dictionary with each model pipeline as value 637 with key as name of models. 638 639 """ 640 if len(self.models_.keys()) == 0: 641 self.fit(X_train, X_test, y_train, y_test) 642 643 return self.models_
Returns all the model objects trained by the fit function. If fit has not been called yet, it is called first and the models are then returned.
Parameters:
X_train : array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
X_test : array-like,
Testing vectors, where rows is the number of samples
and columns is the number of features.
y_train : array-like,
Training target values, with one value per training sample.
y_test : array-like,
Testing target values, with one value per testing sample.
Returns:
models: dict-object,
Returns a dictionary with each model pipeline as value
with key as name of models.
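A sketch of restricting the search to a few scikit-learn regressors and then fetching one trained model by name; per the source above, models built from an explicit estimators list are keyed "DeepCustomRegressor(&lt;EstimatorName&gt;)" (the estimator names below are assumptions taken from scikit-learn):

```python
reg_small = ns.LazyDeepRegressor(verbose=0, ignore_warnings=True,
                                 estimators=["Ridge", "ElasticNet"])
# provide_models calls fit if needed and returns the dict of fitted models
models = reg_small.provide_models(X_train, X_test, y_train, y_test)
ridge_like = models["DeepCustomRegressor(Ridge)"]
print(ridge_like.predict(X_test)[:5])
```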
988class LazyMTS(LazyDeepMTS): 989 """ 990 Fitting -- almost -- all the regression algorithms to multivariate time series 991 and returning their scores (no layers). 992 993 Parameters: 994 995 verbose: int, optional (default=0) 996 Any positive number for verbosity. 997 998 ignore_warnings: bool, optional (default=True) 999 When set to True, the warning related to algorigms that are not 1000 able to run are ignored. 1001 1002 custom_metric: function, optional (default=None) 1003 When function is provided, models are evaluated based on the custom 1004 evaluation metric provided. 1005 1006 predictions: bool, optional (default=False) 1007 When set to True, the predictions of all the models models are returned as dataframe. 1008 1009 sort_by: string, optional (default='RMSE') 1010 Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE', 1011 'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and 1012 provided by custom_metric. 1013 1014 random_state: int, optional (default=42) 1015 Reproducibiility seed. 1016 1017 estimators: list, optional (default='all') 1018 list of Estimators (regression algorithms) names or just 'all' (default='all') 1019 1020 preprocess: bool, preprocessing is done when set to True 1021 1022 h: int, optional (default=None) 1023 Number of steps ahead to predict (when used, must be > 0 and < X_test.shape[0]). 1024 1025 All the other parameters are the same as MTS's. 1026 1027 Attributes: 1028 1029 models_: dict-object 1030 Returns a dictionary with each model pipeline as value 1031 with key as name of models. 1032 1033 best_model_: object 1034 Returns the best model pipeline based on the sort_by metric. 1035 1036 Examples: 1037 1038 See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict 1039 1040 """ 1041 1042 def __init__( 1043 self, 1044 verbose=0, 1045 ignore_warnings=True, 1046 custom_metric=None, 1047 predictions=False, 1048 sort_by=None, # leave it as is 1049 random_state=42, 1050 estimators="all", 1051 preprocess=False, 1052 h=None, 1053 # MTS attributes 1054 obj=None, 1055 n_hidden_features=5, 1056 activation_name="relu", 1057 a=0.01, 1058 nodes_sim="sobol", 1059 bias=True, 1060 dropout=0, 1061 direct_link=True, 1062 n_clusters=2, 1063 cluster_encode=True, 1064 type_clust="kmeans", 1065 type_scaling=("std", "std", "std"), 1066 lags=15, 1067 type_pi="scp2-kde", 1068 block_size=None, 1069 replications=None, 1070 kernel=None, 1071 agg="mean", 1072 seed=123, 1073 backend="cpu", 1074 show_progress=False, 1075 ): 1076 super().__init__( 1077 verbose=verbose, 1078 ignore_warnings=ignore_warnings, 1079 custom_metric=custom_metric, 1080 predictions=predictions, 1081 sort_by=sort_by, 1082 random_state=random_state, 1083 estimators=estimators, 1084 preprocess=preprocess, 1085 n_layers=1, 1086 h=h, 1087 obj=obj, 1088 n_hidden_features=n_hidden_features, 1089 activation_name=activation_name, 1090 a=a, 1091 nodes_sim=nodes_sim, 1092 bias=bias, 1093 dropout=dropout, 1094 direct_link=direct_link, 1095 n_clusters=n_clusters, 1096 cluster_encode=cluster_encode, 1097 type_clust=type_clust, 1098 type_scaling=type_scaling, 1099 lags=lags, 1100 type_pi=type_pi, 1101 block_size=block_size, 1102 replications=replications, 1103 kernel=kernel, 1104 agg=agg, 1105 seed=seed, 1106 backend=backend, 1107 show_progress=show_progress, 1108 )
Fitting -- almost -- all the regression algorithms to multivariate time series and returning their scores (no layers).
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, warnings related to algorithms that are not
able to run are ignored.
custom_metric: function, optional (default=None)
When function is provided, models are evaluated based on the custom
evaluation metric provided.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models are returned as a dataframe.
sort_by: string, optional (default='RMSE')
Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE',
'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and
provided by custom_metric.
random_state: int, optional (default=42)
Reproducibility seed.
estimators: list, optional (default='all')
list of estimator (regression algorithm) names, or just 'all' (default='all')
preprocess: bool, preprocessing is applied to the data when set to True
h: int, optional (default=None)
Number of steps ahead to predict (when used, must be > 0 and < X_test.shape[0]).
All the other parameters are the same as MTS's.
Attributes:
models_: dict-object
Returns a dictionary with each model pipeline as value
with key as name of models.
best_model_: object
Returns the best model pipeline based on the sort_by metric.
Examples:
See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict
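In addition to the blog post above, a minimal, hedged sketch of calling LazyMTS on a synthetic multivariate series (the random-walk data, lags, and replications values are purely illustrative assumptions; fit takes a training slice and a testing slice of the series):

```python
import numpy as np
import pandas as pd
import nnetsauce as ns

# synthetic multivariate time series: two independent random walks
rng = np.random.default_rng(123)
values = np.cumsum(rng.normal(size=(120, 2)), axis=0)
df = pd.DataFrame(values, columns=["series1", "series2"],
                  index=pd.date_range("2020-01-01", periods=120, freq="W"))

train, test = df.iloc[:100, :], df.iloc[100:, :]

# replications is used for the simulation-based prediction intervals
regr = ns.LazyMTS(verbose=0, ignore_warnings=True, lags=4,
                  replications=100, predictions=True)
scores, predictions = regr.fit(train, test)
print(scores)
```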
104class LazyDeepMTS(MTS): 105 """ 106 107 Fitting -- almost -- all the regression algorithms with layers of 108 nnetsauce's CustomRegressor to multivariate time series 109 and returning their scores. 110 111 Parameters: 112 113 verbose: int, optional (default=0) 114 Any positive number for verbosity. 115 116 ignore_warnings: bool, optional (default=True) 117 When set to True, the warning related to algorigms that are not 118 able to run are ignored. 119 120 custom_metric: function, optional (default=None) 121 When function is provided, models are evaluated based on the custom 122 evaluation metric provided. 123 124 predictions: bool, optional (default=False) 125 When set to True, the predictions of all the models models are returned as dataframe. 126 127 sort_by: string, optional (default='RMSE') 128 Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE', 129 'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and 130 provided by custom_metric. 131 132 random_state: int, optional (default=42) 133 Reproducibiility seed. 134 135 estimators: list, optional (default='all') 136 list of Estimators (regression algorithms) names or just 'all' (default='all') 137 138 preprocess: bool, preprocessing is done when set to True 139 140 n_layers: int, optional (default=1) 141 Number of layers in the network. When set to 1, the model is equivalent to a MTS. 142 143 h: int, optional (default=None) 144 Number of steps ahead to predict (when used, must be > 0 and < X_test.shape[0]). 145 146 All the other parameters are the same as MTS's. 147 148 Attributes: 149 150 models_: dict-object 151 Returns a dictionary with each model pipeline as value 152 with key as name of models. 153 154 best_model_: object 155 Returns the best model pipeline based on the sort_by metric. 
156 157 Examples: 158 159 See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict 160 161 """ 162 163 def __init__( 164 self, 165 verbose=0, 166 ignore_warnings=True, 167 custom_metric=None, 168 predictions=False, 169 sort_by=None, # leave it as is 170 random_state=42, 171 estimators="all", 172 preprocess=False, 173 n_layers=1, 174 h=None, 175 # MTS attributes 176 obj=None, 177 n_hidden_features=5, 178 activation_name="relu", 179 a=0.01, 180 nodes_sim="sobol", 181 bias=True, 182 dropout=0, 183 direct_link=True, 184 n_clusters=2, 185 cluster_encode=True, 186 type_clust="kmeans", 187 type_scaling=("std", "std", "std"), 188 lags=15, 189 type_pi="scp2-kde", 190 block_size=None, 191 replications=None, 192 kernel=None, 193 agg="mean", 194 seed=123, 195 backend="cpu", 196 show_progress=False, 197 ): 198 self.verbose = verbose 199 self.ignore_warnings = ignore_warnings 200 self.custom_metric = custom_metric 201 self.predictions = predictions 202 self.sort_by = sort_by 203 self.models_ = {} 204 self.best_model_ = None 205 self.random_state = random_state 206 self.estimators = estimators 207 self.preprocess = preprocess 208 self.n_layers = n_layers 209 self.h = h 210 super().__init__( 211 obj=obj, 212 n_hidden_features=n_hidden_features, 213 activation_name=activation_name, 214 a=a, 215 nodes_sim=nodes_sim, 216 bias=bias, 217 dropout=dropout, 218 direct_link=direct_link, 219 n_clusters=n_clusters, 220 cluster_encode=cluster_encode, 221 type_clust=type_clust, 222 type_scaling=type_scaling, 223 seed=seed, 224 backend=backend, 225 lags=lags, 226 type_pi=type_pi, 227 block_size=block_size, 228 replications=replications, 229 kernel=kernel, 230 agg=agg, 231 verbose=verbose, 232 show_progress=show_progress, 233 ) 234 if self.replications is not None or self.type_pi == "gaussian": 235 if self.sort_by is None: 236 self.sort_by = "WINKLERSCORE" 237 else: 238 if self.sort_by is None: 239 self.sort_by = "RMSE" 240 241 def fit(self, X_train, X_test, xreg=None, per_series=False, **kwargs): 242 """Fit Regression algorithms to X_train, predict and score on X_test. 243 244 Parameters: 245 246 X_train: array-like or data frame, 247 Training vectors, where rows is the number of samples 248 and columns is the number of features. 249 250 X_test: array-like or data frame, 251 Testing vectors, where rows is the number of samples 252 and columns is the number of features. 253 254 xreg: array-like, optional (default=None) 255 Additional (external) regressors to be passed to self.obj 256 xreg must be in 'increasing' order (most recent observations last) 257 258 per_series: bool, optional (default=False) 259 When set to True, the metrics are computed series by series. 260 261 **kwargs: dict, optional (default=None) 262 Additional parameters to be passed to `fit` method of `obj`. 263 264 Returns: 265 266 scores: Pandas DataFrame 267 Returns metrics of all the models in a Pandas DataFrame. 268 269 predictions: Pandas DataFrame 270 Returns predictions of all the models in a Pandas DataFrame. 271 272 """ 273 R2 = [] 274 ADJR2 = [] 275 ME = [] 276 MPL = [] 277 RMSE = [] 278 MAE = [] 279 MPE = [] 280 MAPE = [] 281 WINKLERSCORE = [] 282 COVERAGE = [] 283 284 # WIN = [] 285 names = [] 286 TIME = [] 287 predictions = {} 288 289 if self.custom_metric is not None: 290 CUSTOM_METRIC = [] 291 292 if self.h is None: 293 assert X_test is not None, "If h is None, X_test must be provided." 
294 295 if isinstance(X_train, np.ndarray): 296 X_train = pd.DataFrame(X_train) 297 X_test = pd.DataFrame(X_test) 298 299 self.series_names = X_train.columns.tolist() 300 301 X_train = convert_df_to_numeric(X_train) 302 X_test = convert_df_to_numeric(X_test) 303 304 numeric_features = X_train.select_dtypes(include=[np.number]).columns 305 categorical_features = X_train.select_dtypes(include=["object"]).columns 306 307 categorical_low, categorical_high = get_card_split( 308 X_train, categorical_features 309 ) 310 311 if self.preprocess: 312 preprocessor = ColumnTransformer( 313 transformers=[ 314 ("numeric", numeric_transformer, numeric_features), 315 ( 316 "categorical_low", 317 categorical_transformer_low, 318 categorical_low, 319 ), 320 ( 321 "categorical_high", 322 categorical_transformer_high, 323 categorical_high, 324 ), 325 ] 326 ) 327 328 # baselines (Classical MTS) ---- 329 for i, name in enumerate(["ARIMA", "ETS", "Theta", "VAR", "VECM"]): 330 try: 331 start = time.time() 332 regr = ClassicalMTS(model=name) 333 regr.fit(X_train, **kwargs) 334 self.models_[name] = regr 335 if self.h is None: 336 X_pred = regr.predict(h=X_test.shape[0], **kwargs) 337 else: 338 assert self.h > 0, "h must be > 0" 339 X_pred = regr.predict(h=self.h, **kwargs) 340 try: 341 X_test = X_test[0 : self.h, :] 342 except Exception as e: 343 X_test = X_test.iloc[0 : self.h, :] 344 345 rmse = mean_errors( 346 actual=X_test, 347 pred=X_pred, 348 scoring="root_mean_squared_error", 349 per_series=per_series, 350 ) 351 mae = mean_errors( 352 actual=X_test, 353 pred=X_pred, 354 scoring="mean_absolute_error", 355 per_series=per_series, 356 ) 357 mpl = mean_errors( 358 actual=X_test, 359 pred=X_pred, 360 scoring="mean_pinball_loss", 361 per_series=per_series, 362 ) 363 except Exception: 364 365 continue 366 367 names.append(name) 368 RMSE.append(rmse) 369 MAE.append(mae) 370 MPL.append(mpl) 371 372 if self.custom_metric is not None: 373 try: 374 if self.h is None: 375 custom_metric = self.custom_metric(X_test, X_pred) 376 else: 377 custom_metric = self.custom_metric(X_test_h, X_pred) 378 CUSTOM_METRIC.append(custom_metric) 379 except Exception as e: 380 custom_metric = np.iinfo(np.float32).max 381 CUSTOM_METRIC.append(np.iinfo(np.float32).max) 382 383 if (self.replications is not None) or (self.type_pi == "gaussian"): 384 if per_series == False: 385 winklerscore = winkler_score(obj=X_pred, actual=X_test, level=95) 386 coveragecalc = coverage(X_pred, X_test, level=95) 387 else: 388 winklerscore = winkler_score( 389 obj=X_pred, actual=X_test, level=95, per_series=True 390 ) 391 coveragecalc = coverage(X_pred, X_test, level=95, per_series=True) 392 WINKLERSCORE.append(winklerscore) 393 COVERAGE.append(coveragecalc) 394 TIME.append(time.time() - start) 395 396 if self.estimators == "all": 397 if self.n_layers <= 1: 398 self.regressors = REGRESSORSMTS 399 else: 400 self.regressors = DEEPREGRESSORSMTS 401 else: 402 if self.n_layers <= 1: 403 self.regressors = [ 404 ("MTS(" + est[0] + ")", est[1]) 405 for est in all_estimators() 406 if ( 407 issubclass(est[1], RegressorMixin) 408 and (est[0] in self.estimators) 409 ) 410 ] 411 else: # self.n_layers > 1 412 self.regressors = [ 413 ("DeepMTS(" + est[0] + ")", est[1]) 414 for est in all_estimators() 415 if ( 416 issubclass(est[1], RegressorMixin) 417 and (est[0] in self.estimators) 418 ) 419 ] 420 421 if self.preprocess is True: 422 for name, model in tqdm(self.regressors): # do parallel exec 423 start = time.time() 424 try: 425 if "random_state" in model().get_params().keys(): 
426 pipe = Pipeline( 427 steps=[ 428 ("preprocessor", preprocessor), 429 ( 430 "regressor", 431 DeepMTS( 432 obj=model( 433 random_state=self.random_state, 434 **kwargs, 435 ), 436 n_layers=self.n_layers, 437 n_hidden_features=self.n_hidden_features, 438 activation_name=self.activation_name, 439 a=self.a, 440 nodes_sim=self.nodes_sim, 441 bias=self.bias, 442 dropout=self.dropout, 443 direct_link=self.direct_link, 444 n_clusters=self.n_clusters, 445 cluster_encode=self.cluster_encode, 446 type_clust=self.type_clust, 447 type_scaling=self.type_scaling, 448 lags=self.lags, 449 type_pi=self.type_pi, 450 block_size=self.block_size, 451 replications=self.replications, 452 kernel=self.kernel, 453 agg=self.agg, 454 seed=self.seed, 455 backend=self.backend, 456 show_progress=self.show_progress, 457 ), 458 ), 459 ] 460 ) 461 else: # "random_state" in model().get_params().keys() 462 pipe = Pipeline( 463 steps=[ 464 ("preprocessor", preprocessor), 465 ( 466 "regressor", 467 DeepMTS( 468 obj=model(**kwargs), 469 n_layers=self.n_layers, 470 n_hidden_features=self.n_hidden_features, 471 activation_name=self.activation_name, 472 a=self.a, 473 nodes_sim=self.nodes_sim, 474 bias=self.bias, 475 dropout=self.dropout, 476 direct_link=self.direct_link, 477 n_clusters=self.n_clusters, 478 cluster_encode=self.cluster_encode, 479 type_clust=self.type_clust, 480 type_scaling=self.type_scaling, 481 lags=self.lags, 482 type_pi=self.type_pi, 483 block_size=self.block_size, 484 replications=self.replications, 485 kernel=self.kernel, 486 agg=self.agg, 487 seed=self.seed, 488 backend=self.backend, 489 show_progress=self.show_progress, 490 ), 491 ), 492 ] 493 ) 494 495 pipe.fit(X_train, **kwargs) 496 # pipe.fit(X_train, xreg=xreg) 497 498 self.models_[name] = pipe 499 500 if self.h is None: 501 X_pred = pipe["regressor"].predict(h=self.h, **kwargs) 502 else: 503 assert self.h > 0, "h must be > 0" 504 X_pred = pipe["regressor"].predict(h=self.h, **kwargs) 505 506 if (self.replications is not None) or (self.type_pi == "gaussian"): 507 rmse = mean_errors( 508 actual=X_test, 509 pred=X_pred, 510 scoring="root_mean_squared_error", 511 per_series=per_series, 512 ) 513 mae = mean_errors( 514 actual=X_test, 515 pred=X_pred, 516 scoring="mean_absolute_error", 517 per_series=per_series, 518 ) 519 mpl = mean_errors( 520 actual=X_test, 521 pred=X_pred, 522 scoring="mean_pinball_loss", 523 per_series=per_series, 524 ) 525 winklerscore = winkler_score( 526 obj=X_pred, 527 actual=X_test, 528 level=95, 529 per_series=per_series, 530 ) 531 coveragecalc = coverage( 532 X_pred, X_test, level=95, per_series=per_series 533 ) 534 else: 535 rmse = mean_errors( 536 actual=X_test, 537 pred=X_pred, 538 scoring="root_mean_squared_error", 539 per_series=per_series, 540 ) 541 mae = mean_errors( 542 actual=X_test, 543 pred=X_pred, 544 scoring="mean_absolute_error", 545 per_series=per_series, 546 ) 547 mpl = mean_errors( 548 actual=X_test, 549 pred=X_pred, 550 scoring="mean_pinball_loss", 551 per_series=per_series, 552 ) 553 554 names.append(name) 555 RMSE.append(rmse) 556 MAE.append(mae) 557 MPL.append(mpl) 558 559 if (self.replications is not None) or (self.type_pi == "gaussian"): 560 WINKLERSCORE.append(winklerscore) 561 COVERAGE.append(coveragecalc) 562 TIME.append(time.time() - start) 563 564 if self.custom_metric is not None: 565 try: 566 custom_metric = self.custom_metric(X_test, X_pred) 567 CUSTOM_METRIC.append(custom_metric) 568 except Exception as e: 569 custom_metric = np.iinfo(np.float32).max 570 CUSTOM_METRIC.append(custom_metric) 571 572 
if self.verbose > 0: 573 if (self.replications is not None) or ( 574 self.type_pi == "gaussian" 575 ): 576 scores_verbose = { 577 "Model": name, 578 "RMSE": rmse, 579 "MAE": mae, 580 "MPL": mpl, 581 "WINKLERSCORE": winklerscore, 582 "COVERAGE": coveragecalc, 583 "Time taken": time.time() - start, 584 } 585 else: 586 scores_verbose = { 587 "Model": name, 588 "RMSE": rmse, 589 "MAE": mae, 590 "MPL": mpl, 591 "Time taken": time.time() - start, 592 } 593 594 if self.custom_metric is not None: 595 scores_verbose["Custom metric"] = custom_metric 596 597 if self.predictions: 598 predictions[name] = X_pred 599 except Exception as exception: 600 if self.ignore_warnings is False: 601 print(name + " model failed to execute") 602 print(exception) 603 604 else: # no preprocessing 605 606 for name, model in tqdm(self.regressors): # do parallel exec 607 start = time.time() 608 try: 609 if "random_state" in model().get_params().keys(): 610 pipe = DeepMTS( 611 obj=model(random_state=self.random_state, **kwargs), 612 n_layers=self.n_layers, 613 n_hidden_features=self.n_hidden_features, 614 activation_name=self.activation_name, 615 a=self.a, 616 nodes_sim=self.nodes_sim, 617 bias=self.bias, 618 dropout=self.dropout, 619 direct_link=self.direct_link, 620 n_clusters=self.n_clusters, 621 cluster_encode=self.cluster_encode, 622 type_clust=self.type_clust, 623 type_scaling=self.type_scaling, 624 lags=self.lags, 625 type_pi=self.type_pi, 626 block_size=self.block_size, 627 replications=self.replications, 628 kernel=self.kernel, 629 agg=self.agg, 630 seed=self.seed, 631 backend=self.backend, 632 show_progress=self.show_progress, 633 ) 634 else: 635 pipe = DeepMTS( 636 obj=model(**kwargs), 637 n_layers=self.n_layers, 638 n_hidden_features=self.n_hidden_features, 639 activation_name=self.activation_name, 640 a=self.a, 641 nodes_sim=self.nodes_sim, 642 bias=self.bias, 643 dropout=self.dropout, 644 direct_link=self.direct_link, 645 n_clusters=self.n_clusters, 646 cluster_encode=self.cluster_encode, 647 type_clust=self.type_clust, 648 type_scaling=self.type_scaling, 649 lags=self.lags, 650 type_pi=self.type_pi, 651 block_size=self.block_size, 652 replications=self.replications, 653 kernel=self.kernel, 654 agg=self.agg, 655 seed=self.seed, 656 backend=self.backend, 657 show_progress=self.show_progress, 658 ) 659 660 pipe.fit(X_train, xreg, **kwargs) 661 # pipe.fit(X_train, xreg=xreg) # DO xreg like in `ahead` 662 663 self.models_[name] = pipe 664 665 if self.preprocess is True: 666 if self.h is None: 667 X_pred = pipe["regressor"].predict( 668 h=X_test.shape[0], **kwargs 669 ) 670 else: 671 assert ( 672 self.h > 0 and self.h <= X_test.shape[0] 673 ), "h must be > 0 and < X_test.shape[0]" 674 X_pred = pipe["regressor"].predict(h=self.h, **kwargs) 675 676 else: 677 678 if self.h is None: 679 X_pred = pipe.predict( 680 h=X_test.shape[0], 681 **kwargs, 682 # X_pred = pipe.predict(h=X_test.shape[0], new_xreg=new_xreg) ## DO xreg like in `ahead` 683 ) 684 else: 685 assert ( 686 self.h > 0 and self.h <= X_test.shape[0] 687 ), "h must be > 0 and < X_test.shape[0]" 688 X_pred = pipe.predict(h=self.h, **kwargs) 689 690 if self.h is None: 691 if (self.replications is not None) or ( 692 self.type_pi == "gaussian" 693 ): 694 rmse = mean_errors( 695 actual=X_test, 696 pred=X_pred.mean, 697 scoring="root_mean_squared_error", 698 per_series=per_series, 699 ) 700 mae = mean_errors( 701 actual=X_test, 702 pred=X_pred.mean, 703 scoring="mean_absolute_error", 704 per_series=per_series, 705 ) 706 mpl = mean_errors( 707 actual=X_test, 708 
pred=X_pred.mean, 709 scoring="mean_pinball_loss", 710 per_series=per_series, 711 ) 712 winklerscore = winkler_score( 713 obj=X_pred, 714 actual=X_test, 715 level=95, 716 per_series=per_series, 717 ) 718 coveragecalc = coverage( 719 X_pred, X_test, level=95, per_series=per_series 720 ) 721 else: # no prediction interval 722 rmse = mean_errors( 723 actual=X_test, 724 pred=X_pred, 725 scoring="root_mean_squared_error", 726 per_series=per_series, 727 ) 728 mae = mean_errors( 729 actual=X_test, 730 pred=X_pred, 731 scoring="mean_absolute_error", 732 per_series=per_series, 733 ) 734 mpl = mean_errors( 735 actual=X_test, 736 pred=X_pred, 737 scoring="mean_pinball_loss", 738 per_series=per_series, 739 ) 740 else: # self.h is not None 741 if (self.replications is not None) or ( 742 self.type_pi == "gaussian" 743 ): 744 745 if isinstance(X_test, pd.DataFrame): 746 X_test_h = X_test.iloc[0 : self.h, :] 747 rmse = mean_errors( 748 actual=X_test_h, 749 pred=X_pred, 750 scoring="root_mean_squared_error", 751 per_series=per_series, 752 ) 753 mae = mean_errors( 754 actual=X_test_h, 755 pred=X_pred, 756 scoring="mean_absolute_error", 757 per_series=per_series, 758 ) 759 mpl = mean_errors( 760 actual=X_test_h, 761 pred=X_pred, 762 scoring="mean_pinball_loss", 763 per_series=per_series, 764 ) 765 winklerscore = winkler_score( 766 obj=X_pred, 767 actual=X_test_h, 768 level=95, 769 per_series=per_series, 770 ) 771 coveragecalc = coverage( 772 X_pred, 773 X_test_h, 774 level=95, 775 per_series=per_series, 776 ) 777 else: 778 X_test_h = X_test[0 : self.h, :] 779 rmse = mean_errors( 780 actual=X_test_h, 781 pred=X_pred, 782 scoring="root_mean_squared_error", 783 per_series=per_series, 784 ) 785 mae = mean_errors( 786 actual=X_test_h, 787 pred=X_pred, 788 scoring="mean_absolute_error", 789 per_series=per_series, 790 ) 791 mpl = mean_errors( 792 actual=X_test_h, 793 pred=X_pred, 794 scoring="mean_pinball_loss", 795 per_series=per_series, 796 ) 797 winklerscore = winkler_score( 798 obj=X_pred, 799 actual=X_test_h, 800 level=95, 801 per_series=per_series, 802 ) 803 coveragecalc = coverage( 804 X_pred, 805 X_test_h, 806 level=95, 807 per_series=per_series, 808 ) 809 else: # no prediction interval 810 811 if isinstance(X_test, pd.DataFrame): 812 X_test_h = X_test.iloc[0 : self.h, :] 813 rmse = mean_errors( 814 actual=X_test_h, 815 pred=X_pred, 816 scoring="root_mean_squared_error", 817 per_series=per_series, 818 ) 819 mae = mean_errors( 820 actual=X_test_h, 821 pred=X_pred, 822 scoring="mean_absolute_error", 823 per_series=per_series, 824 ) 825 mpl = mean_errors( 826 actual=X_test_h, 827 pred=X_pred, 828 scoring="mean_pinball_loss", 829 per_series=per_series, 830 ) 831 else: 832 X_test_h = X_test[0 : self.h, :] 833 rmse = mean_errors( 834 actual=X_test_h, 835 pred=X_pred, 836 scoring="root_mean_squared_error", 837 per_series=per_series, 838 ) 839 mae = mean_errors( 840 actual=X_test_h, 841 pred=X_pred, 842 scoring="mean_absolute_error", 843 per_series=per_series, 844 ) 845 846 names.append(name) 847 RMSE.append(rmse) 848 MAE.append(mae) 849 MPL.append(mpl) 850 if (self.replications is not None) or (self.type_pi == "gaussian"): 851 WINKLERSCORE.append(winklerscore) 852 COVERAGE.append(coveragecalc) 853 TIME.append(time.time() - start) 854 855 if self.custom_metric is not None: 856 try: 857 if self.h is None: 858 custom_metric = self.custom_metric(X_test, X_pred) 859 else: 860 custom_metric = self.custom_metric(X_test_h, X_pred) 861 CUSTOM_METRIC.append(custom_metric) 862 except Exception as e: 863 custom_metric = 
np.iinfo(np.float32).max 864 CUSTOM_METRIC.append(np.iinfo(np.float32).max) 865 866 if self.verbose > 0: 867 if (self.replications is not None) or ( 868 self.type_pi == "gaussian" 869 ): 870 scores_verbose = { 871 "Model": name, 872 "RMSE": rmse, 873 "MAE": mae, 874 "MPL": mpl, 875 "WINKLERSCORE": winklerscore, 876 "COVERAGE": coveragecalc, 877 "Time taken": time.time() - start, 878 } 879 else: 880 scores_verbose = { 881 "Model": name, 882 "RMSE": rmse, 883 "MAE": mae, 884 "MPL": mpl, 885 "Time taken": time.time() - start, 886 } 887 888 if self.custom_metric is not None: 889 scores_verbose["Custom metric"] = custom_metric 890 891 if self.predictions: 892 predictions[name] = X_pred 893 894 except Exception as exception: 895 if self.ignore_warnings is False: 896 print(name + " model failed to execute") 897 print(exception) 898 899 if (self.replications is not None) or (self.type_pi == "gaussian"): 900 scores = { 901 "Model": names, 902 "RMSE": RMSE, 903 "MAE": MAE, 904 "MPL": MPL, 905 "WINKLERSCORE": WINKLERSCORE, 906 "COVERAGE": COVERAGE, 907 "Time Taken": TIME, 908 } 909 else: 910 scores = { 911 "Model": names, 912 "RMSE": RMSE, 913 "MAE": MAE, 914 "MPL": MPL, 915 "Time Taken": TIME, 916 } 917 918 if self.custom_metric is not None: 919 scores["Custom metric"] = CUSTOM_METRIC 920 921 if per_series: 922 scores = dict_to_dataframe_series(scores, self.series_names) 923 else: 924 scores = pd.DataFrame(scores) 925 926 try: # case per_series, can't be sorted 927 scores = scores.sort_values(by=self.sort_by, ascending=True).set_index( 928 "Model" 929 ) 930 931 self.best_model_ = self.models_[scores.index[0]] 932 except Exception as e: 933 pass 934 935 if self.predictions is True: 936 937 return scores, predictions 938 939 return scores 940 941 def get_best_model(self): 942 """ 943 This function returns the best model pipeline based on the sort_by metric. 944 945 Returns: 946 947 best_model: object, 948 Returns the best model pipeline based on the sort_by metric. 949 950 """ 951 return self.best_model_ 952 953 def provide_models(self, X_train, X_test): 954 """ 955 This function returns all the model objects trained in fit function. 956 If fit is not called already, then we call fit and then return the models. 957 958 Parameters: 959 960 X_train : array-like, 961 Training vectors, where rows is the number of samples 962 and columns is the number of features. 963 964 X_test : array-like, 965 Testing vectors, where rows is the number of samples 966 and columns is the number of features. 967 968 Returns: 969 970 models: dict-object, 971 Returns a dictionary with each model pipeline as value 972 with key as name of models. 973 974 """ 975 if self.h is None: 976 if len(self.models_.keys()) == 0: 977 self.fit(X_train, X_test) 978 else: 979 if len(self.models_.keys()) == 0: 980 if isinstance(X_test, pd.DataFrame): 981 self.fit(X_train, X_test.iloc[0 : self.h, :]) 982 else: 983 self.fit(X_train, X_test[0 : self.h, :]) 984 985 return self.models_
Fitting -- almost -- all the regression algorithms with layers of nnetsauce's CustomRegressor to multivariate time series and returning their scores.
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, warnings related to algorithms that are not
able to run are ignored.
custom_metric: function, optional (default=None)
When a function is provided, models are evaluated based on this custom
evaluation metric.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models are returned as a data frame.
sort_by: string, optional (default='RMSE')
Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE',
'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and
provided by custom_metric.
random_state: int, optional (default=42)
Reproducibility seed.
estimators: list, optional (default='all')
list of estimator (regression algorithm) names, or just 'all'.
preprocess: bool
preprocessing is done when set to True.
n_layers: int, optional (default=1)
Number of layers in the network. When set to 1, the model is equivalent to an MTS.
h: int, optional (default=None)
Number of steps ahead to predict (when used, must be > 0 and < X_test.shape[0]).
All the other parameters are the same as MTS's.
Attributes:
models_: dict-object
A dictionary with model names as keys and the corresponding fitted pipelines as values.
best_model_: object
The best model pipeline according to the sort_by metric.
Examples:
See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict
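A minimal usage sketch (not taken from the blog post above), on a toy random dataset; the constructor arguments shown are illustrative and follow the parameters listed above:

```python
import numpy as np
import pandas as pd
import nnetsauce as ns

np.random.seed(123)
# toy multivariate time series: 3 series, 100 observations (illustrative values only)
X = pd.DataFrame(np.random.rand(100, 3), columns=["series1", "series2", "series3"])
X_train, X_test = X.iloc[:80, :], X.iloc[80:, :]

regr_mts = ns.LazyDeepMTS(verbose=0, ignore_warnings=True, lags=2, n_layers=1)
scores = regr_mts.fit(X_train, X_test)
print(scores)  # one row of metrics (RMSE, MAE, MPL, time taken, ...) per model
```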
241 def fit(self, X_train, X_test, xreg=None, per_series=False, **kwargs): 242 """Fit Regression algorithms to X_train, predict and score on X_test. 243 244 Parameters: 245 246 X_train: array-like or data frame, 247 Training vectors, where rows is the number of samples 248 and columns is the number of features. 249 250 X_test: array-like or data frame, 251 Testing vectors, where rows is the number of samples 252 and columns is the number of features. 253 254 xreg: array-like, optional (default=None) 255 Additional (external) regressors to be passed to self.obj 256 xreg must be in 'increasing' order (most recent observations last) 257 258 per_series: bool, optional (default=False) 259 When set to True, the metrics are computed series by series. 260 261 **kwargs: dict, optional (default=None) 262 Additional parameters to be passed to `fit` method of `obj`. 263 264 Returns: 265 266 scores: Pandas DataFrame 267 Returns metrics of all the models in a Pandas DataFrame. 268 269 predictions: Pandas DataFrame 270 Returns predictions of all the models in a Pandas DataFrame. 271 272 """ 273 R2 = [] 274 ADJR2 = [] 275 ME = [] 276 MPL = [] 277 RMSE = [] 278 MAE = [] 279 MPE = [] 280 MAPE = [] 281 WINKLERSCORE = [] 282 COVERAGE = [] 283 284 # WIN = [] 285 names = [] 286 TIME = [] 287 predictions = {} 288 289 if self.custom_metric is not None: 290 CUSTOM_METRIC = [] 291 292 if self.h is None: 293 assert X_test is not None, "If h is None, X_test must be provided." 294 295 if isinstance(X_train, np.ndarray): 296 X_train = pd.DataFrame(X_train) 297 X_test = pd.DataFrame(X_test) 298 299 self.series_names = X_train.columns.tolist() 300 301 X_train = convert_df_to_numeric(X_train) 302 X_test = convert_df_to_numeric(X_test) 303 304 numeric_features = X_train.select_dtypes(include=[np.number]).columns 305 categorical_features = X_train.select_dtypes(include=["object"]).columns 306 307 categorical_low, categorical_high = get_card_split( 308 X_train, categorical_features 309 ) 310 311 if self.preprocess: 312 preprocessor = ColumnTransformer( 313 transformers=[ 314 ("numeric", numeric_transformer, numeric_features), 315 ( 316 "categorical_low", 317 categorical_transformer_low, 318 categorical_low, 319 ), 320 ( 321 "categorical_high", 322 categorical_transformer_high, 323 categorical_high, 324 ), 325 ] 326 ) 327 328 # baselines (Classical MTS) ---- 329 for i, name in enumerate(["ARIMA", "ETS", "Theta", "VAR", "VECM"]): 330 try: 331 start = time.time() 332 regr = ClassicalMTS(model=name) 333 regr.fit(X_train, **kwargs) 334 self.models_[name] = regr 335 if self.h is None: 336 X_pred = regr.predict(h=X_test.shape[0], **kwargs) 337 else: 338 assert self.h > 0, "h must be > 0" 339 X_pred = regr.predict(h=self.h, **kwargs) 340 try: 341 X_test = X_test[0 : self.h, :] 342 except Exception as e: 343 X_test = X_test.iloc[0 : self.h, :] 344 345 rmse = mean_errors( 346 actual=X_test, 347 pred=X_pred, 348 scoring="root_mean_squared_error", 349 per_series=per_series, 350 ) 351 mae = mean_errors( 352 actual=X_test, 353 pred=X_pred, 354 scoring="mean_absolute_error", 355 per_series=per_series, 356 ) 357 mpl = mean_errors( 358 actual=X_test, 359 pred=X_pred, 360 scoring="mean_pinball_loss", 361 per_series=per_series, 362 ) 363 except Exception: 364 365 continue 366 367 names.append(name) 368 RMSE.append(rmse) 369 MAE.append(mae) 370 MPL.append(mpl) 371 372 if self.custom_metric is not None: 373 try: 374 if self.h is None: 375 custom_metric = self.custom_metric(X_test, X_pred) 376 else: 377 custom_metric = 
self.custom_metric(X_test_h, X_pred) 378 CUSTOM_METRIC.append(custom_metric) 379 except Exception as e: 380 custom_metric = np.iinfo(np.float32).max 381 CUSTOM_METRIC.append(np.iinfo(np.float32).max) 382 383 if (self.replications is not None) or (self.type_pi == "gaussian"): 384 if per_series == False: 385 winklerscore = winkler_score(obj=X_pred, actual=X_test, level=95) 386 coveragecalc = coverage(X_pred, X_test, level=95) 387 else: 388 winklerscore = winkler_score( 389 obj=X_pred, actual=X_test, level=95, per_series=True 390 ) 391 coveragecalc = coverage(X_pred, X_test, level=95, per_series=True) 392 WINKLERSCORE.append(winklerscore) 393 COVERAGE.append(coveragecalc) 394 TIME.append(time.time() - start) 395 396 if self.estimators == "all": 397 if self.n_layers <= 1: 398 self.regressors = REGRESSORSMTS 399 else: 400 self.regressors = DEEPREGRESSORSMTS 401 else: 402 if self.n_layers <= 1: 403 self.regressors = [ 404 ("MTS(" + est[0] + ")", est[1]) 405 for est in all_estimators() 406 if ( 407 issubclass(est[1], RegressorMixin) 408 and (est[0] in self.estimators) 409 ) 410 ] 411 else: # self.n_layers > 1 412 self.regressors = [ 413 ("DeepMTS(" + est[0] + ")", est[1]) 414 for est in all_estimators() 415 if ( 416 issubclass(est[1], RegressorMixin) 417 and (est[0] in self.estimators) 418 ) 419 ] 420 421 if self.preprocess is True: 422 for name, model in tqdm(self.regressors): # do parallel exec 423 start = time.time() 424 try: 425 if "random_state" in model().get_params().keys(): 426 pipe = Pipeline( 427 steps=[ 428 ("preprocessor", preprocessor), 429 ( 430 "regressor", 431 DeepMTS( 432 obj=model( 433 random_state=self.random_state, 434 **kwargs, 435 ), 436 n_layers=self.n_layers, 437 n_hidden_features=self.n_hidden_features, 438 activation_name=self.activation_name, 439 a=self.a, 440 nodes_sim=self.nodes_sim, 441 bias=self.bias, 442 dropout=self.dropout, 443 direct_link=self.direct_link, 444 n_clusters=self.n_clusters, 445 cluster_encode=self.cluster_encode, 446 type_clust=self.type_clust, 447 type_scaling=self.type_scaling, 448 lags=self.lags, 449 type_pi=self.type_pi, 450 block_size=self.block_size, 451 replications=self.replications, 452 kernel=self.kernel, 453 agg=self.agg, 454 seed=self.seed, 455 backend=self.backend, 456 show_progress=self.show_progress, 457 ), 458 ), 459 ] 460 ) 461 else: # "random_state" in model().get_params().keys() 462 pipe = Pipeline( 463 steps=[ 464 ("preprocessor", preprocessor), 465 ( 466 "regressor", 467 DeepMTS( 468 obj=model(**kwargs), 469 n_layers=self.n_layers, 470 n_hidden_features=self.n_hidden_features, 471 activation_name=self.activation_name, 472 a=self.a, 473 nodes_sim=self.nodes_sim, 474 bias=self.bias, 475 dropout=self.dropout, 476 direct_link=self.direct_link, 477 n_clusters=self.n_clusters, 478 cluster_encode=self.cluster_encode, 479 type_clust=self.type_clust, 480 type_scaling=self.type_scaling, 481 lags=self.lags, 482 type_pi=self.type_pi, 483 block_size=self.block_size, 484 replications=self.replications, 485 kernel=self.kernel, 486 agg=self.agg, 487 seed=self.seed, 488 backend=self.backend, 489 show_progress=self.show_progress, 490 ), 491 ), 492 ] 493 ) 494 495 pipe.fit(X_train, **kwargs) 496 # pipe.fit(X_train, xreg=xreg) 497 498 self.models_[name] = pipe 499 500 if self.h is None: 501 X_pred = pipe["regressor"].predict(h=self.h, **kwargs) 502 else: 503 assert self.h > 0, "h must be > 0" 504 X_pred = pipe["regressor"].predict(h=self.h, **kwargs) 505 506 if (self.replications is not None) or (self.type_pi == "gaussian"): 507 rmse = 
mean_errors( 508 actual=X_test, 509 pred=X_pred, 510 scoring="root_mean_squared_error", 511 per_series=per_series, 512 ) 513 mae = mean_errors( 514 actual=X_test, 515 pred=X_pred, 516 scoring="mean_absolute_error", 517 per_series=per_series, 518 ) 519 mpl = mean_errors( 520 actual=X_test, 521 pred=X_pred, 522 scoring="mean_pinball_loss", 523 per_series=per_series, 524 ) 525 winklerscore = winkler_score( 526 obj=X_pred, 527 actual=X_test, 528 level=95, 529 per_series=per_series, 530 ) 531 coveragecalc = coverage( 532 X_pred, X_test, level=95, per_series=per_series 533 ) 534 else: 535 rmse = mean_errors( 536 actual=X_test, 537 pred=X_pred, 538 scoring="root_mean_squared_error", 539 per_series=per_series, 540 ) 541 mae = mean_errors( 542 actual=X_test, 543 pred=X_pred, 544 scoring="mean_absolute_error", 545 per_series=per_series, 546 ) 547 mpl = mean_errors( 548 actual=X_test, 549 pred=X_pred, 550 scoring="mean_pinball_loss", 551 per_series=per_series, 552 ) 553 554 names.append(name) 555 RMSE.append(rmse) 556 MAE.append(mae) 557 MPL.append(mpl) 558 559 if (self.replications is not None) or (self.type_pi == "gaussian"): 560 WINKLERSCORE.append(winklerscore) 561 COVERAGE.append(coveragecalc) 562 TIME.append(time.time() - start) 563 564 if self.custom_metric is not None: 565 try: 566 custom_metric = self.custom_metric(X_test, X_pred) 567 CUSTOM_METRIC.append(custom_metric) 568 except Exception as e: 569 custom_metric = np.iinfo(np.float32).max 570 CUSTOM_METRIC.append(custom_metric) 571 572 if self.verbose > 0: 573 if (self.replications is not None) or ( 574 self.type_pi == "gaussian" 575 ): 576 scores_verbose = { 577 "Model": name, 578 "RMSE": rmse, 579 "MAE": mae, 580 "MPL": mpl, 581 "WINKLERSCORE": winklerscore, 582 "COVERAGE": coveragecalc, 583 "Time taken": time.time() - start, 584 } 585 else: 586 scores_verbose = { 587 "Model": name, 588 "RMSE": rmse, 589 "MAE": mae, 590 "MPL": mpl, 591 "Time taken": time.time() - start, 592 } 593 594 if self.custom_metric is not None: 595 scores_verbose["Custom metric"] = custom_metric 596 597 if self.predictions: 598 predictions[name] = X_pred 599 except Exception as exception: 600 if self.ignore_warnings is False: 601 print(name + " model failed to execute") 602 print(exception) 603 604 else: # no preprocessing 605 606 for name, model in tqdm(self.regressors): # do parallel exec 607 start = time.time() 608 try: 609 if "random_state" in model().get_params().keys(): 610 pipe = DeepMTS( 611 obj=model(random_state=self.random_state, **kwargs), 612 n_layers=self.n_layers, 613 n_hidden_features=self.n_hidden_features, 614 activation_name=self.activation_name, 615 a=self.a, 616 nodes_sim=self.nodes_sim, 617 bias=self.bias, 618 dropout=self.dropout, 619 direct_link=self.direct_link, 620 n_clusters=self.n_clusters, 621 cluster_encode=self.cluster_encode, 622 type_clust=self.type_clust, 623 type_scaling=self.type_scaling, 624 lags=self.lags, 625 type_pi=self.type_pi, 626 block_size=self.block_size, 627 replications=self.replications, 628 kernel=self.kernel, 629 agg=self.agg, 630 seed=self.seed, 631 backend=self.backend, 632 show_progress=self.show_progress, 633 ) 634 else: 635 pipe = DeepMTS( 636 obj=model(**kwargs), 637 n_layers=self.n_layers, 638 n_hidden_features=self.n_hidden_features, 639 activation_name=self.activation_name, 640 a=self.a, 641 nodes_sim=self.nodes_sim, 642 bias=self.bias, 643 dropout=self.dropout, 644 direct_link=self.direct_link, 645 n_clusters=self.n_clusters, 646 cluster_encode=self.cluster_encode, 647 type_clust=self.type_clust, 648 
type_scaling=self.type_scaling, 649 lags=self.lags, 650 type_pi=self.type_pi, 651 block_size=self.block_size, 652 replications=self.replications, 653 kernel=self.kernel, 654 agg=self.agg, 655 seed=self.seed, 656 backend=self.backend, 657 show_progress=self.show_progress, 658 ) 659 660 pipe.fit(X_train, xreg, **kwargs) 661 # pipe.fit(X_train, xreg=xreg) # DO xreg like in `ahead` 662 663 self.models_[name] = pipe 664 665 if self.preprocess is True: 666 if self.h is None: 667 X_pred = pipe["regressor"].predict( 668 h=X_test.shape[0], **kwargs 669 ) 670 else: 671 assert ( 672 self.h > 0 and self.h <= X_test.shape[0] 673 ), "h must be > 0 and < X_test.shape[0]" 674 X_pred = pipe["regressor"].predict(h=self.h, **kwargs) 675 676 else: 677 678 if self.h is None: 679 X_pred = pipe.predict( 680 h=X_test.shape[0], 681 **kwargs, 682 # X_pred = pipe.predict(h=X_test.shape[0], new_xreg=new_xreg) ## DO xreg like in `ahead` 683 ) 684 else: 685 assert ( 686 self.h > 0 and self.h <= X_test.shape[0] 687 ), "h must be > 0 and < X_test.shape[0]" 688 X_pred = pipe.predict(h=self.h, **kwargs) 689 690 if self.h is None: 691 if (self.replications is not None) or ( 692 self.type_pi == "gaussian" 693 ): 694 rmse = mean_errors( 695 actual=X_test, 696 pred=X_pred.mean, 697 scoring="root_mean_squared_error", 698 per_series=per_series, 699 ) 700 mae = mean_errors( 701 actual=X_test, 702 pred=X_pred.mean, 703 scoring="mean_absolute_error", 704 per_series=per_series, 705 ) 706 mpl = mean_errors( 707 actual=X_test, 708 pred=X_pred.mean, 709 scoring="mean_pinball_loss", 710 per_series=per_series, 711 ) 712 winklerscore = winkler_score( 713 obj=X_pred, 714 actual=X_test, 715 level=95, 716 per_series=per_series, 717 ) 718 coveragecalc = coverage( 719 X_pred, X_test, level=95, per_series=per_series 720 ) 721 else: # no prediction interval 722 rmse = mean_errors( 723 actual=X_test, 724 pred=X_pred, 725 scoring="root_mean_squared_error", 726 per_series=per_series, 727 ) 728 mae = mean_errors( 729 actual=X_test, 730 pred=X_pred, 731 scoring="mean_absolute_error", 732 per_series=per_series, 733 ) 734 mpl = mean_errors( 735 actual=X_test, 736 pred=X_pred, 737 scoring="mean_pinball_loss", 738 per_series=per_series, 739 ) 740 else: # self.h is not None 741 if (self.replications is not None) or ( 742 self.type_pi == "gaussian" 743 ): 744 745 if isinstance(X_test, pd.DataFrame): 746 X_test_h = X_test.iloc[0 : self.h, :] 747 rmse = mean_errors( 748 actual=X_test_h, 749 pred=X_pred, 750 scoring="root_mean_squared_error", 751 per_series=per_series, 752 ) 753 mae = mean_errors( 754 actual=X_test_h, 755 pred=X_pred, 756 scoring="mean_absolute_error", 757 per_series=per_series, 758 ) 759 mpl = mean_errors( 760 actual=X_test_h, 761 pred=X_pred, 762 scoring="mean_pinball_loss", 763 per_series=per_series, 764 ) 765 winklerscore = winkler_score( 766 obj=X_pred, 767 actual=X_test_h, 768 level=95, 769 per_series=per_series, 770 ) 771 coveragecalc = coverage( 772 X_pred, 773 X_test_h, 774 level=95, 775 per_series=per_series, 776 ) 777 else: 778 X_test_h = X_test[0 : self.h, :] 779 rmse = mean_errors( 780 actual=X_test_h, 781 pred=X_pred, 782 scoring="root_mean_squared_error", 783 per_series=per_series, 784 ) 785 mae = mean_errors( 786 actual=X_test_h, 787 pred=X_pred, 788 scoring="mean_absolute_error", 789 per_series=per_series, 790 ) 791 mpl = mean_errors( 792 actual=X_test_h, 793 pred=X_pred, 794 scoring="mean_pinball_loss", 795 per_series=per_series, 796 ) 797 winklerscore = winkler_score( 798 obj=X_pred, 799 actual=X_test_h, 800 level=95, 801 
per_series=per_series, 802 ) 803 coveragecalc = coverage( 804 X_pred, 805 X_test_h, 806 level=95, 807 per_series=per_series, 808 ) 809 else: # no prediction interval 810 811 if isinstance(X_test, pd.DataFrame): 812 X_test_h = X_test.iloc[0 : self.h, :] 813 rmse = mean_errors( 814 actual=X_test_h, 815 pred=X_pred, 816 scoring="root_mean_squared_error", 817 per_series=per_series, 818 ) 819 mae = mean_errors( 820 actual=X_test_h, 821 pred=X_pred, 822 scoring="mean_absolute_error", 823 per_series=per_series, 824 ) 825 mpl = mean_errors( 826 actual=X_test_h, 827 pred=X_pred, 828 scoring="mean_pinball_loss", 829 per_series=per_series, 830 ) 831 else: 832 X_test_h = X_test[0 : self.h, :] 833 rmse = mean_errors( 834 actual=X_test_h, 835 pred=X_pred, 836 scoring="root_mean_squared_error", 837 per_series=per_series, 838 ) 839 mae = mean_errors( 840 actual=X_test_h, 841 pred=X_pred, 842 scoring="mean_absolute_error", 843 per_series=per_series, 844 ) 845 846 names.append(name) 847 RMSE.append(rmse) 848 MAE.append(mae) 849 MPL.append(mpl) 850 if (self.replications is not None) or (self.type_pi == "gaussian"): 851 WINKLERSCORE.append(winklerscore) 852 COVERAGE.append(coveragecalc) 853 TIME.append(time.time() - start) 854 855 if self.custom_metric is not None: 856 try: 857 if self.h is None: 858 custom_metric = self.custom_metric(X_test, X_pred) 859 else: 860 custom_metric = self.custom_metric(X_test_h, X_pred) 861 CUSTOM_METRIC.append(custom_metric) 862 except Exception as e: 863 custom_metric = np.iinfo(np.float32).max 864 CUSTOM_METRIC.append(np.iinfo(np.float32).max) 865 866 if self.verbose > 0: 867 if (self.replications is not None) or ( 868 self.type_pi == "gaussian" 869 ): 870 scores_verbose = { 871 "Model": name, 872 "RMSE": rmse, 873 "MAE": mae, 874 "MPL": mpl, 875 "WINKLERSCORE": winklerscore, 876 "COVERAGE": coveragecalc, 877 "Time taken": time.time() - start, 878 } 879 else: 880 scores_verbose = { 881 "Model": name, 882 "RMSE": rmse, 883 "MAE": mae, 884 "MPL": mpl, 885 "Time taken": time.time() - start, 886 } 887 888 if self.custom_metric is not None: 889 scores_verbose["Custom metric"] = custom_metric 890 891 if self.predictions: 892 predictions[name] = X_pred 893 894 except Exception as exception: 895 if self.ignore_warnings is False: 896 print(name + " model failed to execute") 897 print(exception) 898 899 if (self.replications is not None) or (self.type_pi == "gaussian"): 900 scores = { 901 "Model": names, 902 "RMSE": RMSE, 903 "MAE": MAE, 904 "MPL": MPL, 905 "WINKLERSCORE": WINKLERSCORE, 906 "COVERAGE": COVERAGE, 907 "Time Taken": TIME, 908 } 909 else: 910 scores = { 911 "Model": names, 912 "RMSE": RMSE, 913 "MAE": MAE, 914 "MPL": MPL, 915 "Time Taken": TIME, 916 } 917 918 if self.custom_metric is not None: 919 scores["Custom metric"] = CUSTOM_METRIC 920 921 if per_series: 922 scores = dict_to_dataframe_series(scores, self.series_names) 923 else: 924 scores = pd.DataFrame(scores) 925 926 try: # case per_series, can't be sorted 927 scores = scores.sort_values(by=self.sort_by, ascending=True).set_index( 928 "Model" 929 ) 930 931 self.best_model_ = self.models_[scores.index[0]] 932 except Exception as e: 933 pass 934 935 if self.predictions is True: 936 937 return scores, predictions 938 939 return scores
Fit Regression algorithms to X_train, predict and score on X_test.
Parameters:
X_train: array-like or data frame,
Training vectors, where the rows are samples
and the columns are features.
X_test: array-like or data frame,
Testing vectors, where the rows are samples
and the columns are features.
xreg: array-like, optional (default=None)
Additional (external) regressors to be passed to self.obj
xreg must be in 'increasing' order (most recent observations last)
per_series: bool, optional (default=False)
When set to True, the metrics are computed series by series.
**kwargs: dict, optional (default=None)
Additional parameters to be passed to `fit` method of `obj`.
Returns:
scores: Pandas DataFrame
Returns metrics of all the models in a Pandas DataFrame.
predictions: Pandas DataFrame
Returns predictions of all the models in a Pandas DataFrame.
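A hedged sketch of the call patterns described above, on toy data; `predictions=True` and `per_series` are the options documented in the Parameters section, and the dataset is purely illustrative:

```python
import numpy as np
import pandas as pd
import nnetsauce as ns

np.random.seed(42)
X = pd.DataFrame(np.random.rand(120, 2), columns=["y1", "y2"])
X_train, X_test = X.iloc[:100, :], X.iloc[100:, :]

# predictions=True makes fit() return a (scores, predictions) tuple;
# per_series=True would compute the error metrics series by series
mdl = ns.LazyDeepMTS(verbose=0, ignore_warnings=True, predictions=True)
scores, predictions = mdl.fit(X_train, X_test, per_series=False)
print(scores.head())
print(list(predictions.keys())[:3])  # forecasts keyed by model name
```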
953 def provide_models(self, X_train, X_test): 954 """ 955 This function returns all the model objects trained in fit function. 956 If fit is not called already, then we call fit and then return the models. 957 958 Parameters: 959 960 X_train : array-like, 961 Training vectors, where rows is the number of samples 962 and columns is the number of features. 963 964 X_test : array-like, 965 Testing vectors, where rows is the number of samples 966 and columns is the number of features. 967 968 Returns: 969 970 models: dict-object, 971 Returns a dictionary with each model pipeline as value 972 with key as name of models. 973 974 """ 975 if self.h is None: 976 if len(self.models_.keys()) == 0: 977 self.fit(X_train, X_test) 978 else: 979 if len(self.models_.keys()) == 0: 980 if isinstance(X_test, pd.DataFrame): 981 self.fit(X_train, X_test.iloc[0 : self.h, :]) 982 else: 983 self.fit(X_train, X_test[0 : self.h, :]) 984 985 return self.models_
This function returns all the model objects trained by the fit function. If fit has not been called yet, it is called first and the models are then returned.
Parameters:
X_train : array-like,
Training vectors, where the rows are samples
and the columns are features.
X_test : array-like,
Testing vectors, where the rows are samples
and the columns are features.
Returns:
models: dict-object,
A dictionary with model names as keys and the corresponding fitted pipelines as values.
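A short illustrative sketch on toy data; the exact model names in the returned dictionary depend on the scikit-learn estimators available, so the printed keys are only an example:

```python
import numpy as np
import pandas as pd
import nnetsauce as ns

np.random.seed(0)
X = pd.DataFrame(np.random.rand(100, 2), columns=["a", "b"])
X_train, X_test = X.iloc[:80, :], X.iloc[80:, :]

mdl = ns.LazyDeepMTS(verbose=0, ignore_warnings=True)
mdl.fit(X_train, X_test)
models = mdl.provide_models(X_train, X_test)  # dict: model name -> fitted pipeline
print(list(models.keys())[:5])                # e.g. names such as 'MTS(ElasticNet)'
```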
18class MLARCH(MTS): 19 """Machine Learning with ARCH effects for time series forecasting 20 21 Parameters: 22 23 model_mean: object of class nnetsauce.MTS 24 Model for mean prediction (default: None, uses obj) 25 26 model_sigma: object of class nnetsauce.MTS 27 Model for residuals volatility prediction (default: None, uses obj) 28 29 model_residuals: object of class nnetsauce.MTS 30 Model for residuals prediction (default: None, uses obj) 31 32 Examples: 33 34 See examples/mlarch.py 35 36 """ 37 def __init__( 38 self, 39 model_mean, 40 model_sigma, 41 model_residuals 42 ): 43 assert isinstance(model_mean, MTS), "model_mean must be an object of class nnetsauce.MTS" 44 assert isinstance(model_sigma, MTS), "model_sigma must be an object of class nnetsauce.MTS" 45 assert isinstance(model_residuals, MTS), "model_residuals must be an object of class nnetsauce.MTS" 46 assert model_sigma.type_pi.startswith("scp") and model_sigma.replications is not None, \ 47 "for now, the models must be conformalized, i.e type_pi must start with 'scp' and replications must be an integer" 48 assert model_residuals.type_pi.startswith("scp") and model_residuals.replications is not None, \ 49 "for now, the models must be conformalized, i.e type_pi must start with 'scp' and replications must be an integer" 50 51 self.model_mean = model_mean 52 self.model_sigma = model_sigma 53 self.model_residuals = model_residuals 54 55 self.mean_residuals_ = None 56 self.mean_residuals_wilcoxon_test_ = None 57 self.mean_residuals_kss_test_ = None 58 self.standardized_residuals_ = None 59 60 61 def fit(self, y): 62 """Fit the MLARCH model to the time series data. 63 64 Parameters 65 ---------- 66 y : array-like of shape (n_samples,) 67 The target time series to be fitted. 68 69 Returns 70 ------- 71 self : object 72 Returns self. 73 74 Notes 75 ----- 76 This method: 77 78 1. Fits the mean model to the time series 79 2. Performs statistical tests on the residuals (Wilcoxon and KPSS) 80 3. Fits the volatility model to the squared residuals 81 4. Computes standardized residuals 82 5. Fits the residuals model to the standardized residuals 83 """ 84 n = len(y) 85 self.model_mean.fit(y.reshape(-1, 1)) 86 # Wilcoxon signed-rank test on residuals (mean = 0) 87 self.mean_residuals_wilcoxon_test_ = stats.wilcoxon(self.model_mean.residuals_) 88 # KPSS test for stationarity on residuals 89 self.mean_residuals_kss_test_ = kpss(self.model_mean.residuals_, regression='c') 90 self.model_sigma.fit(np.log(self.model_mean.residuals_.reshape(-1, 1)**2)) 91 # n//2 here because the model is conformalized 92 fitted_sigma = self.model_sigma.residuals_ + np.log(self.model_mean.residuals_**2)[(n//2):,:] 93 # standardized residuals 94 self.standardized_residuals_ = self.model_mean.residuals_[(n//2):,:]/np.sqrt(np.exp(fitted_sigma)) 95 self.model_residuals.fit(self.standardized_residuals_.reshape(-1, 1)) 96 return self 97 98 99 def predict(self, h=5, level=95): 100 """Predict (probabilistic) future values of the time series. 101 102 Parameters 103 ---------- 104 h : int, default=5 105 The forecast horizon. 106 level : int, default=95 107 The confidence level for prediction intervals. 108 109 Returns 110 ------- 111 DescribeResult : namedtuple 112 A named tuple containing: 113 114 - mean : array-like of shape (h,) 115 The mean forecast. 116 - sims : array-like of shape (h, n_replications) 117 The simulated forecasts. 118 - lower : array-like of shape (h,) 119 The lower bound of the prediction interval. 
120 - upper : array-like of shape (h,) 121 The upper bound of the prediction interval. 122 123 Notes 124 ----- 125 This method: 126 1. Generates mean forecasts using the mean model 127 2. Generates standardized residual forecasts using the residuals model 128 3. Generates volatility forecasts using the sigma model 129 4. Combines these forecasts to generate the final predictions 130 5. Computes prediction intervals at the specified confidence level 131 """ 132 DescribeResult = namedtuple( 133 "DescribeResult", ("mean", "sims", "lower", "upper") 134 ) 135 mean_forecast = self.model_mean.predict(h=h).values.ravel() 136 preds_z = self.model_residuals.predict(h=h) 137 preds_sigma = self.model_sigma.predict(h=h) 138 sims_z = preds_z.sims 139 sims_sigma = preds_sigma.sims 140 141 f = [] 142 for i in range(len(sims_z)): 143 f.append(mean_forecast + sims_z[i].values.ravel()*np.sqrt(np.exp(sims_sigma[i].values.ravel()))) 144 145 f = np.asarray(f).T 146 mean_f = np.mean(f, axis=1) 147 alpha = 1 - level/100 148 lower_bound = np.quantile(f, alpha/2, axis=1) 149 upper_bound = np.quantile(f, 1-alpha/2, axis=1) 150 151 return DescribeResult(mean_f, f, 152 lower_bound, upper_bound)
Machine Learning with ARCH effects for time series forecasting
Parameters:
model_mean: object of class nnetsauce.MTS
Model for mean prediction
model_sigma: object of class nnetsauce.MTS
Model for residuals volatility prediction (must be conformalized: type_pi starts with 'scp' and replications is an integer)
model_residuals: object of class nnetsauce.MTS
Model for residuals prediction (must be conformalized: type_pi starts with 'scp' and replications is an integer)
Examples:
See examples/mlarch.py
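A minimal construction sketch based on the assertions in the constructor shown above; the base learner (Ridge), lags, type_pi and replications values are illustrative:

```python
import nnetsauce as ns
from sklearn.linear_model import Ridge

# the mean model is a plain MTS; the volatility ('sigma') and standardized-residuals
# models must be conformalized: type_pi starting with 'scp' and an integer `replications`
model_mean = ns.MTS(Ridge(), lags=2)
model_sigma = ns.MTS(Ridge(), lags=2, type_pi="scp-kde", replications=100)
model_z = ns.MTS(Ridge(), lags=2, type_pi="scp-kde", replications=100)

mlarch = ns.MLARCH(model_mean=model_mean,
                   model_sigma=model_sigma,
                   model_residuals=model_z)
```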
61 def fit(self, y): 62 """Fit the MLARCH model to the time series data. 63 64 Parameters 65 ---------- 66 y : array-like of shape (n_samples,) 67 The target time series to be fitted. 68 69 Returns 70 ------- 71 self : object 72 Returns self. 73 74 Notes 75 ----- 76 This method: 77 78 1. Fits the mean model to the time series 79 2. Performs statistical tests on the residuals (Wilcoxon and KPSS) 80 3. Fits the volatility model to the squared residuals 81 4. Computes standardized residuals 82 5. Fits the residuals model to the standardized residuals 83 """ 84 n = len(y) 85 self.model_mean.fit(y.reshape(-1, 1)) 86 # Wilcoxon signed-rank test on residuals (mean = 0) 87 self.mean_residuals_wilcoxon_test_ = stats.wilcoxon(self.model_mean.residuals_) 88 # KPSS test for stationarity on residuals 89 self.mean_residuals_kss_test_ = kpss(self.model_mean.residuals_, regression='c') 90 self.model_sigma.fit(np.log(self.model_mean.residuals_.reshape(-1, 1)**2)) 91 # n//2 here because the model is conformalized 92 fitted_sigma = self.model_sigma.residuals_ + np.log(self.model_mean.residuals_**2)[(n//2):,:] 93 # standardized residuals 94 self.standardized_residuals_ = self.model_mean.residuals_[(n//2):,:]/np.sqrt(np.exp(fitted_sigma)) 95 self.model_residuals.fit(self.standardized_residuals_.reshape(-1, 1)) 96 return self
Fit the MLARCH model to the time series data.
Parameters
y : array-like of shape (n_samples,) The target time series to be fitted.
Returns
self : object Returns self.
Notes
This method:
- Fits the mean model to the time series
- Performs statistical tests on the residuals (Wilcoxon and KPSS)
- Fits the volatility model to the squared residuals
- Computes standardized residuals
- Fits the residuals model to the standardized residuals
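An illustrative fit on a simulated univariate series, using the same (illustrative) construction as in the sketch above; the attributes printed are those set by the steps listed in the Notes:

```python
import numpy as np
import nnetsauce as ns
from sklearn.linear_model import Ridge

np.random.seed(1)
y = np.cumsum(np.random.randn(200))  # a univariate series; fit() reshapes it to one column

mlarch = ns.MLARCH(
    model_mean=ns.MTS(Ridge(), lags=2),
    model_sigma=ns.MTS(Ridge(), lags=2, type_pi="scp-kde", replications=100),
    model_residuals=ns.MTS(Ridge(), lags=2, type_pi="scp-kde", replications=100),
)
mlarch.fit(y)
print(mlarch.mean_residuals_wilcoxon_test_)  # Wilcoxon test on the mean model's residuals
print(mlarch.mean_residuals_kss_test_)       # KPSS stationarity test on those residuals
```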
99 def predict(self, h=5, level=95): 100 """Predict (probabilistic) future values of the time series. 101 102 Parameters 103 ---------- 104 h : int, default=5 105 The forecast horizon. 106 level : int, default=95 107 The confidence level for prediction intervals. 108 109 Returns 110 ------- 111 DescribeResult : namedtuple 112 A named tuple containing: 113 114 - mean : array-like of shape (h,) 115 The mean forecast. 116 - sims : array-like of shape (h, n_replications) 117 The simulated forecasts. 118 - lower : array-like of shape (h,) 119 The lower bound of the prediction interval. 120 - upper : array-like of shape (h,) 121 The upper bound of the prediction interval. 122 123 Notes 124 ----- 125 This method: 126 1. Generates mean forecasts using the mean model 127 2. Generates standardized residual forecasts using the residuals model 128 3. Generates volatility forecasts using the sigma model 129 4. Combines these forecasts to generate the final predictions 130 5. Computes prediction intervals at the specified confidence level 131 """ 132 DescribeResult = namedtuple( 133 "DescribeResult", ("mean", "sims", "lower", "upper") 134 ) 135 mean_forecast = self.model_mean.predict(h=h).values.ravel() 136 preds_z = self.model_residuals.predict(h=h) 137 preds_sigma = self.model_sigma.predict(h=h) 138 sims_z = preds_z.sims 139 sims_sigma = preds_sigma.sims 140 141 f = [] 142 for i in range(len(sims_z)): 143 f.append(mean_forecast + sims_z[i].values.ravel()*np.sqrt(np.exp(sims_sigma[i].values.ravel()))) 144 145 f = np.asarray(f).T 146 mean_f = np.mean(f, axis=1) 147 alpha = 1 - level/100 148 lower_bound = np.quantile(f, alpha/2, axis=1) 149 upper_bound = np.quantile(f, 1-alpha/2, axis=1) 150 151 return DescribeResult(mean_f, f, 152 lower_bound, upper_bound)
Predict (probabilistic) future values of the time series.
Parameters
h : int, default=5
The forecast horizon.
level : int, default=95
The confidence level for prediction intervals.
Returns
DescribeResult : namedtuple A named tuple containing:
- mean : array-like of shape (h,)
The mean forecast.
- sims : array-like of shape (h, n_replications)
The simulated forecasts.
- lower : array-like of shape (h,)
The lower bound of the prediction interval.
- upper : array-like of shape (h,)
The upper bound of the prediction interval.
Notes
This method:
- Generates mean forecasts using the mean model
- Generates standardized residual forecasts using the residuals model
- Generates volatility forecasts using the sigma model
- Combines these forecasts to generate the final predictions
- Computes prediction intervals at the specified confidence level
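A sketch of the predict output, reusing the fitted `mlarch` object from the previous sketch; the shapes follow the Returns description above:

```python
# reusing the fitted `mlarch` object from the sketch above
res = mlarch.predict(h=10, level=95)
print(res.mean.shape)                 # (10,)  point forecast
print(res.sims.shape)                 # (10, number of replications) simulated paths
print(res.lower[:3], res.upper[:3])   # bounds of the 95% prediction interval
```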
6class MedianVotingRegressor(VotingRegressor): 7 def predict(self, X): 8 """ 9 Predict using the median of the base regressors' predictions. 10 11 Parameters: 12 X (array-like): Feature matrix for predictions. 13 14 Returns: 15 y_pred (array): Median of predictions from the base regressors. 16 """ 17 predictions = np.asarray( 18 [regressor.predict(X) for regressor in self.estimators_] 19 ) 20 return np.median(predictions, axis=0)
Prediction voting regressor for unfitted estimators.
A voting regressor is an ensemble meta-estimator that fits several base regressors, each on the whole dataset. Then it averages the individual predictions to form a final prediction.
Read more in the scikit-learn User Guide (voting regressor).
New in version 0.21.
Parameters
estimators : list of (str, estimator) tuples
Invoking the fit method on the VotingRegressor will fit clones of those original
estimators that will be stored in the class attribute self.estimators_. An estimator
can be set to 'drop' using set_params().
*Changed in version 0.21:* 'drop' is accepted. Using None was deprecated in 0.22 and
support was removed in 0.24.
weights : array-like of shape (n_regressors,), default=None
Sequence of weights (float or int) to weight the occurrences of predicted values
before averaging. Uses uniform weights if None.
n_jobs : int, default=None
The number of jobs to run in parallel for fit. None means 1 unless in a
joblib.parallel_backend context. -1 means using all processors. See the
scikit-learn glossary entry on n_jobs for more details.
verbose : bool, default=False
If True, the time elapsed while fitting will be printed as it is completed.
*New in version 0.23.*
Attributes
estimators_ : list of regressors
The collection of fitted sub-estimators as defined in estimators
that are not 'drop'.
named_estimators_ : sklearn.utils.Bunch
Attribute to access any fitted sub-estimators by name.
*New in version 0.20.*
n_features_in_ : int
Number of features seen during fit. Only defined if the
underlying regressor exposes such an attribute when fit.
*New in version 0.24.*
feature_names_in_ : ndarray of shape (n_features_in_,)
Names of features seen during fit. Only defined if the
underlying estimators expose such an attribute when fit.
*New in version 1.0.*
See Also
VotingClassifier : Soft Voting/Majority Rule classifier.
Examples
>>> import numpy as np
>>> from sklearn.linear_model import LinearRegression
>>> from sklearn.ensemble import RandomForestRegressor
>>> from sklearn.ensemble import VotingRegressor
>>> from sklearn.neighbors import KNeighborsRegressor
>>> r1 = LinearRegression()
>>> r2 = RandomForestRegressor(n_estimators=10, random_state=1)
>>> r3 = KNeighborsRegressor()
>>> X = np.array([[1, 1], [2, 4], [3, 9], [4, 16], [5, 25], [6, 36]])
>>> y = np.array([2, 6, 12, 20, 30, 42])
>>> er = VotingRegressor([('lr', r1), ('rf', r2), ('r3', r3)])
>>> print(er.fit(X, y).predict(X))
[ 6.8... 8.4... 12.5... 17.8... 26... 34...]
In the following example, we drop the 'lr' estimator with
VotingRegressor.set_params() and fit the remaining two estimators:
>>> er = er.set_params(lr='drop')
>>> er = er.fit(X, y)
>>> len(er.estimators_)
2
7 def predict(self, X): 8 """ 9 Predict using the median of the base regressors' predictions. 10 11 Parameters: 12 X (array-like): Feature matrix for predictions. 13 14 Returns: 15 y_pred (array): Median of predictions from the base regressors. 16 """ 17 predictions = np.asarray( 18 [regressor.predict(X) for regressor in self.estimators_] 19 ) 20 return np.median(predictions, axis=0)
Predict using the median of the base regressors' predictions.
Parameters: X (array-like): Feature matrix for predictions.
Returns: y_pred (array): Median of predictions from the base regressors.
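A minimal sketch mirroring the VotingRegressor example above, assuming MedianVotingRegressor is exported at the package top level; the estimators and data are illustrative:

```python
import numpy as np
import nnetsauce as ns
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor

X = np.array([[1, 1], [2, 4], [3, 9], [4, 16], [5, 25], [6, 36]])
y = np.array([2, 6, 12, 20, 30, 42])

# same constructor as sklearn's VotingRegressor; only predict() differs
# (median of the base regressors' predictions instead of a weighted mean)
er = ns.MedianVotingRegressor([("lr", LinearRegression()),
                               ("knn", KNeighborsRegressor(n_neighbors=3))])
print(er.fit(X, y).predict(X))
```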
28class MTS(Base): 29 """Univariate and multivariate time series (MTS) forecasting with Quasi-Randomized networks 30 31 Parameters: 32 33 obj: object. 34 any object containing a method fit (obj.fit()) and a method predict 35 (obj.predict()). 36 37 n_hidden_features: int. 38 number of nodes in the hidden layer. 39 40 activation_name: str. 41 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'. 42 43 a: float. 44 hyperparameter for 'prelu' or 'elu' activation function. 45 46 nodes_sim: str. 47 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 48 'uniform'. 49 50 bias: boolean. 51 indicates if the hidden layer contains a bias term (True) or not 52 (False). 53 54 dropout: float. 55 regularization parameter; (random) percentage of nodes dropped out 56 of the training. 57 58 direct_link: boolean. 59 indicates if the original predictors are included (True) in model's fitting or not (False). 60 61 n_clusters: int. 62 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering). 63 64 cluster_encode: bool. 65 defines how the variable containing clusters is treated (default is one-hot) 66 if `False`, then labels are used, without one-hot encoding. 67 68 type_clust: str. 69 type of clustering method: currently k-means ('kmeans') or Gaussian 70 Mixture Model ('gmm'). 71 72 type_scaling: a tuple of 3 strings. 73 scaling methods for inputs, hidden layer, and clustering respectively 74 (and when relevant). 75 Currently available: standardization ('std') or MinMax scaling ('minmax'). 76 77 lags: int. 78 number of lags used for each time series. 79 If string, lags must be one of 'AIC', 'AICc', or 'BIC'. 80 81 type_pi: str. 82 type of prediction interval; currently: 83 - "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case 84 - "kde": based on Kernel Density Estimation of in-sample residuals 85 - "bootstrap": based on independent bootstrap of in-sample residuals 86 - "block-bootstrap": based on basic block bootstrap of in-sample residuals 87 - "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals 88 - "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals 89 - "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals 90 - "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals 91 - "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals 92 - "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals 93 - based on copulas of in-sample residuals: 'vine-tll', 'vine-bb1', 'vine-bb6', 'vine-bb7', 'vine-bb8', 'vine-clayton', 94 'vine-frank', 'vine-gaussian', 'vine-gumbel', 'vine-indep', 'vine-joe', 'vine-student' 95 - 'scp-vine-tll', 'scp-vine-bb1', 'scp-vine-bb6', 'scp-vine-bb7', 'scp-vine-bb8', 'scp-vine-clayton', 96 'scp-vine-frank', 'scp-vine-gaussian', 'scp-vine-gumbel', 'scp-vine-indep', 'scp-vine-joe', 'scp-vine-student' 97 - 'scp2-vine-tll', 'scp2-vine-bb1', 'scp2-vine-bb6', 'scp2-vine-bb7', 'scp2-vine-bb8', 'scp2-vine-clayton', 98 'scp2-vine-frank', 'scp2-vine-gaussian', 'scp2-vine-gumbel', 'scp2-vine-indep', 'scp2-vine-joe', 'scp2-vine-student' 99 100 block_size: int. 
101 size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap"). 102 Default is round(3.15*(n_residuals^1/3)) 103 104 replications: int. 105 number of replications (if needed, for predictive simulation). Default is 'None'. 106 107 kernel: str. 108 the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'. 109 110 agg: str. 111 either "mean" or "median" for simulation of bootstrap aggregating 112 113 seed: int. 114 reproducibility seed for nodes_sim=='uniform' or predictive simulation. 115 116 backend: str. 117 "cpu" or "gpu" or "tpu". 118 119 verbose: int. 120 0: not printing; 1: printing 121 122 show_progress: bool. 123 True: progress bar when fitting each series; False: no progress bar when fitting each series 124 125 Attributes: 126 127 fit_objs_: dict 128 objects adjusted to each individual time series 129 130 y_: {array-like} 131 MTS responses (most recent observations first) 132 133 X_: {array-like} 134 MTS lags 135 136 xreg_: {array-like} 137 external regressors 138 139 y_means_: dict 140 a dictionary of each series mean values 141 142 preds_: {array-like} 143 successive model predictions 144 145 preds_std_: {array-like} 146 standard deviation around the predictions for Bayesian base learners (`obj`) 147 148 gaussian_preds_std_: {array-like} 149 standard deviation around the predictions for `type_pi='gaussian'` 150 151 return_std_: boolean 152 return uncertainty or not (set in predict) 153 154 df_: data frame 155 the input data frame, in case a data.frame is provided to `fit` 156 157 n_obs_: int 158 number of time series observations (number of rows for multivariate) 159 160 level_: int 161 level of confidence for prediction intervals (default is 95) 162 163 residuals_: {array-like} 164 in-sample residuals (for `type_pi` not conformal prediction) or calibrated residuals 165 (for `type_pi` in conformal prediction) 166 167 residuals_sims_: tuple of {array-like} 168 simulations of in-sample residuals (for `type_pi` not conformal prediction) or 169 calibrated residuals (for `type_pi` in conformal prediction) 170 171 kde_: A scikit-learn object, see https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KernelDensity.html 172 173 residuals_std_dev_: residuals standard deviation 174 175 Examples: 176 177 Example 1: 178 179 ```python 180 import nnetsauce as ns 181 import numpy as np 182 from sklearn import linear_model 183 np.random.seed(123) 184 185 M = np.random.rand(10, 3) 186 M[:,0] = 10*M[:,0] 187 M[:,2] = 25*M[:,2] 188 print(M) 189 190 # Adjust Bayesian Ridge 191 regr4 = linear_model.BayesianRidge() 192 obj_MTS = ns.MTS(regr4, lags = 1, n_hidden_features=5) 193 obj_MTS.fit(M) 194 print(obj_MTS.predict()) 195 196 # with credible intervals 197 print(obj_MTS.predict(return_std=True, level=80)) 198 199 print(obj_MTS.predict(return_std=True, level=95)) 200 ``` 201 202 Example 2: 203 204 ```python 205 import nnetsauce as ns 206 import numpy as np 207 from sklearn import linear_model 208 209 dataset = { 210 'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'], 211 'series1' : [34, 30, 35.6, 33.3, 38.1], 212 'series2' : [4, 5.5, 5.6, 6.3, 5.1], 213 'series3' : [100, 100.5, 100.6, 100.2, 100.1]} 214 df = pd.DataFrame(dataset).set_index('date') 215 print(df) 216 217 # Adjust Bayesian Ridge 218 regr5 = linear_model.BayesianRidge() 219 obj_MTS = ns.MTS(regr5, lags = 1, n_hidden_features=5) 220 obj_MTS.fit(df) 221 print(obj_MTS.predict()) 222 223 # with 
credible intervals 224 print(obj_MTS.predict(return_std=True, level=80)) 225 226 print(obj_MTS.predict(return_std=True, level=95)) 227 ``` 228 """ 229 230 # construct the object ----- 231 232 def __init__( 233 self, 234 obj, 235 n_hidden_features=5, 236 activation_name="relu", 237 a=0.01, 238 nodes_sim="sobol", 239 bias=True, 240 dropout=0, 241 direct_link=True, 242 n_clusters=2, 243 cluster_encode=True, 244 type_clust="kmeans", 245 type_scaling=("std", "std", "std"), 246 lags=1, 247 type_pi="kde", 248 block_size=None, 249 replications=None, 250 kernel="gaussian", 251 agg="mean", 252 seed=123, 253 backend="cpu", 254 verbose=0, 255 show_progress=True, 256 ): 257 258 super().__init__( 259 n_hidden_features=n_hidden_features, 260 activation_name=activation_name, 261 a=a, 262 nodes_sim=nodes_sim, 263 bias=bias, 264 dropout=dropout, 265 direct_link=direct_link, 266 n_clusters=n_clusters, 267 cluster_encode=cluster_encode, 268 type_clust=type_clust, 269 type_scaling=type_scaling, 270 seed=seed, 271 backend=backend, 272 ) 273 274 # Add validation for lags parameter 275 if isinstance(lags, str): 276 assert lags in ( 277 "AIC", 278 "AICc", 279 "BIC", 280 ), "if string, lags must be one of 'AIC', 'AICc', or 'BIC'" 281 else: 282 assert int(lags) == lags, "if numeric, lags parameter should be an integer" 283 284 self.obj = obj 285 self.n_series = None 286 self.lags = lags 287 self.type_pi = type_pi 288 self.block_size = block_size 289 self.replications = replications 290 self.kernel = kernel 291 self.agg = agg 292 self.verbose = verbose 293 self.show_progress = show_progress 294 self.series_names = None 295 self.input_dates = None 296 self.fit_objs_ = {} 297 self.y_ = None # MTS responses (most recent observations first) 298 self.X_ = None # MTS lags 299 self.xreg_ = None 300 self.y_means_ = {} 301 self.mean_ = None 302 self.median_ = None 303 self.upper_ = None 304 self.lower_ = None 305 self.output_dates_ = None 306 self.preds_std_ = [] 307 self.gaussian_preds_std_ = None 308 self.alpha_ = None 309 self.return_std_ = None 310 self.df_ = None 311 self.residuals_ = [] 312 self.abs_calib_residuals_ = None 313 self.calib_residuals_quantile_ = None 314 self.residuals_sims_ = None 315 self.kde_ = None 316 self.sims_ = None 317 self.residuals_std_dev_ = None 318 self.n_obs_ = None 319 self.level_ = None 320 self.init_n_series_ = None 321 322 def fit(self, X, xreg=None, **kwargs): 323 """Fit MTS model to training data X, with optional regressors xreg 324 325 Parameters: 326 327 X: {array-like}, shape = [n_samples, n_features] 328 Training time series, where n_samples is the number 329 of samples and n_features is the number of features; 330 X must be in increasing order (most recent observations last) 331 332 xreg: {array-like}, shape = [n_samples, n_features_xreg] 333 Additional (external) regressors to be passed to self.obj 334 xreg must be in 'increasing' order (most recent observations last) 335 336 **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity) 337 338 Returns: 339 340 self: object 341 """ 342 343 self.init_n_series_ = X.shape[1] 344 345 # Automatic lag selection if requested 346 if isinstance(self.lags, str): 347 max_lags = min(25, X.shape[0] // 4) 348 best_ic = float("inf") 349 best_lags = 1 350 351 if self.verbose: 352 print(f"\nSelecting optimal number of lags using {self.lags}...") 353 iterator = tqdm(range(1, max_lags + 1)) 354 else: 355 iterator = range(1, max_lags + 1) 356 357 for lag in iterator: 358 # 
Convert DataFrame to numpy array before reversing 359 if isinstance(X, pd.DataFrame): 360 X_values = X.values[::-1] 361 else: 362 X_values = X[::-1] 363 364 # Try current lag value 365 if self.init_n_series_ > 1: 366 mts_input = ts.create_train_inputs(X_values, lag) 367 else: 368 mts_input = ts.create_train_inputs(X_values.reshape(-1, 1), lag) 369 370 # Cook training set and fit model 371 dummy_y, scaled_Z = self.cook_training_set( 372 y=np.ones(mts_input[0].shape[0]), X=mts_input[1] 373 ) 374 residuals_ = [] 375 376 for i in range(self.init_n_series_): 377 y_mean = np.mean(mts_input[0][:, i]) 378 centered_y_i = mts_input[0][:, i] - y_mean 379 self.obj.fit(X=scaled_Z, y=centered_y_i) 380 residuals_.append( 381 (centered_y_i - self.obj.predict(scaled_Z)).tolist() 382 ) 383 384 self.residuals_ = np.asarray(residuals_).T 385 ic = self._compute_information_criterion( 386 curr_lags=lag, criterion=self.lags 387 ) 388 389 if self.verbose: 390 print(f"Trying lags={lag}, {self.lags}={ic:.2f}") 391 392 if ic < best_ic: 393 best_ic = ic 394 best_lags = lag 395 396 if self.verbose: 397 print(f"\nSelected {best_lags} lags with {self.lags}={best_ic:.2f}") 398 399 self.lags = best_lags 400 401 self.input_dates = None 402 self.df_ = None 403 404 if isinstance(X, pd.DataFrame) is False: 405 # input data set is a numpy array 406 if xreg is None: 407 X = pd.DataFrame(X) 408 self.series_names = ["series" + str(i) for i in range(X.shape[1])] 409 else: 410 # xreg is not None 411 X = mo.cbind(X, xreg) 412 self.xreg_ = xreg 413 414 else: # input data set is a DataFrame with column names 415 416 X_index = None 417 if X.index is not None: 418 X_index = X.index 419 if xreg is None: 420 X = copy.deepcopy(mo.convert_df_to_numeric(X)) 421 else: 422 X = copy.deepcopy(mo.cbind(mo.convert_df_to_numeric(X), xreg)) 423 self.xreg_ = xreg 424 if X_index is not None: 425 X.index = X_index 426 self.series_names = X.columns.tolist() 427 428 if isinstance(X, pd.DataFrame): 429 if self.df_ is None: 430 self.df_ = X 431 X = X.values 432 else: 433 input_dates_prev = pd.DatetimeIndex(self.df_.index.values) 434 frequency = pd.infer_freq(input_dates_prev) 435 self.df_ = pd.concat([self.df_, X], axis=0) 436 self.input_dates = pd.date_range( 437 start=input_dates_prev[0], 438 periods=len(input_dates_prev) + X.shape[0], 439 freq=frequency, 440 ).values.tolist() 441 self.df_.index = self.input_dates 442 X = self.df_.values 443 self.df_.columns = self.series_names 444 else: 445 if self.df_ is None: 446 self.df_ = pd.DataFrame(X, columns=self.series_names) 447 else: 448 self.df_ = pd.concat( 449 [self.df_, pd.DataFrame(X, columns=self.series_names)], 450 axis=0, 451 ) 452 453 self.input_dates = ts.compute_input_dates(self.df_) 454 455 try: 456 # multivariate time series 457 n, p = X.shape 458 except: 459 # univariate time series 460 n = X.shape[0] 461 p = 1 462 self.n_obs_ = n 463 464 rep_1_n = np.repeat(1, n) 465 466 self.y_ = None 467 self.X_ = None 468 self.n_series = p 469 self.fit_objs_.clear() 470 self.y_means_.clear() 471 residuals_ = [] 472 self.residuals_ = None 473 self.residuals_sims_ = None 474 self.kde_ = None 475 self.sims_ = None 476 self.scaled_Z_ = None 477 self.centered_y_is_ = [] 478 479 if self.init_n_series_ > 1: 480 # multivariate time series 481 mts_input = ts.create_train_inputs(X[::-1], self.lags) 482 else: 483 # univariate time series 484 mts_input = ts.create_train_inputs(X.reshape(-1, 1)[::-1], self.lags) 485 486 self.y_ = mts_input[0] 487 488 self.X_ = mts_input[1] 489 490 dummy_y, scaled_Z = 
self.cook_training_set(y=rep_1_n, X=self.X_) 491 492 self.scaled_Z_ = scaled_Z 493 494 # loop on all the time series and adjust self.obj.fit 495 if self.verbose > 0: 496 print( 497 f"\n Adjusting {type(self.obj).__name__} to multivariate time series... \n" 498 ) 499 500 if self.show_progress is True: 501 iterator = tqdm(range(self.init_n_series_)) 502 else: 503 iterator = range(self.init_n_series_) 504 505 if self.type_pi in ( 506 "gaussian", 507 "kde", 508 "bootstrap", 509 "block-bootstrap", 510 ) or self.type_pi.startswith("vine"): 511 for i in iterator: 512 y_mean = np.mean(self.y_[:, i]) 513 self.y_means_[i] = y_mean 514 centered_y_i = self.y_[:, i] - y_mean 515 self.centered_y_is_.append(centered_y_i) 516 self.obj.fit(X=scaled_Z, y=centered_y_i) 517 self.fit_objs_[i] = deepcopy(self.obj) 518 residuals_.append( 519 (centered_y_i - self.fit_objs_[i].predict(scaled_Z)).tolist() 520 ) 521 522 if self.type_pi.startswith("scp"): 523 # split conformal prediction 524 for i in iterator: 525 n_y = self.y_.shape[0] 526 n_y_half = n_y // 2 527 first_half_idx = range(0, n_y_half) 528 second_half_idx = range(n_y_half, n_y) 529 y_mean_temp = np.mean(self.y_[first_half_idx, i]) 530 centered_y_i_temp = self.y_[first_half_idx, i] - y_mean_temp 531 self.obj.fit(X=scaled_Z[first_half_idx, :], y=centered_y_i_temp) 532 # calibrated residuals actually 533 residuals_.append( 534 ( 535 self.y_[second_half_idx, i] 536 - (y_mean_temp + self.obj.predict(scaled_Z[second_half_idx, :])) 537 ).tolist() 538 ) 539 # fit on the second half 540 y_mean = np.mean(self.y_[second_half_idx, i]) 541 self.y_means_[i] = y_mean 542 centered_y_i = self.y_[second_half_idx, i] - y_mean 543 self.obj.fit(X=scaled_Z[second_half_idx, :], y=centered_y_i) 544 self.fit_objs_[i] = deepcopy(self.obj) 545 546 self.residuals_ = np.asarray(residuals_).T 547 548 if self.type_pi == "gaussian": 549 self.gaussian_preds_std_ = np.std(self.residuals_, axis=0) 550 551 if self.type_pi.startswith("scp2"): 552 # Calculate mean and standard deviation for each column 553 data_mean = np.mean(self.residuals_, axis=0) 554 self.residuals_std_dev_ = np.std(self.residuals_, axis=0) 555 # Center and scale the array using broadcasting 556 self.residuals_ = ( 557 self.residuals_ - data_mean[np.newaxis, :] 558 ) / self.residuals_std_dev_[np.newaxis, :] 559 560 if self.replications != None and "kde" in self.type_pi: 561 if self.verbose > 0: 562 print(f"\n Simulate residuals using {self.kernel} kernel... 
\n") 563 assert self.kernel in ( 564 "gaussian", 565 "tophat", 566 ), "currently, 'kernel' must be either 'gaussian' or 'tophat'" 567 kernel_bandwidths = {"bandwidth": np.logspace(-6, 6, 150)} 568 grid = GridSearchCV( 569 KernelDensity(kernel=self.kernel, **kwargs), 570 param_grid=kernel_bandwidths, 571 ) 572 grid.fit(self.residuals_) 573 574 if self.verbose > 0: 575 print( 576 f"\n Best parameters for {self.kernel} kernel: {grid.best_params_} \n" 577 ) 578 579 self.kde_ = grid.best_estimator_ 580 581 return self 582 583 def partial_fit(self, X, xreg=None, **kwargs): 584 """Update the model with new observations X, with optional regressors xreg 585 586 Parameters: 587 588 X: {array-like}, shape = [n_samples, n_features] 589 Training time series, where n_samples is the number 590 of samples and n_features is the number of features; 591 X must be in increasing order (most recent observations last) 592 593 xreg: {array-like}, shape = [n_samples, n_features_xreg] 594 Additional (external) regressors to be passed to self.obj 595 xreg must be in 'increasing' order (most recent observations last) 596 597 **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity) 598 599 Returns: 600 601 self: object 602 """ 603 604 assert self.df_ is not None, "fit() must be called before partial_fit()" 605 606 if (isinstance(X, pd.DataFrame) is False) and isinstance(X, pd.Series) is False: 607 if len(X.shape) == 1: 608 X = X.reshape(1, -1) 609 610 return self.fit(X, xreg, **kwargs) 611 612 else: 613 if len(X.shape) == 1: 614 X = pd.DataFrame(X.values.reshape(1, -1), columns=self.df_.columns) 615 616 return self.fit(X, xreg, **kwargs) 617 618 def predict(self, h=5, level=95, **kwargs): 619 """Forecast all the time series, h steps ahead""" 620 621 self.output_dates_, frequency = ts.compute_output_dates(self.df_, h) 622 623 self.level_ = level 624 625 self.return_std_ = False # do not remove (/!\) 626 627 self.mean_ = None # do not remove (/!\) 628 629 self.mean_ = deepcopy(self.y_) # do not remove (/!\) 630 631 self.lower_ = None # do not remove (/!\) 632 633 self.upper_ = None # do not remove (/!\) 634 635 self.sims_ = None # do not remove (/!\) 636 637 y_means_ = np.asarray([self.y_means_[i] for i in range(self.init_n_series_)]) 638 639 n_features = self.init_n_series_ * self.lags 640 641 self.alpha_ = 100 - level 642 643 pi_multiplier = norm.ppf(1 - self.alpha_ / 200) 644 645 if "return_std" in kwargs: # bayesian forecasting 646 self.return_std_ = True 647 self.preds_std_ = [] 648 DescribeResult = namedtuple( 649 "DescribeResult", ("mean", "lower", "upper") 650 ) # to be updated 651 652 if "return_pi" in kwargs: # split conformal, without simulation 653 mean_pi_ = [] 654 lower_pi_ = [] 655 upper_pi_ = [] 656 median_pi_ = [] 657 DescribeResult = namedtuple( 658 "DescribeResult", ("mean", "lower", "upper") 659 ) # to be updated 660 661 if self.kde_ != None and "kde" in self.type_pi: # kde 662 target_cols = self.df_.columns[ 663 : self.init_n_series_ 664 ] # Get target column names 665 if self.verbose == 1: 666 self.residuals_sims_ = tuple( 667 self.kde_.sample( 668 n_samples=h, random_state=self.seed + 100 * i 669 ) # Keep full sample 670 for i in tqdm(range(self.replications)) 671 ) 672 elif self.verbose == 0: 673 self.residuals_sims_ = tuple( 674 self.kde_.sample( 675 n_samples=h, random_state=self.seed + 100 * i 676 ) # Keep full sample 677 for i in range(self.replications) 678 ) 679 680 # Convert to DataFrames after sampling 681 
self.residuals_sims_ = tuple( 682 pd.DataFrame( 683 sim, # Keep all columns 684 columns=target_cols, # Use original target column names 685 index=self.output_dates_, 686 ) 687 for sim in self.residuals_sims_ 688 ) 689 690 if self.type_pi in ("bootstrap", "scp-bootstrap", "scp2-bootstrap"): 691 assert self.replications is not None and isinstance( 692 self.replications, int 693 ), "'replications' must be provided and be an integer" 694 if self.verbose == 1: 695 self.residuals_sims_ = tuple( 696 ts.bootstrap( 697 self.residuals_, 698 h=h, 699 block_size=None, 700 seed=self.seed + 100 * i, 701 ) 702 for i in tqdm(range(self.replications)) 703 ) 704 elif self.verbose == 0: 705 self.residuals_sims_ = tuple( 706 ts.bootstrap( 707 self.residuals_, 708 h=h, 709 block_size=None, 710 seed=self.seed + 100 * i, 711 ) 712 for i in range(self.replications) 713 ) 714 715 if self.type_pi in ( 716 "block-bootstrap", 717 "scp-block-bootstrap", 718 "scp2-block-bootstrap", 719 ): 720 if self.block_size is None: 721 self.block_size = int( 722 np.ceil(3.15 * (self.residuals_.shape[0] ** (1 / 3))) 723 ) 724 725 assert self.replications is not None and isinstance( 726 self.replications, int 727 ), "'replications' must be provided and be an integer" 728 if self.verbose == 1: 729 self.residuals_sims_ = tuple( 730 ts.bootstrap( 731 self.residuals_, 732 h=h, 733 block_size=self.block_size, 734 seed=self.seed + 100 * i, 735 ) 736 for i in tqdm(range(self.replications)) 737 ) 738 elif self.verbose == 0: 739 self.residuals_sims_ = tuple( 740 ts.bootstrap( 741 self.residuals_, 742 h=h, 743 block_size=self.block_size, 744 seed=self.seed + 100 * i, 745 ) 746 for i in range(self.replications) 747 ) 748 749 if "vine" in self.type_pi: 750 if self.verbose == 1: 751 self.residuals_sims_ = tuple( 752 vinecopula_sample( 753 x=self.residuals_, 754 n_samples=h, 755 method=self.type_pi, 756 random_state=self.seed + 100 * i, 757 ) 758 for i in tqdm(range(self.replications)) 759 ) 760 elif self.verbose == 0: 761 self.residuals_sims_ = tuple( 762 vinecopula_sample( 763 x=self.residuals_, 764 n_samples=h, 765 method=self.type_pi, 766 random_state=self.seed + 100 * i, 767 ) 768 for i in range(self.replications) 769 ) 770 771 mean_ = deepcopy(self.mean_) 772 773 for i in range(h): 774 775 new_obs = ts.reformat_response(mean_, self.lags) 776 new_X = new_obs.reshape(1, -1) 777 cooked_new_X = self.cook_test_set(new_X, **kwargs) 778 779 if "return_std" in kwargs: 780 self.preds_std_.append( 781 [ 782 np.asarray( 783 self.fit_objs_[i].predict(cooked_new_X, return_std=True)[1] 784 ).item() 785 for i in range(self.n_series) 786 ] 787 ) 788 789 if "return_pi" in kwargs: 790 for i in range(self.n_series): 791 preds_pi = self.fit_objs_[i].predict(cooked_new_X, **kwargs) 792 mean_pi_.append(preds_pi.mean[0]) 793 lower_pi_.append(preds_pi.lower[0]) 794 upper_pi_.append(preds_pi.upper[0]) 795 796 predicted_cooked_new_X = np.asarray( 797 [ 798 np.asarray(self.fit_objs_[i].predict(cooked_new_X)).item() 799 for i in range(self.init_n_series_) 800 ] 801 ) 802 803 preds = np.asarray(y_means_ + predicted_cooked_new_X) 804 805 # Create full row with both predictions and external regressors 806 if self.xreg_ is not None and "xreg" in kwargs: 807 next_xreg = kwargs["xreg"].iloc[i : i + 1].values.flatten() 808 full_row = np.concatenate([preds, next_xreg]) 809 else: 810 full_row = preds 811 812 # Create a new row with same number of columns as mean_ 813 new_row = np.zeros((1, mean_.shape[1])) 814 new_row[0, : full_row.shape[0]] = full_row 815 816 # Maintain the 
full dimensionality by using vstack instead of rbind 817 mean_ = np.vstack([new_row, mean_[:-1]]) 818 819 # Final output should only include the target columns 820 self.mean_ = pd.DataFrame( 821 mean_[0:h, : self.init_n_series_][::-1], 822 columns=self.df_.columns[: self.init_n_series_], 823 index=self.output_dates_, 824 ) 825 826 # function's return ---------------------------------------------------------------------- 827 if ( 828 (("return_std" not in kwargs) and ("return_pi" not in kwargs)) 829 and (self.type_pi not in ("gaussian", "scp")) 830 ) or ("vine" in self.type_pi): 831 832 if self.replications is None: 833 return self.mean_.iloc[:, : self.init_n_series_] 834 835 # if "return_std" not in kwargs and self.replications is not None 836 meanf = [] 837 medianf = [] 838 lower = [] 839 upper = [] 840 841 if "scp2" in self.type_pi: 842 843 if self.verbose == 1: 844 self.sims_ = tuple( 845 ( 846 self.mean_ 847 + self.residuals_sims_[i] 848 * self.residuals_std_dev_[np.newaxis, :] 849 for i in tqdm(range(self.replications)) 850 ) 851 ) 852 elif self.verbose == 0: 853 self.sims_ = tuple( 854 ( 855 self.mean_ 856 + self.residuals_sims_[i] 857 * self.residuals_std_dev_[np.newaxis, :] 858 for i in range(self.replications) 859 ) 860 ) 861 else: 862 863 if self.verbose == 1: 864 self.sims_ = tuple( 865 ( 866 self.mean_ + self.residuals_sims_[i] 867 for i in tqdm(range(self.replications)) 868 ) 869 ) 870 elif self.verbose == 0: 871 self.sims_ = tuple( 872 ( 873 self.mean_ + self.residuals_sims_[i] 874 for i in range(self.replications) 875 ) 876 ) 877 878 DescribeResult = namedtuple( 879 "DescribeResult", ("mean", "sims", "lower", "upper") 880 ) 881 for ix in range(self.init_n_series_): 882 sims_ix = getsims(self.sims_, ix) 883 if self.agg == "mean": 884 meanf.append(np.mean(sims_ix, axis=1)) 885 else: 886 medianf.append(np.median(sims_ix, axis=1)) 887 lower.append(np.quantile(sims_ix, q=self.alpha_ / 200, axis=1)) 888 upper.append(np.quantile(sims_ix, q=1 - self.alpha_ / 200, axis=1)) 889 self.mean_ = pd.DataFrame( 890 np.asarray(meanf).T, 891 columns=self.series_names[: self.init_n_series_], # self.df_.columns, 892 index=self.output_dates_, 893 ) 894 895 self.lower_ = pd.DataFrame( 896 np.asarray(lower).T, 897 columns=self.series_names[: self.init_n_series_], # self.df_.columns, 898 index=self.output_dates_, 899 ) 900 901 self.upper_ = pd.DataFrame( 902 np.asarray(upper).T, 903 columns=self.series_names[: self.init_n_series_], # self.df_.columns, 904 index=self.output_dates_, 905 ) 906 907 try: 908 self.median_ = pd.DataFrame( 909 np.asarray(medianf).T, 910 columns=self.series_names[ 911 : self.init_n_series_ 912 ], # self.df_.columns, 913 index=self.output_dates_, 914 ) 915 except Exception as e: 916 pass 917 918 return DescribeResult(self.mean_, self.sims_, self.lower_, self.upper_) 919 920 if ( 921 (("return_std" in kwargs) or ("return_pi" in kwargs)) 922 and (self.type_pi not in ("gaussian", "scp")) 923 ) or "vine" in self.type_pi: 924 DescribeResult = namedtuple("DescribeResult", ("mean", "lower", "upper")) 925 926 self.mean_ = pd.DataFrame( 927 np.asarray(self.mean_), 928 columns=self.series_names, # self.df_.columns, 929 index=self.output_dates_, 930 ) 931 932 if "return_std" in kwargs: 933 934 self.preds_std_ = np.asarray(self.preds_std_) 935 print("self.preds_std_", self.preds_std_) 936 print("self.mean_", self.mean_) 937 print("pi_multiplier", pi_multiplier) 938 939 self.lower_ = pd.DataFrame( 940 self.mean_.values - pi_multiplier * self.preds_std_, 941 columns=self.series_names, # 
self.df_.columns, 942 index=self.output_dates_, 943 ) 944 945 self.upper_ = pd.DataFrame( 946 self.mean_.values + pi_multiplier * self.preds_std_, 947 columns=self.series_names, # self.df_.columns, 948 index=self.output_dates_, 949 ) 950 951 if "return_pi" in kwargs: 952 953 self.lower_ = pd.DataFrame( 954 np.asarray(lower_pi_).reshape(h, self.n_series) 955 + y_means_[np.newaxis, :], 956 columns=self.series_names, # self.df_.columns, 957 index=self.output_dates_, 958 ) 959 960 self.upper_ = pd.DataFrame( 961 np.asarray(upper_pi_).reshape(h, self.n_series) 962 + y_means_[np.newaxis, :], 963 columns=self.series_names, # self.df_.columns, 964 index=self.output_dates_, 965 ) 966 967 res = DescribeResult(self.mean_, self.lower_, self.upper_) 968 969 if self.xreg_ is not None: 970 if len(self.xreg_.shape) > 1: 971 res2 = mx.tuple_map( 972 res, 973 lambda x: mo.delete_last_columns( 974 x, num_columns=self.xreg_.shape[1] 975 ), 976 ) 977 else: 978 res2 = mx.tuple_map( 979 res, lambda x: mo.delete_last_columns(x, num_columns=1) 980 ) 981 return DescribeResult(res2[0], res2[1], res2[2]) 982 983 return res 984 985 if self.type_pi == "gaussian": 986 987 DescribeResult = namedtuple("DescribeResult", ("mean", "lower", "upper")) 988 989 self.mean_ = pd.DataFrame( 990 np.asarray(self.mean_), 991 columns=self.series_names, # self.df_.columns, 992 index=self.output_dates_, 993 ) 994 995 self.lower_ = pd.DataFrame( 996 self.mean_.values - pi_multiplier * self.gaussian_preds_std_, 997 columns=self.series_names, # self.df_.columns, 998 index=self.output_dates_, 999 ) 1000 1001 self.upper_ = pd.DataFrame( 1002 self.mean_.values + pi_multiplier * self.gaussian_preds_std_, 1003 columns=self.series_names, # self.df_.columns, 1004 index=self.output_dates_, 1005 ) 1006 1007 res = DescribeResult(self.mean_, self.lower_, self.upper_) 1008 1009 if self.xreg_ is not None: 1010 if len(self.xreg_.shape) > 1: 1011 res2 = mx.tuple_map( 1012 res, 1013 lambda x: mo.delete_last_columns( 1014 x, num_columns=self.xreg_.shape[1] 1015 ), 1016 ) 1017 else: 1018 res2 = mx.tuple_map( 1019 res, lambda x: mo.delete_last_columns(x, num_columns=1) 1020 ) 1021 return DescribeResult(res2[0], res2[1], res2[2]) 1022 1023 return res 1024 1025 # After prediction loop, ensure sims only contain target columns 1026 if self.sims_ is not None: 1027 if self.verbose == 1: 1028 self.sims_ = tuple( 1029 sim[:h,] # Only keep target columns and h rows 1030 for sim in tqdm(self.sims_) 1031 ) 1032 elif self.verbose == 0: 1033 self.sims_ = tuple( 1034 sim[:h,] # Only keep target columns and h rows 1035 for sim in self.sims_ 1036 ) 1037 1038 # Convert numpy arrays to DataFrames with proper columns 1039 self.sims_ = tuple( 1040 pd.DataFrame( 1041 sim, 1042 columns=self.df_.columns[: self.init_n_series_], 1043 index=self.output_dates_, 1044 ) 1045 for sim in self.sims_ 1046 ) 1047 1048 if self.type_pi in ("kde", "bootstrap", "block-bootstrap", "vine-copula"): 1049 if self.xreg_ is not None: 1050 # Use getsimsxreg when external regressors are present 1051 target_cols = self.df_.columns[: self.init_n_series_] 1052 self.sims_ = getsimsxreg(self.sims_, self.output_dates_, target_cols) 1053 else: 1054 # Use original getsims for backward compatibility 1055 self.sims_ = getsims(self.sims_) 1056 1057 def score(self, X, training_index, testing_index, scoring=None, **kwargs): 1058 """Train on training_index, score on testing_index.""" 1059 1060 assert ( 1061 bool(set(training_index).intersection(set(testing_index))) == False 1062 ), "Non-overlapping 'training_index' and 
'testing_index' required" 1063 1064 # Dimensions 1065 try: 1066 # multivariate time series 1067 n, p = X.shape 1068 except: 1069 # univariate time series 1070 n = X.shape[0] 1071 p = 1 1072 1073 # Training and testing sets 1074 if p > 1: 1075 X_train = X[training_index, :] 1076 X_test = X[testing_index, :] 1077 else: 1078 X_train = X[training_index] 1079 X_test = X[testing_index] 1080 1081 # Horizon 1082 h = len(testing_index) 1083 assert ( 1084 len(training_index) + h 1085 ) <= n, "Please check lengths of training and testing windows" 1086 1087 # Fit and predict 1088 self.fit(X_train, **kwargs) 1089 preds = self.predict(h=h, **kwargs) 1090 1091 if scoring is None: 1092 scoring = "neg_root_mean_squared_error" 1093 1094 # check inputs 1095 assert scoring in ( 1096 "explained_variance", 1097 "neg_mean_absolute_error", 1098 "neg_mean_squared_error", 1099 "neg_root_mean_squared_error", 1100 "neg_mean_squared_log_error", 1101 "neg_median_absolute_error", 1102 "r2", 1103 ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \ 1104 'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \ 1105 'neg_median_absolute_error', 'r2')" 1106 1107 scoring_options = { 1108 "explained_variance": skm2.explained_variance_score, 1109 "neg_mean_absolute_error": skm2.mean_absolute_error, 1110 "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2), 1111 "neg_root_mean_squared_error": lambda x, y: np.sqrt(np.mean((x - y) ** 2)), 1112 "neg_mean_squared_log_error": skm2.mean_squared_log_error, 1113 "neg_median_absolute_error": skm2.median_absolute_error, 1114 "r2": skm2.r2_score, 1115 } 1116 1117 return scoring_options[scoring](X_test, preds) 1118 1119 def plot(self, series=None, type_axis="dates", type_plot="pi"): 1120 """Plot time series forecast 1121 1122 Parameters: 1123 1124 series: {integer} or {string} 1125 series index or name 1126 1127 """ 1128 1129 assert all( 1130 [ 1131 self.mean_ is not None, 1132 self.lower_ is not None, 1133 self.upper_ is not None, 1134 self.output_dates_ is not None, 1135 ] 1136 ), "model forecasting must be obtained first (with predict)" 1137 1138 if series is None: 1139 # assert ( 1140 # self.init_n_series_ == 1 1141 # ), "please specify series index or name (n_series > 1)" 1142 series = 0 1143 1144 if isinstance(series, str): 1145 assert ( 1146 series in self.series_names 1147 ), f"series {series} doesn't exist in the input dataset" 1148 series_idx = self.df_.columns.get_loc(series) 1149 else: 1150 assert isinstance(series, int) and ( 1151 0 <= series < self.n_series 1152 ), f"check series index (< {self.n_series})" 1153 series_idx = series 1154 1155 y_all = list(self.df_.iloc[:, series_idx]) + list( 1156 self.mean_.iloc[:, series_idx] 1157 ) 1158 y_test = list(self.mean_.iloc[:, series_idx]) 1159 n_points_all = len(y_all) 1160 n_points_train = self.df_.shape[0] 1161 1162 if type_axis == "numeric": 1163 x_all = [i for i in range(n_points_all)] 1164 x_test = [i for i in range(n_points_train, n_points_all)] 1165 1166 if type_axis == "dates": # use dates 1167 x_all = np.concatenate( 1168 (self.input_dates.values, self.output_dates_.values), axis=None 1169 ) 1170 x_test = self.output_dates_.values 1171 1172 if type_plot == "pi": 1173 fig, ax = plt.subplots() 1174 ax.plot(x_all, y_all, "-") 1175 ax.plot(x_test, y_test, "-", color="orange") 1176 ax.fill_between( 1177 x_test, 1178 self.lower_.iloc[:, series_idx], 1179 self.upper_.iloc[:, series_idx], 1180 alpha=0.2, 1181 color="orange", 1182 ) 1183 if self.replications is None: 
1184 if self.n_series > 1: 1185 plt.title( 1186 f"prediction intervals for {series}", 1187 loc="left", 1188 fontsize=12, 1189 fontweight=0, 1190 color="black", 1191 ) 1192 else: 1193 plt.title( 1194 f"prediction intervals for input time series", 1195 loc="left", 1196 fontsize=12, 1197 fontweight=0, 1198 color="black", 1199 ) 1200 plt.show() 1201 else: # self.replications is not None 1202 if self.n_series > 1: 1203 plt.title( 1204 f"prediction intervals for {self.replications} simulations of {series}", 1205 loc="left", 1206 fontsize=12, 1207 fontweight=0, 1208 color="black", 1209 ) 1210 else: 1211 plt.title( 1212 f"prediction intervals for {self.replications} simulations of input time series", 1213 loc="left", 1214 fontsize=12, 1215 fontweight=0, 1216 color="black", 1217 ) 1218 plt.show() 1219 1220 if type_plot == "spaghetti": 1221 palette = plt.get_cmap("Set1") 1222 sims_ix = getsims(self.sims_, series_idx) 1223 plt.plot(x_all, y_all, "-") 1224 for col_ix in range( 1225 sims_ix.shape[1] 1226 ): # avoid this when there are thousands of simulations 1227 plt.plot( 1228 x_test, 1229 sims_ix[:, col_ix], 1230 "-", 1231 color=palette(col_ix), 1232 linewidth=1, 1233 alpha=0.9, 1234 ) 1235 plt.plot(x_all, y_all, "-", color="black") 1236 plt.plot(x_test, y_test, "-", color="blue") 1237 # Add titles 1238 if self.n_series > 1: 1239 plt.title( 1240 f"{self.replications} simulations of {series}", 1241 loc="left", 1242 fontsize=12, 1243 fontweight=0, 1244 color="black", 1245 ) 1246 else: 1247 plt.title( 1248 f"{self.replications} simulations of input time series", 1249 loc="left", 1250 fontsize=12, 1251 fontweight=0, 1252 color="black", 1253 ) 1254 plt.xlabel("Time") 1255 plt.ylabel("Values") 1256 # Show the graph 1257 plt.show() 1258 1259 def cross_val_score( 1260 self, 1261 X, 1262 scoring="root_mean_squared_error", 1263 n_jobs=None, 1264 verbose=0, 1265 xreg=None, 1266 initial_window=5, 1267 horizon=3, 1268 fixed_window=False, 1269 show_progress=True, 1270 level=95, 1271 **kwargs, 1272 ): 1273 """Evaluate a score by time series cross-validation. 1274 1275 Parameters: 1276 1277 X: {array-like, sparse matrix} of shape (n_samples, n_features) 1278 The data to fit. 1279 1280 scoring: str or a function 1281 A str in ('root_mean_squared_error', 'mean_squared_error', 'mean_error', 1282 'mean_absolute_error', 'mean_error', 'mean_percentage_error', 1283 'mean_absolute_percentage_error', 'winkler_score', 'coverage') 1284 Or a function defined as 'coverage' and 'winkler_score' in `utils.timeseries` 1285 1286 n_jobs: int, default=None 1287 Number of jobs to run in parallel. 1288 1289 verbose: int, default=0 1290 The verbosity level. 1291 1292 xreg: array-like, optional (default=None) 1293 Additional (external) regressors to be passed to `fit` 1294 xreg must be in 'increasing' order (most recent observations last) 1295 1296 initial_window: int 1297 initial number of consecutive values in each training set sample 1298 1299 horizon: int 1300 number of consecutive values in test set sample 1301 1302 fixed_window: boolean 1303 if False, all training samples start at index 0, and the training 1304 window's size is increasing. 
1305 if True, the training window's size is fixed, and the window is 1306 rolling forward 1307 1308 show_progress: boolean 1309 if True, a progress bar is printed 1310 1311 **kwargs: dict 1312 additional parameters to be passed to `fit` and `predict` 1313 1314 Returns: 1315 1316 A tuple: descriptive statistics or errors and raw errors 1317 1318 """ 1319 tscv = TimeSeriesSplit() 1320 1321 tscv_obj = tscv.split( 1322 X, 1323 initial_window=initial_window, 1324 horizon=horizon, 1325 fixed_window=fixed_window, 1326 ) 1327 1328 if isinstance(scoring, str): 1329 1330 assert scoring in ( 1331 "root_mean_squared_error", 1332 "mean_squared_error", 1333 "mean_error", 1334 "mean_absolute_error", 1335 "mean_percentage_error", 1336 "mean_absolute_percentage_error", 1337 "winkler_score", 1338 "coverage", 1339 ), "must have scoring in ('root_mean_squared_error', 'mean_squared_error', 'mean_error', 'mean_absolute_error', 'mean_error', 'mean_percentage_error', 'mean_absolute_percentage_error', 'winkler_score', 'coverage')" 1340 1341 def err_func(X_test, X_pred, scoring): 1342 if (self.replications is not None) or ( 1343 self.type_pi == "gaussian" 1344 ): # probabilistic 1345 if scoring == "winkler_score": 1346 return winkler_score(X_pred, X_test, level=level) 1347 elif scoring == "coverage": 1348 return coverage(X_pred, X_test, level=level) 1349 else: 1350 return mean_errors( 1351 pred=X_pred.mean, actual=X_test, scoring=scoring 1352 ) 1353 else: # not probabilistic 1354 return mean_errors(pred=X_pred, actual=X_test, scoring=scoring) 1355 1356 else: # isinstance(scoring, str) = False 1357 1358 err_func = scoring 1359 1360 errors = [] 1361 1362 train_indices = [] 1363 1364 test_indices = [] 1365 1366 for train_index, test_index in tscv_obj: 1367 train_indices.append(train_index) 1368 test_indices.append(test_index) 1369 1370 if show_progress is True: 1371 iterator = tqdm(zip(train_indices, test_indices), total=len(train_indices)) 1372 else: 1373 iterator = zip(train_indices, test_indices) 1374 1375 for train_index, test_index in iterator: 1376 1377 if verbose == 1: 1378 print(f"TRAIN: {train_index}") 1379 print(f"TEST: {test_index}") 1380 1381 if isinstance(X, pd.DataFrame): 1382 self.fit(X.iloc[train_index, :], xreg=xreg, **kwargs) 1383 X_test = X.iloc[test_index, :] 1384 else: 1385 self.fit(X[train_index, :], xreg=xreg, **kwargs) 1386 X_test = X[test_index, :] 1387 X_pred = self.predict(h=int(len(test_index)), level=level, **kwargs) 1388 1389 errors.append(err_func(X_test, X_pred, scoring)) 1390 1391 res = np.asarray(errors) 1392 1393 return res, describe(res) 1394 1395 def _compute_information_criterion(self, curr_lags, criterion="AIC"): 1396 """Compute information criterion using existing residuals 1397 1398 Parameters 1399 ---------- 1400 curr_lags : int 1401 Current number of lags being evaluated 1402 criterion : str 1403 One of 'AIC', 'AICc', or 'BIC' 1404 1405 Returns 1406 ------- 1407 float 1408 Information criterion value or inf if parameters exceed observations 1409 """ 1410 # Get dimensions 1411 n_obs = self.residuals_.shape[0] 1412 n_features = int(self.init_n_series_ * curr_lags) 1413 n_hidden = int(self.n_hidden_features) 1414 1415 # Calculate number of parameters 1416 term1 = int(n_features * n_hidden) 1417 term2 = int(n_hidden * self.init_n_series_) 1418 n_params = term1 + term2 1419 1420 # Check if we have enough observations for the number of parameters 1421 if n_obs <= n_params + 1: 1422 return float("inf") # Return infinity if too many parameters 1423 1424 # Compute RSS using existing 
residuals 1425 rss = np.sum(self.residuals_**2) 1426 1427 # Compute criterion 1428 if criterion == "AIC": 1429 ic = n_obs * np.log(rss / n_obs) + 2 * n_params 1430 elif criterion == "AICc": 1431 ic = n_obs * np.log(rss / n_obs) + 2 * n_params * ( 1432 n_obs / (n_obs - n_params - 1) 1433 ) 1434 else: # BIC 1435 ic = n_obs * np.log(rss / n_obs) + n_params * np.log(n_obs) 1436 1437 return ic
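For reference, the AIC/AICc/BIC values computed in `_compute_information_criterion` reduce to the usual Gaussian-likelihood forms based on the residual sum of squares. Below is a minimal, self-contained sketch of the same computation, written as a standalone function with made-up residuals and parameter counts (the function name and the toy values are illustrative, not part of the package):

```python
import numpy as np

def information_criterion(residuals, n_params, criterion="AIC"):
    # residuals: 2D array (n_obs x n_series) of in-sample residuals
    n_obs = residuals.shape[0]
    rss = np.sum(residuals ** 2)
    if n_obs <= n_params + 1:
        # too many parameters for the number of observations
        return float("inf")
    if criterion == "AIC":
        return n_obs * np.log(rss / n_obs) + 2 * n_params
    if criterion == "AICc":
        return n_obs * np.log(rss / n_obs) + 2 * n_params * (n_obs / (n_obs - n_params - 1))
    # BIC
    return n_obs * np.log(rss / n_obs) + n_params * np.log(n_obs)

# toy check with random residuals and 10 parameters (illustrative values only)
rng = np.random.default_rng(0)
print(information_criterion(rng.normal(size=(50, 3)), n_params=10, criterion="AICc"))
```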
Univariate and multivariate time series (MTS) forecasting with Quasi-Randomized networks
Parameters:
obj: object.
any object containing a method fit (obj.fit()) and a method predict
(obj.predict()).
n_hidden_features: int.
number of nodes in the hidden layer.
activation_name: str.
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'.
a: float.
hyperparameter for 'prelu' or 'elu' activation function.
nodes_sim: str.
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'.
bias: boolean.
indicates if the hidden layer contains a bias term (True) or not
(False).
dropout: float.
regularization parameter; (random) percentage of nodes dropped out
of the training.
direct_link: boolean.
indicates if the original predictors are included (True) in model's fitting or not (False).
n_clusters: int.
number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering).
cluster_encode: bool.
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding.
type_clust: str.
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm').
type_scaling: a tuple of 3 strings.
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax').
lags: int or str.
number of lags used for each time series.
If a string, it must be one of 'AIC', 'AICc', or 'BIC'; the number of lags is then selected automatically at fit time by minimizing that information criterion (see the sketch after this parameter list).
type_pi: str.
type of prediction interval; currently:
- "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case
- "kde": based on Kernel Density Estimation of in-sample residuals
- "bootstrap": based on independent bootstrap of in-sample residuals
- "block-bootstrap": based on basic block bootstrap of in-sample residuals
- "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals
- "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals
- "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals
- "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals
- "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals
- "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals
- based on copulas of in-sample residuals: 'vine-tll', 'vine-bb1', 'vine-bb6', 'vine-bb7', 'vine-bb8', 'vine-clayton',
'vine-frank', 'vine-gaussian', 'vine-gumbel', 'vine-indep', 'vine-joe', 'vine-student'
- 'scp-vine-tll', 'scp-vine-bb1', 'scp-vine-bb6', 'scp-vine-bb7', 'scp-vine-bb8', 'scp-vine-clayton',
'scp-vine-frank', 'scp-vine-gaussian', 'scp-vine-gumbel', 'scp-vine-indep', 'scp-vine-joe', 'scp-vine-student'
- 'scp2-vine-tll', 'scp2-vine-bb1', 'scp2-vine-bb6', 'scp2-vine-bb7', 'scp2-vine-bb8', 'scp2-vine-clayton',
'scp2-vine-frank', 'scp2-vine-gaussian', 'scp2-vine-gumbel', 'scp2-vine-indep', 'scp2-vine-joe', 'scp2-vine-student'
block_size: int.
size of blocks for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap").
Default is ceil(3.15 * n_residuals^(1/3)), where n_residuals is the number of residual rows
replications: int.
number of replications (needed for predictive simulation). Default is None; a simulation-based sketch is given after the examples below.
kernel: str.
the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'.
agg: str.
either "mean" or "median"; aggregation used when summarizing the simulations (bootstrap aggregating)
seed: int.
reproducibility seed for nodes_sim=='uniform' or predictive simulation.
backend: str.
"cpu" or "gpu" or "tpu".
verbose: int.
0: no printing; 1: print progress and diagnostic messages
show_progress: bool.
if True, a progress bar is displayed while fitting each series
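As a rough illustration of how the parameters above combine, including automatic lag selection with `lags="AIC"`, here is a hedged construction sketch; the toy data, the hyperparameter values, and the scikit-learn Ridge base learner are arbitrary choices, and any object exposing fit/predict should work as `obj`:

```python
import nnetsauce as ns
import numpy as np
from sklearn.linear_model import Ridge

np.random.seed(42)
X = np.random.rand(60, 2)  # toy multivariate series, most recent observations last

# lags="AIC" triggers automatic lag selection at fit time;
# type_pi="kde" + replications enables simulation-based prediction intervals
model = ns.MTS(Ridge(),
               lags="AIC",
               n_hidden_features=5,
               type_pi="kde",
               replications=100,
               kernel="gaussian",
               seed=123)
model.fit(X)
res = model.predict(h=5, level=95)  # mean, sims, lower, upper when replications is set
```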
Attributes:
fit_objs_: dict
objects adjusted to each individual time series
y_: {array-like}
MTS responses (most recent observations first)
X_: {array-like}
MTS lags
xreg_: {array-like}
external regressors
y_means_: dict
a dictionary of each series mean values
preds_: {array-like}
successive model predictions
preds_std_: {array-like}
standard deviation around the predictions for Bayesian base learners (`obj`)
gaussian_preds_std_: {array-like}
standard deviation around the predictions for `type_pi='gaussian'`
return_std_: boolean
return uncertainty or not (set in predict)
df_: data frame
the input data frame, in case a pandas DataFrame is provided to `fit`
n_obs_: int
number of time series observations (number of rows for multivariate)
level_: int
level of confidence for prediction intervals (default is 95)
residuals_: {array-like}
in-sample residuals (when `type_pi` is not a conformal prediction method) or calibrated residuals
(when `type_pi` is a conformal prediction method)
residuals_sims_: tuple of {array-like}
simulations of in-sample residuals (when `type_pi` is not a conformal prediction method) or of
calibrated residuals (when `type_pi` is a conformal prediction method)
kde_: A scikit-learn object, see https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KernelDensity.html
residuals_std_dev_: {array-like}
column-wise standard deviation of the calibrated residuals (used by the 'scp2-*' methods)
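As a hedged illustration of how a few of these attributes can be inspected after fitting and forecasting (toy data; the Ridge learner and `type_pi='gaussian'` are arbitrary choices made for the sketch):

```python
import nnetsauce as ns
import numpy as np
from sklearn.linear_model import Ridge

np.random.seed(0)
X = np.random.rand(40, 3)

obj_MTS = ns.MTS(Ridge(), lags=2, n_hidden_features=3, type_pi="gaussian")
obj_MTS.fit(X)
obj_MTS.predict(h=4, level=95)

print(obj_MTS.residuals_.shape)  # in-sample residuals, one column per series
print(obj_MTS.y_means_)          # per-series means used for centering
print(obj_MTS.lower_.head())     # lower prediction bound (pandas DataFrame)
print(obj_MTS.upper_.head())     # upper prediction bound (pandas DataFrame)
```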
Examples:
Example 1:
import nnetsauce as ns
import numpy as np
from sklearn import linear_model
np.random.seed(123)
M = np.random.rand(10, 3)
M[:,0] = 10*M[:,0]
M[:,2] = 25*M[:,2]
print(M)
# Adjust Bayesian Ridge
regr4 = linear_model.BayesianRidge()
obj_MTS = ns.MTS(regr4, lags = 1, n_hidden_features=5)
obj_MTS.fit(M)
print(obj_MTS.predict())
# with credible intervals
print(obj_MTS.predict(return_std=True, level=80))
print(obj_MTS.predict(return_std=True, level=95))
Example 2:
import nnetsauce as ns
import numpy as np
import pandas as pd
from sklearn import linear_model
dataset = {
'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
'series1' : [34, 30, 35.6, 33.3, 38.1],
'series2' : [4, 5.5, 5.6, 6.3, 5.1],
'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
df = pd.DataFrame(dataset).set_index('date')
print(df)
# Adjust Bayesian Ridge
regr5 = linear_model.BayesianRidge()
obj_MTS = ns.MTS(regr5, lags = 1, n_hidden_features=5)
obj_MTS.fit(df)
print(obj_MTS.predict())
# with credible intervals
print(obj_MTS.predict(return_std=True, level=80))
print(obj_MTS.predict(return_std=True, level=95))
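A third, hedged sketch (not part of the original docstring) illustrating simulation-based prediction intervals via `type_pi` and `replications`; the data, the Ridge learner and all settings below are illustrative assumptions:

```python
import nnetsauce as ns
import numpy as np
from sklearn import linear_model

np.random.seed(123)
M = np.random.rand(30, 3)

regr6 = linear_model.Ridge()
obj_MTS = ns.MTS(regr6, lags=2, n_hidden_features=5,
                 type_pi="block-bootstrap", replications=50, seed=123)
obj_MTS.fit(M)
res = obj_MTS.predict(h=5, level=95)
print(res.mean)   # point forecasts (aggregated over the simulations)
print(res.lower)  # lower bound of the 95% prediction interval
print(res.upper)  # upper bound of the 95% prediction interval
```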
322 def fit(self, X, xreg=None, **kwargs): 323 """Fit MTS model to training data X, with optional regressors xreg 324 325 Parameters: 326 327 X: {array-like}, shape = [n_samples, n_features] 328 Training time series, where n_samples is the number 329 of samples and n_features is the number of features; 330 X must be in increasing order (most recent observations last) 331 332 xreg: {array-like}, shape = [n_samples, n_features_xreg] 333 Additional (external) regressors to be passed to self.obj 334 xreg must be in 'increasing' order (most recent observations last) 335 336 **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity) 337 338 Returns: 339 340 self: object 341 """ 342 343 self.init_n_series_ = X.shape[1] 344 345 # Automatic lag selection if requested 346 if isinstance(self.lags, str): 347 max_lags = min(25, X.shape[0] // 4) 348 best_ic = float("inf") 349 best_lags = 1 350 351 if self.verbose: 352 print(f"\nSelecting optimal number of lags using {self.lags}...") 353 iterator = tqdm(range(1, max_lags + 1)) 354 else: 355 iterator = range(1, max_lags + 1) 356 357 for lag in iterator: 358 # Convert DataFrame to numpy array before reversing 359 if isinstance(X, pd.DataFrame): 360 X_values = X.values[::-1] 361 else: 362 X_values = X[::-1] 363 364 # Try current lag value 365 if self.init_n_series_ > 1: 366 mts_input = ts.create_train_inputs(X_values, lag) 367 else: 368 mts_input = ts.create_train_inputs(X_values.reshape(-1, 1), lag) 369 370 # Cook training set and fit model 371 dummy_y, scaled_Z = self.cook_training_set( 372 y=np.ones(mts_input[0].shape[0]), X=mts_input[1] 373 ) 374 residuals_ = [] 375 376 for i in range(self.init_n_series_): 377 y_mean = np.mean(mts_input[0][:, i]) 378 centered_y_i = mts_input[0][:, i] - y_mean 379 self.obj.fit(X=scaled_Z, y=centered_y_i) 380 residuals_.append( 381 (centered_y_i - self.obj.predict(scaled_Z)).tolist() 382 ) 383 384 self.residuals_ = np.asarray(residuals_).T 385 ic = self._compute_information_criterion( 386 curr_lags=lag, criterion=self.lags 387 ) 388 389 if self.verbose: 390 print(f"Trying lags={lag}, {self.lags}={ic:.2f}") 391 392 if ic < best_ic: 393 best_ic = ic 394 best_lags = lag 395 396 if self.verbose: 397 print(f"\nSelected {best_lags} lags with {self.lags}={best_ic:.2f}") 398 399 self.lags = best_lags 400 401 self.input_dates = None 402 self.df_ = None 403 404 if isinstance(X, pd.DataFrame) is False: 405 # input data set is a numpy array 406 if xreg is None: 407 X = pd.DataFrame(X) 408 self.series_names = ["series" + str(i) for i in range(X.shape[1])] 409 else: 410 # xreg is not None 411 X = mo.cbind(X, xreg) 412 self.xreg_ = xreg 413 414 else: # input data set is a DataFrame with column names 415 416 X_index = None 417 if X.index is not None: 418 X_index = X.index 419 if xreg is None: 420 X = copy.deepcopy(mo.convert_df_to_numeric(X)) 421 else: 422 X = copy.deepcopy(mo.cbind(mo.convert_df_to_numeric(X), xreg)) 423 self.xreg_ = xreg 424 if X_index is not None: 425 X.index = X_index 426 self.series_names = X.columns.tolist() 427 428 if isinstance(X, pd.DataFrame): 429 if self.df_ is None: 430 self.df_ = X 431 X = X.values 432 else: 433 input_dates_prev = pd.DatetimeIndex(self.df_.index.values) 434 frequency = pd.infer_freq(input_dates_prev) 435 self.df_ = pd.concat([self.df_, X], axis=0) 436 self.input_dates = pd.date_range( 437 start=input_dates_prev[0], 438 periods=len(input_dates_prev) + X.shape[0], 439 freq=frequency, 440 ).values.tolist() 441 
self.df_.index = self.input_dates 442 X = self.df_.values 443 self.df_.columns = self.series_names 444 else: 445 if self.df_ is None: 446 self.df_ = pd.DataFrame(X, columns=self.series_names) 447 else: 448 self.df_ = pd.concat( 449 [self.df_, pd.DataFrame(X, columns=self.series_names)], 450 axis=0, 451 ) 452 453 self.input_dates = ts.compute_input_dates(self.df_) 454 455 try: 456 # multivariate time series 457 n, p = X.shape 458 except: 459 # univariate time series 460 n = X.shape[0] 461 p = 1 462 self.n_obs_ = n 463 464 rep_1_n = np.repeat(1, n) 465 466 self.y_ = None 467 self.X_ = None 468 self.n_series = p 469 self.fit_objs_.clear() 470 self.y_means_.clear() 471 residuals_ = [] 472 self.residuals_ = None 473 self.residuals_sims_ = None 474 self.kde_ = None 475 self.sims_ = None 476 self.scaled_Z_ = None 477 self.centered_y_is_ = [] 478 479 if self.init_n_series_ > 1: 480 # multivariate time series 481 mts_input = ts.create_train_inputs(X[::-1], self.lags) 482 else: 483 # univariate time series 484 mts_input = ts.create_train_inputs(X.reshape(-1, 1)[::-1], self.lags) 485 486 self.y_ = mts_input[0] 487 488 self.X_ = mts_input[1] 489 490 dummy_y, scaled_Z = self.cook_training_set(y=rep_1_n, X=self.X_) 491 492 self.scaled_Z_ = scaled_Z 493 494 # loop on all the time series and adjust self.obj.fit 495 if self.verbose > 0: 496 print( 497 f"\n Adjusting {type(self.obj).__name__} to multivariate time series... \n" 498 ) 499 500 if self.show_progress is True: 501 iterator = tqdm(range(self.init_n_series_)) 502 else: 503 iterator = range(self.init_n_series_) 504 505 if self.type_pi in ( 506 "gaussian", 507 "kde", 508 "bootstrap", 509 "block-bootstrap", 510 ) or self.type_pi.startswith("vine"): 511 for i in iterator: 512 y_mean = np.mean(self.y_[:, i]) 513 self.y_means_[i] = y_mean 514 centered_y_i = self.y_[:, i] - y_mean 515 self.centered_y_is_.append(centered_y_i) 516 self.obj.fit(X=scaled_Z, y=centered_y_i) 517 self.fit_objs_[i] = deepcopy(self.obj) 518 residuals_.append( 519 (centered_y_i - self.fit_objs_[i].predict(scaled_Z)).tolist() 520 ) 521 522 if self.type_pi.startswith("scp"): 523 # split conformal prediction 524 for i in iterator: 525 n_y = self.y_.shape[0] 526 n_y_half = n_y // 2 527 first_half_idx = range(0, n_y_half) 528 second_half_idx = range(n_y_half, n_y) 529 y_mean_temp = np.mean(self.y_[first_half_idx, i]) 530 centered_y_i_temp = self.y_[first_half_idx, i] - y_mean_temp 531 self.obj.fit(X=scaled_Z[first_half_idx, :], y=centered_y_i_temp) 532 # calibrated residuals actually 533 residuals_.append( 534 ( 535 self.y_[second_half_idx, i] 536 - (y_mean_temp + self.obj.predict(scaled_Z[second_half_idx, :])) 537 ).tolist() 538 ) 539 # fit on the second half 540 y_mean = np.mean(self.y_[second_half_idx, i]) 541 self.y_means_[i] = y_mean 542 centered_y_i = self.y_[second_half_idx, i] - y_mean 543 self.obj.fit(X=scaled_Z[second_half_idx, :], y=centered_y_i) 544 self.fit_objs_[i] = deepcopy(self.obj) 545 546 self.residuals_ = np.asarray(residuals_).T 547 548 if self.type_pi == "gaussian": 549 self.gaussian_preds_std_ = np.std(self.residuals_, axis=0) 550 551 if self.type_pi.startswith("scp2"): 552 # Calculate mean and standard deviation for each column 553 data_mean = np.mean(self.residuals_, axis=0) 554 self.residuals_std_dev_ = np.std(self.residuals_, axis=0) 555 # Center and scale the array using broadcasting 556 self.residuals_ = ( 557 self.residuals_ - data_mean[np.newaxis, :] 558 ) / self.residuals_std_dev_[np.newaxis, :] 559 560 if self.replications != None and "kde" in 
self.type_pi: 561 if self.verbose > 0: 562 print(f"\n Simulate residuals using {self.kernel} kernel... \n") 563 assert self.kernel in ( 564 "gaussian", 565 "tophat", 566 ), "currently, 'kernel' must be either 'gaussian' or 'tophat'" 567 kernel_bandwidths = {"bandwidth": np.logspace(-6, 6, 150)} 568 grid = GridSearchCV( 569 KernelDensity(kernel=self.kernel, **kwargs), 570 param_grid=kernel_bandwidths, 571 ) 572 grid.fit(self.residuals_) 573 574 if self.verbose > 0: 575 print( 576 f"\n Best parameters for {self.kernel} kernel: {grid.best_params_} \n" 577 ) 578 579 self.kde_ = grid.best_estimator_ 580 581 return self
Fit MTS model to training data X, with optional regressors xreg
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training time series, where n_samples is the number of samples and n_features is the number of features; X must be in increasing order (most recent observations last)
xreg: {array-like}, shape = [n_samples, n_features_xreg]
Additional (external) regressors to be passed to self.obj; xreg must be in increasing order (most recent observations last)
**kwargs: additional parameters to be passed for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)
Returns:
self: object
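A hedged sketch of `fit` with external regressors; the data frame, the dates, the 'temperature' regressor and the choice of `type_pi="kde"` below are made up purely for illustration:

```python
import nnetsauce as ns
import numpy as np
import pandas as pd
from sklearn.linear_model import Ridge

np.random.seed(1)
dates = pd.date_range("2020-01-31", periods=50, freq="M")
X = pd.DataFrame(np.random.rand(50, 2), columns=["series1", "series2"], index=dates)
xreg = pd.DataFrame(np.random.rand(50, 1), columns=["temperature"], index=dates)

obj_MTS = ns.MTS(Ridge(), lags=3, n_hidden_features=4, type_pi="kde")
obj_MTS.fit(X, xreg=xreg)    # xreg columns are appended to the lagged inputs
print(obj_MTS.predict(h=5))  # forecasts returned for series1 and series2 only
```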
618 def predict(self, h=5, level=95, **kwargs): 619 """Forecast all the time series, h steps ahead""" 620 621 self.output_dates_, frequency = ts.compute_output_dates(self.df_, h) 622 623 self.level_ = level 624 625 self.return_std_ = False # do not remove (/!\) 626 627 self.mean_ = None # do not remove (/!\) 628 629 self.mean_ = deepcopy(self.y_) # do not remove (/!\) 630 631 self.lower_ = None # do not remove (/!\) 632 633 self.upper_ = None # do not remove (/!\) 634 635 self.sims_ = None # do not remove (/!\) 636 637 y_means_ = np.asarray([self.y_means_[i] for i in range(self.init_n_series_)]) 638 639 n_features = self.init_n_series_ * self.lags 640 641 self.alpha_ = 100 - level 642 643 pi_multiplier = norm.ppf(1 - self.alpha_ / 200) 644 645 if "return_std" in kwargs: # bayesian forecasting 646 self.return_std_ = True 647 self.preds_std_ = [] 648 DescribeResult = namedtuple( 649 "DescribeResult", ("mean", "lower", "upper") 650 ) # to be updated 651 652 if "return_pi" in kwargs: # split conformal, without simulation 653 mean_pi_ = [] 654 lower_pi_ = [] 655 upper_pi_ = [] 656 median_pi_ = [] 657 DescribeResult = namedtuple( 658 "DescribeResult", ("mean", "lower", "upper") 659 ) # to be updated 660 661 if self.kde_ != None and "kde" in self.type_pi: # kde 662 target_cols = self.df_.columns[ 663 : self.init_n_series_ 664 ] # Get target column names 665 if self.verbose == 1: 666 self.residuals_sims_ = tuple( 667 self.kde_.sample( 668 n_samples=h, random_state=self.seed + 100 * i 669 ) # Keep full sample 670 for i in tqdm(range(self.replications)) 671 ) 672 elif self.verbose == 0: 673 self.residuals_sims_ = tuple( 674 self.kde_.sample( 675 n_samples=h, random_state=self.seed + 100 * i 676 ) # Keep full sample 677 for i in range(self.replications) 678 ) 679 680 # Convert to DataFrames after sampling 681 self.residuals_sims_ = tuple( 682 pd.DataFrame( 683 sim, # Keep all columns 684 columns=target_cols, # Use original target column names 685 index=self.output_dates_, 686 ) 687 for sim in self.residuals_sims_ 688 ) 689 690 if self.type_pi in ("bootstrap", "scp-bootstrap", "scp2-bootstrap"): 691 assert self.replications is not None and isinstance( 692 self.replications, int 693 ), "'replications' must be provided and be an integer" 694 if self.verbose == 1: 695 self.residuals_sims_ = tuple( 696 ts.bootstrap( 697 self.residuals_, 698 h=h, 699 block_size=None, 700 seed=self.seed + 100 * i, 701 ) 702 for i in tqdm(range(self.replications)) 703 ) 704 elif self.verbose == 0: 705 self.residuals_sims_ = tuple( 706 ts.bootstrap( 707 self.residuals_, 708 h=h, 709 block_size=None, 710 seed=self.seed + 100 * i, 711 ) 712 for i in range(self.replications) 713 ) 714 715 if self.type_pi in ( 716 "block-bootstrap", 717 "scp-block-bootstrap", 718 "scp2-block-bootstrap", 719 ): 720 if self.block_size is None: 721 self.block_size = int( 722 np.ceil(3.15 * (self.residuals_.shape[0] ** (1 / 3))) 723 ) 724 725 assert self.replications is not None and isinstance( 726 self.replications, int 727 ), "'replications' must be provided and be an integer" 728 if self.verbose == 1: 729 self.residuals_sims_ = tuple( 730 ts.bootstrap( 731 self.residuals_, 732 h=h, 733 block_size=self.block_size, 734 seed=self.seed + 100 * i, 735 ) 736 for i in tqdm(range(self.replications)) 737 ) 738 elif self.verbose == 0: 739 self.residuals_sims_ = tuple( 740 ts.bootstrap( 741 self.residuals_, 742 h=h, 743 block_size=self.block_size, 744 seed=self.seed + 100 * i, 745 ) 746 for i in range(self.replications) 747 ) 748 749 if "vine" in 
self.type_pi: 750 if self.verbose == 1: 751 self.residuals_sims_ = tuple( 752 vinecopula_sample( 753 x=self.residuals_, 754 n_samples=h, 755 method=self.type_pi, 756 random_state=self.seed + 100 * i, 757 ) 758 for i in tqdm(range(self.replications)) 759 ) 760 elif self.verbose == 0: 761 self.residuals_sims_ = tuple( 762 vinecopula_sample( 763 x=self.residuals_, 764 n_samples=h, 765 method=self.type_pi, 766 random_state=self.seed + 100 * i, 767 ) 768 for i in range(self.replications) 769 ) 770 771 mean_ = deepcopy(self.mean_) 772 773 for i in range(h): 774 775 new_obs = ts.reformat_response(mean_, self.lags) 776 new_X = new_obs.reshape(1, -1) 777 cooked_new_X = self.cook_test_set(new_X, **kwargs) 778 779 if "return_std" in kwargs: 780 self.preds_std_.append( 781 [ 782 np.asarray( 783 self.fit_objs_[i].predict(cooked_new_X, return_std=True)[1] 784 ).item() 785 for i in range(self.n_series) 786 ] 787 ) 788 789 if "return_pi" in kwargs: 790 for i in range(self.n_series): 791 preds_pi = self.fit_objs_[i].predict(cooked_new_X, **kwargs) 792 mean_pi_.append(preds_pi.mean[0]) 793 lower_pi_.append(preds_pi.lower[0]) 794 upper_pi_.append(preds_pi.upper[0]) 795 796 predicted_cooked_new_X = np.asarray( 797 [ 798 np.asarray(self.fit_objs_[i].predict(cooked_new_X)).item() 799 for i in range(self.init_n_series_) 800 ] 801 ) 802 803 preds = np.asarray(y_means_ + predicted_cooked_new_X) 804 805 # Create full row with both predictions and external regressors 806 if self.xreg_ is not None and "xreg" in kwargs: 807 next_xreg = kwargs["xreg"].iloc[i : i + 1].values.flatten() 808 full_row = np.concatenate([preds, next_xreg]) 809 else: 810 full_row = preds 811 812 # Create a new row with same number of columns as mean_ 813 new_row = np.zeros((1, mean_.shape[1])) 814 new_row[0, : full_row.shape[0]] = full_row 815 816 # Maintain the full dimensionality by using vstack instead of rbind 817 mean_ = np.vstack([new_row, mean_[:-1]]) 818 819 # Final output should only include the target columns 820 self.mean_ = pd.DataFrame( 821 mean_[0:h, : self.init_n_series_][::-1], 822 columns=self.df_.columns[: self.init_n_series_], 823 index=self.output_dates_, 824 ) 825 826 # function's return ---------------------------------------------------------------------- 827 if ( 828 (("return_std" not in kwargs) and ("return_pi" not in kwargs)) 829 and (self.type_pi not in ("gaussian", "scp")) 830 ) or ("vine" in self.type_pi): 831 832 if self.replications is None: 833 return self.mean_.iloc[:, : self.init_n_series_] 834 835 # if "return_std" not in kwargs and self.replications is not None 836 meanf = [] 837 medianf = [] 838 lower = [] 839 upper = [] 840 841 if "scp2" in self.type_pi: 842 843 if self.verbose == 1: 844 self.sims_ = tuple( 845 ( 846 self.mean_ 847 + self.residuals_sims_[i] 848 * self.residuals_std_dev_[np.newaxis, :] 849 for i in tqdm(range(self.replications)) 850 ) 851 ) 852 elif self.verbose == 0: 853 self.sims_ = tuple( 854 ( 855 self.mean_ 856 + self.residuals_sims_[i] 857 * self.residuals_std_dev_[np.newaxis, :] 858 for i in range(self.replications) 859 ) 860 ) 861 else: 862 863 if self.verbose == 1: 864 self.sims_ = tuple( 865 ( 866 self.mean_ + self.residuals_sims_[i] 867 for i in tqdm(range(self.replications)) 868 ) 869 ) 870 elif self.verbose == 0: 871 self.sims_ = tuple( 872 ( 873 self.mean_ + self.residuals_sims_[i] 874 for i in range(self.replications) 875 ) 876 ) 877 878 DescribeResult = namedtuple( 879 "DescribeResult", ("mean", "sims", "lower", "upper") 880 ) 881 for ix in range(self.init_n_series_): 882 
sims_ix = getsims(self.sims_, ix) 883 if self.agg == "mean": 884 meanf.append(np.mean(sims_ix, axis=1)) 885 else: 886 medianf.append(np.median(sims_ix, axis=1)) 887 lower.append(np.quantile(sims_ix, q=self.alpha_ / 200, axis=1)) 888 upper.append(np.quantile(sims_ix, q=1 - self.alpha_ / 200, axis=1)) 889 self.mean_ = pd.DataFrame( 890 np.asarray(meanf).T, 891 columns=self.series_names[: self.init_n_series_], # self.df_.columns, 892 index=self.output_dates_, 893 ) 894 895 self.lower_ = pd.DataFrame( 896 np.asarray(lower).T, 897 columns=self.series_names[: self.init_n_series_], # self.df_.columns, 898 index=self.output_dates_, 899 ) 900 901 self.upper_ = pd.DataFrame( 902 np.asarray(upper).T, 903 columns=self.series_names[: self.init_n_series_], # self.df_.columns, 904 index=self.output_dates_, 905 ) 906 907 try: 908 self.median_ = pd.DataFrame( 909 np.asarray(medianf).T, 910 columns=self.series_names[ 911 : self.init_n_series_ 912 ], # self.df_.columns, 913 index=self.output_dates_, 914 ) 915 except Exception as e: 916 pass 917 918 return DescribeResult(self.mean_, self.sims_, self.lower_, self.upper_) 919 920 if ( 921 (("return_std" in kwargs) or ("return_pi" in kwargs)) 922 and (self.type_pi not in ("gaussian", "scp")) 923 ) or "vine" in self.type_pi: 924 DescribeResult = namedtuple("DescribeResult", ("mean", "lower", "upper")) 925 926 self.mean_ = pd.DataFrame( 927 np.asarray(self.mean_), 928 columns=self.series_names, # self.df_.columns, 929 index=self.output_dates_, 930 ) 931 932 if "return_std" in kwargs: 933 934 self.preds_std_ = np.asarray(self.preds_std_) 935 print("self.preds_std_", self.preds_std_) 936 print("self.mean_", self.mean_) 937 print("pi_multiplier", pi_multiplier) 938 939 self.lower_ = pd.DataFrame( 940 self.mean_.values - pi_multiplier * self.preds_std_, 941 columns=self.series_names, # self.df_.columns, 942 index=self.output_dates_, 943 ) 944 945 self.upper_ = pd.DataFrame( 946 self.mean_.values + pi_multiplier * self.preds_std_, 947 columns=self.series_names, # self.df_.columns, 948 index=self.output_dates_, 949 ) 950 951 if "return_pi" in kwargs: 952 953 self.lower_ = pd.DataFrame( 954 np.asarray(lower_pi_).reshape(h, self.n_series) 955 + y_means_[np.newaxis, :], 956 columns=self.series_names, # self.df_.columns, 957 index=self.output_dates_, 958 ) 959 960 self.upper_ = pd.DataFrame( 961 np.asarray(upper_pi_).reshape(h, self.n_series) 962 + y_means_[np.newaxis, :], 963 columns=self.series_names, # self.df_.columns, 964 index=self.output_dates_, 965 ) 966 967 res = DescribeResult(self.mean_, self.lower_, self.upper_) 968 969 if self.xreg_ is not None: 970 if len(self.xreg_.shape) > 1: 971 res2 = mx.tuple_map( 972 res, 973 lambda x: mo.delete_last_columns( 974 x, num_columns=self.xreg_.shape[1] 975 ), 976 ) 977 else: 978 res2 = mx.tuple_map( 979 res, lambda x: mo.delete_last_columns(x, num_columns=1) 980 ) 981 return DescribeResult(res2[0], res2[1], res2[2]) 982 983 return res 984 985 if self.type_pi == "gaussian": 986 987 DescribeResult = namedtuple("DescribeResult", ("mean", "lower", "upper")) 988 989 self.mean_ = pd.DataFrame( 990 np.asarray(self.mean_), 991 columns=self.series_names, # self.df_.columns, 992 index=self.output_dates_, 993 ) 994 995 self.lower_ = pd.DataFrame( 996 self.mean_.values - pi_multiplier * self.gaussian_preds_std_, 997 columns=self.series_names, # self.df_.columns, 998 index=self.output_dates_, 999 ) 1000 1001 self.upper_ = pd.DataFrame( 1002 self.mean_.values + pi_multiplier * self.gaussian_preds_std_, 1003 columns=self.series_names, # 
self.df_.columns, 1004 index=self.output_dates_, 1005 ) 1006 1007 res = DescribeResult(self.mean_, self.lower_, self.upper_) 1008 1009 if self.xreg_ is not None: 1010 if len(self.xreg_.shape) > 1: 1011 res2 = mx.tuple_map( 1012 res, 1013 lambda x: mo.delete_last_columns( 1014 x, num_columns=self.xreg_.shape[1] 1015 ), 1016 ) 1017 else: 1018 res2 = mx.tuple_map( 1019 res, lambda x: mo.delete_last_columns(x, num_columns=1) 1020 ) 1021 return DescribeResult(res2[0], res2[1], res2[2]) 1022 1023 return res 1024 1025 # After prediction loop, ensure sims only contain target columns 1026 if self.sims_ is not None: 1027 if self.verbose == 1: 1028 self.sims_ = tuple( 1029 sim[:h,] # Only keep target columns and h rows 1030 for sim in tqdm(self.sims_) 1031 ) 1032 elif self.verbose == 0: 1033 self.sims_ = tuple( 1034 sim[:h,] # Only keep target columns and h rows 1035 for sim in self.sims_ 1036 ) 1037 1038 # Convert numpy arrays to DataFrames with proper columns 1039 self.sims_ = tuple( 1040 pd.DataFrame( 1041 sim, 1042 columns=self.df_.columns[: self.init_n_series_], 1043 index=self.output_dates_, 1044 ) 1045 for sim in self.sims_ 1046 ) 1047 1048 if self.type_pi in ("kde", "bootstrap", "block-bootstrap", "vine-copula"): 1049 if self.xreg_ is not None: 1050 # Use getsimsxreg when external regressors are present 1051 target_cols = self.df_.columns[: self.init_n_series_] 1052 self.sims_ = getsimsxreg(self.sims_, self.output_dates_, target_cols) 1053 else: 1054 # Use original getsims for backward compatibility 1055 self.sims_ = getsims(self.sims_)
Forecast all the time series, h steps ahead
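A hedged usage sketch for `predict` (toy data; BayesianRidge is chosen because it supports `return_std`, and `type_pi='kde'` is set explicitly so that the return types noted in the comments hold):

```python
import nnetsauce as ns
import numpy as np
from sklearn.linear_model import BayesianRidge

np.random.seed(0)
M = np.random.rand(25, 3)

obj_MTS = ns.MTS(BayesianRidge(), lags=1, n_hidden_features=5, type_pi="kde")
obj_MTS.fit(M)

point_forecasts = obj_MTS.predict(h=10)                  # DataFrame of point forecasts (no replications)
res = obj_MTS.predict(h=10, level=90, return_std=True)   # Bayesian intervals: res.mean, res.lower, res.upper
```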
1057 def score(self, X, training_index, testing_index, scoring=None, **kwargs): 1058 """Train on training_index, score on testing_index.""" 1059 1060 assert ( 1061 bool(set(training_index).intersection(set(testing_index))) == False 1062 ), "Non-overlapping 'training_index' and 'testing_index' required" 1063 1064 # Dimensions 1065 try: 1066 # multivariate time series 1067 n, p = X.shape 1068 except: 1069 # univariate time series 1070 n = X.shape[0] 1071 p = 1 1072 1073 # Training and testing sets 1074 if p > 1: 1075 X_train = X[training_index, :] 1076 X_test = X[testing_index, :] 1077 else: 1078 X_train = X[training_index] 1079 X_test = X[testing_index] 1080 1081 # Horizon 1082 h = len(testing_index) 1083 assert ( 1084 len(training_index) + h 1085 ) <= n, "Please check lengths of training and testing windows" 1086 1087 # Fit and predict 1088 self.fit(X_train, **kwargs) 1089 preds = self.predict(h=h, **kwargs) 1090 1091 if scoring is None: 1092 scoring = "neg_root_mean_squared_error" 1093 1094 # check inputs 1095 assert scoring in ( 1096 "explained_variance", 1097 "neg_mean_absolute_error", 1098 "neg_mean_squared_error", 1099 "neg_root_mean_squared_error", 1100 "neg_mean_squared_log_error", 1101 "neg_median_absolute_error", 1102 "r2", 1103 ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \ 1104 'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \ 1105 'neg_median_absolute_error', 'r2')" 1106 1107 scoring_options = { 1108 "explained_variance": skm2.explained_variance_score, 1109 "neg_mean_absolute_error": skm2.mean_absolute_error, 1110 "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2), 1111 "neg_root_mean_squared_error": lambda x, y: np.sqrt(np.mean((x - y) ** 2)), 1112 "neg_mean_squared_log_error": skm2.mean_squared_log_error, 1113 "neg_median_absolute_error": skm2.median_absolute_error, 1114 "r2": skm2.r2_score, 1115 } 1116 1117 return scoring_options[scoring](X_test, preds)
Train on training_index, score on testing_index.
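A hedged sketch of `score` on a toy array, with non-overlapping index ranges; the Ridge learner and `type_pi='kde'` are illustrative choices (with no replications, `predict` then returns point forecasts that the scoring function can consume):

```python
import nnetsauce as ns
import numpy as np
from sklearn.linear_model import Ridge

np.random.seed(0)
M = np.random.rand(30, 3)

obj_MTS = ns.MTS(Ridge(), lags=1, n_hidden_features=5, type_pi="kde")
training_index = np.arange(0, 25)
testing_index = np.arange(25, 30)  # must not overlap with training_index
score = obj_MTS.score(M, training_index, testing_index,
                      scoring="neg_root_mean_squared_error")
print(score)
```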
16class MultitaskClassifier(Base, ClassifierMixin): 17 """Multitask Classification model based on regression models, with shared covariates 18 19 Parameters: 20 21 obj: object 22 any object (must be a regression model) containing a method fit (obj.fit()) 23 and a method predict (obj.predict()) 24 25 n_hidden_features: int 26 number of nodes in the hidden layer 27 28 activation_name: str 29 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 30 31 a: float 32 hyperparameter for 'prelu' or 'elu' activation function 33 34 nodes_sim: str 35 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 36 'uniform' 37 38 bias: boolean 39 indicates if the hidden layer contains a bias term (True) or not 40 (False) 41 42 dropout: float 43 regularization parameter; (random) percentage of nodes dropped out 44 of the training 45 46 direct_link: boolean 47 indicates if the original predictors are included (True) in model's 48 fitting or not (False) 49 50 n_clusters: int 51 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: 52 no clustering) 53 54 cluster_encode: bool 55 defines how the variable containing clusters is treated (default is one-hot) 56 if `False`, then labels are used, without one-hot encoding 57 58 type_clust: str 59 type of clustering method: currently k-means ('kmeans') or Gaussian 60 Mixture Model ('gmm') 61 62 type_scaling: a tuple of 3 strings 63 scaling methods for inputs, hidden layer, and clustering respectively 64 (and when relevant). 65 Currently available: standardization ('std') or MinMax scaling ('minmax') 66 67 col_sample: float 68 percentage of covariates randomly chosen for training 69 70 row_sample: float 71 percentage of rows chosen for training, by stratified bootstrapping 72 73 seed: int 74 reproducibility seed for nodes_sim=='uniform' 75 76 backend: str 77 "cpu" or "gpu" or "tpu" 78 79 Attributes: 80 81 fit_objs_: dict 82 objects adjusted to each individual time series 83 84 n_classes_: int 85 number of classes for the classifier 86 87 Examples: 88 89 See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/mtask_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/mtask_classification.py) 90 91 ```python 92 import nnetsauce as ns 93 import numpy as np 94 from sklearn.datasets import load_breast_cancer 95 from sklearn.linear_model import LinearRegression 96 from sklearn.model_selection import train_test_split 97 from sklearn import metrics 98 from time import time 99 100 breast_cancer = load_breast_cancer() 101 Z = breast_cancer.data 102 t = breast_cancer.target 103 104 X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2, 105 random_state=123+2*10) 106 107 # Linear Regression is used 108 regr = LinearRegression() 109 fit_obj = ns.MultitaskClassifier(regr, n_hidden_features=5, 110 n_clusters=2, type_clust="gmm") 111 112 start = time() 113 fit_obj.fit(X_train, y_train) 114 print(f"Elapsed {time() - start}") 115 116 print(fit_obj.score(X_test, y_test)) 117 print(fit_obj.score(X_test, y_test, scoring="roc_auc")) 118 119 start = time() 120 preds = fit_obj.predict(X_test) 121 print(f"Elapsed {time() - start}") 122 print(metrics.classification_report(preds, y_test)) 123 ``` 124 125 """ 126 127 # construct the object ----- 128 129 def __init__( 130 self, 131 obj, 132 n_hidden_features=5, 133 activation_name="relu", 134 a=0.01, 135 nodes_sim="sobol", 136 bias=True, 137 dropout=0, 138 direct_link=True, 139 n_clusters=2, 140 cluster_encode=True, 141 type_clust="kmeans", 142 
type_scaling=("std", "std", "std"), 143 col_sample=1, 144 row_sample=1, 145 seed=123, 146 backend="cpu", 147 ): 148 super().__init__( 149 n_hidden_features=n_hidden_features, 150 activation_name=activation_name, 151 a=a, 152 nodes_sim=nodes_sim, 153 bias=bias, 154 dropout=dropout, 155 direct_link=direct_link, 156 n_clusters=n_clusters, 157 cluster_encode=cluster_encode, 158 type_clust=type_clust, 159 type_scaling=type_scaling, 160 col_sample=col_sample, 161 row_sample=row_sample, 162 seed=seed, 163 backend=backend, 164 ) 165 166 self.type_fit = "classification" 167 self.obj = obj 168 self.fit_objs_ = {} 169 170 def fit(self, X, y, sample_weight=None, **kwargs): 171 """Fit MultitaskClassifier to training data (X, y). 172 173 Args: 174 175 X: {array-like}, shape = [n_samples, n_features] 176 Training vectors, where n_samples is the number 177 of samples and n_features is the number of features. 178 179 y: array-like, shape = [n_samples] 180 Target values. 181 182 **kwargs: additional parameters to be passed to 183 self.cook_training_set or self.obj.fit 184 185 Returns: 186 187 self: object 188 189 """ 190 191 assert mx.is_factor(y), "y must contain only integers" 192 193 output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 194 195 self.classes_ = np.unique(y) # for compatibility with sklearn 196 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 197 198 # multitask response 199 Y = mo.one_hot_encode2(output_y, self.n_classes_) 200 201 # if sample_weight is None: 202 for i in range(self.n_classes_): 203 self.fit_objs_[i] = deepcopy(self.obj.fit(scaled_Z, Y[:, i], **kwargs)) 204 205 self.classes_ = np.unique(y) 206 return self 207 208 def predict(self, X, **kwargs): 209 """Predict test data X. 210 211 Args: 212 213 X: {array-like}, shape = [n_samples, n_features] 214 Training vectors, where n_samples is the number 215 of samples and n_features is the number of features. 216 217 **kwargs: additional parameters to be passed to 218 self.cook_test_set 219 220 Returns: 221 222 model predictions: {array-like} 223 224 """ 225 return np.argmax(self.predict_proba(X, **kwargs), axis=1) 226 227 def predict_proba(self, X, **kwargs): 228 """Predict probabilities for test data X. 229 230 Args: 231 232 X: {array-like}, shape = [n_samples, n_features] 233 Training vectors, where n_samples is the number 234 of samples and n_features is the number of features. 235 236 **kwargs: additional parameters to be passed to 237 self.cook_test_set 238 239 Returns: 240 241 probability estimates for test data: {array-like} 242 243 """ 244 245 shape_X = X.shape 246 247 probs = np.zeros((shape_X[0], self.n_classes_)) 248 249 if len(shape_X) == 1: 250 n_features = shape_X[0] 251 252 new_X = mo.rbind( 253 X.reshape(1, n_features), 254 np.ones(n_features).reshape(1, n_features), 255 ) 256 257 Z = self.cook_test_set(new_X, **kwargs) 258 259 # loop on all the classes 260 for i in range(self.n_classes_): 261 probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0] 262 263 else: 264 Z = self.cook_test_set(X, **kwargs) 265 266 # loop on all the classes 267 for i in range(self.n_classes_): 268 probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs) 269 270 expit_raw_probs = expit(probs) 271 272 return expit_raw_probs / expit_raw_probs.sum(axis=1)[:, None] 273 274 def decision_function(self, X, **kwargs): 275 """Compute the decision function of X. 276 277 Parameters: 278 X: {array-like}, shape = [n_samples, n_features] 279 Samples to compute decision function for. 
280 281 **kwargs: additional parameters to be passed to 282 self.cook_test_set 283 284 Returns: 285 array-like of shape (n_samples,) or (n_samples, n_classes) 286 Decision function of the input samples. The order of outputs is the same 287 as that of the classes passed to fit. 288 """ 289 if not hasattr(self.obj, "decision_function"): 290 # If base classifier doesn't have decision_function, use predict_proba 291 proba = self.predict_proba(X, **kwargs) 292 if proba.shape[1] == 2: 293 return proba[:, 1] # For binary classification 294 return proba # For multiclass 295 296 if len(X.shape) == 1: 297 n_features = X.shape[0] 298 new_X = mo.rbind( 299 X.reshape(1, n_features), 300 np.ones(n_features).reshape(1, n_features), 301 ) 302 303 return ( 304 self.obj.decision_function( 305 self.cook_test_set(new_X, **kwargs), **kwargs 306 ) 307 )[0] 308 309 return self.obj.decision_function(self.cook_test_set(X, **kwargs), **kwargs)
Multitask Classification model based on regression models, with shared covariates
Parameters:
obj: object
any object (must be a regression model) containing a method fit (obj.fit())
and a method predict (obj.predict())
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
fit_objs_: dict
fitted regression objects, one per class
n_classes_: int
number of classes for the classifier
Examples:
See also https://github.com/Techtonique/nnetsauce/blob/master/examples/mtask_classification.py
```python
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time

breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target

X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
                                                    random_state=123+2*10)

# Linear Regression is used
regr = LinearRegression()
fit_obj = ns.MultitaskClassifier(regr, n_hidden_features=5,
                                 n_clusters=2, type_clust="gmm")

start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")

print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))

start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
```
170 def fit(self, X, y, sample_weight=None, **kwargs): 171 """Fit MultitaskClassifier to training data (X, y). 172 173 Args: 174 175 X: {array-like}, shape = [n_samples, n_features] 176 Training vectors, where n_samples is the number 177 of samples and n_features is the number of features. 178 179 y: array-like, shape = [n_samples] 180 Target values. 181 182 **kwargs: additional parameters to be passed to 183 self.cook_training_set or self.obj.fit 184 185 Returns: 186 187 self: object 188 189 """ 190 191 assert mx.is_factor(y), "y must contain only integers" 192 193 output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 194 195 self.classes_ = np.unique(y) # for compatibility with sklearn 196 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 197 198 # multitask response 199 Y = mo.one_hot_encode2(output_y, self.n_classes_) 200 201 # if sample_weight is None: 202 for i in range(self.n_classes_): 203 self.fit_objs_[i] = deepcopy(self.obj.fit(scaled_Z, Y[:, i], **kwargs)) 204 205 self.classes_ = np.unique(y) 206 return self
Fit MultitaskClassifier to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
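The `fit` shown above one-hot encodes the class labels and fits a deep copy of the base regressor to each column of the one-hot response. A minimal standalone sketch of that idea (scikit-learn and numpy only; nnetsauce additionally builds hidden-layer features and clustering before this step):

```python
import numpy as np
from copy import deepcopy
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LinearRegression

X, y = load_breast_cancer(return_X_y=True)
n_classes = len(np.unique(y))
Y = np.eye(n_classes)[y]  # one-hot encoded response, shape (n_samples, n_classes)

# one fitted copy of the base regressor per class, as in fit above
base = LinearRegression()
fit_objs = {i: deepcopy(base.fit(X, Y[:, i])) for i in range(n_classes)}
```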
208 def predict(self, X, **kwargs): 209 """Predict test data X. 210 211 Args: 212 213 X: {array-like}, shape = [n_samples, n_features] 214 Training vectors, where n_samples is the number 215 of samples and n_features is the number of features. 216 217 **kwargs: additional parameters to be passed to 218 self.cook_test_set 219 220 Returns: 221 222 model predictions: {array-like} 223 224 """ 225 return np.argmax(self.predict_proba(X, **kwargs), axis=1)
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
227 def predict_proba(self, X, **kwargs): 228 """Predict probabilities for test data X. 229 230 Args: 231 232 X: {array-like}, shape = [n_samples, n_features] 233 Training vectors, where n_samples is the number 234 of samples and n_features is the number of features. 235 236 **kwargs: additional parameters to be passed to 237 self.cook_test_set 238 239 Returns: 240 241 probability estimates for test data: {array-like} 242 243 """ 244 245 shape_X = X.shape 246 247 probs = np.zeros((shape_X[0], self.n_classes_)) 248 249 if len(shape_X) == 1: 250 n_features = shape_X[0] 251 252 new_X = mo.rbind( 253 X.reshape(1, n_features), 254 np.ones(n_features).reshape(1, n_features), 255 ) 256 257 Z = self.cook_test_set(new_X, **kwargs) 258 259 # loop on all the classes 260 for i in range(self.n_classes_): 261 probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0] 262 263 else: 264 Z = self.cook_test_set(X, **kwargs) 265 266 # loop on all the classes 267 for i in range(self.n_classes_): 268 probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs) 269 270 expit_raw_probs = expit(probs) 271 272 return expit_raw_probs / expit_raw_probs.sum(axis=1)[:, None]
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
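As the source above shows, `predict_proba` maps the raw per-class regression outputs through the sigmoid (`expit`) and then normalizes each row so the class probabilities sum to one. A small numerical illustration:

```python
import numpy as np
from scipy.special import expit

raw_scores = np.array([[ 1.2, -0.3],   # one row per sample,
                       [-0.8,  0.9]])  # one column per class
probs = expit(raw_scores)              # squash raw scores into (0, 1)
probs = probs / probs.sum(axis=1)[:, None]  # normalize rows
print(probs.sum(axis=1))               # each row sums to 1
```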
98class NeuralNetRegressor(BaseEstimator, RegressorMixin): 99 """ 100 (Pretrained) Neural Network Regressor. 101 102 Parameters: 103 104 hidden_layer_sizes : tuple, default=(100,) 105 The number of neurons in each hidden layer. 106 max_iter : int, default=100 107 The maximum number of iterations to train the model. 108 learning_rate : float, default=0.01 109 The learning rate for the optimizer. 110 l1_ratio : float, default=0.5 111 The ratio of L1 regularization. 112 alpha : float, default=1e-6 113 The regularization parameter. 114 activation_name : str, default="relu" 115 The activation function to use. 116 dropout : float, default=0.0 117 The dropout rate. 118 random_state : int, default=None 119 The random state for the random number generator. 120 weights : list, default=None 121 The weights to initialize the model with. 122 123 Attributes: 124 125 weights : list 126 The weights of the model. 127 params : list 128 The parameters of the model. 129 scaler_ : sklearn.preprocessing.StandardScaler 130 The scaler used to standardize the input features. 131 y_mean_ : float 132 The mean of the target variable. 133 134 Methods: 135 136 fit(X, y) 137 Fit the model to the data. 138 predict(X) 139 Predict the target variable. 140 get_weights() 141 Get the weights of the model. 142 set_weights(weights) 143 Set the weights of the model. 144 """ 145 146 def __init__( 147 self, 148 hidden_layer_sizes=None, 149 max_iter=100, 150 learning_rate=0.01, 151 l1_ratio=0.5, 152 alpha=1e-6, 153 activation_name="relu", 154 dropout=0, 155 weights=None, 156 random_state=None, 157 ): 158 if weights is None and hidden_layer_sizes is None: 159 hidden_layer_sizes = (100,) # default value if neither is provided 160 self.hidden_layer_sizes = hidden_layer_sizes 161 self.max_iter = max_iter 162 self.learning_rate = learning_rate 163 self.l1_ratio = l1_ratio 164 self.alpha = alpha 165 self.activation_name = activation_name 166 self.dropout = dropout 167 self.weights = weights 168 self.random_state = random_state 169 self.params = None 170 self.scaler_ = StandardScaler() 171 self.y_mean_ = None 172 173 def _validate_weights(self, input_dim): 174 """Validate that weights dimensions are coherent.""" 175 if not self.weights: 176 return False 177 178 try: 179 # Check each layer's weights and biases 180 prev_dim = input_dim 181 for W, b in self.weights: 182 # Check weight matrix dimensions 183 if W.shape[0] != prev_dim: 184 raise ValueError( 185 f"Weight matrix input dimension {W.shape[0]} does not match, previous layer output dimension {prev_dim}" 186 ) 187 # Check bias dimension matches weight matrix output 188 if W.shape[1] != b.shape[0]: 189 raise ValueError( 190 f"Bias dimension {b.shape[0]} does not match weight matrix, output dimension {W.shape[1]}" 191 ) 192 prev_dim = W.shape[1] 193 194 # Check final output dimension is 1 for regression 195 if prev_dim != 1: 196 raise ValueError( 197 f"Final layer output dimension {prev_dim} must be 1 for regression" 198 ) 199 200 return True 201 except (AttributeError, IndexError): 202 raise ValueError( 203 "Weights format is invalid. 
Expected list of (weight, bias) tuples" 204 ) 205 206 def fit(self, X, y): 207 # Standardize the input features 208 X = self.scaler_.fit_transform(X) 209 # Ensure y is 2D for consistency 210 y = y.reshape(-1, 1) 211 self.y_mean_ = jnp.mean(y) 212 y = y - self.y_mean_ 213 # Validate or initialize weights 214 if self.weights is not None: 215 if self._validate_weights(X.shape[1]): 216 self.params = self.weights 217 else: 218 if self.hidden_layer_sizes is None: 219 raise ValueError( 220 "Either weights or hidden_layer_sizes must be provided" 221 ) 222 self.params = initialize_params( 223 X.shape[1], self.hidden_layer_sizes, self.random_state 224 ) 225 loss_fn = partial(loss, l1_ratio=self.l1_ratio, alpha=self.alpha) 226 grad_loss = jit(grad(loss_fn)) # compiled gradient evaluation function 227 perex_grads = jit( 228 vmap(grad_loss, in_axes=(None, 0, 0)) 229 ) # fast per-example grads 230 # Training loop 231 for _ in range(self.max_iter): 232 grads = perex_grads(self.params, X, y) 233 # Average gradients across examples 234 grads = jax.tree_map(lambda g: jnp.mean(g, axis=0), grads) 235 # Update parameters 236 self.params = [ 237 (W - self.learning_rate * dW, b - self.learning_rate * db) 238 for (W, b), (dW, db) in zip(self.params, grads) 239 ] 240 # Store final weights 241 self.weights = self.params 242 return self 243 244 def get_weights(self): 245 """Return the current weights of the model.""" 246 if self.weights is None: 247 raise ValueError("No weights available. Model has not been fitted yet.") 248 return self.weights 249 250 def set_weights(self, weights): 251 """Set the weights of the model manually.""" 252 self.weights = weights 253 self.params = weights 254 255 def predict(self, X): 256 X = self.scaler_.transform(X) 257 if self.params is None: 258 raise ValueError("Model has not been fitted yet.") 259 predictions = predict_internal( 260 self.params, 261 X, 262 activation_func=self.activation_name, 263 dropout=self.dropout, 264 seed=self.random_state, 265 ) 266 return predictions.reshape(-1) + self.y_mean_
(Pretrained) Neural Network Regressor.
Parameters:
hidden_layer_sizes : tuple, default=(100,)
The number of neurons in each hidden layer.
max_iter : int, default=100
The maximum number of iterations to train the model.
learning_rate : float, default=0.01
The learning rate for the optimizer.
l1_ratio : float, default=0.5
The ratio of L1 regularization.
alpha : float, default=1e-6
The regularization parameter.
activation_name : str, default="relu"
The activation function to use.
dropout : float, default=0.0
The dropout rate.
random_state : int, default=None
The random state for the random number generator.
weights : list, default=None
The weights to initialize the model with.
Attributes:
weights : list
The weights of the model.
params : list
The parameters of the model.
scaler_ : sklearn.preprocessing.StandardScaler
The scaler used to standardize the input features.
y_mean_ : float
The mean of the target variable.
Methods:
fit(X, y)
Fit the model to the data.
predict(X)
Predict the target variable.
get_weights()
Get the weights of the model.
set_weights(weights)
Set the weights of the model.
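The docstring lists no example, so here is a minimal usage sketch, assuming the class is importable as `ns.NeuralNetRegressor` like the other estimators documented here, and that JAX is installed (fit and predict rely on it):

```python
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=123)

regr = ns.NeuralNetRegressor(hidden_layer_sizes=(50, 25),
                             max_iter=100,
                             learning_rate=0.01,
                             random_state=123)
regr.fit(X_train, y_train)
preds = regr.predict(X_test)
print(mean_squared_error(y_test, preds))
```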
206 def fit(self, X, y): 207 # Standardize the input features 208 X = self.scaler_.fit_transform(X) 209 # Ensure y is 2D for consistency 210 y = y.reshape(-1, 1) 211 self.y_mean_ = jnp.mean(y) 212 y = y - self.y_mean_ 213 # Validate or initialize weights 214 if self.weights is not None: 215 if self._validate_weights(X.shape[1]): 216 self.params = self.weights 217 else: 218 if self.hidden_layer_sizes is None: 219 raise ValueError( 220 "Either weights or hidden_layer_sizes must be provided" 221 ) 222 self.params = initialize_params( 223 X.shape[1], self.hidden_layer_sizes, self.random_state 224 ) 225 loss_fn = partial(loss, l1_ratio=self.l1_ratio, alpha=self.alpha) 226 grad_loss = jit(grad(loss_fn)) # compiled gradient evaluation function 227 perex_grads = jit( 228 vmap(grad_loss, in_axes=(None, 0, 0)) 229 ) # fast per-example grads 230 # Training loop 231 for _ in range(self.max_iter): 232 grads = perex_grads(self.params, X, y) 233 # Average gradients across examples 234 grads = jax.tree_map(lambda g: jnp.mean(g, axis=0), grads) 235 # Update parameters 236 self.params = [ 237 (W - self.learning_rate * dW, b - self.learning_rate * db) 238 for (W, b), (dW, db) in zip(self.params, grads) 239 ] 240 # Store final weights 241 self.weights = self.params 242 return self
255 def predict(self, X): 256 X = self.scaler_.transform(X) 257 if self.params is None: 258 raise ValueError("Model has not been fitted yet.") 259 predictions = predict_internal( 260 self.params, 261 X, 262 activation_func=self.activation_name, 263 dropout=self.dropout, 264 seed=self.random_state, 265 ) 266 return predictions.reshape(-1) + self.y_mean_
10class NeuralNetClassifier(BaseEstimator, ClassifierMixin): 11 """ 12 (Pretrained) Neural Network Classifier. 13 14 Parameters: 15 16 hidden_layer_sizes : tuple, default=(100,) 17 The number of neurons in each hidden layer. 18 max_iter : int, default=100 19 The maximum number of iterations to train the model. 20 learning_rate : float, default=0.01 21 The learning rate for the optimizer. 22 l1_ratio : float, default=0.5 23 The ratio of L1 regularization. 24 alpha : float, default=1e-6 25 The regularization parameter. 26 activation_name : str, default="relu" 27 The activation function to use. 28 dropout : float, default=0.0 29 The dropout rate. 30 random_state : int, default=None 31 The random state for the random number generator. 32 weights : list, default=None 33 The weights to initialize the model with. 34 35 Attributes: 36 37 weights : list 38 The weights of the model. 39 params : list 40 The parameters of the model. 41 scaler_ : sklearn.preprocessing.StandardScaler 42 The scaler used to standardize the input features. 43 y_mean_ : float 44 The mean of the target variable. 45 46 Methods: 47 48 fit(X, y) 49 Fit the model to the data. 50 predict(X) 51 Predict the target variable. 52 predict_proba(X) 53 Predict the probability of the target variable. 54 get_weights() 55 Get the weights of the model. 56 set_weights(weights) 57 Set the weights of the model. 58 """ 59 60 def __init__( 61 self, 62 hidden_layer_sizes=(100,), 63 max_iter=100, 64 learning_rate=0.01, 65 weights=None, 66 l1_ratio=0.5, 67 alpha=1e-6, 68 activation_name="relu", 69 dropout=0.0, 70 random_state=None, 71 ): 72 self.hidden_layer_sizes = hidden_layer_sizes 73 self.max_iter = max_iter 74 self.learning_rate = learning_rate 75 self.weights = weights 76 self.l1_ratio = l1_ratio 77 self.alpha = alpha 78 self.activation_name = activation_name 79 self.dropout = dropout 80 self.random_state = random_state 81 self.regr = None 82 83 def fit(self, X, y): 84 """Fit the model to the data. 85 86 Parameters: 87 88 X: {array-like}, shape = [n_samples, n_features] 89 Training vectors, where n_samples is the number of samples and 90 n_features is the number of features. 91 y: array-like, shape = [n_samples] 92 Target values. 93 """ 94 regressor = NeuralNetRegressor( 95 hidden_layer_sizes=self.hidden_layer_sizes, 96 max_iter=self.max_iter, 97 learning_rate=self.learning_rate, 98 weights=self.weights, 99 l1_ratio=self.l1_ratio, 100 alpha=self.alpha, 101 activation_name=self.activation_name, 102 dropout=self.dropout, 103 random_state=self.random_state, 104 ) 105 self.regr = SimpleMultitaskClassifier(regressor) 106 self.regr.fit(X, y) 107 self.classes_ = np.unique(y) 108 self.n_classes_ = len(self.classes_) 109 self.n_tasks_ = 1 110 self.n_features_in_ = X.shape[1] 111 self.n_outputs_ = 1 112 self.n_samples_fit_ = X.shape[0] 113 self.n_samples_test_ = X.shape[0] 114 self.n_features_out_ = 1 115 self.n_outputs_ = 1 116 self.n_features_in_ = X.shape[1] 117 self.n_features_out_ = 1 118 self.n_outputs_ = 1 119 return self 120 121 def predict_proba(self, X): 122 """Predict the probability of the target variable. 123 124 Parameters: 125 126 X: {array-like}, shape = [n_samples, n_features] 127 Training vectors, where n_samples is the number of samples and 128 n_features is the number of features. 129 """ 130 return self.regr.predict_proba(X) 131 132 def predict(self, X): 133 """Predict the target variable. 
134 135 Parameters: 136 137 X: {array-like}, shape = [n_samples, n_features] 138 Training vectors, where n_samples is the number of samples and 139 n_features is the number of features. 140 """ 141 return self.regr.predict(X)
(Pretrained) Neural Network Classifier.
Parameters:
hidden_layer_sizes : tuple, default=(100,)
The number of neurons in each hidden layer.
max_iter : int, default=100
The maximum number of iterations to train the model.
learning_rate : float, default=0.01
The learning rate for the optimizer.
l1_ratio : float, default=0.5
The ratio of L1 regularization.
alpha : float, default=1e-6
The regularization parameter.
activation_name : str, default="relu"
The activation function to use.
dropout : float, default=0.0
The dropout rate.
random_state : int, default=None
The random state for the random number generator.
weights : list, default=None
The weights to initialize the model with.
Attributes:
weights : list
The weights of the model.
params : list
The parameters of the model.
scaler_ : sklearn.preprocessing.StandardScaler
The scaler used to standardize the input features.
y_mean_ : float
The mean of the target variable.
Methods:
fit(X, y)
Fit the model to the data.
predict(X)
Predict the target variable.
predict_proba(X)
Predict the probability of the target variable.
get_weights()
Get the weights of the model.
set_weights(weights)
Set the weights of the model.
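A minimal usage sketch under the same assumptions (class exposed as `ns.NeuralNetClassifier`, JAX available); internally, as the `fit` source below shows, the classifier wraps a `NeuralNetRegressor` inside a `SimpleMultitaskClassifier`:

```python
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn import metrics

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=123)

clf = ns.NeuralNetClassifier(hidden_layer_sizes=(50,), max_iter=100,
                             learning_rate=0.01, random_state=123)
clf.fit(X_train, y_train)
print(metrics.accuracy_score(y_test, clf.predict(X_test)))
print(clf.predict_proba(X_test)[:5])
```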
83 def fit(self, X, y): 84 """Fit the model to the data. 85 86 Parameters: 87 88 X: {array-like}, shape = [n_samples, n_features] 89 Training vectors, where n_samples is the number of samples and 90 n_features is the number of features. 91 y: array-like, shape = [n_samples] 92 Target values. 93 """ 94 regressor = NeuralNetRegressor( 95 hidden_layer_sizes=self.hidden_layer_sizes, 96 max_iter=self.max_iter, 97 learning_rate=self.learning_rate, 98 weights=self.weights, 99 l1_ratio=self.l1_ratio, 100 alpha=self.alpha, 101 activation_name=self.activation_name, 102 dropout=self.dropout, 103 random_state=self.random_state, 104 ) 105 self.regr = SimpleMultitaskClassifier(regressor) 106 self.regr.fit(X, y) 107 self.classes_ = np.unique(y) 108 self.n_classes_ = len(self.classes_) 109 self.n_tasks_ = 1 110 self.n_features_in_ = X.shape[1] 111 self.n_outputs_ = 1 112 self.n_samples_fit_ = X.shape[0] 113 self.n_samples_test_ = X.shape[0] 114 self.n_features_out_ = 1 115 self.n_outputs_ = 1 116 self.n_features_in_ = X.shape[1] 117 self.n_features_out_ = 1 118 self.n_outputs_ = 1 119 return self
Fit the model to the data.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
121 def predict_proba(self, X): 122 """Predict the probability of the target variable. 123 124 Parameters: 125 126 X: {array-like}, shape = [n_samples, n_features] 127 Training vectors, where n_samples is the number of samples and 128 n_features is the number of features. 129 """ 130 return self.regr.predict_proba(X)
Predict the probability of the target variable.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number of samples and
n_features is the number of features.
132 def predict(self, X): 133 """Predict the target variable. 134 135 Parameters: 136 137 X: {array-like}, shape = [n_samples, n_features] 138 Training vectors, where n_samples is the number of samples and 139 n_features is the number of features. 140 """ 141 return self.regr.predict(X)
Predict the target variable.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number of samples and
n_features is the number of features.
19class PredictionInterval(BaseEstimator, RegressorMixin): 20 """Class PredictionInterval: Obtain prediction intervals. 21 22 Attributes: 23 24 obj: an object; 25 fitted object containing methods `fit` and `predict` 26 27 method: a string; 28 method for constructing the prediction intervals. 29 Currently "splitconformal" (default) and "localconformal" 30 31 level: a float; 32 Confidence level for prediction intervals. Default is 95, 33 equivalent to a miscoverage error of 5 (%) 34 35 replications: an integer; 36 Number of replications for simulated conformal (default is `None`), 37 for type_pi = "bootstrap" or "kde" 38 39 type_pi: a string; 40 type of prediction interval: currently `None` 41 (split conformal without simulation), "kde" or "bootstrap" 42 43 type_split: a string; 44 "random" (random split of data) or "sequential" (sequential split of data) 45 46 seed: an integer; 47 Reproducibility of fit (there's a random split between fitting and calibration data) 48 """ 49 50 def __init__( 51 self, 52 obj, 53 method="splitconformal", 54 level=95, 55 type_pi=None, 56 type_split="random", 57 replications=None, 58 kernel=None, 59 agg="mean", 60 seed=123, 61 ): 62 63 self.obj = obj 64 self.method = method 65 self.level = level 66 self.type_pi = type_pi 67 self.type_split = type_split 68 self.replications = replications 69 self.kernel = kernel 70 self.agg = agg 71 self.seed = seed 72 self.alpha_ = 1 - self.level / 100 73 self.quantile_ = None 74 self.icp_ = None 75 self.calibrated_residuals_ = None 76 self.scaled_calibrated_residuals_ = None 77 self.calibrated_residuals_scaler_ = None 78 self.kde_ = None 79 self.aic_ = None 80 self.aicc_ = None 81 self.bic_ = None 82 83 def fit(self, X, y, sample_weight=None, **kwargs): 84 """Fit the `method` to training data (X, y). 85 86 Args: 87 88 X: array-like, shape = [n_samples, n_features]; 89 Training set vectors, where n_samples is the number 90 of samples and n_features is the number of features. 91 92 y: array-like, shape = [n_samples, ]; Target values. 93 94 sample_weight: array-like, shape = [n_samples] 95 Sample weights. 
96 97 """ 98 99 if self.type_split == "random": 100 101 X_train, X_calibration, y_train, y_calibration = train_test_split( 102 X, y, test_size=0.5, random_state=self.seed 103 ) 104 105 elif self.type_split == "sequential": 106 107 n_x = X.shape[0] 108 n_x_half = n_x // 2 109 first_half_idx = range(0, n_x_half) 110 second_half_idx = range(n_x_half, n_x) 111 X_train = X[first_half_idx, :] 112 X_calibration = X[second_half_idx, :] 113 y_train = y[first_half_idx] 114 y_calibration = y[second_half_idx] 115 116 if self.method == "splitconformal": 117 118 self.obj.fit(X_train, y_train) 119 preds_calibration = self.obj.predict(X_calibration) 120 self.calibrated_residuals_ = y_calibration - preds_calibration 121 absolute_residuals = np.abs(self.calibrated_residuals_) 122 self.calibrated_residuals_scaler_ = StandardScaler( 123 with_mean=True, with_std=True 124 ) 125 self.scaled_calibrated_residuals_ = ( 126 self.calibrated_residuals_scaler_.fit_transform( 127 self.calibrated_residuals_.reshape(-1, 1) 128 ).ravel() 129 ) 130 try: 131 # numpy version >= 1.22 132 self.quantile_ = np.quantile( 133 a=absolute_residuals, q=self.level / 100, method="higher" 134 ) 135 except Exception: 136 # numpy version < 1.22 137 self.quantile_ = np.quantile( 138 a=absolute_residuals, 139 q=self.level / 100, 140 interpolation="higher", 141 ) 142 143 if self.method == "localconformal": 144 145 mad_estimator = ExtraTreesRegressor() 146 normalizer = RegressorNormalizer(self.obj, mad_estimator, AbsErrorErrFunc()) 147 nc = RegressorNc(self.obj, AbsErrorErrFunc(), normalizer) 148 self.icp_ = IcpRegressor(nc) 149 self.icp_.fit(X_train, y_train) 150 self.icp_.calibrate(X_calibration, y_calibration) 151 152 return self 153 154 def predict(self, X, return_pi=False): 155 """Obtain predictions and prediction intervals 156 157 Args: 158 159 X: array-like, shape = [n_samples, n_features]; 160 Testing set vectors, where n_samples is the number 161 of samples and n_features is the number of features. 162 163 return_pi: boolean 164 Whether the prediction interval is returned or not. 165 Default is False, for compatibility with other _estimators_. 166 If True, a tuple containing the predictions + lower and upper 167 bounds is returned. 
168 169 """ 170 171 if self.method == "splitconformal": 172 pred = self.obj.predict(X) 173 174 if self.method == "localconformal": 175 pred = self.icp_.predict(X) 176 177 if self.method == "splitconformal": 178 179 if ( 180 self.replications is None and self.type_pi is None 181 ): # type_pi is not used here, no bootstrap or kde 182 183 if return_pi: 184 185 DescribeResult = namedtuple( 186 "DescribeResult", ("mean", "lower", "upper") 187 ) 188 return DescribeResult( 189 pred, pred - self.quantile_, pred + self.quantile_ 190 ) 191 192 else: 193 194 return pred 195 196 else: # self.method == "splitconformal" and if self.replications is not None, type_pi must be used 197 198 if self.type_pi is None: 199 self.type_pi = "kde" 200 raise Warning("type_pi must be set, setting to 'kde'") 201 202 if self.replications is None: 203 self.replications = 100 204 raise Warning("replications must be set, setting to 100") 205 206 assert self.type_pi in ( 207 "bootstrap", 208 "kde", 209 ), "`self.type_pi` must be in ('bootstrap', 'kde')" 210 211 if self.type_pi == "bootstrap": 212 np.random.seed(self.seed) 213 self.residuals_sims_ = np.asarray( 214 [ 215 np.random.choice( 216 a=self.scaled_calibrated_residuals_, 217 size=X.shape[0], 218 ) 219 for _ in range(self.replications) 220 ] 221 ).T 222 self.sims_ = np.asarray( 223 [ 224 pred 225 + self.calibrated_residuals_scaler_.scale_[0] 226 * self.residuals_sims_[:, i].ravel() 227 for i in range(self.replications) 228 ] 229 ).T 230 elif self.type_pi == "kde": 231 self.kde_ = gaussian_kde(dataset=self.scaled_calibrated_residuals_) 232 self.sims_ = np.asarray( 233 [ 234 pred 235 + self.calibrated_residuals_scaler_.scale_[0] 236 * self.kde_.resample( 237 size=X.shape[0], seed=self.seed + i 238 ).ravel() 239 for i in range(self.replications) 240 ] 241 ).T 242 243 self.mean_ = np.mean(self.sims_, axis=1) 244 self.lower_ = np.quantile(self.sims_, q=self.alpha_ / 200, axis=1) 245 self.upper_ = np.quantile(self.sims_, q=1 - self.alpha_ / 200, axis=1) 246 247 DescribeResult = namedtuple( 248 "DescribeResult", ("mean", "sims", "lower", "upper") 249 ) 250 251 return DescribeResult(self.mean_, self.sims_, self.lower_, self.upper_) 252 253 if self.method == "localconformal": 254 255 if self.replications is None: 256 257 if return_pi: 258 259 predictions_bounds = self.icp_.predict( 260 X, significance=1 - self.level 261 ) 262 DescribeResult = namedtuple( 263 "DescribeResult", ("mean", "lower", "upper") 264 ) 265 return DescribeResult( 266 pred, predictions_bounds[:, 0], predictions_bounds[:, 1] 267 ) 268 269 else: 270 271 return pred 272 273 else: # (self.method == "localconformal") and if self.replications is not None 274 275 raise NotImplementedError( 276 "When self.method == 'localconformal', there are no simulations" 277 )
Class PredictionInterval: Obtain prediction intervals.
Attributes:
obj: an object;
fitted object containing methods `fit` and `predict`
method: a string;
method for constructing the prediction intervals.
Currently "splitconformal" (default) and "localconformal"
level: a float;
Confidence level for prediction intervals. Default is 95,
equivalent to a miscoverage error of 5 (%)
replications: an integer;
Number of replications for simulated conformal (default is `None`),
for type_pi = "bootstrap" or "kde"
type_pi: a string;
type of prediction interval: currently `None`
(split conformal without simulation), "kde" or "bootstrap"
type_split: a string;
"random" (random split of data) or "sequential" (sequential split of data)
seed: an integer;
Reproducibility of fit (there's a random split between fitting and calibration data)
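A minimal usage sketch of the default split-conformal mode, assuming the class is exposed as `ns.PredictionInterval`; with `return_pi=True`, `predict` returns a namedtuple with `mean`, `lower` and `upper` fields:

```python
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=123)

pi = ns.PredictionInterval(obj=RandomForestRegressor(random_state=123),
                           method="splitconformal", level=95, seed=123)
pi.fit(X_train, y_train)
res = pi.predict(X_test, return_pi=True)   # namedtuple: mean, lower, upper
coverage = np.mean((y_test >= res.lower) & (y_test <= res.upper))
print(coverage)  # empirical coverage, expected to be close to 0.95
```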
83 def fit(self, X, y, sample_weight=None, **kwargs): 84 """Fit the `method` to training data (X, y). 85 86 Args: 87 88 X: array-like, shape = [n_samples, n_features]; 89 Training set vectors, where n_samples is the number 90 of samples and n_features is the number of features. 91 92 y: array-like, shape = [n_samples, ]; Target values. 93 94 sample_weight: array-like, shape = [n_samples] 95 Sample weights. 96 97 """ 98 99 if self.type_split == "random": 100 101 X_train, X_calibration, y_train, y_calibration = train_test_split( 102 X, y, test_size=0.5, random_state=self.seed 103 ) 104 105 elif self.type_split == "sequential": 106 107 n_x = X.shape[0] 108 n_x_half = n_x // 2 109 first_half_idx = range(0, n_x_half) 110 second_half_idx = range(n_x_half, n_x) 111 X_train = X[first_half_idx, :] 112 X_calibration = X[second_half_idx, :] 113 y_train = y[first_half_idx] 114 y_calibration = y[second_half_idx] 115 116 if self.method == "splitconformal": 117 118 self.obj.fit(X_train, y_train) 119 preds_calibration = self.obj.predict(X_calibration) 120 self.calibrated_residuals_ = y_calibration - preds_calibration 121 absolute_residuals = np.abs(self.calibrated_residuals_) 122 self.calibrated_residuals_scaler_ = StandardScaler( 123 with_mean=True, with_std=True 124 ) 125 self.scaled_calibrated_residuals_ = ( 126 self.calibrated_residuals_scaler_.fit_transform( 127 self.calibrated_residuals_.reshape(-1, 1) 128 ).ravel() 129 ) 130 try: 131 # numpy version >= 1.22 132 self.quantile_ = np.quantile( 133 a=absolute_residuals, q=self.level / 100, method="higher" 134 ) 135 except Exception: 136 # numpy version < 1.22 137 self.quantile_ = np.quantile( 138 a=absolute_residuals, 139 q=self.level / 100, 140 interpolation="higher", 141 ) 142 143 if self.method == "localconformal": 144 145 mad_estimator = ExtraTreesRegressor() 146 normalizer = RegressorNormalizer(self.obj, mad_estimator, AbsErrorErrFunc()) 147 nc = RegressorNc(self.obj, AbsErrorErrFunc(), normalizer) 148 self.icp_ = IcpRegressor(nc) 149 self.icp_.fit(X_train, y_train) 150 self.icp_.calibrate(X_calibration, y_calibration) 151 152 return self
Fit the `method` to training data (X, y).
Args:
X: array-like, shape = [n_samples, n_features];
Training set vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples, ]; Target values.
sample_weight: array-like, shape = [n_samples]
Sample weights.
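The calibration step performed by `fit` in the split-conformal case can be illustrated standalone: half of the data calibrates the interval half-width as the `level/100` "higher" quantile of absolute residuals (the `method` argument of `np.quantile` requires numpy >= 1.22):

```python
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_calib, y_train, y_calib = train_test_split(X, y, test_size=0.5,
                                                      random_state=123)

model = LinearRegression().fit(X_train, y_train)
abs_residuals = np.abs(y_calib - model.predict(X_calib))
q = np.quantile(abs_residuals, q=0.95, method="higher")  # level = 95

# prediction intervals on new data are then prediction - q, prediction + q
print(q)
```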
154 def predict(self, X, return_pi=False): 155 """Obtain predictions and prediction intervals 156 157 Args: 158 159 X: array-like, shape = [n_samples, n_features]; 160 Testing set vectors, where n_samples is the number 161 of samples and n_features is the number of features. 162 163 return_pi: boolean 164 Whether the prediction interval is returned or not. 165 Default is False, for compatibility with other _estimators_. 166 If True, a tuple containing the predictions + lower and upper 167 bounds is returned. 168 169 """ 170 171 if self.method == "splitconformal": 172 pred = self.obj.predict(X) 173 174 if self.method == "localconformal": 175 pred = self.icp_.predict(X) 176 177 if self.method == "splitconformal": 178 179 if ( 180 self.replications is None and self.type_pi is None 181 ): # type_pi is not used here, no bootstrap or kde 182 183 if return_pi: 184 185 DescribeResult = namedtuple( 186 "DescribeResult", ("mean", "lower", "upper") 187 ) 188 return DescribeResult( 189 pred, pred - self.quantile_, pred + self.quantile_ 190 ) 191 192 else: 193 194 return pred 195 196 else: # self.method == "splitconformal" and if self.replications is not None, type_pi must be used 197 198 if self.type_pi is None: 199 self.type_pi = "kde" 200 raise Warning("type_pi must be set, setting to 'kde'") 201 202 if self.replications is None: 203 self.replications = 100 204 raise Warning("replications must be set, setting to 100") 205 206 assert self.type_pi in ( 207 "bootstrap", 208 "kde", 209 ), "`self.type_pi` must be in ('bootstrap', 'kde')" 210 211 if self.type_pi == "bootstrap": 212 np.random.seed(self.seed) 213 self.residuals_sims_ = np.asarray( 214 [ 215 np.random.choice( 216 a=self.scaled_calibrated_residuals_, 217 size=X.shape[0], 218 ) 219 for _ in range(self.replications) 220 ] 221 ).T 222 self.sims_ = np.asarray( 223 [ 224 pred 225 + self.calibrated_residuals_scaler_.scale_[0] 226 * self.residuals_sims_[:, i].ravel() 227 for i in range(self.replications) 228 ] 229 ).T 230 elif self.type_pi == "kde": 231 self.kde_ = gaussian_kde(dataset=self.scaled_calibrated_residuals_) 232 self.sims_ = np.asarray( 233 [ 234 pred 235 + self.calibrated_residuals_scaler_.scale_[0] 236 * self.kde_.resample( 237 size=X.shape[0], seed=self.seed + i 238 ).ravel() 239 for i in range(self.replications) 240 ] 241 ).T 242 243 self.mean_ = np.mean(self.sims_, axis=1) 244 self.lower_ = np.quantile(self.sims_, q=self.alpha_ / 200, axis=1) 245 self.upper_ = np.quantile(self.sims_, q=1 - self.alpha_ / 200, axis=1) 246 247 DescribeResult = namedtuple( 248 "DescribeResult", ("mean", "sims", "lower", "upper") 249 ) 250 251 return DescribeResult(self.mean_, self.sims_, self.lower_, self.upper_) 252 253 if self.method == "localconformal": 254 255 if self.replications is None: 256 257 if return_pi: 258 259 predictions_bounds = self.icp_.predict( 260 X, significance=1 - self.level 261 ) 262 DescribeResult = namedtuple( 263 "DescribeResult", ("mean", "lower", "upper") 264 ) 265 return DescribeResult( 266 pred, predictions_bounds[:, 0], predictions_bounds[:, 1] 267 ) 268 269 else: 270 271 return pred 272 273 else: # (self.method == "localconformal") and if self.replications is not None 274 275 raise NotImplementedError( 276 "When self.method == 'localconformal', there are no simulations" 277 )
Obtain predictions and prediction intervals
Args:
X: array-like, shape = [n_samples, n_features];
Testing set vectors, where n_samples is the number
of samples and n_features is the number of features.
return_pi: boolean
Whether the prediction interval is returned or not.
Default is False, for compatibility with other _estimators_.
If True, a tuple containing the predictions + lower and upper
bounds is returned.
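A hypothetical sketch of the simulated split-conformal mode described above: with `replications` and `type_pi` set, `predict` returns simulated predictions alongside the mean and bounds:

```python
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=0)

pi = ns.PredictionInterval(obj=Ridge(), method="splitconformal",
                           type_pi="kde", replications=250, level=95)
pi.fit(X_train, y_train)
res = pi.predict(X_test, return_pi=True)  # namedtuple: mean, sims, lower, upper
print(res.sims.shape)                     # (n_test_samples, 250) simulated predictions
```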
18class SimpleMultitaskClassifier(Base, ClassifierMixin): 19 """Multitask Classification model based on regression models, with shared covariates 20 21 Parameters: 22 23 obj: object 24 any object (must be a regression model) containing a method fit (obj.fit()) 25 and a method predict (obj.predict()) 26 27 seed: int 28 reproducibility seed 29 30 Attributes: 31 32 fit_objs_: dict 33 objects adjusted to each individual time series 34 35 n_classes_: int 36 number of classes for the classifier 37 38 Examples: 39 40 ```python 41 import nnetsauce as ns 42 import numpy as np 43 from sklearn.datasets import load_breast_cancer 44 from sklearn.linear_model import LinearRegression 45 from sklearn.model_selection import train_test_split 46 from sklearn import metrics 47 from time import time 48 49 breast_cancer = load_breast_cancer() 50 Z = breast_cancer.data 51 t = breast_cancer.target 52 53 X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2, 54 random_state=123+2*10) 55 56 # Linear Regression is used 57 regr = LinearRegression() 58 fit_obj = ns.SimpleMultitaskClassifier(regr) 59 60 start = time() 61 fit_obj.fit(X_train, y_train) 62 print(f"Elapsed {time() - start}") 63 64 print(fit_obj.score(X_test, y_test)) 65 print(fit_obj.score(X_test, y_test, scoring="roc_auc")) 66 67 start = time() 68 preds = fit_obj.predict(X_test) 69 print(f"Elapsed {time() - start}") 70 print(metrics.classification_report(preds, y_test)) 71 ``` 72 73 """ 74 75 # construct the object ----- 76 77 def __init__( 78 self, 79 obj, 80 ): 81 self.type_fit = "classification" 82 self.obj = obj 83 self.fit_objs_ = {} 84 self.X_scaler_ = StandardScaler() 85 self.scaled_X_ = None 86 87 def fit(self, X, y, sample_weight=None, **kwargs): 88 """Fit SimpleMultitaskClassifier to training data (X, y). 89 90 Args: 91 92 X: {array-like}, shape = [n_samples, n_features] 93 Training vectors, where n_samples is the number 94 of samples and n_features is the number of features. 95 96 y: array-like, shape = [n_samples] 97 Target values. 98 99 **kwargs: additional parameters to be passed to 100 self.cook_training_set or self.obj.fit 101 102 Returns: 103 104 self: object 105 106 """ 107 108 assert mx.is_factor(y), "y must contain only integers" 109 110 self.classes_ = np.unique(y) # for compatibility with sklearn 111 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 112 113 self.scaled_X_ = self.X_scaler_.fit_transform(X) 114 115 # multitask response 116 Y = mo.one_hot_encode2(y, self.n_classes_) 117 118 try: 119 for i in range(self.n_classes_): 120 self.fit_objs_[i] = deepcopy( 121 self.obj.fit(self.scaled_X_, Y[:, i], sample_weight=sample_weight, **kwargs) 122 ) 123 except Exception as e: 124 for i in range(self.n_classes_): 125 self.fit_objs_[i] = deepcopy( 126 self.obj.fit(self.scaled_X_, Y[:, i], **kwargs) 127 ) 128 return self 129 130 def predict(self, X, **kwargs): 131 """Predict test data X. 132 133 Args: 134 135 X: {array-like}, shape = [n_samples, n_features] 136 Training vectors, where n_samples is the number 137 of samples and n_features is the number of features. 138 139 **kwargs: additional parameters 140 141 Returns: 142 143 model predictions: {array-like} 144 145 """ 146 return np.argmax(self.predict_proba(X, **kwargs), axis=1) 147 148 def predict_proba(self, X, **kwargs): 149 """Predict probabilities for test data X. 150 151 Args: 152 153 X: {array-like}, shape = [n_samples, n_features] 154 Training vectors, where n_samples is the number 155 of samples and n_features is the number of features. 
156 157 **kwargs: additional parameters 158 159 Returns: 160 161 probability estimates for test data: {array-like} 162 163 """ 164 165 shape_X = X.shape 166 167 probs = np.zeros((shape_X[0], self.n_classes_)) 168 169 if len(shape_X) == 1: # one example 170 171 n_features = shape_X[0] 172 173 new_X = mo.rbind( 174 X.reshape(1, n_features), 175 np.ones(n_features).reshape(1, n_features), 176 ) 177 178 Z = self.X_scaler_.transform(new_X, **kwargs) 179 180 # Fallback to standard model 181 for i in range(self.n_classes_): 182 probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0] 183 184 else: # multiple rows 185 186 Z = self.X_scaler_.transform(X, **kwargs) 187 188 # Fallback to standard model 189 for i in range(self.n_classes_): 190 probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs) 191 192 expit_raw_probs = expit(probs) 193 194 # Add small epsilon to avoid division by zero 195 row_sums = expit_raw_probs.sum(axis=1)[:, None] 196 row_sums[row_sums < 1e-10] = 1e-10 197 198 return expit_raw_probs / row_sums 199 200 def decision_function(self, X, **kwargs): 201 """Compute the decision function of X. 202 203 Parameters: 204 X: {array-like}, shape = [n_samples, n_features] 205 Samples to compute decision function for. 206 207 **kwargs: additional parameters to be passed to 208 self.cook_test_set 209 210 Returns: 211 array-like of shape (n_samples,) or (n_samples, n_classes) 212 Decision function of the input samples. The order of outputs is the same 213 as that of the classes passed to fit. 214 """ 215 if not hasattr(self.obj, "decision_function"): 216 # If base classifier doesn't have decision_function, use predict_proba 217 proba = self.predict_proba(X, **kwargs) 218 if proba.shape[1] == 2: 219 return proba[:, 1] # For binary classification 220 return proba # For multiclass 221 222 if len(X.shape) == 1: 223 n_features = X.shape[0] 224 new_X = mo.rbind( 225 X.reshape(1, n_features), 226 np.ones(n_features).reshape(1, n_features), 227 ) 228 229 return ( 230 self.obj.decision_function( 231 self.cook_test_set(new_X, **kwargs), **kwargs 232 ) 233 )[0] 234 235 return self.obj.decision_function(self.cook_test_set(X, **kwargs), **kwargs)
Multitask Classification model based on regression models, with shared covariates
Parameters:
obj: object
any object (must be a regression model) containing a method fit (obj.fit())
and a method predict (obj.predict())
seed: int
reproducibility seed
Attributes:
fit_objs_: dict
fitted regression objects, one per class
n_classes_: int
number of classes for the classifier
Examples:
```python
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time

breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target

X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
                                                    random_state=123+2*10)

# Linear Regression is used
regr = LinearRegression()
fit_obj = ns.SimpleMultitaskClassifier(regr)

start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")

print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))

start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
```
87 def fit(self, X, y, sample_weight=None, **kwargs): 88 """Fit SimpleMultitaskClassifier to training data (X, y). 89 90 Args: 91 92 X: {array-like}, shape = [n_samples, n_features] 93 Training vectors, where n_samples is the number 94 of samples and n_features is the number of features. 95 96 y: array-like, shape = [n_samples] 97 Target values. 98 99 **kwargs: additional parameters to be passed to 100 self.cook_training_set or self.obj.fit 101 102 Returns: 103 104 self: object 105 106 """ 107 108 assert mx.is_factor(y), "y must contain only integers" 109 110 self.classes_ = np.unique(y) # for compatibility with sklearn 111 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 112 113 self.scaled_X_ = self.X_scaler_.fit_transform(X) 114 115 # multitask response 116 Y = mo.one_hot_encode2(y, self.n_classes_) 117 118 try: 119 for i in range(self.n_classes_): 120 self.fit_objs_[i] = deepcopy( 121 self.obj.fit(self.scaled_X_, Y[:, i], sample_weight=sample_weight, **kwargs) 122 ) 123 except Exception as e: 124 for i in range(self.n_classes_): 125 self.fit_objs_[i] = deepcopy( 126 self.obj.fit(self.scaled_X_, Y[:, i], **kwargs) 127 ) 128 return self
Fit SimpleMultitaskClassifier to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
130 def predict(self, X, **kwargs): 131 """Predict test data X. 132 133 Args: 134 135 X: {array-like}, shape = [n_samples, n_features] 136 Training vectors, where n_samples is the number 137 of samples and n_features is the number of features. 138 139 **kwargs: additional parameters 140 141 Returns: 142 143 model predictions: {array-like} 144 145 """ 146 return np.argmax(self.predict_proba(X, **kwargs), axis=1)
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters
Returns:
model predictions: {array-like}
148 def predict_proba(self, X, **kwargs): 149 """Predict probabilities for test data X. 150 151 Args: 152 153 X: {array-like}, shape = [n_samples, n_features] 154 Training vectors, where n_samples is the number 155 of samples and n_features is the number of features. 156 157 **kwargs: additional parameters 158 159 Returns: 160 161 probability estimates for test data: {array-like} 162 163 """ 164 165 shape_X = X.shape 166 167 probs = np.zeros((shape_X[0], self.n_classes_)) 168 169 if len(shape_X) == 1: # one example 170 171 n_features = shape_X[0] 172 173 new_X = mo.rbind( 174 X.reshape(1, n_features), 175 np.ones(n_features).reshape(1, n_features), 176 ) 177 178 Z = self.X_scaler_.transform(new_X, **kwargs) 179 180 # Fallback to standard model 181 for i in range(self.n_classes_): 182 probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0] 183 184 else: # multiple rows 185 186 Z = self.X_scaler_.transform(X, **kwargs) 187 188 # Fallback to standard model 189 for i in range(self.n_classes_): 190 probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs) 191 192 expit_raw_probs = expit(probs) 193 194 # Add small epsilon to avoid division by zero 195 row_sums = expit_raw_probs.sum(axis=1)[:, None] 196 row_sums[row_sums < 1e-10] = 1e-10 197 198 return expit_raw_probs / row_sums
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters
Returns:
probability estimates for test data: {array-like}
9class Optimizer: 10 """Optimizer class 11 12 Attributes: 13 14 type_optim: str 15 type of optimizer, (currently) either 'sgd' (stochastic minibatch gradient descent) 16 or 'scd' (stochastic minibatch coordinate descent) 17 18 num_iters: int 19 number of iterations of the optimizer 20 21 learning_rate: float 22 step size 23 24 batch_prop: float 25 proportion of the initial data used at each optimization step 26 27 learning_method: str 28 "poly" - learning rate decreasing as a polynomial function 29 of # of iterations (default) 30 "exp" - learning rate decreasing as an exponential function 31 of # of iterations 32 "momentum" - gradient descent using momentum 33 34 randomization: str 35 type of randomization applied at each step 36 "strat" - stratified subsampling (default) 37 "shuffle" - random subsampling 38 39 mass: float 40 mass on velocity, for `method` == "momentum" 41 42 decay: float 43 coefficient of decrease of the learning rate for 44 `method` == "poly" and `method` == "exp" 45 46 tolerance: float 47 early stopping parameter (convergence of loss function) 48 49 verbose: int 50 controls verbosity of gradient descent 51 0 - nothing is printed 52 1 - a progress bar is printed 53 2 - successive loss function values are printed 54 55 """ 56 57 # construct the object ----- 58 59 def __init__( 60 self, 61 type_optim="sgd", 62 num_iters=100, 63 learning_rate=0.01, 64 batch_prop=1.0, 65 learning_method="momentum", 66 randomization="strat", 67 mass=0.9, 68 decay=0.1, 69 tolerance=1e-3, 70 verbose=1, 71 ): 72 self.type_optim = type_optim 73 self.num_iters = num_iters 74 self.learning_rate = learning_rate 75 self.batch_prop = batch_prop 76 self.learning_method = learning_method 77 self.randomization = randomization 78 self.mass = mass 79 self.decay = decay 80 self.tolerance = tolerance 81 self.verbose = verbose 82 self.opt = None 83 84 def fit(self, loss_func, response, x0, **kwargs): 85 """Fit GLM model to training data (X, y). 86 87 Args: 88 89 loss_func: loss function 90 91 response: array-like, shape = [n_samples] 92 target variable (used for subsampling) 93 94 x0: array-like, shape = [n_features] 95 initial value provided to the optimizer 96 97 **kwargs: additional parameters to be passed to 98 loss function 99 100 Returns: 101 102 self: object 103 104 """ 105 106 if self.type_optim == "scd": 107 self.results = scd( 108 loss_func, 109 response=response, 110 x=x0, 111 num_iters=self.num_iters, 112 batch_prop=self.batch_prop, 113 learning_rate=self.learning_rate, 114 learning_method=self.learning_method, 115 mass=self.mass, 116 decay=self.decay, 117 randomization=self.randomization, 118 tolerance=self.tolerance, 119 verbose=self.verbose, 120 **kwargs 121 ) 122 123 if self.type_optim == "sgd": 124 self.results = sgd( 125 loss_func, 126 response=response, 127 x=x0, 128 num_iters=self.num_iters, 129 batch_prop=self.batch_prop, 130 learning_rate=self.learning_rate, 131 learning_method=self.learning_method, 132 mass=self.mass, 133 decay=self.decay, 134 randomization=self.randomization, 135 tolerance=self.tolerance, 136 verbose=self.verbose, 137 **kwargs 138 ) 139 140 return self 141 142 def one_hot_encode(self, y, n_classes): 143 return one_hot_encode(y, n_classes)
Optimizer class
Attributes:
type_optim: str
type of optimizer, (currently) either 'sgd' (stochastic minibatch gradient descent)
or 'scd' (stochastic minibatch coordinate descent)
num_iters: int
number of iterations of the optimizer
learning_rate: float
step size
batch_prop: float
proportion of the initial data used at each optimization step
learning_method: str
"poly" - learning rate decreasing as a polynomial function
of # of iterations
"exp" - learning rate decreasing as an exponential function
of # of iterations
"momentum" - gradient descent using momentum
randomization: str
type of randomization applied at each step
"strat" - stratified subsampling (default)
"shuffle" - random subsampling
mass: float
mass on velocity, for `learning_method` == "momentum"
decay: float
coefficient of decrease of the learning rate for
`learning_method` == "poly" and `learning_method` == "exp"
tolerance: float
early stopping parameter (convergence of loss function)
verbose: int
controls verbosity of gradient descent
0 - nothing is printed
1 - a progress bar is printed
2 - successive loss function values are printed
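The learning-rate schedules and the momentum variant named above can be illustrated with a rough standalone sketch (the exact update formulas inside nnetsauce's `sgd`/`scd` routines may differ):

```python
import numpy as np

learning_rate, decay, num_iters = 0.01, 0.1, 100
iters = np.arange(num_iters)

lr_poly = learning_rate / (1.0 + decay * iters)  # "poly": polynomial decrease
lr_exp = learning_rate * np.exp(-decay * iters)  # "exp": exponential decrease

# "momentum": the velocity accumulates past gradients, weighted by `mass`
def grad_f(x):
    return 2.0 * x  # gradient of f(x) = x**2

mass, velocity, x = 0.9, 0.0, 5.0
for _ in range(num_iters):
    velocity = mass * velocity - learning_rate * grad_f(x)
    x = x + velocity
print(x)  # close to the minimizer 0
```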
84 def fit(self, loss_func, response, x0, **kwargs): 85 """Fit GLM model to training data (X, y). 86 87 Args: 88 89 loss_func: loss function 90 91 response: array-like, shape = [n_samples] 92 target variable (used for subsampling) 93 94 x0: array-like, shape = [n_features] 95 initial value provided to the optimizer 96 97 **kwargs: additional parameters to be passed to 98 loss function 99 100 Returns: 101 102 self: object 103 104 """ 105 106 if self.type_optim == "scd": 107 self.results = scd( 108 loss_func, 109 response=response, 110 x=x0, 111 num_iters=self.num_iters, 112 batch_prop=self.batch_prop, 113 learning_rate=self.learning_rate, 114 learning_method=self.learning_method, 115 mass=self.mass, 116 decay=self.decay, 117 randomization=self.randomization, 118 tolerance=self.tolerance, 119 verbose=self.verbose, 120 **kwargs 121 ) 122 123 if self.type_optim == "sgd": 124 self.results = sgd( 125 loss_func, 126 response=response, 127 x=x0, 128 num_iters=self.num_iters, 129 batch_prop=self.batch_prop, 130 learning_rate=self.learning_rate, 131 learning_method=self.learning_method, 132 mass=self.mass, 133 decay=self.decay, 134 randomization=self.randomization, 135 tolerance=self.tolerance, 136 verbose=self.verbose, 137 **kwargs 138 ) 139 140 return self
Fit the optimizer to training data, by minimizing `loss_func` starting from `x0`.
Args:
loss_func: loss function
response: array-like, shape = [n_samples]
target variable (used for subsampling)
x0: array-like, shape = [n_features]
initial value provided to the optimizer
**kwargs: additional parameters to be passed to
loss function
Returns:
self: object
37class QuantileRegressor(BaseEstimator, RegressorMixin): 38 """ 39 Quantile Regressor. 40 41 Parameters: 42 43 obj: base model (regression model) 44 The base regressor from which to build a 45 quantile regressor. 46 47 level: int, default=95 48 The level of the quantiles to compute. 49 50 scoring: str, default="predictions" 51 The scoring to use for the optimization and constructing 52 prediction intervals (predictions, residuals, conformal, 53 studentized, conformal-studentized). 54 55 Attributes: 56 57 obj_ : base model (regression model) 58 The base regressor from which to build a 59 quantile regressor. 60 61 offset_multipliers_ : list 62 The multipliers for the offset. 63 64 scoring_residuals_ : list 65 The residuals for the scoring. 66 67 student_multiplier_ : float 68 The multiplier for the student. 69 70 71 """ 72 73 def __init__(self, obj, level=95, scoring="predictions"): 74 assert scoring in ( 75 "predictions", 76 "residuals", 77 "conformal", 78 "studentized", 79 "conformal-studentized", 80 ), "scoring must be 'predictions' or 'residuals'" 81 self.obj = obj 82 low_risk_level = (1 - level / 100) / 2 83 self.quantiles = [low_risk_level, 0.5, 1 - low_risk_level] 84 self.scoring = scoring 85 self.offset_multipliers_ = None 86 self.obj_ = None 87 self.scoring_residuals_ = None 88 self.student_multiplier_ = None 89 90 def _compute_quantile_loss(self, residuals: np.ndarray, quantile: float) -> float: 91 """ 92 Compute the quantile loss for a given set of residuals and quantile. 93 """ 94 if not 0 < quantile < 1: 95 raise ValueError("Quantile should be between 0 and 1.") 96 loss = quantile * (residuals >= 0) + (quantile - 1) * (residuals < 0) 97 return np.mean(residuals * loss) 98 99 def _optimize_multiplier( 100 self, 101 y: np.ndarray, 102 base_predictions: np.ndarray, 103 prev_predictions: np.ndarray = None, 104 scoring_residuals: np.ndarray = None, 105 quantile: float = 0.5, 106 ) -> float: 107 """ 108 Optimize the multiplier for a given quantile. 109 """ 110 if not 0 < quantile < 1: 111 raise ValueError("Quantile should be between 0 and 1.") 112 113 def objective(log_multiplier): 114 """ 115 Objective function for optimization. 
116 """ 117 # Convert to positive multiplier using exp 118 multiplier = np.exp(log_multiplier[0]) 119 if self.scoring == "predictions": 120 assert base_predictions is not None, "base_predictions must be not None" 121 # Calculate predictions 122 if prev_predictions is None: 123 # For first quantile, subtract from conditional expectation 124 predictions = base_predictions - multiplier * np.abs( 125 base_predictions 126 ) 127 else: 128 # For other quantiles, add to previous quantile 129 offset = multiplier * np.abs(base_predictions) 130 predictions = prev_predictions + offset 131 elif self.scoring in ("residuals", "conformal"): 132 assert ( 133 scoring_residuals is not None 134 ), "scoring_residuals must be not None" 135 # print("scoring_residuals", scoring_residuals) 136 # Calculate predictions 137 if prev_predictions is None: 138 # For first quantile, subtract from conditional expectation 139 predictions = base_predictions - multiplier * np.std( 140 scoring_residuals 141 ) 142 # print("predictions", predictions) 143 else: 144 # For other quantiles, add to previous quantile 145 offset = multiplier * np.std(scoring_residuals) 146 predictions = prev_predictions + offset 147 elif self.scoring in ("studentized", "conformal-studentized"): 148 assert ( 149 scoring_residuals is not None 150 ), "scoring_residuals must be not None" 151 # Calculate predictions 152 if prev_predictions is None: 153 # For first quantile, subtract from conditional expectation 154 predictions = ( 155 base_predictions - multiplier * self.student_multiplier_ 156 ) 157 # print("predictions", predictions) 158 else: 159 # For other quantiles, add to previous quantile 160 offset = multiplier * self.student_multiplier_ 161 predictions = prev_predictions + offset 162 else: 163 raise ValueError("Invalid argument 'scoring'") 164 165 residuals = y - predictions 166 return self._compute_quantile_loss(residuals, quantile) 167 168 # Optimize in log space for numerical stability 169 # bounds = [(-10, 10)] # log space bounds 170 bounds = [(-100, 100)] # log space bounds 171 result = differential_evolution( 172 objective, 173 bounds, 174 # popsize=15, 175 # maxiter=100, 176 # tol=1e-4, 177 popsize=25, 178 maxiter=200, 179 tol=1e-6, 180 disp=False, 181 ) 182 183 return np.exp(result.x[0]) 184 185 def fit(self, X: np.ndarray, y: np.ndarray): 186 """Fit the model to the data. 187 188 Parameters: 189 190 X: {array-like}, shape = [n_samples, n_features] 191 Training vectors, where n_samples is the number of samples and 192 n_features is the number of features. 193 y: array-like, shape = [n_samples] 194 Target values. 
195 """ 196 self.obj_ = clone(self.obj) 197 if self.scoring in ("predictions", "residuals"): 198 self.obj_.fit(X, y) 199 base_predictions = self.obj_.predict(X) 200 scoring_residuals = y - base_predictions 201 self.scoring_residuals_ = scoring_residuals 202 elif self.scoring == "conformal": 203 X_train, X_calib, y_train, y_calib = train_test_split( 204 X, y, test_size=0.5, random_state=42 205 ) 206 self.obj_.fit(X_train, y_train) 207 scoring_residuals = y_calib - self.obj_.predict( 208 X_calib 209 ) # These are calibration predictions 210 self.scoring_residuals_ = scoring_residuals 211 # Update base_predictions to use training predictions for optimization 212 self.obj_.fit(X_calib, y_calib) 213 base_predictions = self.obj_.predict(X_calib) 214 elif self.scoring in ("studentized", "conformal-studentized"): 215 # Calculate student multiplier 216 if self.scoring == "conformal-studentized": 217 X_train, X_calib, y_train, y_calib = train_test_split( 218 X, y, test_size=0.5, random_state=42 219 ) 220 self.obj_.fit(X_train, y_train) 221 scoring_residuals = y_calib - self.obj_.predict(X_calib) 222 # Calculate studentized multiplier using calibration data 223 self.student_multiplier_ = np.std(y_calib, ddof=1) / np.sqrt( 224 len(y_calib) 225 ) 226 self.obj_.fit(X_calib, y_calib) 227 base_predictions = self.obj_.predict(X_calib) 228 else: # regular studentized 229 self.obj_.fit(X, y) 230 base_predictions = self.obj_.predict(X) 231 scoring_residuals = y - base_predictions 232 self.student_multiplier_ = np.std(y, ddof=1) / np.sqrt(len(y)) 233 234 # Initialize storage for multipliers 235 self.offset_multipliers_ = [] 236 # Keep track of current predictions for each quantile 237 current_predictions = None 238 239 # Fit each quantile sequentially 240 for i, quantile in enumerate(self.quantiles): 241 if self.scoring == "predictions": 242 multiplier = self._optimize_multiplier( 243 y=y, 244 base_predictions=base_predictions, 245 prev_predictions=current_predictions, 246 quantile=quantile, 247 ) 248 249 self.offset_multipliers_.append(multiplier) 250 251 # Update current predictions 252 if current_predictions is None: 253 # First quantile (lowest) 254 current_predictions = base_predictions - multiplier * np.abs( 255 base_predictions 256 ) 257 else: 258 # Subsequent quantiles 259 offset = multiplier * np.abs(base_predictions) 260 current_predictions = current_predictions + offset 261 elif self.scoring == "residuals": 262 multiplier = self._optimize_multiplier( 263 y=y, 264 base_predictions=base_predictions, 265 scoring_residuals=scoring_residuals, 266 prev_predictions=current_predictions, 267 quantile=quantile, 268 ) 269 270 self.offset_multipliers_.append(multiplier) 271 272 # Update current predictions 273 if current_predictions is None: 274 # First quantile (lowest) 275 current_predictions = base_predictions - multiplier * np.std( 276 scoring_residuals 277 ) 278 else: 279 # Subsequent quantiles 280 offset = multiplier * np.std(scoring_residuals) 281 current_predictions = current_predictions + offset 282 elif self.scoring == "conformal": 283 multiplier = self._optimize_multiplier( 284 y=y_calib, 285 base_predictions=base_predictions, 286 scoring_residuals=scoring_residuals, 287 prev_predictions=current_predictions, 288 quantile=quantile, 289 ) 290 291 self.offset_multipliers_.append(multiplier) 292 293 # Update current predictions 294 if current_predictions is None: 295 # First quantile (lowest) 296 current_predictions = base_predictions - multiplier * np.std( 297 scoring_residuals 298 ) 299 else: 300 # 
Subsequent quantiles 301 offset = multiplier * np.std(scoring_residuals) 302 current_predictions = current_predictions + offset 303 elif self.scoring in ("studentized", "conformal-studentized"): 304 multiplier = self._optimize_multiplier( 305 y=y_calib if self.scoring == "conformal-studentized" else y, 306 base_predictions=base_predictions, 307 scoring_residuals=scoring_residuals, 308 prev_predictions=current_predictions, 309 quantile=quantile, 310 ) 311 312 self.offset_multipliers_.append(multiplier) 313 314 # Update current predictions 315 if current_predictions is None: 316 current_predictions = ( 317 base_predictions - multiplier * self.student_multiplier_ 318 ) 319 else: 320 offset = multiplier * self.student_multiplier_ 321 current_predictions = current_predictions + offset 322 323 return self 324 325 def predict(self, X, return_pi=False): 326 """Predict the target variable. 327 328 Parameters: 329 330 X: {array-like}, shape = [n_samples, n_features] 331 Training vectors, where n_samples is the number of samples and 332 n_features is the number of features. 333 334 return_pi: bool, default=True 335 Whether to return the prediction intervals. 336 """ 337 if self.obj_ is None or self.offset_multipliers_ is None: 338 raise ValueError("Model not fitted yet.") 339 340 base_predictions = self.obj_.predict(X) 341 all_predictions = [] 342 343 if self.scoring == "predictions": 344 345 # Generate first quantile 346 current_predictions = base_predictions - self.offset_multipliers_[ 347 0 348 ] * np.abs(base_predictions) 349 all_predictions.append(current_predictions) 350 351 # Generate remaining quantiles 352 for multiplier in self.offset_multipliers_[1:]: 353 offset = multiplier * np.abs(base_predictions) 354 current_predictions = current_predictions + offset 355 all_predictions.append(current_predictions) 356 357 elif self.scoring in ("residuals", "conformal"): 358 359 # Generate first quantile 360 current_predictions = base_predictions - self.offset_multipliers_[ 361 0 362 ] * np.std(self.scoring_residuals_) 363 all_predictions.append(current_predictions) 364 365 # Generate remaining quantiles 366 for multiplier in self.offset_multipliers_[1:]: 367 offset = multiplier * np.std(self.scoring_residuals_) 368 current_predictions = current_predictions + offset 369 all_predictions.append(current_predictions) 370 371 elif self.scoring in ("studentized", "conformal-studentized"): 372 # Generate first quantile 373 current_predictions = ( 374 base_predictions 375 - self.offset_multipliers_[0] * self.student_multiplier_ 376 ) 377 all_predictions.append(current_predictions) 378 379 # Generate remaining quantiles 380 for multiplier in self.offset_multipliers_[1:]: 381 offset = multiplier * self.student_multiplier_ 382 current_predictions = current_predictions + offset 383 all_predictions.append(current_predictions) 384 385 if return_pi == False: 386 return np.asarray(all_predictions[1]) 387 388 DescribeResult = namedtuple( 389 "DecribeResult", ["mean", "lower", "upper", "median"] 390 ) 391 DescribeResult.mean = base_predictions 392 DescribeResult.lower = np.asarray(all_predictions[0]) 393 DescribeResult.median = np.asarray(all_predictions[1]) 394 DescribeResult.upper = np.asarray(all_predictions[2]) 395 return DescribeResult
Quantile Regressor.
Parameters:
obj: base model (regression model)
The base regressor from which to build a
quantile regressor.
level: int, default=95
The prediction interval level, in percent; for example, level=95 yields the 2.5%, 50% and 97.5% quantiles.
scoring: str, default="predictions"
The scoring to use for the optimization and constructing
prediction intervals (predictions, residuals, conformal,
studentized, conformal-studentized).
Attributes:
obj_ : base model (regression model)
The base regressor from which to build a
quantile regressor.
offset_multipliers_ : list
The fitted multipliers used to offset the base predictions, one per quantile.
scoring_residuals_ : list
The residuals (training or calibration) used for scoring.
student_multiplier_ : float
The studentized multiplier, std(y, ddof=1) / sqrt(n).
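A minimal usage sketch, assuming a scikit-learn base regressor and the California housing data; `predict(..., return_pi=True)` returns a result exposing the `mean`, `lower`, `median` and `upper` fields described above:

```python
import nnetsauce as ns
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split

X, y = fetch_california_housing(return_X_y=True, as_frame=False)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=13)

# 95% prediction intervals, with offsets calibrated on the residuals of the base model
reg = ns.QuantileRegressor(obj=Ridge(), level=95, scoring="residuals")
reg.fit(X_train, y_train)

res = reg.predict(X_test, return_pi=True)
print(res.mean[:5])    # base model predictions
print(res.lower[:5])   # 2.5% quantile
print(res.median[:5])  # 50% quantile
print(res.upper[:5])   # 97.5% quantile
```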
185 def fit(self, X: np.ndarray, y: np.ndarray): 186 """Fit the model to the data. 187 188 Parameters: 189 190 X: {array-like}, shape = [n_samples, n_features] 191 Training vectors, where n_samples is the number of samples and 192 n_features is the number of features. 193 y: array-like, shape = [n_samples] 194 Target values. 195 """ 196 self.obj_ = clone(self.obj) 197 if self.scoring in ("predictions", "residuals"): 198 self.obj_.fit(X, y) 199 base_predictions = self.obj_.predict(X) 200 scoring_residuals = y - base_predictions 201 self.scoring_residuals_ = scoring_residuals 202 elif self.scoring == "conformal": 203 X_train, X_calib, y_train, y_calib = train_test_split( 204 X, y, test_size=0.5, random_state=42 205 ) 206 self.obj_.fit(X_train, y_train) 207 scoring_residuals = y_calib - self.obj_.predict( 208 X_calib 209 ) # These are calibration predictions 210 self.scoring_residuals_ = scoring_residuals 211 # Update base_predictions to use training predictions for optimization 212 self.obj_.fit(X_calib, y_calib) 213 base_predictions = self.obj_.predict(X_calib) 214 elif self.scoring in ("studentized", "conformal-studentized"): 215 # Calculate student multiplier 216 if self.scoring == "conformal-studentized": 217 X_train, X_calib, y_train, y_calib = train_test_split( 218 X, y, test_size=0.5, random_state=42 219 ) 220 self.obj_.fit(X_train, y_train) 221 scoring_residuals = y_calib - self.obj_.predict(X_calib) 222 # Calculate studentized multiplier using calibration data 223 self.student_multiplier_ = np.std(y_calib, ddof=1) / np.sqrt( 224 len(y_calib) 225 ) 226 self.obj_.fit(X_calib, y_calib) 227 base_predictions = self.obj_.predict(X_calib) 228 else: # regular studentized 229 self.obj_.fit(X, y) 230 base_predictions = self.obj_.predict(X) 231 scoring_residuals = y - base_predictions 232 self.student_multiplier_ = np.std(y, ddof=1) / np.sqrt(len(y)) 233 234 # Initialize storage for multipliers 235 self.offset_multipliers_ = [] 236 # Keep track of current predictions for each quantile 237 current_predictions = None 238 239 # Fit each quantile sequentially 240 for i, quantile in enumerate(self.quantiles): 241 if self.scoring == "predictions": 242 multiplier = self._optimize_multiplier( 243 y=y, 244 base_predictions=base_predictions, 245 prev_predictions=current_predictions, 246 quantile=quantile, 247 ) 248 249 self.offset_multipliers_.append(multiplier) 250 251 # Update current predictions 252 if current_predictions is None: 253 # First quantile (lowest) 254 current_predictions = base_predictions - multiplier * np.abs( 255 base_predictions 256 ) 257 else: 258 # Subsequent quantiles 259 offset = multiplier * np.abs(base_predictions) 260 current_predictions = current_predictions + offset 261 elif self.scoring == "residuals": 262 multiplier = self._optimize_multiplier( 263 y=y, 264 base_predictions=base_predictions, 265 scoring_residuals=scoring_residuals, 266 prev_predictions=current_predictions, 267 quantile=quantile, 268 ) 269 270 self.offset_multipliers_.append(multiplier) 271 272 # Update current predictions 273 if current_predictions is None: 274 # First quantile (lowest) 275 current_predictions = base_predictions - multiplier * np.std( 276 scoring_residuals 277 ) 278 else: 279 # Subsequent quantiles 280 offset = multiplier * np.std(scoring_residuals) 281 current_predictions = current_predictions + offset 282 elif self.scoring == "conformal": 283 multiplier = self._optimize_multiplier( 284 y=y_calib, 285 base_predictions=base_predictions, 286 scoring_residuals=scoring_residuals, 287 
prev_predictions=current_predictions, 288 quantile=quantile, 289 ) 290 291 self.offset_multipliers_.append(multiplier) 292 293 # Update current predictions 294 if current_predictions is None: 295 # First quantile (lowest) 296 current_predictions = base_predictions - multiplier * np.std( 297 scoring_residuals 298 ) 299 else: 300 # Subsequent quantiles 301 offset = multiplier * np.std(scoring_residuals) 302 current_predictions = current_predictions + offset 303 elif self.scoring in ("studentized", "conformal-studentized"): 304 multiplier = self._optimize_multiplier( 305 y=y_calib if self.scoring == "conformal-studentized" else y, 306 base_predictions=base_predictions, 307 scoring_residuals=scoring_residuals, 308 prev_predictions=current_predictions, 309 quantile=quantile, 310 ) 311 312 self.offset_multipliers_.append(multiplier) 313 314 # Update current predictions 315 if current_predictions is None: 316 current_predictions = ( 317 base_predictions - multiplier * self.student_multiplier_ 318 ) 319 else: 320 offset = multiplier * self.student_multiplier_ 321 current_predictions = current_predictions + offset 322 323 return self
Fit the model to the data.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
325 def predict(self, X, return_pi=False): 326 """Predict the target variable. 327 328 Parameters: 329 330 X: {array-like}, shape = [n_samples, n_features] 331 Training vectors, where n_samples is the number of samples and 332 n_features is the number of features. 333 334 return_pi: bool, default=True 335 Whether to return the prediction intervals. 336 """ 337 if self.obj_ is None or self.offset_multipliers_ is None: 338 raise ValueError("Model not fitted yet.") 339 340 base_predictions = self.obj_.predict(X) 341 all_predictions = [] 342 343 if self.scoring == "predictions": 344 345 # Generate first quantile 346 current_predictions = base_predictions - self.offset_multipliers_[ 347 0 348 ] * np.abs(base_predictions) 349 all_predictions.append(current_predictions) 350 351 # Generate remaining quantiles 352 for multiplier in self.offset_multipliers_[1:]: 353 offset = multiplier * np.abs(base_predictions) 354 current_predictions = current_predictions + offset 355 all_predictions.append(current_predictions) 356 357 elif self.scoring in ("residuals", "conformal"): 358 359 # Generate first quantile 360 current_predictions = base_predictions - self.offset_multipliers_[ 361 0 362 ] * np.std(self.scoring_residuals_) 363 all_predictions.append(current_predictions) 364 365 # Generate remaining quantiles 366 for multiplier in self.offset_multipliers_[1:]: 367 offset = multiplier * np.std(self.scoring_residuals_) 368 current_predictions = current_predictions + offset 369 all_predictions.append(current_predictions) 370 371 elif self.scoring in ("studentized", "conformal-studentized"): 372 # Generate first quantile 373 current_predictions = ( 374 base_predictions 375 - self.offset_multipliers_[0] * self.student_multiplier_ 376 ) 377 all_predictions.append(current_predictions) 378 379 # Generate remaining quantiles 380 for multiplier in self.offset_multipliers_[1:]: 381 offset = multiplier * self.student_multiplier_ 382 current_predictions = current_predictions + offset 383 all_predictions.append(current_predictions) 384 385 if return_pi == False: 386 return np.asarray(all_predictions[1]) 387 388 DescribeResult = namedtuple( 389 "DecribeResult", ["mean", "lower", "upper", "median"] 390 ) 391 DescribeResult.mean = base_predictions 392 DescribeResult.lower = np.asarray(all_predictions[0]) 393 DescribeResult.median = np.asarray(all_predictions[1]) 394 DescribeResult.upper = np.asarray(all_predictions[2]) 395 return DescribeResult
Predict the target variable.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number of samples and
n_features is the number of features.
return_pi: bool, default=False
Whether to return the prediction intervals.
41class QuantileClassifier(BaseEstimator, ClassifierMixin): 42 """ 43 Quantile Classifier. 44 45 Parameters: 46 47 obj: base model (classification model) 48 The base classifier from which to build a 49 quantile classifier. 50 51 level: int, default=95 52 The level of the quantiles to compute. 53 54 scoring: str, default="predictions" 55 The scoring to use for the optimization and constructing 56 prediction intervals (predictions, residuals, conformal, 57 studentized, conformal-studentized). 58 59 Attributes: 60 61 obj_ : base model (classification model) 62 The base classifier from which to build a 63 quantile classifier. 64 65 offset_multipliers_ : list 66 The multipliers for the offset. 67 68 scoring_residuals_ : list 69 The residuals for the scoring. 70 71 student_multiplier_ : float 72 The multiplier for the student. 73 74 75 """ 76 77 def __init__(self, obj, level=95, scoring="predictions"): 78 assert scoring in ( 79 "predictions", 80 "residuals", 81 "conformal", 82 "studentized", 83 "conformal-studentized", 84 ), "scoring must be 'predictions' or 'residuals'" 85 self.obj = obj 86 quantileregressor = QuantileRegressor(self.obj) 87 quantileregressor.predict = partial(quantileregressor.predict, return_pi=False) 88 self.obj_ = SimpleMultitaskClassifier(quantileregressor) 89 90 def fit(self, X, y, **kwargs): 91 self.obj_.fit(X, y, **kwargs) 92 93 def predict(self, X, **kwargs): 94 return self.obj_.predict(X, **kwargs) 95 96 def predict_proba(self, X, **kwargs): 97 return self.obj_.predict_proba(X, **kwargs)
Quantile Classifier.
Parameters:
obj: base model (regression model)
The base regressor, wrapped internally in a QuantileRegressor and a
SimpleMultitaskClassifier to build a quantile classifier.
level: int, default=95
The prediction interval level, in percent; for example, level=95 yields the 2.5%, 50% and 97.5% quantiles.
scoring: str, default="predictions"
The scoring to use for the optimization and constructing
prediction intervals (predictions, residuals, conformal,
studentized, conformal-studentized).
Attributes:
obj_ : classification model
The fitted SimpleMultitaskClassifier wrapping a QuantileRegressor
built on obj.
offset_multipliers_ : list
The multipliers for the offset.
scoring_residuals_ : list
The residuals for the scoring.
student_multiplier_ : float
The multiplier for the student.
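A minimal usage sketch, assuming the breast cancer data. Note that `obj` is wrapped internally in a `QuantileRegressor` and then a `SimpleMultitaskClassifier`, so a regression model is passed as the base learner:

```python
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

# the base learner is a regressor, wrapped internally for classification
clf = ns.QuantileClassifier(obj=Ridge())
clf.fit(X_train, y_train)

print(clf.predict(X_test)[:10])       # predicted class labels
print(clf.predict_proba(X_test)[:5])  # predicted class probabilities
```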
18class RandomBagRegressor(RandomBag, RegressorMixin): 19 """Randomized 'Bagging' Regression model 20 21 Parameters: 22 23 obj: object 24 any object containing a method fit (obj.fit()) and a method predict 25 (obj.predict()) 26 27 n_estimators: int 28 number of boosting iterations 29 30 n_hidden_features: int 31 number of nodes in the hidden layer 32 33 activation_name: str 34 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 35 36 a: float 37 hyperparameter for 'prelu' or 'elu' activation function 38 39 nodes_sim: str 40 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 41 'uniform' 42 43 bias: boolean 44 indicates if the hidden layer contains a bias term (True) or not 45 (False) 46 47 dropout: float 48 regularization parameter; (random) percentage of nodes dropped out 49 of the training 50 51 direct_link: boolean 52 indicates if the original predictors are included (True) in model''s 53 fitting or not (False) 54 55 n_clusters: int 56 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: 57 no clustering) 58 59 cluster_encode: bool 60 defines how the variable containing clusters is treated (default is one-hot) 61 if `False`, then labels are used, without one-hot encoding 62 63 type_clust: str 64 type of clustering method: currently k-means ('kmeans') or Gaussian 65 Mixture Model ('gmm') 66 67 type_scaling: a tuple of 3 strings 68 scaling methods for inputs, hidden layer, and clustering respectively 69 (and when relevant). 70 Currently available: standardization ('std') or MinMax scaling ('minmax') 71 72 col_sample: float 73 percentage of covariates randomly chosen for training 74 75 row_sample: float 76 percentage of rows chosen for training, by stratified bootstrapping 77 78 seed: int 79 reproducibility seed for nodes_sim=='uniform' 80 81 backend: str 82 "cpu" or "gpu" or "tpu" 83 84 Attributes: 85 86 voter_: dict 87 dictionary containing all the fitted base-learners 88 89 90 Examples: 91 92 ```python 93 import numpy as np 94 import nnetsauce as ns 95 from sklearn.datasets import fetch_california_housing 96 from sklearn.tree import DecisionTreeRegressor 97 from sklearn.model_selection import train_test_split 98 99 X, y = fetch_california_housing(return_X_y=True, as_frame=False) 100 101 # split data into training test and test set 102 X_train, X_test, y_train, y_test = train_test_split(X, y, 103 test_size=0.2, random_state=13) 104 105 # Requires further tuning 106 obj = DecisionTreeRegressor(max_depth=3, random_state=123) 107 obj2 = ns.RandomBagRegressor(obj=obj, direct_link=False, 108 n_estimators=50, 109 col_sample=0.9, row_sample=0.9, 110 dropout=0, n_clusters=0, verbose=1) 111 112 obj2.fit(X_train, y_train) 113 114 print(np.sqrt(obj2.score(X_test, y_test))) # RMSE 115 116 ``` 117 118 """ 119 120 # construct the object ----- 121 122 def __init__( 123 self, 124 obj, 125 n_estimators=10, 126 n_hidden_features=1, 127 activation_name="relu", 128 a=0.01, 129 nodes_sim="sobol", 130 bias=True, 131 dropout=0, 132 direct_link=False, 133 n_clusters=2, 134 cluster_encode=True, 135 type_clust="kmeans", 136 type_scaling=("std", "std", "std"), 137 col_sample=1, 138 row_sample=1, 139 n_jobs=None, 140 seed=123, 141 verbose=1, 142 backend="cpu", 143 ): 144 super().__init__( 145 obj=obj, 146 n_estimators=n_estimators, 147 n_hidden_features=n_hidden_features, 148 activation_name=activation_name, 149 a=a, 150 nodes_sim=nodes_sim, 151 bias=bias, 152 dropout=dropout, 153 direct_link=direct_link, 154 n_clusters=n_clusters, 155 cluster_encode=cluster_encode, 156 
type_clust=type_clust, 157 type_scaling=type_scaling, 158 col_sample=col_sample, 159 row_sample=row_sample, 160 seed=seed, 161 backend=backend, 162 ) 163 164 self.type_fit = "regression" 165 self.verbose = verbose 166 self.n_jobs = n_jobs 167 self.voter_ = {} 168 169 def fit(self, X, y, **kwargs): 170 """Fit Random 'Bagging' model to training data (X, y). 171 172 Args: 173 174 X: {array-like}, shape = [n_samples, n_features] 175 Training vectors, where n_samples is the number 176 of samples and n_features is the number of features. 177 178 y: array-like, shape = [n_samples] 179 Target values. 180 181 **kwargs: additional parameters to be passed to 182 self.cook_training_set or self.obj.fit 183 184 Returns: 185 186 self: object 187 188 """ 189 190 base_learner = CustomRegressor( 191 self.obj, 192 n_hidden_features=self.n_hidden_features, 193 activation_name=self.activation_name, 194 a=self.a, 195 nodes_sim=self.nodes_sim, 196 bias=self.bias, 197 dropout=self.dropout, 198 direct_link=self.direct_link, 199 n_clusters=self.n_clusters, 200 type_clust=self.type_clust, 201 type_scaling=self.type_scaling, 202 col_sample=self.col_sample, 203 row_sample=self.row_sample, 204 seed=self.seed, 205 ) 206 207 # 1 - Sequential training ----- 208 209 if self.n_jobs is None: 210 self.voter_ = rbagloop_regression( 211 base_learner, X, y, self.n_estimators, self.verbose, self.seed 212 ) 213 214 self.n_estimators = len(self.voter_) 215 216 return self 217 218 # 2 - Parallel training ----- 219 # buggy 220 # if self.n_jobs is not None: 221 def fit_estimators(m): 222 base_learner__ = deepcopy(base_learner) 223 base_learner__.set_params(seed=self.seed + m * 1000) 224 base_learner__.fit(X, y, **kwargs) 225 return base_learner__ 226 227 if self.verbose == 1: 228 voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")( 229 delayed(fit_estimators)(m) for m in tqdm(range(self.n_estimators)) 230 ) 231 else: 232 voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")( 233 delayed(fit_estimators)(m) for m in range(self.n_estimators) 234 ) 235 236 self.voter_ = {i: elt for i, elt in enumerate(voters_list)} 237 238 self.n_estimators = len(self.voter_) 239 240 return self 241 242 def predict(self, X, weights=None, **kwargs): 243 """Predict for test data X. 244 245 Args: 246 247 X: {array-like}, shape = [n_samples, n_features] 248 Training vectors, where n_samples is the number 249 of samples and n_features is the number of features. 250 251 **kwargs: additional parameters to be passed to 252 self.cook_test_set 253 254 Returns: 255 256 estimates for test data: {array-like} 257 258 """ 259 260 def calculate_preds(voter, weights=None): 261 ensemble_preds = 0 262 263 n_iter = len(voter) 264 265 assert n_iter > 0, "no estimator found in `RandomBag` ensemble" 266 267 if weights is None: 268 for idx, elt in voter.items(): 269 ensemble_preds += elt.predict(X) 270 271 return ensemble_preds / n_iter 272 273 # if weights is not None: 274 for idx, elt in voter.items(): 275 ensemble_preds += weights[idx] * elt.predict(X) 276 277 return ensemble_preds 278 279 # end calculate_preds ---- 280 281 if weights is None: 282 return calculate_preds(self.voter_) 283 284 # if weights is not None: 285 self.weights = weights 286 287 return calculate_preds(self.voter_, weights)
Randomized 'Bagging' Regression model
Parameters:
obj: object
any object containing a method fit (obj.fit()) and a method predict
(obj.predict())
n_estimators: int
number of bagging iterations (base learners in the ensemble)
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
voter_: dict
dictionary containing all the fitted base-learners
Examples:
import numpy as np
import nnetsauce as ns
from sklearn.datasets import fetch_california_housing
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
X, y = fetch_california_housing(return_X_y=True, as_frame=False)
# split data into training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y,
test_size=0.2, random_state=13)
# Requires further tuning
obj = DecisionTreeRegressor(max_depth=3, random_state=123)
obj2 = ns.RandomBagRegressor(obj=obj, direct_link=False,
n_estimators=50,
col_sample=0.9, row_sample=0.9,
dropout=0, n_clusters=0, verbose=1)
obj2.fit(X_train, y_train)
print(np.sqrt(obj2.score(X_test, y_test))) # RMSE
169 def fit(self, X, y, **kwargs): 170 """Fit Random 'Bagging' model to training data (X, y). 171 172 Args: 173 174 X: {array-like}, shape = [n_samples, n_features] 175 Training vectors, where n_samples is the number 176 of samples and n_features is the number of features. 177 178 y: array-like, shape = [n_samples] 179 Target values. 180 181 **kwargs: additional parameters to be passed to 182 self.cook_training_set or self.obj.fit 183 184 Returns: 185 186 self: object 187 188 """ 189 190 base_learner = CustomRegressor( 191 self.obj, 192 n_hidden_features=self.n_hidden_features, 193 activation_name=self.activation_name, 194 a=self.a, 195 nodes_sim=self.nodes_sim, 196 bias=self.bias, 197 dropout=self.dropout, 198 direct_link=self.direct_link, 199 n_clusters=self.n_clusters, 200 type_clust=self.type_clust, 201 type_scaling=self.type_scaling, 202 col_sample=self.col_sample, 203 row_sample=self.row_sample, 204 seed=self.seed, 205 ) 206 207 # 1 - Sequential training ----- 208 209 if self.n_jobs is None: 210 self.voter_ = rbagloop_regression( 211 base_learner, X, y, self.n_estimators, self.verbose, self.seed 212 ) 213 214 self.n_estimators = len(self.voter_) 215 216 return self 217 218 # 2 - Parallel training ----- 219 # buggy 220 # if self.n_jobs is not None: 221 def fit_estimators(m): 222 base_learner__ = deepcopy(base_learner) 223 base_learner__.set_params(seed=self.seed + m * 1000) 224 base_learner__.fit(X, y, **kwargs) 225 return base_learner__ 226 227 if self.verbose == 1: 228 voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")( 229 delayed(fit_estimators)(m) for m in tqdm(range(self.n_estimators)) 230 ) 231 else: 232 voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")( 233 delayed(fit_estimators)(m) for m in range(self.n_estimators) 234 ) 235 236 self.voter_ = {i: elt for i, elt in enumerate(voters_list)} 237 238 self.n_estimators = len(self.voter_) 239 240 return self
Fit Random 'Bagging' model to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
242 def predict(self, X, weights=None, **kwargs): 243 """Predict for test data X. 244 245 Args: 246 247 X: {array-like}, shape = [n_samples, n_features] 248 Training vectors, where n_samples is the number 249 of samples and n_features is the number of features. 250 251 **kwargs: additional parameters to be passed to 252 self.cook_test_set 253 254 Returns: 255 256 estimates for test data: {array-like} 257 258 """ 259 260 def calculate_preds(voter, weights=None): 261 ensemble_preds = 0 262 263 n_iter = len(voter) 264 265 assert n_iter > 0, "no estimator found in `RandomBag` ensemble" 266 267 if weights is None: 268 for idx, elt in voter.items(): 269 ensemble_preds += elt.predict(X) 270 271 return ensemble_preds / n_iter 272 273 # if weights is not None: 274 for idx, elt in voter.items(): 275 ensemble_preds += weights[idx] * elt.predict(X) 276 277 return ensemble_preds 278 279 # end calculate_preds ---- 280 281 if weights is None: 282 return calculate_preds(self.voter_) 283 284 # if weights is not None: 285 self.weights = weights 286 287 return calculate_preds(self.voter_, weights)
Predict for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
weights: array-like of shape (n_estimators,), optional
per-estimator weights used to average the base learners' predictions (simple average if None; see the sketch below)
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
estimates for test data: {array-like}
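The `weights` argument replaces the simple average of the base learners with a weighted sum. A short sketch, continuing from the `obj2` ensemble fitted in the Examples section above; equal weights should reproduce the default average up to floating point:

```python
import numpy as np

# equal weights reproduce the default (unweighted) average of the base learners
w = np.repeat(1.0 / obj2.n_estimators, obj2.n_estimators)
print(np.allclose(obj2.predict(X_test, weights=w), obj2.predict(X_test)))
```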
18class RandomBagClassifier(RandomBag, ClassifierMixin): 19 """Randomized 'Bagging' Classification model 20 21 Parameters: 22 23 obj: object 24 any object containing a method fit (obj.fit()) and a method predict 25 (obj.predict()) 26 27 n_estimators: int 28 number of boosting iterations 29 30 n_hidden_features: int 31 number of nodes in the hidden layer 32 33 activation_name: str 34 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 35 36 a: float 37 hyperparameter for 'prelu' or 'elu' activation function 38 39 nodes_sim: str 40 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 41 'uniform' 42 43 bias: boolean 44 indicates if the hidden layer contains a bias term (True) or not 45 (False) 46 47 dropout: float 48 regularization parameter; (random) percentage of nodes dropped out 49 of the training 50 51 direct_link: boolean 52 indicates if the original predictors are included (True) in model's 53 fitting or not (False) 54 55 n_clusters: int 56 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: 57 no clustering) 58 59 cluster_encode: bool 60 defines how the variable containing clusters is treated (default is one-hot) 61 if `False`, then labels are used, without one-hot encoding 62 63 type_clust: str 64 type of clustering method: currently k-means ('kmeans') or Gaussian 65 Mixture Model ('gmm') 66 67 type_scaling: a tuple of 3 strings 68 scaling methods for inputs, hidden layer, and clustering respectively 69 (and when relevant). 70 Currently available: standardization ('std') or MinMax scaling ('minmax') 71 72 col_sample: float 73 percentage of covariates randomly chosen for training 74 75 row_sample: float 76 percentage of rows chosen for training, by stratified bootstrapping 77 78 seed: int 79 reproducibility seed for nodes_sim=='uniform' 80 81 backend: str 82 "cpu" or "gpu" or "tpu" 83 84 Attributes: 85 86 voter_: dict 87 dictionary containing all the fitted base-learners 88 89 90 Examples: 91 92 See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/randombag_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/randombag_classification.py) 93 94 ```python 95 import nnetsauce as ns 96 from sklearn.datasets import load_breast_cancer 97 from sklearn.tree import DecisionTreeClassifier 98 from sklearn.model_selection import train_test_split 99 from sklearn import metrics 100 from time import time 101 102 103 breast_cancer = load_breast_cancer() 104 Z = breast_cancer.data 105 t = breast_cancer.target 106 np.random.seed(123) 107 X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2) 108 109 # decision tree 110 clf = DecisionTreeClassifier(max_depth=2, random_state=123) 111 fit_obj = ns.RandomBagClassifier(clf, n_hidden_features=2, 112 direct_link=True, 113 n_estimators=100, 114 col_sample=0.9, row_sample=0.9, 115 dropout=0.3, n_clusters=0, verbose=1) 116 117 start = time() 118 fit_obj.fit(X_train, y_train) 119 print(f"Elapsed {time() - start}") 120 121 print(fit_obj.score(X_test, y_test)) 122 print(fit_obj.score(X_test, y_test, scoring="roc_auc")) 123 124 start = time() 125 preds = fit_obj.predict(X_test) 126 print(f"Elapsed {time() - start}") 127 print(metrics.classification_report(preds, y_test)) 128 ``` 129 130 """ 131 132 # construct the object ----- 133 134 def __init__( 135 self, 136 obj, 137 n_estimators=10, 138 n_hidden_features=1, 139 activation_name="relu", 140 a=0.01, 141 nodes_sim="sobol", 142 bias=True, 143 dropout=0, 144 direct_link=False, 145 n_clusters=2, 146 
cluster_encode=True, 147 type_clust="kmeans", 148 type_scaling=("std", "std", "std"), 149 col_sample=1, 150 row_sample=1, 151 n_jobs=None, 152 seed=123, 153 verbose=1, 154 backend="cpu", 155 ): 156 super().__init__( 157 obj=obj, 158 n_estimators=n_estimators, 159 n_hidden_features=n_hidden_features, 160 activation_name=activation_name, 161 a=a, 162 nodes_sim=nodes_sim, 163 bias=bias, 164 dropout=dropout, 165 direct_link=direct_link, 166 n_clusters=n_clusters, 167 cluster_encode=cluster_encode, 168 type_clust=type_clust, 169 type_scaling=type_scaling, 170 col_sample=col_sample, 171 row_sample=row_sample, 172 seed=seed, 173 backend=backend, 174 ) 175 176 self.type_fit = "classification" 177 self.verbose = verbose 178 self.n_jobs = n_jobs 179 self.voter_ = {} 180 181 def fit(self, X, y, **kwargs): 182 """Fit Random 'Bagging' model to training data (X, y). 183 184 Args: 185 186 X: {array-like}, shape = [n_samples, n_features] 187 Training vectors, where n_samples is the number 188 of samples and n_features is the number of features. 189 190 y: array-like, shape = [n_samples] 191 Target values. 192 193 **kwargs: additional parameters to be passed to 194 self.cook_training_set or self.obj.fit 195 196 Returns: 197 198 self: object 199 200 """ 201 202 assert mx.is_factor(y), "y must contain only integers" 203 204 self.n_classes_ = len(np.unique(y)) # for compatibility with sklearn 205 206 # training 207 self.n_classes = len(np.unique(y)) 208 209 base_learner = CustomClassifier( 210 self.obj, 211 n_hidden_features=self.n_hidden_features, 212 activation_name=self.activation_name, 213 a=self.a, 214 nodes_sim=self.nodes_sim, 215 bias=self.bias, 216 dropout=self.dropout, 217 direct_link=self.direct_link, 218 n_clusters=self.n_clusters, 219 type_clust=self.type_clust, 220 type_scaling=self.type_scaling, 221 col_sample=self.col_sample, 222 row_sample=self.row_sample, 223 seed=self.seed, 224 cv_calibration=None 225 ) 226 227 # 1 - Sequential training ----- 228 229 if self.n_jobs is None: 230 self.voter_ = rbagloop_classification( 231 base_learner, X, y, self.n_estimators, self.verbose, self.seed 232 ) 233 234 self.n_estimators = len(self.voter_) 235 236 return self 237 238 # 2 - Parallel training ----- 239 # buggy 240 # if self.n_jobs is not None: 241 def fit_estimators(m): 242 base_learner__ = deepcopy(base_learner) 243 base_learner__.set_params(seed=self.seed + m * 1000) 244 base_learner__.fit(X, y, **kwargs) 245 return base_learner__ 246 247 if self.verbose == 1: 248 voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")( 249 delayed(fit_estimators)(m) for m in tqdm(range(self.n_estimators)) 250 ) 251 else: 252 voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")( 253 delayed(fit_estimators)(m) for m in range(self.n_estimators) 254 ) 255 256 self.voter_ = {idx: elt for idx, elt in enumerate(voters_list)} 257 258 self.n_estimators = len(self.voter_) 259 self.classes_ = np.unique(y) 260 return self 261 262 def predict(self, X, weights=None, **kwargs): 263 """Predict test data X. 264 265 Args: 266 267 X: {array-like}, shape = [n_samples, n_features] 268 Training vectors, where n_samples is the number 269 of samples and n_features is the number of features. 270 271 **kwargs: additional parameters to be passed to 272 self.cook_test_set 273 274 Returns: 275 276 model predictions: {array-like} 277 278 """ 279 return self.predict_proba(X, weights, **kwargs).argmax(axis=1) 280 281 def predict_proba(self, X, weights=None, **kwargs): 282 """Predict probabilities for test data X. 
283 284 Args: 285 286 X: {array-like}, shape = [n_samples, n_features] 287 Training vectors, where n_samples is the number 288 of samples and n_features is the number of features. 289 290 **kwargs: additional parameters to be passed to 291 self.cook_test_set 292 293 Returns: 294 295 probability estimates for test data: {array-like} 296 297 """ 298 299 def calculate_probas(voter, weights=None, verbose=None): 300 ensemble_proba = 0 301 302 n_iter = len(voter) 303 304 assert n_iter > 0, "no estimator found in `RandomBag` ensemble" 305 306 if weights is None: 307 for idx, elt in voter.items(): 308 try: 309 ensemble_proba += elt.predict_proba(X) 310 311 # if verbose == 1: 312 # pbar.update(idx) 313 314 except: 315 continue 316 317 # if verbose == 1: 318 # pbar.update(n_iter) 319 320 return ensemble_proba / n_iter 321 322 # if weights is not None: 323 for idx, elt in voter.items(): 324 ensemble_proba += weights[idx] * elt.predict_proba(X) 325 326 # if verbose == 1: 327 # pbar.update(idx) 328 329 # if verbose == 1: 330 # pbar.update(n_iter) 331 332 return ensemble_proba 333 334 # end calculate_probas ---- 335 336 if self.n_jobs is None: 337 # if self.verbose == 1: 338 # pbar = Progbar(self.n_estimators) 339 340 if weights is None: 341 return calculate_probas(self.voter_, verbose=self.verbose) 342 343 # if weights is not None: 344 self.weights = weights 345 346 return calculate_probas(self.voter_, weights, verbose=self.verbose) 347 348 # if self.n_jobs is not None: 349 def predict_estimator(m): 350 try: 351 return self.voter_[m].predict_proba(X) 352 except: 353 pass 354 355 if self.verbose == 1: 356 preds = Parallel(n_jobs=self.n_jobs, prefer="threads")( 357 delayed(predict_estimator)(m) for m in tqdm(range(self.n_estimators)) 358 ) 359 360 else: 361 preds = Parallel(n_jobs=self.n_jobs, prefer="threads")( 362 delayed(predict_estimator)(m) for m in range(self.n_estimators) 363 ) 364 365 ensemble_proba = 0 366 367 if weights is None: 368 for i in range(self.n_estimators): 369 ensemble_proba += preds[i] 370 371 return ensemble_proba / self.n_estimators 372 373 for i in range(self.n_estimators): 374 ensemble_proba += weights[i] * preds[i] 375 376 return ensemble_proba
Randomized 'Bagging' Classification model
Parameters:
obj: object
any object containing a method fit (obj.fit()) and a method predict
(obj.predict())
n_estimators: int
number of bagging iterations (base learners in the ensemble)
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
voter_: dict
dictionary containing all the fitted base-learners
Examples:
See also https://github.com/Techtonique/nnetsauce/blob/master/examples/randombag_classification.py
import numpy as np
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time
breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)
# decision tree
clf = DecisionTreeClassifier(max_depth=2, random_state=123)
fit_obj = ns.RandomBagClassifier(clf, n_hidden_features=2,
direct_link=True,
n_estimators=100,
col_sample=0.9, row_sample=0.9,
dropout=0.3, n_clusters=0, verbose=1)
start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")
print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
181 def fit(self, X, y, **kwargs): 182 """Fit Random 'Bagging' model to training data (X, y). 183 184 Args: 185 186 X: {array-like}, shape = [n_samples, n_features] 187 Training vectors, where n_samples is the number 188 of samples and n_features is the number of features. 189 190 y: array-like, shape = [n_samples] 191 Target values. 192 193 **kwargs: additional parameters to be passed to 194 self.cook_training_set or self.obj.fit 195 196 Returns: 197 198 self: object 199 200 """ 201 202 assert mx.is_factor(y), "y must contain only integers" 203 204 self.n_classes_ = len(np.unique(y)) # for compatibility with sklearn 205 206 # training 207 self.n_classes = len(np.unique(y)) 208 209 base_learner = CustomClassifier( 210 self.obj, 211 n_hidden_features=self.n_hidden_features, 212 activation_name=self.activation_name, 213 a=self.a, 214 nodes_sim=self.nodes_sim, 215 bias=self.bias, 216 dropout=self.dropout, 217 direct_link=self.direct_link, 218 n_clusters=self.n_clusters, 219 type_clust=self.type_clust, 220 type_scaling=self.type_scaling, 221 col_sample=self.col_sample, 222 row_sample=self.row_sample, 223 seed=self.seed, 224 cv_calibration=None 225 ) 226 227 # 1 - Sequential training ----- 228 229 if self.n_jobs is None: 230 self.voter_ = rbagloop_classification( 231 base_learner, X, y, self.n_estimators, self.verbose, self.seed 232 ) 233 234 self.n_estimators = len(self.voter_) 235 236 return self 237 238 # 2 - Parallel training ----- 239 # buggy 240 # if self.n_jobs is not None: 241 def fit_estimators(m): 242 base_learner__ = deepcopy(base_learner) 243 base_learner__.set_params(seed=self.seed + m * 1000) 244 base_learner__.fit(X, y, **kwargs) 245 return base_learner__ 246 247 if self.verbose == 1: 248 voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")( 249 delayed(fit_estimators)(m) for m in tqdm(range(self.n_estimators)) 250 ) 251 else: 252 voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")( 253 delayed(fit_estimators)(m) for m in range(self.n_estimators) 254 ) 255 256 self.voter_ = {idx: elt for idx, elt in enumerate(voters_list)} 257 258 self.n_estimators = len(self.voter_) 259 self.classes_ = np.unique(y) 260 return self
Fit Random 'Bagging' model to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
262 def predict(self, X, weights=None, **kwargs): 263 """Predict test data X. 264 265 Args: 266 267 X: {array-like}, shape = [n_samples, n_features] 268 Training vectors, where n_samples is the number 269 of samples and n_features is the number of features. 270 271 **kwargs: additional parameters to be passed to 272 self.cook_test_set 273 274 Returns: 275 276 model predictions: {array-like} 277 278 """ 279 return self.predict_proba(X, weights, **kwargs).argmax(axis=1)
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
weights: array-like of shape (n_estimators,), optional
per-estimator weights passed to predict_proba (simple average if None)
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
281 def predict_proba(self, X, weights=None, **kwargs): 282 """Predict probabilities for test data X. 283 284 Args: 285 286 X: {array-like}, shape = [n_samples, n_features] 287 Training vectors, where n_samples is the number 288 of samples and n_features is the number of features. 289 290 **kwargs: additional parameters to be passed to 291 self.cook_test_set 292 293 Returns: 294 295 probability estimates for test data: {array-like} 296 297 """ 298 299 def calculate_probas(voter, weights=None, verbose=None): 300 ensemble_proba = 0 301 302 n_iter = len(voter) 303 304 assert n_iter > 0, "no estimator found in `RandomBag` ensemble" 305 306 if weights is None: 307 for idx, elt in voter.items(): 308 try: 309 ensemble_proba += elt.predict_proba(X) 310 311 # if verbose == 1: 312 # pbar.update(idx) 313 314 except: 315 continue 316 317 # if verbose == 1: 318 # pbar.update(n_iter) 319 320 return ensemble_proba / n_iter 321 322 # if weights is not None: 323 for idx, elt in voter.items(): 324 ensemble_proba += weights[idx] * elt.predict_proba(X) 325 326 # if verbose == 1: 327 # pbar.update(idx) 328 329 # if verbose == 1: 330 # pbar.update(n_iter) 331 332 return ensemble_proba 333 334 # end calculate_probas ---- 335 336 if self.n_jobs is None: 337 # if self.verbose == 1: 338 # pbar = Progbar(self.n_estimators) 339 340 if weights is None: 341 return calculate_probas(self.voter_, verbose=self.verbose) 342 343 # if weights is not None: 344 self.weights = weights 345 346 return calculate_probas(self.voter_, weights, verbose=self.verbose) 347 348 # if self.n_jobs is not None: 349 def predict_estimator(m): 350 try: 351 return self.voter_[m].predict_proba(X) 352 except: 353 pass 354 355 if self.verbose == 1: 356 preds = Parallel(n_jobs=self.n_jobs, prefer="threads")( 357 delayed(predict_estimator)(m) for m in tqdm(range(self.n_estimators)) 358 ) 359 360 else: 361 preds = Parallel(n_jobs=self.n_jobs, prefer="threads")( 362 delayed(predict_estimator)(m) for m in range(self.n_estimators) 363 ) 364 365 ensemble_proba = 0 366 367 if weights is None: 368 for i in range(self.n_estimators): 369 ensemble_proba += preds[i] 370 371 return ensemble_proba / self.n_estimators 372 373 for i in range(self.n_estimators): 374 ensemble_proba += weights[i] * preds[i] 375 376 return ensemble_proba
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
weights: array-like of shape (n_estimators,), optional
per-estimator weights used to average the base learners' probabilities (simple average if None)
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
16class RegressorUpdater(BaseEstimator, RegressorMixin): 17 """ 18 Update a regression model with new observations 19 20 Parameters 21 ---------- 22 regr: object 23 A regression model with a coef_ attribute 24 alpha: float 25 Updating factor's exponent 26 27 Attributes 28 ---------- 29 n_obs_: int 30 Number of observations 31 coef_: np.ndarray 32 Coefficients of the model 33 updating_factor_: float 34 Updating factor 35 36 """ 37 38 def __init__(self, regr, alpha=0.5): 39 self.regr = regr 40 self.alpha = alpha 41 self.n_obs_ = None 42 self.coef_ = None 43 self.updating_factor_ = None 44 try: 45 self.coef_ = self.regr.coef_ 46 if isinstance(self.regr, Base): 47 self.n_obs_ = self.regr.scaler_.n_samples_seen_ 48 except AttributeError: 49 pass 50 51 def fit(self, X, y, **kwargs): 52 53 if isinstance(self.regr, CustomRegressor): # nnetsauce model not deep --- 54 if check_is_fitted(self.regr) == False: 55 self.regr.fit(X, y, **kwargs) 56 self.n_obs_ = X.shape[0] 57 if hasattr(self.regr, "coef_"): 58 self.coef_ = self.regr.coef_ 59 return self 60 self.n_obs_ = self.regr.scaler_.n_samples_seen_ 61 if hasattr(self.regr, "coef_"): 62 self.coef_ = self.regr.coef_ 63 return self 64 65 if ( 66 hasattr(self.regr, "coef_") == False 67 ): # sklearn model or CustomRegressor model --- 68 self.regr.fit(X, y) 69 self.n_obs_ = X.shape[0] 70 self.regr.fit(X, y) 71 if hasattr(self.regr, "stacked_obj"): 72 self.coef_ = self.regr.stacked_obj.coef_ 73 else: 74 self.coef_ = self.regr.coef_ 75 return self 76 self.n_obs_ = X.shape[0] 77 if hasattr(self.regr, "coef_"): 78 self.coef_ = self.regr.coef_ 79 return self 80 81 def predict(self, X): 82 # assert hasattr(self.regr, "coef_"), "model must have coef_ attribute" 83 return self.regr.predict(X) 84 85 def partial_fit(self, X, y): 86 87 assert hasattr( 88 self.regr, "coef_" 89 ), "model must be fitted first (i.e have 'coef_' attribute)" 90 assert ( 91 self.n_obs_ is not None 92 ), "model must be fitted first (i.e have 'n_obs_' attribute)" 93 94 if len(X.shape) == 1: 95 X = X.reshape(1, -1) 96 97 assert X.shape[0] == 1, "X must have one row" 98 99 self.updating_factor_ = self.n_obs_ ** (-self.alpha) 100 101 if isinstance(self.regr, Base): # nnetsauce model --- 102 103 newX = deepcopy(X) 104 105 if isinstance( 106 self.regr, CustomRegressor 107 ): # other nnetsauce model (CustomRegressor) --- 108 newX = self.regr.cook_test_set(X=X) 109 if isinstance(X, pd.DataFrame): 110 newx = newX.values.ravel() 111 else: 112 newx = newX.ravel() 113 114 else: # an sklearn model --- 115 116 if isinstance(X, pd.DataFrame): 117 newx = X.values.ravel() 118 else: 119 newx = X.ravel() 120 121 new_coef = self.regr.coef_ + self.updating_factor_ * np.dot( 122 newx, y - np.dot(newx, self.regr.coef_) 123 ) 124 self.regr.coef_ = _update_mean(self.regr.coef_, self.n_obs_, new_coef) 125 self.coef_ = deepcopy(self.regr.coef_) 126 self.n_obs_ += 1 127 return self
Update a regression model with new observations
Parameters
regr: object
A regression model with a coef_ attribute
alpha: float
Updating factor's exponent
Attributes
n_obs_: int
Number of observations
coef_: np.ndarray
Coefficients of the model
updating_factor_: float
Updating factor
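A minimal usage sketch, assuming a scikit-learn linear model that exposes `coef_` after fitting; `partial_fit` expects one new observation per call:

```python
import nnetsauce as ns
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

X, y = fetch_california_housing(return_X_y=True, as_frame=False)
X_train, X_new, y_train, y_new = train_test_split(X, y, test_size=0.1, random_state=42)

updater = ns.RegressorUpdater(regr=LinearRegression(), alpha=0.5)
updater.fit(X_train, y_train)  # fits the base model, stores coef_ and n_obs_

# update the coefficients one new observation at a time
for i in range(5):
    updater.partial_fit(X_new[i, :], y_new[i])

print(updater.coef_)
print(updater.n_obs_)  # incremented by one at each partial_fit call
```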
51 def fit(self, X, y, **kwargs): 52 53 if isinstance(self.regr, CustomRegressor): # nnetsauce model not deep --- 54 if check_is_fitted(self.regr) == False: 55 self.regr.fit(X, y, **kwargs) 56 self.n_obs_ = X.shape[0] 57 if hasattr(self.regr, "coef_"): 58 self.coef_ = self.regr.coef_ 59 return self 60 self.n_obs_ = self.regr.scaler_.n_samples_seen_ 61 if hasattr(self.regr, "coef_"): 62 self.coef_ = self.regr.coef_ 63 return self 64 65 if ( 66 hasattr(self.regr, "coef_") == False 67 ): # sklearn model or CustomRegressor model --- 68 self.regr.fit(X, y) 69 self.n_obs_ = X.shape[0] 70 self.regr.fit(X, y) 71 if hasattr(self.regr, "stacked_obj"): 72 self.coef_ = self.regr.stacked_obj.coef_ 73 else: 74 self.coef_ = self.regr.coef_ 75 return self 76 self.n_obs_ = X.shape[0] 77 if hasattr(self.regr, "coef_"): 78 self.coef_ = self.regr.coef_ 79 return self
16class ClassifierUpdater(BaseEstimator, ClassifierMixin): 17 """ 18 Update a regression model with new observations 19 20 Parameters 21 ---------- 22 clf: object 23 A regression model with a coef_ attribute 24 alpha: float 25 Updating factor's exponent 26 27 Attributes 28 ---------- 29 n_obs_: int 30 Number of observations 31 coef_: np.ndarray 32 Coefficients of the model 33 updating_factor_: float 34 Updating factor 35 36 """ 37 38 def __init__(self, clf, alpha=0.5): 39 self.clf = clf 40 self.alpha = alpha 41 self.n_obs_ = None 42 self.coef_ = None 43 self.updating_factor_ = None 44 try: 45 self.coef_ = self.clf.coef_ 46 if isinstance(self.clf, Base): 47 self.n_obs_ = self.clf.scaler_.n_samples_seen_ 48 except AttributeError: 49 pass 50 51 def fit(self, X, y, **kwargs): 52 53 raise NotImplementedError("fit method is not implemented for ClassifierUpdater") 54 55 if isinstance(self.clf, CustomClassifier): # nnetsauce model not deep --- 56 if check_is_fitted(self.clf) == False: 57 self.clf.fit(X, y, **kwargs) 58 self.n_obs_ = X.shape[0] 59 if hasattr(self.clf, "coef_"): 60 self.coef_ = self.clf.coef_ 61 return self 62 self.n_obs_ = self.clf.scaler_.n_samples_seen_ 63 if hasattr(self.clf, "coef_"): 64 self.coef_ = self.clf.coef_ 65 return self 66 67 if ( 68 hasattr(self.clf, "coef_") == False 69 ): # sklearn model or CustomClassifier model --- 70 self.clf.fit(X, y) 71 self.n_obs_ = X.shape[0] 72 self.clf.fit(X, y) 73 if hasattr(self.clf, "stacked_obj"): 74 self.coef_ = self.clf.stacked_obj.coef_ 75 else: 76 self.coef_ = self.clf.coef_ 77 return self 78 self.n_obs_ = X.shape[0] 79 if hasattr(self.clf, "coef_"): 80 self.coef_ = self.clf.coef_ 81 return self 82 83 def predict(self, X): 84 85 raise NotImplementedError( 86 "predict method is not implemented for ClassifierUpdater" 87 ) 88 # assert hasattr(self.clf, "coef_"), "model must have coef_ attribute" 89 return self.clf.predict(X) 90 91 def partial_fit(self, X, y): 92 93 raise NotImplementedError( 94 "partial_fit method is not implemented for ClassifierUpdater" 95 ) 96 97 assert hasattr( 98 self.clf, "coef_" 99 ), "model must be fitted first (i.e have 'coef_' attribute)" 100 assert ( 101 self.n_obs_ is not None 102 ), "model must be fitted first (i.e have 'n_obs_' attribute)" 103 104 if len(X.shape) == 1: 105 X = X.reshape(1, -1) 106 107 assert X.shape[0] == 1, "X must have one row" 108 109 self.updating_factor_ = self.n_obs_ ** (-self.alpha) 110 111 if isinstance(self.clf, Base): # nnetsauce model --- 112 113 newX = deepcopy(X) 114 115 if isinstance( 116 self.clf, CustomClassifier 117 ): # other nnetsauce model (CustomClassifier) --- 118 newX = self.clf.cook_test_set(X=X) 119 if isinstance(X, pd.DataFrame): 120 newx = newX.values.ravel() 121 else: 122 newx = newX.ravel() 123 124 else: # an sklearn model --- 125 126 if isinstance(X, pd.DataFrame): 127 newx = X.values.ravel() 128 else: 129 newx = X.ravel() 130 131 new_coef = self.clf.coef_ + self.updating_factor_ * np.dot( 132 newx, y - np.dot(newx, self.clf.coef_) 133 ) 134 self.clf.coef_ = _update_mean(self.clf.coef_, self.n_obs_, new_coef) 135 self.coef_ = deepcopy(self.clf.coef_) 136 self.n_obs_ += 1 137 return self
Update a classification model with new observations
Parameters
clf: object
A classification model with a coef_ attribute
alpha: float
Updating factor's exponent
Attributes
n_obs_: int
Number of observations
coef_: np.ndarray
Coefficients of the model
updating_factor_: float
Updating factor
51 def fit(self, X, y, **kwargs): 52 53 raise NotImplementedError("fit method is not implemented for ClassifierUpdater") 54 55 if isinstance(self.clf, CustomClassifier): # nnetsauce model not deep --- 56 if check_is_fitted(self.clf) == False: 57 self.clf.fit(X, y, **kwargs) 58 self.n_obs_ = X.shape[0] 59 if hasattr(self.clf, "coef_"): 60 self.coef_ = self.clf.coef_ 61 return self 62 self.n_obs_ = self.clf.scaler_.n_samples_seen_ 63 if hasattr(self.clf, "coef_"): 64 self.coef_ = self.clf.coef_ 65 return self 66 67 if ( 68 hasattr(self.clf, "coef_") == False 69 ): # sklearn model or CustomClassifier model --- 70 self.clf.fit(X, y) 71 self.n_obs_ = X.shape[0] 72 self.clf.fit(X, y) 73 if hasattr(self.clf, "stacked_obj"): 74 self.coef_ = self.clf.stacked_obj.coef_ 75 else: 76 self.coef_ = self.clf.coef_ 77 return self 78 self.n_obs_ = X.shape[0] 79 if hasattr(self.clf, "coef_"): 80 self.coef_ = self.clf.coef_ 81 return self
20class RidgeRegressor(Base, RegressorMixin): 21 """Basic Ridge Regression model. 22 23 Parameters: 24 lambda_: float or array-like 25 Ridge regularization parameter(s). Default is 0. 26 """ 27 28 def __init__( 29 self, 30 lambda_=0.0, 31 n_hidden_features=0, 32 activation_name="relu", 33 a=0.01, 34 nodes_sim="sobol", 35 bias=True, 36 dropout=0, 37 direct_link=True, 38 n_clusters=0, 39 cluster_encode=True, 40 type_clust="kmeans", 41 type_scaling=("std", "std", "std"), 42 col_sample=1, 43 row_sample=1, 44 seed=123, 45 backend="cpu", 46 ): 47 super().__init__( 48 n_hidden_features=n_hidden_features, 49 activation_name=activation_name, 50 a=a, 51 nodes_sim=nodes_sim, 52 bias=bias, 53 dropout=dropout, 54 direct_link=direct_link, 55 n_clusters=n_clusters, 56 cluster_encode=cluster_encode, 57 type_clust=type_clust, 58 type_scaling=type_scaling, 59 col_sample=col_sample, 60 row_sample=row_sample, 61 seed=seed, 62 backend=backend, 63 ) 64 self.lambda_ = lambda_ 65 self.scale_ = {} 66 67 def _center_scale_xy(self, X, y): 68 """Center X and y, scale X.""" 69 n = X.shape[0] 70 71 # Center X and y 72 X_mean = np.mean(X, axis=0) 73 y_mean = np.mean(y) 74 X_centered = X - X_mean 75 y_centered = y - y_mean 76 77 # Scale X 78 X_scale = np.sqrt(np.sum(X_centered**2, axis=0) / n) 79 # Avoid division by zero 80 X_scale = np.where(X_scale == 0, 1.0, X_scale) 81 X_scaled = X_centered / X_scale 82 83 return X_scaled, y_centered, X_mean, y_mean, X_scale 84 85 def fit(self, X, y): 86 """Fit Ridge regression model. 87 88 Parameters: 89 X : array-like of shape (n_samples, n_features) 90 Training data 91 y : array-like of shape (n_samples,) 92 Target values 93 94 Returns: 95 self : returns an instance of self. 96 """ 97 # Ensure numpy arrays 98 X = np.asarray(X) 99 y = np.asarray(y) 100 print(f"\nInput shapes - X: {X.shape}, y: {y.shape}") 101 print(f"First few X values: {X[:2]}") 102 print(f"First few y values: {y[:2]}") 103 104 if y.ndim == 2: 105 y = y.ravel() 106 107 # Center and scale 108 X_scaled, y_centered, self.X_mean_, self.y_mean_, self.X_scale_ = ( 109 self._center_scale_xy(X, y) 110 ) 111 112 # SVD decomposition 113 U, d, Vt = np.linalg.svd(X_scaled, full_matrices=False) 114 115 # Compute coefficients 116 rhs = np.dot(U.T, y_centered) 117 d2 = d**2 118 119 print(f"d2 shape: {d2.shape}") 120 print(f"rhs shape: {rhs.shape}") 121 print(f"Vt shape: {Vt.shape}") 122 123 if np.isscalar(self.lambda_): 124 div = d2 + self.lambda_ 125 a = (d * rhs) / div 126 print(f"\nSingle lambda case:") 127 print(f"lambda: {self.lambda_}") 128 print(f"div shape: {div.shape}") 129 print(f"a shape: {a.shape}") 130 self.coef_ = np.dot(Vt.T, a) / self.X_scale_ 131 print(f"coef shape: {self.coef_.shape}") 132 else: 133 coefs = [] 134 print(f"\nMultiple lambda case:") 135 for lambda_ in self.lambda_: 136 print(f"lambda: {lambda_}") 137 div = d2 + lambda_ 138 print(f"div shape: {div.shape}") 139 a = (d * rhs) / div 140 print(f"a shape: {a.shape}") 141 coef = np.dot(Vt.T, a) / self.X_scale_ 142 print(f"coef shape: {coef.shape}") 143 coefs.append(coef) 144 self.coef_ = np.array(coefs).T 145 print(f"final coefs shape: {self.coef_.shape}") 146 147 # Compute GCV, HKB and LW criteria 148 y_pred = self.predict(X) 149 try: 150 resid = y - y_pred 151 except Exception as e: 152 resid = y[:, np.newaxis] - y_pred 153 n, p = X.shape 154 if resid.ndim == 1: 155 s2 = np.sum(resid**2) / (n - p) 156 else: 157 s2 = np.sum(resid**2, axis=0) / (n - p) 158 159 self.HKB_ = (p - 2) * s2 / np.sum(self.coef_**2) 160 self.LW_ = (p - 2) * s2 * n / 
np.sum(y_pred**2) 161 162 if np.isscalar(self.lambda_): 163 div = d2 + self.lambda_ 164 self.GCV_ = np.sum((y - y_pred) ** 2) / (n - np.sum(d2 / div)) ** 2 165 else: 166 self.GCV_ = [] 167 for lambda_ in self.lambda_: 168 div = d2 + lambda_ 169 try: 170 gcv = np.sum((y - y_pred) ** 2) / (n - np.sum(d2 / div)) ** 2 171 except Exception as e: 172 gcv = ( 173 np.sum((y[:, np.newaxis] - y_pred) ** 2) 174 / (n - np.sum(d2 / div)) ** 2 175 ) 176 self.GCV_.append(gcv) 177 self.GCV_ = np.array(self.GCV_) 178 179 return self 180 181 def predict(self, X): 182 """Predict using the Ridge regression model. 183 184 Parameters: 185 X : array-like of shape (n_samples, n_features) 186 Samples to predict for 187 188 Returns: 189 y_pred : array-like of shape (n_samples,) 190 Returns predicted values. 191 """ 192 # Ensure X is 2D 193 X = np.asarray(X) 194 if X.ndim == 1: 195 X = X.reshape(1, -1) 196 197 # Center and scale X 198 X_scaled = (X - self.X_mean_) / self.X_scale_ 199 200 if self.backend == "cpu": 201 if np.isscalar(self.lambda_): 202 return ( 203 mo.safe_sparse_dot(X_scaled, self.coef_, backend=self.backend) 204 + self.y_mean_ 205 ) 206 else: 207 return np.array( 208 [ 209 mo.safe_sparse_dot(X_scaled, coef, backend=self.backend) + self.y_mean_ 210 for coef in self.coef_.T 211 ] 212 ).T 213 else: 214 if np.isscalar(self.lambda_): 215 return ( 216 mo.safe_sparse_dot(X_scaled, self.coef_, backend=self.backend) 217 + self.y_mean_ 218 ) 219 else: 220 return jnp.array( 221 [ 222 mo.safe_sparse_dot(X_scaled, coef, backend=self.backend) + self.y_mean_ 223 for coef in self.coef_.T 224 ] 225 ).T 226 227 def decision_function(self, X): 228 """Compute the decision function of X. 229 230 Parameters: 231 X : array-like of shape (n_samples, n_features) 232 Samples 233 234 Returns: 235 decision : array-like of shape (n_samples,) or (n_samples, n_lambdas) 236 Decision function of the input samples. The order of outputs is the same 237 as that of the provided lambda_ values. For a single lambda, returns 238 array of shape (n_samples,). For multiple lambdas, returns array of shape 239 (n_samples, n_lambdas). 240 """ 241 X = self.cook_test_set(X) 242 243 if self.backend == "cpu": 244 if np.isscalar(self.lambda_): 245 return mo.safe_sparse_dot(X, self.coef_, backend=self.backend) 246 else: 247 return np.array( 248 [ 249 mo.safe_sparse_dot(X, coef, backend=self.backend) 250 for coef in self.coef_.T 251 ] 252 ).T 253 else: 254 if np.isscalar(self.lambda_): 255 return mo.safe_sparse_dot(X, self.coef_, backend=self.backend) 256 else: 257 return jnp.array( 258 [ 259 mo.safe_sparse_dot(X, coef, backend=self.backend) 260 for coef in self.coef_.T 261 ] 262 ).T
Basic Ridge Regression model.
Parameters:
lambda_: float or array-like
Ridge regularization parameter(s); either a single value or an array of values (one set of coefficients per value). Default is 0.
The remaining constructor arguments (n_hidden_features, activation_name, a, nodes_sim, bias, dropout, direct_link, n_clusters, cluster_encode, type_clust, type_scaling, col_sample, row_sample, seed, backend) are shared with the other models in this package and keep their usual meaning.
85 def fit(self, X, y): 86 """Fit Ridge regression model. 87 88 Parameters: 89 X : array-like of shape (n_samples, n_features) 90 Training data 91 y : array-like of shape (n_samples,) 92 Target values 93 94 Returns: 95 self : returns an instance of self. 96 """ 97 # Ensure numpy arrays 98 X = np.asarray(X) 99 y = np.asarray(y) 100 print(f"\nInput shapes - X: {X.shape}, y: {y.shape}") 101 print(f"First few X values: {X[:2]}") 102 print(f"First few y values: {y[:2]}") 103 104 if y.ndim == 2: 105 y = y.ravel() 106 107 # Center and scale 108 X_scaled, y_centered, self.X_mean_, self.y_mean_, self.X_scale_ = ( 109 self._center_scale_xy(X, y) 110 ) 111 112 # SVD decomposition 113 U, d, Vt = np.linalg.svd(X_scaled, full_matrices=False) 114 115 # Compute coefficients 116 rhs = np.dot(U.T, y_centered) 117 d2 = d**2 118 119 print(f"d2 shape: {d2.shape}") 120 print(f"rhs shape: {rhs.shape}") 121 print(f"Vt shape: {Vt.shape}") 122 123 if np.isscalar(self.lambda_): 124 div = d2 + self.lambda_ 125 a = (d * rhs) / div 126 print(f"\nSingle lambda case:") 127 print(f"lambda: {self.lambda_}") 128 print(f"div shape: {div.shape}") 129 print(f"a shape: {a.shape}") 130 self.coef_ = np.dot(Vt.T, a) / self.X_scale_ 131 print(f"coef shape: {self.coef_.shape}") 132 else: 133 coefs = [] 134 print(f"\nMultiple lambda case:") 135 for lambda_ in self.lambda_: 136 print(f"lambda: {lambda_}") 137 div = d2 + lambda_ 138 print(f"div shape: {div.shape}") 139 a = (d * rhs) / div 140 print(f"a shape: {a.shape}") 141 coef = np.dot(Vt.T, a) / self.X_scale_ 142 print(f"coef shape: {coef.shape}") 143 coefs.append(coef) 144 self.coef_ = np.array(coefs).T 145 print(f"final coefs shape: {self.coef_.shape}") 146 147 # Compute GCV, HKB and LW criteria 148 y_pred = self.predict(X) 149 try: 150 resid = y - y_pred 151 except Exception as e: 152 resid = y[:, np.newaxis] - y_pred 153 n, p = X.shape 154 if resid.ndim == 1: 155 s2 = np.sum(resid**2) / (n - p) 156 else: 157 s2 = np.sum(resid**2, axis=0) / (n - p) 158 159 self.HKB_ = (p - 2) * s2 / np.sum(self.coef_**2) 160 self.LW_ = (p - 2) * s2 * n / np.sum(y_pred**2) 161 162 if np.isscalar(self.lambda_): 163 div = d2 + self.lambda_ 164 self.GCV_ = np.sum((y - y_pred) ** 2) / (n - np.sum(d2 / div)) ** 2 165 else: 166 self.GCV_ = [] 167 for lambda_ in self.lambda_: 168 div = d2 + lambda_ 169 try: 170 gcv = np.sum((y - y_pred) ** 2) / (n - np.sum(d2 / div)) ** 2 171 except Exception as e: 172 gcv = ( 173 np.sum((y[:, np.newaxis] - y_pred) ** 2) 174 / (n - np.sum(d2 / div)) ** 2 175 ) 176 self.GCV_.append(gcv) 177 self.GCV_ = np.array(self.GCV_) 178 179 return self
Fit Ridge regression model.
Parameters:
X : array-like of shape (n_samples, n_features)
Training data
y : array-like of shape (n_samples,)
Target values
Returns:
self : returns an instance of self.
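For reference, the coefficients computed in fit follow the usual SVD form of ridge regression on the centered and scaled inputs, coef = V · diag(d / (d² + λ)) · Uᵀ y. A minimal NumPy sketch of that identity (the toy data and the penalty `lam` are illustrative assumptions, not part of the package):

```python
import numpy as np

rng = np.random.default_rng(123)
X = rng.normal(size=(50, 3))
y = rng.normal(size=50)
lam = 0.5  # assumed ridge penalty for this sketch

# center X and y, scale X by sqrt(sum of squares / n), as in _center_scale_xy
X_mean, y_mean = X.mean(axis=0), y.mean()
X_scale = np.sqrt(((X - X_mean) ** 2).sum(axis=0) / X.shape[0])
Xs, yc = (X - X_mean) / X_scale, y - y_mean

# SVD route: coef = V diag(d / (d**2 + lam)) U^T y
U, d, Vt = np.linalg.svd(Xs, full_matrices=False)
coef_svd = Vt.T @ ((d * (U.T @ yc)) / (d**2 + lam))

# closed-form route: (X^T X + lam I)^{-1} X^T y, same result
coef_direct = np.linalg.solve(Xs.T @ Xs + lam * np.eye(Xs.shape[1]), Xs.T @ yc)

print(np.allclose(coef_svd, coef_direct))  # True
```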
181 def predict(self, X): 182 """Predict using the Ridge regression model. 183 184 Parameters: 185 X : array-like of shape (n_samples, n_features) 186 Samples to predict for 187 188 Returns: 189 y_pred : array-like of shape (n_samples,) 190 Returns predicted values. 191 """ 192 # Ensure X is 2D 193 X = np.asarray(X) 194 if X.ndim == 1: 195 X = X.reshape(1, -1) 196 197 # Center and scale X 198 X_scaled = (X - self.X_mean_) / self.X_scale_ 199 200 if self.backend == "cpu": 201 if np.isscalar(self.lambda_): 202 return ( 203 mo.safe_sparse_dot(X_scaled, self.coef_, backend=self.backend) 204 + self.y_mean_ 205 ) 206 else: 207 return np.array( 208 [ 209 mo.safe_sparse_dot(X_scaled, coef, backend=self.backend) + self.y_mean_ 210 for coef in self.coef_.T 211 ] 212 ).T 213 else: 214 if np.isscalar(self.lambda_): 215 return ( 216 mo.safe_sparse_dot(X_scaled, self.coef_, backend=self.backend) 217 + self.y_mean_ 218 ) 219 else: 220 return jnp.array( 221 [ 222 mo.safe_sparse_dot(X_scaled, coef, backend=self.backend) + self.y_mean_ 223 for coef in self.coef_.T 224 ] 225 ).T
Predict using the Ridge regression model.
Parameters:
X : array-like of shape (n_samples, n_features)
Samples to predict for
Returns:
y_pred : array-like of shape (n_samples,) or (n_samples, n_lambdas)
Predicted values; one column per regularization value when lambda_ is an array.
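A short usage sketch showing the two `lambda_` modes and the resulting prediction shapes (the dataset and penalty values are illustrative assumptions; fit also prints some diagnostic information):

```python
import numpy as np
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

# single regularization parameter -> predictions of shape (n_samples,)
reg = ns.RidgeRegressor(lambda_=0.1).fit(X_train, y_train)
print(reg.predict(X_test).shape)

# array of regularization parameters -> one column of predictions per lambda
reg_path = ns.RidgeRegressor(lambda_=np.array([0.01, 0.1, 1.0])).fit(X_train, y_train)
print(reg_path.predict(X_test).shape)  # (n_samples, n_lambdas)
print(reg_path.GCV_)                   # one GCV value per lambda
```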
23class Ridge2Regressor(Ridge2, RegressorMixin): 24 """Ridge regression with 2 regularization parameters derived from class Ridge 25 26 Parameters: 27 28 n_hidden_features: int 29 number of nodes in the hidden layer 30 31 activation_name: str 32 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 33 34 a: float 35 hyperparameter for 'prelu' or 'elu' activation function 36 37 nodes_sim: str 38 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 39 'uniform' 40 41 bias: boolean 42 indicates if the hidden layer contains a bias term (True) or not 43 (False) 44 45 dropout: float 46 regularization parameter; (random) percentage of nodes dropped out 47 of the training 48 49 n_clusters: int 50 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: 51 no clustering) 52 53 cluster_encode: bool 54 defines how the variable containing clusters is treated (default is one-hot) 55 if `False`, then labels are used, without one-hot encoding 56 57 type_clust: str 58 type of clustering method: currently k-means ('kmeans') or Gaussian 59 Mixture Model ('gmm') 60 61 type_scaling: a tuple of 3 strings 62 scaling methods for inputs, hidden layer, and clustering respectively 63 (and when relevant). 64 Currently available: standardization ('std') or MinMax scaling ('minmax') 65 66 lambda1: float 67 regularization parameter on direct link 68 69 lambda2: float 70 regularization parameter on hidden layer 71 72 seed: int 73 reproducibility seed for nodes_sim=='uniform' 74 75 backend: str 76 'cpu' or 'gpu' or 'tpu' 77 78 Attributes: 79 80 beta_: {array-like} 81 regression coefficients 82 83 y_mean_: float 84 average response 85 86 """ 87 88 # construct the object ----- 89 90 def __init__( 91 self, 92 n_hidden_features=5, 93 activation_name="relu", 94 a=0.01, 95 nodes_sim="sobol", 96 bias=True, 97 dropout=0, 98 n_clusters=2, 99 cluster_encode=True, 100 type_clust="kmeans", 101 type_scaling=("std", "std", "std"), 102 lambda1=0.1, 103 lambda2=0.1, 104 seed=123, 105 backend="cpu", 106 ): 107 super().__init__( 108 n_hidden_features=n_hidden_features, 109 activation_name=activation_name, 110 a=a, 111 nodes_sim=nodes_sim, 112 bias=bias, 113 dropout=dropout, 114 n_clusters=n_clusters, 115 cluster_encode=cluster_encode, 116 type_clust=type_clust, 117 type_scaling=type_scaling, 118 lambda1=lambda1, 119 lambda2=lambda2, 120 seed=seed, 121 backend=backend, 122 ) 123 124 self.type_fit = "regression" 125 126 def fit(self, X, y, **kwargs): 127 """Fit Ridge model to training data (X, y). 128 129 Args: 130 131 X: {array-like}, shape = [n_samples, n_features] 132 Training vectors, where n_samples is the number 133 of samples and n_features is the number of features. 134 135 y: array-like, shape = [n_samples] 136 Target values. 
137 138 **kwargs: additional parameters to be passed to 139 self.cook_training_set or self.obj.fit 140 141 Returns: 142 143 self: object 144 145 """ 146 147 sys_platform = platform.system() 148 149 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 150 151 n_X, p_X = X.shape 152 n_Z, p_Z = scaled_Z.shape 153 154 if self.n_clusters > 0: 155 if self.encode_clusters == True: 156 n_features = p_X + self.n_clusters 157 else: 158 n_features = p_X + 1 159 else: 160 n_features = p_X 161 162 X_ = scaled_Z[:, 0:n_features] 163 Phi_X_ = scaled_Z[:, n_features:p_Z] 164 165 B = mo.crossprod(x=X_, backend=self.backend) + self.lambda1 * np.diag( 166 np.repeat(1, n_features) 167 ) 168 C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend) 169 D = mo.crossprod(x=Phi_X_, backend=self.backend) + self.lambda2 * np.diag( 170 np.repeat(1, Phi_X_.shape[1]) 171 ) 172 173 if sys_platform in ("Linux", "Darwin"): 174 B_inv = pinv(B) if self.backend == "cpu" else jpinv(B) 175 else: 176 B_inv = pinv(B) 177 178 W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend) 179 S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend) 180 181 if sys_platform in ("Linux", "Darwin"): 182 S_inv = pinv(S_mat) if self.backend == "cpu" else jpinv(S_mat) 183 else: 184 S_inv = pinv(S_mat) 185 186 Y = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend) 187 inv = mo.rbind( 188 mo.cbind( 189 x=B_inv + mo.crossprod(x=W, y=Y, backend=self.backend), 190 y=-np.transpose(Y), 191 backend=self.backend, 192 ), 193 mo.cbind(x=-Y, y=S_inv, backend=self.backend), 194 backend=self.backend, 195 ) 196 197 self.beta_ = mo.safe_sparse_dot( 198 a=inv, 199 b=mo.crossprod(x=scaled_Z, y=centered_y, backend=self.backend), 200 backend=self.backend, 201 ) 202 203 return self 204 205 def predict(self, X, **kwargs): 206 """Predict test data X. 207 208 Args: 209 210 X: {array-like}, shape = [n_samples, n_features] 211 Training vectors, where n_samples is the number 212 of samples and n_features is the number of features. 213 214 **kwargs: additional parameters to be passed to 215 self.cook_test_set 216 217 Returns: 218 219 model predictions: {array-like} 220 221 """ 222 223 if len(X.shape) == 1: 224 n_features = X.shape[0] 225 new_X = mo.rbind( 226 x=X.reshape(1, n_features), 227 y=np.ones(n_features).reshape(1, n_features), 228 backend=self.backend, 229 ) 230 231 return ( 232 self.y_mean_ 233 + mo.safe_sparse_dot( 234 a=self.cook_test_set(new_X, **kwargs), 235 b=self.beta_, 236 backend=self.backend, 237 ) 238 )[0] 239 240 return self.y_mean_ + mo.safe_sparse_dot( 241 a=self.cook_test_set(X, **kwargs), 242 b=self.beta_, 243 backend=self.backend, 244 )
Ridge regression with 2 regularization parameters, derived from class Ridge2
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
lambda1: float
regularization parameter on direct link
lambda2: float
regularization parameter on hidden layer
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
'cpu' or 'gpu' or 'tpu'
Attributes:
beta_: {array-like}
regression coefficients
y_mean_: float
average response
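Unlike the classifiers documented below, this class has no Examples section; a minimal usage sketch in the same spirit (dataset and hyperparameter values are illustrative assumptions):

```python
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

reg = ns.Ridge2Regressor(n_hidden_features=25,
                         n_clusters=2,
                         lambda1=0.1,
                         lambda2=10.0)
reg.fit(X_train, y_train)
preds = reg.predict(X_test)
print(np.sqrt(np.mean((y_test - preds) ** 2)))  # test RMSE
```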
126 def fit(self, X, y, **kwargs): 127 """Fit Ridge model to training data (X, y). 128 129 Args: 130 131 X: {array-like}, shape = [n_samples, n_features] 132 Training vectors, where n_samples is the number 133 of samples and n_features is the number of features. 134 135 y: array-like, shape = [n_samples] 136 Target values. 137 138 **kwargs: additional parameters to be passed to 139 self.cook_training_set or self.obj.fit 140 141 Returns: 142 143 self: object 144 145 """ 146 147 sys_platform = platform.system() 148 149 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 150 151 n_X, p_X = X.shape 152 n_Z, p_Z = scaled_Z.shape 153 154 if self.n_clusters > 0: 155 if self.encode_clusters == True: 156 n_features = p_X + self.n_clusters 157 else: 158 n_features = p_X + 1 159 else: 160 n_features = p_X 161 162 X_ = scaled_Z[:, 0:n_features] 163 Phi_X_ = scaled_Z[:, n_features:p_Z] 164 165 B = mo.crossprod(x=X_, backend=self.backend) + self.lambda1 * np.diag( 166 np.repeat(1, n_features) 167 ) 168 C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend) 169 D = mo.crossprod(x=Phi_X_, backend=self.backend) + self.lambda2 * np.diag( 170 np.repeat(1, Phi_X_.shape[1]) 171 ) 172 173 if sys_platform in ("Linux", "Darwin"): 174 B_inv = pinv(B) if self.backend == "cpu" else jpinv(B) 175 else: 176 B_inv = pinv(B) 177 178 W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend) 179 S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend) 180 181 if sys_platform in ("Linux", "Darwin"): 182 S_inv = pinv(S_mat) if self.backend == "cpu" else jpinv(S_mat) 183 else: 184 S_inv = pinv(S_mat) 185 186 Y = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend) 187 inv = mo.rbind( 188 mo.cbind( 189 x=B_inv + mo.crossprod(x=W, y=Y, backend=self.backend), 190 y=-np.transpose(Y), 191 backend=self.backend, 192 ), 193 mo.cbind(x=-Y, y=S_inv, backend=self.backend), 194 backend=self.backend, 195 ) 196 197 self.beta_ = mo.safe_sparse_dot( 198 a=inv, 199 b=mo.crossprod(x=scaled_Z, y=centered_y, backend=self.backend), 200 backend=self.backend, 201 ) 202 203 return self
Fit Ridge model to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
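The matrix inversion in fit uses the standard 2×2 block-inverse identity: with B, C and D as in the code above, W = C B⁻¹ and the Schur complement S = D − W Cᵀ, the inverse of [[B, Cᵀ], [C, D]] is [[B⁻¹ + Wᵀ S⁻¹ W, −(S⁻¹W)ᵀ], [−S⁻¹W, S⁻¹]]. A small NumPy sketch checking that identity (the random matrices merely stand in for the direct-link and hidden-layer blocks):

```python
import numpy as np

rng = np.random.default_rng(123)
p, q = 3, 4                       # illustrative sizes of the two blocks
lambda1, lambda2 = 0.1, 0.1
X_ = rng.normal(size=(50, p))     # stands in for the (scaled) original features
Phi = rng.normal(size=(50, q))    # stands in for the hidden-layer features

B = X_.T @ X_ + lambda1 * np.eye(p)
C = Phi.T @ X_
D = Phi.T @ Phi + lambda2 * np.eye(q)

B_inv = np.linalg.pinv(B)
W = C @ B_inv                     # C B^{-1}
S = D - W @ C.T                   # Schur complement of B
S_inv = np.linalg.pinv(S)
Y = S_inv @ W

inv_blocks = np.block([[B_inv + W.T @ Y, -Y.T],
                       [-Y, S_inv]])

# same result as inverting the full regularized Gram matrix directly
M = np.block([[B, C.T], [C, D]])
print(np.allclose(inv_blocks, np.linalg.inv(M)))  # True
```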
205 def predict(self, X, **kwargs): 206 """Predict test data X. 207 208 Args: 209 210 X: {array-like}, shape = [n_samples, n_features] 211 Training vectors, where n_samples is the number 212 of samples and n_features is the number of features. 213 214 **kwargs: additional parameters to be passed to 215 self.cook_test_set 216 217 Returns: 218 219 model predictions: {array-like} 220 221 """ 222 223 if len(X.shape) == 1: 224 n_features = X.shape[0] 225 new_X = mo.rbind( 226 x=X.reshape(1, n_features), 227 y=np.ones(n_features).reshape(1, n_features), 228 backend=self.backend, 229 ) 230 231 return ( 232 self.y_mean_ 233 + mo.safe_sparse_dot( 234 a=self.cook_test_set(new_X, **kwargs), 235 b=self.beta_, 236 backend=self.backend, 237 ) 238 )[0] 239 240 return self.y_mean_ + mo.safe_sparse_dot( 241 a=self.cook_test_set(X, **kwargs), 242 b=self.beta_, 243 backend=self.backend, 244 )
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
18class Ridge2Classifier(Ridge2, ClassifierMixin): 19 """Multinomial logit classification with 2 regularization parameters 20 21 Parameters: 22 23 n_hidden_features: int 24 number of nodes in the hidden layer 25 26 activation_name: str 27 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 28 29 a: float 30 hyperparameter for 'prelu' or 'elu' activation function 31 32 nodes_sim: str 33 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 34 'uniform' 35 36 bias: boolean 37 indicates if the hidden layer contains a bias term (True) or not 38 (False) 39 40 dropout: float 41 regularization parameter; (random) percentage of nodes dropped out 42 of the training 43 44 direct_link: boolean 45 indicates if the original predictors are included (True) in model's 46 fitting or not (False) 47 48 n_clusters: int 49 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: 50 no clustering) 51 52 cluster_encode: bool 53 defines how the variable containing clusters is treated (default is one-hot) 54 if `False`, then labels are used, without one-hot encoding 55 56 type_clust: str 57 type of clustering method: currently k-means ('kmeans') or Gaussian 58 Mixture Model ('gmm') 59 60 type_scaling: a tuple of 3 strings 61 scaling methods for inputs, hidden layer, and clustering respectively 62 (and when relevant). 63 Currently available: standardization ('std') or MinMax scaling ('minmax') 64 65 lambda1: float 66 regularization parameter on direct link 67 68 lambda2: float 69 regularization parameter on hidden layer 70 71 solver: str 72 optimization function "L-BFGS-B", "Newton-CG", 73 "trust-ncg", "L-BFGS-B-lstsq", "Newton-CG-lstsq", 74 "trust-ncg-lstsq" (see scipy.optimize.minimize) 75 When using "L-BFGS-B-lstsq", "Newton-CG-lstsq", or "trust-ncg-lstsq", 76 the initial value for the optimization is set to the least squares solution 77 78 seed: int 79 reproducibility seed for nodes_sim=='uniform' 80 81 backend: str 82 "cpu" or "gpu" or "tpu" 83 84 Attributes: 85 86 beta_: {array-like} 87 regression coefficients 88 89 classes_: {array-like} 90 unique classes in the target variable 91 92 minloglik_: float 93 minimum value of the negative log-likelihood 94 95 Examples: 96 97 See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/ridge_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/ridge_classification.py) 98 99 ```python 100 import nnetsauce as ns 101 import numpy as np 102 from sklearn.datasets import load_breast_cancer 103 from sklearn.model_selection import train_test_split 104 from time import time 105 106 107 breast_cancer = load_breast_cancer() 108 X = breast_cancer.data 109 y = breast_cancer.target 110 111 # split data into training test and test set 112 np.random.seed(123) 113 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) 114 115 # create the model with nnetsauce 116 fit_obj = ns.Ridge2Classifier(lambda1 = 6.90185578e+04, 117 lambda2 = 3.17392781e+02, 118 n_hidden_features=95, 119 n_clusters=2, 120 dropout = 3.62817383e-01, 121 type_clust = "gmm") 122 123 # fit the model on training set 124 start = time() 125 fit_obj.fit(X_train, y_train) 126 print(f"Elapsed {time() - start}") 127 128 # get the accuracy on test set 129 start = time() 130 print(fit_obj.score(X_test, y_test)) 131 print(f"Elapsed {time() - start}") 132 133 # get area under the curve on test set (auc) 134 print(fit_obj.score(X_test, y_test, scoring="roc_auc")) 135 ``` 136 137 138 """ 139 140 # construct the object ----- 141 142 def 
__init__( 143 self, 144 n_hidden_features=5, 145 activation_name="relu", 146 a=0.01, 147 nodes_sim="sobol", 148 bias=True, 149 dropout=0, 150 direct_link=True, 151 n_clusters=2, 152 cluster_encode=True, 153 type_clust="kmeans", 154 type_scaling=("std", "std", "std"), 155 lambda1=0.1, 156 lambda2=0.1, 157 solver="L-BFGS-B", 158 seed=123, 159 backend="cpu", 160 ): 161 super().__init__( 162 n_hidden_features=n_hidden_features, 163 activation_name=activation_name, 164 a=a, 165 nodes_sim=nodes_sim, 166 bias=bias, 167 dropout=dropout, 168 direct_link=direct_link, 169 n_clusters=n_clusters, 170 cluster_encode=cluster_encode, 171 type_clust=type_clust, 172 type_scaling=type_scaling, 173 lambda1=lambda1, 174 lambda2=lambda2, 175 seed=seed, 176 backend=backend, 177 ) 178 179 self.type_fit = "classification" 180 self.solver = solver 181 self.beta_ = None 182 self.classes_ = None 183 self.minloglik_ = None 184 185 def loglik(self, X, Y, **kwargs): 186 """Log-likelihood for training data (X, Y). 187 188 Args: 189 190 X: {array-like}, shape = [n_samples, n_features] 191 Training vectors, where n_samples is the number 192 of samples and n_features is the number of features. 193 194 Y: array-like, shape = [n_samples] 195 One-hot encode target values. 196 197 **kwargs: additional parameters to be passed to 198 self.cook_training_set or self.obj.fit 199 200 Returns: 201 202 """ 203 204 def loglik_grad_hess(Y, X, B, XB, hessian=True, **kwargs): 205 # nobs, n_classes 206 n, K = Y.shape 207 208 # total number of covariates 209 p = X.shape[1] 210 211 # initial number of covariates 212 init_p = p - self.n_hidden_features 213 214 max_double = 709.0 215 XB[XB > max_double] = max_double 216 exp_XB = np.exp(XB) 217 probs = exp_XB / exp_XB.sum(axis=1)[:, None] 218 219 # gradient ----- 220 # (Y - p) -> (n, K) 221 # X -> (n, p) 222 # (K, n) %*% (n, p) -> (K, p) 223 if hessian is False: 224 grad = ( 225 -mo.safe_sparse_dot(a=(Y - probs).T, b=X, backend=self.backend) / n 226 ) 227 grad += self.lambda1 * B[0:init_p, :].sum(axis=0)[:, None] 228 grad += self.lambda2 * B[init_p:p, :].sum(axis=0)[:, None] 229 230 return grad.flatten() 231 232 # hessian ----- 233 if hessian is True: 234 Kp = K * p 235 hess = np.zeros((Kp, Kp), float) 236 for k1 in range(K): 237 x_index = range(k1 * p, (k1 + 1) * p) 238 for k2 in range(k1, K): 239 y_index = range(k2 * p, (k2 + 1) * p) 240 H_sub = ( 241 -mo.safe_sparse_dot( 242 a=X.T, 243 b=(probs[:, k1] * probs[:, k2])[:, None] * X, 244 backend=self.backend, 245 ) 246 / n 247 ) # do not store 248 hess[np.ix_(x_index, y_index)] = hess[ 249 np.ix_(y_index, x_index) 250 ] = H_sub 251 252 return hess + (self.lambda1 + self.lambda2) * np.identity(Kp) 253 254 # total number of covariates 255 p = X.shape[1] 256 257 # initial number of covariates 258 init_p = p - self.n_hidden_features 259 260 # log-likelihood (1st return) 261 def loglik_func(x): 262 # (p, K) 263 B = x.reshape(Y.shape[1], p).T 264 265 # (n, K) 266 XB = mo.safe_sparse_dot(X, B, backend=self.backend) 267 268 res = -(np.sum(Y * XB, axis=1) - logsumexp(XB)).mean() 269 270 res += ( 271 0.5 272 * self.lambda1 273 * mo.squared_norm(B[0:init_p, :], backend=self.backend) 274 ) 275 res += ( 276 0.5 277 * self.lambda2 278 * mo.squared_norm(B[init_p:p, :], backend=self.backend) 279 ) 280 281 return res 282 283 # gradient of log-likelihood 284 def grad_func(x): 285 # (p, K) 286 B = x.reshape(Y.shape[1], p).T 287 288 return loglik_grad_hess( 289 Y=Y, 290 X=X, 291 B=B, 292 XB=mo.safe_sparse_dot(X, B, backend=self.backend), 293 hessian=False, 294 
**kwargs 295 ) 296 297 # hessian of log-likelihood 298 def hessian_func(x): 299 # (p, K) 300 B = x.reshape(Y.shape[1], p).T 301 302 return loglik_grad_hess( 303 Y=Y, 304 X=X, 305 B=B, 306 XB=mo.safe_sparse_dot(X, B, backend=self.backend), 307 hessian=True, 308 **kwargs 309 ) 310 311 return loglik_func, grad_func, hessian_func 312 313 # newton-cg 314 # L-BFGS-B 315 def fit(self, X, y, **kwargs): 316 """Fit Ridge model to training data (X, y). 317 318 for beta: regression coeffs (beta11, ..., beta1p, ..., betaK1, ..., betaKp) 319 for K classes and p covariates. 320 321 Args: 322 323 X: {array-like}, shape = [n_samples, n_features] 324 Training vectors, where n_samples is the number 325 of samples and n_features is the number of features. 326 327 y: array-like, shape = [n_samples] 328 Target values. 329 330 **kwargs: additional parameters to be passed to 331 self.cook_training_set or self.obj.fit 332 333 Returns: 334 335 self: object 336 337 """ 338 339 assert mx.is_factor(y), "y must contain only integers" 340 341 output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 342 343 self.n_classes = len(np.unique(y)) 344 self.classes_ = np.unique(y) # for compatibility with sklearn 345 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 346 347 Y = mo.one_hot_encode2(output_y, self.n_classes) 348 349 # optimize for beta, minimize self.loglik (maximize loglik) ----- 350 loglik_func, grad_func, hessian_func = self.loglik(X=scaled_Z, Y=Y) 351 352 if self.solver == "L-BFGS-B": 353 opt = minimize( 354 fun=loglik_func, 355 x0=np.zeros(scaled_Z.shape[1] * self.n_classes), 356 jac=grad_func, 357 method=self.solver, 358 ) 359 self.beta_ = opt.x 360 self.minloglik_ = opt.fun 361 362 if self.solver in ("Newton-CG", "trust-ncg"): 363 opt = minimize( 364 fun=loglik_func, 365 x0=np.zeros(scaled_Z.shape[1] * self.n_classes), 366 jac=grad_func, 367 hess=hessian_func, 368 method=self.solver, 369 ) 370 self.beta_ = opt.x 371 self.minloglik_ = opt.fun 372 373 if self.solver == "L-BFGS-B-lstsq": 374 opt = minimize( 375 fun=loglik_func, 376 x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten(order="F"), 377 jac=grad_func, 378 method="L-BFGS-B", 379 ) 380 self.beta_ = opt.x 381 self.minloglik_ = opt.fun 382 383 if self.solver in "Newton-CG-lstsq": 384 opt = minimize( 385 fun=loglik_func, 386 x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten(order="F"), 387 jac=grad_func, 388 hess=hessian_func, 389 method="Newton-CG", 390 ) 391 self.beta_ = opt.x 392 self.minloglik_ = opt.fun 393 394 if self.solver in "trust-ncg-lstsq": 395 opt = minimize( 396 fun=loglik_func, 397 x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten(order="F"), 398 jac=grad_func, 399 hess=hessian_func, 400 method="trust-ncg", 401 ) 402 self.beta_ = opt.x 403 self.minloglik_ = opt.fun 404 405 self.classes_ = np.unique(y) 406 407 return self 408 409 def predict(self, X, **kwargs): 410 """Predict test data X. 411 412 Args: 413 414 X: {array-like}, shape = [n_samples, n_features] 415 Training vectors, where n_samples is the number 416 of samples and n_features is the number of features. 417 418 **kwargs: additional parameters to be passed to 419 self.cook_test_set 420 421 Returns: 422 423 model predictions: {array-like} 424 """ 425 426 return np.argmax(self.predict_proba(X, **kwargs), axis=1) 427 428 def predict_proba(self, X, **kwargs): 429 """Predict probabilities for test data X. 
430 431 Args: 432 433 X: {array-like}, shape = [n_samples, n_features] 434 Training vectors, where n_samples is the number 435 of samples and n_features is the number of features. 436 437 **kwargs: additional parameters to be passed to 438 self.cook_test_set 439 440 Returns: 441 442 probability estimates for test data: {array-like} 443 444 """ 445 if len(X.shape) == 1: 446 n_features = X.shape[0] 447 new_X = mo.rbind( 448 X.reshape(1, n_features), 449 np.ones(n_features).reshape(1, n_features), 450 ) 451 452 Z = self.cook_test_set(new_X, **kwargs) 453 454 else: 455 Z = self.cook_test_set(X, **kwargs) 456 457 ZB = mo.safe_sparse_dot( 458 a=Z, 459 b=self.beta_.reshape( 460 self.n_classes, 461 X.shape[1] + self.n_hidden_features + self.n_clusters, 462 ).T, 463 backend=self.backend, 464 ) 465 466 exp_ZB = np.exp(ZB) 467 468 return exp_ZB / exp_ZB.sum(axis=1)[:, None]
Multinomial logit classification with 2 regularization parameters
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
lambda1: float
regularization parameter on direct link
lambda2: float
regularization parameter on hidden layer
solver: str
optimization function "L-BFGS-B", "Newton-CG",
"trust-ncg", "L-BFGS-B-lstsq", "Newton-CG-lstsq",
"trust-ncg-lstsq" (see scipy.optimize.minimize)
When using "L-BFGS-B-lstsq", "Newton-CG-lstsq", or "trust-ncg-lstsq",
the initial value for the optimization is set to the least squares solution
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
beta_: {array-like}
regression coefficients
classes_: {array-like}
unique classes in the target variable
minloglik_: float
minimum value of the negative log-likelihood
Examples:
See also https://github.com/Techtonique/nnetsauce/blob/master/examples/ridge_classification.py
```python
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from time import time

breast_cancer = load_breast_cancer()
X = breast_cancer.data
y = breast_cancer.target

# split data into training set and test set
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# create the model with nnetsauce
fit_obj = ns.Ridge2Classifier(lambda1 = 6.90185578e+04,
                              lambda2 = 3.17392781e+02,
                              n_hidden_features=95,
                              n_clusters=2,
                              dropout = 3.62817383e-01,
                              type_clust = "gmm")

# fit the model on training set
start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")

# get the accuracy on test set
start = time()
print(fit_obj.score(X_test, y_test))
print(f"Elapsed {time() - start}")

# get area under the curve on test set (auc)
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
```
315 def fit(self, X, y, **kwargs): 316 """Fit Ridge model to training data (X, y). 317 318 for beta: regression coeffs (beta11, ..., beta1p, ..., betaK1, ..., betaKp) 319 for K classes and p covariates. 320 321 Args: 322 323 X: {array-like}, shape = [n_samples, n_features] 324 Training vectors, where n_samples is the number 325 of samples and n_features is the number of features. 326 327 y: array-like, shape = [n_samples] 328 Target values. 329 330 **kwargs: additional parameters to be passed to 331 self.cook_training_set or self.obj.fit 332 333 Returns: 334 335 self: object 336 337 """ 338 339 assert mx.is_factor(y), "y must contain only integers" 340 341 output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 342 343 self.n_classes = len(np.unique(y)) 344 self.classes_ = np.unique(y) # for compatibility with sklearn 345 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 346 347 Y = mo.one_hot_encode2(output_y, self.n_classes) 348 349 # optimize for beta, minimize self.loglik (maximize loglik) ----- 350 loglik_func, grad_func, hessian_func = self.loglik(X=scaled_Z, Y=Y) 351 352 if self.solver == "L-BFGS-B": 353 opt = minimize( 354 fun=loglik_func, 355 x0=np.zeros(scaled_Z.shape[1] * self.n_classes), 356 jac=grad_func, 357 method=self.solver, 358 ) 359 self.beta_ = opt.x 360 self.minloglik_ = opt.fun 361 362 if self.solver in ("Newton-CG", "trust-ncg"): 363 opt = minimize( 364 fun=loglik_func, 365 x0=np.zeros(scaled_Z.shape[1] * self.n_classes), 366 jac=grad_func, 367 hess=hessian_func, 368 method=self.solver, 369 ) 370 self.beta_ = opt.x 371 self.minloglik_ = opt.fun 372 373 if self.solver == "L-BFGS-B-lstsq": 374 opt = minimize( 375 fun=loglik_func, 376 x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten(order="F"), 377 jac=grad_func, 378 method="L-BFGS-B", 379 ) 380 self.beta_ = opt.x 381 self.minloglik_ = opt.fun 382 383 if self.solver in "Newton-CG-lstsq": 384 opt = minimize( 385 fun=loglik_func, 386 x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten(order="F"), 387 jac=grad_func, 388 hess=hessian_func, 389 method="Newton-CG", 390 ) 391 self.beta_ = opt.x 392 self.minloglik_ = opt.fun 393 394 if self.solver in "trust-ncg-lstsq": 395 opt = minimize( 396 fun=loglik_func, 397 x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten(order="F"), 398 jac=grad_func, 399 hess=hessian_func, 400 method="trust-ncg", 401 ) 402 self.beta_ = opt.x 403 self.minloglik_ = opt.fun 404 405 self.classes_ = np.unique(y) 406 407 return self
Fit Ridge model to training data (X, y).
for beta: regression coeffs (beta11, ..., beta1p, ..., betaK1, ..., betaKp) for K classes and p covariates.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
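A short sketch of switching solvers, including one of the least-squares warm-start variants described above (hyperparameter values are illustrative assumptions):

```python
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

for solver in ("L-BFGS-B", "L-BFGS-B-lstsq"):
    clf = ns.Ridge2Classifier(n_hidden_features=10, n_clusters=2, solver=solver)
    clf.fit(X_train, y_train)
    # accuracy on the test set and minimized negative log-likelihood
    print(solver, clf.score(X_test, y_test), clf.minloglik_)
```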
409 def predict(self, X, **kwargs): 410 """Predict test data X. 411 412 Args: 413 414 X: {array-like}, shape = [n_samples, n_features] 415 Training vectors, where n_samples is the number 416 of samples and n_features is the number of features. 417 418 **kwargs: additional parameters to be passed to 419 self.cook_test_set 420 421 Returns: 422 423 model predictions: {array-like} 424 """ 425 426 return np.argmax(self.predict_proba(X, **kwargs), axis=1)
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
428 def predict_proba(self, X, **kwargs): 429 """Predict probabilities for test data X. 430 431 Args: 432 433 X: {array-like}, shape = [n_samples, n_features] 434 Training vectors, where n_samples is the number 435 of samples and n_features is the number of features. 436 437 **kwargs: additional parameters to be passed to 438 self.cook_test_set 439 440 Returns: 441 442 probability estimates for test data: {array-like} 443 444 """ 445 if len(X.shape) == 1: 446 n_features = X.shape[0] 447 new_X = mo.rbind( 448 X.reshape(1, n_features), 449 np.ones(n_features).reshape(1, n_features), 450 ) 451 452 Z = self.cook_test_set(new_X, **kwargs) 453 454 else: 455 Z = self.cook_test_set(X, **kwargs) 456 457 ZB = mo.safe_sparse_dot( 458 a=Z, 459 b=self.beta_.reshape( 460 self.n_classes, 461 X.shape[1] + self.n_hidden_features + self.n_clusters, 462 ).T, 463 backend=self.backend, 464 ) 465 466 exp_ZB = np.exp(ZB) 467 468 return exp_ZB / exp_ZB.sum(axis=1)[:, None]
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
23class Ridge2MultitaskClassifier(Ridge2, ClassifierMixin): 24 """Multitask Ridge classification with 2 regularization parameters 25 26 Parameters: 27 28 n_hidden_features: int 29 number of nodes in the hidden layer 30 31 activation_name: str 32 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 33 34 a: float 35 hyperparameter for 'prelu' or 'elu' activation function 36 37 nodes_sim: str 38 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 39 'uniform' 40 41 bias: boolean 42 indicates if the hidden layer contains a bias term (True) or not 43 (False) 44 45 dropout: float 46 regularization parameter; (random) percentage of nodes dropped out 47 of the training 48 49 n_clusters: int 50 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: 51 no clustering) 52 53 cluster_encode: bool 54 defines how the variable containing clusters is treated (default is one-hot) 55 if `False`, then labels are used, without one-hot encoding 56 57 type_clust: str 58 type of clustering method: currently k-means ('kmeans') or Gaussian 59 Mixture Model ('gmm') 60 61 type_scaling: a tuple of 3 strings 62 scaling methods for inputs, hidden layer, and clustering respectively 63 (and when relevant). 64 Currently available: standardization ('std') or MinMax scaling ('minmax') 65 66 lambda1: float 67 regularization parameter on direct link 68 69 lambda2: float 70 regularization parameter on hidden layer 71 72 seed: int 73 reproducibility seed for nodes_sim=='uniform' 74 75 backend: str 76 "cpu" or "gpu" or "tpu" 77 78 Attributes: 79 80 beta_: {array-like} 81 regression coefficients 82 83 Examples: 84 85 See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/ridgemtask_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/ridgemtask_classification.py) 86 87 ```python 88 import nnetsauce as ns 89 import numpy as np 90 from sklearn.datasets import load_breast_cancer 91 from sklearn.model_selection import train_test_split 92 from sklearn import metrics 93 from time import time 94 95 breast_cancer = load_breast_cancer() 96 Z = breast_cancer.data 97 t = breast_cancer.target 98 np.random.seed(123) 99 X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2) 100 101 fit_obj = ns.Ridge2MultitaskClassifier(n_hidden_features=int(9.83730469e+01), 102 dropout=4.31054687e-01, 103 n_clusters=int(1.71484375e+00), 104 lambda1=1.24023438e+01, lambda2=7.30263672e+03) 105 106 start = time() 107 fit_obj.fit(X_train, y_train) 108 print(f"Elapsed {time() - start}") 109 110 print(fit_obj.score(X_test, y_test)) 111 print(fit_obj.score(X_test, y_test, scoring="roc_auc")) 112 113 start = time() 114 preds = fit_obj.predict(X_test) 115 print(f"Elapsed {time() - start}") 116 print(metrics.classification_report(preds, y_test)) 117 ``` 118 119 """ 120 121 # construct the object ----- 122 123 def __init__( 124 self, 125 n_hidden_features=5, 126 activation_name="relu", 127 a=0.01, 128 nodes_sim="sobol", 129 bias=True, 130 dropout=0, 131 n_clusters=2, 132 cluster_encode=True, 133 type_clust="kmeans", 134 type_scaling=("std", "std", "std"), 135 lambda1=0.1, 136 lambda2=0.1, 137 seed=123, 138 backend="cpu", 139 ): 140 super().__init__( 141 n_hidden_features=n_hidden_features, 142 activation_name=activation_name, 143 a=a, 144 nodes_sim=nodes_sim, 145 bias=bias, 146 dropout=dropout, 147 n_clusters=n_clusters, 148 cluster_encode=cluster_encode, 149 type_clust=type_clust, 150 type_scaling=type_scaling, 151 lambda1=lambda1, 152 lambda2=lambda2, 153 seed=seed, 154 
backend=backend, 155 ) 156 157 self.type_fit = "classification" 158 159 def fit(self, X, y, **kwargs): 160 """Fit Ridge model to training data (X, y). 161 162 Args: 163 164 X: {array-like}, shape = [n_samples, n_features] 165 Training vectors, where n_samples is the number 166 of samples and n_features is the number of features. 167 168 y: array-like, shape = [n_samples] 169 Target values. 170 171 **kwargs: additional parameters to be passed to 172 self.cook_training_set or self.obj.fit 173 174 Returns: 175 176 self: object 177 178 """ 179 180 sys_platform = platform.system() 181 182 assert mx.is_factor(y), "y must contain only integers" 183 184 self.classes_ = np.unique(y) # for compatibility with sklearn 185 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 186 187 output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 188 189 n_X, p_X = X.shape 190 n_Z, p_Z = scaled_Z.shape 191 192 self.n_classes = len(np.unique(y)) 193 194 # multitask response 195 Y = mo.one_hot_encode2(output_y, self.n_classes) 196 197 if self.n_clusters > 0: 198 if self.encode_clusters == True: 199 n_features = p_X + self.n_clusters 200 else: 201 n_features = p_X + 1 202 else: 203 n_features = p_X 204 205 X_ = scaled_Z[:, 0:n_features] 206 Phi_X_ = scaled_Z[:, n_features:p_Z] 207 208 B = mo.crossprod(x=X_, backend=self.backend) + self.lambda1 * np.diag( 209 np.repeat(1, X_.shape[1]) 210 ) 211 C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend) 212 D = mo.crossprod(x=Phi_X_, backend=self.backend) + self.lambda2 * np.diag( 213 np.repeat(1, Phi_X_.shape[1]) 214 ) 215 216 if sys_platform in ("Linux", "Darwin"): 217 B_inv = pinv(B) if self.backend == "cpu" else jpinv(B) 218 else: 219 B_inv = pinv(B) 220 221 W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend) 222 S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend) 223 224 if sys_platform in ("Linux", "Darwin"): 225 S_inv = pinv(S_mat) if self.backend == "cpu" else jpinv(S_mat) 226 else: 227 S_inv = pinv(S_mat) 228 229 Y2 = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend) 230 inv = mo.rbind( 231 mo.cbind( 232 x=B_inv + mo.crossprod(x=W, y=Y2, backend=self.backend), 233 y=-np.transpose(Y2), 234 backend=self.backend, 235 ), 236 mo.cbind(x=-Y2, y=S_inv, backend=self.backend), 237 backend=self.backend, 238 ) 239 240 self.beta_ = mo.safe_sparse_dot( 241 a=inv, 242 b=mo.crossprod(x=scaled_Z, y=Y, backend=self.backend), 243 backend=self.backend, 244 ) 245 self.classes_ = np.unique(y) 246 return self 247 248 def predict(self, X, **kwargs): 249 """Predict test data X. 250 251 Args: 252 253 X: {array-like}, shape = [n_samples, n_features] 254 Training vectors, where n_samples is the number 255 of samples and n_features is the number of features. 256 257 **kwargs: additional parameters to be passed to 258 self.cook_test_set 259 260 Returns: 261 262 model predictions: {array-like} 263 264 """ 265 266 return np.argmax(self.predict_proba(X, **kwargs), axis=1) 267 268 def predict_proba(self, X, **kwargs): 269 """Predict probabilities for test data X. 270 271 Args: 272 273 X: {array-like}, shape = [n_samples, n_features] 274 Training vectors, where n_samples is the number 275 of samples and n_features is the number of features. 
276 277 **kwargs: additional parameters to be passed to 278 self.cook_test_set 279 280 Returns: 281 282 probability estimates for test data: {array-like} 283 284 """ 285 286 if len(X.shape) == 1: 287 n_features = X.shape[0] 288 new_X = mo.rbind( 289 x=X.reshape(1, n_features), 290 y=np.ones(n_features).reshape(1, n_features), 291 backend=self.backend, 292 ) 293 294 Z = self.cook_test_set(new_X, **kwargs) 295 296 else: 297 Z = self.cook_test_set(X, **kwargs) 298 299 ZB = mo.safe_sparse_dot(a=Z, b=self.beta_, backend=self.backend) 300 301 exp_ZB = np.exp(ZB) 302 303 return exp_ZB / exp_ZB.sum(axis=1)[:, None] 304 305 def score(self, X, y, scoring=None): 306 """Scoring function for classification. 307 308 Args: 309 310 X: {array-like}, shape = [n_samples, n_features] 311 Training vectors, where n_samples is the number 312 of samples and n_features is the number of features. 313 314 y: array-like, shape = [n_samples] 315 Target values. 316 317 scoring: str 318 scoring method (default is accuracy) 319 320 Returns: 321 322 score: float 323 """ 324 325 if scoring is None: 326 scoring = "accuracy" 327 328 if scoring == "accuracy": 329 return skm2.accuracy_score(y, self.predict(X)) 330 331 if scoring == "f1": 332 return skm2.f1_score(y, self.predict(X)) 333 334 if scoring == "precision": 335 return skm2.precision_score(y, self.predict(X)) 336 337 if scoring == "recall": 338 return skm2.recall_score(y, self.predict(X)) 339 340 if scoring == "roc_auc": 341 return skm2.roc_auc_score(y, self.predict(X)) 342 343 if scoring == "log_loss": 344 return skm2.log_loss(y, self.predict_proba(X)) 345 346 if scoring == "balanced_accuracy": 347 return skm2.balanced_accuracy_score(y, self.predict(X)) 348 349 if scoring == "average_precision": 350 return skm2.average_precision_score(y, self.predict(X)) 351 352 if scoring == "neg_brier_score": 353 return -skm2.brier_score_loss(y, self.predict_proba(X)) 354 355 if scoring == "neg_log_loss": 356 return -skm2.log_loss(y, self.predict_proba(X))
Multitask Ridge classification with 2 regularization parameters
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
lambda1: float
regularization parameter on direct link
lambda2: float
regularization parameter on hidden layer
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
beta_: {array-like}
regression coefficients
Examples:
See also https://github.com/Techtonique/nnetsauce/blob/master/examples/ridgemtask_classification.py
```python
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time

breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)

fit_obj = ns.Ridge2MultitaskClassifier(n_hidden_features=int(9.83730469e+01),
                                       dropout=4.31054687e-01,
                                       n_clusters=int(1.71484375e+00),
                                       lambda1=1.24023438e+01, lambda2=7.30263672e+03)

start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")

print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))

start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
```
159 def fit(self, X, y, **kwargs): 160 """Fit Ridge model to training data (X, y). 161 162 Args: 163 164 X: {array-like}, shape = [n_samples, n_features] 165 Training vectors, where n_samples is the number 166 of samples and n_features is the number of features. 167 168 y: array-like, shape = [n_samples] 169 Target values. 170 171 **kwargs: additional parameters to be passed to 172 self.cook_training_set or self.obj.fit 173 174 Returns: 175 176 self: object 177 178 """ 179 180 sys_platform = platform.system() 181 182 assert mx.is_factor(y), "y must contain only integers" 183 184 self.classes_ = np.unique(y) # for compatibility with sklearn 185 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 186 187 output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 188 189 n_X, p_X = X.shape 190 n_Z, p_Z = scaled_Z.shape 191 192 self.n_classes = len(np.unique(y)) 193 194 # multitask response 195 Y = mo.one_hot_encode2(output_y, self.n_classes) 196 197 if self.n_clusters > 0: 198 if self.encode_clusters == True: 199 n_features = p_X + self.n_clusters 200 else: 201 n_features = p_X + 1 202 else: 203 n_features = p_X 204 205 X_ = scaled_Z[:, 0:n_features] 206 Phi_X_ = scaled_Z[:, n_features:p_Z] 207 208 B = mo.crossprod(x=X_, backend=self.backend) + self.lambda1 * np.diag( 209 np.repeat(1, X_.shape[1]) 210 ) 211 C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend) 212 D = mo.crossprod(x=Phi_X_, backend=self.backend) + self.lambda2 * np.diag( 213 np.repeat(1, Phi_X_.shape[1]) 214 ) 215 216 if sys_platform in ("Linux", "Darwin"): 217 B_inv = pinv(B) if self.backend == "cpu" else jpinv(B) 218 else: 219 B_inv = pinv(B) 220 221 W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend) 222 S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend) 223 224 if sys_platform in ("Linux", "Darwin"): 225 S_inv = pinv(S_mat) if self.backend == "cpu" else jpinv(S_mat) 226 else: 227 S_inv = pinv(S_mat) 228 229 Y2 = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend) 230 inv = mo.rbind( 231 mo.cbind( 232 x=B_inv + mo.crossprod(x=W, y=Y2, backend=self.backend), 233 y=-np.transpose(Y2), 234 backend=self.backend, 235 ), 236 mo.cbind(x=-Y2, y=S_inv, backend=self.backend), 237 backend=self.backend, 238 ) 239 240 self.beta_ = mo.safe_sparse_dot( 241 a=inv, 242 b=mo.crossprod(x=scaled_Z, y=Y, backend=self.backend), 243 backend=self.backend, 244 ) 245 self.classes_ = np.unique(y) 246 return self
Fit Ridge model to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
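As the code above shows, the multitask formulation one-hot encodes the class labels and fits a ridge regression to every class indicator on the same augmented features; class probabilities then come from a softmax over the fitted responses (see predict_proba below). A toy sketch of that idea, independent of the class internals (all names are illustrative):

```python
import numpy as np

rng = np.random.default_rng(123)
n, p, K = 60, 5, 3
Z = rng.normal(size=(n, p))      # stands in for the scaled, augmented features
y = rng.integers(0, K, size=n)   # integer class labels

Y = np.eye(K)[y]                 # one-hot response matrix, shape (n, K)
lam = 0.1
# one ridge regression per column of Y, all sharing the same design matrix
beta = np.linalg.solve(Z.T @ Z + lam * np.eye(p), Z.T @ Y)

scores = Z @ beta
probs = np.exp(scores) / np.exp(scores).sum(axis=1, keepdims=True)  # row-wise softmax
print(probs.shape, np.argmax(probs, axis=1)[:10])
```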
248 def predict(self, X, **kwargs): 249 """Predict test data X. 250 251 Args: 252 253 X: {array-like}, shape = [n_samples, n_features] 254 Training vectors, where n_samples is the number 255 of samples and n_features is the number of features. 256 257 **kwargs: additional parameters to be passed to 258 self.cook_test_set 259 260 Returns: 261 262 model predictions: {array-like} 263 264 """ 265 266 return np.argmax(self.predict_proba(X, **kwargs), axis=1)
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
268 def predict_proba(self, X, **kwargs): 269 """Predict probabilities for test data X. 270 271 Args: 272 273 X: {array-like}, shape = [n_samples, n_features] 274 Training vectors, where n_samples is the number 275 of samples and n_features is the number of features. 276 277 **kwargs: additional parameters to be passed to 278 self.cook_test_set 279 280 Returns: 281 282 probability estimates for test data: {array-like} 283 284 """ 285 286 if len(X.shape) == 1: 287 n_features = X.shape[0] 288 new_X = mo.rbind( 289 x=X.reshape(1, n_features), 290 y=np.ones(n_features).reshape(1, n_features), 291 backend=self.backend, 292 ) 293 294 Z = self.cook_test_set(new_X, **kwargs) 295 296 else: 297 Z = self.cook_test_set(X, **kwargs) 298 299 ZB = mo.safe_sparse_dot(a=Z, b=self.beta_, backend=self.backend) 300 301 exp_ZB = np.exp(ZB) 302 303 return exp_ZB / exp_ZB.sum(axis=1)[:, None]
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
305 def score(self, X, y, scoring=None): 306 """Scoring function for classification. 307 308 Args: 309 310 X: {array-like}, shape = [n_samples, n_features] 311 Training vectors, where n_samples is the number 312 of samples and n_features is the number of features. 313 314 y: array-like, shape = [n_samples] 315 Target values. 316 317 scoring: str 318 scoring method (default is accuracy) 319 320 Returns: 321 322 score: float 323 """ 324 325 if scoring is None: 326 scoring = "accuracy" 327 328 if scoring == "accuracy": 329 return skm2.accuracy_score(y, self.predict(X)) 330 331 if scoring == "f1": 332 return skm2.f1_score(y, self.predict(X)) 333 334 if scoring == "precision": 335 return skm2.precision_score(y, self.predict(X)) 336 337 if scoring == "recall": 338 return skm2.recall_score(y, self.predict(X)) 339 340 if scoring == "roc_auc": 341 return skm2.roc_auc_score(y, self.predict(X)) 342 343 if scoring == "log_loss": 344 return skm2.log_loss(y, self.predict_proba(X)) 345 346 if scoring == "balanced_accuracy": 347 return skm2.balanced_accuracy_score(y, self.predict(X)) 348 349 if scoring == "average_precision": 350 return skm2.average_precision_score(y, self.predict(X)) 351 352 if scoring == "neg_brier_score": 353 return -skm2.brier_score_loss(y, self.predict_proba(X)) 354 355 if scoring == "neg_log_loss": 356 return -skm2.log_loss(y, self.predict_proba(X))
Scoring function for classification.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method (default is accuracy)
Returns:
score: float
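A brief sketch of the scoring options listed above (hyperparameter values are illustrative; the dataset is the same breast-cancer example used earlier):

```python
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

clf = ns.Ridge2MultitaskClassifier(n_hidden_features=10, n_clusters=2).fit(X_train, y_train)

print(clf.score(X_test, y_test))  # accuracy (the default)
for scoring in ("f1", "roc_auc", "balanced_accuracy", "log_loss"):
    print(scoring, clf.score(X_test, y_test, scoring=scoring))
```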
6class SubSampler: 7 """Subsampling class. 8 9 Attributes: 10 11 y: array-like, shape = [n_samples] 12 Target values. 13 14 row_sample: double 15 subsampling fraction 16 17 n_samples: int 18 subsampling by using the number of rows (supersedes row_sample) 19 20 seed: int 21 reproductibility seed 22 23 n_jobs: int 24 number of jobs to run in parallel 25 26 verbose: bool 27 print progress messages and bars 28 """ 29 30 def __init__( 31 self, 32 y, 33 row_sample=0.8, 34 n_samples=None, 35 seed=123, 36 n_jobs=None, 37 verbose=False, 38 ): 39 self.y = y 40 self.n_samples = n_samples 41 if self.n_samples is None: 42 assert ( 43 row_sample < 1 and row_sample >= 0 44 ), "'row_sample' must be provided, plus < 1 and >= 0" 45 self.row_sample = row_sample 46 else: 47 assert self.n_samples < len(y), "'n_samples' must be < len(y)" 48 self.row_sample = self.n_samples / len(y) 49 self.seed = seed 50 self.indices = None 51 self.n_jobs = n_jobs 52 self.verbose = verbose 53 54 def subsample(self): 55 """Returns indices of subsampled input data. 56 57 Examples: 58 59 <ul> 60 <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240105_subsampling.ipynb">20240105_subsampling.ipynb</a> </li> 61 <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240131_subsampling_nsamples.ipynb">20240131_subsampling_nsamples.ipynb</a> </li> 62 </ul> 63 64 """ 65 self.indices = dosubsample( 66 y=self.y, 67 row_sample=self.row_sample, 68 seed=self.seed, 69 n_jobs=self.n_jobs, 70 verbose=self.verbose, 71 ) 72 return self.indices
Subsampling class.
Attributes:
y: array-like, shape = [n_samples] Target values.
row_sample: double subsampling fraction
n_samples: int subsampling by using the number of rows (supersedes row_sample)
seed: int reproducibility seed
n_jobs: int number of jobs to run in parallel
verbose: bool print progress messages and bars
54 def subsample(self): 55 """Returns indices of subsampled input data. 56 57 Examples: 58 59 <ul> 60 <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240105_subsampling.ipynb">20240105_subsampling.ipynb</a> </li> 61 <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240131_subsampling_nsamples.ipynb">20240131_subsampling_nsamples.ipynb</a> </li> 62 </ul> 63 64 """ 65 self.indices = dosubsample( 66 y=self.y, 67 row_sample=self.row_sample, 68 seed=self.seed, 69 n_jobs=self.n_jobs, 70 verbose=self.verbose, 71 ) 72 return self.indices
Returns indices of subsampled input data.
Examples:
https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240105_subsampling.ipynb
https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240131_subsampling_nsamples.ipynb
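The notebooks above give full demos; a minimal usage sketch (the imbalanced toy labels are an assumption for illustration):

```python
import numpy as np
import nnetsauce as ns

# imbalanced toy labels: roughly 90% class 0, 10% class 1
np.random.seed(123)
y = np.random.binomial(1, 0.1, size=1000)

# keep about half of the rows; the sampler uses y to guide the subsampling
sampler = ns.SubSampler(y=y, row_sample=0.5, seed=123)
idx = sampler.subsample()

print(len(idx))                      # roughly 500 indices into y
print(np.mean(y[idx]), np.mean(y))   # compare class proportions
```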