glmnetforpython.glmnet_class
import matplotlib.pyplot as plt
import numpy as np
import warnings

from collections import namedtuple
from sklearn.base import BaseEstimator, RegressorMixin, ClassifierMixin
from .glmnet import glmnet
from .glmnetPlot import glmnetPlot
from .glmnetPrint import glmnetPrint
from .glmnetCoef import glmnetCoef
from .glmnetPredict import glmnetPredict
from .cvglmnet import cvglmnet
from .cvglmnetCoef import cvglmnetCoef
from .cvglmnetPlot import cvglmnetPlot
from .cvglmnetPredict import cvglmnetPredict


class GLMNet(BaseEstimator, RegressorMixin, ClassifierMixin):
    """
    A sklearn-style wrapper for the glmnet package.

    More details about GLMNet can be found at:

    https://glmnet.stanford.edu/articles/glmnet.html

    Parameters
    ----------

    weights : array-like, optional
        Forwarded unchanged to ``glmnet`` as ``weights`` (presumably
        per-observation weights; confirm against ``glmnet``).
        Default is None.

    alpha : float, optional
        The alpha parameter in the elastic net penalty. Default is 1.0.
        0.0 is the ridge penalty and 1.0 is the lasso penalty.

    nlambda : int, optional
        The number of lambda values to compute. Default is 100.

    lambdau : float, optional
        User-defined lambda value. Default is None.

    standardize : bool, optional
        Whether to standardize the predictors. Default is True.

    thresh : float, optional
        The convergence threshold. Default is 1e-07.

    dfmax : float, optional
        The maximum number of degrees of freedom. Default is 1e+10.

    pmax : float, optional
        The maximum number of predictors. Default is 1e+10.

    exclude : array-like, optional
        The indices of predictors to exclude. Default is None.

    penalty_factor : array-like, optional
        The penalty factors for each predictor. Default is None.

    lower_lambdau : float, optional
        The lower bound for the lambda values. Default is None.

    upper_lambdau : float, optional
        The upper bound for the lambda values. Default is None.

    maxit : float, optional
        The maximum number of iterations. Default is 1e+05.

    type_measure : int, optional
        The type of measure to use. Default is 1.

    family : str, optional
        The family of the response variable. Default is 'gaussian'.
        Other options are 'binomial', 'multinomial', 'poisson', 'cox' and 'mgaussian'.

    parallel : bool, optional
        Whether to use parallel processing. Default is False.

    ncores : int, optional
        The number of cores to use. Default is -1.

    verbose : bool, optional
        Whether to print messages. Default is False.

    Attributes
    ----------

    model : object or None
        Whatever ``glmnet`` (or ``cvglmnet``) returned; None until fitted.

    s : float, array-like or None
        The lambda value(s) used by the most recent ``predict`` call.

    coef_ : array-like or None
        Coefficients extracted by ``fit`` (or by ``cvglmnet`` at
        ``lambda_min``); None until fitted.

    """

    def __init__(
        self,
        weights=None,
        alpha=1.0,
        nlambda=100,
        lambdau=None,
        standardize=True,
        thresh=1e-07,
        dfmax=1e10,
        pmax=1e10,
        exclude=None,
        penalty_factor=None,
        lower_lambdau=None,
        upper_lambdau=None,
        maxit=1e05,
        type_measure=1,
        family="gaussian",
        parallel=False,
        ncores=-1,
        verbose=False,
    ):
        # Hyperparameters are stored verbatim and only consumed in ``fit``.
        self.weights = weights
        self.alpha = alpha
        self.nlambda = nlambda
        self.lambdau = lambdau
        self.standardize = standardize
        self.thresh = thresh
        self.dfmax = dfmax
        self.pmax = pmax
        self.exclude = exclude
        self.penalty_factor = penalty_factor
        self.lower_lambdau = lower_lambdau
        self.upper_lambdau = upper_lambdau
        self.maxit = maxit
        self.type_measure = type_measure
        self.family = family
        self.parallel = parallel
        self.ncores = ncores
        self.verbose = verbose
        # Fitted state; populated by ``fit`` / ``predict`` / ``cvglmnet``.
        self.model = None
        self.s = None
        self.coef_ = None

    def fit(self, X, y, s=None, exact=False, **kwargs):
        """
        Fit the model.

        Parameters
        ----------

        X : array-like
            The predictor variables.

        y : array-like
            The response variable.

        s : float, optional
            The value of lambda at which extraction is made. Default is None.

        exact : bool, optional
            Whether to use exact lambda values. Default is False.

        **kwargs : dict
            Additional arguments to pass to glmnetCoef.

        Returns
        -------

        self : object
            Returns the instance itself.

        """
        self.model = glmnet(
            x=X,
            y=y,
            weights=self.weights,
            alpha=self.alpha,
            nlambda=self.nlambda,
            lambdau=self.lambdau,
            standardize=self.standardize,
            thresh=self.thresh,
            dfmax=self.dfmax,
            pmax=self.pmax,
            exclude=self.exclude,
            penalty_factor=self.penalty_factor,
            lower_lambdau=self.lower_lambdau,
            upper_lambdau=self.upper_lambdau,
            maxit=self.maxit,
            type_measure=self.type_measure,
            family=self.family,
            parallel=self.parallel,
            ncores=self.ncores,
            verbose=self.verbose,
        )
        # Cache the coefficients so ``get_coef()`` works without arguments.
        self.coef_ = glmnetCoef(self.model, s=s, exact=exact, **kwargs)
        return self

    def cvglmnet(
        self,
        X,
        y,
        family="gaussian",
        ptype="default",
        nfolds=10,
        foldid=np.empty([0]),
        parallel=1,
        keep=False,
        grouped=True,
        **kwargs
    ):
        """
        Run cross-validated glmnet and wrap the result.

        Every argument is forwarded to the module-level ``cvglmnet``
        function. The hyperparameters stored on this instance are not
        used by this method.

        Parameters
        ----------

        X : array-like
            The predictor variables.

        y : array-like
            The response variable.

        family : str, optional
            Forwarded as ``family``. Default is "gaussian".

        ptype : str, optional
            Forwarded as ``ptype``. Default is "default".

        nfolds : int, optional
            Forwarded as ``nfolds``. Default is 10.

        foldid : array-like, optional
            Forwarded as ``foldid``. Default is an empty array.

        parallel : int, optional
            Forwarded as ``parallel``. Default is 1.

        keep : bool, optional
            Forwarded as ``keep``. Default is False.

        grouped : bool, optional
            Forwarded as ``grouped``. Default is True.

        **kwargs : dict
            Additional arguments forwarded to ``cvglmnet``.

        Returns
        -------

        result : DescribeResult
            Named tuple ``(model, cvfit, lambda_min, lambda_1se, best_coef)``.
            ``model`` is a new default-constructed ``GLMNet`` whose ``model``
            is the raw ``cvfit`` and whose ``coef_`` is ``best_coef``, the
            flattened coefficients at ``lambda_min``.
        """
        # Inside a method body the bare name ``cvglmnet`` resolves to the
        # imported module-level function, not to this method.
        # ``catch_warnings`` restores the caller's warning filters on exit,
        # including when the CV fit raises, instead of leaving a global
        # "ignore" or "default" filter installed.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            cvfit = cvglmnet(
                x=X,
                y=y,
                family=family,
                ptype=ptype,
                nfolds=nfolds,
                foldid=foldid,
                parallel=parallel,
                keep=keep,
                grouped=grouped,
                **kwargs
            )

        best_lambda = cvfit["lambda_min"][0]
        best_lambda_1se = cvfit["lambda_1se"][0]
        best_coef = cvglmnetCoef(cvfit, s=best_lambda).ravel()
        DescribeResult = namedtuple(
            "DescribeResult",
            ["model", "cvfit", "lambda_min", "lambda_1se", "best_coef"],
        )
        glmnet_obj = GLMNet()
        glmnet_obj.model = cvfit
        glmnet_obj.coef_ = best_coef
        return DescribeResult(
            glmnet_obj, cvfit, best_lambda, best_lambda_1se, best_coef
        )

    def get_coef(self, s=None, exact=False):
        """
        Get the coefficients.

        Parameters
        ----------

        s : float, optional
            The value of lambda at which extraction is made. Default is None,
            which returns the cached ``coef_`` (None before fitting).

        exact : bool, optional
            Whether to use exact lambda values or not. Default is False.

        Returns
        -------

        coef : array-like
            The coefficients.
        """
        if s is None:
            return self.coef_
        assert self.model is not None, "Model not fitted yet."
        return glmnetCoef(self.model, s=s, exact=exact)

    def print(self):
        """
        Print the model's characteristics.
        """
        return glmnetPrint(self.model)

    def plot(self, xvar="lambda", label=True):
        """
        Plot the model's coefficients.

        Parameters
        ----------

        xvar : str, optional
            The variable to plot ("norm" for the L1 norm of coefficients,
            "lambda" for the log-lambda value or "dev" for percentage of
            deviance explained). Default is "lambda".

        label : bool, optional
            Whether to label the plot. Default is True.
        """
        assert xvar in ("norm", "lambda", "dev"), "Invalid input for xvar."
        return glmnetPlot(self.model, xvar=xvar, label=label)

    def predict(self, X, ptype="response", s=None, exact=False, **kwargs):
        """
        Predict the response variable.

        Parameters
        ----------

        X : array-like
            The predictor variables.

        ptype : str
            The type of prediction to make.
            "response" is the same as "link" for the "gaussian" family.
            "class" returns the class label.
            "coefficients" computes the coefficients at values of s.
            "nonzero" returns a list of the indices of the nonzero
            coefficients for each value of s.
            Default is "response".

        s : float or array-like, optional
            The value(s) of lambda at which extraction is made.
            Default is None, in which case 0.1 is used.

        exact : bool, optional
            Whether to use exact lambda values or not. Default is False.

        **kwargs : dict
            Additional arguments forwarded to ``glmnetPredict``.

        Returns
        -------

        res : array-like
            The output of ``glmnetPredict``. For a single lambda only the
            column belonging to that lambda is returned when the result
            supports two-dimensional indexing.
        """
        # An explicit ``s`` must win on every call, including the first one
        # (when ``self.s`` is still None); only a missing ``s`` falls back
        # to 0.1. ``self.s`` records the value used by this call.
        self.s = 0.1 if s is None else s
        assert ptype in (
            "response",
            "coefficients",
            "class",
            "nonzero",
        ), "Invalid input for ptype."
        # Flatten so that scalars, 0-d arrays and length-1 sequences are all
        # recognised as "one lambda" without building a ragged array.
        lam = np.ravel(self.s)
        if lam.size == 1:
            # The single lambda is padded with a second value (0.1) and the
            # first result column is kept. NOTE(review): presumably a
            # workaround for glmnetPredict with a single ``s``; confirm.
            res = glmnetPredict(
                self.model,
                X,
                ptype=ptype,
                s=np.asarray([lam[0], 0.1]),
                exact=exact,
                **kwargs
            )
            try:
                return res[:, 0]
            except IndexError:
                return res[range(X.shape[0])]
            except ValueError:
                return res
        return glmnetPredict(
            self.model, X, ptype=ptype, s=self.s, exact=exact, **kwargs
        )

    def predict_proba(self, X):
        """
        Delegate to ``self.model.predict_proba(X)``.

        NOTE(review): requires the object stored in ``self.model`` to expose
        ``predict_proba``; whether the value returned by ``glmnet`` does is
        not visible here. Confirm.
        """
        return self.model.predict_proba(X)

    def predict_log_proba(self, X):
        """
        Delegate to ``self.model.predict_log_proba(X)``.

        NOTE(review): requires the object stored in ``self.model`` to expose
        ``predict_log_proba``. Confirm.
        """
        return self.model.predict_log_proba(X)

    def score(self, X, y):
        """
        Delegate to ``self.model.score(X, y)``.

        NOTE(review): requires the object stored in ``self.model`` to expose
        ``score``. Confirm.
        """
        return self.model.score(X, y)
class GLMNet(BaseEstimator, RegressorMixin, ClassifierMixin):
    """
    A sklearn-style wrapper for the glmnet package.

    More details about GLMNet can be found at:

    https://glmnet.stanford.edu/articles/glmnet.html

    Parameters
    ----------

    weights : array-like, optional
        Forwarded unchanged to ``glmnet`` as ``weights`` (presumably
        per-observation weights; confirm against ``glmnet``).
        Default is None.

    alpha : float, optional
        The alpha parameter in the elastic net penalty. Default is 1.0.
        0.0 is the ridge penalty and 1.0 is the lasso penalty.

    nlambda : int, optional
        The number of lambda values to compute. Default is 100.

    lambdau : float, optional
        User-defined lambda value. Default is None.

    standardize : bool, optional
        Whether to standardize the predictors. Default is True.

    thresh : float, optional
        The convergence threshold. Default is 1e-07.

    dfmax : float, optional
        The maximum number of degrees of freedom. Default is 1e+10.

    pmax : float, optional
        The maximum number of predictors. Default is 1e+10.

    exclude : array-like, optional
        The indices of predictors to exclude. Default is None.

    penalty_factor : array-like, optional
        The penalty factors for each predictor. Default is None.

    lower_lambdau : float, optional
        The lower bound for the lambda values. Default is None.

    upper_lambdau : float, optional
        The upper bound for the lambda values. Default is None.

    maxit : float, optional
        The maximum number of iterations. Default is 1e+05.

    type_measure : int, optional
        The type of measure to use. Default is 1.

    family : str, optional
        The family of the response variable. Default is 'gaussian'.
        Other options are 'binomial', 'multinomial', 'poisson', 'cox' and 'mgaussian'.

    parallel : bool, optional
        Whether to use parallel processing. Default is False.

    ncores : int, optional
        The number of cores to use. Default is -1.

    verbose : bool, optional
        Whether to print messages. Default is False.

    Attributes
    ----------

    model : object or None
        Whatever ``glmnet`` (or ``cvglmnet``) returned; None until fitted.

    s : float, array-like or None
        The lambda value(s) used by the most recent ``predict`` call.

    coef_ : array-like or None
        Coefficients extracted by ``fit`` (or by ``cvglmnet`` at
        ``lambda_min``); None until fitted.

    """

    def __init__(
        self,
        weights=None,
        alpha=1.0,
        nlambda=100,
        lambdau=None,
        standardize=True,
        thresh=1e-07,
        dfmax=1e10,
        pmax=1e10,
        exclude=None,
        penalty_factor=None,
        lower_lambdau=None,
        upper_lambdau=None,
        maxit=1e05,
        type_measure=1,
        family="gaussian",
        parallel=False,
        ncores=-1,
        verbose=False,
    ):
        # Hyperparameters are stored verbatim and only consumed in ``fit``.
        self.weights = weights
        self.alpha = alpha
        self.nlambda = nlambda
        self.lambdau = lambdau
        self.standardize = standardize
        self.thresh = thresh
        self.dfmax = dfmax
        self.pmax = pmax
        self.exclude = exclude
        self.penalty_factor = penalty_factor
        self.lower_lambdau = lower_lambdau
        self.upper_lambdau = upper_lambdau
        self.maxit = maxit
        self.type_measure = type_measure
        self.family = family
        self.parallel = parallel
        self.ncores = ncores
        self.verbose = verbose
        # Fitted state; populated by ``fit`` / ``predict`` / ``cvglmnet``.
        self.model = None
        self.s = None
        self.coef_ = None

    def fit(self, X, y, s=None, exact=False, **kwargs):
        """
        Fit the model.

        Parameters
        ----------

        X : array-like
            The predictor variables.

        y : array-like
            The response variable.

        s : float, optional
            The value of lambda at which extraction is made. Default is None.

        exact : bool, optional
            Whether to use exact lambda values. Default is False.

        **kwargs : dict
            Additional arguments to pass to glmnetCoef.

        Returns
        -------

        self : object
            Returns the instance itself.

        """
        self.model = glmnet(
            x=X,
            y=y,
            weights=self.weights,
            alpha=self.alpha,
            nlambda=self.nlambda,
            lambdau=self.lambdau,
            standardize=self.standardize,
            thresh=self.thresh,
            dfmax=self.dfmax,
            pmax=self.pmax,
            exclude=self.exclude,
            penalty_factor=self.penalty_factor,
            lower_lambdau=self.lower_lambdau,
            upper_lambdau=self.upper_lambdau,
            maxit=self.maxit,
            type_measure=self.type_measure,
            family=self.family,
            parallel=self.parallel,
            ncores=self.ncores,
            verbose=self.verbose,
        )
        # Cache the coefficients so ``get_coef()`` works without arguments.
        self.coef_ = glmnetCoef(self.model, s=s, exact=exact, **kwargs)
        return self

    def cvglmnet(
        self,
        X,
        y,
        family="gaussian",
        ptype="default",
        nfolds=10,
        foldid=np.empty([0]),
        parallel=1,
        keep=False,
        grouped=True,
        **kwargs
    ):
        """
        Run cross-validated glmnet and wrap the result.

        Every argument is forwarded to the module-level ``cvglmnet``
        function. The hyperparameters stored on this instance are not
        used by this method.

        Parameters
        ----------

        X : array-like
            The predictor variables.

        y : array-like
            The response variable.

        family : str, optional
            Forwarded as ``family``. Default is "gaussian".

        ptype : str, optional
            Forwarded as ``ptype``. Default is "default".

        nfolds : int, optional
            Forwarded as ``nfolds``. Default is 10.

        foldid : array-like, optional
            Forwarded as ``foldid``. Default is an empty array.

        parallel : int, optional
            Forwarded as ``parallel``. Default is 1.

        keep : bool, optional
            Forwarded as ``keep``. Default is False.

        grouped : bool, optional
            Forwarded as ``grouped``. Default is True.

        **kwargs : dict
            Additional arguments forwarded to ``cvglmnet``.

        Returns
        -------

        result : DescribeResult
            Named tuple ``(model, cvfit, lambda_min, lambda_1se, best_coef)``.
            ``model`` is a new default-constructed ``GLMNet`` whose ``model``
            is the raw ``cvfit`` and whose ``coef_`` is ``best_coef``, the
            flattened coefficients at ``lambda_min``.
        """
        # Inside a method body the bare name ``cvglmnet`` resolves to the
        # imported module-level function, not to this method.
        # ``catch_warnings`` restores the caller's warning filters on exit,
        # including when the CV fit raises, instead of leaving a global
        # "ignore" or "default" filter installed.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            cvfit = cvglmnet(
                x=X,
                y=y,
                family=family,
                ptype=ptype,
                nfolds=nfolds,
                foldid=foldid,
                parallel=parallel,
                keep=keep,
                grouped=grouped,
                **kwargs
            )

        best_lambda = cvfit["lambda_min"][0]
        best_lambda_1se = cvfit["lambda_1se"][0]
        best_coef = cvglmnetCoef(cvfit, s=best_lambda).ravel()
        DescribeResult = namedtuple(
            "DescribeResult",
            ["model", "cvfit", "lambda_min", "lambda_1se", "best_coef"],
        )
        glmnet_obj = GLMNet()
        glmnet_obj.model = cvfit
        glmnet_obj.coef_ = best_coef
        return DescribeResult(
            glmnet_obj, cvfit, best_lambda, best_lambda_1se, best_coef
        )

    def get_coef(self, s=None, exact=False):
        """
        Get the coefficients.

        Parameters
        ----------

        s : float, optional
            The value of lambda at which extraction is made. Default is None,
            which returns the cached ``coef_`` (None before fitting).

        exact : bool, optional
            Whether to use exact lambda values or not. Default is False.

        Returns
        -------

        coef : array-like
            The coefficients.
        """
        if s is None:
            return self.coef_
        assert self.model is not None, "Model not fitted yet."
        return glmnetCoef(self.model, s=s, exact=exact)

    def print(self):
        """
        Print the model's characteristics.
        """
        return glmnetPrint(self.model)

    def plot(self, xvar="lambda", label=True):
        """
        Plot the model's coefficients.

        Parameters
        ----------

        xvar : str, optional
            The variable to plot ("norm" for the L1 norm of coefficients,
            "lambda" for the log-lambda value or "dev" for percentage of
            deviance explained). Default is "lambda".

        label : bool, optional
            Whether to label the plot. Default is True.
        """
        assert xvar in ("norm", "lambda", "dev"), "Invalid input for xvar."
        return glmnetPlot(self.model, xvar=xvar, label=label)

    def predict(self, X, ptype="response", s=None, exact=False, **kwargs):
        """
        Predict the response variable.

        Parameters
        ----------

        X : array-like
            The predictor variables.

        ptype : str
            The type of prediction to make.
            "response" is the same as "link" for the "gaussian" family.
            "class" returns the class label.
            "coefficients" computes the coefficients at values of s.
            "nonzero" returns a list of the indices of the nonzero
            coefficients for each value of s.
            Default is "response".

        s : float or array-like, optional
            The value(s) of lambda at which extraction is made.
            Default is None, in which case 0.1 is used.

        exact : bool, optional
            Whether to use exact lambda values or not. Default is False.

        **kwargs : dict
            Additional arguments forwarded to ``glmnetPredict``.

        Returns
        -------

        res : array-like
            The output of ``glmnetPredict``. For a single lambda only the
            column belonging to that lambda is returned when the result
            supports two-dimensional indexing.
        """
        # An explicit ``s`` must win on every call, including the first one
        # (when ``self.s`` is still None); only a missing ``s`` falls back
        # to 0.1. ``self.s`` records the value used by this call.
        self.s = 0.1 if s is None else s
        assert ptype in (
            "response",
            "coefficients",
            "class",
            "nonzero",
        ), "Invalid input for ptype."
        # Flatten so that scalars, 0-d arrays and length-1 sequences are all
        # recognised as "one lambda" without building a ragged array.
        lam = np.ravel(self.s)
        if lam.size == 1:
            # The single lambda is padded with a second value (0.1) and the
            # first result column is kept. NOTE(review): presumably a
            # workaround for glmnetPredict with a single ``s``; confirm.
            res = glmnetPredict(
                self.model,
                X,
                ptype=ptype,
                s=np.asarray([lam[0], 0.1]),
                exact=exact,
                **kwargs
            )
            try:
                return res[:, 0]
            except IndexError:
                return res[range(X.shape[0])]
            except ValueError:
                return res
        return glmnetPredict(
            self.model, X, ptype=ptype, s=self.s, exact=exact, **kwargs
        )

    def predict_proba(self, X):
        """
        Delegate to ``self.model.predict_proba(X)``.

        NOTE(review): requires the object stored in ``self.model`` to expose
        ``predict_proba``; whether the value returned by ``glmnet`` does is
        not visible here. Confirm.
        """
        return self.model.predict_proba(X)

    def predict_log_proba(self, X):
        """
        Delegate to ``self.model.predict_log_proba(X)``.

        NOTE(review): requires the object stored in ``self.model`` to expose
        ``predict_log_proba``. Confirm.
        """
        return self.model.predict_log_proba(X)

    def score(self, X, y):
        """
        Delegate to ``self.model.score(X, y)``.

        NOTE(review): requires the object stored in ``self.model`` to expose
        ``score``. Confirm.
        """
        return self.model.score(X, y)
A sklearn-style wrapper for the glmnet package.
More details about GLMNet can be found at:
https://glmnet.stanford.edu/articles/glmnet.html
Parameters
alpha : float, optional The alpha parameter in the elastic net penalty. Default is 1.0. 0.0 is the ridge penalty and 1.0 is the lasso penalty.
nlambda : int, optional The number of lambda values to compute. Default is 100.
lambdau : float, optional User-defined lambda value. Default is None.
standardize : bool, optional Whether to standardize the predictors. Default is True.
thresh : float, optional The convergence threshold. Default is 1e-07.
dfmax : float, optional The maximum number of degrees of freedom. Default is 1e+10.
pmax : float, optional The maximum number of predictors. Default is 1e+10.
exclude : array-like, optional The indices of predictors to exclude. Default is None.
penalty_factor : array-like, optional The penalty factors for each predictor. Default is None.
lower_lambdau : float, optional The lower bound for the lambda values. Default is None.
upper_lambdau : float, optional The upper bound for the lambda values. Default is None.
maxit : float, optional The maximum number of iterations. Default is 1e+05.
type_measure : int, optional The type of measure to use. Default is 1.
family : str, optional The family of the response variable. Default is 'gaussian'. Other options are 'binomial', 'multinomial', 'poisson', 'cox' and 'mgaussian'.
parallel : bool, optional Whether to use parallel processing. Default is False.
ncores : int, optional The number of cores to use. Default is -1.
verbose : bool, optional Whether to print messages. Default is False.
def fit(self, X, y, s=None, exact=False, **kwargs):
    """
    Train glmnet on ``X``/``y`` and cache the resulting coefficients.

    Parameters
    ----------

    X : array-like
        The predictor variables.

    y : array-like
        The response variable.

    s : float, optional
        Lambda value handed to ``glmnetCoef`` when extracting the
        coefficients. Default is None.

    exact : bool, optional
        Handed to ``glmnetCoef`` as ``exact``. Default is False.

    **kwargs : dict
        Extra keyword arguments handed to ``glmnetCoef``.

    Returns
    -------

    self : object
        This estimator, with ``model`` and ``coef_`` populated.

    """
    # Names of the stored hyperparameters; each is forwarded to ``glmnet``
    # under the same keyword, in this order.
    hyper_names = (
        "weights",
        "alpha",
        "nlambda",
        "lambdau",
        "standardize",
        "thresh",
        "dfmax",
        "pmax",
        "exclude",
        "penalty_factor",
        "lower_lambdau",
        "upper_lambdau",
        "maxit",
        "type_measure",
        "family",
        "parallel",
        "ncores",
        "verbose",
    )
    settings = {name: getattr(self, name) for name in hyper_names}
    fitted = glmnet(x=X, y=y, **settings)
    self.model = fitted
    self.coef_ = glmnetCoef(fitted, s=s, exact=exact, **kwargs)
    return self
Fit the model.
Parameters
X : array-like The predictor variables.
y : array-like The response variable.
s : float, optional The value of lambda at which extraction is made. Default is None.
exact : bool, optional Whether to use exact lambda values. Default is False.
**kwargs : dict Additional arguments to pass to glmnetCoef.
Returns
self : object Returns the instance itself.
def predict(self, X, ptype="response", s=None, exact=False, **kwargs):
    """
    Predict the response variable.

    Parameters
    ----------

    X : array-like
        The predictor variables.

    ptype : str
        The type of prediction to make.
        "response" is the same as "link" for the "gaussian" family.
        "class" returns the class label.
        "coefficients" computes the coefficients at values of s.
        "nonzero" returns a list of the indices of the nonzero
        coefficients for each value of s.
        Default is "response".

    s : float or array-like, optional
        The value(s) of lambda at which extraction is made.
        Default is None, in which case 0.1 is used.

    exact : bool, optional
        Whether to use exact lambda values or not. Default is False.

    **kwargs : dict
        Additional arguments forwarded to ``glmnetPredict``.

    Returns
    -------

    res : array-like
        The output of ``glmnetPredict``. For a single lambda only the
        column belonging to that lambda is returned when the result
        supports two-dimensional indexing.
    """
    # An explicit ``s`` must win on every call, including the first one
    # (when ``self.s`` is still None); only a missing ``s`` falls back to
    # 0.1. ``self.s`` records the value used by this call.
    self.s = 0.1 if s is None else s
    assert ptype in (
        "response",
        "coefficients",
        "class",
        "nonzero",
    ), "Invalid input for ptype."
    # Flatten so that scalars, 0-d arrays and length-1 sequences are all
    # recognised as "one lambda" without building a ragged array.
    lam = np.ravel(self.s)
    if lam.size == 1:
        # The single lambda is padded with a second value (0.1) and the
        # first result column is kept. NOTE(review): presumably a
        # workaround for glmnetPredict with a single ``s``; confirm.
        res = glmnetPredict(
            self.model,
            X,
            ptype=ptype,
            s=np.asarray([lam[0], 0.1]),
            exact=exact,
            **kwargs
        )
        try:
            return res[:, 0]
        except IndexError:
            return res[range(X.shape[0])]
        except ValueError:
            return res
    return glmnetPredict(
        self.model, X, ptype=ptype, s=self.s, exact=exact, **kwargs
    )
Predict the response variable.
Parameters
X : array-like The predictor variables.
ptype : str The type of prediction to make. "response" is the same as "link" for the "gaussian" family. "class" returns the class label. "coefficients" computes the coefficients at the values of s. "nonzero" returns a list of the indices of the nonzero coefficients for each value of s. Default is "response".
s : float The value of lambda at which extraction is made. Default is None.
exact : bool, optional Whether to use exact lambda values or not. Default is False.
**kwargs : dict Additional arguments
Return the coefficient of determination of the prediction.
The coefficient of determination \( R^2 \) is defined as
\( (1 - \frac{u}{v}) \), where \( u \) is the residual
sum of squares ((y_true - y_pred)** 2).sum() and \( v \)
is the total sum of squares ((y_true - y_true.mean()) ** 2).sum().
The best possible score is 1.0 and it can be negative (because the
model can be arbitrarily worse). A constant model that always predicts
the expected value of y, disregarding the input features, would get
a \( R^2 \) score of 0.0.
Parameters
X : array-like of shape (n_samples, n_features)
Test samples. For some estimators this may be a precomputed
kernel matrix or a list of generic objects instead with shape
(n_samples, n_samples_fitted), where n_samples_fitted
is the number of samples used in the fitting for the estimator.
y : array-like of shape (n_samples,) or (n_samples, n_outputs)
True values for X.
sample_weight : array-like of shape (n_samples,), default=None Sample weights.
Returns
score : float
\( R^2 \) of self.predict(X) w.r.t. y.
Notes
The \( R^2 \) score used when calling score on a regressor uses
multioutput='uniform_average' from version 0.23 to keep consistent
with the default value of sklearn.metrics.r2_score().
This influences the score method of all the multioutput
regressors (except for
sklearn.multioutput.MultiOutputRegressor).