genbooster
1from .genboosterregressor import BoosterRegressor 2from .genboosterclassifier import BoosterClassifier 3from .randombagregressor import RandomBagRegressor 4from .randombagclassifier import RandomBagClassifier 5from .regressionmodels import LinfaRegressor 6from .adaboostclassifier import AdaBoostClassifier 7from .adaboostregressor import AdaBoostRegressor 8from .rust_core import RustBooster, Regressor 9 10 11__all__ = ["BoosterRegressor", "BoosterClassifier", 12 "RandomBagRegressor", "RandomBagClassifier", 13 "RustBooster", "Regressor", "LinfaRegressor", 14 "AdaBoostClassifier", "AdaBoostRegressor"]
class BoosterRegressor(BaseEstimator, RegressorMixin):
    """Generic Gradient Boosting Regressor (for any base learner).

    Parameters:

        base_estimator: Base learner to use for the booster. When None, an
            ExtraTreeRegressor is instantiated by the constructor.

        n_estimators: Number of boosting stages to perform.

        learning_rate: Learning rate shrinks the contribution of each estimator.

        n_hidden_features: Number of hidden features to use for the base learner.

        direct_link: Whether to use direct link for the base learner or not.

        weights_distribution: Distribution of the weights for the booster (uniform or normal).

        dropout: Dropout rate.

        tolerance: Tolerance for early stopping.

        random_state: Random state.

    Attributes:

        scaler_: StandardScaler fitted on the training inputs.

        base_estimator_: The base learner.

        booster_: The boosting model.

        y_mean_: Mean of the target variable.

    Examples:

        See https://github.com/Techtonique/genbooster/tree/main/examples

    """

    def __init__(
        self,
        base_estimator: Optional[BaseEstimator] = None,
        n_estimators: int = 100,
        learning_rate: float = 0.1,
        n_hidden_features: int = 5,
        direct_link: bool = True,
        weights_distribution: str = 'uniform',
        dropout: float = 0.0,
        tolerance: float = 1e-4,
        random_state: Optional[int] = 42
    ):
        # The default learner is substituted here, at construction time.
        if base_estimator is None:
            self.base_estimator = ExtraTreeRegressor()
        else:
            self.base_estimator = base_estimator
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.n_hidden_features = n_hidden_features
        self.direct_link = direct_link
        self.weights_distribution = weights_distribution
        self.dropout = dropout
        self.tolerance = tolerance
        self.random_state = random_state
        self.scaler_ = StandardScaler()
        self.y_mean_ = None

    def fit(self, X, y) -> "BoosterRegressor":
        """Fit the boosting model.

        Seeds NumPy's and Python's global random generators, standardizes X,
        centers y on its mean and trains the Rust booster on the result.

        Parameters:

            X: Input data.

            y: Target data, of shape (n_samples,) or (n_samples, 1).

        Returns:

            self: The fitted boosting model.

        Raises:

            ValueError: If y is 2-D with more than one column.
        """
        seeded = self.random_state is not None
        if seeded:
            seed_int = int(abs(self.random_state))
        else:
            # No seed requested: draw one so the run still uses a single seed.
            seed_int = np.random.randint(0, 2**31 - 1)
        np.random.seed(seed_int)
        random.seed(seed_int)
        if seeded and hasattr(self.base_estimator, "random_state"):
            self.base_estimator.random_state = seed_int
        # The Rust side receives the seed as an unsigned 64-bit integer.
        seed = np.uint64(seed_int)

        # float64, C-contiguous copies of the inputs.
        X = np.array(X, dtype=np.float64, copy=True, order='C')
        y = np.array(y, dtype=np.float64, copy=True, order='C')

        scaled_X = np.array(
            self.scaler_.fit_transform(X), dtype=np.float64, copy=True, order='C'
        )

        self.y_mean_ = float(np.mean(y))
        centered_y = np.array(
            y - self.y_mean_, dtype=np.float64, copy=True, order='C'
        )

        # The booster is trained on a 1-D target.
        if centered_y.ndim == 2:
            if centered_y.shape[1] != 1:
                raise ValueError("y must have shape (n_samples,) or (n_samples, 1)")
            centered_y = centered_y.ravel()

        # Only reachable with None if base_estimator was reset after __init__.
        if self.base_estimator is None:
            self.base_estimator_ = Ridge()
        else:
            self.base_estimator_ = self.base_estimator

        self.booster_ = _RustBooster(
            self.base_estimator_,
            self.n_estimators,
            self.learning_rate,
            self.n_hidden_features,
            self.direct_link,
            weights_distribution=self.weights_distribution,
            tolerance=self.tolerance
        )
        self.booster_.fit_boosting(
            scaled_X,
            centered_y,
            dropout=self.dropout,
            seed=seed
        )
        return self

    def predict(self, X) -> np.ndarray:
        """Make predictions with the boosting model.

        Parameters:

            X: Input data.

        Returns:

            preds: Predictions (booster output shifted back by y_mean_).
        """
        if isinstance(X, pd.DataFrame):
            X = X.values
        # fit() hands the booster float64 data; coerce here as well so the
        # dtype seen at predict time matches the one seen at fit time.
        scaled_X = np.asarray(self.scaler_.transform(X), dtype=np.float64)
        return self.booster_.predict_boosting(scaled_X) + self.y_mean_
Generic Gradient Boosting Regressor (for any base learner).
Parameters:
base_estimator: Base learner to use for the booster.
n_estimators: Number of boosting stages to perform.
learning_rate: Learning rate shrinks the contribution of each estimator.
n_hidden_features: Number of hidden features to use for the base learner.
direct_link: Whether to use direct link for the base learner or not.
weights_distribution: Distribution of the weights for the booster (uniform or normal).
dropout: Dropout rate.
tolerance: Tolerance for early stopping.
random_state: Random state.
Attributes:
base_estimator_: The base learner.
booster_: The boosting model.
y_mean_: Mean of the target variable.
Examples:
See https://github.com/Techtonique/genbooster/tree/main/examples
def fit(self, X, y) -> "BoosterRegressor":
    """Fit the boosting model.

    Seeds NumPy's and Python's global random generators, standardizes X,
    centers y on its mean and trains the Rust booster on the result.

    Parameters:

        X: Input data.

        y: Target data, of shape (n_samples,) or (n_samples, 1).

    Returns:

        self: The fitted boosting model.

    Raises:

        ValueError: If y is 2-D with more than one column.
    """
    seeded = self.random_state is not None
    if seeded:
        seed_int = int(abs(self.random_state))
    else:
        # No seed requested: draw one so the run still uses a single seed.
        seed_int = np.random.randint(0, 2**31 - 1)
    np.random.seed(seed_int)
    random.seed(seed_int)
    if seeded and hasattr(self.base_estimator, "random_state"):
        self.base_estimator.random_state = seed_int
    # The Rust side receives the seed as an unsigned 64-bit integer.
    seed = np.uint64(seed_int)

    # float64, C-contiguous copies of the inputs.
    as_f64 = dict(dtype=np.float64, copy=True, order='C')
    X = np.array(X, **as_f64)
    y = np.array(y, **as_f64)
    scaled_X = np.array(self.scaler_.fit_transform(X), **as_f64)

    self.y_mean_ = float(np.mean(y))
    centered_y = np.array(y - self.y_mean_, **as_f64)

    # The booster is trained on a 1-D target.
    if centered_y.ndim == 2:
        if centered_y.shape[1] != 1:
            raise ValueError("y must have shape (n_samples,) or (n_samples, 1)")
        centered_y = centered_y.ravel()

    # Fall back to Ridge when no base learner is set on the instance.
    learner = Ridge() if self.base_estimator is None else self.base_estimator
    self.base_estimator_ = learner

    self.booster_ = _RustBooster(
        self.base_estimator_,
        self.n_estimators,
        self.learning_rate,
        self.n_hidden_features,
        self.direct_link,
        weights_distribution=self.weights_distribution,
        tolerance=self.tolerance
    )
    self.booster_.fit_boosting(
        scaled_X,
        centered_y,
        dropout=self.dropout,
        seed=seed
    )
    return self
Fit the boosting model.
Parameters:
X: Input data.
y: Target data.
Returns:
self: The fitted boosting model.
def predict(self, X) -> np.ndarray:
    """Make predictions with the boosting model.

    Parameters:

        X: Input data.

    Returns:

        preds: Predictions (booster output shifted back by y_mean_).
    """
    if isinstance(X, pd.DataFrame):
        X = X.values
    # fit() hands the booster float64 data; coerce here as well so the
    # dtype seen at predict time matches the one seen at fit time.
    scaled_X = np.asarray(self.scaler_.transform(X), dtype=np.float64)
    return self.booster_.predict_boosting(scaled_X) + self.y_mean_
Make predictions with the boosting model.
Parameters:
X: Input data.
Returns:
preds: Predictions.
class BoosterClassifier(BaseEstimator, ClassifierMixin):
    """Generic Gradient Boosting Classifier (for any base learner).

    One BoosterRegressor is trained per class on the one-hot encoded target;
    the per-class outputs are turned into probabilities with a softmax.

    Parameters:

        base_estimator: Base learner to use for the booster. Default is ExtraTreeRegressor.

        n_estimators: Number of boosting stages to perform.

        learning_rate: Learning rate shrinks the contribution of each estimator.

        n_hidden_features: Number of hidden features to use for the base learner.

        direct_link: Whether to use direct link for the base learner or not.

        weights_distribution: Distribution of the weights for the booster (uniform or normal).

        dropout: Dropout rate.

        tolerance: Tolerance for early stopping.

        random_state: Random state.

    Attributes:

        classes_: The classes of the target variable.

        n_classes_: The number of classes of the target variable.

        boosters_: Fitted BoosterRegressor instances, one per class.

    Examples:

        See https://github.com/Techtonique/genbooster/tree/main/examples

    """

    def __init__(self,
                 base_estimator: Optional[BaseEstimator] = None,
                 n_estimators: int = 100,
                 learning_rate: float = 0.1,
                 n_hidden_features: int = 5,
                 direct_link: bool = True,
                 weights_distribution: str = 'uniform',
                 dropout: float = 0.0,
                 tolerance: float = 1e-4,
                 random_state: Optional[int] = 42):
        if base_estimator is None:
            self.base_estimator = ExtraTreeRegressor()
        else:
            self.base_estimator = base_estimator
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.n_hidden_features = n_hidden_features
        self.direct_link = direct_link
        self.weights_distribution = weights_distribution
        self.dropout = dropout
        self.tolerance = tolerance
        self.random_state = random_state
        self.boosters_ = []

    def fit(self, X, y) -> "BoosterClassifier":
        """Fit the booster model.

        Parameters:
            X: Input data.
            y: Target data.

        Returns:
            self: The fitted boosting model.
        """
        self.classes_ = np.unique(y)
        self.n_classes_ = len(self.classes_)
        Y = one_hot_encode2(y, self.n_classes_)

        # The feature matrix is the same for every class: convert it once.
        X_arr = np.asarray(X.values if hasattr(X, 'values') else X, dtype=np.float64)

        # Start from an empty list so that calling fit() again replaces the
        # previous per-class models instead of appending to them.
        self.boosters_ = []
        for i in range(self.n_classes_):
            booster = BoosterRegressor(
                base_estimator=self.base_estimator,
                n_estimators=self.n_estimators,
                learning_rate=self.learning_rate,
                n_hidden_features=self.n_hidden_features,
                direct_link=self.direct_link,
                weights_distribution=self.weights_distribution,
                tolerance=self.tolerance,
                dropout=self.dropout,
                random_state=self.random_state
            )
            # Column i of the one-hot matrix is the regression target for class i.
            y_arr = np.asarray(Y[:, i], dtype=np.float64)
            booster.fit(X=X_arr, y=y_arr)
            self.boosters_.append(booster)

        return self

    def predict(self, X) -> np.ndarray:
        """Make predictions with the boosting model.

        Parameters:

            X: Input data.

        Returns:

            preds: Class predictions, as the position of the most probable
                class along the first axis of predict_proba's output.
        """
        if isinstance(X, pd.DataFrame):
            X = X.values
        preds_proba = self.predict_proba(X)
        return np.argmax(preds_proba, axis=0)

    def predict_proba(self, X) -> np.ndarray:
        """Make probability predictions with the boosting model.

        Parameters:

            X: Input data.

        Returns:

            preds: Probability predictions; the first axis follows the order
                of boosters_ (one entry per class).
        """
        if isinstance(X, pd.DataFrame):
            X = X.values
        raw_preds = np.asarray([booster.predict(X) for booster in self.boosters_])
        # Subtracting the maximum keeps the exponentials from overflowing.
        shifted_preds = raw_preds - np.max(raw_preds, axis=0)
        exp_preds = np.exp(shifted_preds)
        return exp_preds / np.sum(exp_preds, axis=0)
Generic Gradient Boosting Classifier (for any base learner).
Parameters:
base_estimator: Base learner to use for the booster. Default is ExtraTreeRegressor.
n_estimators: Number of boosting stages to perform.
learning_rate: Learning rate shrinks the contribution of each estimator.
n_hidden_features: Number of hidden features to use for the base learner.
direct_link: Whether to use direct link for the base learner or not.
weights_distribution: Distribution of the weights for the booster (uniform or normal).
dropout: Dropout rate.
tolerance: Tolerance for early stopping.
random_state: Random state.
Attributes:
classes_: The classes of the target variable.
n_classes_: The number of classes of the target variable.
boosters_: Base learners.
Examples:
See https://github.com/Techtonique/genbooster/tree/main/examples
def fit(self, X, y) -> "BoosterClassifier":
    """Fit the booster model.

    Trains one BoosterRegressor per class on the one-hot encoded target.

    Parameters:
        X: Input data.
        y: Target data.

    Returns:
        self: The fitted boosting model.
    """
    self.classes_ = np.unique(y)
    self.n_classes_ = len(self.classes_)
    Y = one_hot_encode2(y, self.n_classes_)

    # The feature matrix is the same for every class: convert it once.
    X_arr = np.asarray(X.values if hasattr(X, 'values') else X, dtype=np.float64)

    # Start from an empty list so that calling fit() again replaces the
    # previous per-class models instead of appending to them.
    self.boosters_ = []
    for i in range(self.n_classes_):
        booster = BoosterRegressor(
            base_estimator=self.base_estimator,
            n_estimators=self.n_estimators,
            learning_rate=self.learning_rate,
            n_hidden_features=self.n_hidden_features,
            direct_link=self.direct_link,
            weights_distribution=self.weights_distribution,
            tolerance=self.tolerance,
            dropout=self.dropout,
            random_state=self.random_state
        )
        # Column i of the one-hot matrix is the regression target for class i.
        y_arr = np.asarray(Y[:, i], dtype=np.float64)
        booster.fit(X=X_arr, y=y_arr)
        self.boosters_.append(booster)

    return self
Fit the booster model.
Parameters: X: Input data. y: Target data.
Returns: self: The fitted boosting model.
def predict(self, X) -> np.ndarray:
    """Make predictions with the boosting model.

    Parameters:

        X: Input data.

    Returns:

        preds: Class predictions, as the position of the most probable
            class along the first axis of predict_proba's output.
    """
    features = X.values if isinstance(X, pd.DataFrame) else X
    return np.argmax(self.predict_proba(features), axis=0)
Make predictions with the boosting model.
Parameters:
X: Input data.
Returns:
preds: Class predictions.
def predict_proba(self, X) -> np.ndarray:
    """Make probability predictions with the boosting model.

    Parameters:

        X: Input data.

    Returns:

        preds: Probability predictions; the first axis follows the order
            of boosters_ (one entry per class).
    """
    features = X.values if isinstance(X, pd.DataFrame) else X
    scores = np.asarray([model.predict(features) for model in self.boosters_])
    # Softmax across the per-class scores; subtracting the maximum first
    # keeps the exponentials from overflowing.
    exponentials = np.exp(scores - np.max(scores, axis=0))
    return exponentials / np.sum(exponentials, axis=0)
Make probability predictions with the boosting model.
Parameters:
X: Input data.
Returns:
preds: Probability predictions.
class RandomBagRegressor(BaseEstimator, RegressorMixin):
    """Generic Random Bagging Regressor (for any base learner).

    Parameters:

        base_estimator: Base learner to use for the bagging model. When None,
            Ridge is used at fit time.

        n_estimators: Number of estimators passed to the underlying model.

        learning_rate: Learning rate passed to the underlying model.

        n_hidden_features: Number of hidden features to use for the base learner.

        direct_link: Whether to use direct link for the base learner or not.

        weights_distribution: Distribution of the weights (uniform or normal).

        dropout: Dropout rate.

        random_state: Random state. When None, the seed 42 is used.

    Attributes:

        scaler_: StandardScaler fitted on the training inputs.

        base_estimator_: The base learner.

        booster_: The fitted bagging model.

        y_mean_: The mean of the target variable.

    Examples:

        See https://github.com/Techtonique/genbooster/tree/main/examples

    """

    def __init__(
        self,
        base_estimator: Optional[BaseEstimator] = None,
        n_estimators: int = 100,
        learning_rate: float = 0.01,
        n_hidden_features: int = 5,
        direct_link: bool = True,
        weights_distribution: str = 'uniform',
        dropout: float = 0.0,
        random_state: Optional[int] = 42
    ):
        self.base_estimator = base_estimator
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.n_hidden_features = n_hidden_features
        self.direct_link = direct_link
        self.weights_distribution = weights_distribution
        self.dropout = dropout
        self.random_state = random_state
        self.scaler_ = StandardScaler()
        self.y_mean_ = None

    def fit(self, X, y) -> "RandomBagRegressor":
        """Fit the bagging model.

        Parameters:

            X: Input data.

            y: Target data (array-like; a single-column 2-D target is flattened).

        Returns:

            self: The fitted bagging model.
        """
        if isinstance(X, pd.DataFrame):
            X = X.values
        if isinstance(y, (pd.DataFrame, pd.Series)):
            y = y.values
        # Convert before doing arithmetic so plain Python lists are accepted.
        y = np.asarray(y, dtype=np.float64)
        if y.ndim == 2 and y.shape[1] == 1:
            # A single-column target (e.g. from a DataFrame) becomes 1-D,
            # matching what BoosterRegressor feeds the same Rust booster.
            y = y.ravel()
        scaled_X = self.scaler_.fit_transform(X)
        self.y_mean_ = np.mean(y)
        centered_y = y - self.y_mean_
        # Use Ridge as default base estimator if none provided
        if self.base_estimator is None:
            self.base_estimator_ = Ridge()
        else:
            self.base_estimator_ = self.base_estimator
        self.booster_ = _RustBooster(
            self.base_estimator_,
            self.n_estimators,
            self.learning_rate,
            self.n_hidden_features,
            self.direct_link,
            weights_distribution=self.weights_distribution
        )
        self.booster_.fit_bagging(
            np.asarray(scaled_X, dtype=np.float64),
            np.asarray(centered_y, dtype=np.float64),
            dropout=self.dropout,
            seed=self.random_state if self.random_state is not None else 42
        )
        return self

    def predict(self, X) -> np.ndarray:
        """Make predictions with the bagging model.

        Parameters:

            X: Input data.

        Returns:

            preds: Predictions (model output shifted back by y_mean_).
        """
        if isinstance(X, pd.DataFrame):
            X = X.values
        # Same float64 coercion as in fit(), so both calls hand the
        # extension the same dtype.
        scaled_X = np.asarray(self.scaler_.transform(X), dtype=np.float64)
        return self.booster_.predict_bagging(scaled_X) + self.y_mean_
Generic Random Bagging Regressor (for any base learner).
Parameters:
base_estimator: Base learner to use for the booster.
n_estimators: Number of boosting stages to perform.
learning_rate: Learning rate shrinks the contribution of each estimator.
n_hidden_features: Number of hidden features to use for the base learner.
direct_link: Whether to use direct link for the base learner or not.
weights_distribution: Distribution of the weights for the booster (uniform or normal).
dropout: Dropout rate.
random_state: Random state.
Attributes:
booster_: The fitted bagging model.
y_mean_: The mean of the target variable.
Examples:
See https://github.com/Techtonique/genbooster/tree/main/examples
def fit(self, X, y) -> "RandomBagRegressor":
    """Fit the bagging model.

    Parameters:

        X: Input data.

        y: Target data (array-like; a single-column 2-D target is flattened).

    Returns:

        self: The fitted bagging model.
    """
    if isinstance(X, pd.DataFrame):
        X = X.values
    if isinstance(y, (pd.DataFrame, pd.Series)):
        y = y.values
    # Convert before doing arithmetic so plain Python lists are accepted.
    y = np.asarray(y, dtype=np.float64)
    if y.ndim == 2 and y.shape[1] == 1:
        # A single-column target (e.g. from a DataFrame) becomes 1-D,
        # matching what BoosterRegressor feeds the same Rust booster.
        y = y.ravel()
    scaled_X = self.scaler_.fit_transform(X)
    self.y_mean_ = np.mean(y)
    centered_y = y - self.y_mean_
    # Use Ridge as default base estimator if none provided
    if self.base_estimator is None:
        self.base_estimator_ = Ridge()
    else:
        self.base_estimator_ = self.base_estimator
    self.booster_ = _RustBooster(
        self.base_estimator_,
        self.n_estimators,
        self.learning_rate,
        self.n_hidden_features,
        self.direct_link,
        weights_distribution=self.weights_distribution
    )
    self.booster_.fit_bagging(
        np.asarray(scaled_X, dtype=np.float64),
        np.asarray(centered_y, dtype=np.float64),
        dropout=self.dropout,
        seed=self.random_state if self.random_state is not None else 42
    )
    return self
Fit the bagging model.
Parameters:
X: Input data.
y: Target data.
Returns:
self: The fitted booster model.
def predict(self, X) -> np.ndarray:
    """Make predictions with the bagging model.

    Parameters:

        X: Input data.

    Returns:

        preds: Predictions (model output shifted back by y_mean_).
    """
    if isinstance(X, pd.DataFrame):
        X = X.values
    # fit() coerces the scaled inputs to float64 before calling the
    # extension; do the same here so both calls see the same dtype.
    scaled_X = np.asarray(self.scaler_.transform(X), dtype=np.float64)
    return self.booster_.predict_bagging(scaled_X) + self.y_mean_
Make predictions with the bagging model.
Parameters:
X: Input data.
Returns:
preds: Predictions.
class RandomBagClassifier(BaseEstimator, ClassifierMixin):
    """Generic Random Bagging Classifier (for any base learner).

    One bagging model is trained per class on the one-hot encoded target;
    the per-class outputs are turned into probabilities with a softmax.

    Parameters:

        base_estimator: Base learner to use for the bagging models. Default is Ridge.

        n_estimators: Number of estimators passed to each underlying model.

        learning_rate: Learning rate passed to each underlying model.

        n_hidden_features: Number of hidden features to use for the base learner.

        direct_link: Whether to use direct link for the base learner or not.

        weights_distribution: Distribution of the weights (uniform or normal).

        dropout: Dropout rate.

        random_state: Random state.

    Attributes:

        boosters_: The bagging learners, one per class.

        classes_: The classes of the target variable.

        n_classes_: The number of classes of the target variable.

    Examples:

        See https://github.com/Techtonique/genbooster/tree/main/examples

    """

    def __init__(self,
                 base_estimator: Optional[BaseEstimator] = None,
                 n_estimators: int = 100,
                 learning_rate: float = 0.01,
                 n_hidden_features: int = 5,
                 direct_link: bool = True,
                 weights_distribution: str = 'uniform',
                 dropout: float = 0.0,
                 random_state: Optional[int] = 42):
        if base_estimator is None:
            self.base_estimator = Ridge()
        else:
            self.base_estimator = base_estimator
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.n_hidden_features = n_hidden_features
        self.direct_link = direct_link
        self.weights_distribution = weights_distribution
        self.dropout = dropout
        self.random_state = random_state
        self.y_mean_ = None
        self.boosters_ = None

    def fit(self, X, y) -> "RandomBagClassifier":
        """Fit the bagging model.

        Parameters:

            X: Input data.

            y: Target data; every label is converted with int().

        Returns:

            self: The fitted bagging model.
        """
        if isinstance(X, pd.DataFrame):
            X = X.values
        if isinstance(y, pd.DataFrame):
            y = y.values
        y = np.asarray([int(x) for x in y]).ravel()
        Y = OneHotEncoder().fit_transform(y.reshape(-1, 1)).toarray()
        self.classes_ = np.unique(y)
        self.n_classes_ = len(self.classes_)

        # Same float64 coercion RandomBagRegressor applies before calling
        # the extension; done once because X is shared by all classes.
        X_arr = np.asarray(X, dtype=np.float64)

        self.boosters_ = []
        for i in range(self.n_classes_):
            booster = _RustBooster(
                self.base_estimator,
                self.n_estimators,
                self.learning_rate,
                self.n_hidden_features,
                self.direct_link,
                weights_distribution=self.weights_distribution
            )
            # NOTE(review): random_state=None is forwarded unchanged, whereas
            # RandomBagRegressor substitutes 42 -- confirm the extension
            # accepts None.
            booster.fit_bagging(X_arr, Y[:, i], dropout=self.dropout, seed=self.random_state)
            self.boosters_.append(booster)
        return self

    def predict(self, X) -> np.ndarray:
        """Make predictions with the bagging model.

        Parameters:

            X: Input data.

        Returns:

            preds: Class predictions, expressed as the labels in classes_.
        """
        if isinstance(X, pd.DataFrame):
            X = X.values
        preds_proba = self.predict_proba(X)
        # argmax yields positions; translate them to the fitted labels so
        # label sets other than 0..n_classes_-1 are predicted correctly.
        return self.classes_[np.argmax(preds_proba, axis=0)]

    def predict_proba(self, X) -> np.ndarray:
        """Make probability predictions with the bagging model.

        Parameters:

            X: Input data.

        Returns:

            preds: Probability predictions; the first axis follows the order
                of boosters_ (one entry per class).
        """
        if isinstance(X, pd.DataFrame):
            X = X.values
        raw_preds = np.asarray([booster.predict_bagging(X) for booster in self.boosters_])
        # Subtracting the maximum keeps the exponentials from overflowing.
        shifted_preds = raw_preds - np.max(raw_preds, axis=0)
        exp_preds = np.exp(shifted_preds)
        return exp_preds / np.sum(exp_preds, axis=0)
Generic Random Bagging Classifier (for any base learner).
Parameters:
base_estimator: Base learner to use for the booster.
n_estimators: Number of boosting stages to perform.
learning_rate: Learning rate shrinks the contribution of each estimator.
n_hidden_features: Number of hidden features to use for the base learner.
direct_link: Whether to use direct link for the base learner or not.
weights_distribution: Distribution of the weights for the booster (uniform or normal).
dropout: Dropout rate.
random_state: Random state.
Attributes:
boosters_: The bagging learners, one per class.
classes_: The classes of the target variable.
n_classes_: The number of classes of the target variable.
Examples:
See https://github.com/Techtonique/genbooster/tree/main/examples
def fit(self, X, y) -> "RandomBagClassifier":
    """Fit the bagging model.

    Trains one bagging model per class on the one-hot encoded target.

    Parameters:

        X: Input data.

        y: Target data; every label is converted with int().

    Returns:

        self: The fitted bagging model.
    """
    if isinstance(X, pd.DataFrame):
        X = X.values
    if isinstance(y, pd.DataFrame):
        y = y.values
    y = np.asarray([int(x) for x in y]).ravel()
    Y = OneHotEncoder().fit_transform(y.reshape(-1, 1)).toarray()
    self.classes_ = np.unique(y)
    self.n_classes_ = len(self.classes_)

    # Same float64 coercion RandomBagRegressor applies before calling the
    # extension; done once because X is shared by all classes.
    X_arr = np.asarray(X, dtype=np.float64)

    self.boosters_ = []
    for i in range(self.n_classes_):
        booster = _RustBooster(
            self.base_estimator,
            self.n_estimators,
            self.learning_rate,
            self.n_hidden_features,
            self.direct_link,
            weights_distribution=self.weights_distribution
        )
        # NOTE(review): random_state=None is forwarded unchanged, whereas
        # RandomBagRegressor substitutes 42 -- confirm the extension
        # accepts None.
        booster.fit_bagging(X_arr, Y[:, i], dropout=self.dropout, seed=self.random_state)
        self.boosters_.append(booster)
    return self
Fit the bagging model.
Parameters:
X: Input data.
y: Target data.
Returns:
self: The fitted bagging model.
def predict(self, X) -> np.ndarray:
    """Make predictions with the bagging model.

    Parameters:

        X: Input data.

    Returns:

        preds: Class predictions, expressed as the labels in classes_.
    """
    if isinstance(X, pd.DataFrame):
        X = X.values
    preds_proba = self.predict_proba(X)
    # argmax yields positions; translate them to the fitted labels so
    # label sets other than 0..n_classes_-1 are predicted correctly.
    return self.classes_[np.argmax(preds_proba, axis=0)]
Make predictions with the bagging model.
Parameters:
X: Input data.
Returns:
preds: Class predictions.
def predict_proba(self, X) -> np.ndarray:
    """Make probability predictions with the bagging model.

    Parameters:

        X: Input data.

    Returns:

        preds: Probability predictions; the first axis follows the order
            of boosters_ (one entry per class).
    """
    features = X.values if isinstance(X, pd.DataFrame) else X
    scores = np.asarray([model.predict_bagging(features) for model in self.boosters_])
    # Softmax across the per-class scores; subtracting the maximum first
    # keeps the exponentials from overflowing.
    exponentials = np.exp(scores - np.max(scores, axis=0))
    return exponentials / np.sum(exponentials, axis=0)
Make probability predictions with the booster model.
Parameters:
X: Input data.
Returns:
preds: Probability predictions.
class LinfaRegressor(BaseEstimator, RegressorMixin):
    """Scikit-learn style wrapper around the package's `_Regressor` backend.

    Parameters:

        model_name: Name of the backend model, forwarded to `_Regressor`.

    Attributes:

        model: The wrapped `_Regressor` instance.
    """

    def __init__(self, model_name="LinearRegression"):
        self.model_name = model_name
        self.model = _Regressor(model_name=self.model_name)

    def fit(self, X, y):
        """Fit the wrapped model.

        The backend is tried with y as given, then as a column, a row and a
        flat vector; the first layout it accepts is used.

        Parameters:

            X: Input data.

            y: Target data.

        Returns:

            self: The fitted model.

        Raises:

            TypeError: If the backend rejects every layout of y.
        """
        if isinstance(X, pd.DataFrame):
            X = X.values
        if isinstance(y, (pd.DataFrame, pd.Series)):
            y = y.values
        # predict() already coerces X to float64; do the same at fit time,
        # which also guarantees y supports reshape/ravel (e.g. for lists).
        X = np.asarray(X, dtype=np.float64)
        y = np.asarray(y, dtype=np.float64)

        layouts = (y, y.reshape(-1, 1), y.reshape(1, -1), y.ravel())
        last_error = None
        for target in layouts:
            try:
                self.model.fit(X, target)
            except TypeError as err:
                last_error = err
            else:
                return self
        raise last_error

    def predict(self, X):
        """Predict with the wrapped model.

        Parameters:

            X: Input data.

        Returns:

            predictions: Backend predictions, flattened when 2-D.
        """
        if isinstance(X, pd.DataFrame):
            X = X.values
        X = np.asarray(X, dtype=np.float64)
        predictions = self.model.predict(X)
        # Ensure 1D output
        if predictions.ndim == 2:
            predictions = predictions.ravel()
        return predictions
Base class for all estimators in scikit-learn.
Inheriting from this class provides default implementations of:
- setting and getting parameters used by `GridSearchCV` and friends;
- textual and HTML representation displayed in terminals and IDEs;
- estimator serialization;
- parameters validation;
- data validation;
- feature names validation.
Read more in the :ref:`User Guide <rolling_your_own_estimator>`.
Notes
All estimators should specify all the parameters that can be set
at the class level in their __init__ as explicit keyword
arguments (no *args or **kwargs).
Examples
>>> import numpy as np
>>> from sklearn.base import BaseEstimator
>>> class MyEstimator(BaseEstimator):
... def __init__(self, *, param=1):
... self.param = param
... def fit(self, X, y=None):
... self.is_fitted_ = True
... return self
... def predict(self, X):
... return np.full(shape=X.shape[0], fill_value=self.param)
>>> estimator = MyEstimator(param=2)
>>> estimator.get_params()
{'param': 2}
>>> X = np.array([[1, 2], [2, 3], [3, 4]])
>>> y = np.array([1, 0, 1])
>>> estimator.fit(X, y).predict(X)
array([2, 2, 2])
>>> estimator.set_params(param=3).fit(X, y).predict(X)
array([3, 3, 3])
def fit(self, X, y):
    """Fit the wrapped model.

    The backend is tried with y as given, then as a column, a row and a
    flat vector; the first layout it accepts is used.

    Parameters:

        X: Input data.

        y: Target data.

    Returns:

        self: The fitted model.

    Raises:

        TypeError: If the backend rejects every layout of y.
    """
    if isinstance(X, pd.DataFrame):
        X = X.values
    if isinstance(y, (pd.DataFrame, pd.Series)):
        y = y.values
    # Coerce to float64 arrays up front; this also guarantees y supports
    # reshape/ravel when a plain list is passed.
    X = np.asarray(X, dtype=np.float64)
    y = np.asarray(y, dtype=np.float64)

    layouts = (y, y.reshape(-1, 1), y.reshape(1, -1), y.ravel())
    last_error = None
    for target in layouts:
        try:
            self.model.fit(X, target)
        except TypeError as err:
            last_error = err
        else:
            return self
    raise last_error
class AdaBoostClassifier(BaseEstimator, ClassifierMixin):
    """AdaBoost Classifier using AdaBoostRegressor as a multi-task learner.

    Parameters:

        base_estimator: Base learner to use for the booster. Default is ExtraTreeRegressor.

        n_estimators: Number of boosting stages to perform.

        learning_rate: Learning rate shrinks the contribution of each estimator.

        n_hidden_features: Number of hidden features to use for the base learner.

        direct_link: Whether to use direct link for the base learner or not.

        weights_distribution: Distribution of the weights for the booster (uniform or normal).

        dropout: Dropout rate.

        tolerance: Tolerance for early stopping.

        random_state: Random state.

    Attributes:

        classes_: The classes labels.

        n_classes_: The number of classes.

        base_estimator_: The base learner actually used.

        boosters_: List of AdaBoostRegressor instances, one per class.
    """

    def __init__(
        self,
        base_estimator: Optional[BaseEstimator] = None,
        n_estimators: int = 100,
        learning_rate: float = 0.1,
        n_hidden_features: int = 5,
        direct_link: bool = True,
        weights_distribution: str = "uniform",
        dropout: float = 0.0,
        tolerance: float = 1e-4,
        random_state: Optional[int] = None
    ):
        self.base_estimator = base_estimator
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.n_hidden_features = n_hidden_features
        self.direct_link = direct_link
        self.weights_distribution = weights_distribution
        self.dropout = dropout
        self.tolerance = tolerance
        self.random_state = random_state
        self.boosters_ = []

    def fit(self, X, y) -> "AdaBoostClassifier":
        """Fit the AdaBoost classifier.

        Parameters:

            X: Input data.

            y: Target data.

        Returns:

            self: The fitted boosting model.
        """
        self.classes_ = np.unique(y)
        self.n_classes_ = len(self.classes_)
        Y = one_hot_encode2(y, self.n_classes_)

        # Use ExtraTreeRegressor as default base estimator if none provided
        if self.base_estimator is None:
            self.base_estimator_ = ExtraTreeRegressor(
                random_state=self.random_state
            )
        else:
            self.base_estimator_ = self.base_estimator

        # The feature matrix is the same for every class: convert it once.
        X_arr = np.asarray(X.values if hasattr(X, 'values') else X, dtype=np.float64)

        # Start from an empty list so that calling fit() again replaces the
        # previous per-class models instead of appending to them.
        self.boosters_ = []
        for i in range(self.n_classes_):
            # Each class gets its own seed (random_state + i) when one is set.
            booster = AdaBoostRegressor(
                base_estimator=self.base_estimator_,
                n_estimators=self.n_estimators,
                learning_rate=self.learning_rate,
                n_hidden_features=self.n_hidden_features,
                direct_link=self.direct_link,
                weights_distribution=self.weights_distribution,
                dropout=self.dropout,
                tolerance=self.tolerance,
                random_state=None if self.random_state is None
                else self.random_state + i
            )
            # Column i of the one-hot matrix is the regression target for class i.
            y_arr = np.asarray(Y[:, i], dtype=np.float64)
            booster.fit(X_arr, y_arr)
            self.boosters_.append(booster)

        return self

    def predict(self, X) -> np.ndarray:
        """Make predictions with the boosting model.

        Parameters:

            X: Input data.

        Returns:

            preds: Class predictions, as the position of the most probable
                class along the first axis of predict_proba's output.
        """
        if isinstance(X, pd.DataFrame):
            X = X.values
        preds_proba = self.predict_proba(X)
        return np.argmax(preds_proba, axis=0)

    def predict_proba(self, X) -> np.ndarray:
        """Make probability predictions with the boosting model.

        Parameters:

            X: Input data.

        Returns:

            preds: Probability predictions; the first axis follows the order
                of boosters_ (one entry per class).
        """
        if isinstance(X, pd.DataFrame):
            X = X.values
        raw_preds = np.asarray([booster.predict(X) for booster in self.boosters_])
        # Subtracting the maximum keeps the exponentials from overflowing.
        shifted_preds = raw_preds - np.max(raw_preds, axis=0)
        exp_preds = np.exp(shifted_preds)
        return exp_preds / np.sum(exp_preds, axis=0)
AdaBoost Classifier using AdaBoostRegressor as a multi-task learner.
Parameters:
base_estimator: Base learner to use for the booster. Default is ExtraTreeRegressor.
n_estimators: Number of boosting stages to perform.
learning_rate: Learning rate shrinks the contribution of each estimator.
n_hidden_features: Number of hidden features to use for the base learner.
direct_link: Whether to use direct link for the base learner or not.
weights_distribution: Distribution of the weights for the booster (uniform or normal).
dropout: Dropout rate.
tolerance: Tolerance for early stopping.
random_state: Random state.
Attributes:
classes_: The class labels.
n_classes_: The number of classes.
boosters_: List of AdaBoostRegressor instances, one per class.
def fit(self, X, y) -> "AdaBoostClassifier":
    """Fit the AdaBoost classifier.

    One AdaBoostRegressor is trained per class on the corresponding column
    of the one-hot encoded target. Calling fit again discards the boosters
    of any previous fit.

    Parameters:

        X: Input data.

        y: Target data (class labels).

    Returns:

        self: The fitted boosting model.
    """
    # Encode the labels as integer codes 0..n_classes-1 so that the one-hot
    # encoding does not depend on the labels already being consecutive
    # integers starting at zero.
    self.classes_, y_codes = np.unique(
        np.asarray(y).ravel(), return_inverse=True
    )
    self.n_classes_ = len(self.classes_)
    Y = one_hot_encode2(y_codes, self.n_classes_)

    # Use ExtraTreeRegressor as default base estimator if none provided
    if self.base_estimator is None:
        self.base_estimator_ = ExtraTreeRegressor(
            random_state=self.random_state
        )
    else:
        self.base_estimator_ = self.base_estimator

    # The feature matrix is identical for every class: convert it once.
    X_arr = np.asarray(
        X.values if hasattr(X, "values") else X, dtype=np.float64
    )

    # Build into a local list so that refitting replaces (rather than
    # extends) the previous boosters.
    boosters = []
    for i in range(self.n_classes_):
        booster = AdaBoostRegressor(
            base_estimator=self.base_estimator_,
            n_estimators=self.n_estimators,
            learning_rate=self.learning_rate,
            n_hidden_features=self.n_hidden_features,
            direct_link=self.direct_link,
            weights_distribution=self.weights_distribution,
            dropout=self.dropout,
            tolerance=self.tolerance,
            # Distinct but reproducible seed for each class booster.
            random_state=None if self.random_state is None
            else self.random_state + i
        )
        booster.fit(X_arr, np.asarray(Y[:, i], dtype=np.float64))
        boosters.append(booster)
    self.boosters_ = boosters

    return self
Fit the AdaBoost classifier.
Parameters:
X: Input data.
y: Target data.
Returns:
self: The fitted boosting model.
def predict(self, X) -> np.ndarray:
    """Make predictions with the boosting model.

    Parameters:

        X: Input data.

    Returns:

        preds: Class predictions, expressed as entries of classes_.
    """
    if isinstance(X, pd.DataFrame):
        X = X.values
    preds_proba = self.predict_proba(X)
    # The probabilities are laid out with classes along axis 0, so the
    # winning class of each sample is the argmax over that axis; the index
    # is then mapped back to the label stored by fit().
    return self.classes_[np.argmax(preds_proba, axis=0)]
Make predictions with the boosting model.
Parameters:
X: Input data.
Returns:
preds: Class predictions.
def predict_proba(self, X) -> np.ndarray:
    """Turn the per-class booster outputs into probabilities.

    Parameters:

        X: Input data.

    Returns:

        preds: Probability predictions, with one row per fitted booster
            (i.e. per class); the softmax is taken along axis 0.
    """
    features = X.values if isinstance(X, pd.DataFrame) else X
    # One row of raw scores per booster.
    scores = np.asarray([model.predict(features) for model in self.boosters_])
    # Shift by the largest score before exponentiating to avoid overflow.
    weights = np.exp(scores - scores.max(axis=0))
    return weights / weights.sum(axis=0)
Make probability predictions with the boosting model.
Parameters:
X: Input data.
Returns:
preds: Probability predictions, arranged with one row per class (the softmax is taken across classes, along axis 0).
class AdaBoostRegressor(BaseEstimator, RegressorMixin):
    """AdaBoost Regressor with neural network-like feature transformation.

    The inputs are standardized with a StandardScaler, then passed to an
    internal _AdaBoostRegressor that receives every hyper-parameter below
    unchanged.

    Parameters:

        base_estimator: Base learner to use for the booster. When None,
            an ExtraTreeRegressor is created at fit time.

        n_estimators: Number of boosting stages to perform.

        learning_rate: Learning rate shrinks the contribution of each estimator.

        n_hidden_features: Number of hidden features to use for the base learner.

        direct_link: Whether to use direct link for the base learner or not.

        weights_distribution: Distribution of the weights for the booster (uniform or normal).

        dropout: Dropout rate.

        tolerance: Tolerance for early stopping.

        random_state: Random state.

    Attributes:

        base_estimator_: The base learner.

        booster_: The boosting model.

        scaler_: StandardScaler for feature scaling.
    """

    def __init__(
        self,
        base_estimator: Optional[BaseEstimator] = None,
        n_estimators: int = 100,
        learning_rate: float = 0.1,
        n_hidden_features: int = 5,
        direct_link: bool = True,
        weights_distribution: str = "uniform",
        dropout: float = 0.0,
        tolerance: float = 1e-4,
        random_state: Optional[int] = None
    ):
        # Hyper-parameters are stored verbatim under their own names.
        self.base_estimator = base_estimator
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.n_hidden_features = n_hidden_features
        self.direct_link = direct_link
        self.weights_distribution = weights_distribution
        self.dropout = dropout
        self.tolerance = tolerance
        self.random_state = random_state
        # Placeholder scaler; fit() replaces it with a freshly fitted one.
        self.scaler_ = StandardScaler()

    def fit(self, X, y) -> "AdaBoostRegressor":
        """Fit the AdaBoost regressor.

        Parameters:

            X: Input data.

            y: Target values.

        Returns:

            self: The fitted boosting model.
        """
        # Accept pandas-like objects (anything exposing .values) as well as
        # array-likes, and work in float64 throughout.
        features = np.asarray(getattr(X, "values", X), dtype=np.float64)
        targets = np.asarray(y, dtype=np.float64)

        # A new scaler per fit, so earlier fits leave no statistics behind.
        self.scaler_ = StandardScaler()
        standardized = self.scaler_.fit_transform(features)

        # Fall back to an ExtraTreeRegressor when no base learner was given.
        self.base_estimator_ = (
            ExtraTreeRegressor(random_state=self.random_state)
            if self.base_estimator is None
            else self.base_estimator
        )

        booster_params = dict(
            base_estimator=self.base_estimator_,
            n_estimators=self.n_estimators,
            learning_rate=self.learning_rate,
            n_hidden_features=self.n_hidden_features,
            direct_link=self.direct_link,
            weights_distribution=self.weights_distribution,
            dropout=self.dropout,
            tolerance=self.tolerance,
            random_state=self.random_state,
        )
        self.booster_ = _AdaBoostRegressor(**booster_params)
        self.booster_.fit(standardized, targets)
        return self

    def predict(self, X) -> np.ndarray:
        """Make predictions with the AdaBoost model.

        Parameters:

            X: Input data.

        Returns:

            predictions: Model predictions.
        """
        raw = X.values if isinstance(X, pd.DataFrame) else X
        # np.array copies by default, giving a C-ordered float64 array that
        # does not alias the caller's data.
        features = np.array(raw, dtype=np.float64, order="C")
        return self.booster_.predict(self.scaler_.transform(features))
AdaBoost Regressor with neural network-like feature transformation.
Parameters:
base_estimator: Base learner to use for the booster.
n_estimators: Number of boosting stages to perform.
learning_rate: Learning rate shrinks the contribution of each estimator.
n_hidden_features: Number of hidden features to use for the base learner.
direct_link: Whether to use direct link for the base learner or not.
weights_distribution: Distribution of the weights for the booster (uniform or normal).
dropout: Dropout rate.
tolerance: Tolerance for early stopping.
random_state: Random state.
Attributes:
base_estimator_: The base learner.
booster_: The boosting model.
scaler_: StandardScaler for feature scaling.
def fit(self, X, y) -> "AdaBoostRegressor":
    """Fit the AdaBoost regressor.

    Parameters:

        X: Input data.

        y: Target values.

    Returns:

        self: The fitted boosting model.
    """
    # Accept pandas-like objects (anything exposing .values) as well as
    # array-likes, and work in float64 throughout.
    features = np.asarray(getattr(X, "values", X), dtype=np.float64)
    targets = np.asarray(y, dtype=np.float64)

    # A new scaler per fit, so earlier fits leave no statistics behind.
    self.scaler_ = StandardScaler()
    standardized = self.scaler_.fit_transform(features)

    # Fall back to an ExtraTreeRegressor when no base learner was given.
    self.base_estimator_ = (
        ExtraTreeRegressor(random_state=self.random_state)
        if self.base_estimator is None
        else self.base_estimator
    )

    # Every hyper-parameter is forwarded unchanged to the internal booster.
    booster_params = dict(
        base_estimator=self.base_estimator_,
        n_estimators=self.n_estimators,
        learning_rate=self.learning_rate,
        n_hidden_features=self.n_hidden_features,
        direct_link=self.direct_link,
        weights_distribution=self.weights_distribution,
        dropout=self.dropout,
        tolerance=self.tolerance,
        random_state=self.random_state,
    )
    self.booster_ = _AdaBoostRegressor(**booster_params)
    self.booster_.fit(standardized, targets)
    return self
Fit the AdaBoost regressor.
Parameters:
X: Input data.
y: Target values.
def predict(self, X) -> np.ndarray:
    """Make predictions with the AdaBoost model.

    The input is scaled with the scaler stored on the instance before it
    is handed to the booster.

    Parameters:

        X: Input data.

    Returns:

        predictions: Model predictions.
    """
    raw = X.values if isinstance(X, pd.DataFrame) else X
    # np.array copies by default, giving a C-ordered float64 array that
    # does not alias the caller's data.
    features = np.array(raw, dtype=np.float64, order="C")
    return self.booster_.predict(self.scaler_.transform(features))
Make predictions with the AdaBoost model.
Parameters:
X: Input data.
Returns:
predictions: Model predictions.