genbooster

 1from .genboosterregressor import BoosterRegressor
 2from .genboosterclassifier import BoosterClassifier
 3from .randombagregressor import RandomBagRegressor
 4from .randombagclassifier import RandomBagClassifier
 5from .regressionmodels import LinfaRegressor
 6from .adaboostclassifier import AdaBoostClassifier
 7from .adaboostregressor import AdaBoostRegressor
 8from .rust_core import RustBooster, Regressor
 9
10
11__all__ = ["BoosterRegressor", "BoosterClassifier", 
12           "RandomBagRegressor", "RandomBagClassifier",
13           "RustBooster", "Regressor", "LinfaRegressor",
14           "AdaBoostClassifier", "AdaBoostRegressor"]
class BoosterRegressor(sklearn.base.BaseEstimator, sklearn.base.RegressorMixin):
 14class BoosterRegressor(BaseEstimator, RegressorMixin):
 15    """Generic Gradient Boosting Regressor (for any base learner).
 16
 17        Parameters:
 18
 19            base_estimator: Base learner to use for the booster.
 20
 21            n_estimators: Number of boosting stages to perform.
 22
 23            learning_rate: Learning rate shrinks the contribution of each estimator.
 24
 25            n_hidden_features: Number of hidden features to use for the base learner.
 26
 27            direct_link: Whether to use direct link for the base learner or not.
 28
 29            weights_distribution: Distribution of the weights for the booster (uniform or normal).
 30
 31            dropout: Dropout rate.
 32
 33            tolerance: Tolerance for early stopping.
 34
 35            random_state: Random state.
 36
 37        Attributes:
 38
 39            base_estimator_: The base learner.
 40
 41            booster_: The boosting model.
 42
 43            y_mean_: Mean of the target variable.
 44
 45        Examples:
 46
 47            See https://github.com/Techtonique/genbooster/tree/main/examples
 48                                
 49    """
 50    
 51    def __init__(
 52        self,
 53        base_estimator: Optional[BaseEstimator] = None,
 54        n_estimators: int = 100,
 55        learning_rate: float = 0.1,
 56        n_hidden_features: int = 5,
 57        direct_link: bool = True,
 58        weights_distribution: str = 'uniform',
 59        dropout: float = 0.0,
 60        tolerance: float = 1e-4,
 61        random_state: Optional[int] = 42
 62    ):
 63        self.base_estimator = base_estimator
 64        if base_estimator is None:
 65            self.base_estimator = ExtraTreeRegressor()
 66        else: 
 67            self.base_estimator = base_estimator
 68        self.n_estimators = n_estimators
 69        self.learning_rate = learning_rate
 70        self.n_hidden_features = n_hidden_features
 71        self.direct_link = direct_link
 72        self.weights_distribution = weights_distribution
 73        self.dropout = dropout
 74        self.tolerance = tolerance
 75        self.random_state = random_state
 76        self.scaler_ = StandardScaler()
 77        self.y_mean_ = None
 78
 79    def fit(self, X, y) -> "BoosterRegressor":
 80        """Fit the boosting model.
 81        
 82        Parameters:
 83
 84            X: Input data.
 85
 86            y: Target data.
 87            
 88        Returns:
 89
 90            self: The fitted boosting model.
 91        """        
 92        # Set random seed if provided
 93        if self.random_state is not None:
 94            # Convert to int for Python's random.seed
 95            seed_int = int(abs(self.random_state))
 96            # Set Python RNG seeds
 97            np.random.seed(seed_int)
 98            random.seed(seed_int)
 99            if hasattr(self.base_estimator, "random_state"):
100                self.base_estimator.random_state = seed_int
101            # Convert to u64 for Rust
102            seed = np.uint64(seed_int)
103        else:
104            # Use a random seed if none provided
105            seed_int = np.random.randint(0, 2**31 - 1)
106            np.random.seed(seed_int)
107            random.seed(seed_int)
108            seed = np.uint64(seed_int)
109            
110        # Convert to numpy arrays and ensure float64 dtype with C-contiguous memory layout
111        X = np.array(X, dtype=np.float64, copy=True, order='C')
112        y = np.array(y, dtype=np.float64, copy=True, order='C')
113        
114        # Scale X and force conversion to numpy array
115        scaled_X = np.array(self.scaler_.fit_transform(X), dtype=np.float64, copy=True, order='C')
116        
117        self.y_mean_ = float(np.mean(y))
118        centered_y = np.array(y - self.y_mean_, dtype=np.float64, copy=True, order='C')
119        
120        # Ensure y is 1D array
121        if centered_y.ndim == 2:
122            if centered_y.shape[1] != 1:
123                raise ValueError("y must have shape (n_samples,) or (n_samples, 1)")
124            centered_y = centered_y.ravel()  # Convert to 1D array
125            
126        # Use Ridge as default base estimator if none provided
127        if self.base_estimator is None:
128            self.base_estimator_ = Ridge()
129        else:
130            self.base_estimator_ = self.base_estimator            
131            
132        # Initialize Rust booster
133        self.booster_ = _RustBooster(
134            self.base_estimator_,
135            self.n_estimators,
136            self.learning_rate,
137            self.n_hidden_features,
138            self.direct_link,
139            weights_distribution=self.weights_distribution,
140            tolerance=self.tolerance
141        )        
142        
143        # Fit the model
144        self.booster_.fit_boosting(
145            scaled_X,
146            centered_y,
147            dropout=self.dropout,
148            seed=seed
149        )        
150        return self
151        
152    def predict(self, X) -> np.ndarray:
153        """Make predictions with the boosting model.
154
155        Parameters:
156
157            X: Input data.
158            
159        Returns:
160        
161            preds: Predictions.
162        """
163        if isinstance(X, pd.DataFrame):
164            X = X.values
165        scaled_X = self.scaler_.transform(X)
166        return self.booster_.predict_boosting(scaled_X) + self.y_mean_

Generic Gradient Boosting Regressor (for any base learner).

Parameters:

base_estimator: Base learner to use for the booster.

n_estimators: Number of boosting stages to perform.

learning_rate: Learning rate shrinks the contribution of each estimator.

n_hidden_features: Number of hidden features to use for the base learner.

direct_link: Whether to use direct link for the base learner or not.

weights_distribution: Distribution of the weights for the booster (uniform or normal).

dropout: Dropout rate.

tolerance: Tolerance for early stopping.

random_state: Random state.

Attributes:

base_estimator_: The base learner.

booster_: The boosting model.

y_mean_: Mean of the target variable.

Examples:

See https://github.com/Techtonique/genbooster/tree/main/examples
def fit(self, X, y) -> BoosterRegressor:
 79    def fit(self, X, y) -> "BoosterRegressor":
 80        """Fit the boosting model.
 81        
 82        Parameters:
 83
 84            X: Input data.
 85
 86            y: Target data.
 87            
 88        Returns:
 89
 90            self: The fitted boosting model.
 91        """        
 92        # Set random seed if provided
 93        if self.random_state is not None:
 94            # Convert to int for Python's random.seed
 95            seed_int = int(abs(self.random_state))
 96            # Set Python RNG seeds
 97            np.random.seed(seed_int)
 98            random.seed(seed_int)
 99            if hasattr(self.base_estimator, "random_state"):
100                self.base_estimator.random_state = seed_int
101            # Convert to u64 for Rust
102            seed = np.uint64(seed_int)
103        else:
104            # Use a random seed if none provided
105            seed_int = np.random.randint(0, 2**31 - 1)
106            np.random.seed(seed_int)
107            random.seed(seed_int)
108            seed = np.uint64(seed_int)
109            
110        # Convert to numpy arrays and ensure float64 dtype with C-contiguous memory layout
111        X = np.array(X, dtype=np.float64, copy=True, order='C')
112        y = np.array(y, dtype=np.float64, copy=True, order='C')
113        
114        # Scale X and force conversion to numpy array
115        scaled_X = np.array(self.scaler_.fit_transform(X), dtype=np.float64, copy=True, order='C')
116        
117        self.y_mean_ = float(np.mean(y))
118        centered_y = np.array(y - self.y_mean_, dtype=np.float64, copy=True, order='C')
119        
120        # Ensure y is 1D array
121        if centered_y.ndim == 2:
122            if centered_y.shape[1] != 1:
123                raise ValueError("y must have shape (n_samples,) or (n_samples, 1)")
124            centered_y = centered_y.ravel()  # Convert to 1D array
125            
126        # Use Ridge as default base estimator if none provided
127        if self.base_estimator is None:
128            self.base_estimator_ = Ridge()
129        else:
130            self.base_estimator_ = self.base_estimator            
131            
132        # Initialize Rust booster
133        self.booster_ = _RustBooster(
134            self.base_estimator_,
135            self.n_estimators,
136            self.learning_rate,
137            self.n_hidden_features,
138            self.direct_link,
139            weights_distribution=self.weights_distribution,
140            tolerance=self.tolerance
141        )        
142        
143        # Fit the model
144        self.booster_.fit_boosting(
145            scaled_X,
146            centered_y,
147            dropout=self.dropout,
148            seed=seed
149        )        
150        return self

Fit the boosting model.

Parameters:

X: Input data.

y: Target data.

Returns:

self: The fitted boosting model.
def predict(self, X) -> numpy.ndarray:
152    def predict(self, X) -> np.ndarray:
153        """Make predictions with the boosting model.
154
155        Parameters:
156
157            X: Input data.
158            
159        Returns:
160        
161            preds: Predictions.
162        """
163        if isinstance(X, pd.DataFrame):
164            X = X.values
165        scaled_X = self.scaler_.transform(X)
166        return self.booster_.predict_boosting(scaled_X) + self.y_mean_

Make predictions with the boosting model.

Parameters:

X: Input data.

Returns:

preds: Predictions.
class BoosterClassifier(sklearn.base.BaseEstimator, sklearn.base.ClassifierMixin):
 14class BoosterClassifier(BaseEstimator, ClassifierMixin):
 15    """Generic Gradient Boosting Classifier (for any base learner).
 16
 17    Parameters:
 18
 19        base_estimator: Base learner to use for the booster. Default is ExtraTreeRegressor.
 20
 21        n_estimators: Number of boosting stages to perform.
 22
 23        learning_rate: Learning rate shrinks the contribution of each estimator.
 24
 25        n_hidden_features: Number of hidden features to use for the base learner.
 26
 27        direct_link: Whether to use direct link for the base learner or not.
 28
 29        weights_distribution: Distribution of the weights for the booster (uniform or normal).
 30
 31        dropout: Dropout rate.
 32
 33        tolerance: Tolerance for early stopping.
 34
 35        random_state: Random state.
 36    
 37    Attributes:
 38
 39        classes_: The classes of the target variable.
 40
 41        n_classes_: The number of classes of the target variable.
 42
 43        boosters_: Base learners.
 44    
 45    Examples:
 46
 47        See https://github.com/Techtonique/genbooster/tree/main/examples
 48
 49    """
 50    
 51    def __init__(self,
 52                base_estimator: Optional[BaseEstimator] = None,
 53                n_estimators: int = 100,
 54                learning_rate: float = 0.1,
 55                n_hidden_features: int = 5,
 56                direct_link: bool = True,
 57                weights_distribution: str = 'uniform',
 58                dropout: float = 0.0,
 59                tolerance: float = 1e-4,
 60                random_state: Optional[int] = 42):
 61        if base_estimator is None:
 62            self.base_estimator = ExtraTreeRegressor()
 63        else: 
 64            self.base_estimator = base_estimator        
 65        self.n_estimators = n_estimators
 66        self.learning_rate = learning_rate
 67        self.n_hidden_features = n_hidden_features
 68        self.direct_link = direct_link
 69        self.weights_distribution = weights_distribution
 70        self.dropout = dropout
 71        self.tolerance = tolerance
 72        self.random_state = random_state        
 73        self.boosters_ = [] 
 74    
 75    def fit(self, X, y) -> "BoosterClassifier":
 76        """Fit the booster model.
 77        
 78        Parameters:
 79            X: Input data.
 80            y: Target data.
 81            
 82        Returns:
 83            self: The fitted boosting model.
 84        """
 85        # Get unique classes and one-hot encode
 86        self.classes_ = np.unique(y)
 87        self.n_classes_ = len(self.classes_)        
 88        Y = one_hot_encode2(y, self.n_classes_)
 89        
 90        # Train one booster per class
 91        for i in range(self.n_classes_):
 92            booster = BoosterRegressor(
 93                base_estimator=self.base_estimator,
 94                n_estimators=self.n_estimators,
 95                learning_rate=self.learning_rate,
 96                n_hidden_features=self.n_hidden_features,
 97                direct_link=self.direct_link,
 98                weights_distribution=self.weights_distribution,
 99                tolerance=self.tolerance, 
100                dropout=self.dropout, 
101                random_state=self.random_state
102            )
103            
104            # Convert X and y to the right format without reshaping y
105            X_arr = np.asarray(X.values if hasattr(X, 'values') else X, dtype=np.float64)
106            y_arr = np.asarray(Y[:, i], dtype=np.float64)
107            
108            # Fit the booster
109            booster.fit(X=X_arr, y=y_arr)
110            self.boosters_.append(booster)
111        
112        return self
113    
114    def predict(self, X) -> np.ndarray:
115        """Make predictions with the boosting model.
116        
117        Parameters:
118
119            X: Input data.
120            
121        Returns:
122
123            preds: Class predictions.
124        """
125        if isinstance(X, pd.DataFrame):
126            X = X.values       
127        preds_proba = self.predict_proba(X)
128        return np.argmax(preds_proba, axis=0)
129
130    def predict_proba(self, X) -> np.ndarray:
131        """Make probability predictions with the boosting model.
132        
133        Parameters:
134
135            X: Input data.
136            
137        Returns:
138        
139            preds: Probability predictions.
140        """
141        if isinstance(X, pd.DataFrame):
142            X = X.values
143        raw_preds = np.asarray([booster.predict(X) for booster in self.boosters_])
144        shifted_preds = raw_preds - np.max(raw_preds, axis=0)
145        exp_preds = np.exp(shifted_preds)
146        return exp_preds / np.sum(exp_preds, axis=0)

Generic Gradient Boosting Classifier (for any base learner).

Parameters:

base_estimator: Base learner to use for the booster. Default is ExtraTreeRegressor.

n_estimators: Number of boosting stages to perform.

learning_rate: Learning rate shrinks the contribution of each estimator.

n_hidden_features: Number of hidden features to use for the base learner.

direct_link: Whether to use direct link for the base learner or not.

weights_distribution: Distribution of the weights for the booster (uniform or normal).

dropout: Dropout rate.

tolerance: Tolerance for early stopping.

random_state: Random state.

Attributes:

classes_: The classes of the target variable.

n_classes_: The number of classes of the target variable.

boosters_: Base learners.

Examples:

See https://github.com/Techtonique/genbooster/tree/main/examples
def fit(self, X, y) -> BoosterClassifier:
 75    def fit(self, X, y) -> "BoosterClassifier":
 76        """Fit the booster model.
 77        
 78        Parameters:
 79            X: Input data.
 80            y: Target data.
 81            
 82        Returns:
 83            self: The fitted boosting model.
 84        """
 85        # Get unique classes and one-hot encode
 86        self.classes_ = np.unique(y)
 87        self.n_classes_ = len(self.classes_)        
 88        Y = one_hot_encode2(y, self.n_classes_)
 89        
 90        # Train one booster per class
 91        for i in range(self.n_classes_):
 92            booster = BoosterRegressor(
 93                base_estimator=self.base_estimator,
 94                n_estimators=self.n_estimators,
 95                learning_rate=self.learning_rate,
 96                n_hidden_features=self.n_hidden_features,
 97                direct_link=self.direct_link,
 98                weights_distribution=self.weights_distribution,
 99                tolerance=self.tolerance, 
100                dropout=self.dropout, 
101                random_state=self.random_state
102            )
103            
104            # Convert X and y to the right format without reshaping y
105            X_arr = np.asarray(X.values if hasattr(X, 'values') else X, dtype=np.float64)
106            y_arr = np.asarray(Y[:, i], dtype=np.float64)
107            
108            # Fit the booster
109            booster.fit(X=X_arr, y=y_arr)
110            self.boosters_.append(booster)
111        
112        return self

Fit the booster model.

Parameters: X: Input data. y: Target data.

Returns: self: The fitted boosting model.

def predict(self, X) -> numpy.ndarray:
114    def predict(self, X) -> np.ndarray:
115        """Make predictions with the boosting model.
116        
117        Parameters:
118
119            X: Input data.
120            
121        Returns:
122
123            preds: Class predictions.
124        """
125        if isinstance(X, pd.DataFrame):
126            X = X.values       
127        preds_proba = self.predict_proba(X)
128        return np.argmax(preds_proba, axis=0)

Make predictions with the boosting model.

Parameters:

X: Input data.

Returns:

preds: Class predictions.
def predict_proba(self, X) -> numpy.ndarray:
130    def predict_proba(self, X) -> np.ndarray:
131        """Make probability predictions with the boosting model.
132        
133        Parameters:
134
135            X: Input data.
136            
137        Returns:
138        
139            preds: Probability predictions.
140        """
141        if isinstance(X, pd.DataFrame):
142            X = X.values
143        raw_preds = np.asarray([booster.predict(X) for booster in self.boosters_])
144        shifted_preds = raw_preds - np.max(raw_preds, axis=0)
145        exp_preds = np.exp(shifted_preds)
146        return exp_preds / np.sum(exp_preds, axis=0)

Make probability predictions with the boosting model.

Parameters:

X: Input data.

Returns:

preds: Probability predictions.
class RandomBagRegressor(sklearn.base.BaseEstimator, sklearn.base.RegressorMixin):
 11class RandomBagRegressor(BaseEstimator, RegressorMixin):
 12    """Generic Random Bagging Regressor (for any base learner).
 13
 14        Parameters:
 15
 16            base_estimator: Base learner to use for the booster.
 17
 18            n_estimators: Number of boosting stages to perform.
 19
 20            learning_rate: Learning rate shrinks the contribution of each estimator.
 21
 22            n_hidden_features: Number of hidden features to use for the base learner.
 23
 24            direct_link: Whether to use direct link for the base learner or not.
 25
 26            weights_distribution: Distribution of the weights for the booster (uniform or normal).
 27
 28            dropout: Dropout rate.
 29
 30            random_state: Random state.
 31
 32        Attributes:
 33        
 34            baggers_: The bagging learners.
 35
 36            y_mean_: The mean of the target variable.
 37
 38        Examples:
 39
 40            See https://github.com/Techtonique/genbooster/tree/main/examples
 41                    
 42    """
 43    
 44    def __init__(
 45        self,
 46        base_estimator: Optional[BaseEstimator] = None,
 47        n_estimators: int = 100,
 48        learning_rate: float = 0.01,
 49        n_hidden_features: int = 5,
 50        direct_link: bool = True,
 51        weights_distribution: str = 'uniform',
 52        dropout: float = 0.0,
 53        random_state: Optional[int] = 42
 54    ):
 55        self.base_estimator = base_estimator
 56        self.n_estimators = n_estimators
 57        self.learning_rate = learning_rate
 58        self.n_hidden_features = n_hidden_features
 59        self.direct_link = direct_link
 60        self.weights_distribution = weights_distribution
 61        self.dropout = dropout
 62        self.random_state = random_state
 63        self.scaler_ = StandardScaler()
 64        self.y_mean_ = None
 65
 66    def fit(self, X, y) -> "RandomBagRegressor":
 67        """Fit the bagging model.
 68        
 69        Parameters:
 70
 71            X: Input data.
 72
 73            y: Target data.
 74            
 75        Returns:
 76
 77            self: The fitted booster model.
 78        """        
 79        if isinstance(X, pd.DataFrame):
 80            X = X.values
 81        if isinstance(y, pd.DataFrame):
 82            y = y.values
 83        scaled_X = self.scaler_.fit_transform(X)
 84        self.y_mean_ = np.mean(y)
 85        centered_y = y - self.y_mean_
 86        # Use Ridge as default base estimator if none provided
 87        if self.base_estimator is None:
 88            self.base_estimator_ = Ridge()
 89        else:
 90            self.base_estimator_ = self.base_estimator            
 91        # Initialize Rust booster
 92        self.booster_ = _RustBooster(
 93            self.base_estimator_,
 94            self.n_estimators,
 95            self.learning_rate,
 96            self.n_hidden_features,
 97            self.direct_link,
 98            weights_distribution=self.weights_distribution
 99        )        
100        # Fit the model
101        self.booster_.fit_bagging(
102            np.asarray(scaled_X, dtype=np.float64), 
103            np.asarray(centered_y, dtype=np.float64),
104            dropout=self.dropout,
105            seed=self.random_state if self.random_state is not None else 42
106        )        
107        return self
108        
109    def predict(self, X) -> np.ndarray:
110        """Make predictions with the bagging model.
111
112        Parameters:
113
114            X: Input data.
115            
116        Returns:
117
118            preds: Predictions.
119        """
120        if isinstance(X, pd.DataFrame):
121            X = X.values
122        scaled_X = self.scaler_.transform(X)
123        return self.booster_.predict_bagging(scaled_X) + self.y_mean_

Generic Random Bagging Regressor (for any base learner).

Parameters:

base_estimator: Base learner to use for the booster.

n_estimators: Number of boosting stages to perform.

learning_rate: Learning rate shrinks the contribution of each estimator.

n_hidden_features: Number of hidden features to use for the base learner.

direct_link: Whether to use direct link for the base learner or not.

weights_distribution: Distribution of the weights for the booster (uniform or normal).

dropout: Dropout rate.

random_state: Random state.

Attributes:

booster_: The fitted bagging model.

y_mean_: The mean of the target variable.

Examples:

See https://github.com/Techtonique/genbooster/tree/main/examples
def fit(self, X, y) -> RandomBagRegressor:
 66    def fit(self, X, y) -> "RandomBagRegressor":
 67        """Fit the bagging model.
 68        
 69        Parameters:
 70
 71            X: Input data.
 72
 73            y: Target data.
 74            
 75        Returns:
 76
 77            self: The fitted booster model.
 78        """        
 79        if isinstance(X, pd.DataFrame):
 80            X = X.values
 81        if isinstance(y, pd.DataFrame):
 82            y = y.values
 83        scaled_X = self.scaler_.fit_transform(X)
 84        self.y_mean_ = np.mean(y)
 85        centered_y = y - self.y_mean_
 86        # Use Ridge as default base estimator if none provided
 87        if self.base_estimator is None:
 88            self.base_estimator_ = Ridge()
 89        else:
 90            self.base_estimator_ = self.base_estimator            
 91        # Initialize Rust booster
 92        self.booster_ = _RustBooster(
 93            self.base_estimator_,
 94            self.n_estimators,
 95            self.learning_rate,
 96            self.n_hidden_features,
 97            self.direct_link,
 98            weights_distribution=self.weights_distribution
 99        )        
100        # Fit the model
101        self.booster_.fit_bagging(
102            np.asarray(scaled_X, dtype=np.float64), 
103            np.asarray(centered_y, dtype=np.float64),
104            dropout=self.dropout,
105            seed=self.random_state if self.random_state is not None else 42
106        )        
107        return self

Fit the bagging model.

Parameters:

X: Input data.

y: Target data.

Returns:

self: The fitted bagging model.
def predict(self, X) -> numpy.ndarray:
109    def predict(self, X) -> np.ndarray:
110        """Make predictions with the bagging model.
111
112        Parameters:
113
114            X: Input data.
115            
116        Returns:
117
118            preds: Predictions.
119        """
120        if isinstance(X, pd.DataFrame):
121            X = X.values
122        scaled_X = self.scaler_.transform(X)
123        return self.booster_.predict_bagging(scaled_X) + self.y_mean_

Make predictions with the bagging model.

Parameters:

X: Input data.

Returns:

preds: Predictions.
class RandomBagClassifier(sklearn.base.BaseEstimator, sklearn.base.ClassifierMixin):
 12class RandomBagClassifier(BaseEstimator, ClassifierMixin):
 13    """Generic Random Bagging Classifier (for any base learner).
 14
 15    Parameters:
 16
 17        base_estimator: Base learner to use for the booster.
 18
 19        n_estimators: Number of boosting stages to perform.
 20
 21        learning_rate: Learning rate shrinks the contribution of each estimator.
 22
 23        n_hidden_features: Number of hidden features to use for the base learner.
 24
 25        direct_link: Whether to use direct link for the base learner or not.
 26
 27        weights_distribution: Distribution of the weights for the booster (uniform or normal).
 28
 29        dropout: Dropout rate.
 30
 31        random_state: Random state.
 32    
 33    Attributes:
 34
 35        baggers_: The bagging learners.
 36
 37        classes_: The classes of the target variable.
 38
 39        n_classes_: The number of classes of the target variable.
 40
 41    Examples:
 42
 43        See https://github.com/Techtonique/genbooster/tree/main/examples
 44
 45    """
 46    
 47    def __init__(self,
 48                base_estimator: Optional[BaseEstimator] = None,
 49                n_estimators: int = 100,
 50                learning_rate: float = 0.01,
 51                n_hidden_features: int = 5,
 52                direct_link: bool = True,
 53                weights_distribution: str = 'uniform',
 54                dropout: float = 0.0,
 55                random_state: Optional[int] = 42):
 56        if base_estimator is None:
 57            self.base_estimator = Ridge()
 58        else: 
 59            self.base_estimator = base_estimator        
 60        self.n_estimators = n_estimators
 61        self.learning_rate = learning_rate
 62        self.n_hidden_features = n_hidden_features
 63        self.direct_link = direct_link
 64        self.weights_distribution = weights_distribution
 65        self.dropout = dropout
 66        self.random_state = random_state
 67        self.y_mean_ = None
 68        self.boosters_ = None 
 69    
 70    def fit(self, X, y) -> "RandomBagClassifier":
 71        """Fit the bagging model.
 72        
 73        Parameters:
 74
 75            X: Input data.
 76
 77            y: Target data.
 78            
 79        Returns:
 80
 81            self: The fitted bagging model.
 82        """
 83        if isinstance(X, pd.DataFrame):
 84            X = X.values
 85        if isinstance(y, pd.DataFrame):
 86            y = y.values        
 87        y = np.asarray([int(x) for x in y]).ravel() 
 88        Y = OneHotEncoder().fit_transform(y.reshape(-1, 1)).toarray()
 89        self.classes_ = np.unique(y)
 90        self.n_classes_ = len(self.classes_)
 91        
 92        # Store the results of the list comprehension
 93        self.boosters_ = []
 94        for i in range(self.n_classes_):
 95            booster = _RustBooster(
 96                self.base_estimator,
 97                self.n_estimators,
 98                self.learning_rate,
 99                self.n_hidden_features,
100                self.direct_link,
101                weights_distribution=self.weights_distribution
102            )
103            booster.fit_bagging(X, Y[:, i], dropout=self.dropout, seed=self.random_state)
104            self.boosters_.append(booster)            
105        return self
106    
107    def predict(self, X) -> np.ndarray:
108        """Make predictions with the bagging model.
109        
110        Parameters:
111
112            X: Input data.
113            
114        Returns:
115
116            preds: Class predictions.
117        """
118        if isinstance(X, pd.DataFrame):
119            X = X.values       
120        preds_proba = self.predict_proba(X)
121        return np.argmax(preds_proba, axis=0)
122
123    def predict_proba(self, X) -> np.ndarray:
124        """Make probability predictions with the booster model.
125        
126        Parameters:
127
128            X: Input data.
129            
130        Returns:
131        
132            preds: Probability predictions.
133        """
134        if isinstance(X, pd.DataFrame):
135            X = X.values
136        raw_preds = np.asarray([booster.predict_bagging(X) for booster in self.boosters_])
137        shifted_preds = raw_preds - np.max(raw_preds, axis=0)
138        exp_preds = np.exp(shifted_preds)
139        return exp_preds / np.sum(exp_preds, axis=0)

Generic Random Bagging Classifier (for any base learner).

Parameters:

base_estimator: Base learner to use for the booster.

n_estimators: Number of boosting stages to perform.

learning_rate: Learning rate shrinks the contribution of each estimator.

n_hidden_features: Number of hidden features to use for the base learner.

direct_link: Whether to use direct link for the base learner or not.

weights_distribution: Distribution of the weights for the booster (uniform or normal).

dropout: Dropout rate.

random_state: Random state.

Attributes:

boosters_: The fitted bagging models, one per class.

classes_: The classes of the target variable.

n_classes_: The number of classes of the target variable.

Examples:

See https://github.com/Techtonique/genbooster/tree/main/examples
def fit(self, X, y) -> RandomBagClassifier:
 70    def fit(self, X, y) -> "RandomBagClassifier":
 71        """Fit the bagging model.
 72        
 73        Parameters:
 74
 75            X: Input data.
 76
 77            y: Target data.
 78            
 79        Returns:
 80
 81            self: The fitted bagging model.
 82        """
 83        if isinstance(X, pd.DataFrame):
 84            X = X.values
 85        if isinstance(y, pd.DataFrame):
 86            y = y.values        
 87        y = np.asarray([int(x) for x in y]).ravel() 
 88        Y = OneHotEncoder().fit_transform(y.reshape(-1, 1)).toarray()
 89        self.classes_ = np.unique(y)
 90        self.n_classes_ = len(self.classes_)
 91        
 92        # Store the results of the list comprehension
 93        self.boosters_ = []
 94        for i in range(self.n_classes_):
 95            booster = _RustBooster(
 96                self.base_estimator,
 97                self.n_estimators,
 98                self.learning_rate,
 99                self.n_hidden_features,
100                self.direct_link,
101                weights_distribution=self.weights_distribution
102            )
103            booster.fit_bagging(X, Y[:, i], dropout=self.dropout, seed=self.random_state)
104            self.boosters_.append(booster)            
105        return self

Fit the bagging model.

Parameters:

X: Input data.

y: Target data.

Returns:

self: The fitted bagging model.
def predict(self, X) -> numpy.ndarray:
107    def predict(self, X) -> np.ndarray:
108        """Make predictions with the bagging model.
109        
110        Parameters:
111
112            X: Input data.
113            
114        Returns:
115
116            preds: Class predictions.
117        """
118        if isinstance(X, pd.DataFrame):
119            X = X.values       
120        preds_proba = self.predict_proba(X)
121        return np.argmax(preds_proba, axis=0)

Make predictions with the bagging model.

Parameters:

X: Input data.

Returns:

preds: Class predictions.
def predict_proba(self, X) -> numpy.ndarray:
123    def predict_proba(self, X) -> np.ndarray:
124        """Make probability predictions with the booster model.
125        
126        Parameters:
127
128            X: Input data.
129            
130        Returns:
131        
132            preds: Probability predictions.
133        """
134        if isinstance(X, pd.DataFrame):
135            X = X.values
136        raw_preds = np.asarray([booster.predict_bagging(X) for booster in self.boosters_])
137        shifted_preds = raw_preds - np.max(raw_preds, axis=0)
138        exp_preds = np.exp(shifted_preds)
139        return exp_preds / np.sum(exp_preds, axis=0)

Make probability predictions with the bagging model.

Parameters:

X: Input data.

Returns:

preds: Probability predictions.
class RustBooster:
class Regressor:
def fit(self, /, x, y):
def predict(self, /, x):
class LinfaRegressor(sklearn.base.BaseEstimator, sklearn.base.RegressorMixin):
class LinfaRegressor(BaseEstimator, RegressorMixin):
    """scikit-learn style wrapper around the ``_Regressor`` backend.

    Parameters:

        model_name: Name of the backend model, forwarded to ``_Regressor``.

    Attributes:

        model: The wrapped ``_Regressor`` instance, created in ``__init__``.
    """

    def __init__(self, model_name="LinearRegression"):
        self.model_name = model_name
        # NOTE(review): the backend is built once here, so changing
        # model_name afterwards does not rebuild self.model.
        self.model = _Regressor(model_name=self.model_name)

    def fit(self, X, y):
        """Fit the wrapped backend model.

        The backend may reject the target's layout with a ``TypeError``, so
        the target is offered as given, then as a column, as a row and
        finally flattened, stopping at the first layout that is accepted.

        Parameters:

            X: Input data.

            y: Target data.

        Returns:

            self: The fitted model.

        Raises:

            TypeError: If the backend rejects every target layout; the
                error from the last attempt is raised.
        """
        if isinstance(X, pd.DataFrame):
            X = X.values
        # Same float64 coercion that predict() applies to its input.
        X = np.asarray(X, dtype=np.float64)
        if isinstance(y, (pd.DataFrame, pd.Series)):
            y = y.values
        layouts = (
            lambda t: t,
            lambda t: t.reshape(-1, 1),
            lambda t: t.reshape(1, -1),
            lambda t: t.ravel(),
        )
        last_error = None
        for as_layout in layouts:
            try:
                self.model.fit(X, as_layout(y))
            except TypeError as err:
                last_error = err
            else:
                return self
        raise last_error

    def predict(self, X):
        """Predict with the wrapped backend model.

        Parameters:

            X: Input data.

        Returns:

            predictions: Backend predictions, flattened when 2-D.
        """
        if isinstance(X, pd.DataFrame):
            X = X.values
        X = np.asarray(X, dtype=np.float64)
        predictions = self.model.predict(X)
        # Ensure 1D output
        if predictions.ndim == 2:
            predictions = predictions.ravel()
        return predictions

Base class for all estimators in scikit-learn.

Inheriting from this class provides default implementations of:

  • setting and getting parameters used by GridSearchCV and friends;
  • textual and HTML representation displayed in terminals and IDEs;
  • estimator serialization;
  • parameters validation;
  • data validation;
  • feature names validation.

Read more in the :ref:`User Guide <rolling_your_own_estimator>`.

Notes

All estimators should specify all the parameters that can be set at the class level in their __init__ as explicit keyword arguments (no *args or **kwargs).

Examples

>>> import numpy as np
>>> from sklearn.base import BaseEstimator
>>> class MyEstimator(BaseEstimator):
...     def __init__(self, *, param=1):
...         self.param = param
...     def fit(self, X, y=None):
...         self.is_fitted_ = True
...         return self
...     def predict(self, X):
...         return np.full(shape=X.shape[0], fill_value=self.param)
>>> estimator = MyEstimator(param=2)
>>> estimator.get_params()
{'param': 2}
>>> X = np.array([[1, 2], [2, 3], [3, 4]])
>>> y = np.array([1, 0, 1])
>>> estimator.fit(X, y).predict(X)
array([2, 2, 2])
>>> estimator.set_params(param=3).fit(X, y).predict(X)
array([3, 3, 3])
def fit(self, X, y):
17    def fit(self, X, y):
18        if isinstance(X, pd.DataFrame):
19            X = X.values
20        if isinstance(y, pd.DataFrame) or isinstance(y, pd.Series):
21            y = y.values
22        try: 
23            self.model.fit(X, y)
24        except TypeError as e:
25            try: 
26                self.model.fit(X, y.reshape(-1, 1))
27            except TypeError as e:
28                try: 
29                    self.model.fit(X, y.reshape(1, -1))
30                except TypeError as e:
31                    try: 
32                        self.model.fit(X, y.ravel())
33                    except TypeError as e:
34                        raise e
35        return self
def predict(self, X):
37    def predict(self, X):
38        if isinstance(X, pd.DataFrame):
39            X = X.values
40        X = np.asarray(X, dtype=np.float64)
41        predictions = self.model.predict(X)
42        # Ensure 1D output
43        if predictions.ndim == 2:
44            predictions = predictions.ravel()
45        return predictions
class AdaBoostClassifier(sklearn.base.BaseEstimator, sklearn.base.ClassifierMixin):
 12class AdaBoostClassifier(BaseEstimator, ClassifierMixin):
 13    """AdaBoost Classifier using AdaBoostRegressor as a multi-task learner.
 14    
 15    Parameters:
 16
 17        base_estimator: Base learner to use for the booster. Default is ExtraTreeRegressor.
 18
 19        n_estimators: Number of boosting stages to perform.
 20
 21        learning_rate: Learning rate shrinks the contribution of each estimator.
 22
 23        n_hidden_features: Number of hidden features to use for the base learner.
 24
 25        direct_link: Whether to use direct link for the base learner or not.
 26
 27        weights_distribution: Distribution of the weights for the booster (uniform or normal).
 28
 29        dropout: Dropout rate.
 30
 31        tolerance: Tolerance for early stopping.
 32
 33        random_state: Random state.
 34        
 35    Attributes:
 36
 37        classes_: The classes labels.
 38
 39        n_classes_: The number of classes.
 40
 41        boosters_: List of AdaBoostRegressor instances, one per class.
 42    """
 43    
 44    def __init__(
 45        self,
 46        base_estimator: Optional[BaseEstimator] = None,
 47        n_estimators: int = 100,
 48        learning_rate: float = 0.1,
 49        n_hidden_features: int = 5,
 50        direct_link: bool = True,
 51        weights_distribution: str = "uniform",
 52        dropout: float = 0.0,
 53        tolerance: float = 1e-4,
 54        random_state: Optional[int] = None
 55    ):
 56        self.base_estimator = base_estimator
 57        self.n_estimators = n_estimators
 58        self.learning_rate = learning_rate
 59        self.n_hidden_features = n_hidden_features
 60        self.direct_link = direct_link
 61        self.weights_distribution = weights_distribution
 62        self.dropout = dropout
 63        self.tolerance = tolerance
 64        self.random_state = random_state
 65        self.boosters_ = []
 66
 67    def fit(self, X, y) -> "AdaBoostClassifier":
 68        """Fit the AdaBoost classifier.
 69        
 70        Parameters:
 71
 72            X: Input data.
 73
 74            y: Target data.
 75            
 76        Returns:
 77        
 78            self: The fitted boosting model.
 79        """
 80        # Get unique classes and one-hot encode
 81        self.classes_ = np.unique(y)
 82        self.n_classes_ = len(self.classes_)
 83        Y = one_hot_encode2(y, self.n_classes_)
 84        
 85        # Use ExtraTreeRegressor as default base estimator if none provided
 86        if self.base_estimator is None:
 87            self.base_estimator_ = ExtraTreeRegressor(
 88                random_state=self.random_state
 89            )
 90        else:
 91            self.base_estimator_ = self.base_estimator
 92        
 93        # Train one booster per class
 94        for i in range(self.n_classes_):
 95            booster = AdaBoostRegressor(
 96                base_estimator=self.base_estimator_,
 97                n_estimators=self.n_estimators,
 98                learning_rate=self.learning_rate,
 99                n_hidden_features=self.n_hidden_features,
100                direct_link=self.direct_link,
101                weights_distribution=self.weights_distribution,
102                dropout=self.dropout,
103                tolerance=self.tolerance,
104                random_state=None if self.random_state is None 
105                    else self.random_state + i
106            )
107            
108            # Convert X and y to the right format
109            X_arr = np.asarray(X.values if hasattr(X, 'values') else X, dtype=np.float64)
110            y_arr = np.asarray(Y[:, i], dtype=np.float64)
111            
112            # Fit the booster
113            booster.fit(X_arr, y_arr)
114            self.boosters_.append(booster)
115        
116        return self 
117
118    def predict(self, X) -> np.ndarray:
119        """Make predictions with the boosting model.
120        
121        Parameters:
122
123            X: Input data.
124            
125        Returns:
126
127            preds: Class predictions.
128        """
129        if isinstance(X, pd.DataFrame):
130            X = X.values       
131        preds_proba = self.predict_proba(X)
132        return np.argmax(preds_proba, axis=0)
133
134    def predict_proba(self, X) -> np.ndarray:
135        """Make probability predictions with the boosting model.
136        
137        Parameters:
138
139            X: Input data.
140            
141        Returns:
142        
143            preds: Probability predictions.
144        """
145        if isinstance(X, pd.DataFrame):
146            X = X.values
147        raw_preds = np.asarray([booster.predict(X) for booster in self.boosters_])
148        shifted_preds = raw_preds - np.max(raw_preds, axis=0)
149        exp_preds = np.exp(shifted_preds)
150        return exp_preds / np.sum(exp_preds, axis=0)

AdaBoost Classifier using AdaBoostRegressor as a multi-task learner.

Parameters:

base_estimator: Base learner to use for the booster. Default is ExtraTreeRegressor.

n_estimators: Number of boosting stages to perform.

learning_rate: Learning rate shrinks the contribution of each estimator.

n_hidden_features: Number of hidden features to use for the base learner.

direct_link: Whether to use direct link for the base learner or not.

weights_distribution: Distribution of the weights for the booster (uniform or normal).

dropout: Dropout rate.

tolerance: Tolerance for early stopping.

random_state: Random state.

Attributes:

classes_: The classes labels.

n_classes_: The number of classes.

boosters_: List of AdaBoostRegressor instances, one per class.
def fit(self, X, y) -> AdaBoostClassifier:
 67    def fit(self, X, y) -> "AdaBoostClassifier":
 68        """Fit the AdaBoost classifier.
 69        
 70        Parameters:
 71
 72            X: Input data.
 73
 74            y: Target data.
 75            
 76        Returns:
 77        
 78            self: The fitted boosting model.
 79        """
 80        # Get unique classes and one-hot encode
 81        self.classes_ = np.unique(y)
 82        self.n_classes_ = len(self.classes_)
 83        Y = one_hot_encode2(y, self.n_classes_)
 84        
 85        # Use ExtraTreeRegressor as default base estimator if none provided
 86        if self.base_estimator is None:
 87            self.base_estimator_ = ExtraTreeRegressor(
 88                random_state=self.random_state
 89            )
 90        else:
 91            self.base_estimator_ = self.base_estimator
 92        
 93        # Train one booster per class
 94        for i in range(self.n_classes_):
 95            booster = AdaBoostRegressor(
 96                base_estimator=self.base_estimator_,
 97                n_estimators=self.n_estimators,
 98                learning_rate=self.learning_rate,
 99                n_hidden_features=self.n_hidden_features,
100                direct_link=self.direct_link,
101                weights_distribution=self.weights_distribution,
102                dropout=self.dropout,
103                tolerance=self.tolerance,
104                random_state=None if self.random_state is None 
105                    else self.random_state + i
106            )
107            
108            # Convert X and y to the right format
109            X_arr = np.asarray(X.values if hasattr(X, 'values') else X, dtype=np.float64)
110            y_arr = np.asarray(Y[:, i], dtype=np.float64)
111            
112            # Fit the booster
113            booster.fit(X_arr, y_arr)
114            self.boosters_.append(booster)
115        
116        return self 

Fit the AdaBoost classifier.

Parameters:

X: Input data.

y: Target data.

Returns:

self: The fitted boosting model.
def predict(self, X) -> numpy.ndarray:
118    def predict(self, X) -> np.ndarray:
119        """Make predictions with the boosting model.
120        
121        Parameters:
122
123            X: Input data.
124            
125        Returns:
126
127            preds: Class predictions.
128        """
129        if isinstance(X, pd.DataFrame):
130            X = X.values       
131        preds_proba = self.predict_proba(X)
132        return np.argmax(preds_proba, axis=0)

Make predictions with the boosting model.

Parameters:

X: Input data.

Returns:

preds: Class predictions.
def predict_proba(self, X) -> numpy.ndarray:
134    def predict_proba(self, X) -> np.ndarray:
135        """Make probability predictions with the boosting model.
136        
137        Parameters:
138
139            X: Input data.
140            
141        Returns:
142        
143            preds: Probability predictions.
144        """
145        if isinstance(X, pd.DataFrame):
146            X = X.values
147        raw_preds = np.asarray([booster.predict(X) for booster in self.boosters_])
148        shifted_preds = raw_preds - np.max(raw_preds, axis=0)
149        exp_preds = np.exp(shifted_preds)
150        return exp_preds / np.sum(exp_preds, axis=0)

Make probability predictions with the boosting model.

Parameters:

X: Input data.

Returns:

preds: Probability predictions.
class AdaBoostRegressor(sklearn.base.BaseEstimator, sklearn.base.RegressorMixin):
 16class AdaBoostRegressor(BaseEstimator, RegressorMixin):
 17    """AdaBoost Regressor with neural network-like feature transformation.
 18    
 19    Parameters:
 20
 21        base_estimator: Base learner to use for the booster.
 22
 23        n_estimators: Number of boosting stages to perform.
 24
 25        learning_rate: Learning rate shrinks the contribution of each estimator.
 26
 27        n_hidden_features: Number of hidden features to use for the base learner.
 28
 29        direct_link: Whether to use direct link for the base learner or not.
 30
 31        weights_distribution: Distribution of the weights for the booster (uniform or normal).
 32
 33        dropout: Dropout rate.
 34
 35        tolerance: Tolerance for early stopping.
 36
 37        random_state: Random state.
 38        
 39    Attributes:
 40
 41        base_estimator_: The base learner.
 42
 43        booster_: The boosting model.
 44
 45        scaler_: StandardScaler for feature scaling.
 46    """
 47    
 48    def __init__(
 49        self,
 50        base_estimator: Optional[BaseEstimator] = None,
 51        n_estimators: int = 100,
 52        learning_rate: float = 0.1,
 53        n_hidden_features: int = 5,
 54        direct_link: bool = True,
 55        weights_distribution: str = "uniform",
 56        dropout: float = 0.0,
 57        tolerance: float = 1e-4,
 58        random_state: Optional[int] = None
 59    ):
 60        self.base_estimator = base_estimator
 61        self.n_estimators = n_estimators
 62        self.learning_rate = learning_rate
 63        self.n_hidden_features = n_hidden_features
 64        self.direct_link = direct_link
 65        self.weights_distribution = weights_distribution
 66        self.dropout = dropout
 67        self.tolerance = tolerance
 68        self.random_state = random_state
 69        self.scaler_ = StandardScaler()
 70
 71    def fit(self, X, y) -> "AdaBoostRegressor":
 72        """Fit the AdaBoost regressor.
 73        
 74        Parameters:
 75
 76            X: Input data
 77            
 78            y: Target values
 79        """
 80        # Convert inputs to numpy arrays
 81        X_arr = np.asarray(X.values if hasattr(X, 'values') else X, dtype=np.float64)
 82        y_arr = np.asarray(y, dtype=np.float64)
 83        
 84        # Fit and transform with StandardScaler
 85        self.scaler_ = StandardScaler()
 86        X_scaled = self.scaler_.fit_transform(X_arr)
 87        
 88        # Initialize base estimator if None
 89        if self.base_estimator is None:
 90            self.base_estimator_ = ExtraTreeRegressor(random_state=self.random_state)
 91        else:
 92            self.base_estimator_ = self.base_estimator
 93        
 94        # Create and fit the booster
 95        self.booster_ = _AdaBoostRegressor(
 96            base_estimator=self.base_estimator_,
 97            n_estimators=self.n_estimators,
 98            learning_rate=self.learning_rate,
 99            n_hidden_features=self.n_hidden_features,
100            direct_link=self.direct_link,
101            weights_distribution=self.weights_distribution,
102            dropout=self.dropout,
103            tolerance=self.tolerance,
104            random_state=self.random_state
105        )
106        
107        self.booster_.fit(X_scaled, y_arr)
108        return self
109
110    def predict(self, X) -> np.ndarray:
111        """Make predictions with the AdaBoost model.
112        
113        Parameters:
114
115            X: Input data.
116            
117        Returns:
118
119            predictions: Model predictions.
120        """
121        if isinstance(X, pd.DataFrame):
122            X = X.values
123        X = np.array(X, dtype=np.float64, copy=True, order='C')
124        scaled_X = self.scaler_.transform(X)
125        return self.booster_.predict(scaled_X)

AdaBoost Regressor with neural network-like feature transformation.

Parameters:

base_estimator: Base learner to use for the booster.

n_estimators: Number of boosting stages to perform.

learning_rate: Learning rate shrinks the contribution of each estimator.

n_hidden_features: Number of hidden features to use for the base learner.

direct_link: Whether to use direct link for the base learner or not.

weights_distribution: Distribution of the weights for the booster (uniform or normal).

dropout: Dropout rate.

tolerance: Tolerance for early stopping.

random_state: Random state.

Attributes:

base_estimator_: The base learner.

booster_: The boosting model.

scaler_: StandardScaler for feature scaling.
def fit(self, X, y) -> AdaBoostRegressor:
 71    def fit(self, X, y) -> "AdaBoostRegressor":
 72        """Fit the AdaBoost regressor.
 73        
 74        Parameters:
 75
 76            X: Input data
 77            
 78            y: Target values
 79        """
 80        # Convert inputs to numpy arrays
 81        X_arr = np.asarray(X.values if hasattr(X, 'values') else X, dtype=np.float64)
 82        y_arr = np.asarray(y, dtype=np.float64)
 83        
 84        # Fit and transform with StandardScaler
 85        self.scaler_ = StandardScaler()
 86        X_scaled = self.scaler_.fit_transform(X_arr)
 87        
 88        # Initialize base estimator if None
 89        if self.base_estimator is None:
 90            self.base_estimator_ = ExtraTreeRegressor(random_state=self.random_state)
 91        else:
 92            self.base_estimator_ = self.base_estimator
 93        
 94        # Create and fit the booster
 95        self.booster_ = _AdaBoostRegressor(
 96            base_estimator=self.base_estimator_,
 97            n_estimators=self.n_estimators,
 98            learning_rate=self.learning_rate,
 99            n_hidden_features=self.n_hidden_features,
100            direct_link=self.direct_link,
101            weights_distribution=self.weights_distribution,
102            dropout=self.dropout,
103            tolerance=self.tolerance,
104            random_state=self.random_state
105        )
106        
107        self.booster_.fit(X_scaled, y_arr)
108        return self

Fit the AdaBoost regressor.

Parameters:

X: Input data

y: Target values
def predict(self, X) -> numpy.ndarray:
110    def predict(self, X) -> np.ndarray:
111        """Make predictions with the AdaBoost model.
112        
113        Parameters:
114
115            X: Input data.
116            
117        Returns:
118
119            predictions: Model predictions.
120        """
121        if isinstance(X, pd.DataFrame):
122            X = X.values
123        X = np.array(X, dtype=np.float64, copy=True, order='C')
124        scaled_X = self.scaler_.transform(X)
125        return self.booster_.predict(scaled_X)

Make predictions with the AdaBoost model.

Parameters:

X: Input data.

Returns:

predictions: Model predictions.