1import numpy as np
2from sklearn.preprocessing import StandardScaler
3from sklearn.base import BaseEstimator
4
5
class _RVFLBase(BaseEstimator):
    """
    Base class for Random Vector Functional Link (RVFL) networks.

    RVFL networks are single-hidden-layer feedforward networks where the
    hidden-layer weights are drawn randomly and kept fixed. Only the output
    weights are trained, via ridge regression in closed form. This makes
    training extremely fast compared to gradient-based alternatives.

    The augmented-system formulation used here is numerically equivalent to
    the normal equations with L2 regularisation but avoids forming ``H.T @ H``
    explicitly, which improves numerical stability for wide feature matrices.

    Parameters
    ----------
    n_nodes : int, default=100
        Number of randomly initialised hidden neurons.
    alpha : float, default=1e-3
        Ridge (L2) regularisation strength. Larger values shrink the output
        weights more aggressively and reduce variance at the cost of bias.
    direct_link : bool, default=True
        If ``True``, the original (scaled) inputs are concatenated to the
        hidden-layer activations before solving for the output weights.
        Direct links often improve accuracy at negligible cost.
    activation : {'tanh', 'relu', 'sigmoid'}, default='tanh'
        Element-wise activation function applied to the hidden layer.
    scale : float, default=1.0
        Standard deviation of the zero-mean Gaussian used to sample the
        random hidden weights ``W_`` and biases ``b_``.
    random_state : int, default=42
        Seed passed to ``numpy.random.RandomState`` for reproducibility.

    Attributes
    ----------
    W_ : ndarray of shape (n_features, n_nodes)
        Random input-to-hidden weight matrix, fixed after ``fit``.
    b_ : ndarray of shape (n_nodes,)
        Random hidden biases, fixed after ``fit``.
    beta_ : ndarray of shape (n_nodes [+ n_features], n_outputs)
        Learned output weights. The leading dimension is
        ``n_nodes + n_features`` when ``direct_link=True``.
    scaler_ : StandardScaler
        Fitted scaler used to standardise inputs.
    """

    def __init__(
        self,
        n_nodes=100,
        alpha=1e-3,
        direct_link=True,
        activation="tanh",
        scale=1.0,
        random_state=42,
    ):
        self.n_nodes = n_nodes
        self.alpha = alpha
        self.direct_link = direct_link
        self.activation = activation
        self.scale = scale
        self.random_state = random_state

    def _activate(self, Z):
        """
        Apply the chosen element-wise activation function.

        Parameters
        ----------
        Z : ndarray of shape (n_samples, n_nodes)
            Pre-activation matrix.

        Returns
        -------
        ndarray of shape (n_samples, n_nodes)
            Post-activation matrix.

        Raises
        ------
        ValueError
            If ``self.activation`` is not one of the supported names.
            (Previously an unrecognised name silently fell back to tanh,
            which hid typos such as ``'relu '``.)
        """
        if self.activation == "tanh":
            return np.tanh(Z)
        if self.activation == "relu":
            return np.maximum(0, Z)
        if self.activation == "sigmoid":
            # sigmoid(z) == 0.5 * (1 + tanh(z / 2)). The tanh form never
            # overflows, unlike 1 / (1 + exp(-z)) which raises a
            # RuntimeWarning for strongly negative pre-activations.
            return 0.5 * (1.0 + np.tanh(0.5 * Z))
        raise ValueError(
            f"Unknown activation {self.activation!r}; "
            "expected one of 'tanh', 'relu', 'sigmoid'."
        )

    def _hidden(self, X):
        """
        Compute the (extended) hidden representation of ``X``.

        Applies the random projection, adds the bias, activates, and
        optionally appends the raw inputs as direct links.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            Standardised input matrix.

        Returns
        -------
        H : ndarray of shape (n_samples, n_nodes [+ n_features])
            Hidden (+ direct-link) feature matrix.
        """
        H = self._activate(X @ self.W_ + self.b_)
        return np.hstack([H, X]) if self.direct_link else H

    def _init_random(self, n_in):
        """
        Draw and store the random hidden weights and biases.

        Parameters
        ----------
        n_in : int
            Number of input features (i.e. rows of ``W_``).
        """
        rng = np.random.RandomState(self.random_state)
        self.W_ = rng.normal(0, self.scale, (n_in, self.n_nodes))
        self.b_ = rng.normal(0, self.scale, (self.n_nodes,))

    def _fit_hidden(self, X, Y):
        """
        Standardise inputs, build the hidden representation, and solve
        for the output weights via ridge regression.

        The ridge problem is cast as an ordinary least-squares problem on an
        augmented system::

            [H            ] beta = [Y]
            [sqrt(alpha)*I]        [0]

        so that ``numpy.linalg.lstsq`` can be used directly without forming
        the normal equations.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Raw training inputs.
        Y : ndarray of shape (n_samples, n_outputs) or (n_samples,)
            Target matrix (already one-hot encoded for classifiers).
            A 1-D target vector is promoted to a single-output column.
        """
        # Promote 1-D targets to a column matrix; previously Y.shape[1]
        # raised IndexError for regression with a plain target vector.
        Y = np.asarray(Y)
        if Y.ndim == 1:
            Y = Y[:, None]

        self.scaler_ = StandardScaler()
        X = self.scaler_.fit_transform(X)

        self._init_random(X.shape[1])
        H = self._hidden(X)

        # Augmented system: appending sqrt(alpha)*I rows to H (with matching
        # zero rows in Y) makes plain least squares solve the ridge problem.
        p = H.shape[1]
        H_aug = np.vstack([H, np.sqrt(self.alpha) * np.eye(p)])
        Y_aug = np.vstack([Y, np.zeros((p, Y.shape[1]))])

        self.beta_ = np.linalg.lstsq(H_aug, Y_aug, rcond=None)[0]

    def _predict_raw(self, X):
        """
        Compute raw (pre-activation) model outputs for new inputs.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Raw input matrix.

        Returns
        -------
        ndarray of shape (n_samples, n_outputs)
            Linear combination of the hidden (+ direct-link) features and
            the learned output weights.
        """
        return self._hidden(self.scaler_.transform(X)) @ self.beta_