1import numpy as np
2from sklearn.preprocessing import StandardScaler
3from sklearn.base import BaseEstimator
4
5
class _RVFLBase(BaseEstimator):
    """
    Base class for Random Vector Functional Link (RVFL) networks.

    RVFL networks are single-hidden-layer feedforward networks where the
    hidden-layer weights are drawn randomly and kept fixed. Only the output
    weights are trained, via ridge regression in closed form. This makes
    training extremely fast compared to gradient-based alternatives.

    The augmented-system formulation used here is numerically equivalent to
    the normal equations with L2 regularisation but avoids forming ``H.T @ H``
    explicitly, which improves numerical stability for wide feature matrices.

    Parameters
    ----------
    n_nodes : int, default=100
        Number of randomly initialised hidden neurons.
    alpha : float, default=1e-3
        Ridge (L2) regularisation strength. Larger values shrink the output
        weights more aggressively and reduce variance at the cost of bias.
    direct_link : bool, default=True
        If ``True``, the original (scaled) inputs are concatenated to the
        hidden-layer activations before solving for the output weights.
        Direct links often improve accuracy at negligible cost.
    activation : {'tanh', 'relu', 'sigmoid'}, default='tanh'
        Element-wise activation function applied to the hidden layer.
    scale : float, default=1.0
        Standard deviation of the zero-mean Gaussian used to sample the
        random hidden weights ``W_`` and biases ``b_``.
    random_state : int, default=42
        Seed passed to ``numpy.random.RandomState`` for reproducibility.

    Attributes
    ----------
    W_ : ndarray of shape (n_features, n_nodes)
        Random input-to-hidden weight matrix, fixed after ``fit``.
    b_ : ndarray of shape (n_nodes,)
        Random hidden biases, fixed after ``fit``.
    beta_ : ndarray of shape (n_nodes [+ n_features], n_outputs)
        Learned output weights. The leading dimension is
        ``n_nodes + n_features`` when ``direct_link=True``.
    scaler_ : StandardScaler
        Fitted scaler used to standardise inputs.
    """

    def __init__(
        self,
        n_nodes=100,
        alpha=1e-3,
        direct_link=True,
        activation="tanh",
        scale=1.0,
        random_state=42,
    ):
        self.n_nodes = n_nodes
        self.alpha = alpha
        self.direct_link = direct_link
        self.activation = activation
        self.scale = scale
        self.random_state = random_state

    def _activate(self, Z):
        """
        Apply the chosen element-wise activation function.

        Parameters
        ----------
        Z : ndarray of shape (n_samples, n_nodes)
            Pre-activation matrix.

        Returns
        -------
        ndarray of shape (n_samples, n_nodes)
            Post-activation matrix.

        Raises
        ------
        ValueError
            If ``self.activation`` is not one of the supported names.
            (Previously an unrecognised name silently fell back to tanh,
            which hid typos such as ``'relu '``.)
        """
        if self.activation == "tanh":
            return np.tanh(Z)
        if self.activation == "relu":
            return np.maximum(0, Z)
        if self.activation == "sigmoid":
            # sigmoid(z) == 0.5 * (1 + tanh(z / 2)). The tanh form never
            # overflows, unlike 1 / (1 + exp(-z)) which raises a
            # RuntimeWarning for strongly negative pre-activations.
            return 0.5 * (1.0 + np.tanh(0.5 * Z))
        raise ValueError(
            f"Unknown activation {self.activation!r}; "
            "expected one of 'tanh', 'relu', 'sigmoid'."
        )

    def _hidden(self, X):
        """
        Compute the (extended) hidden representation of ``X``.

        Applies the random projection, adds the bias, activates, and
        optionally appends the raw inputs as direct links.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            Standardised input matrix.

        Returns
        -------
        H : ndarray of shape (n_samples, n_nodes [+ n_features])
            Hidden (+ direct-link) feature matrix.
        """
        H = self._activate(X @ self.W_ + self.b_)
        return np.hstack([H, X]) if self.direct_link else H

    def _init_random(self, n_in):
        """
        Draw and store the random hidden weights and biases.

        Parameters
        ----------
        n_in : int
            Number of input features (i.e. rows of ``W_``).
        """
        rng = np.random.RandomState(self.random_state)
        self.W_ = rng.normal(0, self.scale, (n_in, self.n_nodes))
        self.b_ = rng.normal(0, self.scale, (self.n_nodes,))

    def _fit_hidden(self, X, Y):
        """
        Standardise inputs, build the hidden representation, and solve
        for the output weights via ridge regression.

        The ridge problem is cast as an ordinary least-squares problem on an
        augmented system::

            [H            ] beta = [Y]
            [sqrt(alpha)*I]        [0]

        so that ``numpy.linalg.lstsq`` can be used directly without forming
        the normal equations.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Raw training inputs.
        Y : ndarray of shape (n_samples, n_outputs) or (n_samples,)
            Target matrix (already one-hot encoded for classifiers).
            A 1-D target vector is promoted to a single-output column.
        """
        # Promote 1-D targets to a column matrix; previously Y.shape[1]
        # raised IndexError for regression with a plain target vector.
        Y = np.asarray(Y)
        if Y.ndim == 1:
            Y = Y[:, None]

        self.scaler_ = StandardScaler()
        X = self.scaler_.fit_transform(X)

        self._init_random(X.shape[1])
        H = self._hidden(X)

        # Augmented system: appending sqrt(alpha)*I rows to H (with matching
        # zero rows in Y) makes plain least squares solve the ridge problem.
        p = H.shape[1]
        H_aug = np.vstack([H, np.sqrt(self.alpha) * np.eye(p)])
        Y_aug = np.vstack([Y, np.zeros((p, Y.shape[1]))])

        self.beta_ = np.linalg.lstsq(H_aug, Y_aug, rcond=None)[0]

    def _predict_raw(self, X):
        """
        Compute raw (pre-activation) model outputs for new inputs.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Raw input matrix.

        Returns
        -------
        ndarray of shape (n_samples, n_outputs)
            Linear combination of the hidden (+ direct-link) features and
            the learned output weights.
        """
        return self._hidden(self.scaler_.transform(X)) @ self.beta_