GPopt.BOstopping

  1import numpy as np
  2from scipy.stats import mannwhitneyu, norm
  3from scipy.stats import wasserstein_distance
  4from sklearn.gaussian_process import GaussianProcessRegressor
  5from sklearn.gaussian_process.kernels import Matern
  6from copy import deepcopy
  7from tqdm import tqdm
  8import xgboost as xgb
  9from sklearn.datasets import make_classification
 10from sklearn.model_selection import (
 11    train_test_split,
 12    cross_validate,
 13    StratifiedKFold,
 14)
 15from sklearn.metrics import accuracy_score, log_loss
 16from sklearn.preprocessing import StandardScaler
 17import matplotlib.pyplot as plt
 18
 19
 20# BayesianOptimization class with robust early stopping
 21class BOstopping:
 22    """Bayesian Optimization with robust early stopping criteria."""
 23
 24    def __init__(
 25        self,
 26        f,
 27        bounds,
 28        n_init=5,
 29        kappa=1.96,
 30        early_stopping=True,
 31        stop_patience=20,
 32        stop_threshold=0.01,
 33        n_test_points=100,
 34        alpha=1e-6,
 35        n_restarts_optimizer=25,
 36        seed=123,
 37    ):
 38        self.f = f
 39        self.bounds = np.array(bounds)
 40        self.n_init = n_init
 41        self.kappa = kappa
 42        self.early_stopping = early_stopping
 43        self.stop_patience = stop_patience
 44        self.stop_threshold = stop_threshold
 45        self.n_test_points = n_test_points
 46        self.alpha = alpha
 47        self.n_restarts_optimizer = n_restarts_optimizer
 48        self.seed = seed
 49
 50        np.random.seed(self.seed)
 51        self.test_points = self._sample_random(n_test_points)
 52
 53        # History tracking
 54        self.wasserstein_history = []
 55        self.X = []
 56        self.y = []
 57        self.best_values = []
 58        self.acquisition_values = []
 59        self.gp_variance = []
 60        self.phase = []
 61
 62        # GP setup
 63        self.gp = GaussianProcessRegressor(
 64            kernel=Matern(nu=2.5),
 65            alpha=self.alpha,
 66            normalize_y=True,
 67            n_restarts_optimizer=self.n_restarts_optimizer,
 68            random_state=self.seed,
 69        )
 70
 71    def _sample_random(self, n_samples):
 72        """Uniform sampling within bounds."""
 73        return np.random.uniform(
 74            self.bounds[:, 0],
 75            self.bounds[:, 1],
 76            size=(n_samples, len(self.bounds)),
 77        )
 78
 79    def _acquisition(self, X_candidate):
 80        """Expected Improvement acquisition function."""
 81        mu, sigma = self.gp.predict(X_candidate, return_std=True)
 82        mu_sample = np.min(self.y)  # Use actual observed minimum
 83
 84        sigma = np.maximum(sigma, 1e-6)
 85        gamma = (mu_sample - mu) / sigma
 86        ei = (mu_sample - mu) * norm.cdf(gamma) + sigma * norm.pdf(gamma)
 87        return ei
 88
 89    def _get_posterior_samples(self, gp, n_samples=100):
 90        """Sample from GP posterior at test points."""
 91        mu, sigma = gp.predict(self.test_points, return_std=True)
 92        return np.random.normal(
 93            mu, sigma, size=(n_samples, len(self.test_points))
 94        )
 95
 96    def _compute_wasserstein(self, gp_prev, gp_current):
 97        """Compute approximate Wasserstein distance between posteriors."""
 98        mu_prev, std_prev = gp_prev.predict(self.test_points, return_std=True)
 99        mu_curr, std_curr = gp_current.predict(
100            self.test_points, return_std=True
101        )
102
103        # 2-Wasserstein distance for independent 1D Gaussians, averaged
104        w2_per_point = (mu_prev - mu_curr) ** 2 + (std_prev - std_curr) ** 2
105        return np.sqrt(np.mean(w2_per_point))
106
107    def _should_stop(self, gp_prev):
108        """Early stopping based on improvement and posterior stability."""
109        if len(self.best_values) < self.stop_patience + 1:
110            return False
111
112        # 1. Improvement check
113        recent_improvements = np.diff(self.best_values[-self.stop_patience :])
114        improvement_stop = np.all(
115            np.abs(recent_improvements) < self.stop_threshold
116        )
117
118        # 2. Posterior stability
119        current_w = self._compute_wasserstein(gp_prev, self.gp)
120        self.wasserstein_history.append(current_w)
121
122        if len(self.wasserstein_history) >= 2 * self.stop_patience:
123            recent_w = self.wasserstein_history[-self.stop_patience :]
124            older_w = self.wasserstein_history[
125                -2 * self.stop_patience : -self.stop_patience
126            ]
127            _, p_value = mannwhitneyu(recent_w, older_w, alternative="greater")
128            mwu_stable = p_value > 0.1
129            var_stable = np.var(recent_w) < 1e-6
130            posterior_stable = mwu_stable or var_stable
131        else:
132            posterior_stable = False
133
134        return improvement_stop or posterior_stable
135
136    def optimize(self, n_iter=100):
137        """Run Bayesian optimization loop."""
138        print("Starting Initial Design Phase...")
139        self.X = self._sample_random(self.n_init)
140        self.y = []
141
142        for i, x in enumerate(self.X):
143            y_val = self.f(x)
144            self.y.append(y_val)
145            current_best = np.min(self.y)
146            self.best_values.append(current_best)
147            self.acquisition_values.append(0)
148            self.gp_variance.append(0)
149            self.phase.append("initial")
150            print(
151                f"  Initial sample {i+1}/{self.n_init}: f(x) = {y_val:.6f}, best = {current_best:.6f}"
152            )
153
154        print(f"Initial Design Complete. Best value: {np.min(self.y):.6f}")
155        print("\nStarting Bayesian Optimization Phase...")
156
157        gp_prev = None
158        for i in tqdm(range(n_iter), desc="Bayesian Optimization"):
159            if i > 0:
160                gp_prev = deepcopy(self.gp)
161
162            self.gp.fit(self.X, self.y)
163
164            X_candidate = self._sample_random(1000)
165            acq = self._acquisition(X_candidate)
166            best_acq_idx = np.argmax(acq)
167            x_next = X_candidate[best_acq_idx]
168            max_acq_value = acq[best_acq_idx]
169
170            _, gp_std = self.gp.predict([x_next], return_std=True)
171
172            y_next = self.f(x_next)
173            self.X = np.vstack((self.X, x_next))
174            self.y.append(y_next)
175            current_best = np.min(self.y)
176            self.best_values.append(current_best)
177            self.acquisition_values.append(max_acq_value)
178            self.gp_variance.append(gp_std[0])
179            self.phase.append("bayesian")
180
181            print(
182                f"  Iteration {i+1}: f(x) = {y_next:.6f}, best = {current_best:.6f}, "
183                f"EI = {max_acq_value:.6f}, σ = {gp_std[0]:.6f}"
184            )
185
186            if gp_prev is not None:
187                w_dist = self._compute_wasserstein(gp_prev, self.gp)
188                self.wasserstein_history.append(w_dist)
189                print(f"    Wasserstein distance: {w_dist:.8f}")
190
191            if self.early_stopping and i > self.n_init and gp_prev is not None:
192                if self._should_stop(gp_prev):
193                    print(f"Early stopping at iteration {i+1}")
194                    break
195
196        best_idx = np.argmin(self.y)
197        return self.X[best_idx], self.y[best_idx]
198
199
200def plot_optimization_history(optimizer, title):
201    """Plot optimization convergence and diagnostics."""
202    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
203    phase_colors = [
204        "red" if p == "initial" else "blue" for p in optimizer.phase
205    ]
206    iterations = range(len(optimizer.best_values))
207
208    # Plot 1: Convergence
209    axes[0, 0].scatter(
210        iterations, optimizer.best_values, c=phase_colors, alpha=0.7, s=30
211    )
212    axes[0, 0].plot(optimizer.best_values, "k-", alpha=0.3, linewidth=1)
213    axes[0, 0].set_xlabel("Iteration")
214    axes[0, 0].set_ylabel("Best Objective Value")
215    axes[0, 0].set_title(f"{title} - Convergence (Red=Initial, Blue=Bayesian)")
216    axes[0, 0].grid(True, alpha=0.3)
217
218    n_initial = sum(1 for p in optimizer.phase if p == "initial")
219    axes[0, 0].axvline(
220        x=n_initial - 0.5,
221        color="orange",
222        linestyle="--",
223        alpha=0.8,
224        linewidth=2,
225        label="Phase Boundary",
226    )
227    axes[0, 0].legend()
228
229    # Plot 2: Acquisition
230    bayesian_iters = [
231        i for i, p in enumerate(optimizer.phase) if p == "bayesian"
232    ]
233    bayesian_acq = [optimizer.acquisition_values[i] for i in bayesian_iters]
234    if bayesian_acq:
235        axes[0, 1].plot(
236            bayesian_iters,
237            bayesian_acq,
238            "g-",
239            linewidth=2,
240            marker="o",
241            markersize=4,
242        )
243        axes[0, 1].set_xlabel("Iteration")
244        axes[0, 1].set_ylabel("Expected Improvement")
245        axes[0, 1].set_title(f"{title} - Acquisition Function")
246        axes[0, 1].grid(True, alpha=0.3)
247    else:
248        axes[0, 1].text(
249            0.5,
250            0.5,
251            "No Bayesian iterations",
252            ha="center",
253            va="center",
254            transform=axes[0, 1].transAxes,
255        )
256
257    # Plot 3: GP Uncertainty
258    if bayesian_acq:
259        bayesian_var = [optimizer.gp_variance[i] for i in bayesian_iters]
260        axes[1, 0].plot(
261            bayesian_iters,
262            bayesian_var,
263            "purple",
264            linewidth=2,
265            marker="s",
266            markersize=4,
267        )
268        axes[1, 0].set_xlabel("Iteration")
269        axes[1, 0].set_ylabel("GP Standard Deviation")
270        axes[1, 0].set_title(f"{title} - GP Uncertainty")
271        axes[1, 0].grid(True, alpha=0.3)
272    else:
273        axes[1, 0].text(
274            0.5,
275            0.5,
276            "No Bayesian iterations",
277            ha="center",
278            va="center",
279            transform=axes[1, 0].transAxes,
280        )
281
282    # Plot 4: Posterior Stability
283    if optimizer.wasserstein_history:
284        w_start_iter = n_initial + 1
285        w_iterations = range(
286            w_start_iter, w_start_iter + len(optimizer.wasserstein_history)
287        )
288        axes[1, 1].plot(
289            w_iterations,
290            optimizer.wasserstein_history,
291            "r-",
292            linewidth=2,
293            marker="d",
294            markersize=4,
295        )
296        axes[1, 1].set_xlabel("Iteration")
297        axes[1, 1].set_ylabel("Wasserstein Distance")
298        axes[1, 1].set_title(f"{title} - Posterior Stability")
299        axes[1, 1].grid(True, alpha=0.3)
300        axes[1, 1].set_yscale("log")
301    else:
302        axes[1, 1].text(
303            0.5,
304            0.5,
305            "No Wasserstein history\n(Need ≥2 Bayesian iterations)",
306            ha="center",
307            va="center",
308            transform=axes[1, 1].transAxes,
309        )
310
311    plt.tight_layout()
312    plt.show()
313
314    print(f"\n{title} Optimization Summary:")
315    print("=" * 50)
316    print(f"Total iterations: {len(optimizer.best_values)}")
317    print(f"Initial design: {n_initial} samples")
318    print(
319        f"Bayesian optimization: {len(optimizer.best_values) - n_initial} iterations"
320    )
321    print(f"Initial best: {optimizer.best_values[n_initial-1]:.6f}")
322    print(f"Final best: {min(optimizer.best_values):.6f}")
323    print(
324        f"Improvement: {optimizer.best_values[n_initial-1] - min(optimizer.best_values):.6f}"
325    )
326    if optimizer.wasserstein_history:
327        print(
328            f"Avg Wasserstein distance: {np.mean(optimizer.wasserstein_history):.8f}"
329        )
330        print(
331            f"Final Wasserstein distance: {optimizer.wasserstein_history[-1]:.8f}"
332        )
333    if bayesian_acq:
334        print(f"Avg Expected Improvement: {np.mean(bayesian_acq):.6f}")
335        print(f"Final Expected Improvement: {bayesian_acq[-1]:.6f}")
class BOstopping:
 22class BOstopping:
 23    """Bayesian Optimization with robust early stopping criteria."""
 24
 25    def __init__(
 26        self,
 27        f,
 28        bounds,
 29        n_init=5,
 30        kappa=1.96,
 31        early_stopping=True,
 32        stop_patience=20,
 33        stop_threshold=0.01,
 34        n_test_points=100,
 35        alpha=1e-6,
 36        n_restarts_optimizer=25,
 37        seed=123,
 38    ):
 39        self.f = f
 40        self.bounds = np.array(bounds)
 41        self.n_init = n_init
 42        self.kappa = kappa
 43        self.early_stopping = early_stopping
 44        self.stop_patience = stop_patience
 45        self.stop_threshold = stop_threshold
 46        self.n_test_points = n_test_points
 47        self.alpha = alpha
 48        self.n_restarts_optimizer = n_restarts_optimizer
 49        self.seed = seed
 50
 51        np.random.seed(self.seed)
 52        self.test_points = self._sample_random(n_test_points)
 53
 54        # History tracking
 55        self.wasserstein_history = []
 56        self.X = []
 57        self.y = []
 58        self.best_values = []
 59        self.acquisition_values = []
 60        self.gp_variance = []
 61        self.phase = []
 62
 63        # GP setup
 64        self.gp = GaussianProcessRegressor(
 65            kernel=Matern(nu=2.5),
 66            alpha=self.alpha,
 67            normalize_y=True,
 68            n_restarts_optimizer=self.n_restarts_optimizer,
 69            random_state=self.seed,
 70        )
 71
 72    def _sample_random(self, n_samples):
 73        """Uniform sampling within bounds."""
 74        return np.random.uniform(
 75            self.bounds[:, 0],
 76            self.bounds[:, 1],
 77            size=(n_samples, len(self.bounds)),
 78        )
 79
 80    def _acquisition(self, X_candidate):
 81        """Expected Improvement acquisition function."""
 82        mu, sigma = self.gp.predict(X_candidate, return_std=True)
 83        mu_sample = np.min(self.y)  # Use actual observed minimum
 84
 85        sigma = np.maximum(sigma, 1e-6)
 86        gamma = (mu_sample - mu) / sigma
 87        ei = (mu_sample - mu) * norm.cdf(gamma) + sigma * norm.pdf(gamma)
 88        return ei
 89
 90    def _get_posterior_samples(self, gp, n_samples=100):
 91        """Sample from GP posterior at test points."""
 92        mu, sigma = gp.predict(self.test_points, return_std=True)
 93        return np.random.normal(
 94            mu, sigma, size=(n_samples, len(self.test_points))
 95        )
 96
 97    def _compute_wasserstein(self, gp_prev, gp_current):
 98        """Compute approximate Wasserstein distance between posteriors."""
 99        mu_prev, std_prev = gp_prev.predict(self.test_points, return_std=True)
100        mu_curr, std_curr = gp_current.predict(
101            self.test_points, return_std=True
102        )
103
104        # 2-Wasserstein distance for independent 1D Gaussians, averaged
105        w2_per_point = (mu_prev - mu_curr) ** 2 + (std_prev - std_curr) ** 2
106        return np.sqrt(np.mean(w2_per_point))
107
108    def _should_stop(self, gp_prev):
109        """Early stopping based on improvement and posterior stability."""
110        if len(self.best_values) < self.stop_patience + 1:
111            return False
112
113        # 1. Improvement check
114        recent_improvements = np.diff(self.best_values[-self.stop_patience :])
115        improvement_stop = np.all(
116            np.abs(recent_improvements) < self.stop_threshold
117        )
118
119        # 2. Posterior stability
120        current_w = self._compute_wasserstein(gp_prev, self.gp)
121        self.wasserstein_history.append(current_w)
122
123        if len(self.wasserstein_history) >= 2 * self.stop_patience:
124            recent_w = self.wasserstein_history[-self.stop_patience :]
125            older_w = self.wasserstein_history[
126                -2 * self.stop_patience : -self.stop_patience
127            ]
128            _, p_value = mannwhitneyu(recent_w, older_w, alternative="greater")
129            mwu_stable = p_value > 0.1
130            var_stable = np.var(recent_w) < 1e-6
131            posterior_stable = mwu_stable or var_stable
132        else:
133            posterior_stable = False
134
135        return improvement_stop or posterior_stable
136
137    def optimize(self, n_iter=100):
138        """Run Bayesian optimization loop."""
139        print("Starting Initial Design Phase...")
140        self.X = self._sample_random(self.n_init)
141        self.y = []
142
143        for i, x in enumerate(self.X):
144            y_val = self.f(x)
145            self.y.append(y_val)
146            current_best = np.min(self.y)
147            self.best_values.append(current_best)
148            self.acquisition_values.append(0)
149            self.gp_variance.append(0)
150            self.phase.append("initial")
151            print(
152                f"  Initial sample {i+1}/{self.n_init}: f(x) = {y_val:.6f}, best = {current_best:.6f}"
153            )
154
155        print(f"Initial Design Complete. Best value: {np.min(self.y):.6f}")
156        print("\nStarting Bayesian Optimization Phase...")
157
158        gp_prev = None
159        for i in tqdm(range(n_iter), desc="Bayesian Optimization"):
160            if i > 0:
161                gp_prev = deepcopy(self.gp)
162
163            self.gp.fit(self.X, self.y)
164
165            X_candidate = self._sample_random(1000)
166            acq = self._acquisition(X_candidate)
167            best_acq_idx = np.argmax(acq)
168            x_next = X_candidate[best_acq_idx]
169            max_acq_value = acq[best_acq_idx]
170
171            _, gp_std = self.gp.predict([x_next], return_std=True)
172
173            y_next = self.f(x_next)
174            self.X = np.vstack((self.X, x_next))
175            self.y.append(y_next)
176            current_best = np.min(self.y)
177            self.best_values.append(current_best)
178            self.acquisition_values.append(max_acq_value)
179            self.gp_variance.append(gp_std[0])
180            self.phase.append("bayesian")
181
182            print(
183                f"  Iteration {i+1}: f(x) = {y_next:.6f}, best = {current_best:.6f}, "
184                f"EI = {max_acq_value:.6f}, σ = {gp_std[0]:.6f}"
185            )
186
187            if gp_prev is not None:
188                w_dist = self._compute_wasserstein(gp_prev, self.gp)
189                self.wasserstein_history.append(w_dist)
190                print(f"    Wasserstein distance: {w_dist:.8f}")
191
192            if self.early_stopping and i > self.n_init and gp_prev is not None:
193                if self._should_stop(gp_prev):
194                    print(f"Early stopping at iteration {i+1}")
195                    break
196
197        best_idx = np.argmin(self.y)
198        return self.X[best_idx], self.y[best_idx]

Bayesian Optimization with robust early stopping criteria.

def plot_optimization_history(optimizer, title):
201def plot_optimization_history(optimizer, title):
202    """Plot optimization convergence and diagnostics."""
203    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
204    phase_colors = [
205        "red" if p == "initial" else "blue" for p in optimizer.phase
206    ]
207    iterations = range(len(optimizer.best_values))
208
209    # Plot 1: Convergence
210    axes[0, 0].scatter(
211        iterations, optimizer.best_values, c=phase_colors, alpha=0.7, s=30
212    )
213    axes[0, 0].plot(optimizer.best_values, "k-", alpha=0.3, linewidth=1)
214    axes[0, 0].set_xlabel("Iteration")
215    axes[0, 0].set_ylabel("Best Objective Value")
216    axes[0, 0].set_title(f"{title} - Convergence (Red=Initial, Blue=Bayesian)")
217    axes[0, 0].grid(True, alpha=0.3)
218
219    n_initial = sum(1 for p in optimizer.phase if p == "initial")
220    axes[0, 0].axvline(
221        x=n_initial - 0.5,
222        color="orange",
223        linestyle="--",
224        alpha=0.8,
225        linewidth=2,
226        label="Phase Boundary",
227    )
228    axes[0, 0].legend()
229
230    # Plot 2: Acquisition
231    bayesian_iters = [
232        i for i, p in enumerate(optimizer.phase) if p == "bayesian"
233    ]
234    bayesian_acq = [optimizer.acquisition_values[i] for i in bayesian_iters]
235    if bayesian_acq:
236        axes[0, 1].plot(
237            bayesian_iters,
238            bayesian_acq,
239            "g-",
240            linewidth=2,
241            marker="o",
242            markersize=4,
243        )
244        axes[0, 1].set_xlabel("Iteration")
245        axes[0, 1].set_ylabel("Expected Improvement")
246        axes[0, 1].set_title(f"{title} - Acquisition Function")
247        axes[0, 1].grid(True, alpha=0.3)
248    else:
249        axes[0, 1].text(
250            0.5,
251            0.5,
252            "No Bayesian iterations",
253            ha="center",
254            va="center",
255            transform=axes[0, 1].transAxes,
256        )
257
258    # Plot 3: GP Uncertainty
259    if bayesian_acq:
260        bayesian_var = [optimizer.gp_variance[i] for i in bayesian_iters]
261        axes[1, 0].plot(
262            bayesian_iters,
263            bayesian_var,
264            "purple",
265            linewidth=2,
266            marker="s",
267            markersize=4,
268        )
269        axes[1, 0].set_xlabel("Iteration")
270        axes[1, 0].set_ylabel("GP Standard Deviation")
271        axes[1, 0].set_title(f"{title} - GP Uncertainty")
272        axes[1, 0].grid(True, alpha=0.3)
273    else:
274        axes[1, 0].text(
275            0.5,
276            0.5,
277            "No Bayesian iterations",
278            ha="center",
279            va="center",
280            transform=axes[1, 0].transAxes,
281        )
282
283    # Plot 4: Posterior Stability
284    if optimizer.wasserstein_history:
285        w_start_iter = n_initial + 1
286        w_iterations = range(
287            w_start_iter, w_start_iter + len(optimizer.wasserstein_history)
288        )
289        axes[1, 1].plot(
290            w_iterations,
291            optimizer.wasserstein_history,
292            "r-",
293            linewidth=2,
294            marker="d",
295            markersize=4,
296        )
297        axes[1, 1].set_xlabel("Iteration")
298        axes[1, 1].set_ylabel("Wasserstein Distance")
299        axes[1, 1].set_title(f"{title} - Posterior Stability")
300        axes[1, 1].grid(True, alpha=0.3)
301        axes[1, 1].set_yscale("log")
302    else:
303        axes[1, 1].text(
304            0.5,
305            0.5,
306            "No Wasserstein history\n(Need ≥2 Bayesian iterations)",
307            ha="center",
308            va="center",
309            transform=axes[1, 1].transAxes,
310        )
311
312    plt.tight_layout()
313    plt.show()
314
315    print(f"\n{title} Optimization Summary:")
316    print("=" * 50)
317    print(f"Total iterations: {len(optimizer.best_values)}")
318    print(f"Initial design: {n_initial} samples")
319    print(
320        f"Bayesian optimization: {len(optimizer.best_values) - n_initial} iterations"
321    )
322    print(f"Initial best: {optimizer.best_values[n_initial-1]:.6f}")
323    print(f"Final best: {min(optimizer.best_values):.6f}")
324    print(
325        f"Improvement: {optimizer.best_values[n_initial-1] - min(optimizer.best_values):.6f}"
326    )
327    if optimizer.wasserstein_history:
328        print(
329            f"Avg Wasserstein distance: {np.mean(optimizer.wasserstein_history):.8f}"
330        )
331        print(
332            f"Final Wasserstein distance: {optimizer.wasserstein_history[-1]:.8f}"
333        )
334    if bayesian_acq:
335        print(f"Avg Expected Improvement: {np.mean(bayesian_acq):.6f}")
336        print(f"Final Expected Improvement: {bayesian_acq[-1]:.6f}")

Plot optimization convergence and diagnostics.