GPopt.BOstopping
1import numpy as np 2from scipy.stats import mannwhitneyu, norm 3from scipy.stats import wasserstein_distance 4from sklearn.gaussian_process import GaussianProcessRegressor 5from sklearn.gaussian_process.kernels import Matern 6from copy import deepcopy 7from tqdm import tqdm 8import xgboost as xgb 9from sklearn.datasets import make_classification 10from sklearn.model_selection import ( 11 train_test_split, 12 cross_validate, 13 StratifiedKFold, 14) 15from sklearn.metrics import accuracy_score, log_loss 16from sklearn.preprocessing import StandardScaler 17import matplotlib.pyplot as plt 18 19 20# BayesianOptimization class with robust early stopping 21class BOstopping: 22 """Bayesian Optimization with robust early stopping criteria.""" 23 24 def __init__( 25 self, 26 f, 27 bounds, 28 n_init=5, 29 kappa=1.96, 30 early_stopping=True, 31 stop_patience=20, 32 stop_threshold=0.01, 33 n_test_points=100, 34 alpha=1e-6, 35 n_restarts_optimizer=25, 36 seed=123, 37 ): 38 self.f = f 39 self.bounds = np.array(bounds) 40 self.n_init = n_init 41 self.kappa = kappa 42 self.early_stopping = early_stopping 43 self.stop_patience = stop_patience 44 self.stop_threshold = stop_threshold 45 self.n_test_points = n_test_points 46 self.alpha = alpha 47 self.n_restarts_optimizer = n_restarts_optimizer 48 self.seed = seed 49 50 np.random.seed(self.seed) 51 self.test_points = self._sample_random(n_test_points) 52 53 # History tracking 54 self.wasserstein_history = [] 55 self.X = [] 56 self.y = [] 57 self.best_values = [] 58 self.acquisition_values = [] 59 self.gp_variance = [] 60 self.phase = [] 61 62 # GP setup 63 self.gp = GaussianProcessRegressor( 64 kernel=Matern(nu=2.5), 65 alpha=self.alpha, 66 normalize_y=True, 67 n_restarts_optimizer=self.n_restarts_optimizer, 68 random_state=self.seed, 69 ) 70 71 def _sample_random(self, n_samples): 72 """Uniform sampling within bounds.""" 73 return np.random.uniform( 74 self.bounds[:, 0], 75 self.bounds[:, 1], 76 size=(n_samples, len(self.bounds)), 77 ) 78 79 def _acquisition(self, X_candidate): 80 """Expected Improvement acquisition function.""" 81 mu, sigma = self.gp.predict(X_candidate, return_std=True) 82 mu_sample = np.min(self.y) # Use actual observed minimum 83 84 sigma = np.maximum(sigma, 1e-6) 85 gamma = (mu_sample - mu) / sigma 86 ei = (mu_sample - mu) * norm.cdf(gamma) + sigma * norm.pdf(gamma) 87 return ei 88 89 def _get_posterior_samples(self, gp, n_samples=100): 90 """Sample from GP posterior at test points.""" 91 mu, sigma = gp.predict(self.test_points, return_std=True) 92 return np.random.normal( 93 mu, sigma, size=(n_samples, len(self.test_points)) 94 ) 95 96 def _compute_wasserstein(self, gp_prev, gp_current): 97 """Compute approximate Wasserstein distance between posteriors.""" 98 mu_prev, std_prev = gp_prev.predict(self.test_points, return_std=True) 99 mu_curr, std_curr = gp_current.predict( 100 self.test_points, return_std=True 101 ) 102 103 # 2-Wasserstein distance for independent 1D Gaussians, averaged 104 w2_per_point = (mu_prev - mu_curr) ** 2 + (std_prev - std_curr) ** 2 105 return np.sqrt(np.mean(w2_per_point)) 106 107 def _should_stop(self, gp_prev): 108 """Early stopping based on improvement and posterior stability.""" 109 if len(self.best_values) < self.stop_patience + 1: 110 return False 111 112 # 1. Improvement check 113 recent_improvements = np.diff(self.best_values[-self.stop_patience :]) 114 improvement_stop = np.all( 115 np.abs(recent_improvements) < self.stop_threshold 116 ) 117 118 # 2. Posterior stability 119 current_w = self._compute_wasserstein(gp_prev, self.gp) 120 self.wasserstein_history.append(current_w) 121 122 if len(self.wasserstein_history) >= 2 * self.stop_patience: 123 recent_w = self.wasserstein_history[-self.stop_patience :] 124 older_w = self.wasserstein_history[ 125 -2 * self.stop_patience : -self.stop_patience 126 ] 127 _, p_value = mannwhitneyu(recent_w, older_w, alternative="greater") 128 mwu_stable = p_value > 0.1 129 var_stable = np.var(recent_w) < 1e-6 130 posterior_stable = mwu_stable or var_stable 131 else: 132 posterior_stable = False 133 134 return improvement_stop or posterior_stable 135 136 def optimize(self, n_iter=100): 137 """Run Bayesian optimization loop.""" 138 print("Starting Initial Design Phase...") 139 self.X = self._sample_random(self.n_init) 140 self.y = [] 141 142 for i, x in enumerate(self.X): 143 y_val = self.f(x) 144 self.y.append(y_val) 145 current_best = np.min(self.y) 146 self.best_values.append(current_best) 147 self.acquisition_values.append(0) 148 self.gp_variance.append(0) 149 self.phase.append("initial") 150 print( 151 f" Initial sample {i+1}/{self.n_init}: f(x) = {y_val:.6f}, best = {current_best:.6f}" 152 ) 153 154 print(f"Initial Design Complete. Best value: {np.min(self.y):.6f}") 155 print("\nStarting Bayesian Optimization Phase...") 156 157 gp_prev = None 158 for i in tqdm(range(n_iter), desc="Bayesian Optimization"): 159 if i > 0: 160 gp_prev = deepcopy(self.gp) 161 162 self.gp.fit(self.X, self.y) 163 164 X_candidate = self._sample_random(1000) 165 acq = self._acquisition(X_candidate) 166 best_acq_idx = np.argmax(acq) 167 x_next = X_candidate[best_acq_idx] 168 max_acq_value = acq[best_acq_idx] 169 170 _, gp_std = self.gp.predict([x_next], return_std=True) 171 172 y_next = self.f(x_next) 173 self.X = np.vstack((self.X, x_next)) 174 self.y.append(y_next) 175 current_best = np.min(self.y) 176 self.best_values.append(current_best) 177 self.acquisition_values.append(max_acq_value) 178 self.gp_variance.append(gp_std[0]) 179 self.phase.append("bayesian") 180 181 print( 182 f" Iteration {i+1}: f(x) = {y_next:.6f}, best = {current_best:.6f}, " 183 f"EI = {max_acq_value:.6f}, σ = {gp_std[0]:.6f}" 184 ) 185 186 if gp_prev is not None: 187 w_dist = self._compute_wasserstein(gp_prev, self.gp) 188 self.wasserstein_history.append(w_dist) 189 print(f" Wasserstein distance: {w_dist:.8f}") 190 191 if self.early_stopping and i > self.n_init and gp_prev is not None: 192 if self._should_stop(gp_prev): 193 print(f"Early stopping at iteration {i+1}") 194 break 195 196 best_idx = np.argmin(self.y) 197 return self.X[best_idx], self.y[best_idx] 198 199 200def plot_optimization_history(optimizer, title): 201 """Plot optimization convergence and diagnostics.""" 202 fig, axes = plt.subplots(2, 2, figsize=(15, 10)) 203 phase_colors = [ 204 "red" if p == "initial" else "blue" for p in optimizer.phase 205 ] 206 iterations = range(len(optimizer.best_values)) 207 208 # Plot 1: Convergence 209 axes[0, 0].scatter( 210 iterations, optimizer.best_values, c=phase_colors, alpha=0.7, s=30 211 ) 212 axes[0, 0].plot(optimizer.best_values, "k-", alpha=0.3, linewidth=1) 213 axes[0, 0].set_xlabel("Iteration") 214 axes[0, 0].set_ylabel("Best Objective Value") 215 axes[0, 0].set_title(f"{title} - Convergence (Red=Initial, Blue=Bayesian)") 216 axes[0, 0].grid(True, alpha=0.3) 217 218 n_initial = sum(1 for p in optimizer.phase if p == "initial") 219 axes[0, 0].axvline( 220 x=n_initial - 0.5, 221 color="orange", 222 linestyle="--", 223 alpha=0.8, 224 linewidth=2, 225 label="Phase Boundary", 226 ) 227 axes[0, 0].legend() 228 229 # Plot 2: Acquisition 230 bayesian_iters = [ 231 i for i, p in enumerate(optimizer.phase) if p == "bayesian" 232 ] 233 bayesian_acq = [optimizer.acquisition_values[i] for i in bayesian_iters] 234 if bayesian_acq: 235 axes[0, 1].plot( 236 bayesian_iters, 237 bayesian_acq, 238 "g-", 239 linewidth=2, 240 marker="o", 241 markersize=4, 242 ) 243 axes[0, 1].set_xlabel("Iteration") 244 axes[0, 1].set_ylabel("Expected Improvement") 245 axes[0, 1].set_title(f"{title} - Acquisition Function") 246 axes[0, 1].grid(True, alpha=0.3) 247 else: 248 axes[0, 1].text( 249 0.5, 250 0.5, 251 "No Bayesian iterations", 252 ha="center", 253 va="center", 254 transform=axes[0, 1].transAxes, 255 ) 256 257 # Plot 3: GP Uncertainty 258 if bayesian_acq: 259 bayesian_var = [optimizer.gp_variance[i] for i in bayesian_iters] 260 axes[1, 0].plot( 261 bayesian_iters, 262 bayesian_var, 263 "purple", 264 linewidth=2, 265 marker="s", 266 markersize=4, 267 ) 268 axes[1, 0].set_xlabel("Iteration") 269 axes[1, 0].set_ylabel("GP Standard Deviation") 270 axes[1, 0].set_title(f"{title} - GP Uncertainty") 271 axes[1, 0].grid(True, alpha=0.3) 272 else: 273 axes[1, 0].text( 274 0.5, 275 0.5, 276 "No Bayesian iterations", 277 ha="center", 278 va="center", 279 transform=axes[1, 0].transAxes, 280 ) 281 282 # Plot 4: Posterior Stability 283 if optimizer.wasserstein_history: 284 w_start_iter = n_initial + 1 285 w_iterations = range( 286 w_start_iter, w_start_iter + len(optimizer.wasserstein_history) 287 ) 288 axes[1, 1].plot( 289 w_iterations, 290 optimizer.wasserstein_history, 291 "r-", 292 linewidth=2, 293 marker="d", 294 markersize=4, 295 ) 296 axes[1, 1].set_xlabel("Iteration") 297 axes[1, 1].set_ylabel("Wasserstein Distance") 298 axes[1, 1].set_title(f"{title} - Posterior Stability") 299 axes[1, 1].grid(True, alpha=0.3) 300 axes[1, 1].set_yscale("log") 301 else: 302 axes[1, 1].text( 303 0.5, 304 0.5, 305 "No Wasserstein history\n(Need ≥2 Bayesian iterations)", 306 ha="center", 307 va="center", 308 transform=axes[1, 1].transAxes, 309 ) 310 311 plt.tight_layout() 312 plt.show() 313 314 print(f"\n{title} Optimization Summary:") 315 print("=" * 50) 316 print(f"Total iterations: {len(optimizer.best_values)}") 317 print(f"Initial design: {n_initial} samples") 318 print( 319 f"Bayesian optimization: {len(optimizer.best_values) - n_initial} iterations" 320 ) 321 print(f"Initial best: {optimizer.best_values[n_initial-1]:.6f}") 322 print(f"Final best: {min(optimizer.best_values):.6f}") 323 print( 324 f"Improvement: {optimizer.best_values[n_initial-1] - min(optimizer.best_values):.6f}" 325 ) 326 if optimizer.wasserstein_history: 327 print( 328 f"Avg Wasserstein distance: {np.mean(optimizer.wasserstein_history):.8f}" 329 ) 330 print( 331 f"Final Wasserstein distance: {optimizer.wasserstein_history[-1]:.8f}" 332 ) 333 if bayesian_acq: 334 print(f"Avg Expected Improvement: {np.mean(bayesian_acq):.6f}") 335 print(f"Final Expected Improvement: {bayesian_acq[-1]:.6f}")
class
BOstopping:
22class BOstopping: 23 """Bayesian Optimization with robust early stopping criteria.""" 24 25 def __init__( 26 self, 27 f, 28 bounds, 29 n_init=5, 30 kappa=1.96, 31 early_stopping=True, 32 stop_patience=20, 33 stop_threshold=0.01, 34 n_test_points=100, 35 alpha=1e-6, 36 n_restarts_optimizer=25, 37 seed=123, 38 ): 39 self.f = f 40 self.bounds = np.array(bounds) 41 self.n_init = n_init 42 self.kappa = kappa 43 self.early_stopping = early_stopping 44 self.stop_patience = stop_patience 45 self.stop_threshold = stop_threshold 46 self.n_test_points = n_test_points 47 self.alpha = alpha 48 self.n_restarts_optimizer = n_restarts_optimizer 49 self.seed = seed 50 51 np.random.seed(self.seed) 52 self.test_points = self._sample_random(n_test_points) 53 54 # History tracking 55 self.wasserstein_history = [] 56 self.X = [] 57 self.y = [] 58 self.best_values = [] 59 self.acquisition_values = [] 60 self.gp_variance = [] 61 self.phase = [] 62 63 # GP setup 64 self.gp = GaussianProcessRegressor( 65 kernel=Matern(nu=2.5), 66 alpha=self.alpha, 67 normalize_y=True, 68 n_restarts_optimizer=self.n_restarts_optimizer, 69 random_state=self.seed, 70 ) 71 72 def _sample_random(self, n_samples): 73 """Uniform sampling within bounds.""" 74 return np.random.uniform( 75 self.bounds[:, 0], 76 self.bounds[:, 1], 77 size=(n_samples, len(self.bounds)), 78 ) 79 80 def _acquisition(self, X_candidate): 81 """Expected Improvement acquisition function.""" 82 mu, sigma = self.gp.predict(X_candidate, return_std=True) 83 mu_sample = np.min(self.y) # Use actual observed minimum 84 85 sigma = np.maximum(sigma, 1e-6) 86 gamma = (mu_sample - mu) / sigma 87 ei = (mu_sample - mu) * norm.cdf(gamma) + sigma * norm.pdf(gamma) 88 return ei 89 90 def _get_posterior_samples(self, gp, n_samples=100): 91 """Sample from GP posterior at test points.""" 92 mu, sigma = gp.predict(self.test_points, return_std=True) 93 return np.random.normal( 94 mu, sigma, size=(n_samples, len(self.test_points)) 95 ) 96 97 def _compute_wasserstein(self, gp_prev, gp_current): 98 """Compute approximate Wasserstein distance between posteriors.""" 99 mu_prev, std_prev = gp_prev.predict(self.test_points, return_std=True) 100 mu_curr, std_curr = gp_current.predict( 101 self.test_points, return_std=True 102 ) 103 104 # 2-Wasserstein distance for independent 1D Gaussians, averaged 105 w2_per_point = (mu_prev - mu_curr) ** 2 + (std_prev - std_curr) ** 2 106 return np.sqrt(np.mean(w2_per_point)) 107 108 def _should_stop(self, gp_prev): 109 """Early stopping based on improvement and posterior stability.""" 110 if len(self.best_values) < self.stop_patience + 1: 111 return False 112 113 # 1. Improvement check 114 recent_improvements = np.diff(self.best_values[-self.stop_patience :]) 115 improvement_stop = np.all( 116 np.abs(recent_improvements) < self.stop_threshold 117 ) 118 119 # 2. Posterior stability 120 current_w = self._compute_wasserstein(gp_prev, self.gp) 121 self.wasserstein_history.append(current_w) 122 123 if len(self.wasserstein_history) >= 2 * self.stop_patience: 124 recent_w = self.wasserstein_history[-self.stop_patience :] 125 older_w = self.wasserstein_history[ 126 -2 * self.stop_patience : -self.stop_patience 127 ] 128 _, p_value = mannwhitneyu(recent_w, older_w, alternative="greater") 129 mwu_stable = p_value > 0.1 130 var_stable = np.var(recent_w) < 1e-6 131 posterior_stable = mwu_stable or var_stable 132 else: 133 posterior_stable = False 134 135 return improvement_stop or posterior_stable 136 137 def optimize(self, n_iter=100): 138 """Run Bayesian optimization loop.""" 139 print("Starting Initial Design Phase...") 140 self.X = self._sample_random(self.n_init) 141 self.y = [] 142 143 for i, x in enumerate(self.X): 144 y_val = self.f(x) 145 self.y.append(y_val) 146 current_best = np.min(self.y) 147 self.best_values.append(current_best) 148 self.acquisition_values.append(0) 149 self.gp_variance.append(0) 150 self.phase.append("initial") 151 print( 152 f" Initial sample {i+1}/{self.n_init}: f(x) = {y_val:.6f}, best = {current_best:.6f}" 153 ) 154 155 print(f"Initial Design Complete. Best value: {np.min(self.y):.6f}") 156 print("\nStarting Bayesian Optimization Phase...") 157 158 gp_prev = None 159 for i in tqdm(range(n_iter), desc="Bayesian Optimization"): 160 if i > 0: 161 gp_prev = deepcopy(self.gp) 162 163 self.gp.fit(self.X, self.y) 164 165 X_candidate = self._sample_random(1000) 166 acq = self._acquisition(X_candidate) 167 best_acq_idx = np.argmax(acq) 168 x_next = X_candidate[best_acq_idx] 169 max_acq_value = acq[best_acq_idx] 170 171 _, gp_std = self.gp.predict([x_next], return_std=True) 172 173 y_next = self.f(x_next) 174 self.X = np.vstack((self.X, x_next)) 175 self.y.append(y_next) 176 current_best = np.min(self.y) 177 self.best_values.append(current_best) 178 self.acquisition_values.append(max_acq_value) 179 self.gp_variance.append(gp_std[0]) 180 self.phase.append("bayesian") 181 182 print( 183 f" Iteration {i+1}: f(x) = {y_next:.6f}, best = {current_best:.6f}, " 184 f"EI = {max_acq_value:.6f}, σ = {gp_std[0]:.6f}" 185 ) 186 187 if gp_prev is not None: 188 w_dist = self._compute_wasserstein(gp_prev, self.gp) 189 self.wasserstein_history.append(w_dist) 190 print(f" Wasserstein distance: {w_dist:.8f}") 191 192 if self.early_stopping and i > self.n_init and gp_prev is not None: 193 if self._should_stop(gp_prev): 194 print(f"Early stopping at iteration {i+1}") 195 break 196 197 best_idx = np.argmin(self.y) 198 return self.X[best_idx], self.y[best_idx]
Bayesian Optimization with robust early stopping criteria.
def
plot_optimization_history(optimizer, title):
201def plot_optimization_history(optimizer, title): 202 """Plot optimization convergence and diagnostics.""" 203 fig, axes = plt.subplots(2, 2, figsize=(15, 10)) 204 phase_colors = [ 205 "red" if p == "initial" else "blue" for p in optimizer.phase 206 ] 207 iterations = range(len(optimizer.best_values)) 208 209 # Plot 1: Convergence 210 axes[0, 0].scatter( 211 iterations, optimizer.best_values, c=phase_colors, alpha=0.7, s=30 212 ) 213 axes[0, 0].plot(optimizer.best_values, "k-", alpha=0.3, linewidth=1) 214 axes[0, 0].set_xlabel("Iteration") 215 axes[0, 0].set_ylabel("Best Objective Value") 216 axes[0, 0].set_title(f"{title} - Convergence (Red=Initial, Blue=Bayesian)") 217 axes[0, 0].grid(True, alpha=0.3) 218 219 n_initial = sum(1 for p in optimizer.phase if p == "initial") 220 axes[0, 0].axvline( 221 x=n_initial - 0.5, 222 color="orange", 223 linestyle="--", 224 alpha=0.8, 225 linewidth=2, 226 label="Phase Boundary", 227 ) 228 axes[0, 0].legend() 229 230 # Plot 2: Acquisition 231 bayesian_iters = [ 232 i for i, p in enumerate(optimizer.phase) if p == "bayesian" 233 ] 234 bayesian_acq = [optimizer.acquisition_values[i] for i in bayesian_iters] 235 if bayesian_acq: 236 axes[0, 1].plot( 237 bayesian_iters, 238 bayesian_acq, 239 "g-", 240 linewidth=2, 241 marker="o", 242 markersize=4, 243 ) 244 axes[0, 1].set_xlabel("Iteration") 245 axes[0, 1].set_ylabel("Expected Improvement") 246 axes[0, 1].set_title(f"{title} - Acquisition Function") 247 axes[0, 1].grid(True, alpha=0.3) 248 else: 249 axes[0, 1].text( 250 0.5, 251 0.5, 252 "No Bayesian iterations", 253 ha="center", 254 va="center", 255 transform=axes[0, 1].transAxes, 256 ) 257 258 # Plot 3: GP Uncertainty 259 if bayesian_acq: 260 bayesian_var = [optimizer.gp_variance[i] for i in bayesian_iters] 261 axes[1, 0].plot( 262 bayesian_iters, 263 bayesian_var, 264 "purple", 265 linewidth=2, 266 marker="s", 267 markersize=4, 268 ) 269 axes[1, 0].set_xlabel("Iteration") 270 axes[1, 0].set_ylabel("GP Standard Deviation") 271 axes[1, 0].set_title(f"{title} - GP Uncertainty") 272 axes[1, 0].grid(True, alpha=0.3) 273 else: 274 axes[1, 0].text( 275 0.5, 276 0.5, 277 "No Bayesian iterations", 278 ha="center", 279 va="center", 280 transform=axes[1, 0].transAxes, 281 ) 282 283 # Plot 4: Posterior Stability 284 if optimizer.wasserstein_history: 285 w_start_iter = n_initial + 1 286 w_iterations = range( 287 w_start_iter, w_start_iter + len(optimizer.wasserstein_history) 288 ) 289 axes[1, 1].plot( 290 w_iterations, 291 optimizer.wasserstein_history, 292 "r-", 293 linewidth=2, 294 marker="d", 295 markersize=4, 296 ) 297 axes[1, 1].set_xlabel("Iteration") 298 axes[1, 1].set_ylabel("Wasserstein Distance") 299 axes[1, 1].set_title(f"{title} - Posterior Stability") 300 axes[1, 1].grid(True, alpha=0.3) 301 axes[1, 1].set_yscale("log") 302 else: 303 axes[1, 1].text( 304 0.5, 305 0.5, 306 "No Wasserstein history\n(Need ≥2 Bayesian iterations)", 307 ha="center", 308 va="center", 309 transform=axes[1, 1].transAxes, 310 ) 311 312 plt.tight_layout() 313 plt.show() 314 315 print(f"\n{title} Optimization Summary:") 316 print("=" * 50) 317 print(f"Total iterations: {len(optimizer.best_values)}") 318 print(f"Initial design: {n_initial} samples") 319 print( 320 f"Bayesian optimization: {len(optimizer.best_values) - n_initial} iterations" 321 ) 322 print(f"Initial best: {optimizer.best_values[n_initial-1]:.6f}") 323 print(f"Final best: {min(optimizer.best_values):.6f}") 324 print( 325 f"Improvement: {optimizer.best_values[n_initial-1] - min(optimizer.best_values):.6f}" 326 ) 327 if optimizer.wasserstein_history: 328 print( 329 f"Avg Wasserstein distance: {np.mean(optimizer.wasserstein_history):.8f}" 330 ) 331 print( 332 f"Final Wasserstein distance: {optimizer.wasserstein_history[-1]:.8f}" 333 ) 334 if bayesian_acq: 335 print(f"Avg Expected Improvement: {np.mean(bayesian_acq):.6f}") 336 print(f"Final Expected Improvement: {bayesian_acq[-1]:.6f}")
Plot optimization convergence and diagnostics.