In this vignette, we will demonstrate how to use the bayesianrvfl package for Bayesian optimization of hyperparameters in a machine learning model. We will use the Sonar dataset from the mlbench package and optimize hyperparameters for an XGBoost model.

The surrogate model used for Bayesian optimization is a Gaussian Random Vector Functional Link (RVFL) network (instead of a Gaussian Process), whose number of hidden-layer nodes and residual volatility are chosen by maximum likelihood estimation (MLE). The surrogate is first fitted to an initial design of 10 evaluations of the objective function; an Expected Improvement acquisition function is then used to choose the next point to sample in the hyperparameter space. Because the optimizer searches for a minimum, the objective function below returns the negated cross-validated ROC AUC, which is why all reported scores are negative.

library("bayesianrvfl")
## Loading required package: cclust
## Loading required package: memoise
## Loading required package: mlbench
## Loading required package: caret
## Loading required package: ggplot2
## Loading required package: lattice
# Packages: mlbench supplies the Sonar data, caret the partitioning and
# training helpers used below.
library(mlbench)
library(caret)

data("Sonar")

# Seeded so the split is reproducible: 75% of the rows (p = 0.75) go to
# `training`, the remaining rows to `testing`.
set.seed(998)
inTraining <- createDataPartition(Sonar[["Class"]], p = 0.75, list = FALSE)
training <- Sonar[inTraining, ]
testing <- Sonar[-inTraining, ]
objective <- function(xx) {
  # Objective for the hyperparameter search: fits an "xgbTree" model on
  # `training` with 3-fold cross-validation and returns the NEGATED
  # resampled ROC, so that a lower value means a better model.
  #
  # xx is read by position:
  #   xx[1] max_depth (floored), xx[2] eta, xx[3] subsample,
  #   xx[4] colsample_bytree,    xx[5] nrounds (floored).
  # gamma and min_child_weight are held fixed at 0 and 1.

  # Single-row grid: exactly one candidate configuration is evaluated.
  xgb_grid <- data.frame(
    max_depth = floor(xx[1]),
    eta = xx[2],
    subsample = xx[3],
    nrounds = floor(xx[5]),
    gamma = 0,
    colsample_bytree = xx[4],
    min_child_weight = 1
  )

  # twoClassSummary with classProbs = TRUE is what makes the "ROC" metric
  # available to train().
  cv_control <- trainControl(
    method = "cv",
    number = 3,
    classProbs = TRUE,
    summaryFunction = twoClassSummary
  )

  # Same seed on every call so all candidates are fitted from the same RNG
  # state. NOTE(review): this also resets the global RNG for the caller.
  set.seed(825)
  fitted_model <- train(
    Class ~ .,
    data = training,
    method = "xgbTree",
    trControl = cv_control,
    verbose = FALSE,
    tuneGrid = xgb_grid,
    metric = "ROC"
  )

  # Sign flipped: higher ROC is better, but the returned score is minimised.
  -getTrainPerf(fitted_model)$TrainROC
}
# Run the search over the box [lower, upper]. The bounds are listed in the
# order objective() reads xx: max_depth, eta, subsample, colsample_bytree,
# nrounds.
res_rvfl <- bayesianrvfl::bayes_opt(
  objective,                              # function being optimised
  lower = c(1, 0.001, 0.7, 0.7, 100),     # lower corner of the search box
  upper = c(8, 0.1, 1, 1, 250),           # upper corner of the search box
  type_acq = "ei",                        # acquisition function type
  nb_init = 10L,                          # points in the initial design
  nb_iter = 40L,                          # iterations of the algorithm
  surrogate_model = "rvfl"                # surrogate model
)
print(res_rvfl)
## 
##  ----- define initial design... 
## initial design 
##           1           2         3         4        5     scores
## 1  3.013043 0.095726501 0.9668618 0.9889073 121.4200 -0.8855556
## 2  6.518136 0.045880081 0.9078410 0.9706897 162.1820 -0.8904167
## 3  3.862838 0.068079493 0.8921520 0.9072116 162.0586 -0.9008135
## 4  7.181122 0.057690707 0.9982809 0.9386402 155.3268 -0.8755754
## 5  7.583271 0.011189544 0.8967117 0.7073841 122.8667 -0.8942063
## 6  1.318895 0.090082672 0.9125591 0.8433388 120.8209 -0.8687500
## 7  4.696738 0.025362686 0.8632198 0.9275379 134.9551 -0.9038889
## 8  7.246933 0.005163894 0.8782426 0.7649224 169.8944 -0.8965476
## 9  4.860045 0.033464151 0.7867479 0.7954543 139.8959 -0.9043651
## 10 4.196303 0.095495861 0.7441341 0.7694877 228.6742 -0.8947619
## 
## 
## finding hyperparams for surrogate... 
## 
##   |======================================================================| 100%
## 
## ----- iteration # 1 
## 
## next_param 
## [1]   1.000   0.001   1.000   0.700 250.000
## 
## score 
## [1] -0.8081647
## 
## current best param 
## [1]   4.86004510   0.03346415   0.78674792   0.79545430 139.89589605
## 
## current best score 
## [1] -0.9043651
## 
## 
## ----- iteration # 2 
## 
## next_param 
## [1]   2.1554164   0.1000000   0.9001092   0.7463488 190.3152003
## 
## score 
## [1] -0.9067659
## 
## current best param 
## [1]   2.1554164   0.1000000   0.9001092   0.7463488 190.3152003
## 
## current best score 
## [1] -0.9067659
## 
## 
## ----- iteration # 3 
## 
## next_param 
## [1]   6.370721   0.001000   0.700000   0.700000 222.029457
## 
## score 
## [1] -0.9094643
## 
## current best param 
## [1]   6.370721   0.001000   0.700000   0.700000 222.029457
## 
## current best score 
## [1] -0.9094643
## 
## 
## ----- iteration # 4 
## 
## next_param 
## [1]   1.0   0.1   1.0   0.7 100.0
## 
## score 
## [1] -0.8471627
## 
## current best param 
## [1]   6.370721   0.001000   0.700000   0.700000 222.029457
## 
## current best score 
## [1] -0.9094643
## 
## 
## ----- iteration # 5 
## 
## next_param 
## [1]   8.0   0.1   0.7   0.7 250.0
## 
## score 
## [1] -0.897619
## 
## current best param 
## [1]   6.370721   0.001000   0.700000   0.700000 222.029457
## 
## current best score 
## [1] -0.9094643
## 
## 
## ----- iteration # 6 
## 
## next_param 
## [1] 8e+00 1e-03 7e-01 1e+00 1e+02
## 
## score 
## [1] -0.8982341
## 
## current best param 
## [1]   6.370721   0.001000   0.700000   0.700000 222.029457
## 
## current best score 
## [1] -0.9094643
## 
## 
## ----- iteration # 7 
## 
## next_param 
## [1]   1.000   0.001   1.000   1.000 250.000
## 
## score 
## [1] -0.7797917
## 
## current best param 
## [1]   6.370721   0.001000   0.700000   0.700000 222.029457
## 
## current best score 
## [1] -0.9094643
## 
## 
## ----- iteration # 8 
## 
## next_param 
## [1]   1.0000   0.0010   0.7000   0.7000 145.3991
## 
## score 
## [1] -0.8355456
## 
## current best param 
## [1]   6.370721   0.001000   0.700000   0.700000 222.029457
## 
## current best score 
## [1] -0.9094643
## 
## 
## ----- iteration # 9 
## 
## next_param 
## [1] 1e+00 1e-03 7e-01 1e+00 1e+02
## 
## score 
## [1] -0.8124008
## 
## current best param 
## [1]   6.370721   0.001000   0.700000   0.700000 222.029457
## 
## current best score 
## [1] -0.9094643
## 
## 
## ----- iteration # 10 
## 
## next_param 
## [1]   2.9394035   0.0010000   0.9261346   0.7000000 109.7033386
## 
## score 
## [1] -0.8788492
## 
## current best param 
## [1]   6.370721   0.001000   0.700000   0.700000 222.029457
## 
## current best score 
## [1] -0.9094643
## 
## 
## ----- iteration # 11 
## 
## next_param 
## [1]   1.48552641   0.08195974   0.98278652   0.78081456 125.40221849
## 
## score 
## [1] -0.8417262
## 
## current best param 
## [1]   6.370721   0.001000   0.700000   0.700000 222.029457
## 
## current best score 
## [1] -0.9094643
## 
## 
## ----- iteration # 12 
## 
## next_param 
## [1]   8.000   0.001   1.000   0.700 250.000
## 
## score 
## [1] -0.8542659
## 
## current best param 
## [1]   6.370721   0.001000   0.700000   0.700000 222.029457
## 
## current best score 
## [1] -0.9094643
## 
## 
## ----- iteration # 13 
## 
## next_param 
## [1]   8.0   0.1   1.0   0.7 250.0
## 
## score 
## [1] -0.89125
## 
## current best param 
## [1]   6.370721   0.001000   0.700000   0.700000 222.029457
## 
## current best score 
## [1] -0.9094643
## 
## 
## ----- iteration # 14 
## 
## next_param 
## [1]   5.2043511   0.0010000   1.0000000   0.8239158 155.0608019
## 
## score 
## [1] -0.8266071
## 
## current best param 
## [1]   6.370721   0.001000   0.700000   0.700000 222.029457
## 
## current best score 
## [1] -0.9094643
## 
## 
## ----- iteration # 15 
## 
## next_param 
## [1]   1.0000000   0.1000000   0.7261650   0.9817481 194.8588843
## 
## score 
## [1] -0.8694048
## 
## current best param 
## [1]   6.370721   0.001000   0.700000   0.700000 222.029457
## 
## current best score 
## [1] -0.9094643
## 
## 
## ----- iteration # 16 
## 
## next_param 
## [1]   1.0   0.1   0.7   1.0 100.0
## 
## score 
## [1] -0.8674008
## 
## current best param 
## [1]   6.370721   0.001000   0.700000   0.700000 222.029457
## 
## current best score 
## [1] -0.9094643
## 
## 
## ----- iteration # 17 
## 
## next_param 
## [1]   6.642957   0.100000   1.000000   0.700000 108.022761
## 
## score 
## [1] -0.8873016
## 
## current best param 
## [1]   6.370721   0.001000   0.700000   0.700000 222.029457
## 
## current best score 
## [1] -0.9094643
## 
## 
## ----- iteration # 18 
## 
## next_param 
## [1] 1e+00 1e-03 1e+00 7e-01 1e+02
## 
## score 
## [1] -0.8056845
## 
## current best param 
## [1]   6.370721   0.001000   0.700000   0.700000 222.029457
## 
## current best score 
## [1] -0.9094643
## 
## 
## ----- iteration # 19 
## 
## next_param 
## [1]   8.0   0.1   0.7   1.0 250.0
## 
## score 
## [1] -0.9094643
## 
## current best param 
## [1]   6.370721   0.001000   0.700000   0.700000 222.029457
## 
## current best score 
## [1] -0.9094643
## 
## 
## ----- iteration # 20 
## 
## next_param 
## [1]   6.50280449   0.02599211   0.90977569   0.75533823 243.94207385
## 
## score 
## [1] -0.8938889
## 
## current best param 
## [1]   6.370721   0.001000   0.700000   0.700000 222.029457
## 
## current best score 
## [1] -0.9094643
## 
## 
## ----- iteration # 21 
## 
## next_param 
## [1]   3.13024329   0.06190122   1.00000000   0.85198598 226.48466952
## 
## score 
## [1] -0.8847619
## 
## current best param 
## [1]   6.370721   0.001000   0.700000   0.700000 222.029457
## 
## current best score 
## [1] -0.9094643
## 
## 
## ----- iteration # 22 
## 
## next_param 
## [1]   8.000   0.001   0.700   1.000 250.000
## 
## score 
## [1] -0.9002183
## 
## current best param 
## [1]   6.370721   0.001000   0.700000   0.700000 222.029457
## 
## current best score 
## [1] -0.9094643
## 
## 
## ----- iteration # 23 
## 
## next_param 
## [1] 3.048443e+00 3.802012e-03 9.021433e-01 8.568365e-01 1.993929e+02
## 
## score 
## [1] -0.8842659
## 
## current best param 
## [1]   6.370721   0.001000   0.700000   0.700000 222.029457
## 
## current best score 
## [1] -0.9094643
## 
## 
## ----- iteration # 24 
## 
## next_param 
## [1]   8.0   0.1   0.7   1.0 100.0
## 
## score 
## [1] -0.9080357
## 
## current best param 
## [1]   6.370721   0.001000   0.700000   0.700000 222.029457
## 
## current best score 
## [1] -0.9094643
## 
## 
## ----- iteration # 25 
## 
## next_param 
## [1] 1e+00 1e-03 1e+00 1e+00 1e+02
## 
## score 
## [1] -0.7597024
## 
## current best param 
## [1]   6.370721   0.001000   0.700000   0.700000 222.029457
## 
## current best score 
## [1] -0.9094643
## 
## 
## ----- iteration # 26 
## 
## next_param 
## [1] 8e+00 1e-03 7e-01 7e-01 1e+02
## 
## score 
## [1] -0.9006349
## 
## current best param 
## [1]   6.370721   0.001000   0.700000   0.700000 222.029457
## 
## current best score 
## [1] -0.9094643
## 
## 
## ----- iteration # 27 
## 
## next_param 
## [1] 1.200027e+00 9.748567e-03 8.423103e-01 9.656133e-01 1.137351e+02
## 
## score 
## [1] -0.836121
## 
## current best param 
## [1]   6.370721   0.001000   0.700000   0.700000 222.029457
## 
## current best score 
## [1] -0.9094643
## 
## 
## ----- iteration # 28 
## 
## next_param 
## [1]   1.69768818   0.03979698   0.72203610   0.79302132 187.74832383
## 
## score 
## [1] -0.8742659
## 
## current best param 
## [1]   6.370721   0.001000   0.700000   0.700000 222.029457
## 
## current best score 
## [1] -0.9094643
## 
## 
## ----- iteration # 29 
## 
## next_param 
## [1]   1.0   0.1   0.7   0.7 100.0
## 
## score 
## [1] -0.8799405
## 
## current best param 
## [1]   6.370721   0.001000   0.700000   0.700000 222.029457
## 
## current best score 
## [1] -0.9094643
## 
## 
## ----- iteration # 30 
## 
## next_param 
## [1]   4.65510540   0.09504582   0.82807394   0.82732907 241.70935162
## 
## score 
## [1] -0.896627
## 
## current best param 
## [1]   6.370721   0.001000   0.700000   0.700000 222.029457
## 
## current best score 
## [1] -0.9094643
## 
## 
## ----- iteration # 31 
## 
## next_param 
## [1]   1.0000   0.0010   1.0000   1.0000 226.1014
## 
## score 
## [1] -0.7797917
## 
## current best param 
## [1]   6.370721   0.001000   0.700000   0.700000 222.029457
## 
## current best score 
## [1] -0.9094643
## 
## 
## ----- iteration # 32 
## 
## next_param 
## [1]   3.743302   0.100000   0.700000   1.000000 226.647581
## 
## score 
## [1] -0.9104167
## 
## current best param 
## [1]   3.743302   0.100000   0.700000   1.000000 226.647581
## 
## current best score 
## [1] -0.9104167
## 
## 
## ----- iteration # 33 
## 
## next_param 
## [1]   8.0   0.1   1.0   0.7 100.0
## 
## score 
## [1] -0.8868056
## 
## current best param 
## [1]   3.743302   0.100000   0.700000   1.000000 226.647581
## 
## current best score 
## [1] -0.9104167
## 
## 
## ----- iteration # 34 
## 
## next_param 
## [1]   8.0000   0.0010   0.7000   1.0000 213.1815
## 
## score 
## [1] -0.9011508
## 
## current best param 
## [1]   3.743302   0.100000   0.700000   1.000000 226.647581
## 
## current best score 
## [1] -0.9104167
## 
## 
## ----- iteration # 35 
## 
## next_param 
## [1]   8.0000   0.1000   1.0000   0.7000 217.6668
## 
## score 
## [1] -0.8887698
## 
## current best param 
## [1]   3.743302   0.100000   0.700000   1.000000 226.647581
## 
## current best score 
## [1] -0.9104167
## 
## 
## ----- iteration # 36 
## 
## next_param 
## [1]   5.783553   0.001000   1.000000   0.700000 207.863667
## 
## score 
## [1] -0.8523413
## 
## current best param 
## [1]   3.743302   0.100000   0.700000   1.000000 226.647581
## 
## current best score 
## [1] -0.9104167
## 
## 
## ----- iteration # 37 
## 
## next_param 
## [1]   1.632167   0.001000   0.700000   0.700000 213.464137
## 
## score 
## [1] -0.8348115
## 
## current best param 
## [1]   3.743302   0.100000   0.700000   1.000000 226.647581
## 
## current best score 
## [1] -0.9104167
## 
## 
## ----- iteration # 38 
## 
## next_param 
## [1]   3.340422   0.001000   0.700000   0.700000 150.188590
## 
## score 
## [1] -0.9061111
## 
## current best param 
## [1]   3.743302   0.100000   0.700000   1.000000 226.647581
## 
## current best score 
## [1] -0.9104167
## 
## 
## ----- iteration # 39 
## 
## next_param 
## [1]   8.0   0.1   0.7   0.7 100.0
## 
## score 
## [1] -0.9026389
## 
## current best param 
## [1]   3.743302   0.100000   0.700000   1.000000 226.647581
## 
## current best score 
## [1] -0.9104167
## 
## 
## ----- iteration # 40 
## 
## next_param 
## [1]   1.0   0.1   1.0   0.7 250.0
## 
## score 
## [1] -0.8540873
## 
## current best param 
## [1]   3.743302   0.100000   0.700000   1.000000 226.647581
## 
## current best score 
## [1] -0.9104167
## $index_min
## [1] 42
## 
## $nb_is_found
## [1] 4
## 
## $best_param
## [1]   3.743302   0.100000   0.700000   1.000000 226.647581
## 
## $best_value
## [1] -0.9104167
## 
## $points_found
##              param1      param2    param3    param4   param5      score
##            3.013043 0.095726501 0.9668618 0.9889073 121.4200 -0.8855556
##            6.518136 0.045880081 0.9078410 0.9706897 162.1820 -0.8904167
##            3.862838 0.068079493 0.8921520 0.9072116 162.0586 -0.9008135
##            7.181122 0.057690707 0.9982809 0.9386402 155.3268 -0.8755754
##            7.583271 0.011189544 0.8967117 0.7073841 122.8667 -0.8942063
##            1.318895 0.090082672 0.9125591 0.8433388 120.8209 -0.8687500
##            4.696738 0.025362686 0.8632198 0.9275379 134.9551 -0.9038889
##            7.246933 0.005163894 0.8782426 0.7649224 169.8944 -0.8965476
##            4.860045 0.033464151 0.7867479 0.7954543 139.8959 -0.9043651
##            4.196303 0.095495861 0.7441341 0.7694877 228.6742 -0.8947619
## next_param 1.000000 0.001000000 1.0000000 0.7000000 250.0000 -0.8081647
## next_param 2.155416 0.100000000 0.9001092 0.7463488 190.3152 -0.9067659
## next_param 6.370721 0.001000000 0.7000000 0.7000000 222.0295 -0.9094643
## next_param 1.000000 0.100000000 1.0000000 0.7000000 100.0000 -0.8471627
## next_param 8.000000 0.100000000 0.7000000 0.7000000 250.0000 -0.8976190
## next_param 8.000000 0.001000000 0.7000000 1.0000000 100.0000 -0.8982341
## next_param 1.000000 0.001000000 1.0000000 1.0000000 250.0000 -0.7797917
## next_param 1.000000 0.001000000 0.7000000 0.7000000 145.3991 -0.8355456
## next_param 1.000000 0.001000000 0.7000000 1.0000000 100.0000 -0.8124008
## next_param 2.939403 0.001000000 0.9261346 0.7000000 109.7033 -0.8788492
## next_param 1.485526 0.081959745 0.9827865 0.7808146 125.4022 -0.8417262
## next_param 8.000000 0.001000000 1.0000000 0.7000000 250.0000 -0.8542659
## next_param 8.000000 0.100000000 1.0000000 0.7000000 250.0000 -0.8912500
## next_param 5.204351 0.001000000 1.0000000 0.8239158 155.0608 -0.8266071
## next_param 1.000000 0.100000000 0.7261650 0.9817481 194.8589 -0.8694048
## next_param 1.000000 0.100000000 0.7000000 1.0000000 100.0000 -0.8674008
## next_param 6.642957 0.100000000 1.0000000 0.7000000 108.0228 -0.8873016
## next_param 1.000000 0.001000000 1.0000000 0.7000000 100.0000 -0.8056845
## next_param 8.000000 0.100000000 0.7000000 1.0000000 250.0000 -0.9094643
## next_param 6.502804 0.025992114 0.9097757 0.7553382 243.9421 -0.8938889
## next_param 3.130243 0.061901220 1.0000000 0.8519860 226.4847 -0.8847619
## next_param 8.000000 0.001000000 0.7000000 1.0000000 250.0000 -0.9002183
## next_param 3.048443 0.003802012 0.9021433 0.8568365 199.3929 -0.8842659
## next_param 8.000000 0.100000000 0.7000000 1.0000000 100.0000 -0.9080357
## next_param 1.000000 0.001000000 1.0000000 1.0000000 100.0000 -0.7597024
## next_param 8.000000 0.001000000 0.7000000 0.7000000 100.0000 -0.9006349
## next_param 1.200027 0.009748567 0.8423103 0.9656133 113.7351 -0.8361210
## next_param 1.697688 0.039796981 0.7220361 0.7930213 187.7483 -0.8742659
## next_param 1.000000 0.100000000 0.7000000 0.7000000 100.0000 -0.8799405
## next_param 4.655105 0.095045825 0.8280739 0.8273291 241.7094 -0.8966270
## next_param 1.000000 0.001000000 1.0000000 1.0000000 226.1014 -0.7797917
## next_param 3.743302 0.100000000 0.7000000 1.0000000 226.6476 -0.9104167
## next_param 8.000000 0.100000000 1.0000000 0.7000000 100.0000 -0.8868056
## next_param 8.000000 0.001000000 0.7000000 1.0000000 213.1815 -0.9011508
## next_param 8.000000 0.100000000 1.0000000 0.7000000 217.6668 -0.8887698
## next_param 5.783553 0.001000000 1.0000000 0.7000000 207.8637 -0.8523413
## next_param 1.632167 0.001000000 0.7000000 0.7000000 213.4641 -0.8348115
## next_param 3.340422 0.001000000 0.7000000 0.7000000 150.1886 -0.9061111
## next_param 8.000000 0.100000000 0.7000000 0.7000000 100.0000 -0.9026389
## next_param 1.000000 0.100000000 1.0000000 0.7000000 250.0000 -0.8540873

Out-of-sample prediction with the best hyperparameters found

# Refit XGBoost on the training set with the best hyperparameters found by
# the optimization run, then predict classes for the held-out test set.
xx <- res_rvfl$best_param

# No resampling here: the hyperparameters are already fixed.
fitControl <- trainControl(method = "none", classProbs = TRUE)

# One-row grid; max_depth and nrounds are truncated to whole numbers,
# mirroring how the objective function treats them.
final_grid <- data.frame(
  max_depth = floor(xx[1]),
  eta = xx[2],
  subsample = xx[3],
  nrounds = floor(xx[5]),
  gamma = 0,
  colsample_bytree = xx[4],
  min_child_weight = 1
)

set.seed(825)
model <- train(
  Class ~ .,
  data = training,
  method = "xgbTree",
  trControl = fitControl,
  verbose = FALSE,
  tuneGrid = final_grid,
  metric = "ROC"
)

# Predicted class labels on the test set (printed below).
preds <- predict(model, testing)
preds
##  [1] R M R R R M R M R R R R R R R R R R R R R R R M M M R R M M M R M M M M M R
## [39] M R M M M M M M M M M M M
## Levels: M R
# Cross-tabulate predicted against observed classes on the test set.
caret::confusionMatrix(preds, testing$Class)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  M  R
##          M 22  4
##          R  5 20
##                                          
##                Accuracy : 0.8235         
##                  95% CI : (0.6913, 0.916)
##     No Information Rate : 0.5294         
##     P-Value [Acc > NIR] : 1.117e-05      
##                                          
##                   Kappa : 0.6467         
##                                          
##  Mcnemar's Test P-Value : 1              
##                                          
##             Sensitivity : 0.8148         
##             Specificity : 0.8333         
##          Pos Pred Value : 0.8462         
##          Neg Pred Value : 0.8000         
##              Prevalence : 0.5294         
##          Detection Rate : 0.4314         
##    Detection Prevalence : 0.5098         
##       Balanced Accuracy : 0.8241         
##                                          
##        'Positive' Class : M              
## 
# Class-probability predictions (one column per class) for the test set
probs <- predict(model, testing, type = "prob")

# Create calibration curve data
#
#' Bin predicted probabilities and summarise observed outcomes per bin
#'
#' The second column of `probs` is taken to be the predicted probability of
#' the second level of `actual`. Predictions are cut into `n_bins`
#' equal-width bins over [0, 1]; bins that contain no observation are
#' dropped from the result.
#'
#' @param probs Matrix or data frame of class probabilities with at least
#'   two columns and one row per element of `actual`.
#' @param actual Factor of observed classes with at least two levels.
#' @param n_bins Number of equal-width probability bins (default 10).
#' @return A data frame with one row per non-empty bin and columns
#'   `bin_mid` (mean predicted probability in the bin), `actual_freq`
#'   (observed frequency of the second level), `n_samples` (bin size) and
#'   `bin` (index of the bin among all `n_bins` bins).
create_calibration_data <- function(probs, actual, n_bins = 10) {
  # Fail early with a clear message; otherwise these cases surface as an
  # obscure tapply() length error or as a silently empty result.
  if (!is.factor(actual) || nlevels(actual) < 2L) {
    stop("`actual` must be a factor with at least two levels", call. = FALSE)
  }
  if (NCOL(probs) < 2L) {
    stop("`probs` must have at least two columns", call. = FALSE)
  }
  if (NROW(probs) != length(actual)) {
    stop("`probs` must have one row per element of `actual`", call. = FALSE)
  }
  if (!is.numeric(n_bins) || length(n_bins) != 1L || is.na(n_bins) ||
      n_bins < 1) {
    stop("`n_bins` must be a single number >= 1", call. = FALSE)
  }

  positive_prob <- probs[, 2]

  # Convert actual to numeric (0/1): 1 when the observation is the 2nd level
  actual_numeric <- as.numeric(actual == levels(actual)[2])

  # Create equal-width bins based on predicted probabilities
  bins <- cut(positive_prob,
              breaks = seq(0, 1, length.out = n_bins + 1),
              include.lowest = TRUE)

  # Mean predicted probability, observed frequency and size of each bin;
  # empty bins come back as NA from tapply()
  cal_data <- data.frame(
    bin_mid = tapply(positive_prob, bins, mean),
    actual_freq = tapply(actual_numeric, bins, mean),
    n_samples = tapply(actual_numeric, bins, length)
  )

  # Index bins before dropping empty ones so `bin` keeps its position
  cal_data$bin <- seq_len(nrow(cal_data))
  na.omit(cal_data)
}

# Bin the test-set probabilities against the observed classes
cal_data <- create_calibration_data(probs, testing$Class)

# Calibration curve: points sized by bin count, joined by a line, with the
# dashed red diagonal marking perfect calibration
library(ggplot2)
calibration_plot <- ggplot(cal_data, aes(x = bin_mid, y = actual_freq)) +
  geom_point(aes(size = n_samples)) +
  geom_abline(
    intercept = 0,
    slope = 1,
    linetype = "dashed",
    color = "red"
  ) +
  geom_line() +
  xlim(0, 1) +
  ylim(0, 1) +
  labs(
    x = "Predicted Probability",
    y = "Observed Frequency",
    size = "Number of\nSamples",
    title = "Calibration Curve for XGBoost Model"
  ) +
  theme_minimal()
calibration_plot

# Brier score: mean squared gap between the predicted probability of the
# second class level and the 0/1 indicator of that level
observed_positive <- as.numeric(testing$Class == levels(testing$Class)[2])
brier_score <- mean((probs[, 2] - observed_positive)^2)
cat("Brier Score:", round(brier_score, 4), "\n")
## Brier Score: 0.1268