getting-started.Rmd
library(learningmachine)
## Loading required package: randtoolbox
## Loading required package: rngWELL
## This is randtoolbox. For an overview, type 'help("randtoolbox")'.
## Loading required package: tseries
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Loading required package: memoise
## Loading required package: foreach
## Loading required package: skimr
## Loading required package: snow
## Loading required package: doSNOW
## Loading required package: iterators
## Loading required package: ranger
## Loading required package: glmnet
## Loading required package: Matrix
## Loaded glmnet 4.1-8
## Loading required package: xgboost
## Loading required package: e1071
## Loading required package: MASS
## Loading required package: caret
## Loading required package: ggplot2
## Loading required package: lattice
library(caret)
library(mlbench)
library(palmerpenguins)
# Predictors: every mtcars column except the first; response: mpg.
X <- as.matrix(mtcars[, -1])
y <- mtcars$mpg
# Fixed seed so the 80/20 split below is reproducible.
set.seed(123)
# Outer parentheses print the sampled training row indices.
(index_train <- base::sample.int(
  n = nrow(X),
  size = floor(0.8 * nrow(X)),
  replace = FALSE
))
## [1] 31 15 19 14 3 10 18 22 11 5 20 29 23 30 9 28 8 27 7 32 26 17 4 1 24
# drop = FALSE keeps both predictor blocks as matrices even if a split ever
# selects a single row (plain `X[i, ]` would collapse that to a vector).
X_train <- X[index_train, , drop = FALSE]
y_train <- y[index_train]
X_test <- X[-index_train, , drop = FALSE]
y_test <- y[-index_train]
dim(X_train)
## [1] 25 10
dim(X_test)
## [1] 7 10
# Create a Regressor with method "lm" and pi_method "splitconformal"
# (pi_method presumably selects the prediction-interval method), then query
# its metadata; the printed result follows each getter call.
obj <- learningmachine::Regressor$new(method = "lm", pi_method = "splitconformal")
obj$get_type()
## [1] "regression"
obj$get_name()
## [1] "Regressor"
obj$get_method()
## [1] "lm"
## Elapsed: 0.007 s
## [1] 3.548852
# Predict on the held-out rows; the outer parentheses print the result, a list
# with elements $preds, $lower and $upper (shown below).
(res <- obj$predict(X = X_test))
## $preds
## Mazda RX4 Wag Valiant Merc 450SE Merc 450SL
## 21.67584 19.80291 14.75149 15.70693
## Lincoln Continental Toyota Corona Pontiac Firebird
## 12.03666 28.20630 13.55241
##
## $lower
## Mazda RX4 Wag Valiant Merc 450SE Merc 450SL
## 10.675844 8.802908 3.751488 4.706932
## Lincoln Continental Toyota Corona Pontiac Firebird
## 1.036659 17.206298 2.552412
##
## $upper
## Mazda RX4 Wag Valiant Merc 450SE Merc 450SL
## 32.67584 30.80291 25.75149 26.70693
## Lincoln Continental Toyota Corona Pontiac Firebird
## 23.03666 39.20630 24.55241
# Training responses followed by test-set predictions on a single index axis.
# The y-range is widened to contain both interval bounds and every observed y.
plot(
  c(y_train, res$preds),
  type = "l", main = "", ylab = "",
  ylim = range(c(res$upper, res$lower, y))
)
lines(c(y_train, res$upper), col = "gray60")  # upper bound
lines(c(y_train, res$lower), col = "gray60")  # lower bound
lines(c(y_train, res$preds), col = "red")     # point predictions
lines(c(y_train, y_test), col = "blue")       # observed values
# Share of test responses lying inside [lower, upper].
mean((as.numeric(res$lower) <= y_test) * (as.numeric(res$upper) >= y_test))
## [1] 1
# Refit, passing pi_method = "jackknifeplus" to fit(), and report the elapsed
# wall-clock time in seconds.
t0 <- proc.time()[["elapsed"]]
obj$fit(X_train, y_train, pi_method = "jackknifeplus")
cat("Elapsed: ", proc.time()[["elapsed"]] - t0, "s \n")
## Elapsed: 0.121 s
# Set the interval level to 95 and predict on the held-out rows.
obj$set_level(95L)
res <- obj$predict(X = X_test)
# Training responses followed by test-set predictions on a single index axis.
# The y-range is widened to contain both interval bounds and every observed y.
plot(
  c(y_train, res$preds),
  type = "l", main = "", ylab = "",
  ylim = range(c(res$upper, res$lower, y))
)
lines(c(y_train, res$upper), col = "gray60")  # upper bound
lines(c(y_train, res$lower), col = "gray60")  # lower bound
lines(c(y_train, res$preds), col = "red")     # point predictions
lines(c(y_train, y_test), col = "blue")       # observed values
# Share of test responses lying inside [lower, upper].
mean((as.numeric(res$lower) <= y_test) * (as.numeric(res$upper) >= y_test))
## [1] 1
ranger regression
# Create a Regressor with method "ranger" and pi_method "splitconformal",
# then query its metadata; the printed result follows each getter call.
obj <- learningmachine::Regressor$new(method = "ranger", pi_method = "splitconformal")
obj$get_type()
## [1] "regression"
obj$get_name()
## [1] "Regressor"
## Elapsed: 0.03 s
## [1] 2.344976
## Elapsed: 0.027 s
# Set the interval level to 95 and predict on the held-out rows.
obj$set_level(95)
res <- obj$predict(X = X_test)
# Training responses followed by test-set predictions on a single index axis.
# The y-range is widened to contain both interval bounds and every observed y.
plot(
  c(y_train, res$preds),
  type = "l", main = "", ylab = "",
  ylim = range(c(res$upper, res$lower, y))
)
lines(c(y_train, res$upper), col = "gray60")  # upper bound
lines(c(y_train, res$lower), col = "gray60")  # lower bound
lines(c(y_train, res$preds), col = "red")     # point predictions
lines(c(y_train, y_test), col = "blue")       # observed values
# Share of test responses lying inside [lower, upper].
mean((as.numeric(res$lower) <= y_test) * (as.numeric(res$upper) >= y_test))
## [1] 1
# Predict on the held-out rows with the current object settings.
res <- obj$predict(X = X_test)
# Training responses followed by test-set predictions on a single index axis.
# The y-range is widened to contain both interval bounds and every observed y.
plot(
  c(y_train, res$preds),
  type = "l", main = "", ylab = "",
  ylim = range(c(res$upper, res$lower, y))
)
lines(c(y_train, res$upper), col = "gray60")  # upper bound
lines(c(y_train, res$lower), col = "gray60")  # lower bound
lines(c(y_train, res$preds), col = "red")     # point predictions
lines(c(y_train, y_test), col = "blue")       # observed values
# Share of test responses lying inside [lower, upper].
mean((as.numeric(res$lower) <= y_test) * (as.numeric(res$upper) >= y_test))
## [1] 1
KRR & ranger regression on Boston
# Boston housing data from MASS. The dataset is known to be ethically
# problematic; it is used here for illustration only.
library(MASS)
data("Boston")
# Fixed seed so the 80/20 split below is reproducible.
set.seed(13)
# floor() makes the integer sample size explicit instead of relying on
# sample() truncating the fractional 0.8 * nrow(Boston).
train_idx <- sample(nrow(Boston), floor(0.8 * nrow(Boston)))
# Predictors are every column except the response medv, selected by name so
# the split does not depend on medv being the last column.
predictor_cols <- setdiff(colnames(Boston), "medv")
X_train <- as.matrix(Boston[train_idx, predictor_cols])
X_test <- as.matrix(Boston[-train_idx, predictor_cols])
y_train <- Boston$medv[train_idx]
y_test <- Boston$medv[-train_idx]
KRR
# Create a Regressor with method "krr" and pi_method "none", then query its
# metadata; the printed result follows each getter call.
obj <- learningmachine::Regressor$new(method = "krr", pi_method = "none")
obj$get_type()
## [1] "regression"
obj$get_name()
## [1] "Regressor"
obj$get_method()
## [1] "krr"
# Fit on the training data with reg_lambda = 0.1 and report the elapsed
# wall-clock time in seconds.
t0 <- proc.time()[["elapsed"]]
obj$fit(X_train, y_train, reg_lambda = 0.1)
cat("Elapsed: ", proc.time()[["elapsed"]] - t0, "s \n")
## Elapsed: 0.271 s
## [1] 2.888748
# Record the start time, then summarise the fitted model on the held-out data
# (y supplies the observed responses); type_ci is left at its default.
# NOTE(review): t0 is not read again in the visible code; the "Elapsed" line in
# the output presumably comes from a statement not shown here.
t0 <- proc.time()[3]
obj$summary(X_test, y=y_test, show_progress=FALSE)
## $R_squared
## [1] 0.906853
##
## $R_squared_adj
## [1] 0.8930926
##
## $Residuals
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -8.33671 -1.10461 -0.02411 0.12105 1.46980 9.29398
##
## $citests
## estimate lower upper p-value signif
## crim -0.0544164530 -0.075529516 -0.0333033896 1.509961e-06 ***
## zn -0.0046418101 -0.009605912 0.0003222914 6.652189e-02 .
## indus -0.0245357110 -0.051187606 0.0021161842 7.077114e-02 .
## chas 7.1730075477 6.375361838 7.9706532576 5.512518e-33 ***
## nox -9.4958030753 -12.095806303 -6.8957998474 8.811259e-11 ***
## rm 4.7080249286 3.939353604 5.4766962526 1.787394e-21 ***
## age -0.0439718628 -0.053082559 -0.0348611667 7.779486e-16 ***
## dis -1.4214523042 -1.573146091 -1.2697585171 2.257304e-34 ***
## rad 0.1810040336 0.155692359 0.2063157080 8.827967e-26 ***
## tax -0.0115644823 -0.013066430 -0.0100625342 5.303780e-28 ***
## ptratio -0.4819300831 -0.582754790 -0.3811053766 1.242164e-15 ***
## black -0.0002461991 -0.001967809 0.0014754112 7.772335e-01
## lstat -0.4091458985 -0.475209948 -0.3430818492 9.126116e-22 ***
##
## $signif_codes
## [1] "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
##
## $effects
## ── Data Summary ────────────────────────
## Values
## Name effects
## Number of rows 102
## Number of columns 13
## _______________________
## Column type frequency:
## numeric 13
## ________________________
## Group variables None
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable mean sd p0 p25 p50 p75
## 1 crim -0.0544 0.107 -0.416 -0.0696 -0.00478 0.00661
## 2 zn -0.00464 0.0253 -0.0524 -0.0200 -0.00301 0.00416
## 3 indus -0.0245 0.136 -0.308 -0.109 -0.0396 0.0220
## 4 chas 7.17 4.06 -17.7 5.97 7.12 9.16
## 5 nox -9.50 13.2 -78.3 -15.0 -7.32 -2.30
## 6 rm 4.71 3.91 -3.43 1.83 4.48 7.72
## 7 age -0.0440 0.0464 -0.138 -0.0778 -0.0506 -0.00674
## 8 dis -1.42 0.772 -3.43 -1.80 -1.32 -0.933
## 9 rad 0.181 0.129 -0.0944 0.0827 0.173 0.261
## 10 tax -0.0116 0.00765 -0.0388 -0.0148 -0.00963 -0.00718
## 11 ptratio -0.482 0.513 -2.13 -0.671 -0.441 -0.209
## 12 black -0.000246 0.00877 -0.0263 -0.00450 0.0000316 0.00344
## 13 lstat -0.409 0.336 -1.62 -0.474 -0.310 -0.189
## p100 hist
## 1 0.107 ▁▁▂▇▅
## 2 0.0728 ▃▇▇▂▁
## 3 0.499 ▂▇▂▁▁
## 4 14.8 ▁▁▁▇▅
## 5 15.5 ▁▁▁▇▃
## 6 12.4 ▃▇▇▇▅
## 7 0.0628 ▂▇▆▃▂
## 8 0.0716 ▂▂▇▇▃
## 9 0.492 ▂▇▇▃▂
## 10 0.00304 ▁▁▃▇▂
## 11 1.01 ▁▁▇▃▁
## 12 0.0391 ▁▇▇▁▁
## 13 0.0311 ▁▁▂▆▇
## Elapsed: 11.178 s
# Record the start time, then summarise the fitted model on the held-out data;
# type_ci = "bootstrap" selects how the intervals and p-values reported under
# $citests are computed.
# NOTE(review): t0 is not read again in the visible code; the "Elapsed" line in
# the output presumably comes from a statement not shown here.
t0 <- proc.time()[3]
obj$summary(X_test, y=y_test, show_progress=FALSE, type_ci="bootstrap")
## $R_squared
## [1] 0.906853
##
## $R_squared_adj
## [1] 0.8930926
##
## $Residuals
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -8.33671 -1.10461 -0.02411 0.12105 1.46980 9.29398
##
## $citests
## estimate lower upper p-value signif
## crim -0.0538049728 -0.07788039 -0.0339150922 3.330856e-165 ***
## zn -0.0046264575 -0.00930898 0.0002208004 2.129513e-162 ***
## indus -0.0250873356 -0.04983945 0.0013580905 3.367434e-162 ***
## chas 7.1874845264 6.31700628 7.8848224325 3.330856e-165 ***
## nox -9.4746636134 -12.26642878 -6.9077586130 3.330856e-165 ***
## rm 4.7102428290 3.86896609 5.5034002307 3.330856e-165 ***
## age -0.0440650425 -0.05296489 -0.0349701343 3.330856e-165 ***
## dis -1.4211028552 -1.56809609 -1.2833726839 3.330856e-165 ***
## rad 0.1812312885 0.15663376 0.2067723827 3.330856e-165 ***
## tax -0.0115489146 -0.01305204 -0.0100571311 3.330856e-165 ***
## ptratio -0.4789522388 -0.58263847 -0.3853958372 3.330856e-165 ***
## black -0.0002264778 -0.00187825 0.0015182254 9.583495e-16 ***
## lstat -0.4057946096 -0.47807035 -0.3461364993 3.330856e-165 ***
##
## $signif_codes
## [1] "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
##
## $effects
## ── Data Summary ────────────────────────
## Values
## Name effects
## Number of rows 102
## Number of columns 13
## _______________________
## Column type frequency:
## numeric 13
## ________________________
## Group variables None
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable mean sd p0 p25 p50 p75
## 1 crim -0.0544 0.107 -0.416 -0.0696 -0.00478 0.00661
## 2 zn -0.00464 0.0253 -0.0524 -0.0200 -0.00301 0.00416
## 3 indus -0.0245 0.136 -0.308 -0.109 -0.0396 0.0220
## 4 chas 7.17 4.06 -17.7 5.97 7.12 9.16
## 5 nox -9.50 13.2 -78.3 -15.0 -7.32 -2.30
## 6 rm 4.71 3.91 -3.43 1.83 4.48 7.72
## 7 age -0.0440 0.0464 -0.138 -0.0778 -0.0506 -0.00674
## 8 dis -1.42 0.772 -3.43 -1.80 -1.32 -0.933
## 9 rad 0.181 0.129 -0.0944 0.0827 0.173 0.261
## 10 tax -0.0116 0.00765 -0.0388 -0.0148 -0.00963 -0.00718
## 11 ptratio -0.482 0.513 -2.13 -0.671 -0.441 -0.209
## 12 black -0.000246 0.00877 -0.0263 -0.00450 0.0000316 0.00344
## 13 lstat -0.409 0.336 -1.62 -0.474 -0.310 -0.189
## p100 hist
## 1 0.107 ▁▁▂▇▅
## 2 0.0728 ▃▇▇▂▁
## 3 0.499 ▂▇▂▁▁
## 4 14.8 ▁▁▁▇▅
## 5 15.5 ▁▁▁▇▃
## 6 12.4 ▃▇▇▇▅
## 7 0.0628 ▂▇▆▃▂
## 8 0.0716 ▂▂▇▇▃
## 9 0.492 ▂▇▇▃▂
## 10 0.00304 ▁▁▃▇▂
## 11 1.01 ▁▁▇▃▁
## 12 0.0391 ▁▇▇▁▁
## 13 0.0311 ▁▁▂▆▇
## Elapsed: 7.374 s
# Record the start time, then summarise the fitted model on the held-out data;
# type_ci = "conformal" selects how the intervals and p-values reported under
# $citests are computed.
# NOTE(review): t0 is not read again in the visible code; the "Elapsed" line in
# the output presumably comes from a statement not shown here.
t0 <- proc.time()[3]
obj$summary(X_test, y=y_test, show_progress=FALSE, type_ci="conformal")
## $R_squared
## [1] 0.906853
##
## $R_squared_adj
## [1] 0.8930926
##
## $Residuals
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -8.33671 -1.10461 -0.02411 0.12105 1.46980 9.29398
##
## $citests
## estimate lower upper p-value signif
## crim -0.038249667 -0.072311473 -0.0092072317 1.276172e-83 ***
## zn -0.003430363 -0.010415578 0.0048271717 3.711411e-48 ***
## indus -0.011338481 -0.049056671 0.0396596453 8.835736e-21 ***
## chas 7.020875282 5.960564604 7.8937923537 1.268525e-83 ***
## nox -7.972379837 -12.209285137 -4.5740514013 1.268525e-83 ***
## rm 5.349917255 4.373674234 6.3126619900 1.268525e-83 ***
## age -0.051087238 -0.063338051 -0.0374166901 1.268525e-83 ***
## dis -1.369556022 -1.555997505 -1.1779069549 1.268525e-83 ***
## rad 0.182189327 0.155131490 0.2169839629 1.268525e-83 ***
## tax -0.011863144 -0.014019083 -0.0098332395 1.268525e-83 ***
## ptratio -0.547009860 -0.714028563 -0.4058215350 1.268525e-83 ***
## black -0.001283891 -0.003252675 0.0005966127 3.171493e-74 ***
## lstat -0.402560290 -0.516615968 -0.3170131131 1.268525e-83 ***
##
## $signif_codes
## [1] "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
##
## $effects
## ── Data Summary ────────────────────────
## Values
## Name effects
## Number of rows 102
## Number of columns 13
## _______________________
## Column type frequency:
## numeric 13
## ________________________
## Group variables None
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable mean sd p0 p25 p50 p75
## 1 crim -0.0544 0.107 -0.416 -0.0696 -0.00478 0.00661
## 2 zn -0.00464 0.0253 -0.0524 -0.0200 -0.00301 0.00416
## 3 indus -0.0245 0.136 -0.308 -0.109 -0.0396 0.0220
## 4 chas 7.17 4.06 -17.7 5.97 7.12 9.16
## 5 nox -9.50 13.2 -78.3 -15.0 -7.32 -2.30
## 6 rm 4.71 3.91 -3.43 1.83 4.48 7.72
## 7 age -0.0440 0.0464 -0.138 -0.0778 -0.0506 -0.00674
## 8 dis -1.42 0.772 -3.43 -1.80 -1.32 -0.933
## 9 rad 0.181 0.129 -0.0944 0.0827 0.173 0.261
## 10 tax -0.0116 0.00765 -0.0388 -0.0148 -0.00963 -0.00718
## 11 ptratio -0.482 0.513 -2.13 -0.671 -0.441 -0.209
## 12 black -0.000246 0.00877 -0.0263 -0.00450 0.0000316 0.00344
## 13 lstat -0.409 0.336 -1.62 -0.474 -0.310 -0.189
## p100 hist
## 1 0.107 ▁▁▂▇▅
## 2 0.0728 ▃▇▇▂▁
## 3 0.499 ▂▇▂▁▁
## 4 14.8 ▁▁▁▇▅
## 5 15.5 ▁▁▁▇▃
## 6 12.4 ▃▇▇▇▅
## 7 0.0628 ▂▇▆▃▂
## 8 0.0716 ▂▂▇▇▃
## 9 0.492 ▂▇▇▃▂
## 10 0.00304 ▁▁▃▇▂
## 11 1.01 ▁▁▇▃▁
## 12 0.0391 ▁▇▇▁▁
## 13 0.0311 ▁▁▂▆▇
## Elapsed: 3.886 s
ranger
# Create a Regressor with method "ranger" and pi_method "splitconformal" for
# the Boston data, then query its metadata; printed results follow each call.
obj <- learningmachine::Regressor$new(method = "ranger", pi_method="splitconformal")
obj$get_type()
## [1] "regression"
obj$get_name()
## [1] "Regressor"
## Elapsed: 0.239 s
## [1] 3.79469
# Record the start time, then summarise the fitted model on the held-out data
# (y supplies the observed responses); type_ci is left at its default.
# NOTE(review): t0 is not read again in the visible code; the "Elapsed" line in
# the output presumably comes from a statement not shown here.
t0 <- proc.time()[3]
obj$summary(X_test, y=y_test, show_progress=FALSE)
## $R_squared
## [1] 0.8392681
##
## $R_squared_adj
## [1] 0.8155236
##
## $Residuals
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -9.1590 -1.7302 -0.2022 0.4068 2.0991 12.6480
##
## $Coverage_rate
## [1] 100
##
## $citests
## estimate lower upper p-value signif
## crim -46.07120224 -210.45915529 118.31675081 5.794688e-01
## zn -0.01770371 -0.20552612 0.17011870 8.520499e-01
## indus -15.27494058 -26.85443276 -3.69544841 1.023790e-02 *
## chas 0.00000000 NaN NaN NaN
## nox -521.68056125 -688.90286706 -354.45825544 1.310563e-08 ***
## rm 275.37923329 224.35153031 326.40693626 2.514291e-18 ***
## age -1.75704619 -2.46954300 -1.04454938 3.780961e-06 ***
## dis 20.57598690 -4.55485374 45.70682754 1.074525e-01
## rad 2.13991832 0.02367618 4.25616046 4.753842e-02 *
## tax -0.60978051 -0.79942682 -0.42013420 5.448371e-09 ***
## ptratio -32.87462570 -38.77698867 -26.97226272 4.440300e-19 ***
## black -0.26384711 -0.49997135 -0.02772287 2.889086e-02 *
## lstat -44.64193590 -57.99309865 -31.29077315 1.652805e-09 ***
##
## $signif_codes
## [1] "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
##
## $effects
## ── Data Summary ────────────────────────
## Values
## Name effects
## Number of rows 102
## Number of columns 13
## _______________________
## Column type frequency:
## numeric 13
## ________________________
## Group variables None
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable mean sd p0 p25 p50 p75 p100
## 1 crim -46.1 837. -5846. -80.5 -4.90 114. 2107.
## 2 zn -0.0177 0.956 -4.47 0 0 0 5.22
## 3 indus -15.3 59.0 -299. -4.20 1.66 8.67 41.1
## 4 chas 0 0 0 0 0 0 0
## 5 nox -522. 851. -3931. -965. -334. 4.36 852.
## 6 rm 275. 260. -11.5 73.4 180. 378. 870.
## 7 age -1.76 3.63 -26.2 -2.88 -1.16 0 5.56
## 8 dis 20.6 128. -234. -33.8 -2.58 12.7 636.
## 9 rad 2.14 10.8 -6.31 0 0 0.638 91.8
## 10 tax -0.610 0.966 -3.85 -0.621 -0.233 -0.116 0.525
## 11 ptratio -32.9 30.0 -140. -44.7 -26.5 -13.8 4.66
## 12 black -0.264 1.20 -4.11 -1.07 0.0418 0.521 2.22
## 13 lstat -44.6 68.0 -335. -56.4 -21.1 -5.23 45.9
## hist
## 1 ▁▁▁▇▁
## 2 ▁▁▇▁▁
## 3 ▁▁▁▁▇
## 4 ▁▁▇▁▁
## 5 ▁▁▃▇▇
## 6 ▇▅▁▂▂
## 7 ▁▁▁▇▆
## 8 ▁▇▁▁▁
## 9 ▇▁▁▁▁
## 10 ▁▁▁▃▇
## 11 ▁▁▂▇▇
## 12 ▁▂▃▇▂
## 13 ▁▁▁▃▇
## Elapsed: 2.353 s
# Record the start time, then summarise the fitted model on the held-out data;
# type_ci = "bootstrap" selects how the intervals and p-values reported under
# $citests are computed.
# NOTE(review): t0 is not read again in the visible code; the "Elapsed" line in
# the output presumably comes from a statement not shown here.
t0 <- proc.time()[3]
obj$summary(X_test, y=y_test, show_progress=FALSE, type_ci="bootstrap")
## Warning in wilcox.test.default(x, mu = mu_0): cannot compute exact p-value with
## zeroes
## $R_squared
## [1] 0.8392681
##
## $R_squared_adj
## [1] 0.8155236
##
## $Residuals
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -9.1590 -1.7302 -0.2022 0.4068 2.0991 12.6480
##
## $Coverage_rate
## [1] 100
##
## $citests
## estimate lower upper p-value signif
## crim -42.80272593 -222.9401178 87.85993946 2.195373e-60 ***
## zn -0.01962089 -0.1973614 0.16418601 5.218958e-10 ***
## indus -14.94722807 -27.3502454 -4.57347412 3.340872e-165 ***
## chas 0.00000000 0.0000000 0.00000000 NaN
## nox -515.86571697 -695.3004211 -353.53331454 3.330856e-165 ***
## rm 274.61218807 225.1134711 330.67621239 3.330856e-165 ***
## age -1.74181612 -2.5305571 -1.08962968 3.330856e-165 ***
## dis 19.52739261 -3.2000731 45.76914765 8.690758e-159 ***
## rad 2.04799322 0.4708851 4.47254728 3.330856e-165 ***
## tax -0.61243039 -0.8050040 -0.41939923 3.330856e-165 ***
## ptratio -32.68729037 -39.2295474 -27.23786297 3.330856e-165 ***
## black -0.25905523 -0.4747878 -0.03433839 1.125993e-164 ***
## lstat -44.36476900 -57.3813624 -32.33199672 3.330856e-165 ***
##
## $signif_codes
## [1] "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
##
## $effects
## ── Data Summary ────────────────────────
## Values
## Name effects
## Number of rows 102
## Number of columns 13
## _______________________
## Column type frequency:
## numeric 13
## ________________________
## Group variables None
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable mean sd p0 p25 p50 p75 p100
## 1 crim -46.1 837. -5846. -80.5 -4.90 114. 2107.
## 2 zn -0.0177 0.956 -4.47 0 0 0 5.22
## 3 indus -15.3 59.0 -299. -4.20 1.66 8.67 41.1
## 4 chas 0 0 0 0 0 0 0
## 5 nox -522. 851. -3931. -965. -334. 4.36 852.
## 6 rm 275. 260. -11.5 73.4 180. 378. 870.
## 7 age -1.76 3.63 -26.2 -2.88 -1.16 0 5.56
## 8 dis 20.6 128. -234. -33.8 -2.58 12.7 636.
## 9 rad 2.14 10.8 -6.31 0 0 0.638 91.8
## 10 tax -0.610 0.966 -3.85 -0.621 -0.233 -0.116 0.525
## 11 ptratio -32.9 30.0 -140. -44.7 -26.5 -13.8 4.66
## 12 black -0.264 1.20 -4.11 -1.07 0.0418 0.521 2.22
## 13 lstat -44.6 68.0 -335. -56.4 -21.1 -5.23 45.9
## hist
## 1 ▁▁▁▇▁
## 2 ▁▁▇▁▁
## 3 ▁▁▁▁▇
## 4 ▁▁▇▁▁
## 5 ▁▁▃▇▇
## 6 ▇▅▁▂▂
## 7 ▁▁▁▇▆
## 8 ▁▇▁▁▁
## 9 ▇▁▁▁▁
## 10 ▁▁▁▃▇
## 11 ▁▁▂▇▇
## 12 ▁▂▃▇▂
## 13 ▁▁▁▃▇
## Elapsed: 3.9 s
# Record the start time, then summarise the fitted model on the held-out data;
# type_ci = "conformal" selects how the intervals and p-values reported under
# $citests are computed.
# NOTE(review): t0 is not read again in the visible code; the "Elapsed" line in
# the output presumably comes from a statement not shown here.
t0 <- proc.time()[3]
obj$summary(X_test, y=y_test, show_progress=FALSE, type_ci="conformal")
## Warning in wilcox.test.default(x, mu = mu_0): cannot compute exact p-value with
## zeroes
## $R_squared
## [1] 0.8392681
##
## $R_squared_adj
## [1] 0.8155236
##
## $Residuals
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -9.1590 -1.7302 -0.2022 0.4068 2.0991 12.6480
##
## $Coverage_rate
## [1] 100
##
## $citests
## estimate lower upper p-value signif
## crim -53.02973192 -309.44372969 123.9894590 2.000915e-22 ***
## zn 0.09395212 -0.16748657 0.4292619 2.840741e-38 ***
## indus -22.73512494 -43.65549343 -5.8649095 1.299388e-83 ***
## chas 0.00000000 0.00000000 0.0000000 NaN
## nox -530.17833130 -797.24229689 -311.8700672 1.268525e-83 ***
## rm 299.20519353 239.94467614 374.5585691 1.268525e-83 ***
## age -1.51455089 -2.87045835 -0.5337860 1.268525e-83 ***
## dis 11.70011255 -20.87895323 61.4062994 3.826445e-37 ***
## rad 3.19960138 0.08106856 7.9279486 1.908473e-83 ***
## tax -0.65995049 -0.95908283 -0.4194447 1.268525e-83 ***
## ptratio -36.65285563 -43.86593188 -29.4764947 1.268525e-83 ***
## black -0.49754142 -0.89669708 -0.1469885 1.379862e-83 ***
## lstat -47.96035265 -67.45233541 -33.3474118 1.268525e-83 ***
##
## $signif_codes
## [1] "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
##
## $effects
## ── Data Summary ────────────────────────
## Values
## Name effects
## Number of rows 102
## Number of columns 13
## _______________________
## Column type frequency:
## numeric 13
## ________________________
## Group variables None
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable mean sd p0 p25 p50 p75 p100
## 1 crim -46.1 837. -5846. -80.5 -4.90 114. 2107.
## 2 zn -0.0177 0.956 -4.47 0 0 0 5.22
## 3 indus -15.3 59.0 -299. -4.20 1.66 8.67 41.1
## 4 chas 0 0 0 0 0 0 0
## 5 nox -522. 851. -3931. -965. -334. 4.36 852.
## 6 rm 275. 260. -11.5 73.4 180. 378. 870.
## 7 age -1.76 3.63 -26.2 -2.88 -1.16 0 5.56
## 8 dis 20.6 128. -234. -33.8 -2.58 12.7 636.
## 9 rad 2.14 10.8 -6.31 0 0 0.638 91.8
## 10 tax -0.610 0.966 -3.85 -0.621 -0.233 -0.116 0.525
## 11 ptratio -32.9 30.0 -140. -44.7 -26.5 -13.8 4.66
## 12 black -0.264 1.20 -4.11 -1.07 0.0418 0.521 2.22
## 13 lstat -44.6 68.0 -335. -56.4 -21.1 -5.23 45.9
## hist
## 1 ▁▁▁▇▁
## 2 ▁▁▇▁▁
## 3 ▁▁▁▁▇
## 4 ▁▁▇▁▁
## 5 ▁▁▃▇▇
## 6 ▇▅▁▂▂
## 7 ▁▁▁▇▆
## 8 ▁▇▁▁▁
## 9 ▇▁▁▁▁
## 10 ▁▁▁▃▇
## 11 ▁▁▂▇▇
## 12 ▁▂▃▇▂
## 13 ▁▁▁▃▇
## Elapsed: 2.105 s
KRR regression on mtcars
# Predictors: every mtcars column except the first; response: mpg.
X <- as.matrix(mtcars[, -1])
y <- mtcars$mpg
# Fixed seed so the 70/30 split below is reproducible.
set.seed(123)
# Outer parentheses print the sampled training row indices.
(index_train <- base::sample.int(
  n = nrow(X),
  size = floor(0.7 * nrow(X)),
  replace = FALSE
))
## [1] 31 15 19 14 3 10 18 22 11 5 20 29 23 30 9 28 8 27 7 32 26 17
# drop = FALSE keeps both predictor blocks as matrices even if a split ever
# selects a single row (plain `X[i, ]` would collapse that to a vector).
X_train <- X[index_train, , drop = FALSE]
y_train <- y[index_train]
X_test <- X[-index_train, , drop = FALSE]
y_test <- y[-index_train]
dim(X_train)
## [1] 22 10
dim(X_test)
## [1] 10 10
# Create a Regressor with method "krr" and pi_method "splitconformal" for the
# mtcars data, then query its metadata; printed results follow each call.
obj <- learningmachine::Regressor$new(method = "krr", pi_method = "splitconformal")
obj$get_type()
## [1] "regression"
obj$get_name()
## [1] "Regressor"
# Fit on the training data with reg_lambda = 0.1 and report the elapsed
# wall-clock time in seconds.
t0 <- proc.time()[["elapsed"]]
obj$fit(X_train, y_train, reg_lambda = 0.1)
cat("Elapsed: ", proc.time()[["elapsed"]] - t0, "s \n")
## Elapsed: 0.011 s
# Print predictions for the held-out rows: a list with elements $preds, $lower
# and $upper (shown below).
print(obj$predict(X_test))
## $preds
## [1] 22.151349 21.802194 12.541365 10.124759 13.408181 14.155816 7.421184
## [8] 16.879536 13.615153 12.749565
##
## $lower
## [1] 12.1513495 11.8021941 2.5413650 0.1247588 3.4081805 4.1558157
## [7] -2.5788160 6.8795365 3.6151533 2.7495651
##
## $upper
## [1] 32.15135 31.80219 22.54137 20.12476 23.40818 24.15582 17.42118 26.87954
## [9] 23.61515 22.74957
# Refit on the training data with reg_lambda = 0.1 and report the elapsed
# wall-clock time in seconds.
t0 <- proc.time()[["elapsed"]]
obj$fit(X_train, y_train, reg_lambda = 0.1)
cat("Elapsed: ", proc.time()[["elapsed"]] - t0, "s \n")
## Elapsed: 0.016 s
# Set the interval level to 95 and the interval method to "splitconformal",
# then predict on the held-out rows.
obj$set_level(95)
obj$set_pi_method("splitconformal")
res <- obj$predict(X = X_test)
# Training responses followed by test-set predictions on a single index axis.
# The y-range is widened to contain both interval bounds and every observed y.
plot(
  c(y_train, res$preds),
  type = "l", main = "", ylab = "",
  ylim = range(c(res$upper, res$lower, y))
)
lines(c(y_train, res$upper), col = "gray60")  # upper bound
lines(c(y_train, res$lower), col = "gray60")  # lower bound
lines(c(y_train, res$preds), col = "red")     # point predictions
lines(c(y_train, y_test), col = "blue")       # observed values
# Share of test responses lying inside [lower, upper].
mean((as.numeric(res$lower) <= y_test) * (as.numeric(res$upper) >= y_test))
## [1] 1
# Refit on the training data with reg_lambda = 0.1 and report the elapsed
# wall-clock time in seconds.
t0 <- proc.time()[["elapsed"]]
obj$fit(X_train, y_train, reg_lambda = 0.1)
cat("Elapsed: ", proc.time()[["elapsed"]] - t0, "s \n")
## Elapsed: 0.015 s
# Predict on the held-out rows with the current object settings.
res <- obj$predict(X = X_test)
# Training responses followed by test-set predictions on a single index axis.
# The y-range is widened to contain both interval bounds and every observed y.
plot(
  c(y_train, res$preds),
  type = "l", main = "", ylab = "",
  ylim = range(c(res$upper, res$lower, y))
)
lines(c(y_train, res$upper), col = "gray60")  # upper bound
lines(c(y_train, res$lower), col = "gray60")  # lower bound
lines(c(y_train, res$preds), col = "red")     # point predictions
lines(c(y_train, y_test), col = "blue")       # observed values
# Share of test responses lying inside [lower, upper].
mean((as.numeric(res$lower) <= y_test) * (as.numeric(res$upper) >= y_test))
## [1] 1
# Record the start time, then summarise the fitted model on the held-out data
# (y supplies the observed responses); type_ci is left at its default.
# NOTE(review): t0 is not read again in the visible code; the "Elapsed" line in
# the output presumably comes from a statement not shown here.
t0 <- proc.time()[3]
obj$summary(X_test, y=y_test, show_progress=FALSE)
## $R_squared
## [1] -0.8614864
##
## $R_squared_adj
## [1] 17.75338
##
## $Residuals
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.1513 0.5083 3.0680 3.4751 5.9929 8.8586
##
## $Coverage_rate
## [1] 100
##
## $citests
## estimate lower upper p-value signif
## cyl -23.98943109 -46.4975109 -1.4813513 3.918219e-02 *
## disp -0.61133395 -0.9655770 -0.2570909 3.597927e-03 **
## hp -0.07828878 -0.3785573 0.2219797 5.698268e-01
## drat 310.94399534 160.4146969 461.4732937 1.163859e-03 **
## wt -197.39979731 -240.1776661 -154.6219286 2.500030e-06 ***
## qsec -19.50660485 -54.1139966 15.1007869 2.342132e-01
## vs 69.84795566 -85.8899529 225.5858643 3.368080e-01
## am 137.97019623 -0.2148915 276.1552839 5.028830e-02 .
## gear 191.57905165 134.3446800 248.8134233 3.424783e-05 ***
## carb 3.39227959 -22.2875140 29.0720732 7.718555e-01
##
## $signif_codes
## [1] "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
##
## $effects
## ── Data Summary ────────────────────────
## Values
## Name effects
## Number of rows 10
## Number of columns 10
## _______________________
## Column type frequency:
## numeric 10
## ________________________
## Group variables None
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable mean sd p0 p25 p50 p75 p100
## 1 cyl -24.0 31.5 -64.0 -40.9 -34.1 -0.849 37.6
## 2 disp -0.611 0.495 -1.66 -0.934 -0.429 -0.307 -0.0817
## 3 hp -0.0783 0.420 -1.00 -0.218 -0.0402 0.235 0.359
## 4 drat 311. 210. -159. 195. 369. 464. 534.
## 5 wt -197. 59.8 -280. -252. -196. -144. -124.
## 6 qsec -19.5 48.4 -73.4 -60.0 -29.3 12.8 60.3
## 7 vs 69.8 218. -218. -104. 86.1 103. 421.
## 8 am 138. 193. -161. 99.8 162. 201. 516.
## 9 gear 192. 80.0 74.7 142. 178. 224. 367.
## 10 carb 3.39 35.9 -56.3 -6.54 3.71 36.0 41.3
## hist
## 1 ▃▇▂▃▂
## 2 ▂▂▂▆▇
## 3 ▂▁▆▃▇
## 4 ▂▁▆▃▇
## 5 ▇▁▇▂▇
## 6 ▇▇▂▂▅
## 7 ▆▂▇▁▃
## 8 ▂▁▇▁▁
## 9 ▂▇▃▂▂
## 10 ▃▁▆▂▇
## Elapsed: 0.62 s
# Record the start time, then summarise the fitted model on the held-out data;
# type_ci = "bootstrap" selects how the intervals and p-values reported under
# $citests are computed.
# NOTE(review): t0 is not read again in the visible code; the "Elapsed" line in
# the output presumably comes from a statement not shown here.
t0 <- proc.time()[3]
obj$summary(X_test, y=y_test, show_progress=FALSE, type_ci="bootstrap")
## $R_squared
## [1] -0.8614864
##
## $R_squared_adj
## [1] 17.75338
##
## $Residuals
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.1513 0.5083 3.0680 3.4751 5.9929 8.8586
##
## $Coverage_rate
## [1] 100
##
## $citests
## estimate lower upper p-value signif
## cyl -24.90623398 -40.2794196 -5.6202611 3.536958e-165 ***
## disp -0.60214954 -0.9189034 -0.3419554 3.330824e-165 ***
## hp -0.06820063 -0.3342753 0.1339877 1.174915e-64 ***
## drat 314.63303511 178.9216937 418.0172986 3.330824e-165 ***
## wt -197.32353350 -232.9121566 -163.3831785 3.330824e-165 ***
## qsec -19.52528169 -44.6875092 9.0752923 1.159421e-146 ***
## vs 71.87587019 -47.6861837 190.9669405 1.623953e-133 ***
## am 139.18488710 28.8373119 248.4883434 4.072915e-165 ***
## gear 190.53694931 146.5700802 240.0194316 3.330824e-165 ***
## carb 3.53972676 -17.6098472 22.3805033 3.220691e-21 ***
##
## $signif_codes
## [1] "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
##
## $effects
## ── Data Summary ────────────────────────
## Values
## Name effects
## Number of rows 10
## Number of columns 10
## _______________________
## Column type frequency:
## numeric 10
## ________________________
## Group variables None
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable mean sd p0 p25 p50 p75 p100
## 1 cyl -24.0 31.5 -64.0 -40.9 -34.1 -0.849 37.6
## 2 disp -0.611 0.495 -1.66 -0.934 -0.429 -0.307 -0.0817
## 3 hp -0.0783 0.420 -1.00 -0.218 -0.0402 0.235 0.359
## 4 drat 311. 210. -159. 195. 369. 464. 534.
## 5 wt -197. 59.8 -280. -252. -196. -144. -124.
## 6 qsec -19.5 48.4 -73.4 -60.0 -29.3 12.8 60.3
## 7 vs 69.8 218. -218. -104. 86.1 103. 421.
## 8 am 138. 193. -161. 99.8 162. 201. 516.
## 9 gear 192. 80.0 74.7 142. 178. 224. 367.
## 10 carb 3.39 35.9 -56.3 -6.54 3.71 36.0 41.3
## hist
## 1 ▃▇▂▃▂
## 2 ▂▂▂▆▇
## 3 ▂▁▆▃▇
## 4 ▂▁▆▃▇
## 5 ▇▁▇▂▇
## 6 ▇▇▂▂▅
## 7 ▆▂▇▁▃
## 8 ▂▁▇▁▁
## 9 ▂▇▃▂▂
## 10 ▃▁▆▂▇
## Elapsed: 0.656 s
# Record the start time, then summarise the fitted model on the held-out data;
# type_ci = "conformal" selects how the intervals and p-values reported under
# $citests are computed.
# NOTE(review): t0 is not read again in the visible code; the "Elapsed" line in
# the output presumably comes from a statement not shown here.
t0 <- proc.time()[3]
obj$summary(X_test, y=y_test, show_progress=FALSE, type_ci="conformal")
## $R_squared
## [1] -0.8614864
##
## $R_squared_adj
## [1] 17.75338
##
## $Residuals
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.1513 0.5083 3.0680 3.4751 5.9929 8.8586
##
## $Coverage_rate
## [1] 100
##
## $citests
## estimate lower upper p-value signif
## cyl -29.8838378 -49.2164493 -11.6421130 1.277680e-83 ***
## disp -0.5711107 -0.8987403 -0.2368822 1.251974e-83 ***
## hp -0.1266592 -0.6527376 0.2131664 1.792790e-34 ***
## drat 232.0653188 50.6575979 416.7283596 1.474427e-83 ***
## wt -212.1232593 -259.4448623 -164.8016563 1.252490e-83 ***
## qsec -5.6244312 -46.0259583 37.9885966 2.775177e-07 ***
## vs 83.9673011 -73.8294770 278.6178292 1.002688e-66 ***
## am 99.1443134 -28.6971381 188.9939412 1.439650e-77 ***
## gear 167.6983530 113.1189782 220.0858429 1.249518e-83 ***
## carb -5.8569358 -33.9051378 16.4163956 5.560550e-22 ***
##
## $signif_codes
## [1] "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
##
## $effects
## ── Data Summary ────────────────────────
## Values
## Name effects
## Number of rows 10
## Number of columns 10
## _______________________
## Column type frequency:
## numeric 10
## ________________________
## Group variables None
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable mean sd p0 p25 p50 p75 p100
## 1 cyl -24.0 31.5 -64.0 -40.9 -34.1 -0.849 37.6
## 2 disp -0.611 0.495 -1.66 -0.934 -0.429 -0.307 -0.0817
## 3 hp -0.0783 0.420 -1.00 -0.218 -0.0402 0.235 0.359
## 4 drat 311. 210. -159. 195. 369. 464. 534.
## 5 wt -197. 59.8 -280. -252. -196. -144. -124.
## 6 qsec -19.5 48.4 -73.4 -60.0 -29.3 12.8 60.3
## 7 vs 69.8 218. -218. -104. 86.1 103. 421.
## 8 am 138. 193. -161. 99.8 162. 201. 516.
## 9 gear 192. 80.0 74.7 142. 178. 224. 367.
## 10 carb 3.39 35.9 -56.3 -6.54 3.71 36.0 41.3
## hist
## 1 ▃▇▂▃▂
## 2 ▂▂▂▆▇
## 3 ▂▁▆▃▇
## 4 ▂▁▆▃▇
## 5 ▇▁▇▂▇
## 6 ▇▇▂▂▅
## 7 ▆▂▇▁▃
## 8 ▂▁▇▁▁
## 9 ▂▇▃▂▂
## 10 ▃▁▆▂▇
## Elapsed: 0.22 s
# Switch the interval method to "kdejackknifeplus" and refit with
# reg_lambda = 0.1; t0 holds the start time in elapsed seconds.
obj$set_pi_method("kdejackknifeplus")
t0 <- proc.time()[["elapsed"]]
obj$fit(X_train, y_train, reg_lambda = 0.1)
## | | | 0% | |=== | 5% | |======= | 10% | |========== | 14% | |============= | 19% | |================= | 24% | |==================== | 29% | |======================= | 33% | |=========================== | 38% | |============================== | 43% | |================================= | 48% | |===================================== | 52% | |======================================== | 57% | |=========================================== | 62% | |=============================================== | 67% | |================================================== | 71% | |===================================================== | 76% | |========================================================= | 81% | |============================================================ | 86% | |=============================================================== | 90% | |=================================================================== | 95% | |======================================================================| 100%
## Elapsed: 0.041 s
# Predict on the held-out rows with the current object settings.
res <- obj$predict(X = X_test)
# Training responses followed by test-set predictions on a single index axis.
# The y-range is widened to contain both interval bounds and every observed y.
plot(
  c(y_train, res$preds),
  type = "l", main = "", ylab = "",
  ylim = range(c(res$upper, res$lower, y))
)
lines(c(y_train, res$upper), col = "gray60")  # upper bound
lines(c(y_train, res$lower), col = "gray60")  # lower bound
lines(c(y_train, res$preds), col = "red")     # point predictions
lines(c(y_train, y_test), col = "blue")       # observed values
# Share of test responses lying inside [lower, upper].
mean((as.numeric(res$lower) <= y_test) * (as.numeric(res$upper) >= y_test))
## [1] 1
# Record the start time, then summarise the fitted model on the held-out data
# (y supplies the observed responses); type_ci is left at its default.
# NOTE(review): t0 is not read again in the visible code; the "Elapsed" line in
# the output presumably comes from a statement not shown here.
t0 <- proc.time()[3]
obj$summary(X_test, y=y_test, show_progress=FALSE)
## $R_squared
## [1] -3.076311
##
## $R_squared_adj
## [1] 37.6868
##
## $Residuals
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.660 6.009 6.743 6.944 7.966 10.753
##
## $Coverage_rate
## [1] 100
##
## $citests
## estimate lower upper p-value signif
## cyl -36.7817740 -50.6623622 -22.9011858 2.038888e-04 ***
## disp -0.2133047 -0.6537255 0.2271161 3.017009e-01
## hp -0.2920633 -0.8288709 0.2447443 2.495994e-01
## drat 259.9789584 141.9967301 377.9611867 7.545111e-04 ***
## wt -125.6032827 -159.9084338 -91.2981317 1.675653e-05 ***
## qsec 6.1547882 -22.2053730 34.5149494 6.352182e-01
## vs 35.1176737 -92.7670167 163.0023641 5.498770e-01
## am 85.8109695 -32.8651723 204.4871113 1.363325e-01
## gear 264.4099446 185.2573493 343.5625399 3.479849e-05 ***
## carb -24.1859057 -56.7206453 8.3488338 1.269314e-01
##
## $signif_codes
## [1] "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
##
## $effects
## ── Data Summary ────────────────────────
## Values
## Name effects
## Number of rows 10
## Number of columns 10
## _______________________
## Column type frequency:
## numeric 10
## ________________________
## Group variables None
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable mean sd p0 p25 p50 p75 p100
## 1 cyl -36.8 19.4 -52.8 -48.3 -40.9 -37.7 13.0
## 2 disp -0.213 0.616 -1.19 -0.561 -0.212 -0.146 0.781
## 3 hp -0.292 0.750 -1.73 -0.461 0.00750 0.135 0.531
## 4 drat 260. 165. -74.7 161. 265. 381. 485.
## 5 wt -126. 48.0 -202. -152. -125. -105. -30.9
## 6 qsec 6.15 39.6 -55.5 -20.7 -1.84 33.6 64.2
## 7 vs 35.1 179. -232. -108. 68.7 95.1 292.
## 8 am 85.8 166. -205. 94.4 122. 144. 346.
## 9 gear 264. 111. 122. 206. 242. 301. 529.
## 10 carb -24.2 45.5 -73.7 -54.9 -47.2 22.6 39.5
## hist
## 1 ▇▅▂▁▂
## 2 ▂▃▇▁▃
## 3 ▃▁▂▇▆
## 4 ▂▃▁▇▆
## 5 ▃▂▇▂▂
## 6 ▂▇▅▅▅
## 7 ▇▁▇▅▅
## 8 ▃▁▇▆▂
## 9 ▇▇▇▁▂
## 10 ▇▇▁▂▇
## Elapsed: 0.491 s
# Record the start time, then summarise the fitted model on the held-out data;
# type_ci = "bootstrap" selects how the intervals and p-values reported under
# $citests are computed.
# NOTE(review): t0 is not read again in the visible code; the "Elapsed" line in
# the output presumably comes from a statement not shown here.
t0 <- proc.time()[3]
obj$summary(X_test, y=y_test, show_progress=FALSE, type_ci="bootstrap")
## $R_squared
## [1] -3.076311
##
## $R_squared_adj
## [1] 37.6868
##
## $Residuals
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.660 6.009 6.743 6.944 7.966 10.753
##
## $Coverage_rate
## [1] 100
##
## $citests
## estimate lower upper p-value signif
## cyl -37.7454484 -45.5951036 -25.4424864 3.330824e-165 ***
## disp -0.2106604 -0.5751210 0.1509308 3.774075e-133 ***
## hp -0.2879116 -0.7162016 0.1127945 9.161570e-149 ***
## drat 260.2656421 157.3020284 349.2281183 3.330824e-165 ***
## wt -126.0333424 -152.2494144 -97.9210297 3.330824e-165 ***
## qsec 6.5179006 -16.0686231 30.1460294 2.976614e-52 ***
## vs 38.6340339 -67.2977058 131.2472107 2.898223e-78 ***
## am 87.8700197 -8.0927507 171.5314902 7.071000e-160 ***
## gear 263.7730423 206.7863856 330.1303853 3.330824e-165 ***
## carb -24.3011733 -49.1225434 2.5087847 7.537326e-162 ***
##
## $signif_codes
## [1] "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
##
## $effects
## ── Data Summary ────────────────────────
## Values
## Name effects
## Number of rows 10
## Number of columns 10
## _______________________
## Column type frequency:
## numeric 10
## ________________________
## Group variables None
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable mean sd p0 p25 p50 p75 p100
## 1 cyl -36.8 19.4 -52.8 -48.3 -40.9 -37.7 13.0
## 2 disp -0.213 0.616 -1.19 -0.561 -0.212 -0.146 0.781
## 3 hp -0.292 0.750 -1.73 -0.461 0.00750 0.135 0.531
## 4 drat 260. 165. -74.7 161. 265. 381. 485.
## 5 wt -126. 48.0 -202. -152. -125. -105. -30.9
## 6 qsec 6.15 39.6 -55.5 -20.7 -1.84 33.6 64.2
## 7 vs 35.1 179. -232. -108. 68.7 95.1 292.
## 8 am 85.8 166. -205. 94.4 122. 144. 346.
## 9 gear 264. 111. 122. 206. 242. 301. 529.
## 10 carb -24.2 45.5 -73.7 -54.9 -47.2 22.6 39.5
## hist
## 1 ▇▅▂▁▂
## 2 ▂▃▇▁▃
## 3 ▃▁▂▇▆
## 4 ▂▃▁▇▆
## 5 ▃▂▇▂▂
## 6 ▂▇▅▅▅
## 7 ▇▁▇▅▅
## 8 ▃▁▇▆▂
## 9 ▇▇▇▁▂
## 10 ▇▇▁▂▇
## Elapsed: 1.195 s
# Start the timer, then summarise the current model on the held-out data,
# requesting conformal intervals for the `$citests` table.
t0 <- proc.time()["elapsed"]
obj$summary(
  X_test,
  y = y_test,
  show_progress = FALSE,
  type_ci = "conformal"
)
## $R_squared
## [1] -3.076311
##
## $R_squared_adj
## [1] 37.6868
##
## $Residuals
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.660 6.009 6.743 6.944 7.966 10.753
##
## $Coverage_rate
## [1] 100
##
## $citests
## estimate lower upper p-value signif
## cyl -39.7840252 -47.4872258 -31.4799014 1.250976e-83 ***
## disp -0.2387740 -0.7816262 0.3715319 9.952268e-49 ***
## hp -0.2099869 -0.8357056 0.2798777 1.783507e-42 ***
## drat 257.1278559 73.5229906 409.6945353 1.488911e-83 ***
## wt -123.8632263 -161.4551428 -72.9909944 1.253015e-83 ***
## qsec 5.3742789 -34.4860716 46.1929182 4.171798e-08 ***
## vs 57.7189131 -83.9771371 194.1192920 2.046089e-52 ***
## am 70.2180272 -63.4640810 153.3095833 6.312497e-60 ***
## gear 224.5553844 168.6830661 274.9425132 1.247483e-83 ***
## carb -39.8042180 -64.8809066 -2.1193497 3.643944e-83 ***
##
## $signif_codes
## [1] "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
##
## $effects
## ── Data Summary ────────────────────────
## Values
## Name effects
## Number of rows 10
## Number of columns 10
## _______________________
## Column type frequency:
## numeric 10
## ________________________
## Group variables None
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable mean sd p0 p25 p50 p75 p100
## 1 cyl -36.8 19.4 -52.8 -48.3 -40.9 -37.7 13.0
## 2 disp -0.213 0.616 -1.19 -0.561 -0.212 -0.146 0.781
## 3 hp -0.292 0.750 -1.73 -0.461 0.00750 0.135 0.531
## 4 drat 260. 165. -74.7 161. 265. 381. 485.
## 5 wt -126. 48.0 -202. -152. -125. -105. -30.9
## 6 qsec 6.15 39.6 -55.5 -20.7 -1.84 33.6 64.2
## 7 vs 35.1 179. -232. -108. 68.7 95.1 292.
## 8 am 85.8 166. -205. 94.4 122. 144. 346.
## 9 gear 264. 111. 122. 206. 242. 301. 529.
## 10 carb -24.2 45.5 -73.7 -54.9 -47.2 22.6 39.5
## hist
## 1 ▇▅▂▁▂
## 2 ▂▃▇▁▃
## 3 ▃▁▂▇▆
## 4 ▂▃▁▇▆
## 5 ▃▂▇▂▂
## 6 ▂▇▅▅▅
## 7 ▇▁▇▅▅
## 8 ▃▁▇▆▂
## 9 ▇▇▇▁▂
## 10 ▇▇▁▂▇
## Elapsed: 0.402 s
xgboost
# Regressor backed by xgboost, with the "splitconformal" interval method.
obj <- learningmachine::Regressor$new(
  method = "xgboost",
  pi_method = "splitconformal"
)
obj$get_type()
## [1] "regression"
obj$get_name()
## [1] "Regressor"
# Time the fit on the training split; `nrounds` and `verbose` are supplied
# alongside the data.
t0 <- proc.time()["elapsed"]
obj$fit(X_train, y_train, nrounds = 10, verbose = FALSE)
cat("Elapsed: ", proc.time()["elapsed"] - t0, "s \n")
## Elapsed: 0.052 s
# Show predictions together with their lower/upper bounds for the test rows.
print(obj$predict(X = X_test))
## $preds
## [1] 18.13500 18.13500 17.13105 17.13105 14.64118 14.64118 14.03685 21.29947
## [9] 15.33300 14.03685
##
## $lower
## [1] 12.135002 12.135002 11.131052 11.131052 8.641179 8.641179 8.036854
## [8] 15.299475 9.333004 8.036854
##
## $upper
## [1] 24.13500 24.13500 23.13105 23.13105 20.64118 20.64118 20.03685 27.29947
## [9] 21.33300 20.03685
# Start the timer, then summarise the xgboost model on the held-out data with
# the default interval type.
t0 <- proc.time()["elapsed"]
obj$summary(
  X_test,
  y = y_test,
  show_progress = FALSE
)
## $R_squared
## [1] 0.2881145
##
## $R_squared_adj
## [1] 7.406969
##
## $Residuals
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -3.6369 0.3926 2.2088 1.5079 2.8650 5.1631
##
## $Coverage_rate
## [1] 100
##
## $citests
## estimate lower upper p-value signif
## cyl 0.0000000 NaN NaN NaN
## disp -0.1859971 -0.6067516 0.2347575 0.3434364
## hp 0.0000000 NaN NaN NaN
## drat 28.9866074 -18.4823056 76.4555203 0.2004909
## wt 0.0000000 NaN NaN NaN
## qsec -1.7295559 -5.6420830 2.1829713 0.3434364
## vs 0.0000000 NaN NaN NaN
## am 0.0000000 NaN NaN NaN
## gear 0.0000000 NaN NaN NaN
## carb 0.0000000 NaN NaN NaN
##
## $signif_codes
## [1] "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
##
## $effects
## ── Data Summary ────────────────────────
## Values
## Name effects
## Number of rows 10
## Number of columns 10
## _______________________
## Column type frequency:
## numeric 10
## ________________________
## Group variables None
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable mean sd p0 p25 p50 p75 p100 hist
## 1 cyl 0 0 0 0 0 0 0 ▁▁▇▁▁
## 2 disp -0.186 0.588 -1.86 0 0 0 0 ▁▁▁▁▇
## 3 hp 0 0 0 0 0 0 0 ▁▁▇▁▁
## 4 drat 29.0 66.4 0 0 0 0 200. ▇▁▁▁▁
## 5 wt 0 0 0 0 0 0 0 ▁▁▇▁▁
## 6 qsec -1.73 5.47 -17.3 0 0 0 0 ▁▁▁▁▇
## 7 vs 0 0 0 0 0 0 0 ▁▁▇▁▁
## 8 am 0 0 0 0 0 0 0 ▁▁▇▁▁
## 9 gear 0 0 0 0 0 0 0 ▁▁▇▁▁
## 10 carb 0 0 0 0 0 0 0 ▁▁▇▁▁
## Elapsed: 0.398 s
# Start the timer, then summarise the xgboost model with bootstrap-based
# intervals.
t0 <- proc.time()["elapsed"]
obj$summary(
  X_test,
  y = y_test,
  show_progress = FALSE,
  type_ci = "bootstrap"
)
## Warning in wilcox.test.default(x, mu = mu_0): cannot compute exact p-value with
## zeroes
## Warning in wilcox.test.default(x, mu = mu_0): cannot compute exact p-value with
## zeroes
## Warning in wilcox.test.default(x, mu = mu_0): cannot compute exact p-value with
## zeroes
## Warning in wilcox.test.default(x, mu = mu_0): cannot compute exact p-value with
## zeroes
## Warning in wilcox.test.default(x, mu = mu_0): cannot compute exact p-value with
## zeroes
## Warning in wilcox.test.default(x, mu = mu_0): cannot compute exact p-value with
## zeroes
## Warning in wilcox.test.default(x, mu = mu_0): cannot compute exact p-value with
## zeroes
## $R_squared
## [1] 0.2881145
##
## $R_squared_adj
## [1] 7.406969
##
## $Residuals
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -3.6369 0.3926 2.2088 1.5079 2.8650 5.1631
##
## $Coverage_rate
## [1] 100
##
## $citests
## estimate lower upper p-value signif
## cyl 0.0000000 0.0000000 0.00000 NaN
## disp -0.1859971 -0.5579912 0.00000 8.962365e-118 ***
## hp 0.0000000 0.0000000 0.00000 NaN
## drat 28.9866074 0.0000000 77.95299 5.856765e-149 ***
## wt 0.0000000 0.0000000 0.00000 NaN
## qsec -1.7295559 -5.1886676 0.00000 9.526227e-117 ***
## vs 0.0000000 0.0000000 0.00000 NaN
## am 0.0000000 0.0000000 0.00000 NaN
## gear 0.0000000 0.0000000 0.00000 NaN
## carb 0.0000000 0.0000000 0.00000 NaN
##
## $signif_codes
## [1] "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
##
## $effects
## ── Data Summary ────────────────────────
## Values
## Name effects
## Number of rows 10
## Number of columns 10
## _______________________
## Column type frequency:
## numeric 10
## ________________________
## Group variables None
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable mean sd p0 p25 p50 p75 p100 hist
## 1 cyl 0 0 0 0 0 0 0 ▁▁▇▁▁
## 2 disp -0.186 0.588 -1.86 0 0 0 0 ▁▁▁▁▇
## 3 hp 0 0 0 0 0 0 0 ▁▁▇▁▁
## 4 drat 29.0 66.4 0 0 0 0 200. ▇▁▁▁▁
## 5 wt 0 0 0 0 0 0 0 ▁▁▇▁▁
## 6 qsec -1.73 5.47 -17.3 0 0 0 0 ▁▁▁▁▇
## 7 vs 0 0 0 0 0 0 0 ▁▁▇▁▁
## 8 am 0 0 0 0 0 0 0 ▁▁▇▁▁
## 9 gear 0 0 0 0 0 0 0 ▁▁▇▁▁
## 10 carb 0 0 0 0 0 0 0 ▁▁▇▁▁
## Elapsed: 0.848 s
# Start the timer, then summarise the xgboost model with conformal intervals.
t0 <- proc.time()["elapsed"]
obj$summary(
  X_test,
  y = y_test,
  show_progress = FALSE,
  type_ci = "conformal"
)
## Warning in wilcox.test.default(x, mu = mu_0): cannot compute exact p-value with
## zeroes
## Warning in wilcox.test.default(x, mu = mu_0): cannot compute exact p-value with
## zeroes
## Warning in wilcox.test.default(x, mu = mu_0): cannot compute exact p-value with
## zeroes
## Warning in wilcox.test.default(x, mu = mu_0): cannot compute exact p-value with
## zeroes
## Warning in wilcox.test.default(x, mu = mu_0): cannot compute exact p-value with
## zeroes
## Warning in wilcox.test.default(x, mu = mu_0): cannot compute exact p-value with
## zeroes
## Warning in wilcox.test.default(x, mu = mu_0): cannot compute exact p-value with
## zeroes
## Warning in wilcox.test.default(x, mu = mu_0): cannot compute exact p-value with
## zeroes
## $R_squared
## [1] 0.2881145
##
## $R_squared_adj
## [1] 7.406969
##
## $Residuals
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -3.6369 0.3926 2.2088 1.5079 2.8650 5.1631
##
## $Coverage_rate
## [1] 100
##
## $citests
## estimate lower upper p-value signif
## cyl 0.0000000 0.000000 0.0000 NaN
## disp -0.3719941 -1.115982 0.0000 2.264193e-58 ***
## hp 0.0000000 0.000000 0.0000 NaN
## drat 39.9595601 0.000000 119.8787 5.882537e-59 ***
## wt 0.0000000 0.000000 0.0000 NaN
## qsec 0.0000000 0.000000 0.0000 NaN
## vs 0.0000000 0.000000 0.0000 NaN
## am 0.0000000 0.000000 0.0000 NaN
## gear 0.0000000 0.000000 0.0000 NaN
## carb 0.0000000 0.000000 0.0000 NaN
##
## $signif_codes
## [1] "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
##
## $effects
## ── Data Summary ────────────────────────
## Values
## Name effects
## Number of rows 10
## Number of columns 10
## _______________________
## Column type frequency:
## numeric 10
## ________________________
## Group variables None
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable mean sd p0 p25 p50 p75 p100 hist
## 1 cyl 0 0 0 0 0 0 0 ▁▁▇▁▁
## 2 disp -0.186 0.588 -1.86 0 0 0 0 ▁▁▁▁▇
## 3 hp 0 0 0 0 0 0 0 ▁▁▇▁▁
## 4 drat 29.0 66.4 0 0 0 0 200. ▇▁▁▁▁
## 5 wt 0 0 0 0 0 0 0 ▁▁▇▁▁
## 6 qsec -1.73 5.47 -17.3 0 0 0 0 ▁▁▁▁▇
## 7 vs 0 0 0 0 0 0 0 ▁▁▇▁▁
## 8 am 0 0 0 0 0 0 0 ▁▁▇▁▁
## 9 gear 0 0 0 0 0 0 0 ▁▁▇▁▁
## 10 carb 0 0 0 0 0 0 0 ▁▁▇▁▁
## Elapsed: 0.351 s
# Refit the xgboost model and report the elapsed time.
t0 <- proc.time()["elapsed"]
obj$fit(X_train, y_train, nrounds = 10, verbose = FALSE)
cat("Elapsed: ", proc.time()["elapsed"] - t0, "s \n")
## Elapsed: 0.013 s
# Set the level to 95 (presumably the interval coverage in percent) and
# predict on the held-out rows.
obj$set_level(95)
res <- obj$predict(X = X_test)
# Training responses followed by the test predictions; the y-axis spans the
# interval bounds and every observed response.
plot(
  c(y_train, res$preds),
  type = "l",
  main = "",
  ylab = "",
  ylim = range(res$upper, res$lower, y)
)
lines(c(y_train, res$upper), col = "gray60")
lines(c(y_train, res$lower), col = "gray60")
lines(c(y_train, res$preds), col = "red")
lines(c(y_train, y_test), col = "blue")
# Fraction of test responses lying inside [lower, upper].
mean(
  (y_test >= as.numeric(res$lower)) *
    (y_test <= as.numeric(res$upper))
)
## [1] 1
# Switch the interval method to "kdesplitconformal", then refit and time it.
obj$set_pi_method("kdesplitconformal")
t0 <- proc.time()["elapsed"]
obj$fit(X_train, y_train, nrounds = 10, verbose = FALSE)
cat("Elapsed: ", proc.time()["elapsed"] - t0, "s \n")
## Elapsed: 0.012 s
# Set the level to 95 (presumably the interval coverage in percent) and
# predict on the held-out rows.
obj$set_level(95)
res <- obj$predict(X = X_test)
# Training responses followed by the test predictions; the y-axis spans the
# interval bounds and every observed response.
plot(
  c(y_train, res$preds),
  type = "l",
  main = "",
  ylab = "",
  ylim = range(res$upper, res$lower, y)
)
lines(c(y_train, res$upper), col = "gray60")
lines(c(y_train, res$lower), col = "gray60")
lines(c(y_train, res$preds), col = "red")
lines(c(y_train, y_test), col = "blue")
# Fraction of test responses lying inside [lower, upper].
mean(
  (y_test >= as.numeric(res$lower)) *
    (y_test <= as.numeric(res$upper))
)
## [1] 1
# Switch the interval method to "bootjackknifeplus", then refit and time it.
obj$set_pi_method("bootjackknifeplus")
t0 <- proc.time()["elapsed"]
obj$fit(X_train, y_train, nrounds = 10, verbose = FALSE)
cat("Elapsed: ", proc.time()["elapsed"] - t0, "s \n")
## Elapsed: 0.002 s
# Set the level to 95 (presumably the interval coverage in percent) and
# predict on the held-out rows.
obj$set_level(95)
res <- obj$predict(X = X_test)
# Training responses followed by the test predictions; the y-axis spans the
# interval bounds and every observed response.
plot(
  c(y_train, res$preds),
  type = "l",
  main = "",
  ylab = "",
  ylim = range(res$upper, res$lower, y)
)
lines(c(y_train, res$upper), col = "gray60")
lines(c(y_train, res$lower), col = "gray60")
lines(c(y_train, res$preds), col = "red")
lines(c(y_train, y_test), col = "blue")
# Fraction of test responses lying inside [lower, upper].
mean(
  (y_test >= as.numeric(res$lower)) *
    (y_test <= as.numeric(res$upper))
)
## [1] 1
# RVFL regressor with the "splitconformal" interval method; `nb_hidden = 50L`
# is presumably the number of hidden nodes.
obj <- learningmachine::Regressor$new(
  method = "rvfl",
  nb_hidden = 50L,
  pi_method = "splitconformal"
)
obj$get_type()
## [1] "regression"
obj$get_name()
## [1] "Regressor"
# Time the fit on the training split with `reg_lambda = 0.01`.
t0 <- proc.time()["elapsed"]
obj$fit(X_train, y_train, reg_lambda = 0.01)
cat("Elapsed: ", proc.time()["elapsed"] - t0, "s \n")
## Elapsed: 0.01 s
# Show predictions together with their lower/upper bounds for the test rows.
print(obj$predict(X = X_test))
## $preds
## Mazda RX4 Mazda RX4 Wag Hornet 4 Drive Valiant
## 21.350888 19.789387 13.106761 9.695310
## Merc 450SE Merc 450SL Lincoln Continental Toyota Corona
## 11.131161 12.568682 2.044672 19.289805
## Camaro Z28 Pontiac Firebird
## 14.847878 12.282272
##
## $lower
## Mazda RX4 Mazda RX4 Wag Hornet 4 Drive Valiant
## 12.3508879 10.7893873 4.1067608 0.6953102
## Merc 450SE Merc 450SL Lincoln Continental Toyota Corona
## 2.1311611 3.5686817 -6.9553279 10.2898053
## Camaro Z28 Pontiac Firebird
## 5.8478777 3.2822719
##
## $upper
## Mazda RX4 Mazda RX4 Wag Hornet 4 Drive Valiant
## 30.35089 28.78939 22.10676 18.69531
## Merc 450SE Merc 450SL Lincoln Continental Toyota Corona
## 20.13116 21.56868 11.04467 28.28981
## Camaro Z28 Pontiac Firebird
## 23.84788 21.28227
# Start the timer, then summarise the RVFL model on the held-out data with the
# default interval type.
t0 <- proc.time()["elapsed"]
obj$summary(
  X_test,
  y = y_test,
  show_progress = FALSE
)
## $R_squared
## [1] -1.505856
##
## $R_squared_adj
## [1] 23.55271
##
## $Residuals
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.548 1.461 5.000 4.349 7.949 8.405
##
## $Coverage_rate
## [1] 100
##
## $citests
## estimate lower upper p-value signif
## cyl 137.649985 39.777048 235.5229227 1.115728e-02 *
## disp -2.406399 -4.650678 -0.1621204 3.825959e-02 *
## hp -0.527573 -1.402043 0.3468975 2.054686e-01
## drat 707.372951 246.095138 1168.6507638 7.059500e-03 **
## wt -500.429007 -565.047979 -435.8100352 2.910469e-08 ***
## qsec -89.930939 -124.899691 -54.9621860 2.537870e-04 ***
## vs 234.198406 -127.886990 596.2838006 1.774484e-01
## am -235.789718 -512.422513 40.8430776 8.592503e-02 .
## gear 52.646721 -6.640614 111.9340567 7.547657e-02 .
## carb -17.100561 -87.819649 53.6185270 5.976705e-01
##
## $signif_codes
## [1] "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
##
## $effects
## ── Data Summary ────────────────────────
## Values
## Name effects
## Number of rows 10
## Number of columns 10
## _______________________
## Column type frequency:
## numeric 10
## ________________________
## Group variables None
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable mean sd p0 p25 p50 p75 p100
## 1 cyl 138. 137. -8.40 75.8 91.1 98.6 394.
## 2 disp -2.41 3.14 -8.46 -1.32 -1.08 -0.775 -0.300
## 3 hp -0.528 1.22 -3.40 -0.695 -0.188 0.0137 0.893
## 4 drat 707. 645. 55.7 388. 482. 563. 1939.
## 5 wt -500. 90.3 -698. -538. -500. -458. -377.
## 6 qsec -89.9 48.9 -145. -128. -102. -64.0 2.67
## 7 vs 234. 506. -121. -13.2 36.8 53.2 1269.
## 8 am -236. 387. -653. -450. -397. -168. 519.
## 9 gear 52.6 82.9 -107. -4.69 66.2 112. 170.
## 10 carb -17.1 98.9 -117. -64.6 -60.6 -17.5 171.
## hist
## 1 ▂▇▁▁▂
## 2 ▂▁▁▁▇
## 3 ▁▁▁▇▂
## 4 ▅▇▁▁▃
## 5 ▂▁▆▇▃
## 6 ▇▆▁▂▃
## 7 ▇▁▁▁▂
## 8 ▆▇▂▁▃
## 9 ▂▅▅▅▇
## 10 ▇▂▁▁▂
## Elapsed: 0.221 s
# Start the timer, then summarise the RVFL model with bootstrap-based
# intervals.
t0 <- proc.time()["elapsed"]
obj$summary(
  X_test,
  y = y_test,
  show_progress = FALSE,
  type_ci = "bootstrap"
)
## $R_squared
## [1] -1.505856
##
## $R_squared_adj
## [1] 23.55271
##
## $Residuals
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.548 1.461 5.000 4.349 7.949 8.405
##
## $Coverage_rate
## [1] 100
##
## $citests
## estimate lower upper p-value signif
## cyl 137.0111423 69.296022 213.05499150 3.330824e-165 ***
## disp -2.4063335 -4.151551 -0.85145010 3.330824e-165 ***
## hp -0.4894821 -1.206580 0.06393166 1.559621e-159 ***
## drat 704.2409575 379.648789 1088.69475280 3.330824e-165 ***
## wt -499.6464041 -553.621396 -453.75958810 3.330824e-165 ***
## qsec -89.9155324 -116.434421 -60.70468612 3.330824e-165 ***
## vs 235.1520119 -14.444923 528.12353553 2.876117e-162 ***
## am -241.1699530 -437.064336 -4.24144263 3.530328e-163 ***
## gear 52.2145847 5.676782 96.67674463 1.907992e-164 ***
## carb -17.7070158 -67.018662 43.26358463 1.396178e-56 ***
##
## $signif_codes
## [1] "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
##
## $effects
## ── Data Summary ────────────────────────
## Values
## Name effects
## Number of rows 10
## Number of columns 10
## _______________________
## Column type frequency:
## numeric 10
## ________________________
## Group variables None
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable mean sd p0 p25 p50 p75 p100
## 1 cyl 138. 137. -8.40 75.8 91.1 98.6 394.
## 2 disp -2.41 3.14 -8.46 -1.32 -1.08 -0.775 -0.300
## 3 hp -0.528 1.22 -3.40 -0.695 -0.188 0.0137 0.893
## 4 drat 707. 645. 55.7 388. 482. 563. 1939.
## 5 wt -500. 90.3 -698. -538. -500. -458. -377.
## 6 qsec -89.9 48.9 -145. -128. -102. -64.0 2.67
## 7 vs 234. 506. -121. -13.2 36.8 53.2 1269.
## 8 am -236. 387. -653. -450. -397. -168. 519.
## 9 gear 52.6 82.9 -107. -4.69 66.2 112. 170.
## 10 carb -17.1 98.9 -117. -64.6 -60.6 -17.5 171.
## hist
## 1 ▂▇▁▁▂
## 2 ▂▁▁▁▇
## 3 ▁▁▁▇▂
## 4 ▅▇▁▁▃
## 5 ▂▁▆▇▃
## 6 ▇▆▁▂▃
## 7 ▇▁▁▁▂
## 8 ▆▇▂▁▃
## 9 ▂▅▅▅▇
## 10 ▇▂▁▁▂
## Elapsed: 1.336 s
# Start the timer, then summarise the RVFL model with conformal intervals.
t0 <- proc.time()["elapsed"]
obj$summary(
  X_test,
  y = y_test,
  show_progress = FALSE,
  type_ci = "conformal"
)
## $R_squared
## [1] -1.505856
##
## $R_squared_adj
## [1] 23.55271
##
## $Residuals
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.548 1.461 5.000 4.349 7.949 8.405
##
## $Coverage_rate
## [1] 100
##
## $citests
## estimate lower upper p-value signif
## cyl 150.3445262 87.812350 269.06011283 1.250607e-83 ***
## disp -2.4109857 -5.486538 -0.79591386 1.250216e-83 ***
## hp -0.5248472 -1.082605 -0.07181913 1.515688e-83 ***
## drat 646.6988568 218.854514 1303.05566246 1.249299e-83 ***
## wt -470.3148562 -516.997944 -415.26465965 1.249068e-83 ***
## qsec -87.0403486 -131.680654 -41.00111741 1.252016e-83 ***
## vs 250.5498624 -28.182752 766.57411838 1.753121e-67 ***
## am -259.2704269 -539.300395 113.18348316 1.498428e-75 ***
## gear 67.8915285 20.037869 106.80377873 1.274353e-83 ***
## carb -15.7962310 -70.922940 78.38902384 8.128479e-11 ***
##
## $signif_codes
## [1] "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
##
## $effects
## ── Data Summary ────────────────────────
## Values
## Name effects
## Number of rows 10
## Number of columns 10
## _______________________
## Column type frequency:
## numeric 10
## ________________________
## Group variables None
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable mean sd p0 p25 p50 p75 p100
## 1 cyl 138. 137. -8.40 75.8 91.1 98.6 394.
## 2 disp -2.41 3.14 -8.46 -1.32 -1.08 -0.775 -0.300
## 3 hp -0.528 1.22 -3.40 -0.695 -0.188 0.0137 0.893
## 4 drat 707. 645. 55.7 388. 482. 563. 1939.
## 5 wt -500. 90.3 -698. -538. -500. -458. -377.
## 6 qsec -89.9 48.9 -145. -128. -102. -64.0 2.67
## 7 vs 234. 506. -121. -13.2 36.8 53.2 1269.
## 8 am -236. 387. -653. -450. -397. -168. 519.
## 9 gear 52.6 82.9 -107. -4.69 66.2 112. 170.
## 10 carb -17.1 98.9 -117. -64.6 -60.6 -17.5 171.
## hist
## 1 ▂▇▁▁▂
## 2 ▂▁▁▁▇
## 3 ▁▁▁▇▂
## 4 ▅▇▁▁▃
## 5 ▂▁▆▇▃
## 6 ▇▆▁▂▃
## 7 ▇▁▁▁▂
## 8 ▆▇▂▁▃
## 9 ▂▅▅▅▇
## 10 ▇▂▁▁▂
## Elapsed: 0.299 s
## Elapsed: 0.014 s
# Set the level to 95 (presumably the interval coverage in percent) and
# predict on the held-out rows.
obj$set_level(95)
res <- obj$predict(X = X_test)
# Training responses followed by the test predictions; the y-axis spans the
# interval bounds and every observed response.
plot(
  c(y_train, res$preds),
  type = "l",
  main = "",
  ylab = "",
  ylim = range(res$upper, res$lower, y)
)
lines(c(y_train, res$upper), col = "gray60")
lines(c(y_train, res$lower), col = "gray60")
lines(c(y_train, res$preds), col = "red")
lines(c(y_train, y_test), col = "blue")
# Dashed vertical line at the last training observation.
abline(v = length(y_train), lty = 2, col = "black")
# Fraction of test responses lying inside [lower, upper].
mean(
  (y_test >= as.numeric(res$lower)) *
    (y_test <= as.numeric(res$upper))
)
## [1] 1
update RVFL model
# Keep a copy of the coefficients before the update so they can be compared
# afterwards.
previous_coefs <- drop(obj$model$coef)
# The first held-out observation becomes the new data point; the remaining
# rows stay available for evaluation.
newx <- X_test[1L, ]
newy <- y_test[1L]
new_X_test <- X_test[-1L, ]
new_y_test <- y_test[-1L]
# Time the update with the single new observation.
t0 <- proc.time()["elapsed"]
obj$update(newx, newy)
cat("Elapsed: ", proc.time()["elapsed"] - t0, "s \n")
## Elapsed: 0.009 s
summary(previous_coefs)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.68212 -0.26567 -0.05157 0.00700 0.21046 2.19222
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.030666 -0.002610 0.004189 0.002917 0.011386 0.025243
# Start the timer, then summarise the updated model on the remaining held-out
# rows with the default interval type.
start <- proc.time()["elapsed"]
obj$summary(
  new_X_test,
  y = new_y_test,
  show_progress = FALSE
)
## $R_squared
## [1] -1.809339
##
## $R_squared_adj
## [1] 12.23735
##
## $Residuals
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.168 2.513 5.541 5.058 8.185 8.703
##
## $Coverage_rate
## [1] 100
##
## $citests
## estimate lower upper p-value signif
## cyl 111.6701473 17.076928 206.2633669 2.615518e-02 *
## disp -1.7983224 -3.876380 0.2797349 8.106884e-02 .
## hp -0.4167545 -1.501658 0.6681495 4.015523e-01
## drat 569.9102780 148.862037 990.9585186 1.420088e-02 *
## wt -504.1496696 -583.757006 -424.5423330 4.741273e-07 ***
## qsec -107.9102921 -138.571336 -77.2492482 3.936777e-05 ***
## vs 145.0280002 -173.164419 463.2204193 3.239468e-01
## am -319.6910568 -566.618653 -72.7634604 1.745263e-02 *
## gear 57.7630332 -18.934712 134.4607782 1.206459e-01
## carb -42.9572292 -108.690903 22.7764447 1.702409e-01
##
## $signif_codes
## [1] "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
##
## $effects
## ── Data Summary ────────────────────────
## Values
## Name effects
## Number of rows 9
## Number of columns 10
## _______________________
## Column type frequency:
## numeric 10
## ________________________
## Group variables None
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable mean sd p0 p25 p50 p75 p100
## 1 cyl 112. 123. -13.5 64.5 93.6 93.9 426.
## 2 disp -1.80 2.70 -8.94 -1.41 -0.805 -0.689 -0.361
## 3 hp -0.417 1.41 -3.54 -0.679 -0.0942 -0.0556 1.19
## 4 drat 570. 548. 36.8 371. 439. 501. 1972.
## 5 wt -504. 104. -742. -523. -497. -461. -382.
## 6 qsec -108. 39.9 -152. -143. -115. -93.0 -35.9
## 7 vs 145. 414. -116. -23.9 51.1 81.2 1231.
## 8 am -320. 321. -575. -479. -395. -368. 465.
## 9 gear 57.8 99.8 -113. 1.22 35.2 130. 196.
## 10 carb -43.0 85.5 -129. -79.6 -77.9 -22.5 165.
## hist
## 1 ▅▇▁▁▂
## 2 ▁▁▁▁▇
## 3 ▂▁▂▇▃
## 4 ▅▇▁▁▂
## 5 ▂▁▂▇▃
## 6 ▇▅▅▂▂
## 7 ▇▁▁▁▁
## 8 ▇▁▁▁▁
## 9 ▃▇▇▇▇
## 10 ▇▅▁▁▂
## Elapsed: 0.432 s
# Start the timer, then summarise the updated model with bootstrap-based
# intervals.
start <- proc.time()["elapsed"]
obj$summary(
  new_X_test,
  y = new_y_test,
  show_progress = FALSE,
  type_ci = "bootstrap"
)
## $R_squared
## [1] -1.809339
##
## $R_squared_adj
## [1] 12.23735
##
## $Residuals
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.168 2.513 5.541 5.058 8.185 8.703
##
## $Coverage_rate
## [1] 100
##
## $citests
## estimate lower upper p-value signif
## cyl 112.3919404 55.200080 198.90428 3.330730e-165 ***
## disp -1.7848239 -3.647352 -0.74150 3.330730e-165 ***
## hp -0.3674135 -1.299011 0.34313 3.063436e-110 ***
## drat 565.9832346 325.398523 950.32059 3.330730e-165 ***
## wt -504.3143079 -572.073593 -451.85782 3.330730e-165 ***
## qsec -107.9791731 -129.437921 -82.20922 3.330730e-165 ***
## vs 144.4399640 -32.248151 427.63924 2.565958e-145 ***
## am -324.5343769 -475.286679 -100.58739 3.688681e-165 ***
## gear 57.8310159 -1.595434 116.61761 2.545767e-162 ***
## carb -42.8382811 -84.631850 12.61998 4.603841e-153 ***
##
## $signif_codes
## [1] "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
##
## $effects
## ── Data Summary ────────────────────────
## Values
## Name effects
## Number of rows 9
## Number of columns 10
## _______________________
## Column type frequency:
## numeric 10
## ________________________
## Group variables None
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable mean sd p0 p25 p50 p75 p100
## 1 cyl 112. 123. -13.5 64.5 93.6 93.9 426.
## 2 disp -1.80 2.70 -8.94 -1.41 -0.805 -0.689 -0.361
## 3 hp -0.417 1.41 -3.54 -0.679 -0.0942 -0.0556 1.19
## 4 drat 570. 548. 36.8 371. 439. 501. 1972.
## 5 wt -504. 104. -742. -523. -497. -461. -382.
## 6 qsec -108. 39.9 -152. -143. -115. -93.0 -35.9
## 7 vs 145. 414. -116. -23.9 51.1 81.2 1231.
## 8 am -320. 321. -575. -479. -395. -368. 465.
## 9 gear 57.8 99.8 -113. 1.22 35.2 130. 196.
## 10 carb -43.0 85.5 -129. -79.6 -77.9 -22.5 165.
## hist
## 1 ▅▇▁▁▂
## 2 ▁▁▁▁▇
## 3 ▂▁▂▇▃
## 4 ▅▇▁▁▂
## 5 ▂▁▂▇▃
## 6 ▇▅▅▂▂
## 7 ▇▁▁▁▁
## 8 ▇▁▁▁▁
## 9 ▃▇▇▇▇
## 10 ▇▅▁▁▂
## Elapsed: 0.577 s
# Start the timer, then summarise the updated model with conformal intervals.
start <- proc.time()["elapsed"]
obj$summary(
  new_X_test,
  y = new_y_test,
  show_progress = FALSE,
  type_ci = "conformal"
)
## $R_squared
## [1] -1.809339
##
## $R_squared_adj
## [1] 12.23735
##
## $Residuals
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.168 2.513 5.541 5.058 8.185 8.703
##
## $Coverage_rate
## [1] 100
##
## $citests
## estimate lower upper p-value signif
## cyl 140.206405 30.897867 272.5024431 1.253748e-83 ***
## disp -2.338689 -5.638112 -0.5700328 1.247348e-83 ***
## hp -1.002518 -2.384855 -0.1801822 1.253009e-83 ***
## drat 688.854780 222.599631 1357.5819419 1.247907e-83 ***
## wt -497.028711 -632.112217 -404.6436236 1.250973e-83 ***
## qsec -102.341868 -141.123901 -63.0025474 1.241731e-83 ***
## vs 301.065907 61.939634 768.2851053 1.173799e-83 ***
## am -208.479868 -467.258502 171.5791962 2.195275e-66 ***
## gear 111.884197 50.838048 163.9796862 1.251599e-83 ***
## carb -40.150576 -108.414021 67.1890930 1.421564e-45 ***
##
## $signif_codes
## [1] "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
##
## $effects
## ── Data Summary ────────────────────────
## Values
## Name effects
## Number of rows 9
## Number of columns 10
## _______________________
## Column type frequency:
## numeric 10
## ________________________
## Group variables None
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable mean sd p0 p25 p50 p75 p100
## 1 cyl 112. 123. -13.5 64.5 93.6 93.9 426.
## 2 disp -1.80 2.70 -8.94 -1.41 -0.805 -0.689 -0.361
## 3 hp -0.417 1.41 -3.54 -0.679 -0.0942 -0.0556 1.19
## 4 drat 570. 548. 36.8 371. 439. 501. 1972.
## 5 wt -504. 104. -742. -523. -497. -461. -382.
## 6 qsec -108. 39.9 -152. -143. -115. -93.0 -35.9
## 7 vs 145. 414. -116. -23.9 51.1 81.2 1231.
## 8 am -320. 321. -575. -479. -395. -368. 465.
## 9 gear 57.8 99.8 -113. 1.22 35.2 130. 196.
## 10 carb -43.0 85.5 -129. -79.6 -77.9 -22.5 165.
## hist
## 1 ▅▇▁▁▂
## 2 ▁▁▁▁▇
## 3 ▂▁▂▇▃
## 4 ▅▇▁▁▂
## 5 ▂▁▂▇▃
## 6 ▇▅▅▂▂
## 7 ▇▁▁▁▁
## 8 ▇▁▁▁▁
## 9 ▃▇▇▇▇
## 10 ▇▅▁▁▂
## Elapsed: 0.405 s
# Predict on the rows that were not used for the update.
res <- obj$predict(X = new_X_test)
# Observed segment of the plot: the original training responses plus the
# response that was fed to the update.
new_y_train <- c(y_train, newy)
# Observed responses followed by the predictions; the y-axis spans the
# interval bounds and every observed response.
plot(
  c(new_y_train, res$preds),
  type = "l",
  main = "",
  ylab = "",
  ylim = range(res$upper, res$lower, y)
)
lines(c(new_y_train, res$upper), col = "gray60")
lines(c(new_y_train, res$lower), col = "gray60")
lines(c(new_y_train, res$preds), col = "red")
lines(c(new_y_train, new_y_test), col = "blue")
# The dashed line marks the end of the observed segment. That segment is now
# `new_y_train`, so `length(y_train)` would place the marker one point short.
abline(v = length(new_y_train), lty = 2, col = "black")
# Fraction of remaining test responses lying inside [lower, upper].
mean(
  (new_y_test >= as.numeric(res$lower)) *
    (new_y_test <= as.numeric(res$upper))
)
## [1] 1
update RVFL model (Pt.2)
# Second online update: the second held-out observation becomes the new data
# point, and the first two rows are dropped from the evaluation set.
newx <- X_test[2, ]
newy <- y_test[2]
new_X_test <- X_test[-c(1, 2), ]
new_y_test <- y_test[-c(1, 2)]
# Apply and time the update, as in the first update step. Without this call
# `newx` and `newy` are never used and the model would stay unchanged.
t0 <- proc.time()[3]
obj$update(newx, newy)
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## Elapsed: 0.007 s
# Start the timer, then summarise the model on the remaining held-out rows
# with the default interval type.
t0 <- proc.time()["elapsed"]
obj$summary(
  new_X_test,
  y = new_y_test,
  show_progress = FALSE
)
## $R_squared
## [1] -3.356623
##
## $R_squared_adj
## [1] 11.16545
##
## $Residuals
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.950 5.030 6.374 6.369 8.774 11.528
##
## $Coverage_rate
## [1] 75
##
## $citests
## estimate lower upper p-value signif
## cyl 40.8981137 6.878148 74.9180798 2.494779e-02 *
## disp -0.7335494 -1.206939 -0.2601595 8.026181e-03 **
## hp -0.8233606 -2.198927 0.5522055 1.998737e-01
## drat 549.7206897 416.053783 683.3875968 2.570765e-05 ***
## wt -469.9351032 -535.877454 -403.9927527 6.344763e-07 ***
## qsec -116.6183871 -156.767393 -76.4693814 2.380078e-04 ***
## vs -194.4213942 -288.046178 -100.7966103 1.732503e-03 **
## am -395.7216847 -562.762331 -228.6810387 8.143911e-04 ***
## gear 53.0732573 -59.833653 165.9801679 3.030574e-01
## carb -25.9448064 -63.759959 11.8703467 1.487567e-01
##
## $signif_codes
## [1] "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
##
## $effects
## ── Data Summary ────────────────────────
## Values
## Name effects
## Number of rows 8
## Number of columns 10
## _______________________
## Column type frequency:
## numeric 10
## ________________________
## Group variables None
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable mean sd p0 p25 p50 p75 p100
## 1 cyl 40.9 40.7 -40.5 23.9 56.3 69.9 77.8
## 2 disp -0.734 0.566 -1.64 -1.03 -0.571 -0.372 -0.139
## 3 hp -0.823 1.65 -3.99 -1.18 -0.974 -0.196 1.25
## 4 drat 550. 160. 170. 549. 606. 642. 643.
## 5 wt -470. 78.9 -543. -537. -489. -437. -336.
## 6 qsec -117. 48.0 -179. -143. -131. -99.1 -29.9
## 7 vs -194. 112. -377. -283. -162. -120. -46.3
## 8 am -396. 200. -719. -481. -357. -319. -67.7
## 9 gear 53.1 135. -143. -23.9 16.5 172. 231.
## 10 carb -25.9 45.2 -101. -48.8 -23.8 -9.36 45.7
## hist
## 1 ▂▂▂▁▇
## 2 ▅▁▂▇▅
## 3 ▂▁▇▂▃
## 4 ▁▁▁▁▇
## 5 ▇▅▂▁▅
## 6 ▂▇▂▂▂
## 7 ▂▅▂▇▂
## 8 ▃▁▇▂▂
## 9 ▂▅▅▁▇
## 10 ▂▅▇▁▅
## Elapsed: 0.265 s
# Start the timer, then summarise the model on the remaining held-out rows
# with bootstrap-based intervals.
t0 <- proc.time()["elapsed"]
obj$summary(
  new_X_test,
  y = new_y_test,
  show_progress = FALSE,
  type_ci = "bootstrap"
)
## $R_squared
## [1] -3.356623
##
## $R_squared_adj
## [1] 11.16545
##
## $Residuals
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.950 5.030 6.374 6.369 8.774 11.528
##
## $Coverage_rate
## [1] 75
##
## $citests
## estimate lower upper p-value signif
## cyl 41.4258367 12.588077 65.2679002 4.350539e-165 ***
## disp -0.7222008 -1.115485 -0.4125830 3.330513e-165 ***
## hp -0.8065083 -1.909190 0.1497299 1.786493e-158 ***
## drat 558.7650921 437.296154 627.3085008 3.330513e-165 ***
## wt -472.0764594 -515.956910 -418.1652602 3.330513e-165 ***
## qsec -118.3772945 -145.482273 -83.8895244 3.330513e-165 ***
## vs -193.6445785 -265.087370 -124.5273605 3.330513e-165 ***
## am -388.8129686 -519.250653 -272.5465744 3.330513e-165 ***
## gear 51.8672891 -32.695264 137.3143094 4.222165e-136 ***
## carb -25.7347296 -54.019221 2.3478991 1.825688e-161 ***
##
## $signif_codes
## [1] "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
##
## $effects
## ── Data Summary ────────────────────────
## Values
## Name effects
## Number of rows 8
## Number of columns 10
## _______________________
## Column type frequency:
## numeric 10
## ________________________
## Group variables None
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable mean sd p0 p25 p50 p75 p100
## 1 cyl 40.9 40.7 -40.5 23.9 56.3 69.9 77.8
## 2 disp -0.734 0.566 -1.64 -1.03 -0.571 -0.372 -0.139
## 3 hp -0.823 1.65 -3.99 -1.18 -0.974 -0.196 1.25
## 4 drat 550. 160. 170. 549. 606. 642. 643.
## 5 wt -470. 78.9 -543. -537. -489. -437. -336.
## 6 qsec -117. 48.0 -179. -143. -131. -99.1 -29.9
## 7 vs -194. 112. -377. -283. -162. -120. -46.3
## 8 am -396. 200. -719. -481. -357. -319. -67.7
## 9 gear 53.1 135. -143. -23.9 16.5 172. 231.
## 10 carb -25.9 45.2 -101. -48.8 -23.8 -9.36 45.7
## hist
## 1 ▂▂▂▁▇
## 2 ▅▁▂▇▅
## 3 ▂▁▇▂▃
## 4 ▁▁▁▁▇
## 5 ▇▅▂▁▅
## 6 ▂▇▂▂▂
## 7 ▂▅▂▇▂
## 8 ▃▁▇▂▂
## 9 ▂▅▅▁▇
## 10 ▂▅▇▁▅
## Elapsed: 0.983 s
# Start the timer, then summarise the model on the remaining held-out rows
# with conformal intervals.
t0 <- proc.time()["elapsed"]
obj$summary(
  new_X_test,
  y = new_y_test,
  show_progress = FALSE,
  type_ci = "conformal"
)
## $R_squared
## [1] -3.356623
##
## $R_squared_adj
## [1] 11.16545
##
## $Residuals
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.950 5.030 6.374 6.369 8.774 11.528
##
## $Coverage_rate
## [1] 75
##
## $citests
## estimate lower upper p-value signif
## cyl 59.4677692 41.857693 75.5928842 1.113064e-83 ***
## disp -0.7298717 -1.372407 -0.2545631 1.120943e-83 ***
## hp -0.4219054 -1.054373 0.6831616 2.004880e-53 ***
## drat 597.4424346 554.087929 640.7969406 1.082817e-83 ***
## wt -506.1389175 -537.352520 -475.0632642 1.165119e-83 ***
## qsec -132.3622867 -143.160353 -117.5296013 1.113907e-83 ***
## vs -222.6300825 -283.532484 -160.6471851 1.150085e-83 ***
## am -360.2388021 -411.247416 -310.9483644 1.126951e-83 ***
## gear 3.3479327 -104.639996 114.9982065 3.623536e-04 ***
## carb -17.1193947 -38.397051 8.3335623 2.480992e-77 ***
##
## $signif_codes
## [1] "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
##
## $effects
## ── Data Summary ────────────────────────
## Values
## Name effects
## Number of rows 8
## Number of columns 10
## _______________________
## Column type frequency:
## numeric 10
## ________________________
## Group variables None
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable mean sd p0 p25 p50 p75 p100
## 1 cyl 40.9 40.7 -40.5 23.9 56.3 69.9 77.8
## 2 disp -0.734 0.566 -1.64 -1.03 -0.571 -0.372 -0.139
## 3 hp -0.823 1.65 -3.99 -1.18 -0.974 -0.196 1.25
## 4 drat 550. 160. 170. 549. 606. 642. 643.
## 5 wt -470. 78.9 -543. -537. -489. -437. -336.
## 6 qsec -117. 48.0 -179. -143. -131. -99.1 -29.9
## 7 vs -194. 112. -377. -283. -162. -120. -46.3
## 8 am -396. 200. -719. -481. -357. -319. -67.7
## 9 gear 53.1 135. -143. -23.9 16.5 172. 231.
## 10 carb -25.9 45.2 -101. -48.8 -23.8 -9.36 45.7
## hist
## 1 ▂▂▂▁▇
## 2 ▅▁▂▇▅
## 3 ▂▁▇▂▃
## 4 ▁▁▁▁▇
## 5 ▇▅▂▁▅
## 6 ▂▇▂▂▂
## 7 ▂▅▂▇▂
## 8 ▃▁▇▂▂
## 9 ▂▅▅▁▇
## 10 ▂▅▇▁▅
## Elapsed: 0.23 s
# Predict on the held-out observations; `res` is used below through its
# `preds`, `lower` and `upper` components.
res <- obj$predict(X = new_X_test)

# Observed responses shown to the left of the forecast: `y_train` followed by
# the first two test responses.
new_y_train <- c(y_train, y_test[c(1, 2)])

# Point predictions appended to the observed prefix; the y-limits span both
# interval bounds and every response in `y` so nothing is clipped.
plot(
  c(new_y_train, res$preds),
  type = "l",
  main = "",
  ylab = "",
  ylim = range(c(res$upper, res$lower, y))
)
# Upper and lower interval bounds.
lines(c(new_y_train, res$upper), col = "gray60")
lines(c(new_y_train, res$lower), col = "gray60")
# Point predictions (red) against the held-out responses (blue).
lines(c(new_y_train, res$preds), col = "red")
lines(c(new_y_train, new_y_test), col = "blue")
# Split marker: the observed prefix is `new_y_train` (two points longer than
# `y_train`), so the dashed line is placed at its length.
abline(v = length(new_y_train), lty = 2, col = "black")

# Share of held-out responses that fall inside [lower, upper].
mean(new_y_test >= as.numeric(res$lower) & new_y_test <= as.numeric(res$upper))
## [1] 0.75