target_based_encoder.Rd
Target-based encoder
target_based_encoder(x, y, rho = 0, seed = 123)
A list containing the encoded regressors (element `newx`) and the codes used for the encoding.
n <- 100
X <- cbind.data.frame(as.factor(sample(x = c(1, 2, 3),
size = n, replace = TRUE)), as.factor(sample(x = c(0, 1),
size = n, replace = TRUE)))
X$X3 <- rnorm(n)
colnames(X) <- c("X1", "X2", "X3")
y <- rt(nrow(X), df=2)
z <- after::target_based_encoder(X, y)$newx
#>
#>   |
#>   |                                                                      |   0%
#>   |
#>   |=======================                                               |  33%
#>   |
#>   |===============================================                       |  67%
#>   |
#>   |======================================================================| 100%
head(X)
#> X1 X2 X3
#> 1 3 1 0.6310629
#> 2 1 0 0.5394827
#> 3 3 0 0.2115170
#> 4 3 0 -0.9152734
#> 5 2 0 0.2632939
#> 6 3 0 0.3480227
head(z)
#> X1 X2 X3
#> [1,] -0.9266589 1.278917 0.6310629
#> [2,] -8.6988712 -19.921112 0.5394827
#> [3,] -0.9266589 -19.921112 0.2115170
#> [4,] -0.9266589 -19.921112 -0.9152734
#> [5,] -9.0166655 -19.921112 0.2632939
#> [6,] -0.9266589 -19.921112 0.3480227
tail(X)
#> X1 X2 X3
#> 95 2 1 -0.6519016
#> 96 1 1 0.9963354
#> 97 3 1 0.7551887
#> 98 2 0 0.3626396
#> 99 3 1 -0.7502416
#> 100 1 0 1.1638520
tail(z)
#> X1 X2 X3
#> [95,] -9.0166655 1.278917 -0.6519016
#> [96,] -8.6988712 1.278917 0.9963354
#> [97,] -0.9266589 1.278917 0.7551887
#> [98,] -9.0166655 -19.921112 0.3626396
#> [99,] -0.9266589 1.278917 -0.7502416
#> [100,] -8.6988712 -19.921112 1.1638520
cor(y, z[,2])
#> [1] 0.1060728
cor(y, z[,1])
#> [1] 0.02331431
sd(z[,2])
#> [1] 10.5485
sd(z[,1])
#> [1] 3.880926