Target-based encoder

target_based_encoder(x, y, rho = 0, seed = 123)

Arguments

x

matrix or data frame; explanatory variables (factor columns are encoded, numeric columns are left unchanged)

y

vector; response

rho

numeric; desired correlation between the response and each encoded variable

seed

integer; seed for the random number generator, for reproducibility

Value

a list containing the encoded regressors (element `newx`) and the codes used for the encoding

Examples


n <- 100
X <- cbind.data.frame(as.factor(sample(x = c(1, 2, 3),
size = n, replace = TRUE)), as.factor(sample(x = c(0, 1),
size = n, replace = TRUE)))
X$X3 <- rnorm(n)
colnames(X) <- c("X1", "X2", "X3")

y <- rt(nrow(X), df=2)

z <- after::target_based_encoder(X, y)$newx
#> 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |======================================================================| 100%

head(X)
#>   X1 X2         X3
#> 1  3  1  0.6310629
#> 2  1  0  0.5394827
#> 3  3  0  0.2115170
#> 4  3  0 -0.9152734
#> 5  2  0  0.2632939
#> 6  3  0  0.3480227
head(z)
#>              X1         X2         X3
#> [1,] -0.9266589   1.278917  0.6310629
#> [2,] -8.6988712 -19.921112  0.5394827
#> [3,] -0.9266589 -19.921112  0.2115170
#> [4,] -0.9266589 -19.921112 -0.9152734
#> [5,] -9.0166655 -19.921112  0.2632939
#> [6,] -0.9266589 -19.921112  0.3480227
tail(X)
#>     X1 X2         X3
#> 95   2  1 -0.6519016
#> 96   1  1  0.9963354
#> 97   3  1  0.7551887
#> 98   2  0  0.3626396
#> 99   3  1 -0.7502416
#> 100  1  0  1.1638520
tail(z)
#>                X1         X2         X3
#>  [95,] -9.0166655   1.278917 -0.6519016
#>  [96,] -8.6988712   1.278917  0.9963354
#>  [97,] -0.9266589   1.278917  0.7551887
#>  [98,] -9.0166655 -19.921112  0.3626396
#>  [99,] -0.9266589   1.278917 -0.7502416
#> [100,] -8.6988712 -19.921112  1.1638520

cor(y, z[,2])
#> [1] 0.1060728
cor(y, z[,1])
#> [1] 0.02331431

sd(z[,2])
#> [1] 10.5485
sd(z[,1])
#> [1] 3.880926