# Attach the built-in mtcars dataset and preview its first six rows
data("mtcars")
head(mtcars, n = 6L)
#>                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
#> Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
#> Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
#> Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
#> Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
#> Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
#> Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1

# Separate the response (mpg) from the predictors (every other column)
y <- mtcars[["mpg"]]
X <- as.matrix(mtcars[, names(mtcars) != "mpg"])

# Draw a reproducible sample of roughly 80% of the row indices for training;
# the rows that were not drawn form the test set
set.seed(42)
n_obs <- nrow(mtcars)
train_idx <- sample(n_obs, size = floor(0.8 * n_obs))
X_train <- X[train_idx, ]
y_train <- y[train_idx]
X_test <- X[-train_idx, ]
y_test <- y[-train_idx]

Linear regression

Basic usage

The `tisthemachinelearner` package provides an R6 interface for machine learning tasks. Let’s start with a simple regression example using the built-in `mtcars` dataset:

library(tisthemachinelearner)

# Build a three-predictor matrix and the response vector from mtcars
# NOTE(review): `x` is not passed to the calls below, which fit and predict
# on X_train / X_test instead -- confirm whether `x` is used elsewhere
x <- as.matrix(mtcars[c("cyl", "disp", "hp")])
y <- mtcars[["mpg"]]

# Instantiate a regressor with its default settings
reg <- Regressor$new()

# fit() is chained straight into predict(), as the R6 interface permits;
# store the test-set predictions and display them
preds <- reg$fit(X_train, y_train)$predict(X_test)
print(preds)
#> [1] 19.97085 20.25755 29.09144 29.61140 19.25911 26.09742 17.92195
# Root mean squared error of the predictions on the held-out rows
print(sqrt(mean((preds - y_test)^2)))
#> [1] 4.876167

Random Forest

# Same workflow as before, selecting the model by name
# NOTE(review): no seed is passed to the model here, so the numbers shown
# below may differ between runs -- TODO confirm
rf <- Regressor$new(model_name = "RandomForestRegressor")
rf$fit(X_train, y_train)

# Store the test-set predictions and display them
preds <- rf$predict(X_test)
print(preds)
#> [1] 19.011 22.851 29.030 29.646 18.209 17.708 20.663
# Root mean squared error of the predictions on the held-out rows
print(sqrt(mean((preds - y_test)^2)))
#> [1] 2.293614