caretEnsemble Classification Example
Here’s a quick demo of how to fit a binary classification model with caretEnsemble. Please note that I haven’t spent as much time debugging caretEnsemble for classification models, so there are probably more bugs here than in my last post. Also note that multiclass models are not yet supported.
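All of the packages used in this post are on CRAN. If you don’t have them yet, the snippet below is a one-time setup step; caret should also offer to install the model-specific packages (gbm, mboost, randomForest, and so on) the first time you run each method.
#Install the packages used in this post (one-time setup)
install.packages(c('caret', 'caretEnsemble', 'mlbench', 'caTools'))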
#Setup
rm(list = ls(all = TRUE))
gc(reset=TRUE)
#>          used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
#> Ncells 485687   26    1037458 55.5         NA   485687   26
#> Vcells 911579    7    8388608 64.0      98304   911579    7
set.seed(1234)
#Libraries
library(caret)
library(caretEnsemble)
#Data
library(mlbench)
dat <- mlbench.xor(500, 2)
X <- data.frame(dat$x)
Y <- factor(ifelse(dat$classes=='1', 'Yes', 'No'))
#Split train/test
train <- runif(nrow(X)) <= .66
#Setup CV Folds
#returnData=FALSE saves some space
folds=5
repeats=1
myControl <- trainControl(method='cv', number=folds, repeats=repeats,
                          returnResamp='none', classProbs=TRUE,
                          returnData=FALSE, savePredictions=TRUE,
                          verboseIter=FALSE, allowParallel=TRUE,
                          summaryFunction=twoClassSummary,
                          index=createMultiFolds(Y[train], k=folds, times=repeats))
PP <- c('center', 'scale')
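One note on the control object above: the index argument pins down the resampling folds in advance, so every model below is trained on exactly the same cross-validation splits. Together with savePredictions=TRUE and classProbs=TRUE, this is what lets caretEnsemble line up each model’s out-of-fold class probabilities later. A quick sanity check:
#We asked for 5 folds and 1 repeat, so there should be 5 sets of training indices
length(myControl$index)
#> [1] 5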
#Train some models
model1 <- train(X[train,], Y[train], method='gbm', verbose=FALSE, trControl=myControl,
                tuneGrid=expand.grid(n.trees=500, interaction.depth=15,
                                     shrinkage=0.01, n.minobsinnode=10))
model2 <- train(X[train,], Y[train], method='blackboost', trControl=myControl)
model3 <- train(X[train,], Y[train], method='parRF', tuneLength=1, trControl=myControl)
model4 <- train(X[train,], Y[train], method='mlpWeightDecay', trControl=myControl, trace=FALSE, preProcess=PP)
model5 <- train(X[train,], Y[train], method='knn', trControl=myControl, preProcess=PP)
model6 <- train(X[train,], Y[train], method='earth', trControl=myControl, preProcess=PP)
model7 <- train(X[train,], Y[train], method='glm', trControl=myControl, preProcess=PP)
model8 <- train(X[train,], Y[train], method='svmRadial', trControl=myControl, preProcess=PP)
model9 <- train(X[train,], Y[train], method='gam', trControl=myControl, preProcess=PP)
model10 <- train(X[train,], Y[train], method='glmnet', trControl=myControl, preProcess=PP)
#Make a list of all the models
all.models <- list(model1, model2, model3, model4, model5, model6, model7, model8, model9, model10)
names(all.models) <- sapply(all.models, function(x) x$method)
all.models <- as.caretList(all.models)
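As an aside, recent versions of caretEnsemble also provide caretList, which fits several models against a shared trControl in one call instead of building the list by hand as above. A minimal sketch (the three methods here are arbitrary examples):
#Fit several models at once on identical resampling folds
quick.models <- caretList(X[train,], Y[train], trControl=myControl,
                          methodList=c('knn', 'glm', 'rpart'))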
#Compare each model's worst cross-validated AUC across its tuning grid
sort(sapply(all.models, function(x) min(x$results$ROC)))
#> mlpWeightDecay       glm    glmnet       gam     earth blackboost       knn svmRadial
#>      0.3856384 0.4099975 0.4147880 0.4346771 0.4882353  0.5000000 0.9915315 0.9964432
#>     parRF       gbm
#> 0.9994703 0.9998268
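Note that several of the models sit at or below an AUC of 0.50. That makes sense: XOR is not linearly separable, so any model restricted to a linear (or additive) decision boundary can’t beat chance on this problem. A quick plot of the data shows why:
#Plot the XOR data: no straight line separates the two classes
plot(dat$x, col=dat$classes, pch=19, xlab='X1', ylab='X2')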
#Make a greedy ensemble - currently can only use AUC
greedy <- caretEnsemble(all.models, iter=1000L)
print(greedy$ens_model$finalModel$model_weights)
#>                    No Yes
#> gbm_No              1   0
#> gbm_Yes             0   1
#> blackboost_No       0   0
#> blackboost_Yes      0   0
#> parRF_No            0   0
#> parRF_Yes           0   0
#> mlpWeightDecay_No   0   0
#> mlpWeightDecay_Yes  0   0
#> knn_No              0   0
#> knn_Yes             0   0
#> earth_No            0   0
#> earth_Yes           0   0
#> glm_No              0   0
#> glm_Yes             0   0
#> svmRadial_No        0   0
#> svmRadial_Yes       0   0
#> gam_No              0   0
#> gam_Yes             0   0
#> glmnet_No           0   0
#> glmnet_Yes          0   0
greedy$error
#>   max_iter       ROC      Sens      Spec        ROCSD     SensSD     SpecSD
#> 1      100 0.9998217 0.9884034 0.9878788 0.0003985861 0.01588213 0.01659765
#Make a linear stacked ensemble (with a binary outcome, method='glm' fits a logistic regression)
linear <- caretStack(all.models, method='glm')
print(round(coef(linear$ens_model$finalModel), 2))
#> (Intercept)       gbm blackboost     parRF mlpWeightDecay       knn     earth       glm
#>    64968.70    642.80  -92459.55      6.92         218.60    481.55 -35355.58  13868.35
#> svmRadial       gam    glmnet
#>     10.76  -1110.63 -19911.04
linear$error
#>   parameter       ROC      Sens      Spec    ROCSD     SensSD     SpecSD
#> 1      none 0.9806543 0.9768067 0.9575758 0.011322 0.02400921 0.02710385
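As a rough way to gauge how much each model contributes to the stack, you can look at caret’s variable importance for the stacking model (for a glm, this is based on the absolute t-statistic of each coefficient):
#Relative importance of each model within the stacked glm
varImp(linear$ens_model)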
#Predict for test set:
library(caTools)
preds <- predict(all.models, newdata=X[!train,])
preds$ENS_greedy <- predict(greedy, newdata=X[!train,])[,'Yes']
preds$ENS_linear <- predict(linear, newdata=X[!train,])[,'Yes']
sort(colAUC(preds, Y[!train])[1,])
#> blackboost     earth       gam    glmnet       glm       knn mlpWeightDecay svmRadial
#>  0.5000000 0.5000000 0.5490459 0.5638044 0.5642516 0.9950805      0.9992546 0.9997018
#>       gbm     parRF ENS_greedy ENS_linear
#> 1.0000000 1.0000000  1.0000000  1.0000000
Right now, this code fails for me if I try a model like an nnet or an SVM for stacking, so there are clearly bugs to fix.
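For example, a call along these lines errors for me at the moment (svmRadial is just one illustration, and I’ve commented it out so the post runs end to end):
#Nonlinear stacking models currently fail:
#stacked.svm <- caretStack(all.models, method='svmRadial')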
The greedy model relies 100% on the gbm, which makes sense, as the gbm has an AUC of 1 on the training set. The linear model uses all of the models and also achieves an AUC of 1, but its very large coefficients of both signs make it a harder model to interpret.
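Finally, if you want hard class labels from an ensemble rather than probabilities, you can threshold the predicted probability of 'Yes'. A quick sketch (0.5 is an arbitrary cutoff, and this assumes preds is a data.frame, as above):
#Threshold the greedy ensemble's test-set probabilities and tabulate against the truth
pred.class <- factor(ifelse(preds$ENS_greedy > 0.5, 'Yes', 'No'), levels=levels(Y))
confusionMatrix(pred.class, Y[!train])
Since both ensembles have a test-set AUC of 1, almost any reasonable cutoff will separate the two classes cleanly here.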