# Fix the RNG seed so the simulation results below are reproducible.
set.seed(1024)
# Lecture date, scraped as a bare expression (18/05/2020 evaluates as
# division in R — harmless, but it is metadata, not intended code).
18/05/2020
# Seed is set again after the date line; the second call is the one
# that actually governs the draws in the demo below.
set.seed(1024)
library(adabag) library(mlbench) library(randomForest)
One super-wise person’s decisions vs. one hundred barely-wise persons’ majority decisions
# Wisdom-of-crowds simulation: one expert who is right 90% of the time
# vs. a majority vote over 100 people who are each right 60% of the time.
SuperWise <- 0.9
BarelyWise <- 0.6
# 100 decisions by the single expert (1 = correct).
x <- rbinom(100, 1, SuperWise)
# 100 decisions, each a majority vote of 100 barely-wise voters:
# the vote is correct when more than half the voters are correct.
y <- rbinom(100, 100, BarelyWise) / 100
y <- as.numeric(y > 0.5)
table(x)
## x ## 0 1 ## 10 90
table(y)
## y ## 0 1 ## 4 96
# The crowd's majority vote beats the single expert (96 vs. 90 correct).
cat(sum(x), sum(y))
## 90 96
Idea: aggregating many weak decision-makers can outperform a single strong one — the principle behind ensemble methods.
adabag
# BreastCancer data from the mlbench package.
data(BreastCancer, package = "mlbench")
# Use only the complete cases and remove the ID column.
bc <- BreastCancer[complete.cases(BreastCancer), -1]
# Obtain a 70-30 split for training and testing.
# seq_len() is the safe idiom for index sequences (1:nrow() misbehaves
# on empty inputs).
rndSample <- sample(seq_len(nrow(bc)), nrow(bc) * 0.70)
tr <- bc[rndSample, ]
ts <- bc[-rndSample, ]
# Build the bagged ensemble (mfinal = number of trees); maxdepth = 1
# restricts every tree to a stump.
m <- bagging(Class ~ ., tr, mfinal = 20,
             control = rpart.control(maxdepth = 1))
ps <- predict(m, ts)
names(ps)
## [1] "formula" "votes" "prob" "class" "confusion" "error"
ps$confusion
## Observed Class ## Predicted Class benign malignant ## benign 124 7 ## malignant 14 60
With `maxdepth = 1` each base tree is a stump; next, allow deeper trees:
# Build the model (mfinal = number of trees); the stray "?" that
# preceded this chunk was a scraping artifact and is removed — it made
# the line unparseable. Deeper trees (maxdepth = 3) this time.
m <- bagging(Class ~ ., tr, mfinal = 20,
             control = rpart.control(maxdepth = 3))
ps <- predict(m, ts)
names(ps)
## [1] "formula" "votes" "prob" "class" "confusion" "error"
ps$confusion
## Observed Class ## Predicted Class benign malignant ## benign 131 7 ## malignant 7 60
randomForest
# Random forest: 100 trees, 3 candidate features per split.
m <- randomForest(Class ~ ., tr, ntree = 100, mtry = 3)
ps <- predict(m, ts)
# Confusion matrix on the test set (outer parens print the assignment).
(cm <- table(ps, ts$Class))
## ## ps benign malignant ## benign 132 0 ## malignant 6 67
`mtry` controls the size of the random feature subset considered at each split.
error <- numeric() nmodels <- 20 for (i in 1:nmodels) { m <- randomForest(Class ~ ., tr, ntree = i, mtry = 3) ps <- predict(m, ts) cm <- table(ps, ts$Class) error[i] <- (cm[1,2]+cm[2,1])/nrow(ts) } par(mar=c(2,4,1,2)) plot(1:nmodels, error, type = "l")
\[H(x_i) = \sum_k w_k \, h_k(x_i)\]
The adabag package implements this weighted ensemble as `boosting()`.
# AdaBoost ensemble of 20 trees on the training split.
m <- boosting(Class ~ ., tr, mfinal = 20)
ps <- predict(m, ts)
# Test-set confusion matrix.
ps$confusion
## Observed Class ## Predicted Class benign malignant ## benign 131 0 ## malignant 7 67
# Passing coeflearn = "Zhu" to boosting() runs the SAMME algorithm
# (the fragment above was prose fused into the code by the scrape,
# not an intended assignment).
# Test-set error as a function of the number of boosting iterations.
nmodels <- 20
# Preallocate the result vector instead of growing it inside the loop.
error <- numeric(nmodels)
for (i in seq_len(nmodels)) {
  m <- boosting(Class ~ ., tr, mfinal = i)
  ps <- predict(m, ts)
  error[i] <- ps$error
}
par(mar = c(2, 4, 1, 2))
plot(seq_len(nmodels), error, type = "l")
gbm