07/06/2020

## Seed used in these slides

# Fix the state of R's random number generator so that the random draws made
# in these slides are reproducible.
# NOTE(review): the performanceEstimation outputs further down report their
# own seeds (1234, 1001) -- confirm whether this call affects those runs.
set.seed(1024)

## Libraries used in these slides

library(performanceEstimation)
library(e1071)
library(randomForest)
library(DMwR2)
library(rpart)

## Performance Evaluation

• We need to evaluate the performance of our predictions
• Bad idea:
• Evaluate your performance on the training set
• Good idea:
• Evaluate your performance on a never-before-seen set, a.k.a. the test set

## Evaluation Methodology

• Training Set
• Test Set
• Train Model
• Evaluate Error -> $$E_i$$
• Do this a few times to ensure statistical reliability
• Calculate mean prediction error $\bar E = \frac{1}{k}\sum_{i=1}^{k}{E_i}$
• Also check the standard error of the mean: $SE(\bar E) = \frac{s_E}{\sqrt{k}}$ where $$s_E$$ is the sample standard deviation of the error $s_E = \sqrt{\frac{1}{k-1}\sum_{i=1}^{k}{(E_i-\bar E)^2}}$

## Automated Evaluation

• We will investigate the performanceEstimation package.
• caret and mlr also provide similar functionality.

## Holdout and Random Subsampling

• Holdout method splits the data into two parts
• Training set: Usually 70%
• Test set: Usually 30%
• What if the dataset size is too small?
• Not enough data left to train a good model
• Not enough data left to test accurately
• This method works best for large datasets

## Holdout and Random Subsampling

• Random Subsampling is repeating holdout many times
• This way we get a set of test scores
• More test scores give a statistically more reliable estimate

## Example - Holdout

• We will use the performanceEstimation() function
data(iris)
# Estimate the classification error ("err") of an SVM that predicts Species
# from the other iris columns, using one 70% / 30% holdout split.
# The call takes three parts: the predictive task, the workflow, and the
# estimation task (metric + resampling method).
r <- performanceEstimation(
  PredTask(Species ~ ., iris),
  Workflow(learner = "svm"),
  EstimationTask(metrics = "err",
                 method = Holdout(hldSz = 0.3)))
##
##
## ##### PERFORMANCE ESTIMATION USING  HOLD OUT  #####
##
## ** PREDICTIVE TASK :: iris.Species
##
## ++ MODEL/WORKFLOW :: svm
## Task for estimating  err  using
##  1 x 70 % / 30 % Holdout
##   Run with seed =  1234
## Iteration :  1
• Use the learner.pars parameter of Workflow to pass specific parameter values to the learner

## Example - Holdout

# Print aggregate statistics (avg, std, med, iqr, min, max, invalid) of the
# scores estimated for each workflow in r.
summary(r)
##
## == Summary of a  Hold Out Performance Estimation Experiment ==
##
## Task for estimating  err  using
##  1 x 70 % / 30 % Holdout
##   Run with seed =  1234
##
## * Predictive Tasks ::  iris.Species
## * Workflows  ::  svm
##
##   *Workflow: svm
##                err
## avg     0.02222222
## std             NA
## med     0.02222222
## iqr     0.00000000
## min     0.02222222
## max     0.02222222
## invalid 0.00000000

## Example - Random Subsampling

data(Boston, package = "MASS")
# Estimate the mean squared error ("mse") of a random forest that predicts
# medv from the other Boston columns, repeating a 70% / 30% holdout split
# three times (random subsampling).
r <- performanceEstimation(
  PredTask(medv ~ ., Boston),
  Workflow(learner = "randomForest"),
  EstimationTask(metrics = "mse",
                 method = Holdout(nReps = 3, hldSz = 0.3)))
##
##
## ##### PERFORMANCE ESTIMATION USING  HOLD OUT  #####
##
## ** PREDICTIVE TASK :: Boston.medv
##
## ++ MODEL/WORKFLOW :: randomForest
## Task for estimating  mse  using
##  3 x 70 % / 30 % Holdout
##   Run with seed =  1234
## Iteration :  1  2  3

## Example - Random Subsampling

# Print aggregate statistics (avg, std, med, iqr, min, max, invalid) of the
# scores estimated over the repetitions for each workflow in r.
summary(r)
##
## == Summary of a  Hold Out Performance Estimation Experiment ==
##
## Task for estimating  mse  using
##  3 x 70 % / 30 % Holdout
##   Run with seed =  1234
##
## * Predictive Tasks ::  Boston.medv
## * Workflows  ::  randomForest
##
##   *Workflow: randomForest
##               mse
## avg     11.032014
## std      1.852507
## med     10.245758
## iqr      1.722827
## min      9.702315
## max     13.147969
## invalid  0.000000

## Cross Validation

• Instead of drawing k random test sets, we split the data into k equally sized folds and use each fold exactly once as the test set
• Works best for medium-sized datasets
• a few hundred to a few thousand cases

## Cross Validation - Example

# Compare five rpartXse variants (one per value of se) on the Boston data,
# estimating "mse" and "mae" with 8 repetitions of 10-fold cross validation.
r <- performanceEstimation(
  PredTask(medv ~ ., Boston),
  workflowVariants(learner = "rpartXse",
                   learner.pars = list(se = c(0, 0.25, 0.5, 1, 2))),
  EstimationTask(metrics = c("mse", "mae"),
                 method = CV(nReps = 8, nFolds = 10)))
##
##
## ##### PERFORMANCE ESTIMATION USING  CROSS VALIDATION  #####
##
## ** PREDICTIVE TASK :: Boston.medv
##
## ++ MODEL/WORKFLOW :: rpartXse.v1
## Task for estimating  mse,mae  using
##  8 x 10 - Fold Cross Validation
##   Run with seed =  1234
## Iteration :********************************************************************************
##
##
## ++ MODEL/WORKFLOW :: rpartXse.v2
## Task for estimating  mse,mae  using
##  8 x 10 - Fold Cross Validation
##   Run with seed =  1234
## Iteration :********************************************************************************
##
##
## ++ MODEL/WORKFLOW :: rpartXse.v3
## Task for estimating  mse,mae  using
##  8 x 10 - Fold Cross Validation
##   Run with seed =  1234
## Iteration :********************************************************************************
##
##
## ++ MODEL/WORKFLOW :: rpartXse.v4
## Task for estimating  mse,mae  using
##  8 x 10 - Fold Cross Validation
##   Run with seed =  1234
## Iteration :********************************************************************************
##
##
## ++ MODEL/WORKFLOW :: rpartXse.v5
## Task for estimating  mse,mae  using
##  8 x 10 - Fold Cross Validation
##   Run with seed =  1234
## Iteration :********************************************************************************

## Cross Validation - Example

# List the three best-ranked workflows for each metric estimated in r.
rankWorkflows(r, top = 3)
## $Boston.medv
## $Boston.medv$mse
##      Workflow Estimate
## 1 rpartXse.v1 18.15666
## 2 rpartXse.v2 18.70882
## 3 rpartXse.v3 20.24466
##
## $Boston.medv$mae
##      Workflow Estimate
## 1 rpartXse.v1 2.846930
## 2 rpartXse.v2 2.954981
## 3 rpartXse.v3 3.055003

## Cross Validation - Example

getWorkflow("rpartXse.v1", r)
## Workflow Object:
##  Workflow ID       ::  rpartXse.v1
##  Workflow Function ::  standardWF
##       Parameter values:
##       learner.pars  -> se=0
##       learner  -> rpartXse

## Cross Validation - Example

plot(r)

## Custom Workflows

#' Custom workflow: fit a regression tree and predict the test cases.
#'
#' @param form Model formula passed to rpart() and responseValues().
#' @param train Data used to fit the tree.
#' @param test Data for which predictions are produced.
#' @param maxdep Value forwarded to rpart.control(maxdepth = ).
#' @param cpar Value forwarded to rpart.control(cp = ).
#' @return A list with `trues` (the result of responseValues(form, test))
#'   and `preds` (the tree's predictions for `test`).
foo <- function(form, train, test, maxdep, cpar) {
  treemodel <- rpart(
    form,
    data = train,
    control = rpart.control(maxdepth = maxdep, cp = cpar)
  )
  predictions <- predict(treemodel, newdata = test)
  list(
    trues = responseValues(form, test),
    preds = predictions
  )
}

# Evaluate every combination of maxdep and cpar (3 x 4 = 12 variants of foo)
# with one repetition of 10-fold cross validation on the Boston data.
r <- performanceEstimation(
  PredTask(medv ~ ., Boston),
  workflowVariants(wf = "foo",
                   maxdep = c(2, 5, 8),
                   cpar = c(0.01, 0.005, 0.002, 0.001)),
  EstimationTask(metrics = "mse",
                 method = CV(nFolds = 10, seed = 1001)))
##
##
## ##### PERFORMANCE ESTIMATION USING  CROSS VALIDATION  #####
##
## ** PREDICTIVE TASK :: Boston.medv
##
## ++ MODEL/WORKFLOW :: foo.v1
## Task for estimating  mse  using
##  1 x 10 - Fold Cross Validation
##   Run with seed =  1001
## Iteration :**********
##
##
## ++ MODEL/WORKFLOW :: foo.v2
## Task for estimating  mse  using
##  1 x 10 - Fold Cross Validation
##   Run with seed =  1001
## Iteration :**********
##
##
## ++ MODEL/WORKFLOW :: foo.v3
## Task for estimating  mse  using
##  1 x 10 - Fold Cross Validation
##   Run with seed =  1001
## Iteration :**********
##
##
## ++ MODEL/WORKFLOW :: foo.v4
## Task for estimating  mse  using
##  1 x 10 - Fold Cross Validation
##   Run with seed =  1001
## Iteration :**********
##
##
## ++ MODEL/WORKFLOW :: foo.v5
## Task for estimating  mse  using
##  1 x 10 - Fold Cross Validation
##   Run with seed =  1001
## Iteration :**********
##
##
## ++ MODEL/WORKFLOW :: foo.v6
## Task for estimating  mse  using
##  1 x 10 - Fold Cross Validation
##   Run with seed =  1001
## Iteration :**********
##
##
## ++ MODEL/WORKFLOW :: foo.v7
## Task for estimating  mse  using
##  1 x 10 - Fold Cross Validation
##   Run with seed =  1001
## Iteration :**********
##
##
## ++ MODEL/WORKFLOW :: foo.v8
## Task for estimating  mse  using
##  1 x 10 - Fold Cross Validation
##   Run with seed =  1001
## Iteration :**********
##
##
## ++ MODEL/WORKFLOW :: foo.v9
## Task for estimating  mse  using
##  1 x 10 - Fold Cross Validation
##   Run with seed =  1001
## Iteration :**********
##
##
## ++ MODEL/WORKFLOW :: foo.v10
## Task for estimating  mse  using
##  1 x 10 - Fold Cross Validation
##   Run with seed =  1001
## Iteration :**********
##
##
## ++ MODEL/WORKFLOW :: foo.v11
## Task for estimating  mse  using
##  1 x 10 - Fold Cross Validation
##   Run with seed =  1001
## Iteration :**********
##
##
## ++ MODEL/WORKFLOW :: foo.v12
## Task for estimating  mse  using
##  1 x 10 - Fold Cross Validation
##   Run with seed =  1001
## Iteration :**********

## Cross Validation - Example

rankWorkflows(r)
## $Boston.medv
## $Boston.medv$mse
##   Workflow Estimate
## 1  foo.v11 20.94134
## 2  foo.v12 20.97372
## 3   foo.v8 21.05089
## 4   foo.v9 21.05812
## 5   foo.v5 22.54060
# Retrieve the top-ranked workflow for the "mse" metric on the Boston.medv task.
topPerformer(r, metric = "mse", task = "Boston.medv")
## Workflow Object:
##  Workflow ID       ::  foo.v11
##  Workflow Function ::  foo
##       Parameter values:
##       maxdep  -> 5
##       cpar  -> 0.001

## Cross Validation - Example

# Plot the performance estimation results stored in r.
plot(r)

## Bootstrap Estimates

• Construct the training set by random sampling with replacement
• On average, about 63.2% of the distinct rows end up in the training set
• The remaining rows are chosen to be the test set
• Applied with many repetitions
• Best applicable to small datasets
# Simulate one bootstrap resample of a dataset with `datasize` rows.
datasize <- 10000
# Draw `datasize` row indices with replacement: the bootstrap training set.
training <- sample(datasize, replace = TRUE)
# Fraction of distinct rows that were drawn at least once.
length(unique(training)) / datasize
## [1] 0.6296
head(training)
## [1] 7583 4572 2754 5903 1496 6312
# Rows that were never drawn form the test set.
test <- setdiff(seq_len(datasize), training)
head(test)
## [1]  4  5  6  7 11 12

## Bootstrap Estimates

• There are two versions
• $$\epsilon_0$$ estimates
• average of the k bootstrap estimations
• $$.632$$ estimates
• a weighted average of $$\epsilon_0$$ and $$\epsilon_r$$
• $$\epsilon_r$$ is the resubstitution estimate, obtained by training with the full dataset and then testing with the full dataset:
$$\epsilon_{.632} = 0.368\epsilon_r + 0.632\epsilon_0$$

## Bootstrap Estimates - Example

data(BreastCancer, package = "mlbench")
# Impute missing values with knnImputation() on all columns except 1 and 11
# (presumably the Id and Class columns -- confirm), then re-attach Class.
bc <- cbind(knnImputation(BreastCancer[, -c(1, 11)]),
            Class = BreastCancer$Class)
# Compare six SVM variants (3 cost values x 2 gamma values) on accuracy and
# true negative rate, using 100 repetitions of the .632 bootstrap.
r <- performanceEstimation(
  PredTask(Class ~ ., bc),
  workflowVariants(learner = "svm",
                   learner.pars = list(cost = c(1, 5, 10),
                                       gamma = c(0.01, 0.001))),
  EstimationTask(metrics = c("acc", "tnr"),
                 method = Bootstrap(nReps = 100, type = ".632")))

## Bootstrap Estimates - Example

topPerformers(r, maxs = TRUE)
## $bc.Class
##     Workflow Estimate
## acc   svm.v3    0.966
## tnr   svm.v1    0.962
# Workflow with the highest "acc" estimate on the bc.Class task
# (max = TRUE: larger scores are better).
topPerformer(r, metric = "acc", task = "bc.Class", max = TRUE)
## Workflow Object:
##  Workflow ID       ::  svm.v3
##  Workflow Function ::  standardWF
##       Parameter values:
##       learner.pars  -> cost=10 gamma=0.01
##       learner  -> svm
# Workflow with the highest "tnr" estimate on the bc.Class task
# (max = TRUE: larger scores are better).
topPerformer(r, metric = "tnr", task = "bc.Class", max = TRUE)
## Workflow Object:
##  Workflow ID       ::  svm.v1
##  Workflow Function ::  standardWF
##       Parameter values:
##       learner.pars  -> cost=1 gamma=0.01
##       learner  -> svm