# NOTE(review): the next two lines look like the rendered document's title and
# date rather than code -- confirm. They are kept as comments so the date is
# not evaluated as the arithmetic expression 7 / 6 / 2020.
# set.seed(1024)
# 07/06/2020

# Fix the random seed so the sampling done later in the script is reproducible.
set.seed(1024)

# One library() call per line: several calls on a single line without `;`
# separators do not parse.
library(performanceEstimation)
library(e1071)
library(randomForest)
library(DMwR2)
library(rpart)
# performanceEstimation package. caret and mlr also provide similar
# functionalities.
#
# performanceEstimation method

# Estimate the error rate ("err") of an SVM predicting Species on iris with a
# single holdout split that keeps 30% of the rows for testing (hldSz = 0.3).
data(iris)
r <- performanceEstimation(
  PredTask(Species ~ ., iris),
  Workflow(learner = "svm"),
  EstimationTask(metrics = "err", method = Holdout(hldSz = 0.3))
)
##
##
## ##### PERFORMANCE ESTIMATION USING HOLD OUT #####
##
## ** PREDICTIVE TASK :: iris.Species
##
## ++ MODEL/WORKFLOW :: svm
## Task for estimating err using
## 1 x 70 % / 30 % Holdout
## Run with seed = 1234
## Iteration : 1
# Use learner.pars in Workflow to apply specific parameter values.

# Print summary statistics of the estimates currently stored in r.
summary(r)
##
## == Summary of a Hold Out Performance Estimation Experiment ==
##
## Task for estimating err using
## 1 x 70 % / 30 % Holdout
## Run with seed = 1234
##
## * Predictive Tasks :: iris.Species
## * Workflows :: svm
##
## -> Task: iris.Species
##   *Workflow: svm
##                err
## avg     0.02222222
## std             NA
## med     0.02222222
## iqr     0.00000000
## min     0.02222222
## max     0.02222222
## invalid 0.00000000
# Estimate the mean squared error ("mse") of a random forest predicting medv
# on the Boston data, using a 70%/30% holdout repeated three times.
data(Boston, package = "MASS")
r <- performanceEstimation(
  PredTask(medv ~ ., Boston),
  Workflow(learner = "randomForest"),
  EstimationTask(metrics = "mse", method = Holdout(nReps = 3, hldSz = 0.3))
)
##
##
## ##### PERFORMANCE ESTIMATION USING HOLD OUT #####
##
## ** PREDICTIVE TASK :: Boston.medv
##
## ++ MODEL/WORKFLOW :: randomForest
## Task for estimating mse using
## 3 x 70 % / 30 % Holdout
## Run with seed = 1234
## Iteration : 1 2 3
# Print summary statistics of the estimates currently stored in r.
summary(r)
##
## == Summary of a Hold Out Performance Estimation Experiment ==
##
## Task for estimating mse using
## 3 x 70 % / 30 % Holdout
## Run with seed = 1234
##
## * Predictive Tasks :: Boston.medv
## * Workflows :: randomForest
##
## -> Task: Boston.medv
##   *Workflow: randomForest
##               mse
## avg     11.032014
## std      1.852507
## med     10.245758
## iqr      1.722827
## min      9.702315
## max     13.147969
## invalid  0.000000
# Compare five rpartXse variants (se = 0, 0.25, 0.5, 1, 2) on the Boston medv
# task, scoring both mse and mae with 8 repetitions of 10-fold
# cross-validation.
r <- performanceEstimation(
  PredTask(medv ~ ., Boston),
  workflowVariants(
    learner = "rpartXse",
    learner.pars = list(se = c(0, 0.25, 0.5, 1, 2))
  ),
  EstimationTask(
    metrics = c("mse", "mae"),
    method = CV(nReps = 8, nFolds = 10)
  )
)
##
##
## ##### PERFORMANCE ESTIMATION USING CROSS VALIDATION #####
##
## ** PREDICTIVE TASK :: Boston.medv
##
## ++ MODEL/WORKFLOW :: rpartXse.v1
## Task for estimating mse,mae using
## 8 x 10 - Fold Cross Validation
## Run with seed = 1234
## Iteration :********************************************************************************
##
##
## ++ MODEL/WORKFLOW :: rpartXse.v2
## Task for estimating mse,mae using
## 8 x 10 - Fold Cross Validation
## Run with seed = 1234
## Iteration :********************************************************************************
##
##
## ++ MODEL/WORKFLOW :: rpartXse.v3
## Task for estimating mse,mae using
## 8 x 10 - Fold Cross Validation
## Run with seed = 1234
## Iteration :********************************************************************************
##
##
## ++ MODEL/WORKFLOW :: rpartXse.v4
## Task for estimating mse,mae using
## 8 x 10 - Fold Cross Validation
## Run with seed = 1234
## Iteration :********************************************************************************
##
##
## ++ MODEL/WORKFLOW :: rpartXse.v5
## Task for estimating mse,mae using
## 8 x 10 - Fold Cross Validation
## Run with seed = 1234
## Iteration :********************************************************************************
# List the three best-ranked workflows in r, separately for each metric.
rankWorkflows(r, top = 3)
## $Boston.medv
## $Boston.medv$mse
##      Workflow Estimate
## 1 rpartXse.v1 18.15666
## 2 rpartXse.v2 18.70882
## 3 rpartXse.v3 20.24466
##
## $Boston.medv$mae
##      Workflow Estimate
## 1 rpartXse.v1 2.846930
## 2 rpartXse.v2 2.954981
## 3 rpartXse.v3 3.055003
# Retrieve the workflow object with ID "rpartXse.v1" from the results in r.
getWorkflow("rpartXse.v1", r)
## Workflow Object:
##  Workflow ID :: rpartXse.v1
##  Workflow Function :: standardWF
##  Parameter values:
##  learner.pars -> se=0
##  learner -> rpartXse
# Plot the estimation results currently stored in r.
plot(r)
#' User-defined workflow: fit an rpart tree and predict the test set
#'
#' @param form Model formula passed to `rpart()` and `responseValues()`.
#' @param train Data used to fit the tree.
#' @param test Data the fitted tree is asked to predict.
#' @param maxdep Value forwarded to `rpart.control(maxdepth = )`.
#' @param cpar Value forwarded to `rpart.control(cp = )`.
#' @return A list with `trues` (response values of `test` for `form`) and
#'   `preds` (the tree's predictions on `test`).
foo <- function(form, train, test, maxdep, cpar) {
  tree_control <- rpart.control(maxdepth = maxdep, cp = cpar)
  treemodel <- rpart(form, train, control = tree_control)
  predictions <- predict(treemodel, test)
  list(trues = responseValues(form, test), preds = predictions)
}

# Evaluate every combination of maxdep (3 values) and cpar (4 values) as a
# variant of the foo workflow, scoring mse with 10-fold cross-validation run
# with seed 1001.
r <- performanceEstimation(
  PredTask(medv ~ ., Boston),
  workflowVariants(
    wf = "foo",
    maxdep = c(2, 5, 8),
    cpar = c(0.01, 0.005, 0.002, 0.001)
  ),
  EstimationTask(metrics = "mse", method = CV(nFolds = 10, seed = 1001))
)
##
##
## ##### PERFORMANCE ESTIMATION USING CROSS VALIDATION #####
##
## ** PREDICTIVE TASK :: Boston.medv
##
## ++ MODEL/WORKFLOW :: foo.v1
## Task for estimating mse using
## 1 x 10 - Fold Cross Validation
## Run with seed = 1001
## Iteration :**********
##
##
## ++ MODEL/WORKFLOW :: foo.v2
## Task for estimating mse using
## 1 x 10 - Fold Cross Validation
## Run with seed = 1001
## Iteration :**********
##
##
## ++ MODEL/WORKFLOW :: foo.v3
## Task for estimating mse using
## 1 x 10 - Fold Cross Validation
## Run with seed = 1001
## Iteration :**********
##
##
## ++ MODEL/WORKFLOW :: foo.v4
## Task for estimating mse using
## 1 x 10 - Fold Cross Validation
## Run with seed = 1001
## Iteration :**********
##
##
## ++ MODEL/WORKFLOW :: foo.v5
## Task for estimating mse using
## 1 x 10 - Fold Cross Validation
## Run with seed = 1001
## Iteration :**********
##
##
## ++ MODEL/WORKFLOW :: foo.v6
## Task for estimating mse using
## 1 x 10 - Fold Cross Validation
## Run with seed = 1001
## Iteration :**********
##
##
## ++ MODEL/WORKFLOW :: foo.v7
## Task for estimating mse using
## 1 x 10 - Fold Cross Validation
## Run with seed = 1001
## Iteration :**********
##
##
## ++ MODEL/WORKFLOW :: foo.v8
## Task for estimating mse using
## 1 x 10 - Fold Cross Validation
## Run with seed = 1001
## Iteration :**********
##
##
## ++ MODEL/WORKFLOW :: foo.v9
## Task for estimating mse using
## 1 x 10 - Fold Cross Validation
## Run with seed = 1001
## Iteration :**********
##
##
## ++ MODEL/WORKFLOW :: foo.v10
## Task for estimating mse using
## 1 x 10 - Fold Cross Validation
## Run with seed = 1001
## Iteration :**********
##
##
## ++ MODEL/WORKFLOW :: foo.v11
## Task for estimating mse using
## 1 x 10 - Fold Cross Validation
## Run with seed = 1001
## Iteration :**********
##
##
## ++ MODEL/WORKFLOW :: foo.v12
## Task for estimating mse using
## 1 x 10 - Fold Cross Validation
## Run with seed = 1001
## Iteration :**********
# Rank the workflows stored in r by their estimated scores.
rankWorkflows(r)
## $Boston.medv
## $Boston.medv$mse
##   Workflow Estimate
## 1  foo.v11 20.94134
## 2  foo.v12 20.97372
## 3   foo.v8 21.05089
## 4   foo.v9 21.05812
## 5   foo.v5 22.54060
# Show the top-performing workflow in r for the mse metric on the Boston.medv
# task.
topPerformer(r, metric = "mse", task = "Boston.medv")
## Workflow Object:
##  Workflow ID :: foo.v11
##  Workflow Function :: foo
##  Parameter values:
##  maxdep -> 5
##  cpar -> 0.001
# Plot the estimation results currently stored in r.
plot(r)
# Bootstrap illustration: draw `datasize` row indices with replacement and
# report the fraction of distinct indices that ended up in the sample.
datasize <- 10000
training <- sample(datasize, replace = TRUE)
length(unique(training)) / datasize
## [1] 0.6296
head(training)
## [1] 7583 4572 2754 5903 1496 6312

# The test set is every index that was never drawn. setdiff() keeps the
# ascending order of seq_len(datasize) and, unlike negative indexing, does not
# return an empty vector when there is nothing to exclude.
test <- setdiff(seq_len(datasize), training)
head(test)
## [1] 4 5 6 7 11 12
data(BreastCancer, package = "mlbench")

# Impute the predictor columns with knnImputation() after dropping columns 1
# and 11 (presumably the sample id and the Class label -- confirm), then
# re-attach the Class column from the original data.
bc <- cbind(
  knnImputation(BreastCancer[, -c(1, 11)]),
  Class = BreastCancer$Class
)

# Evaluate every cost/gamma combination of an SVM on the bc Class task,
# scoring acc and tnr with 100 repetitions of the ".632" bootstrap.
r <- performanceEstimation(
  PredTask(Class ~ ., bc),
  workflowVariants(
    learner = "svm",
    learner.pars = list(cost = c(1, 5, 10), gamma = c(0.01, 0.001))
  ),
  EstimationTask(
    metrics = c("acc", "tnr"),
    method = Bootstrap(nReps = 100, type = ".632")
  )
)
# Report the top workflow for each metric in r.
# NOTE(review): maxs = TRUE presumably marks the metrics as ones where larger
# is better -- confirm against the package documentation.
topPerformers(r, maxs = TRUE)
## $bc.Class
##     Workflow Estimate
## acc   svm.v3    0.966
## tnr   svm.v1    0.962
# Show the top-performing workflow in r for the acc metric on the bc.Class
# task; max = TRUE presumably selects the largest estimate -- confirm.
topPerformer(r, max = TRUE, metric = "acc", task = "bc.Class")
## Workflow Object:
##  Workflow ID :: svm.v3
##  Workflow Function :: standardWF
##  Parameter values:
##  learner.pars -> cost=10 gamma=0.01
##  learner -> svm
# Show the top-performing workflow in r for the tnr metric on the bc.Class
# task; max = TRUE presumably selects the largest estimate -- confirm.
topPerformer(r, max = TRUE, metric = "tnr", task = "bc.Class")
## Workflow Object:
##  Workflow ID :: svm.v1
##  Workflow Function :: standardWF
##  Parameter values:
##  learner.pars -> cost=1 gamma=0.01
##  learner -> svm