diff options
Diffstat (limited to 'execs/R/cpdbench_changepoint.R')
| -rw-r--r-- | execs/R/cpdbench_changepoint.R | 153 |
1 files changed, 153 insertions, 0 deletions
diff --git a/execs/R/cpdbench_changepoint.R b/execs/R/cpdbench_changepoint.R new file mode 100644 index 00000000..ed1f6391 --- /dev/null +++ b/execs/R/cpdbench_changepoint.R @@ -0,0 +1,153 @@ +#' --- +#' title: Wrapper for changepoint package in TCPDBench +#' author: G.J.J. van den Burg +#' date: 2019-09-28 +#' license: See LICENSE file. +#' copyright: 2019, The Alan Turing Institute +#' --- + +library(argparse) +library(changepoint) + +load.utils <- function() { + # get the name of the current script so we can load utils.R (yay, R!) + cmd.args <- commandArgs(trailingOnly=F) + file.arg <- "--file=" + this.script <- sub(file.arg, "", cmd.args[grep(file.arg, cmd.args)]) + this.dir <- dirname(this.script) + utils.script <- file.path(this.dir, 'utils.R') + source(utils.script) +} + +parse.args <- function() { + parser <- ArgumentParser(description='Wrapper for changepoint package') + parser$add_argument('-i', + '--input', + help='path to the input data file', + required=TRUE + ) + parser$add_argument('-o', + '--output', + help='path to the output file') + parser$add_argument('-f', + '--func', + choices=c('mean', 'var', 'meanvar'), + help='Function to call in the changepoint package', + required=TRUE + ) + parser$add_argument('-p', + '--penalty', + choices=c( + 'None', + 'SIC', + 'BIC', + 'MBIC', + 'AIC', + 'Hannan-Quinn', + 'Asymptotic' + ), + help='Choice of penalty in the cpt function', + default='MBIC' + ) + parser$add_argument( + '-m', + '--method', + choices=c('AMOC', 'PELT', 'SegNeigh', 'BinSeg'), + help="Choice of method in the cpt function", + default='AMOC' + ) + parser$add_argument( + '-t', + '--test-statistic', + choices=c('Normal', 'CUSUM', 'CSS', 'Gamma', + 'Exponential', 'Poisson'), + help="Test statistic to use", + default='Normal' + ) + parser$add_argument('-Q', + '--max-cp', + help='Maximum number of change points', + choices=c('max', 'default'), + default='max') + return(parser$parse_args()) +} + +main <- function() +{ + args <- parse.args() + + # load the data + data <- load.dataset(args$input) + + n.obs <- data$original$n_obs + + # get the parameter list + defaults <- list() + # we set this to the maximum because we have no a priori knowledge of the + # maximum number of change points we expect. + if (args$method == 'BinSeg' || args$method == 'SegNeigh') { + if (args$max_cp == 'max') + defaults$Q <- n.obs/2 + 1 + else + defaults$Q <- 5 + } + if (args$penalty == "Asymptotic") + defaults$pen.value <- 0.05 + else + defaults$pen.value <- 0 # not used for other penalties + params <- make.param.list(args, defaults) + + if (args$func == "mean") { + cpt.func <- cpt.mean + } else if (args$func == "var") { + cpt.func <- cpt.var + } else if (args$func == "meanvar") { + cpt.func <- cpt.meanvar + } + + if (data$original$n_dim > 1) { + # changepoint package can't handle multidimensional data + exit.error.multidim(data$original, args, params) + } + + vec <- as.vector(data$mat) + start.time <- Sys.time() + + # call the appropriate function with the specified parameters + result <- tryCatch({ + locs <- cpt.func(vec, + penalty=params$penalty, + pen.value=params$pen.value, + method=params$method, + test.stat=params$test_statistic, + Q=params$Q, + class=FALSE + ) + list(locations=locs, error=NULL) + }, error=function(e) { + return(list(locations=NULL, error=e$message)) + }) + stop.time <- Sys.time() + runtime <- difftime(stop.time, start.time, units="secs") + + if (!is.null(result$error)) { + exit.with.error(data$original, args, params, result$error) + } + + # convert indices to 0-based indices. + if (params$method == 'AMOC') { + locations <- c(result$locations[1]) - 1 + names(locations) <- NULL + locations <- as.list(locations) + } else { + if (is.list(result$locations)) { + result$locations <- result$locations$cpts + } + locations <- as.list(result$locations - 1) + } + + exit.success(data$original, args, params, locations, runtime) +} + +load.utils() +main() |
