diff options
Diffstat (limited to 'execs/R/cpdbench_ecp.R')
| -rw-r--r-- | execs/R/cpdbench_ecp.R | 117 |
1 files changed, 117 insertions, 0 deletions
diff --git a/execs/R/cpdbench_ecp.R b/execs/R/cpdbench_ecp.R new file mode 100644 index 00000000..84131c20 --- /dev/null +++ b/execs/R/cpdbench_ecp.R @@ -0,0 +1,117 @@ +#' --- +#' title: Wrapper for ecp package in TCPDBench +#' author: G.J.J. van den Burg +#' date: 2019-09-29 +#' license: See LICENSE file. +#' copyright: 2019, The Alan Turing Institute +#' --- + +library(argparse) +library(ecp) + +load.utils <- function() { + # get the name of the current script so we can load utils.R (yay, R!) + cmd.args <- commandArgs(trailingOnly=F) + file.arg <- "--file=" + this.script <- sub(file.arg, "", cmd.args[grep(file.arg, cmd.args)]) + this.dir <- dirname(this.script) + utils.script <- file.path(this.dir, 'utils.R') + source(utils.script) +} + +parse.args <- function() { + parser <- ArgumentParser(description='Wrapper for ecp package') + parser$add_argument('-i', + '--input', + help='path to the input data file', + required=TRUE + ) + parser$add_argument('-o', + '--output', + help='path to the output file' + ) + parser$add_argument('-a', + '--algorithm', + help='algorithm to use', + choices=c('e.agglo', 'e.divisive', 'kcpa'), + required=TRUE + ) + parser$add_argument('--alpha', + type='double', + help='alpha parameter for agglo and divisive') + parser$add_argument('--minsize', + help='minsize argument for e.divisive', + type='integer', default=30) + parser$add_argument('-R', '--runs', + help='number of random permutations to use', + type='integer', default=199) + parser$add_argument('--siglvl', + type='double', + help='Significance level to use for tests') + # No examples are provided in the ecp package documentation about + # reasonable values for C, so we use 1 as default. + parser$add_argument('-C', '--cost', + type='double', + help='cost to use in the kcpa algorithm', + default=1) + parser$add_argument('-L', '--maxcp', + help='maximum number of cps in kcpa algorithm', + choices=c('max', 'default') + ) + return(parser$parse_args()) +} + +main <- function() { + args <- parse.args() + + # load the dataset + data <- load.dataset(args$input) + + # copy defaults from the ecp package + defaults <- list() + if (args$algorithm == 'e.divisive') { + defaults$k <- 'null' + } + if (args$algorithm == 'kcpa') { + # Again, we don't want to limit the number of change points a priori, + # so set the maximum to the length of the series. + if (args$maxcp == 'max') + defaults$L <- data$original$n_obs + else + defaults$L <- 5 # following binseg and segneigh default + } + params <- make.param.list(args, defaults) + + start.time <- Sys.time() + result <- tryCatch({ + if (args$algorithm == 'e.agglo') { + out <- e.agglo(data$mat, alpha=params$alpha) + locs <- out$estimates + } else if (args$algorithm == 'e.divisive') { + out <- e.divisive(data$mat, sig.lvl=params$siglvl, R=params$runs, + min.size=params$minsize, alpha=params$alpha) + locs <- out$estimates + } else { + # kcpa + out <- kcpa(data$mat, params$L, params$cost) + locs <- out + } + list(locations=locs, error=NULL) + }, error=function(e) { + return(list(locations=NULL, error=e$message)) + }) + + stop.time <- Sys.time() + runtime <- difftime(stop.time, start.time, units='secs') + + if (!is.null(result$error)) + exit.with.error(data$original, args, params, result$error) + + # convert to 0-based indices + locations <- as.list(result$locations - 1) + + exit.success(data$original, args, params, locations, runtime) +} + +load.utils() +main() |
