aboutsummaryrefslogtreecommitdiff
path: root/execs/R/cpdbench_changepoint.R
diff options
context:
space:
mode:
authorGertjan van den Burg <gertjanvandenburg@gmail.com>2020-03-12 14:33:57 +0000
committerGertjan van den Burg <gertjanvandenburg@gmail.com>2020-03-12 14:33:57 +0000
commit7ef8f6e58990fc069cccc71ed6564e8c639ea4fc (patch)
tree9e7662a34b7d0c1f1c5d9faf6d7d6ea8672f6410 /execs/R/cpdbench_changepoint.R
downloadTCPDBench-7ef8f6e58990fc069cccc71ed6564e8c639ea4fc.tar.gz
TCPDBench-7ef8f6e58990fc069cccc71ed6564e8c639ea4fc.zip
initial commit
Diffstat (limited to 'execs/R/cpdbench_changepoint.R')
-rw-r--r--execs/R/cpdbench_changepoint.R153
1 files changed, 153 insertions, 0 deletions
diff --git a/execs/R/cpdbench_changepoint.R b/execs/R/cpdbench_changepoint.R
new file mode 100644
index 00000000..ed1f6391
--- /dev/null
+++ b/execs/R/cpdbench_changepoint.R
@@ -0,0 +1,153 @@
+#' ---
+#' title: Wrapper for changepoint package in TCPDBench
+#' author: G.J.J. van den Burg
+#' date: 2019-09-28
+#' license: See LICENSE file.
+#' copyright: 2019, The Alan Turing Institute
+#' ---
+
+library(argparse)
+library(changepoint)
+
+load.utils <- function() {
+ # get the name of the current script so we can load utils.R (yay, R!)
+ cmd.args <- commandArgs(trailingOnly=F)
+ file.arg <- "--file="
+ this.script <- sub(file.arg, "", cmd.args[grep(file.arg, cmd.args)])
+ this.dir <- dirname(this.script)
+ utils.script <- file.path(this.dir, 'utils.R')
+ source(utils.script)
+}
+
+parse.args <- function() {
+ parser <- ArgumentParser(description='Wrapper for changepoint package')
+ parser$add_argument('-i',
+ '--input',
+ help='path to the input data file',
+ required=TRUE
+ )
+ parser$add_argument('-o',
+ '--output',
+ help='path to the output file')
+ parser$add_argument('-f',
+ '--func',
+ choices=c('mean', 'var', 'meanvar'),
+ help='Function to call in the changepoint package',
+ required=TRUE
+ )
+ parser$add_argument('-p',
+ '--penalty',
+ choices=c(
+ 'None',
+ 'SIC',
+ 'BIC',
+ 'MBIC',
+ 'AIC',
+ 'Hannan-Quinn',
+ 'Asymptotic'
+ ),
+ help='Choice of penalty in the cpt function',
+ default='MBIC'
+ )
+ parser$add_argument(
+ '-m',
+ '--method',
+ choices=c('AMOC', 'PELT', 'SegNeigh', 'BinSeg'),
+ help="Choice of method in the cpt function",
+ default='AMOC'
+ )
+ parser$add_argument(
+ '-t',
+ '--test-statistic',
+ choices=c('Normal', 'CUSUM', 'CSS', 'Gamma',
+ 'Exponential', 'Poisson'),
+ help="Test statistic to use",
+ default='Normal'
+ )
+ parser$add_argument('-Q',
+ '--max-cp',
+ help='Maximum number of change points',
+ choices=c('max', 'default'),
+ default='max')
+ return(parser$parse_args())
+}
+
+main <- function()
+{
+ args <- parse.args()
+
+ # load the data
+ data <- load.dataset(args$input)
+
+ n.obs <- data$original$n_obs
+
+ # get the parameter list
+ defaults <- list()
+ # we set this to the maximum because we have no a priori knowledge of the
+ # maximum number of change points we expect.
+ if (args$method == 'BinSeg' || args$method == 'SegNeigh') {
+ if (args$max_cp == 'max')
+ defaults$Q <- n.obs/2 + 1
+ else
+ defaults$Q <- 5
+ }
+ if (args$penalty == "Asymptotic")
+ defaults$pen.value <- 0.05
+ else
+ defaults$pen.value <- 0 # not used for other penalties
+ params <- make.param.list(args, defaults)
+
+ if (args$func == "mean") {
+ cpt.func <- cpt.mean
+ } else if (args$func == "var") {
+ cpt.func <- cpt.var
+ } else if (args$func == "meanvar") {
+ cpt.func <- cpt.meanvar
+ }
+
+ if (data$original$n_dim > 1) {
+ # changepoint package can't handle multidimensional data
+ exit.error.multidim(data$original, args, params)
+ }
+
+ vec <- as.vector(data$mat)
+ start.time <- Sys.time()
+
+ # call the appropriate function with the specified parameters
+ result <- tryCatch({
+ locs <- cpt.func(vec,
+ penalty=params$penalty,
+ pen.value=params$pen.value,
+ method=params$method,
+ test.stat=params$test_statistic,
+ Q=params$Q,
+ class=FALSE
+ )
+ list(locations=locs, error=NULL)
+ }, error=function(e) {
+ return(list(locations=NULL, error=e$message))
+ })
+ stop.time <- Sys.time()
+ runtime <- difftime(stop.time, start.time, units="secs")
+
+ if (!is.null(result$error)) {
+ exit.with.error(data$original, args, params, result$error)
+ }
+
+ # convert indices to 0-based indices.
+ if (params$method == 'AMOC') {
+ locations <- c(result$locations[1]) - 1
+ names(locations) <- NULL
+ locations <- as.list(locations)
+ } else {
+ if (is.list(result$locations)) {
+ result$locations <- result$locations$cpts
+ }
+ locations <- as.list(result$locations - 1)
+ }
+
+ exit.success(data$original, args, params, locations, runtime)
+}
+
+load.utils()
+main()