aboutsummaryrefslogtreecommitdiff
path: root/execs/R
diff options
context:
space:
mode:
Diffstat (limited to 'execs/R')
-rw-r--r--execs/R/cpdbench_changepoint.R153
-rw-r--r--execs/R/cpdbench_changepointnp.R102
-rw-r--r--execs/R/cpdbench_ecp.R117
-rw-r--r--execs/R/cpdbench_ocp.R116
-rw-r--r--execs/R/cpdbench_prophet.R185
-rw-r--r--execs/R/cpdbench_rfpop.R102
-rw-r--r--execs/R/cpdbench_wbs.R102
-rw-r--r--execs/R/utils.R138
8 files changed, 1015 insertions, 0 deletions
diff --git a/execs/R/cpdbench_changepoint.R b/execs/R/cpdbench_changepoint.R
new file mode 100644
index 00000000..ed1f6391
--- /dev/null
+++ b/execs/R/cpdbench_changepoint.R
@@ -0,0 +1,153 @@
+#' ---
+#' title: Wrapper for changepoint package in TCPDBench
+#' author: G.J.J. van den Burg
+#' date: 2019-09-28
+#' license: See LICENSE file.
+#' copyright: 2019, The Alan Turing Institute
+#' ---
+
+library(argparse)
+library(changepoint)
+
+load.utils <- function() {
+ # get the name of the current script so we can load utils.R (yay, R!)
+ cmd.args <- commandArgs(trailingOnly=F)
+ file.arg <- "--file="
+ this.script <- sub(file.arg, "", cmd.args[grep(file.arg, cmd.args)])
+ this.dir <- dirname(this.script)
+ utils.script <- file.path(this.dir, 'utils.R')
+ source(utils.script)
+}
+
+parse.args <- function() {
+ parser <- ArgumentParser(description='Wrapper for changepoint package')
+ parser$add_argument('-i',
+ '--input',
+ help='path to the input data file',
+ required=TRUE
+ )
+ parser$add_argument('-o',
+ '--output',
+ help='path to the output file')
+ parser$add_argument('-f',
+ '--func',
+ choices=c('mean', 'var', 'meanvar'),
+ help='Function to call in the changepoint package',
+ required=TRUE
+ )
+ parser$add_argument('-p',
+ '--penalty',
+ choices=c(
+ 'None',
+ 'SIC',
+ 'BIC',
+ 'MBIC',
+ 'AIC',
+ 'Hannan-Quinn',
+ 'Asymptotic'
+ ),
+ help='Choice of penalty in the cpt function',
+ default='MBIC'
+ )
+ parser$add_argument(
+ '-m',
+ '--method',
+ choices=c('AMOC', 'PELT', 'SegNeigh', 'BinSeg'),
+ help="Choice of method in the cpt function",
+ default='AMOC'
+ )
+ parser$add_argument(
+ '-t',
+ '--test-statistic',
+ choices=c('Normal', 'CUSUM', 'CSS', 'Gamma',
+ 'Exponential', 'Poisson'),
+ help="Test statistic to use",
+ default='Normal'
+ )
+ parser$add_argument('-Q',
+ '--max-cp',
+ help='Maximum number of change points',
+ choices=c('max', 'default'),
+ default='max')
+ return(parser$parse_args())
+}
+
+main <- function()
+{
+ args <- parse.args()
+
+ # load the data
+ data <- load.dataset(args$input)
+
+ n.obs <- data$original$n_obs
+
+ # get the parameter list
+ defaults <- list()
+ # we set this to the maximum because we have no a priori knowledge of the
+ # maximum number of change points we expect.
+ if (args$method == 'BinSeg' || args$method == 'SegNeigh') {
+ if (args$max_cp == 'max')
+ defaults$Q <- n.obs/2 + 1
+ else
+ defaults$Q <- 5
+ }
+ if (args$penalty == "Asymptotic")
+ defaults$pen.value <- 0.05
+ else
+ defaults$pen.value <- 0 # not used for other penalties
+ params <- make.param.list(args, defaults)
+
+ if (args$func == "mean") {
+ cpt.func <- cpt.mean
+ } else if (args$func == "var") {
+ cpt.func <- cpt.var
+ } else if (args$func == "meanvar") {
+ cpt.func <- cpt.meanvar
+ }
+
+ if (data$original$n_dim > 1) {
+ # changepoint package can't handle multidimensional data
+ exit.error.multidim(data$original, args, params)
+ }
+
+ vec <- as.vector(data$mat)
+ start.time <- Sys.time()
+
+ # call the appropriate function with the specified parameters
+ result <- tryCatch({
+ locs <- cpt.func(vec,
+ penalty=params$penalty,
+ pen.value=params$pen.value,
+ method=params$method,
+ test.stat=params$test_statistic,
+ Q=params$Q,
+ class=FALSE
+ )
+ list(locations=locs, error=NULL)
+ }, error=function(e) {
+ return(list(locations=NULL, error=e$message))
+ })
+ stop.time <- Sys.time()
+ runtime <- difftime(stop.time, start.time, units="secs")
+
+ if (!is.null(result$error)) {
+ exit.with.error(data$original, args, params, result$error)
+ }
+
+ # convert indices to 0-based indices.
+ if (params$method == 'AMOC') {
+ locations <- c(result$locations[1]) - 1
+ names(locations) <- NULL
+ locations <- as.list(locations)
+ } else {
+ if (is.list(result$locations)) {
+ result$locations <- result$locations$cpts
+ }
+ locations <- as.list(result$locations - 1)
+ }
+
+ exit.success(data$original, args, params, locations, runtime)
+}
+
+load.utils()
+main()
diff --git a/execs/R/cpdbench_changepointnp.R b/execs/R/cpdbench_changepointnp.R
new file mode 100644
index 00000000..823e58a4
--- /dev/null
+++ b/execs/R/cpdbench_changepointnp.R
@@ -0,0 +1,102 @@
+#' ---
+#' title: Wrapper for changepoint.np package in TCPDBench
+#' author: G.J.J. van den Burg
+#' date: 2019-09-30
+#' license: See LICENSE file.
+#' copyright: 2019, The Alan Turing Institute
+#' ---
+
+library(argparse)
+library(changepoint.np)
+
+load.utils <- function() {
+ # get the name of the current script so we can load utils.R (yay, R!)
+ cmd.args <- commandArgs(trailingOnly=F)
+ file.arg <- "--file="
+ this.script <- sub(file.arg, "", cmd.args[grep(file.arg, cmd.args)])
+ this.dir <- dirname(this.script)
+ utils.script <- file.path(this.dir, 'utils.R')
+ source(utils.script)
+}
+
+parse.args <- function() {
+ parser <- ArgumentParser(description='Wrapper for changepoint.np package')
+ parser$add_argument('-i',
+ '--input',
+ help='path to the input data file',
+ required=TRUE
+ )
+ parser$add_argument('-o',
+ '--output',
+ help='path to the output file')
+ parser$add_argument('-p',
+ '--penalty',
+ choices=c(
+ 'None',
+ 'SIC',
+ 'BIC',
+ 'MBIC',
+ 'AIC',
+ 'Hannan-Quinn',
+ 'Asymptotic'
+ ),
+ help='Choice of penalty in the cpt function',
+ default='MBIC'
+ )
+ parser$add_argument('-q',
+ '--nquantiles',
+ type='integer',
+ help='Number of quantiles to use',
+ default=10
+ )
+ return(parser$parse_args())
+}
+
+main <- function() {
+ args <- parse.args()
+
+ # load the data
+ data <- load.dataset(args$input)
+
+ # get the parameter list
+ defaults <- list(method='PELT',
+ test.stat='empirical_distribution',
+ minseglen=1)
+ params <- make.param.list(args, defaults)
+
+ if (data$origina$n_dim > 1) {
+ # changepoint.np package can't handle multidimensional data
+ exit.error.multidim(data$original, args, params)
+ }
+
+ vec <- as.vector(data$mat)
+ start.time <- Sys.time()
+
+ result <- tryCatch({
+ locs <- cpt.np(vec,
+ penalty=params$penalty,
+ method=params$method,
+ test.stat=params$test.stat,
+ minseglen=params$minseglen,
+ nquantiles=params$nquantiles,
+ class=FALSE
+ )
+ list(locations=locs, error=NULL)
+ }, error=function(e) {
+ return(list(locations=NULL, error=e$message))
+ })
+ stop.time <- Sys.time()
+ runtime <- difftime(stop.time, start.time, units="secs")
+
+ if (!is.null(result$error)) {
+ exit.with.error(data$original, args, params, result$error)
+ }
+
+ # convert indices to 0-based indices
+ locations <- as.list(result$locations - 1)
+
+ exit.success(data$original, args, params, locations, runtime)
+}
+
+load.utils()
+main()
diff --git a/execs/R/cpdbench_ecp.R b/execs/R/cpdbench_ecp.R
new file mode 100644
index 00000000..84131c20
--- /dev/null
+++ b/execs/R/cpdbench_ecp.R
@@ -0,0 +1,117 @@
+#' ---
+#' title: Wrapper for ecp package in TCPDBench
+#' author: G.J.J. van den Burg
+#' date: 2019-09-29
+#' license: See LICENSE file.
+#' copyright: 2019, The Alan Turing Institute
+#' ---
+
+library(argparse)
+library(ecp)
+
+load.utils <- function() {
+ # get the name of the current script so we can load utils.R (yay, R!)
+ cmd.args <- commandArgs(trailingOnly=F)
+ file.arg <- "--file="
+ this.script <- sub(file.arg, "", cmd.args[grep(file.arg, cmd.args)])
+ this.dir <- dirname(this.script)
+ utils.script <- file.path(this.dir, 'utils.R')
+ source(utils.script)
+}
+
+parse.args <- function() {
+ parser <- ArgumentParser(description='Wrapper for ecp package')
+ parser$add_argument('-i',
+ '--input',
+ help='path to the input data file',
+ required=TRUE
+ )
+ parser$add_argument('-o',
+ '--output',
+ help='path to the output file'
+ )
+ parser$add_argument('-a',
+ '--algorithm',
+ help='algorithm to use',
+ choices=c('e.agglo', 'e.divisive', 'kcpa'),
+ required=TRUE
+ )
+ parser$add_argument('--alpha',
+ type='double',
+ help='alpha parameter for agglo and divisive')
+ parser$add_argument('--minsize',
+ help='minsize argument for e.divisive',
+ type='integer', default=30)
+ parser$add_argument('-R', '--runs',
+ help='number of random permutations to use',
+ type='integer', default=199)
+ parser$add_argument('--siglvl',
+ type='double',
+ help='Significance level to use for tests')
+ # No examples are provided in the ecp package documentation about
+ # reasonable values for C, so we use 1 as default.
+ parser$add_argument('-C', '--cost',
+ type='double',
+ help='cost to use in the kcpa algorithm',
+ default=1)
+ parser$add_argument('-L', '--maxcp',
+ help='maximum number of cps in kcpa algorithm',
+ choices=c('max', 'default')
+ )
+ return(parser$parse_args())
+}
+
+main <- function() {
+ args <- parse.args()
+
+ # load the dataset
+ data <- load.dataset(args$input)
+
+ # copy defaults from the ecp package
+ defaults <- list()
+ if (args$algorithm == 'e.divisive') {
+ defaults$k <- 'null'
+ }
+ if (args$algorithm == 'kcpa') {
+ # Again, we don't want to limit the number of change points a priori,
+ # so set the maximum to the length of the series.
+ if (args$maxcp == 'max')
+ defaults$L <- data$original$n_obs
+ else
+ defaults$L <- 5 # following binseg and segneigh default
+ }
+ params <- make.param.list(args, defaults)
+
+ start.time <- Sys.time()
+ result <- tryCatch({
+ if (args$algorithm == 'e.agglo') {
+ out <- e.agglo(data$mat, alpha=params$alpha)
+ locs <- out$estimates
+ } else if (args$algorithm == 'e.divisive') {
+ out <- e.divisive(data$mat, sig.lvl=params$siglvl, R=params$runs,
+ min.size=params$minsize, alpha=params$alpha)
+ locs <- out$estimates
+ } else {
+ # kcpa
+ out <- kcpa(data$mat, params$L, params$cost)
+ locs <- out
+ }
+ list(locations=locs, error=NULL)
+ }, error=function(e) {
+ return(list(locations=NULL, error=e$message))
+ })
+
+ stop.time <- Sys.time()
+ runtime <- difftime(stop.time, start.time, units='secs')
+
+ if (!is.null(result$error))
+ exit.with.error(data$original, args, params, result$error)
+
+ # convert to 0-based indices
+ locations <- as.list(result$locations - 1)
+
+ exit.success(data$original, args, params, locations, runtime)
+}
+
+load.utils()
+main()
diff --git a/execs/R/cpdbench_ocp.R b/execs/R/cpdbench_ocp.R
new file mode 100644
index 00000000..308e5bac
--- /dev/null
+++ b/execs/R/cpdbench_ocp.R
@@ -0,0 +1,116 @@
+#' ---
+#' title: Wrapper for ocp package in TCPDBench
+#' author: G.J.J. van den Burg
+#' date: 2019-10-05
+#' license: See the LICENSE file.
+#' copyright: 2019, The Alan Turing Institute
+#' ---
+
+library(argparse)
+library(ocp)
+
+load.utils <- function() {
+ # get the name of the current script so we can load utils.R (yay, R!)
+ cmd.args <- commandArgs(trailingOnly=F)
+ file.arg <- "--file="
+ this.script <- sub(file.arg, "", cmd.args[grep(file.arg, cmd.args)])
+ this.dir <- dirname(this.script)
+ utils.script <- file.path(this.dir, 'utils.R')
+ source(utils.script)
+}
+
+parse.args <- function() {
+ parser <- ArgumentParser(description='Wrapper for changepoint package')
+ parser$add_argument('-i',
+ '--input',
+ help='path to the input data file',
+ required=TRUE
+ )
+ parser$add_argument('-o',
+ '--output',
+ help='path to the output file')
+ parser$add_argument('-l',
+ '--lambda',
+ help='lambda parameter for constant hazard function',
+ type='integer',
+ default=100
+ )
+ parser$add_argument('--prior-a',
+ help='Prior alpha for student-t',
+ type='double',
+ default=1)
+ parser$add_argument('--prior-b',
+ help='Prior beta for student-t',
+ type='double',
+ default=1
+ )
+ parser$add_argument('--prior-k',
+ help='Prior kappa for student-t',
+ type='double',
+ default=1
+ )
+
+ return(parser$parse_args())
+}
+
+main <- function()
+{
+ args <- parse.args()
+ data <- load.dataset(args$input)
+
+ # set the defaults that we don't change
+ defaults <- list(missPts="none",
+ cpthreshold=0.5, # unused by us
+ truncRlim=10^(-4),
+ minRlength=1,
+ maxRlength=10^4, # bigger than any of our datasets
+ minsep=1,
+ maxsep=10^4 # bigger than any of our datasets
+ )
+ defaults$multivariate = data$original$n_dim > 1
+
+ # combine defaults and cmd args
+ params <- make.param.list(args, defaults)
+
+ # define our hazard function with the lambda in the parameters
+ hazard_func <- function(x, lambda) {
+ const_hazard(x, lambda=params$lambda)
+ }
+
+ # we only use the gaussian model since the data is scaled
+ model.params <- list(list(m=0, k=params$prior_k, a=params$prior_a,
+ b=params$prior_b))
+
+ start.time <- Sys.time()
+ result <- tryCatch({
+ fit <- onlineCPD(data$mat, oCPD=NULL, missPts=params$missPts,
+ hazard_func=hazard_func,
+ probModel=list("gaussian"),
+ init_params=model.params,
+ multivariate=params$multivariate,
+ cpthreshold=params$cpthreshold,
+ truncRlim=params$truncRlim,
+ minRlength=params$minRlength,
+ maxRlength=params$maxRlength,
+ minsep=params$minsep,
+ maxsep=params$maxsep
+ )
+ locs <- as.vector(fit$changepoint_lists$maxCPs[[1]])
+ list(locations=locs, error=NULL)
+ }, error=function(e) {
+ return(list(locations=NULL, error=e$message))
+ })
+ stop.time <- Sys.time()
+ runtime <- difftime(stop.time, start.time, units="secs")
+
+ if (!is.null(result$error))
+ exit.with.error(data$original, args, params, result$error)
+
+ # convert indices to 0-based indices
+ locations <- as.list(result$locations - 1)
+
+ exit.success(data$original, args, params, locations, runtime)
+}
+
+load.utils()
+main()
diff --git a/execs/R/cpdbench_prophet.R b/execs/R/cpdbench_prophet.R
new file mode 100644
index 00000000..511217f5
--- /dev/null
+++ b/execs/R/cpdbench_prophet.R
@@ -0,0 +1,185 @@
+#' ---
+#' title: Wrapper for the Prophet package in TCPDBench
+#' author: G.J.J. van den Burg
+#' date: 2019-09-30
+#' license: See the LICENSE file.
+#' copyright: 2019, The Alan Turing Institute
+#' ---
+
+library(argparse)
+library(prophet)
+library(lubridate)
+
+NO.DATETIME <- c('scanline_126007', 'scanline_42049', 'well_log',
+ 'quality_control_1', 'quality_control_2', 'quality_control_3',
+ 'quality_control_4', 'quality_control_5')
+
+load.utils <- function() {
+ # get the name of the current script so we can load utils.R (yay, R!)
+ cmd.args <- commandArgs(trailingOnly=F)
+ file.arg <- "--file="
+ this.script <- sub(file.arg, "", cmd.args[grep(file.arg, cmd.args)])
+ this.dir <- dirname(this.script)
+ utils.script <- file.path(this.dir, 'utils.R')
+ source(utils.script)
+}
+
+parse.args <- function() {
+ parser <- ArgumentParser(description='Wrapper for Prophet package')
+ parser$add_argument('-i',
+ '--input',
+ help='path to the input data file',
+ required=TRUE
+ )
+ parser$add_argument('-o',
+ '--output',
+ help='path to the output file'
+ )
+ parser$add_argument('-N',
+ '--Nmax',
+ help='maximum number of changepoints',
+ choices=c('default', 'max')
+ )
+ return(parser$parse_args())
+}
+
+frac.to.dt <- function(raw) {
+ out <- c()
+ for (i in 1:length(raw)) {
+ replaced <- gsub('-', '.', raw[i]);
+ number <- as.double(replaced)
+ year <- floor(number)
+ remainder <- number - year
+ begin <- as_datetime(paste(year, '-01-01', sep=''))
+ end <- as_datetime(paste(year+1, '-01-01', sep=''))
+ offset <- remainder * (end - begin)
+ dt <- begin + offset
+ # you'd think there'd be a well-documented easy-to-find function for
+ # this
+ datepart <- date(dt)
+ timepart <- sprintf("%02d:%02d:%02d", hour(dt), minute(dt),
+ round(second(dt)))
+ iso <- paste(datepart, timepart, sep=' ')
+ out <- c(out, iso)
+ }
+ return(out)
+}
+
+preprocess.data <- function(data)
+{
+ if ("format" %in% names(data$original$time)) {
+ if (data$original$time$format == "%Y-%m-%d %H:%M:%S") {
+ tidx <- data$original$time$raw
+ } else if (data$original$time$format == "%Y-%m-%d") {
+ tidx <- data$original$time$raw
+ } else if (data$original$time$format == "%Y-%m") {
+ tidx <- paste(data$original$time$raw, '-01', sep='')
+ } else if (data$original$time$format == "%Y") {
+ tidx <- paste(data$original$time$raw, '-01-01', sep='')
+ } else if (data$original$time$format == "%Y-%F") {
+ tidx <- frac.to.dt(data$original$time$raw)
+ } else {
+ stop(cat("Unknown time format: ", data$original$time$format, '\n'))
+ }
+ } else {
+ if (data$original$name == 'bank') {
+ # bank is daily data
+ dt <- as_date("2019-01-01")
+ tidx <- c(dt)
+ for (i in 2:data$original$n_obs) {
+ dt <- dt + ddays(1)
+ tidx <- c(tidx, dt)
+ }
+ } else if (data$original$name == 'bee_waggle') {
+ # bee_waggle is seconds data (I believe)
+ dt <- as_date("2019-01-01 00:00:00")
+ tidx <- c(dt)
+ for (i in 2:data$original$n_obs) {
+ dt <- dt + dseconds(1)
+ tidx <- c(tidx, dt)
+ }
+ } else if (data$original$name %in% NO.DATETIME) {
+ # these datasets have no corresponding time axis, so we disable
+ # periodicity in prophet for fairness.
+ # We'll make it "daily", because prophet needs a datetime format
+ dt <- as_date("2019-01-01")
+ tidx <- c(dt)
+ for (i in 2:data$original$n_obs) {
+ dt <- dt + ddays(1)
+ tidx <- c(tidx, dt)
+ }
+ } else {
+ stop(cat("Unhandled time series: ", data$original$name, '\n'))
+ }
+ }
+
+ raw <- as.vector(data$mat)
+
+ df <- data.frame(ds=tidx, y=raw)
+
+ return(df)
+}
+
+main <- function() {
+ args <- parse.args()
+ data <- load.dataset(args$input)
+
+ defaults <- list()
+ # we want to allow change points throughout the entire range of the series
+ defaults$changepoint.range <- 1
+ # threshold used in add_changepoints_to_plot
+ defaults$threshold <- 0.01
+ defaults$yearly.seasonality <- 'auto'
+ defaults$weekly.seasonality <- 'auto'
+ defaults$daily.seasonality <- 'auto'
+
+ if (args$Nmax == 'default')
+ args$Nmax <- 25
+ else
+ args$Nmax <- data$original$n_obs - 1
+
+ if (data$original$name %in% NO.DATETIME) {
+ defaults$yearly.seasonality <- FALSE
+ defaults$weekly.seasonality <- FALSE
+ defaults$daily.seasonality <- FALSE
+ }
+
+ params <- make.param.list(args, defaults)
+
+ if (data$original$n_dim > 1) {
+ # package doesn't handle multidimensional data
+ exit.error.multidim(data$original, args, params)
+ }
+
+ df <- preprocess.data(data)
+
+ start.time <- Sys.time()
+ result <- tryCatch({
+ model <- prophet(df, changepoint.range=params$changepoint.range,
+ n.changepoints=params$Nmax,
+ yearly.seasonality=params$yearly.seasonality,
+ weekly.seasonality=params$weekly.seasonality,
+ daily.seasonality=params$daily.seasonality
+ )
+ threshold <- params$threshold
+ cpt <- model$changepoints[abs(model$params$delta) >= threshold]
+ cpt <- as.character(as.POSIXct(cpt))
+ locs <- match(cpt, as.character(df$ds))
+ list(locations=locs, error=NULL)
+ }, error=function(e) {
+ return(list(locations=NULL, error=e$message))
+ })
+ stop.time <- Sys.time()
+ runtime <- difftime(stop.time, start.time, units='secs')
+
+ if (!is.null(result$error))
+ exit.with.error(data$original, args, params, result$error)
+
+ # convert to 0-based indices
+ locations <- as.list(result$locations - 1)
+
+ exit.success(data$original, args, params, locations, runtime)
+}
+
+load.utils()
+main()
diff --git a/execs/R/cpdbench_rfpop.R b/execs/R/cpdbench_rfpop.R
new file mode 100644
index 00000000..33ced873
--- /dev/null
+++ b/execs/R/cpdbench_rfpop.R
@@ -0,0 +1,102 @@
+#' ---
+#' title: Wrapper for robust-fpop package in TCPDBench
+#' author: G.J.J. van den Burg
+#' date: 2019-09-30
+#' license: See the LICENSE file.
+#' copyright: 2019, The Alan Turing Institute
+#' ---
+
+library(argparse)
+library(robseg)
+
+load.utils <- function() {
+ # get the name of the current script so we can load utils.R (yay, R!)
+ cmd.args <- commandArgs(trailingOnly=F)
+ file.arg <- "--file="
+ this.script <- sub(file.arg, "", cmd.args[grep(file.arg, cmd.args)])
+ this.dir <- dirname(this.script)
+ utils.script <- file.path(this.dir, 'utils.R')
+ source(utils.script)
+}
+
+parse.args <- function() {
+ parser <- ArgumentParser(description='Wrapper for robseg package')
+ parser$add_argument('-i',
+ '--input',
+ help='path to the input data file',
+ required=TRUE
+ )
+ parser$add_argument('-o',
+ '--output',
+ help='path to the output file'
+ )
+ parser$add_argument('-l',
+ '--loss',
+ help='loss function to use',
+ choices=c('L1', 'L2', 'Huber', 'Outlier'),
+ required=TRUE
+ )
+ return(parser$parse_args())
+}
+
+main <- function() {
+ args <- parse.args()
+ data <- load.dataset(args$input)
+
+ # copy the defaults from the robust-fpop repo and the JASA paper.
+ defaults <- list()
+ if (args$loss == 'Outlier') {
+ defaults$lambda <- 2 * log(data$original$n_obs)
+ defaults$lthreshold <- 3
+ } else if (args$loss == 'Huber') {
+ defaults$lambda <- 1.4 * log(data$original$n_obs)
+ defaults$lthreshold <- 1.345
+ } else if (args$loss == 'L1') {
+ defaults$lambda <- log(data$original$n_obs)
+ } else if (args$loss == 'L2') {
+ defaults$lambda <- log(data$original$n_obs)
+ }
+ params <- make.param.list(args, defaults)
+
+ if (data$original$n_dim > 1) {
+ # robseg package can't handle multidimensional data
+ exit.error.multidim(data$original, args, params)
+ }
+
+ vec <- as.vector(data$mat)
+
+ start.time <- Sys.time()
+
+ # estimate the standard deviation as in the README of the robseg package.
+ est.std <- mad(diff(vec)/sqrt(2))
+ # and normalise the data with this. Note that this means that we don't need
+ # to scale lambda and the threshold by the estimated standard deviation.
+ x <- vec / est.std
+
+ result <- tryCatch({
+ out <- Rob_seg.std(x=x,
+ loss=params$loss,
+ lambda=params$lambda,
+ lthreshold=params$lthreshold
+ )
+ locs <- out$t.est
+ list(locations=locs, error=NULL)
+ }, error=function(e) {
+ return(list(locations=NULL, error=e$message))
+ })
+
+ stop.time <- Sys.time()
+ runtime <- difftime(stop.time, start.time, units='secs')
+
+ if (!is.null(result$error)) {
+ exit.with.error(data$original, args, params, result$error)
+ }
+
+ # convert indices to 0-based
+ locations <- as.list(result$locations - 1)
+
+ exit.success(data$original, args, params, locations, runtime)
+}
+
+load.utils()
+main()
diff --git a/execs/R/cpdbench_wbs.R b/execs/R/cpdbench_wbs.R
new file mode 100644
index 00000000..e858b4df
--- /dev/null
+++ b/execs/R/cpdbench_wbs.R
@@ -0,0 +1,102 @@
+#' ---
+#' title: Wrapper for wbs package in TCPDBench
+#' author: G.J.J. van den Burg
+#' date: 2019-09-28
+#' license: See the LICENSE file.
+#' copyright: 2019, The Alan Turing Institute
+#' ---
+
+library(argparse)
+library(wbs)
+
+load.utils <- function() {
+ # get the name of the current script so we can load utils.R (yay, R!)
+ cmd.args <- commandArgs(trailingOnly=F)
+ file.arg <- "--file="
+ this.script <- sub(file.arg, "", cmd.args[grep(file.arg, cmd.args)])
+ this.dir <- dirname(this.script)
+ utils.script <- file.path(this.dir, 'utils.R')
+ source(utils.script)
+}
+
+parse.args <- function() {
+ parser <- ArgumentParser(description='Wrapper for wbs package')
+ parser$add_argument('-i',
+ '--input',
+ help='path to the input data file',
+ required=TRUE
+ )
+ parser$add_argument('-o',
+ '--output',
+ help='path to the output file'
+ )
+ parser$add_argument('-K', '--Kmax', choices=c('default', 'max'),
+ help='the maximum number of changepoints',
+ default='max')
+ parser$add_argument('-p', '--penalty', choices=c('SSIC', 'BIC', 'MBIC'),
+ help='The penalty to use in WBS')
+ parser$add_argument("-g", "--integrated", choices=c("true", "false"),
+ help="Whether to use integrated WBS or not")
+ return(parser$parse_args())
+}
+
+main <- function() {
+ args <- parse.args()
+
+ # load the data
+ data <- load.dataset(args$input)
+
+ # copy defaults from the wbs package and set params
+ defaults <- list(M=5000, rand.intervals=T)
+ if (args$Kmax == 'default')
+ args$Kmax <- 50
+ else
+ args$Kmax <- data$original$n_obs
+
+ if (args$integrated == "true")
+ args$integrated = TRUE
+ else
+ args$integrated = FALSE
+ params <- make.param.list(args, defaults)
+
+ if (data$original$n_dim > 1) {
+ # wbs package doesn't handle multidimensional data
+ exit.error.multidim(data$original, args, params)
+ }
+
+ vec <- as.vector(data$mat)
+ start.time <- Sys.time()
+
+ # We use the SSIC penalty as this is used in the WBS paper and is the
+ # default in the WBS package (for plot.wbs, for instance).
+
+ result <- tryCatch({
+ out <- wbs(vec, M=params$M, rand.intervals=params$rand.intervals,
+ integrated=params$integrated)
+ cpt <- changepoints(out, Kmax=params$Kmax)
+ if (params$penalty == "SSIC")
+ locs <- cpt$cpt.ic$ssic.penalty
+ else if (params$penalty == "BIC")
+ locs <- cpt$cpt.ic$bic.penalty
+ else if (params$penalty == "MBIC")
+ locs <- cpt$cpt.ic$mbic.penalty
+ locs <- sort(locs)
+ list(locations=locs, error=NULL)
+ }, error=function(e) {
+ return(list(locations=NULL, error=e$message))
+ })
+ stop.time <- Sys.time()
+ runtime <- difftime(stop.time, start.time, units='secs')
+
+ if (!is.null(result$error)) {
+ exit.with.error(data$original, args, params, result$error)
+ }
+
+ # convert to 0-based indices.
+ locations <- as.list(result$locations - 1)
+
+ exit.success(data$original, args, params, locations, runtime)
+}
+
+load.utils()
+main()
diff --git a/execs/R/utils.R b/execs/R/utils.R
new file mode 100644
index 00000000..504b5373
--- /dev/null
+++ b/execs/R/utils.R
@@ -0,0 +1,138 @@
+#' ---
+#' title: Utilities shared between R code
+#' author: G.J.J. van den Burg
+#' date: 2019-09-29
+#' license: See the LICENSE file.
+#' copyright: 2019, The Alan Turing Institute
+#' ---
+
+library(RJSONIO)
+
+printf <- function(...) invisible(cat(sprintf(...)));
+
+load.dataset <- function(filename)
+{
+ data <- fromJSON(filename)
+
+ # reformat the data to a data frame with a time index and the data values
+ tidx <- data$time$index
+ exp <- 0:(data$n_obs - 1)
+ if (all(tidx == exp) && length(tidx) == length(exp)) {
+ tidx <- NULL
+ } else {
+ tidx <- data$time$index
+ }
+
+ mat <- NULL
+
+ for (j in 1:data$n_dim) {
+ s <- data$series[[j]]
+ v <- NULL
+ for (i in 1:data$n_obs) {
+ val <- s$raw[[i]]
+ if (is.null(val)) {
+ v <- c(v, NA)
+ } else {
+ v <- c(v, val)
+ }
+ }
+ mat <- cbind(mat, v)
+ }
+
+ # We normalize to avoid issues with numerical precision.
+ mat <- scale(mat)
+
+ out <- list(original=data,
+ time=tidx,
+ mat=mat)
+ return(out)
+}
+
+prepare.result <- function(data, data.filename, status, error,
+ params, locations, runtime) {
+ out <- list(error=NULL)
+ cmd.args <- commandArgs(trailingOnly=F)
+
+ # the full command used
+ out$command <- paste(cmd.args, collapse=' ')
+
+ # get the name of the current script
+ file.arg <- "--file="
+ out$script <- sub(file.arg, "", cmd.args[grep(file.arg, cmd.args)])
+
+ # hash of the script
+ script.hash <- tools::md5sum(out$script)
+ names(script.hash) <- NULL
+ out$script_md5 <- script.hash
+
+ # hostname of the machine
+ hostname <- Sys.info()['nodename']
+ names(hostname) <- NULL
+ out$hostname <- hostname
+
+ # dataset name
+ out$dataset <- data$name
+
+ # dataset hash
+ data.hash <- tools::md5sum(data.filename)
+ names(data.hash) <- NULL
+ out$dataset_md5 <- data.hash
+
+ # status of running the script
+ out$status <- status
+
+ # error (if any)
+ if (!is.null(error))
+ out$error <- error
+
+ # parameters used
+ out$parameters <- params
+
+ # result
+ out$result <- list(cplocations=locations, runtime=runtime)
+
+ return(out)
+}
+
+make.param.list <- function(args, defaults)
+{
+ params <- defaults
+
+ args.copy <- args
+ args.copy['input'] <- NULL
+ args.copy['output'] <- NULL
+
+ params <- modifyList(params, args.copy)
+ return(params)
+}
+
+dump.output <- function(out, filename) {
+ json.out <- toJSON(out, pretty=T)
+ if (!is.null(filename))
+ write(json.out, filename)
+ else
+ cat(json.out, '\n')
+}
+
+exit.error.multidim <- function(data, args, params) {
+ status = 'SKIP'
+ error = 'This method has no support for multidimensional data.'
+ out <- prepare.result(data, args$input, status, error, params, NULL, NA)
+ dump.output(out, args$output)
+ quit(save='no')
+}
+
+exit.with.error <- function(data, args, params, error) {
+ status = 'FAIL'
+ out <- prepare.result(data, args$input, status, error, params, NULL, NULL)
+ dump.output(out, args$output)
+ quit(save='no')
+}
+
+exit.success <- function(data, args, params, locations, runtime) {
+ status = 'SUCCESS'
+ error = NULL
+ out <- prepare.result(data, args$input, status, error, params, locations,
+ runtime)
+ dump.output(out, args$output)
+}