#' --- #' title: Utilities shared between R code #' author: G.J.J. van den Burg #' date: 2019-09-29 #' license: See the LICENSE file. #' copyright: 2019, The Alan Turing Institute #' --- library(RJSONIO) printf <- function(...) invisible(cat(sprintf(...))); #' Load a TCPDBench dataset #' #' This function reads in a JSON dataset in TCPDBench format (see TCPD #' repository for schema) and creates a matrix representation of the dataset. #' The dataset is scaled in the process. #' #' @param filename Path to the JSON file #' @return List object with the raw data in the \code{original} field, the time #' index in the \code{time} field, and the data matrix in the \code{mat} field. #' load.dataset <- function(filename) { data <- fromJSON(filename) # reformat the data to a data frame with a time index and the data values tidx <- data$time$index exp <- 0:(data$n_obs - 1) if (all(tidx == exp) && length(tidx) == length(exp)) { tidx <- NULL } else { tidx <- data$time$index } mat <- NULL for (j in 1:data$n_dim) { s <- data$series[[j]] v <- NULL for (i in 1:data$n_obs) { val <- s$raw[[i]] if (is.null(val)) { v <- c(v, NA) } else { v <- c(v, val) } } mat <- cbind(mat, v) } # We normalize to avoid issues with numerical precision. mat <- scale(mat) out <- list(original=data, time=tidx, mat=mat) return(out) } #' Prepare the experiment output #' #' This function creates a list of the necessary output data. This includes the #' exact command that was run, dataset and script information, the hostname, #' output status, any errors if present, and the detected change point location #' and runtime. #' #' @param data the raw data loaded from the JSON file #' @param data.filename the path to the dataset filename #' @param status the output status code of the experiment. Currently in use are #' 'SUCCESS' for when an experiment exited successfully, 'TIMEOUT' if the #' experiment exceeded a limit on runtime, 'SKIP' if the method was supplied #' with improper hyperparameters, and 'FAIL' if an error occurred. #' @param error a description of the error, if one occurred #' @param params input parameters (including defaults) to the method #' @param locations detected change point locations (important: these locations #' are 0-based, whereas R array indices are 1-based. It is important to convert #' them accordingly. Change point locations should be integers on the interval #' [0, T-1], including both endpoints). #' @param runtime the runtime of the method. #' #' @return list with all the necessary output fields. prepare.result <- function(data, data.filename, status, error, params, locations, runtime) { out <- list(error=NULL) cmd.args <- commandArgs(trailingOnly=F) # the full command used out$command <- paste(cmd.args, collapse=' ') # get the name of the current script file.arg <- "--file=" out$script <- sub(file.arg, "", cmd.args[grep(file.arg, cmd.args)]) # hash of the script script.hash <- tools::md5sum(out$script) names(script.hash) <- NULL out$script_md5 <- script.hash # hostname of the machine hostname <- Sys.info()['nodename'] names(hostname) <- NULL out$hostname <- hostname # dataset name out$dataset <- data$name # dataset hash data.hash <- tools::md5sum(data.filename) names(data.hash) <- NULL out$dataset_md5 <- data.hash # status of running the script out$status <- status # error (if any) if (!is.null(error)) out$error <- error # parameters used out$parameters <- params # result out$result <- list(cplocations=locations, runtime=runtime) return(out) } #' Combine default parameters and command line arguments #' #' @param args the command line arguments #' @param defaults default algorithm parameters #' @return a combined list with both the default parameter settings and those #' provided on the command line. If a parameter is in the default list that is #' specified on the command line the command line parameter takes precedence. make.param.list <- function(args, defaults) { params <- defaults args.copy <- args args.copy['input'] <- NULL args.copy['output'] <- NULL params <- modifyList(params, args.copy) return(params) } #' Write output to a file or stdout #' #' This function takes an output list generated by \code{\link{prepare.result}} #' and writes it out as JSON to a file if provided or stdout otherwise. #' #' @param out experimental results as a list #' @param filename (optional) output file to write to #' dump.output <- function(out, filename) { json.out <- toJSON(out, pretty=T) if (!is.null(filename)) write(json.out, filename) else cat(json.out, '\n') } #' Exit with SKIP status due to multidimensional data #' #' This is a shorthand for \code{\link{exit.with.error}} where the error is #' already set for methods that don't handle multidimensional data. Writes out #' the data and exits. #' #' @param data original data loaded by \code{\link{load.dataset}} #' @param args command line arguments #' @param params combined hyperparameters generated by #' \code{\link{make.param.list}} exit.error.multidim <- function(data, args, params) { status = 'SKIP' error = 'This method has no support for multidimensional data.' out <- prepare.result(data, args$input, status, error, params, NULL, NA) dump.output(out, args$output) quit(save='no') } #' Exit with FAIL status and a custom error message #' #' @param data original data loaded by \code{\link{load.dataset}} #' @param args command line arguments #' @param params combined hyperparameters generated by #' \code{\link{make.param.list}} #' @param error custom error message exit.with.error <- function(data, args, params, error) { status = 'FAIL' out <- prepare.result(data, args$input, status, error, params, NULL, NULL) dump.output(out, args$output) quit(save='no') } #' Exit with SUCCESS status #' #' @param data original data loaded by \code{\link{load.dataset}} #' @param args command line arguments #' @param params combined hyperparameters generated by #' \code{\link{make.param.list}} #' @param locations detected change point locations (0-based!) #' @param runtime runtime in seconds exit.success <- function(data, args, params, locations, runtime) { status = 'SUCCESS' error = NULL out <- prepare.result(data, args$input, status, error, params, locations, runtime) dump.output(out, args$output) }