diff options
Diffstat (limited to 'execs')
| -rw-r--r-- | execs/R/utils.R | 72 | ||||
| -rw-r--r-- | execs/python/cpdbench_utils.py | 48 |
2 files changed, 119 insertions, 1 deletions
diff --git a/execs/R/utils.R b/execs/R/utils.R index 504b5373..a170a1c0 100644 --- a/execs/R/utils.R +++ b/execs/R/utils.R @@ -10,6 +10,16 @@ library(RJSONIO) printf <- function(...) invisible(cat(sprintf(...))); +#' Load a TCPDBench dataset +#' +#' This function reads in a JSON dataset in TCPDBench format (see TCPD +#' repository for schema) and creates a matrix representation of the dataset. +#' The dataset is scaled in the process. +#' +#' @param filename Path to the JSON file +#' @return List object with the raw data in the \code{original} field, the time +#' index in the \code{time} field, and the data matrix in the \code{mat} field. +#' load.dataset <- function(filename) { data <- fromJSON(filename) @@ -48,6 +58,28 @@ load.dataset <- function(filename) return(out) } +#' Prepare the experiment output +#' +#' This function creates a list of the necessary output data. This includes the +#' exact command that was run, dataset and script information, the hostname, +#' output status, any errors if present, and the detected change point location +#' and runtime. +#' +#' @param data the raw data loaded from the JSON file +#' @param data.filename the path to the dataset filename +#' @param status the output status code of the experiment. Currently in use are +#' 'SUCCESS' for when an experiment exited successfully, 'TIMEOUT' if the +#' experiment exceeded a limit on runtime, 'SKIP' if the method was supplied +#' with improper hyperparameters, and 'FAIL' if an error occurred. +#' @param error a description of the error, if one occurred +#' @param params input parameters (including defaults) to the method +#' @param locations detected change point locations (important: these locations +#' are 0-based, whereas R array indices are 1-based. It is important to convert +#' them accordingly. Change point locations should be integers on the interval +#' [0, T-1], including both endpoints). +#' @param runtime the runtime of the method. 
+#' +#' @return list with all the necessary output fields. prepare.result <- function(data, data.filename, status, error, params, locations, runtime) { out <- list(error=NULL) @@ -94,6 +126,13 @@ prepare.result <- function(data, data.filename, status, error, return(out) } +#' Combine default parameters and command line arguments +#' +#' @param args the command line arguments +#' @param defaults default algorithm parameters +#' @return a combined list with both the default parameter settings and those +#' provided on the command line. If a parameter is in the default list that is +#' specified on the command line the command line parameter takes precedence. make.param.list <- function(args, defaults) { params <- defaults @@ -106,6 +145,14 @@ make.param.list <- function(args, defaults) return(params) } +#' Write output to a file or stdout +#' +#' This function takes an output list generated by \code{\link{prepare.result}} +#' and writes it out as JSON to a file if provided or stdout otherwise. +#' +#' @param out experimental results as a list +#' @param filename (optional) output file to write to +#' dump.output <- function(out, filename) { json.out <- toJSON(out, pretty=T) if (!is.null(filename)) @@ -114,6 +161,16 @@ dump.output <- function(out, filename) { cat(json.out, '\n') } +#' Exit with SKIP status due to multidimensional data +#' +#' This is a shorthand for \code{\link{exit.with.error}} where the error is +#' already set for methods that don't handle multidimensional data. Writes out +#' the data and exits. +#' +#' @param data original data loaded by \code{\link{load.dataset}} +#' @param args command line arguments +#' @param params combined hyperparameters generated by +#' \code{\link{make.param.list}} exit.error.multidim <- function(data, args, params) { status = 'SKIP' error = 'This method has no support for multidimensional data.' 
@@ -122,6 +179,13 @@ exit.error.multidim <- function(data, args, params) { quit(save='no') } +#' Exit with FAIL status and a custom error message +#' +#' @param data original data loaded by \code{\link{load.dataset}} +#' @param args command line arguments +#' @param params combined hyperparameters generated by +#' \code{\link{make.param.list}} +#' @param error custom error message exit.with.error <- function(data, args, params, error) { status = 'FAIL' out <- prepare.result(data, args$input, status, error, params, NULL, NULL) @@ -129,6 +193,14 @@ exit.with.error <- function(data, args, params, error) { quit(save='no') } +#' Exit with SUCCESS status +#' +#' @param data original data loaded by \code{\link{load.dataset}} +#' @param args command line arguments +#' @param params combined hyperparameters generated by +#' \code{\link{make.param.list}} +#' @param locations detected change point locations (0-based!) +#' @param runtime runtime in seconds exit.success <- function(data, args, params, locations, runtime) { status = 'SUCCESS' error = NULL diff --git a/execs/python/cpdbench_utils.py b/execs/python/cpdbench_utils.py index cb074c69..65e632c1 100644 --- a/execs/python/cpdbench_utils.py +++ b/execs/python/cpdbench_utils.py @@ -19,6 +19,7 @@ import sys def md5sum(filename): + """Compute the MD5 checksum of a given file""" blocksize = 65536 hasher = hashlib.md5() with open(filename, "rb") as fp: @@ -30,6 +31,7 @@ def md5sum(filename): def load_dataset(filename): + """ Load a CPDBench dataset """ with open(filename, "r") as fp: data = json.load(fp) @@ -58,6 +60,45 @@ def prepare_result( runtime, script_filename, ): + """Prepare the experiment output as a dictionary + + Parameters + ---------- + data : dict + The CPDBench dataset object + + data_filename : str + Absolute path to the dataset file + + status : str + Status of the experiments. 
Commonly used status codes are: SUCCESS if + the experiment was successful, SKIP if the method was provided improper + parameters, FAIL if the method failed for whatever reason, and TIMEOUT + if the method ran too long. + + error : str + If an error occurred, this field can be used to describe what it is. + + params : dict + Dictionary of parameters provided to the method. It is good to be as + complete as possible, so even default parameters should be added to this + field. This enhances reproducibility. + + locations : list + Detected change point locations. Remember that change locations are + indices of time points and are 0-based (start counting at zero, thus + change locations are integers on the interval [0, T-1], including both + endpoints). + + runtime : float + Runtime of the method. This should be computed as accurately as + possible, excluding any method-specific setup code. + + script_filename : + Path to the script of the method. This is hashed to enable rough + versioning. + + """ out = {} # record the command that was used @@ -88,7 +129,7 @@ def prepare_result( def dump_output(output, filename=None): - """Save result to output file or write to stdout """ + """Save result to output file or write to stdout (json format)""" if filename is None: print(json.dumps(output, sort_keys=True, indent="\t")) else: @@ -97,6 +138,7 @@ def dump_output(output, filename=None): def make_param_dict(args, defaults): + """Create the parameter dict combining CLI arguments and defaults""" params = copy.deepcopy(vars(args)) del params["input"] if "output" in params: @@ -106,6 +148,7 @@ def make_param_dict(args, defaults): def exit_with_error(data, args, parameters, error, script_filename): + """Exit and save result using the 'FAIL' exit status""" status = "FAIL" out = prepare_result( data, @@ -120,7 +163,9 @@ def exit_with_error(data, args, parameters, error, script_filename): dump_output(out, args.output) raise SystemExit + def exit_with_timeout(data, args, parameters, 
runtime, script_filename): + """Exit and save result using the 'TIMEOUT' exit status""" status = "TIMEOUT" out = prepare_result( data, @@ -137,6 +182,7 @@ def exit_with_timeout(data, args, parameters, runtime, script_filename): def exit_success(data, args, parameters, locations, runtime, script_filename): + """Exit and save result using the 'SUCCESS' exit status""" status = "SUCCESS" error = None out = prepare_result( |
