From 6f35564b83a9facf0c468742ce8d000427a58b97 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Tue, 23 Jun 2020 16:45:18 +0100 Subject: Add additional documentation on using the code --- execs/R/utils.R | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) (limited to 'execs/R/utils.R') diff --git a/execs/R/utils.R b/execs/R/utils.R index 504b5373..a170a1c0 100644 --- a/execs/R/utils.R +++ b/execs/R/utils.R @@ -10,6 +10,16 @@ library(RJSONIO) printf <- function(...) invisible(cat(sprintf(...))); +#' Load a TCPDBench dataset +#' +#' This function reads in a JSON dataset in TCPDBench format (see TCPD +#' repository for schema) and creates a matrix representation of the dataset. +#' The dataset is scaled in the process. +#' +#' @param filename Path to the JSON file +#' @return List object with the raw data in the \code{original} field, the time +#' index in the \code{time} field, and the data matrix in the \code{mat} field. +#' load.dataset <- function(filename) { data <- fromJSON(filename) @@ -48,6 +58,28 @@ load.dataset <- function(filename) return(out) } +#' Prepare the experiment output +#' +#' This function creates a list of the necessary output data. This includes the +#' exact command that was run, dataset and script information, the hostname, +#' output status, any errors if present, and the detected change point location +#' and runtime. +#' +#' @param data the raw data loaded from the JSON file +#' @param data.filename the path to the dataset filename +#' @param status the output status code of the experiment. Currently in use are +#' 'SUCCESS' for when an experiment exited successfully, 'TIMEOUT' if the +#' experiment exceeded a limit on runtime, 'SKIP' if the method was supplied +#' with improper hyperparameters, and 'FAIL' if an error occurred. 
+#' @param error a description of the error, if one occurred +#' @param params input parameters (including defaults) to the method +#' @param locations detected change point locations (important: these locations +#' are 0-based, whereas R array indices are 1-based. It is important to convert +#' them accordingly. Change point locations should be integers on the interval +#' [0, T-1], including both endpoints). +#' @param runtime the runtime of the method. +#' +#' @return list with all the necessary output fields. prepare.result <- function(data, data.filename, status, error, params, locations, runtime) { out <- list(error=NULL) @@ -94,6 +126,13 @@ prepare.result <- function(data, data.filename, status, error, return(out) } +#' Combine default parameters and command line arguments +#' +#' @param args the command line arguments +#' @param defaults default algorithm parameters +#' @return a combined list with both the default parameter settings and those +#' provided on the command line. If a parameter in the default list is also +#' specified on the command line, the command line parameter takes precedence. make.param.list <- function(args, defaults) { params <- defaults @@ -106,6 +145,14 @@ make.param.list <- function(args, defaults) return(params) } +#' Write output to a file or stdout +#' +#' This function takes an output list generated by \code{\link{prepare.result}} +#' and writes it out as JSON to a file if provided or stdout otherwise. +#' +#' @param out experimental results as a list +#' @param filename (optional) output file to write to +#' dump.output <- function(out, filename) { json.out <- toJSON(out, pretty=T) if (!is.null(filename)) @@ -114,6 +161,16 @@ dump.output <- function(out, filename) { cat(json.out, '\n') } +#' Exit with SKIP status due to multidimensional data +#' +#' This is a shorthand for \code{\link{exit.with.error}} where the error is +#' already set for methods that don't handle multidimensional data. Writes out +#' the data and exits. 
+#' +#' @param data original data loaded by \code{\link{load.dataset}} +#' @param args command line arguments +#' @param params combined hyperparameters generated by +#' \code{\link{make.param.list}} exit.error.multidim <- function(data, args, params) { status = 'SKIP' error = 'This method has no support for multidimensional data.' @@ -122,6 +179,13 @@ exit.error.multidim <- function(data, args, params) { quit(save='no') } +#' Exit with FAIL status and a custom error message +#' +#' @param data original data loaded by \code{\link{load.dataset}} +#' @param args command line arguments +#' @param params combined hyperparameters generated by +#' \code{\link{make.param.list}} +#' @param error custom error message exit.with.error <- function(data, args, params, error) { status = 'FAIL' out <- prepare.result(data, args$input, status, error, params, NULL, NULL) @@ -129,6 +193,14 @@ exit.with.error <- function(data, args, params, error) { quit(save='no') } +#' Exit with SUCCESS status +#' +#' @param data original data loaded by \code{\link{load.dataset}} +#' @param args command line arguments +#' @param params combined hyperparameters generated by +#' \code{\link{make.param.list}} +#' @param locations detected change point locations (0-based!) +#' @param runtime runtime in seconds exit.success <- function(data, args, params, locations, runtime) { status = 'SUCCESS' error = NULL -- cgit v1.2.3