path: root/execs/R/utils.R
#' ---
#' title: Utilities shared between R code
#' author: G.J.J. van den Burg
#' date: 2019-09-29
#' license: See the LICENSE file.
#' copyright: 2019, The Alan Turing Institute
#' ---

library(RJSONIO)

# sprintf-style printing to stdout, without echoing the return value
printf <- function(...) invisible(cat(sprintf(...)))

load.dataset <- function(filename)
{
    data <- fromJSON(filename)

    # reformat the data to a data frame with a time index and the data values
    tidx <- data$time$index
    exp <- 0:(data$n_obs - 1)
    # drop the time index when it is simply 0..(n_obs - 1)
    if (length(tidx) == length(exp) && all(tidx == exp)) {
        tidx <- NULL
    }

    # build the observation matrix column by column, mapping JSON nulls to NA
    mat <- NULL

    for (j in 1:data$n_dim) {
        s <- data$series[[j]]
        v <- NULL
        for (i in 1:data$n_obs) {
            val <- s$raw[[i]]
            if (is.null(val)) {
                v <- c(v, NA)
            } else {
                v <- c(v, val)
            }
        }
        mat <- cbind(mat, v)
    }

    # We standardize each dimension to avoid issues with numerical precision.
    mat <- scale(mat)

    out <- list(original=data,
                time=tidx,
                mat=mat)
    return(out)
}
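
# For reference, a minimal sketch of the dataset JSON that load.dataset
# expects, inferred only from the fields accessed above; any other fields a
# real dataset file may contain are omitted here, and the values are
# illustrative:
#
#   {
#     "name": "example",
#     "n_obs": 3,
#     "n_dim": 1,
#     "time": {"index": [0, 1, 2]},
#     "series": [{"raw": [1.0, null, 2.5]}]
#   }
#
# A null in "raw" becomes NA in the returned (standardized) matrix, and the
# time index is dropped when it is simply 0..(n_obs - 1).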

prepare.result <- function(data, data.filename, status, error,
                           params, locations, runtime) {
    out <- list(error=NULL)
    cmd.args <- commandArgs(trailingOnly=FALSE)

    # the full command used
    out$command <- paste(cmd.args, collapse=' ')

    # get the name of the current script
    file.arg <- "--file="
    out$script <- sub(file.arg, "", cmd.args[grep(file.arg, cmd.args)])

    # hash of the script
    script.hash <- tools::md5sum(out$script)
    names(script.hash) <- NULL
    out$script_md5 <- script.hash

    # hostname of the machine
    hostname <- Sys.info()['nodename']
    names(hostname) <- NULL
    out$hostname <- hostname

    # dataset name
    out$dataset <- data$name

    # dataset hash
    data.hash <- tools::md5sum(data.filename)
    names(data.hash) <- NULL
    out$dataset_md5 <- data.hash

    # status of running the script
    out$status <- status

    # error (if any)
    if (!is.null(error))
        out$error <- error

    # parameters used
    out$parameters <- params

    # result
    out$result <- list(cplocations=locations, runtime=runtime)

    return(out)
}

make.param.list <- function(args, defaults)
{
    params <- defaults

    # drop the I/O arguments; they are not algorithm parameters
    args.copy <- args
    args.copy['input'] <- NULL
    args.copy['output'] <- NULL

    # command line arguments override the defaults
    params <- modifyList(params, args.copy)
    return(params)
}
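
# A small illustration of make.param.list, with made-up argument and
# parameter names: entries in 'args' override the defaults, while the
# 'input' and 'output' entries are dropped because they are I/O settings
# rather than algorithm parameters.
#
#   defaults <- list(penalty="MBIC", minseglen=1)
#   args <- list(input="data.json", output=NULL, penalty="BIC")
#   make.param.list(args, defaults)
#   # -> list(penalty="BIC", minseglen=1)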

dump.output <- function(out, filename) {
    # write the result as pretty-printed JSON to the given file, or to
    # stdout when no output filename is given
    json.out <- toJSON(out, pretty=TRUE)
    if (!is.null(filename))
        write(json.out, filename)
    else
        cat(json.out, '\n')
}

exit.error.multidim <- function(data, args, params) {
    status <- 'SKIP'
    error <- 'This method has no support for multidimensional data.'
    out <- prepare.result(data, args$input, status, error, params, NULL, NA)
    dump.output(out, args$output)
    quit(save='no')
}

exit.with.error <- function(data, args, params, error) {
    status <- 'FAIL'
    out <- prepare.result(data, args$input, status, error, params, NULL, NULL)
    dump.output(out, args$output)
    quit(save='no')
}

exit.success <- function(data, args, params, locations, runtime) {
    status <- 'SUCCESS'
    error <- NULL
    out <- prepare.result(data, args$input, status, error, params, locations,
                          runtime)
    dump.output(out, args$output)
}
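
# A minimal sketch of how a detector script might tie these utilities
# together. The 'args' list with $input and $output entries matches what
# make.param.list and the exit.* helpers expect; 'run.detector' and
# 'defaults' are placeholders for the actual method and its parameters:
#
#   data <- load.dataset(args$input)
#   params <- make.param.list(args, defaults)
#   if (ncol(data$mat) > 1)
#       exit.error.multidim(data$original, args, params)
#   start.time <- Sys.time()
#   result <- tryCatch(run.detector(data$mat, params), error=function(e) e)
#   runtime <- as.numeric(difftime(Sys.time(), start.time, units='secs'))
#   if (inherits(result, 'error'))
#       exit.with.error(data$original, args, params, conditionMessage(result))
#   else
#       exit.success(data$original, args, params, result$locations, runtime)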