aboutsummaryrefslogtreecommitdiff
path: root/execs/R/cpdbench_changepoint.R
blob: ed1f63915f482e9c8634a2318227ba3bdd61c212 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
#' ---
#' title: Wrapper for changepoint package in TCPDBench
#' author: G.J.J. van den Burg
#' date: 2019-09-28
#' license: See LICENSE file.
#' copyright: 2019, The Alan Turing Institute
#' ---

library(argparse)
library(changepoint)

load.utils <- function() {
    # get the name of the current script so we can load utils.R (yay, R!)
    cmd.args <- commandArgs(trailingOnly=F)
    file.arg <- "--file="
    this.script <- sub(file.arg, "", cmd.args[grep(file.arg, cmd.args)])
    this.dir <- dirname(this.script)
    utils.script <- file.path(this.dir, 'utils.R')
    source(utils.script)
}

parse.args <- function() {
    parser <- ArgumentParser(description='Wrapper for changepoint package')
    parser$add_argument('-i', 
                        '--input',
                        help='path to the input data file',
                        required=TRUE
    )
    parser$add_argument('-o',
                        '--output',
                        help='path to the output file')
    parser$add_argument('-f',
                        '--func',
                        choices=c('mean', 'var', 'meanvar'), 
                        help='Function to call in the changepoint package',
                        required=TRUE
    )
    parser$add_argument('-p',
                        '--penalty',
                        choices=c(
                                  'None',
                                  'SIC',
                                  'BIC',
                                  'MBIC',
                                  'AIC',
                                  'Hannan-Quinn',
                                  'Asymptotic'
                                  ),
                        help='Choice of penalty in the cpt function',
                        default='MBIC'
    )
    parser$add_argument(
                        '-m',
                        '--method',
                        choices=c('AMOC', 'PELT', 'SegNeigh', 'BinSeg'),
                        help="Choice of method in the cpt function",
                        default='AMOC'
    )
    parser$add_argument(
                        '-t',
                        '--test-statistic',
                        choices=c('Normal', 'CUSUM', 'CSS', 'Gamma',
                                  'Exponential', 'Poisson'),
                        help="Test statistic to use",
                        default='Normal'
    )
    parser$add_argument('-Q',
                        '--max-cp',
                        help='Maximum number of change points',
                        choices=c('max', 'default'),
                        default='max')
    return(parser$parse_args())
}

main <- function()
{
    args <- parse.args()

    # load the data
    data <- load.dataset(args$input)

    n.obs <- data$original$n_obs

    # get the parameter list
    defaults <- list()
    # we set this to the maximum because we have no a priori knowledge of the 
    # maximum number of change points we expect.
    if (args$method == 'BinSeg' || args$method == 'SegNeigh') {
        if (args$max_cp == 'max')
            defaults$Q <- n.obs/2 + 1
        else
            defaults$Q <- 5
    }
    if (args$penalty == "Asymptotic")
        defaults$pen.value <- 0.05
    else
        defaults$pen.value <- 0 # not used for other penalties
    params <- make.param.list(args, defaults)

    if (args$func == "mean") {
        cpt.func <- cpt.mean
    } else if (args$func == "var") {
        cpt.func <- cpt.var
    } else if (args$func == "meanvar") {
        cpt.func <- cpt.meanvar
    }

    if (data$original$n_dim > 1) {
        # changepoint package can't handle multidimensional data
        exit.error.multidim(data$original, args, params)
    }

    vec <- as.vector(data$mat)
    start.time <- Sys.time()

    # call the appropriate function with the specified parameters
    result <- tryCatch({
        locs <- cpt.func(vec,
                 penalty=params$penalty,
                 pen.value=params$pen.value,
                 method=params$method,
                 test.stat=params$test_statistic,
                 Q=params$Q,
                 class=FALSE
        )
        list(locations=locs, error=NULL)
    }, error=function(e) {
        return(list(locations=NULL, error=e$message))
    })
    stop.time <- Sys.time()
    runtime <- difftime(stop.time, start.time, units="secs")

    if (!is.null(result$error)) {
        exit.with.error(data$original, args, params, result$error)
    }

    # convert indices to 0-based indices.
    if (params$method == 'AMOC') {
        locations <- c(result$locations[1]) - 1
        names(locations) <- NULL
        locations <- as.list(locations)
    } else {
        if (is.list(result$locations)) {
            result$locations <- result$locations$cpts
        }
        locations <- as.list(result$locations - 1)
    }

    exit.success(data$original, args, params, locations, runtime)
}

load.utils()
main()