1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
|
#' @title Approximate path algorithm for the SparseStep model
#'
#' @description Fits the entire regularization path for SparseStep using a
#' Golden Section search. Note that this algorithm is approximate, there is no
#' guarantee that the solutions _between_ induced values of lambdas do not
#' differ from those calculated. For instance, if solutions are calculated at
#' \eqn{\lambda_{i}}{\lambda[i]} and \eqn{\lambda_{i+1}}{\lambda[i+1]}, this
#' algorithm ensures that \eqn{\lambda_{i+1}}{\lambda[i+1]} has one more zero
#' than the solution at \eqn{\lambda_{i}}{\lambda[i]} (provided the recursion
#' depth is large enough). There is however no guarantee that there are no
#' different solutions between \eqn{\lambda_{i}}{\lambda[i]} and
#' \eqn{\lambda_{i+1}}{\lambda[i+1]}. This is an ongoing research topic.
#'
#' Note that this path algorithm is not faster than running the
#' \code{sparsestep} function with the same \eqn{\lambda} sequence.
#'
#' @param x matrix of predictors
#' @param y response
#' @param max.depth maximum recursion depth
#' @param gamma0 starting value of the gamma parameter
#' @param gammastop stopping value of the gamma parameter
#' @param IMsteps number of steps of the majorization algorithm to perform for
#' each value of gamma
#' @param gammastep factor to decrease gamma with at each step
#' @param normalize if TRUE, each variable is standardized to have unit L2
#' norm, otherwise it is left alone.
#' @param intercept if TRUE, an intercept is included in the model (and not
#' penalized), otherwise no intercept is included
#' @param force.zero if TRUE, absolute coefficients smaller than the provided
#' threshold value are set to absolute zero as a post-processing step,
#' otherwise no thresholding is performed
#' @param threshold threshold value to use for setting coefficients to
#' absolute zero
#' @param XX The X'X matrix; useful for repeated runs where X'X stays the same
#' @param Xy The X'y matrix; useful for repeated runs where X'y stays the same
#' @param use.XX whether or not to compute X'X and return it
#' @param use.Xy whether or not to compute X'y and return it
#' @param quiet don't print search info while running
#'
#' @return A "sparsestep" S3 object is returned, for which print, predict,
#' coef, and plot methods exist. It has the following items:
#' \item{call}{The call that was used to construct the model.}
#' \item{lambda}{The value(s) of lambda used to construct the model.}
#' \item{gamma0}{The gamma0 value of the model.}
#' \item{gammastop}{The gammastop value of the model.}
#' \item{IMsteps}{The IMsteps value of the model.}
#' \item{gammastep}{The gammastep value of the model.}
#' \item{intercept}{Boolean indicating if an intercept was fitted in the
#' model}
#' \item{force.zero}{Boolean indicating if a force zero-setting was
#' performed.}
#' \item{threshold}{The threshold used for a forced zero-setting}
#' \item{beta}{The resulting coefficients stored in a sparse matrix format
#' (dgCMatrix). This matrix has dimensions nvar x nlambda}
#' \item{a0}{The intercept vector for each value of gamma of length nlambda}
#' \item{normx}{Vector used to normalize the columns of x}
#' \item{meanx}{Vector of column means of x}
#' \item{XX}{The matrix X'X if use.XX was set to TRUE}
#' \item{Xy}{The matrix X'y if use.Xy was set to TRUE}
#'
#' @export
#'
#' @author
#' Gerrit J.J. van den Burg, Patrick J.F. Groenen, Andreas Alfons\cr
#' Maintainer: Gerrit J.J. van den Burg <gertjanvandenburg@gmail.com>
#'
#' @references
#' Van den Burg, G.J.J., Groenen, P.J.F. and Alfons, A. (2017).
#' \emph{SparseStep: Approximating the Counting Norm for Sparse Regularization},
#' arXiv preprint arXiv:1701.06967 [stat.ME].
#' URL \url{https://arxiv.org/abs/1701.06967}.
#'
#' @seealso
#' \code{\link{coef}}, \code{\link{print}}, \code{\link{predict}},
#' \code{\link{plot}}, and \code{\link{sparsestep}}.
#'
#' @examples
#' x <- matrix(rnorm(100*20), 100, 20)
#' y <- rnorm(100)
#' pth <- path.sparsestep(x, y)
#'
path.sparsestep <- function(x, y, max.depth = 10, gamma0 = 1e3,
                            gammastop = 1e-4, IMsteps = 2, gammastep = 2.0,
                            normalize = TRUE, intercept = TRUE,
                            force.zero = TRUE, threshold = 1e-7, XX = NULL,
                            Xy = NULL, use.XX = TRUE, use.Xy = TRUE,
                            quiet = FALSE)
{
  call <- match.call()
  prep <- preprocess(x, y, normalize, intercept, XX, Xy, use.XX, use.Xy)
  nvars <- prep$nvars
  XX <- prep$XX
  Xy <- prep$Xy
  iter <- 0

  # Bracket the path from above: starting from a large power of two, halve
  # lambda until the fit is no longer all-zero, then step back up once so
  # lambda.max is the smallest tried power of two with an all-zero solution.
  lambda.max <- 2^25
  beta <- NULL
  repeat {
    last.beta <- beta
    beta <- run.sparsestep(prep$x, prep$y, XX, Xy, nvars, lambda.max,
                           gamma0, gammastep, gammastop, IMsteps,
                           force.zero, threshold)
    iter <- iter + 1
    if (all(beta == 0)) {
      lambda.max <- lambda.max / 2
    } else {
      lambda.max <- lambda.max * 2
      break
    }
  }
  if (!quiet) {
    cat("Found maximum value of lambda: 2^(", log2(lambda.max), ")\n")
  }
  iter <- iter + 1
  # last.beta is the (all-zero) fit at lambda.max; it is NULL only when the
  # very first trial already had nonzero coefficients -- fall back to beta.
  betas.max <- if (is.null(last.beta)) beta else last.beta

  # Bracket the path from below: starting from a small power of two, double
  # lambda until some coefficient becomes zero, then step back down once so
  # lambda.min is the largest tried power of two with a fully-dense solution.
  lambda.min <- 2^-12
  beta <- NULL
  repeat {
    last.beta <- beta
    beta <- run.sparsestep(prep$x, prep$y, XX, Xy, nvars, lambda.min,
                           gamma0, gammastep, gammastop, IMsteps,
                           force.zero, threshold)
    iter <- iter + 1
    if (all(beta != 0)) {
      lambda.min <- lambda.min * 2
    } else {
      lambda.min <- lambda.min / 2
      break
    }
  }
  if (!quiet) {
    cat("Found minimum value of lambda: 2^(", log2(lambda.min), ")\n")
  }
  iter <- iter + 1
  betas.min <- if (is.null(last.beta)) beta else last.beta

  # Bisection search on log2(lambda) between the two brackets.
  # have.zeros[k + 1] records whether a solution with exactly k zero
  # coefficients has been found; the two endpoints cover k = 0 and k = nvars.
  have.zeros <- rep(FALSE, nvars + 1)
  have.zeros[1] <- TRUE
  have.zeros[nvars + 1] <- TRUE
  left <- log2(lambda.min)
  right <- log2(lambda.max)
  l <- lambda.search(prep$x, prep$y, 0, max.depth, have.zeros, left,
                     right, 1, nvars + 1, XX, Xy, gamma0, gammastep,
                     gammastop, IMsteps, force.zero, threshold, quiet)
  have.zeros <- have.zeros | l$have.zeros

  # Assemble the path (endpoints plus everything found by the search) in
  # increasing order of lambda; rows of betas are solutions per lambda.
  lambdas <- c(lambda.min, l$lambdas, lambda.max)
  betas <- t(cbind(betas.min, l$betas, betas.max))
  iter <- iter + l$iter
  ord <- order(lambdas)
  lambdas <- lambdas[ord]
  # drop = FALSE keeps betas a matrix even when nvars == 1
  betas <- betas[ord, , drop = FALSE]
  post <- postprocess(betas, prep$a0, prep$x, prep$normx, nvars,
                      length(lambdas))
  object <- list(call = call, lambda = lambdas, gamma0 = gamma0,
                 gammastop = gammastop, IMsteps = IMsteps,
                 gammastep = gammastep, intercept = intercept,
                 force.zero = force.zero, threshold = threshold,
                 beta = post$beta, a0 = post$a0, normx = prep$normx,
                 meanx = prep$meanx, XX = if (use.XX) XX else NULL,
                 Xy = if (use.Xy) Xy else NULL)
  class(object) <- "sparsestep"
  return(object)
}
# Recursive bisection search on log2(lambda) over [left, right].
#
# The interval endpoints correspond to solutions with (lidx - 1) and
# (ridx - 1) zero coefficients respectively.  The midpoint is evaluated,
# recorded if it yields a previously unseen zero-count, and the two
# half-intervals are searched recursively until either the recursion depth
# is exhausted or every zero-count in [lidx, ridx] has been found.
#
# Returns a list with:
#   have.zeros - logical vector; have.zeros[k + 1] is TRUE when a solution
#                with exactly k zeros has been found
#   lambdas    - lambda values of newly found solutions (possibly NULL)
#   betas      - matrix of the corresponding coefficient vectors (one per
#                column; possibly NULL)
#   iter       - number of calls to run.sparsestep performed
lambda.search <- function(x, y, depth, max.depth, have.zeros, left, right,
                          lidx, ridx, XX, Xy, gamma0, gammastep, gammastop,
                          IMsteps, force.zero, threshold, quiet)
{
  if (!quiet) {
    cat("Running search in interval [", left, ",", right, "] ... \n")
  }
  nvars <- nrow(XX)
  betas <- NULL
  lambdas <- NULL

  # Evaluate SparseStep at the midpoint of the log2(lambda) interval.
  middle <- left + (right - left) / 2
  lambda <- 2^middle
  beta <- run.sparsestep(x, y, XX, Xy, nvars, lambda, gamma0, gammastep,
                         gammastop, IMsteps, force.zero, threshold)
  iter <- 1
  num.zero <- sum(beta == 0)
  cidx <- num.zero + 1
  if (!have.zeros[cidx]) {
    # First solution seen with this number of zeros: record it.
    have.zeros[cidx] <- TRUE
    betas <- cbind(betas, beta)
    lambdas <- c(lambdas, lambda)
  }

  # Recurse into the left and right half-intervals, but only while the
  # depth budget allows and some zero-count in the subinterval is missing.
  idx <- rbind(c(lidx, cidx), c(cidx, ridx))
  bnd <- rbind(c(left, middle), c(middle, right))
  for (r in 1:2) {
    i1 <- idx[r, 1]
    i2 <- idx[r, 2]
    b1 <- bnd[r, 1]
    b2 <- bnd[r, 2]
    if (depth < max.depth && any(!have.zeros[i1:i2])) {
      ds <- lambda.search(x, y, depth + 1, max.depth,
                          have.zeros, b1, b2, i1, i2, XX,
                          Xy, gamma0, gammastep, gammastop,
                          IMsteps, force.zero, threshold, quiet)
      have.zeros <- have.zeros | ds$have.zeros
      lambdas <- c(lambdas, ds$lambdas)
      betas <- cbind(betas, ds$betas)
      iter <- iter + ds$iter
    }
  }
  out <- list(have.zeros = have.zeros, lambdas = lambdas, betas = betas,
              iter = iter)
  return(out)
}
|