author     Gertjan van den Burg <gertjanvandenburg@gmail.com>    2016-02-10 15:27:35 -0500
committer  Gertjan van den Burg <gertjanvandenburg@gmail.com>    2016-02-10 15:27:35 -0500
commit     904e559845f48f38c7eb928d65876e3a064a17c7 (patch)
tree       1d4153ad46cfe94bda2bd5dc3105acf67b2dc994
parent     redefine gamma to correspond to theory (diff)
download   sparsestep-904e559845f48f38c7eb928d65876e3a064a17c7.tar.gz
           sparsestep-904e559845f48f38c7eb928d65876e3a064a17c7.zip

add documentation, unify implementations of path and fit
-rw-r--r--   NAMESPACE                   3
-rw-r--r--   R/cd.sparsestep.R          73
-rw-r--r--   R/coef.sparsestep.R        30
-rw-r--r--   R/fit.sparsestep.R        149
-rw-r--r--   R/path.sparsestep.R       189
-rw-r--r--   R/plot.sparsestep.R        30
-rw-r--r--   R/postprocess.R            21
-rw-r--r--   R/predict.sparsestep.R     27
-rw-r--r--   R/preprocess.R             38
-rw-r--r--   R/print.sparsestep.R        6
-rw-r--r--   R/run.sparsestep.R         52
-rw-r--r--   man/print.sparsestep.Rd     1
-rw-r--r--   man/sparsestep.Rd          45
-rw-r--r--   man/sparsestep.path.Rd     81
14 files changed, 485 insertions, 260 deletions
diff --git a/NAMESPACE b/NAMESPACE
index 0e14978..c3f5715 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,5 +1,8 @@
# Generated by roxygen2 (4.1.1): do not edit by hand
+S3method(coef,sparsestep)
+S3method(plot,sparsestep)
+S3method(predict,sparsestep)
S3method(print,sparsestep)
export(sparsestep)
export(sparsestep.cd)
diff --git a/R/cd.sparsestep.R b/R/cd.sparsestep.R
deleted file mode 100644
index 29dbec8..0000000
--- a/R/cd.sparsestep.R
+++ /dev/null
@@ -1,73 +0,0 @@
-#' @title Coordinate descent algorithm for SparseStep
-#'
-#'
-#' @export
-#'
-sparsestep.cd <- function(x, y, lambdas=NULL, epsilon=1e-5, intercept=TRUE,
- ...)
-{
- nm <- dim(X)
- n <- nm[1]
- m <- nm[2]
- one <- rep(1, n)
-
- if (intercept) {
- meanx <- drop(one %*% x)/n
- x <- scale(x, meanx, FALSE)
- mu <- mean(y)
- y <- drop(y - mu)
- } else {
- meanx <- rep(0, m)
- mu <- 0
- y <- drop(y)
- }
-
- XX <- t(x) %*% x
- Xy <- t(x) %*% y
-
- #gammas <- 2^(seq(log(1e6)/log(2), log(1e-8)/log(2)))
-
- num.lambdas <- length(lambdas)
- #num.gammas <- length(gammas)
-
- #betas <- array(0, dim=c(num.gammas, num.lambdas, m))
- betas <- array(0, dim=c(num.lambdas, m))
- for (l in num.lambdas:1) {
- lambda <- lambdas[l]
- # initialize beta
- if (l == num.lambdas) {
- beta <- as.vector(matrix(0, 1, m))
- } else {
- #beta <- betas[num.gammas, l+1, ]
- beta <- betas[l+1, ]
- }
-
- j <- 1
- last.beta <- as.vector(matrix(0, 1, m))
- while (TRUE) {
- # code
- b <- -2 * x[, j] %*% (y - x[, -j] %*% last.beta[-j])
- a <- x[, j] %*% x[, j]
- if (abs(last.beta[j]) > epsilon) {
- beta[j] <- b/a
- } else {
- beta[j] <- (2*b - lambda*sign(last.beta[j]))/
- (2*a)
- }
- # check convergence
- if (sum(abs(beta - last.beta)) < 1e-10) {
- break
- } else {
- last.beta <- beta
- }
- # continue
- j <- j %% m + 1
- }
- betas[l, ] <- beta
- }
-
- return(betas)
-}
-
-
-
diff --git a/R/coef.sparsestep.R b/R/coef.sparsestep.R
new file mode 100644
index 0000000..3a6286c
--- /dev/null
+++ b/R/coef.sparsestep.R
@@ -0,0 +1,30 @@
+#' @title Get the coefficients of a fitted SparseStep model
+#'
+#' @description Returns the coefficients of the SparseStep model.
+#'
+#' @param obj a "sparsestep" object
+#'
+#' @return The coefficients of the SparseStep model (i.e. the betas). If the
+#' model was fitted with an intercept this will be the first value in the
+#' resulting vector.
+#'
+#' @export
+#' @aliases coef
+#'
+#' @examples
+#' x <- matrix(rnorm(100*20), 100, 20)
+#' y <- rnorm(100)
+#' fit <- sparsestep(x, y)
+#' coef(fit)
+#'
+coef.sparsestep <- function(obj, ...)
+{
+ if (obj$intercept) {
+ beta <- rbind(obj$a0, obj$beta)
+ } else {
+ beta <- obj$beta
+ }
+ nbeta <- drop0(Matrix(as.matrix(beta)))
+
+ return(nbeta)
+}
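
A brief usage sketch of the coef method introduced above, showing how the intercept row is handled. This is not part of the commit: the data are placeholders, and it assumes a build of the package from this commit with the Matrix package available for the sparse return value.

library(sparsestep)

set.seed(1)
x <- matrix(rnorm(100 * 20), 100, 20)
y <- rnorm(100)

# with an intercept, the first row of the returned matrix is the intercept a0
fit <- sparsestep(x, y, lambda = c(0.5, 1, 2))
dim(coef(fit))     # 21 x 3: intercept row plus the 20 predictor coefficients

# without an intercept, only the predictor coefficients are returned
fit0 <- sparsestep(x, y, lambda = c(0.5, 1, 2), intercept = FALSE)
dim(coef(fit0))    # 20 x 3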
diff --git a/R/fit.sparsestep.R b/R/fit.sparsestep.R
index aabf6f6..020f29b 100644
--- a/R/fit.sparsestep.R
+++ b/R/fit.sparsestep.R
@@ -25,115 +25,68 @@
#' @param use.XX whether or not to compute X'X and return it
#' @param use.Xy whether or not to compute X'y and return it
#'
-#' @return A "sparsestep" object is returned, for which predict, coef, methods
-#' exist.
+#' @return A "sparsestep" S3 object is returned, for which print, predict,
+#' coef, and plot methods exist. It has the following items:
+#' \item{call}{The call that was used to construct the model.}
+#' \item{lambda}{The value(s) of lambda used to construct the model.}
+#' \item{gamma0}{The gamma0 value of the model.}
+#' \item{gammastop}{The gammastop value of the model}
+#' \item{IMsteps}{The IMsteps value of the model}
+#' \item{gammastep}{The gammastep value of the model}
+#' \item{intercept}{Boolean indicating if an intercept was fitted in the
+#' model}
+#' \item{force.zero}{Boolean indicating if a forced zero-setting was
+#' performed.}
+#' \item{threshold}{The threshold used for a forced zero-setting}
+#' \item{beta}{The resulting coefficients stored in a sparse matrix format
+#' (dgCMatrix). This matrix has dimensions nvar x nlambda}
+#' \item{a0}{The intercept vector of length nlambda, one value per lambda}
+#' \item{normx}{Vector used to normalize the columns of x}
+#' \item{meanx}{Vector of column means of x}
+#' \item{XX}{The matrix X'X if use.XX was set to TRUE}
+#' \item{Xy}{The matrix X'y if use.Xy was set to TRUE}
+#'
+#' @author
+#' Gertjan van den Burg (author and maintainer).
+#'
+#' @seealso
+#' \code{\link{coef}}, \code{\link{print}}, \code{\link{predict}},
+#' \code{\link{plot}}, and \code{\link{sparsestep.path}}.
#'
#' @export
#'
#' @examples
-#' data(diabetes)
-#' attach(diabetes)
-#' object <- sparsestep(x, y)
-#' plot(object)
-#' detach(diabetes)
+#' x <- matrix(rnorm(100*20), 100, 20)
+#' y <- rnorm(100)
+#' fit <- sparsestep(x, y)
#'
-sparsestep <- function(x, y, lambda=1.0, gamma0=1e6,
- gammastop=1e-8, IMsteps=2, gammastep=2.0, normalize=TRUE,
- intercept=TRUE, force.zero=TRUE, threshold=1e-7,
- XX=NULL, Xy=NULL, use.XX = TRUE, use.Xy = TRUE)
+sparsestep <- function(x, y, lambda=c(0.1, 0.5, 1.0, 5, 10), gamma0=1e3,
+ gammastop=1e-4, IMsteps=2, gammastep=2.0,
+ normalize=TRUE, intercept=TRUE, force.zero=TRUE,
+ threshold=1e-7, XX=NULL, Xy=NULL, use.XX = TRUE,
+ use.Xy = TRUE)
{
call <- match.call()
+ prep <- preprocess(x, y, normalize, intercept, XX, Xy, use.XX, use.Xy)
- nm <- dim(x)
- n <- nm[1]
- m <- nm[2]
- one <- rep(1, n)
-
- if (intercept) {
- meanx <- drop(one %*% x)/n
- x <- scale(x, meanx, FALSE)
- mu <- mean(y)
- y <- drop(y - mu)
- } else {
- meanx <- rep(0, m)
- mu <- 0
- y <- drop(y)
+ betas <- NULL
+ for (lmd in lambda) {
+ beta <- run.sparsestep(prep$XX, prep$Xy, prep$nvars, lmd,
+ gamma0, gammastep, gammastop, IMsteps,
+ force.zero, threshold)
+			betas <- cbind(betas, beta)
}
- if (normalize) {
- normx <- sqrt(drop(one %*% (x^2)))
- names(normx) <- NULL
- x <- scale(x, FALSE, normx)
- cat("Normalizing in sparsestep\n")
- } else {
- normx <- rep(1, m)
- }
+ post <- postprocess(t(betas), prep$a0, x, prep$normx, prep$nvars,
+ length(lambda))
- if (use.XX & is.null(XX)) {
- XX <- t(x) %*% x
- }
- if (use.Xy & is.null(Xy)) {
- Xy <- t(x) %*% y
- }
-
- # Start solving SparseStep
- gamma <- gamma0
- beta <- matrix(0.0, m, 1)
-
- it <- 0
- while (gamma > gammastop)
- {
- for (i in 1:IMsteps) {
- alpha <- beta
- omega <- gamma^2/(alpha^2 + gamma^2)^2
- Omega <- diag(as.vector(omega), m, m)
- beta <- solve(XX + lambda * Omega, Xy)
- }
- it <- it + 1
- gamma <- gamma / gammastep
- }
-
- # perform IM until convergence
- epsilon <- 1e-14
- loss <- get.loss(x, y, gamma, beta, lambda)
- lbar <- (1.0 + 2.0 * epsilon) * loss
- while ((lbar - loss)/loss > epsilon)
- {
- alpha <- beta
- omega <- gamma^2/(alpha^2 + gamma^2)^2
- Omega <- diag(as.vector(omega), m, m)
- beta <- solve(XX + lambda * Omega, Xy)
- lbar <- loss
- loss <- get.loss(x, y, gamma, beta, lambda)
- }
-
- # postprocessing
- if (force.zero) {
- beta[which(abs(beta) < threshold)] <- 0
- }
- residuals <- y - x %*% beta
- beta <- scale(t(beta), FALSE, normx)
- RSS <- apply(residuals^2, 2, sum)
- R2 <- 1 - RSS/RSS[1]
-
- object <- list(call = call, lambda = lambda, R2 = R2, RSS = RSS,
- gamma0 = gamma0, gammastop = gammastop,
- IMsteps = IMsteps, gammastep = gammastep,
- intercept = intercept, force.zero = force.zero,
- threshold = threshold, beta = beta, mu = mu,
- normx = normx, meanx = meanx,
- XX = if(use.XX) XX else NULL,
- Xy = if(use.Xy) Xy else NULL)
+ object <- list(call = call, lambda = lambda, gamma0 = gamma0,
+ gammastop = gammastop, IMsteps = IMsteps,
+ gammastep = gammastep, intercept = intercept,
+ force.zero = force.zero, threshold = threshold,
+ beta = post$beta, a0 = post$a0, normx = prep$normx,
+ meanx = prep$meanx, XX = if(use.XX) prep$XX else NULL,
+ Xy = if(use.Xy) prep$Xy else NULL)
class(object) <- "sparsestep"
return(object)
}
-
-get.loss <- function(x, y, gamma, beta, lambda)
-{
- Xb <- x %*% beta
- diff <- y - Xb
- b2 <- beta^2
- binv <- 1/(b2 + gamma^2)
- loss <- t(diff) %*% diff + lambda * t(b2) %*% binv
- return(loss)
-}
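
A rough sketch of the unified fit interface above, which now accepts a vector of lambda values in a single call and stores the coefficients as an nvar x nlambda sparse matrix. Not part of the commit: the data and the lambda grid are placeholders, and it assumes the package built from this commit together with its Matrix dependency.

library(sparsestep)

set.seed(42)
x <- matrix(rnorm(200 * 10), 200, 10)
y <- drop(x %*% c(2, -3, 0, 0, 1.5, rep(0, 5))) + rnorm(200)

lambdas <- 2^seq(-4, 4)
fit <- sparsestep(x, y, lambda = lambdas)

dim(fit$beta)                     # nvar x nlambda, here 10 x 9, sparse
Matrix::colSums(fit$beta != 0)    # nonzero coefficients retained per lambda
fit$a0                            # fitted intercept, one entry per lambda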
diff --git a/R/path.sparsestep.R b/R/path.sparsestep.R
index 40467b8..2081346 100644
--- a/R/path.sparsestep.R
+++ b/R/path.sparsestep.R
@@ -1,59 +1,100 @@
-#' @title Path algorithm for the SparseStep model
+#' @title Approximate path algorithm for the SparseStep model
#'
#' @description Fits the entire regularization path for SparseStep using a
-#' Golden Section search.
+#' Golden Section search. Note that this algorithm is approximate: there is
+#' no guarantee that the solutions \emph{between} the computed values of
+#' lambda do not differ from those calculated. For instance, if solutions are
+#' calculated at \eqn{\lambda_{i}}{\lambda[i]} and
+#' \eqn{\lambda_{i+1}}{\lambda[i+1]}, this algorithm ensures that the solution
+#' at \eqn{\lambda_{i+1}}{\lambda[i+1]} has one more zero coefficient than the
+#' solution at \eqn{\lambda_{i}}{\lambda[i]} (provided the recursion depth is
+#' large enough). There is, however, no guarantee that different solutions do
+#' not occur between \eqn{\lambda_{i}}{\lambda[i]} and
+#' \eqn{\lambda_{i+1}}{\lambda[i+1]}. This is an ongoing research topic.
+#'
+#' Note that this path algorithm is not faster than running the
+#' \code{sparsestep} function with the same \eqn{\lambda} sequence.
#'
#' @param x matrix of predictors
#' @param y response
#' @param max.depth maximum recursion depth
-#' @param ... further arguments to sparsestep()
+#' @param gamma0 starting value of the gamma parameter
+#' @param gammastop stopping value of the gamma parameter
+#' @param IMsteps number of steps of the majorization algorithm to perform for
+#' each value of gamma
+#' @param gammastep factor to decrease gamma with at each step
+#' @param normalize if TRUE, each variable is standardized to have unit L2
+#' norm, otherwise it is left alone.
+#' @param intercept if TRUE, an intercept is included in the model (and not
+#' penalized), otherwise no intercept is included
+#' @param force.zero if TRUE, absolute coefficients smaller than the provided
+#' threshold value are set to absolute zero as a post-processing step,
+#' otherwise no thresholding is performed
+#' @param threshold threshold value to use for setting coefficients to
+#' absolute zero
+#' @param XX The X'X matrix; useful for repeated runs where X'X stays the same
+#' @param Xy The X'y matrix; useful for repeated runs where X'y stays the same
+#' @param use.XX whether or not to compute X'X and return it
+#' @param use.Xy whether or not to compute X'y and return it
+#'
+#' @return A "sparsestep" S3 object is returned, for which print, predict,
+#' coef, and plot methods exist. It has the following items:
+#' \item{call}{The call that was used to construct the model.}
+#' \item{lambda}{The value(s) of lambda used to construct the model.}
+#' \item{gamma0}{The gamma0 value of the model.}
+#' \item{gammastop}{The gammastop value of the model}
+#' \item{IMsteps}{The IMsteps value of the model}
+#' \item{gammastep}{The gammastep value of the model}
+#' \item{intercept}{Boolean indicating if an intercept was fitted in the
+#' model}
+#' \item{force.zero}{Boolean indicating if a forced zero-setting was
+#' performed.}
+#' \item{threshold}{The threshold used for a forced zero-setting}
+#' \item{beta}{The resulting coefficients stored in a sparse matrix format
+#' (dgCMatrix). This matrix has dimensions nvar x nlambda}
+#' \item{a0}{The intercept vector of length nlambda, one value per lambda}
+#' \item{normx}{Vector used to normalize the columns of x}
+#' \item{meanx}{Vector of column means of x}
+#' \item{XX}{The matrix X'X if use.XX was set to TRUE}
+#' \item{Xy}{The matrix X'y if use.Xy was set to TRUE}
+#'
+#' @author
+#' Gertjan van den Burg (author and maintainer).
#'
#' @export
#'
-sparsestep.path <- function(x, y, max.depth=10, intercept=TRUE,
- normalize=TRUE, ...)
+#' @seealso
+#' \code{\link{coef}}, \code{\link{print}}, \code{\link{predict}},
+#' \code{\link{plot}}, and \code{\link{sparsestep}}.
+#'
+#' @examples
+#' x <- matrix(rnorm(100*20), 100, 20)
+#' y <- rnorm(100)
+#' pth <- path.sparsestep(x, y)
+#'
+path.sparsestep <- function(x, y, max.depth=10, gamma0=1e3, gammastop=1e-4,
+ IMsteps=2, gammastep=2.0, normalize=TRUE,
+ intercept=TRUE, force.zero=TRUE, threshold=1e-7,
+ XX=NULL, Xy=NULL, use.XX = TRUE, use.Xy = TRUE)
{
call <- match.call()
- nm <- dim(x)
- n <- nm[1]
- m <- nm[2]
- one <- rep(1, n)
-
- if (intercept) {
- meanx <- drop(one %*% x)/n
- x <- scale(x, meanx, FALSE)
- mu <- mean(y)
- y <- drop(y - mu)
- } else {
- meanx <- rep(0, m)
- mu <- 0
- y <- drop(y)
- }
-
- if (normalize) {
- normx <- sqrt(drop(one %*% (x^2)))
- names(normx) <- NULL
- x <- scale(x, FALSE, normx)
- cat("Normalizing in path.sparsestep\n")
- } else {
- normx <- rep(1, m)
- }
-
- XX <- t(x) %*% x
- Xy <- t(x) %*% y
+ prep <- preprocess(x, y, normalize, intercept, XX, Xy, use.XX, use.Xy)
+	nvars <- prep$nvars
+	XX <- prep$XX
+	Xy <- prep$Xy
iter <- 0
# First find the smallest lambda for which all coefficients are zero
lambda.max <- 2^25
- fit <- NULL
+ beta <- NULL
while (1) {
- last.fit <- fit
- fit <- sparsestep(x, y, lambda=lambda.max, normalize=FALSE,
- XX=XX, Xy=Xy, ...)
+ last.beta <- beta
+ beta <- run.sparsestep(XX, Xy, nvars, lambda.max, gamma0,
+ gammastep, gammastop, IMsteps,
+ force.zero, threshold)
iter <- iter + 1
- if (all(fit$beta == 0)) {
+ if (all(beta == 0)) {
lambda.max <- lambda.max / 2
} else {
lambda.max <- lambda.max * 2
@@ -62,21 +103,22 @@ sparsestep.path <- function(x, y, max.depth=10, intercept=TRUE,
}
cat("Found maximum value of lambda: 2^(", log(lambda.max)/log(2), ")\n")
iter <- iter + 1
- if (is.null(last.fit)) {
- betas.max <- fit$beta
+ if (is.null(last.beta)) {
+ betas.max <- beta
} else {
- betas.max <- last.fit$beta
+ betas.max <- last.beta
}
# Now find the largest lambda for which no coefficients are zero
lambda.min <- 2^-12
- fit <- NULL
+ beta <- NULL
while (1) {
- last.fit <- fit
- fit <- sparsestep(x, y, lambda=lambda.min, normalize=FALSE,
- XX=XX, Xy=Xy, ...)
+ last.beta <- beta
+ beta <- run.sparsestep(XX, Xy, nvars, lambda.min, gamma0,
+ gammastep, gammastop, IMsteps,
+ force.zero, threshold)
iter <- iter + 1
- if (all(fit$beta != 0)) {
+ if (all(beta != 0)) {
lambda.min <- lambda.min * 2
} else {
lambda.min <- lambda.min / 2
@@ -85,57 +127,67 @@ sparsestep.path <- function(x, y, max.depth=10, intercept=TRUE,
}
cat("Found minimum value of lambda: 2^(", log(lambda.min)/log(2), ")\n")
iter <- iter + 1
- if (is.null(last.fit)) {
- betas.min <- fit$beta
+ if (is.null(last.beta)) {
+ betas.min <- beta
} else {
- betas.min <- last.fit$beta
+ betas.min <- last.beta
}
# Run binary section search
- have.zeros <- as.vector(matrix(FALSE, 1, m+1))
+ have.zeros <- as.vector(matrix(FALSE, 1, nvars+1))
have.zeros[1] <- TRUE
- have.zeros[m+1] <- TRUE
+ have.zeros[nvars+1] <- TRUE
left <- log(lambda.min)/log(2)
right <- log(lambda.max)/log(2)
- l <- lambda.search(x, y, 0, max.depth, have.zeros, left, right, 1,
- m+1, XX, Xy, ...)
+ l <- lambda.search(0, max.depth, have.zeros, left, right, 1,
+ nvars+1, XX, Xy, gamma0, gammastep, gammastop,
+ IMsteps, force.zero, threshold)
have.zeros <- have.zeros | l$have.zeros
lambdas <- c(lambda.min, l$lambdas, lambda.max)
- betas <- rbind(betas.min, l$betas, betas.max)
+ betas <- t(cbind(betas.min, l$betas, betas.max))
iter <- iter + l$iter
ord <- order(lambdas)
lambdas <- lambdas[ord]
betas <- betas[ord, ]
- betas <- scale(betas, FALSE, normx)
- object <- list(call=call, lambdas=lambdas, betas=betas,
- iterations=iter)
+ post <- postprocess(betas, prep$a0, x, prep$normx, nvars,
+ length(lambdas))
+
+ object <- list(call = call, lambda = lambdas, gamma0 = gamma0,
+ gammastop = gammastop, IMsteps = IMsteps,
+ gammastep = gammastep, intercept = intercept,
+ force.zero = force.zero, threshold = threshold,
+ beta = post$beta, a0 = post$a0, normx = prep$normx,
+ meanx = prep$meanx, XX = if(use.XX) XX else NULL,
+ Xy = if(use.Xy) Xy else NULL)
+ class(object) <- "sparsestep"
+ return(object)
}
-lambda.search <- function(x, y, depth, max.depth, have.zeros, left, right,
- lidx, ridx, XX, Xy, ...)
+lambda.search <- function(depth, max.depth, have.zeros, left, right,
+ lidx, ridx, XX, Xy, gamma0, gammastep, gammastop,
+ IMsteps, force.zero, threshold)
{
cat("Running search in interval [", left, ",", right, "] ... \n")
- nm <- dim(x)
- m <- nm[2]
+ nvars <- dim(XX)[1]
betas <- NULL
lambdas <- NULL
middle <- left + (right - left)/2
lambda <- 2^middle
- fit <- sparsestep(x, y, lambda=lambda, normalize=FALSE, XX=XX, Xy=Xy,
- ...)
+ beta <- run.sparsestep(XX, Xy, nvars, lambda, gamma0, gammastep,
+ gammastop, IMsteps, force.zero, threshold)
iter <- 1
- num.zero <- length(which(fit$beta == 0))
+ num.zero <- length(which(beta == 0))
cidx <- num.zero + 1
if (have.zeros[cidx] == FALSE) {
have.zeros[cidx] = TRUE
- betas <- rbind(betas, as.vector(fit$beta))
+ betas <- cbind(betas, beta)
lambdas <- c(lambdas, lambda)
}
@@ -147,12 +199,13 @@ lambda.search <- function(x, y, depth, max.depth, have.zeros, left, right,
b1 <- bnd[r, 1]
b2 <- bnd[r, 2]
if (depth < max.depth && any(have.zeros[i1:i2] == F)) {
- ds <- lambda.search(x, y, depth+1, max.depth,
- have.zeros, b1, b2, i1, i2, XX, Xy,
- ...)
+ ds <- lambda.search(depth+1, max.depth, have.zeros,
+ b1, b2, i1, i2, XX, Xy, gamma0,
+ gammastep, gammastop, IMsteps,
+ force.zero, threshold)
have.zeros <- have.zeros | ds$have.zeros
lambdas <- c(lambdas, ds$lambdas)
- betas <- rbind(betas, ds$betas)
+ betas <- cbind(betas, ds$betas)
iter <- iter + ds$iter
}
}
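
The recursion above is the heart of the path construction: lambda.search repeatedly halves the current interval on a log2(lambda) scale and records a lambda value for every attainable sparsity level. The following toy sketch illustrates that idea in isolation; find.lambda.for.zeros and fit.fun are hypothetical names used only for this illustration, with fit.fun standing in for run.sparsestep.

# Bisect on log2(lambda) until a fit with a requested number of zero
# coefficients is found (or the depth budget runs out).
find.lambda.for.zeros <- function(fit.fun, target.zeros, left, right,
                                  max.depth = 10) {
	for (depth in seq_len(max.depth)) {
		middle <- left + (right - left) / 2
		beta <- fit.fun(2^middle)
		num.zero <- sum(beta == 0)
		if (num.zero == target.zeros) {
			break
		} else if (num.zero > target.zeros) {
			right <- middle    # too sparse: lambda is too large
		} else {
			left <- middle     # not sparse enough: lambda is too small
		}
	}
	return(2^middle)
}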
diff --git a/R/plot.sparsestep.R b/R/plot.sparsestep.R
new file mode 100644
index 0000000..87ab661
--- /dev/null
+++ b/R/plot.sparsestep.R
@@ -0,0 +1,30 @@
+#' @title Plot the SparseStep path
+#'
+#' @description Plot the coefficients of the SparseStep path
+#'
+#' @param obj a \code{sparsestep} object
+#' @param type the plot type, default: "s".
+#' @param lty line type, default: 1
+#' @param ... further arguments passed on to matplot
+#'
+#' @author
+#' Gertjan van den Burg (author and maintainer).
+#'
+#' @export
+#' @aliases plot
+#'
+#' @examples
+#' data(diabetes)
+#' attach(diabetes)
+#' fit <- sparsestep(x, y)
+#' plot(fit)
+#' pth <- path.sparsestep(x, y)
+#' plot(pth)
+plot.sparsestep <- function(obj, type="s", lty=1, ...)
+{
+ index <- log(obj$lambda)
+ coefs <- t(as.matrix(obj$beta))
+
+ matplot(index, coefs, xlab="Log lambda", ylab="Coefficients",
+ type=type, lty=lty, ...)
+}
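
A short sketch of plotting a full coefficient path with the method above (placeholder data; path.sparsestep is the path routine defined earlier in this commit, and one line is drawn per coefficient against log lambda).

library(sparsestep)

set.seed(7)
x <- matrix(rnorm(100 * 20), 100, 20)
y <- rnorm(100)

pth <- path.sparsestep(x, y)
plot(pth)                # coefficient paths versus log lambda
abline(h = 0, lty = 3)   # reference line at zero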
diff --git a/R/postprocess.R b/R/postprocess.R
new file mode 100644
index 0000000..f43bb05
--- /dev/null
+++ b/R/postprocess.R
@@ -0,0 +1,21 @@
+postprocess <- function(betas, a0, x, normx, nvars, nlambdas) {
+ betas <- scale(betas, FALSE, normx)
+
+ # add names
+ vnames <- colnames(x)
+ if (is.null(vnames)) { vnames <- paste("V", seq(nvars), sep="") }
+ stepnames <- paste("s", seq(nlambdas)-1, sep="")
+ colnames(betas) <- vnames
+ rownames(betas) <- stepnames
+
+ a0 <- t(as.matrix(rep(a0, nlambdas)))
+ colnames(a0) <- stepnames
+ rownames(a0) <- "Intercept"
+
+ # Make it a sparse matrix
+ beta <- drop0(Matrix(as.matrix(t(betas))))
+
+ out <- list(beta = beta, a0 = a0)
+
+ return(out)
+}
diff --git a/R/predict.sparsestep.R b/R/predict.sparsestep.R
index 5f8898e..8a4be1a 100644
--- a/R/predict.sparsestep.R
+++ b/R/predict.sparsestep.R
@@ -1,4 +1,27 @@
-predict.sparsestep <- function(object, newx)
+#' @title Make predictions from a SparseStep model
+#'
+#' @description Predicts the outcome variable for the SparseStep model for
+#' each value of lambda supplied to the model.
+#'
+#' @param obj Fitted "sparsestep" object
+#' @param newx Matrix of new values for `x` at which predictions are to be made.
+#'
+#' @return a matrix of numerical predictions of size nobs x nlambda
+#'
+#' @export
+#' @aliases predict
+#'
+#' @examples
+#' x <- matrix(rnorm(100*20), 100, 20)
+#' y <- rnorm(100)
+#' fit <- sparsestep(x, y)
+#' yhat <- predict(fit, x)
+#'
+predict.sparsestep <- function(obj, newx)
{
- NULL
+ yhat <- newx %*% as.matrix(obj$beta)
+ if (obj$intercept) {
+ yhat <- yhat + rep(1, nrow(yhat)) %*% obj$a0
+ }
+ return(yhat)
}
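
A minimal sketch of the new predict method (placeholder data; predictions come back as an nobs x nlambda matrix, one column per lambda value supplied to the fit).

library(sparsestep)

set.seed(3)
x <- matrix(rnorm(100 * 20), 100, 20)
y <- rnorm(100)

fit <- sparsestep(x, y, lambda = c(0.5, 1, 2))
xnew <- matrix(rnorm(10 * 20), 10, 20)

yhat <- predict(fit, xnew)
dim(yhat)    # 10 x 3: one column of predictions per lambda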
diff --git a/R/preprocess.R b/R/preprocess.R
new file mode 100644
index 0000000..253394e
--- /dev/null
+++ b/R/preprocess.R
@@ -0,0 +1,38 @@
+preprocess <- function(x, y, normalize, intercept, XX, Xy, use.XX, use.Xy)
+{
+ nm <- dim(x)
+ nobs <- nm[1]
+ nvars <- nm[2]
+ one <- rep(1, nobs)
+
+ if (intercept) {
+ meanx <- drop(one %*% x)/nobs
+ x <- scale(x, meanx, FALSE)
+ a0 <- mean(y)
+ y <- drop(y - a0)
+ } else {
+ meanx <- rep(0, nvars)
+ a0 <- 0
+ y <- drop(y)
+ }
+
+ if (normalize) {
+ normx <- sqrt(drop(one %*% (x^2)))
+ names(normx) <- NULL
+ x <- scale(x, FALSE, normx)
+ cat("Normalizing in sparsestep\n")
+ } else {
+ normx <- rep(1, nvars)
+ }
+
+ if (use.XX & is.null(XX)) {
+ XX <- t(x) %*% x
+ }
+ if (use.Xy & is.null(Xy)) {
+ Xy <- t(x) %*% y
+ }
+
+ out <- list(nvars = nvars, normx = normx, a0 = a0,
+ XX = XX, Xy = Xy, meanx = meanx)
+ return(out)
+}
diff --git a/R/print.sparsestep.R b/R/print.sparsestep.R
index 1de1370..336d705 100644
--- a/R/print.sparsestep.R
+++ b/R/print.sparsestep.R
@@ -6,6 +6,8 @@
#'
#' @export
#'
+#' @aliases print
+#'
#' @examples
#' data(diabetes)
#' attach(diabetes)
@@ -17,8 +19,6 @@ print.sparsestep <- function(obj, ...)
{
cat("\nCall:\n")
dput(obj$call)
- cat("R-squared:", format(round(rev(obj$R2)[1], 3)), "\n")
- zeros <- length(which(obj$beta == 0))/length(obj$beta)*100.0
- cat("Percentage coeff zero:", format(round(zeros, 2)), "\n")
+ cat("Lambda:", obj$lambda, "\n")
invisible(obj)
}
diff --git a/R/run.sparsestep.R b/R/run.sparsestep.R
new file mode 100644
index 0000000..43017c9
--- /dev/null
+++ b/R/run.sparsestep.R
@@ -0,0 +1,52 @@
+# Core SparseStep routine. This could be implemented in a low-level language
+# in the future, if necessary.
+#
+run.sparsestep <- function(XX, Xy, nvars, lambda, gamma0, gammastep, gammastop,
+ IMsteps, force.zero, threshold) {
+ # Start solving SparseStep
+ gamma <- gamma0
+ beta <- matrix(0.0, nvars, 1)
+
+ it <- 0
+ while (gamma > gammastop)
+ {
+ for (i in 1:IMsteps) {
+ alpha <- beta
+ omega <- gamma^2/(alpha^2 + gamma^2)^2
+ Omega <- diag(as.vector(omega), nvars, nvars)
+ beta <- solve(XX + lambda * Omega, Xy)
+ }
+ it <- it + 1
+ gamma <- gamma / gammastep
+ }
+
+	# perform IM until the coefficients converge; x and y are not available
+	# in this routine, so convergence is measured on the change in beta
+	# rather than on the loss value
+	epsilon <- 1e-14
+	repeat
+	{
+		alpha <- beta
+		omega <- gamma^2/(alpha^2 + gamma^2)^2
+		Omega <- diag(as.vector(omega), nvars, nvars)
+		beta <- solve(XX + lambda * Omega, Xy)
+		if (sum(abs(beta - alpha)) < epsilon)
+			break
+	}
+
+ # postprocessing
+ if (force.zero) {
+ beta[which(abs(beta) < threshold)] <- 0
+ }
+ return(beta)
+}
+
+get.loss <- function(x, y, gamma, beta, lambda)
+{
+ Xb <- x %*% beta
+ diff <- y - Xb
+ b2 <- beta^2
+ binv <- 1/(b2 + gamma^2)
+ loss <- t(diff) %*% diff + lambda * t(b2) %*% binv
+ return(loss)
+}
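
For reference, the update inside both loops above is one iterative-majorization step for the penalized objective evaluated by get.loss: solve (X'X + lambda * Omega) beta = X'y with Omega = diag(gamma^2 / (alpha^2 + gamma^2)^2). The self-contained numerical sketch below writes out a single step on placeholder data (all values are illustrative, not package defaults).

set.seed(11)
X <- matrix(rnorm(50 * 4), 50, 4)
y <- rnorm(50)
XX <- t(X) %*% X
Xy <- t(X) %*% y

lambda <- 1
gamma  <- 0.1
alpha  <- rep(0.5, 4)                      # current estimate of beta

omega <- gamma^2 / (alpha^2 + gamma^2)^2   # majorization weights
Omega <- diag(omega, 4, 4)
beta  <- solve(XX + lambda * Omega, Xy)    # updated estimate

# the penalty term from get.loss: lambda * sum(beta^2 / (beta^2 + gamma^2));
# each summand tends to 1 for nonzero beta and to 0 for zero beta as
# gamma -> 0, so the penalty approaches lambda times the number of nonzeros
lambda * sum(beta^2 / (beta^2 + gamma^2))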
diff --git a/man/print.sparsestep.Rd b/man/print.sparsestep.Rd
index 6680788..ec09981 100644
--- a/man/print.sparsestep.Rd
+++ b/man/print.sparsestep.Rd
@@ -1,6 +1,7 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/print.sparsestep.R
\name{print.sparsestep}
+\alias{print}
\alias{print.sparsestep}
\title{Print the fitted SparseStep model}
\usage{
diff --git a/man/sparsestep.Rd b/man/sparsestep.Rd
index 3d4a311..b6e836e 100644
--- a/man/sparsestep.Rd
+++ b/man/sparsestep.Rd
@@ -6,10 +6,10 @@
\alias{sparsestep-package}
\title{Fits the SparseStep model}
\usage{
-sparsestep(x, y, lambda = 1, gamma0 = 1e+06, gammastop = 1e-08,
- IMsteps = 2, gammastep = 2, normalize = TRUE, intercept = TRUE,
- force.zero = TRUE, threshold = 1e-07, XX = NULL, Xy = NULL,
- use.XX = TRUE, use.Xy = TRUE)
+sparsestep(x, y, lambda = c(0.1, 0.5, 1, 5, 10), gamma0 = 1000,
+ gammastop = 1e-04, IMsteps = 2, gammastep = 2, normalize = TRUE,
+ intercept = TRUE, force.zero = TRUE, threshold = 1e-07, XX = NULL,
+ Xy = NULL, use.XX = TRUE, use.Xy = TRUE)
}
\arguments{
\item{x}{matrix of predictors}
@@ -49,8 +49,26 @@ absolute zero}
\item{use.Xy}{whether or not to compute X'y and return it}
}
\value{
-A "sparsestep" object is returned, for which predict, coef, methods
-exist.
+A "sparsestep" S3 object is returned, for which predict, coef, and
+plot methods exist. It has the following items:
+\item{call}{The call that was used to construct the model.}
+\item{lambda}{The value(s) of lambda used to construct the model.}
+\item{gamma0}{The gamma0 value of the model.}
+\item{gammastop}{The gammastop value of the model}
+\item{IMsteps}{The IMsteps value of the model}
+\item{gammastep}{The gammastep value of the model}
+\item{intercept}{Boolean indicating if an intercept was fitted in the
+model}
+\item{force.zero}{Boolean indicating if a forced zero-setting was
+performed.}
+\item{threshold}{The threshold used for a forced zero-setting}
+\item{beta}{The resulting coefficients stored in a sparse matrix format
+(dgCMatrix). This matrix has dimensions nvar x nlambda}
+\item{a0}{The intercept vector of length nlambda, one value per lambda}
+\item{normx}{Vector used to normalize the columns of x}
+\item{meanx}{Vector of column means of x}
+\item{XX}{The matrix X'X if use.XX was set to TRUE}
+\item{Xy}{The matrix X'y if use.Xy was set to TRUE}
}
\description{
Fits the SparseStep model for a single value of the
@@ -59,10 +77,15 @@ regularization parameter.
sparsestep.
}
\examples{
-data(diabetes)
-attach(diabetes)
-object <- sparsestep(x, y)
-plot(object)
-detach(diabetes)
+x <- matrix(rnorm(100*20), 100, 20)
+y <- rnorm(100)
+fit <- sparsestep(x, y)
+}
+\author{
+Gertjan van den Burg (author and maintainer).
+}
+\seealso{
+\code{\link{coef}}, \code{\link{print}}, \code{\link{predict}},
+\code{\link{plot}}, and \code{\link{sparsestep.path}}.
}
diff --git a/man/sparsestep.path.Rd b/man/sparsestep.path.Rd
index 43625d5..c038dd9 100644
--- a/man/sparsestep.path.Rd
+++ b/man/sparsestep.path.Rd
@@ -2,10 +2,12 @@
% Please edit documentation in R/path.sparsestep.R
\name{sparsestep.path}
\alias{sparsestep.path}
-\title{Path algorithm for the SparseStep model}
+\title{Approximate path algorithm for the SparseStep model}
\usage{
-sparsestep.path(x, y, max.depth = 10, intercept = TRUE, normalize = TRUE,
- ...)
+sparsestep.path(x, y, max.depth = 10, gamma0 = 1000, gammastop = 1e-04,
+ IMsteps = 2, gammastep = 2, normalize = TRUE, intercept = TRUE,
+ force.zero = TRUE, threshold = 1e-07, XX = NULL, Xy = NULL,
+ use.XX = TRUE, use.Xy = TRUE)
}
\arguments{
\item{x}{matrix of predictors}
@@ -14,10 +16,79 @@ sparsestep.path(x, y, max.depth = 10, intercept = TRUE, normalize = TRUE,
\item{max.depth}{maximum recursion depth}
-\item{...}{further arguments to sparsestep()}
+\item{gamma0}{starting value of the gamma parameter}
+
+\item{gammastop}{stopping value of the gamma parameter}
+
+\item{IMsteps}{number of steps of the majorization algorithm to perform for
+each value of gamma}
+
+\item{gammastep}{factor to decrease gamma with at each step}
+
+\item{normalize}{if TRUE, each variable is standardized to have unit L2
+norm, otherwise it is left alone.}
+
+\item{intercept}{if TRUE, an intercept is included in the model (and not
+penalized), otherwise no intercept is included}
+
+\item{force.zero}{if TRUE, absolute coefficients smaller than the provided
+threshold value are set to absolute zero as a post-processing step,
+otherwise no thresholding is performed}
+
+\item{threshold}{threshold value to use for setting coefficients to
+absolute zero}
+
+\item{XX}{The X'X matrix; useful for repeated runs where X'X stays the same}
+
+\item{Xy}{The X'y matrix; useful for repeated runs where X'y stays the same}
+
+\item{use.XX}{whether or not to compute X'X and return it}
+
+\item{use.Xy}{whether or not to compute X'y and return it}
+}
+\value{
+A "sparsestep" S3 object is returned, for which print, predict,
+coef, and plot methods exist. It has the following items:
+\item{call}{The call that was used to construct the model.}
+\item{lambda}{The value(s) of lambda used to construct the model.}
+\item{gamma0}{The gamma0 value of the model.}
+\item{gammastop}{The gammastop value of the model}
+\item{IMsteps}{The IMsteps value of the model}
+\item{gammastep}{The gammastep value of the model}
+\item{intercept}{Boolean indicating if an intercept was fitted in the
+model}
+\item{force.zero}{Boolean indicating if a forced zero-setting was
+performed.}
+\item{threshold}{The threshold used for a forced zero-setting}
+\item{beta}{The resulting coefficients stored in a sparse matrix format
+(dgCMatrix). This matrix has dimensions nvar x nlambda}
+\item{a0}{The intercept vector of length nlambda, one value per lambda}
+\item{normx}{Vector used to normalize the columns of x}
+\item{meanx}{Vector of column means of x}
+\item{XX}{The matrix X'X if use.XX was set to TRUE}
+\item{Xy}{The matrix X'y if use.Xy was set to TRUE}
}
\description{
Fits the entire regularization path for SparseStep using a
-Golden Section search.
+Golden Section search. Note that this algorithm is approximate: there is no
+guarantee that the solutions \emph{between} the computed values of lambda do
+not differ from those calculated. For instance, if solutions are calculated
+at \eqn{\lambda_{i}}{\lambda[i]} and \eqn{\lambda_{i+1}}{\lambda[i+1]}, this
+algorithm ensures that the solution at \eqn{\lambda_{i+1}}{\lambda[i+1]} has
+one more zero coefficient than the solution at \eqn{\lambda_{i}}{\lambda[i]}
+(provided the recursion depth is large enough). There is, however, no
+guarantee that different solutions do not occur between
+\eqn{\lambda_{i}}{\lambda[i]} and \eqn{\lambda_{i+1}}{\lambda[i+1]}. This is
+an ongoing research topic.
+
+Note that this path algorithm is not faster than running the
+\code{sparsestep} function with the same \eqn{\lambda} sequence.
+}
+\examples{
+x <- matrix(rnorm(100*20), 100, 20)
+y <- rnorm(100)
+pth <- sparsestep.path(x, y)
+}
+\author{
+Gertjan van den Burg (author and maintainer).
}