diff options
| author | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2016-02-10 15:27:35 -0500 |
|---|---|---|
| committer | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2016-02-10 15:27:35 -0500 |
| commit | 904e559845f48f38c7eb928d65876e3a064a17c7 (patch) | |
| tree | 1d4153ad46cfe94bda2bd5dc3105acf67b2dc994 | |
| parent | redefine gamma to correspond to theory (diff) | |
| download | sparsestep-904e559845f48f38c7eb928d65876e3a064a17c7.tar.gz sparsestep-904e559845f48f38c7eb928d65876e3a064a17c7.zip | |
add documentation, unify implementations of path and fit
| -rw-r--r-- | NAMESPACE | 3 | ||||
| -rw-r--r-- | R/cd.sparsestep.R | 73 | ||||
| -rw-r--r-- | R/coef.sparsestep.R | 30 | ||||
| -rw-r--r-- | R/fit.sparsestep.R | 149 | ||||
| -rw-r--r-- | R/path.sparsestep.R | 189 | ||||
| -rw-r--r-- | R/plot.sparsestep.R | 30 | ||||
| -rw-r--r-- | R/postprocess.R | 21 | ||||
| -rw-r--r-- | R/predict.sparsestep.R | 27 | ||||
| -rw-r--r-- | R/preprocess.R | 38 | ||||
| -rw-r--r-- | R/print.sparsestep.R | 6 | ||||
| -rw-r--r-- | R/run.sparsestep.R | 52 | ||||
| -rw-r--r-- | man/print.sparsestep.Rd | 1 | ||||
| -rw-r--r-- | man/sparsestep.Rd | 45 | ||||
| -rw-r--r-- | man/sparsestep.path.Rd | 81 |
14 files changed, 485 insertions, 260 deletions
@@ -1,5 +1,8 @@ # Generated by roxygen2 (4.1.1): do not edit by hand +S3method(coef,sparsestep) +S3method(plot,sparsestep) +S3method(predict,sparsestep) S3method(print,sparsestep) export(sparsestep) export(sparsestep.cd) diff --git a/R/cd.sparsestep.R b/R/cd.sparsestep.R deleted file mode 100644 index 29dbec8..0000000 --- a/R/cd.sparsestep.R +++ /dev/null @@ -1,73 +0,0 @@ -#' @title Coordinate descent algorithm for SparseStep -#' -#' -#' @export -#' -sparsestep.cd <- function(x, y, lambdas=NULL, epsilon=1e-5, intercept=TRUE, - ...) -{ - nm <- dim(X) - n <- nm[1] - m <- nm[2] - one <- rep(1, n) - - if (intercept) { - meanx <- drop(one %*% x)/n - x <- scale(x, meanx, FALSE) - mu <- mean(y) - y <- drop(y - mu) - } else { - meanx <- rep(0, m) - mu <- 0 - y <- drop(y) - } - - XX <- t(x) %*% x - Xy <- t(x) %*% y - - #gammas <- 2^(seq(log(1e6)/log(2), log(1e-8)/log(2))) - - num.lambdas <- length(lambdas) - #num.gammas <- length(gammas) - - #betas <- array(0, dim=c(num.gammas, num.lambdas, m)) - betas <- array(0, dim=c(num.lambdas, m)) - for (l in num.lambdas:1) { - lambda <- lambdas[l] - # initialize beta - if (l == num.lambdas) { - beta <- as.vector(matrix(0, 1, m)) - } else { - #beta <- betas[num.gammas, l+1, ] - beta <- betas[l+1, ] - } - - j <- 1 - last.beta <- as.vector(matrix(0, 1, m)) - while (TRUE) { - # code - b <- -2 * x[, j] %*% (y - x[, -j] %*% last.beta[-j]) - a <- x[, j] %*% x[, j] - if (abs(last.beta[j]) > epsilon) { - beta[j] <- b/a - } else { - beta[j] <- (2*b - lambda*sign(last.beta[j]))/ - (2*a) - } - # check convergence - if (sum(abs(beta - last.beta)) < 1e-10) { - break - } else { - last.beta <- beta - } - # continue - j <- j %% m + 1 - } - betas[l, ] <- beta - } - - return(betas) -} - - - diff --git a/R/coef.sparsestep.R b/R/coef.sparsestep.R new file mode 100644 index 0000000..3a6286c --- /dev/null +++ b/R/coef.sparsestep.R @@ -0,0 +1,30 @@ +#' @title Get the coefficients of a fitted SparseStep model +#' +#' @description Returns the coefficients of the SparseStep model. +#' +#' @param obj a "sparsestep" object +#' +#' @return The coefficients of the SparseStep model (i.e. the betas). If the +#' model was fitted with an intercept this will be the first value in the +#' resulting vector. +#' +#' @export +#' @aliases coef +#' +#' @examples +#' x <- matrix(rnorm(100*20), 100, 20) +#' y <- rnorm(100) +#' fit <- sparsestep(x, y) +#' coef(fit) +#' +coef.sparsestep <- function(obj, ...) +{ + if (obj$intercept) { + beta <- rbind(obj$a0, obj$beta) + } else { + beta <- obj$beta + } + nbeta <- drop0(Matrix(as.matrix(beta))) + + return(nbeta) +} diff --git a/R/fit.sparsestep.R b/R/fit.sparsestep.R index aabf6f6..020f29b 100644 --- a/R/fit.sparsestep.R +++ b/R/fit.sparsestep.R @@ -25,115 +25,68 @@ #' @param use.XX whether or not to compute X'X and return it #' @param use.Xy whether or not to compute X'y and return it #' -#' @return A "sparsestep" object is returned, for which predict, coef, methods -#' exist. +#' @return A "sparsestep" S3 object is returned, for which print, predict, +#' coef, and plot methods exist. It has the following items: +#' \item{call}{The call that was used to construct the model.} +#' \item{lambda}{The value(s) of lambda used to construct the model.} +#' \item{gamma0}{The gamma0 value of the model.} +#' \item{gammastop}{The gammastop value of the model} +#' \item{IMsteps}{The IMsteps value of the model} +#' \item{gammastep}{The gammastep value of the model} +#' \item{intercept}{Boolean indicating if an intercept was fitted in the +#' model} +#' \item{force.zero}{Boolean indicating if a force zero-setting was +#' performed.} +#' \item{threshold}{The threshold used for a forced zero-setting} +#' \item{beta}{The resulting coefficients stored in a sparse matrix format +#' (dgCMatrix). This matrix has dimensions nvar x nlambda} +#' \item{a0}{The intercept vector for each value of gamma of length nlambda} +#' \item{normx}{Vector used to normalize the columns of x} +#' \item{meanx}{Vector of column means of x} +#' \item{XX}{The matrix X'X if use.XX was set to TRUE} +#' \item{Xy}{The matrix X'y if use.Xy was set to TRUE} +#' +#' @author +#' Gertjan van den Burg (author and maintainer). +#' +#' @seealso +#' \code{\link{coef}}, \code{\link{print}}, \code{\link{predict}}, +#' \code{\link{plot}}, and \code{\link{sparsestep.path}}. #' #' @export #' #' @examples -#' data(diabetes) -#' attach(diabetes) -#' object <- sparsestep(x, y) -#' plot(object) -#' detach(diabetes) +#' x <- matrix(rnorm(100*20), 100, 20) +#' y <- rnorm(100) +#' fit <- sparsestep(x, y) #' -sparsestep <- function(x, y, lambda=1.0, gamma0=1e6, - gammastop=1e-8, IMsteps=2, gammastep=2.0, normalize=TRUE, - intercept=TRUE, force.zero=TRUE, threshold=1e-7, - XX=NULL, Xy=NULL, use.XX = TRUE, use.Xy = TRUE) +sparsestep <- function(x, y, lambda=c(0.1, 0.5, 1.0, 5, 10), gamma0=1e3, + gammastop=1e-4, IMsteps=2, gammastep=2.0, + normalize=TRUE, intercept=TRUE, force.zero=TRUE, + threshold=1e-7, XX=NULL, Xy=NULL, use.XX = TRUE, + use.Xy = TRUE) { call <- match.call() + prep <- preprocess(x, y, normalize, intercept, XX, Xy, use.XX, use.Xy) - nm <- dim(x) - n <- nm[1] - m <- nm[2] - one <- rep(1, n) - - if (intercept) { - meanx <- drop(one %*% x)/n - x <- scale(x, meanx, FALSE) - mu <- mean(y) - y <- drop(y - mu) - } else { - meanx <- rep(0, m) - mu <- 0 - y <- drop(y) + betas <- NULL + for (lmd in lambda) { + beta <- run.sparsestep(prep$XX, prep$Xy, prep$nvars, lmd, + gamma0, gammastep, gammastop, IMsteps, + force.zero, threshold) + betas <- cbind(beta, betas) } - if (normalize) { - normx <- sqrt(drop(one %*% (x^2))) - names(normx) <- NULL - x <- scale(x, FALSE, normx) - cat("Normalizing in sparsestep\n") - } else { - normx <- rep(1, m) - } + post <- postprocess(t(betas), prep$a0, x, prep$normx, prep$nvars, + length(lambda)) - if (use.XX & is.null(XX)) { - XX <- t(x) %*% x - } - if (use.Xy & is.null(Xy)) { - Xy <- t(x) %*% y - } - - # Start solving SparseStep - gamma <- gamma0 - beta <- matrix(0.0, m, 1) - - it <- 0 - while (gamma > gammastop) - { - for (i in 1:IMsteps) { - alpha <- beta - omega <- gamma^2/(alpha^2 + gamma^2)^2 - Omega <- diag(as.vector(omega), m, m) - beta <- solve(XX + lambda * Omega, Xy) - } - it <- it + 1 - gamma <- gamma / gammastep - } - - # perform IM until convergence - epsilon <- 1e-14 - loss <- get.loss(x, y, gamma, beta, lambda) - lbar <- (1.0 + 2.0 * epsilon) * loss - while ((lbar - loss)/loss > epsilon) - { - alpha <- beta - omega <- gamma^2/(alpha^2 + gamma^2)^2 - Omega <- diag(as.vector(omega), m, m) - beta <- solve(XX + lambda * Omega, Xy) - lbar <- loss - loss <- get.loss(x, y, gamma, beta, lambda) - } - - # postprocessing - if (force.zero) { - beta[which(abs(beta) < threshold)] <- 0 - } - residuals <- y - x %*% beta - beta <- scale(t(beta), FALSE, normx) - RSS <- apply(residuals^2, 2, sum) - R2 <- 1 - RSS/RSS[1] - - object <- list(call = call, lambda = lambda, R2 = R2, RSS = RSS, - gamma0 = gamma0, gammastop = gammastop, - IMsteps = IMsteps, gammastep = gammastep, - intercept = intercept, force.zero = force.zero, - threshold = threshold, beta = beta, mu = mu, - normx = normx, meanx = meanx, - XX = if(use.XX) XX else NULL, - Xy = if(use.Xy) Xy else NULL) + object <- list(call = call, lambda = lambda, gamma0 = gamma0, + gammastop = gammastop, IMsteps = IMsteps, + gammastep = gammastep, intercept = intercept, + force.zero = force.zero, threshold = threshold, + beta = post$beta, a0 = post$a0, normx = prep$normx, + meanx = prep$meanx, XX = if(use.XX) prep$XX else NULL, + Xy = if(use.Xy) prep$Xy else NULL) class(object) <- "sparsestep" return(object) } - -get.loss <- function(x, y, gamma, beta, lambda) -{ - Xb <- x %*% beta - diff <- y - Xb - b2 <- beta^2 - binv <- 1/(b2 + gamma^2) - loss <- t(diff) %*% diff + lambda * t(b2) %*% binv - return(loss) -} diff --git a/R/path.sparsestep.R b/R/path.sparsestep.R index 40467b8..2081346 100644 --- a/R/path.sparsestep.R +++ b/R/path.sparsestep.R @@ -1,59 +1,100 @@ -#' @title Path algorithm for the SparseStep model +#' @title Approximate path algorithm for the SparseStep model #' #' @description Fits the entire regularization path for SparseStep using a -#' Golden Section search. +#' Golden Section search. Note that this algorithm is approximate, there is no +#' guarantee that the solutions _between_ induced values of lambdas do not +#' differ from those calculated. For instance, if solutions are calculated at +#' \eqn{\lambda_{i}}{\lambda[i]} and \eqn{\lambda_{i+1}}{\lambda[i+1]}, this +#' algorithm ensures that \eqn{\lambda_{i+1}}{\lambda[i+1]} has one more zero +#' than the solution at \eqn{\lambda_{i}}{\lambda[i]} (provided the recursion +#' depth is large enough). There is however no guarantee that there are no +#' different solutions between \eqn{\lambda_{i}}{\lambda[i]} and +#' \eqn{\lambda_{i+1}}{\lambda[i+1]}. This is an ongoing research topic. +#' +#' Note that this path algorithm is not faster than running the +#' \code{sparsestep} function with the same \eqn{\lambda} sequence. #' #' @param x matrix of predictors #' @param y response #' @param max.depth maximum recursion depth -#' @param ... further arguments to sparsestep() +#' @param gamma0 starting value of the gamma parameter +#' @param gammastop stopping value of the gamma parameter +#' @param IMsteps number of steps of the majorization algorithm to perform for +#' each value of gamma +#' @param gammastep factor to decrease gamma with at each step +#' @param normalize if TRUE, each variable is standardized to have unit L2 +#' norm, otherwise it is left alone. +#' @param intercept if TRUE, an intercept is included in the model (and not +#' penalized), otherwise no intercept is included +#' @param force.zero if TRUE, absolute coefficients smaller than the provided +#' threshold value are set to absolute zero as a post-processing step, +#' otherwise no thresholding is performed +#' @param threshold threshold value to use for setting coefficients to +#' absolute zero +#' @param XX The X'X matrix; useful for repeated runs where X'X stays the same +#' @param Xy The X'y matrix; useful for repeated runs where X'y stays the same +#' @param use.XX whether or not to compute X'X and return it +#' @param use.Xy whether or not to compute X'y and return it +#' +#' @return A "sparsestep" S3 object is returned, for which print, predict, +#' coef, and plot methods exist. It has the following items: +#' \item{call}{The call that was used to construct the model.} +#' \item{lambda}{The value(s) of lambda used to construct the model.} +#' \item{gamma0}{The gamma0 value of the model.} +#' \item{gammastop}{The gammastop value of the model} +#' \item{IMsteps}{The IMsteps value of the model} +#' \item{gammastep}{The gammastep value of the model} +#' \item{intercept}{Boolean indicating if an intercept was fitted in the +#' model} +#' \item{force.zero}{Boolean indicating if a force zero-setting was +#' performed.} +#' \item{threshold}{The threshold used for a forced zero-setting} +#' \item{beta}{The resulting coefficients stored in a sparse matrix format +#' (dgCMatrix). This matrix has dimensions nvar x nlambda} +#' \item{a0}{The intercept vector for each value of gamma of length nlambda} +#' \item{normx}{Vector used to normalize the columns of x} +#' \item{meanx}{Vector of column means of x} +#' \item{XX}{The matrix X'X if use.XX was set to TRUE} +#' \item{Xy}{The matrix X'y if use.Xy was set to TRUE} +#' +#' @author +#' Gertjan van den Burg (author and maintainer). #' #' @export #' -sparsestep.path <- function(x, y, max.depth=10, intercept=TRUE, - normalize=TRUE, ...) +#' @seealso +#' \code{\link{coef}}, \code{\link{print}}, \code{\link{predict}}, +#' \code{\link{plot}}, and \code{\link{sparsestep.path}}. +#' +#' @examples +#' x <- matrix(rnorm(100*20), 100, 20) +#' y <- rnorm(100) +#' pth <- path.sparsestep(x, y) +#' +path.sparsestep <- function(x, y, max.depth=10, gamma0=1e3, gammastop=1e-4, + IMsteps=2, gammastep=2.0, normalize=TRUE, + intercept=TRUE, force.zero=TRUE, threshold=1e-7, + XX=NULL, Xy=NULL, use.XX = TRUE, use.Xy = TRUE) { call <- match.call() - nm <- dim(x) - n <- nm[1] - m <- nm[2] - one <- rep(1, n) - - if (intercept) { - meanx <- drop(one %*% x)/n - x <- scale(x, meanx, FALSE) - mu <- mean(y) - y <- drop(y - mu) - } else { - meanx <- rep(0, m) - mu <- 0 - y <- drop(y) - } - - if (normalize) { - normx <- sqrt(drop(one %*% (x^2))) - names(normx) <- NULL - x <- scale(x, FALSE, normx) - cat("Normalizing in path.sparsestep\n") - } else { - normx <- rep(1, m) - } - - XX <- t(x) %*% x - Xy <- t(x) %*% y + prep <- preprocess(x, y, normalize, intercept, XX, Xy, use.XX, use.Xy) + nvars <- prep$nvars; + XX <- prep$XX; + Xy <- prep$Xy; iter <- 0 # First find the smallest lambda for which all coefficients are zero lambda.max <- 2^25 - fit <- NULL + beta <- NULL while (1) { - last.fit <- fit - fit <- sparsestep(x, y, lambda=lambda.max, normalize=FALSE, - XX=XX, Xy=Xy, ...) + last.beta <- beta + beta <- run.sparsestep(XX, Xy, nvars, lambda.max, gamma0, + gammastep, gammastop, IMsteps, + force.zero, threshold) iter <- iter + 1 - if (all(fit$beta == 0)) { + if (all(beta == 0)) { lambda.max <- lambda.max / 2 } else { lambda.max <- lambda.max * 2 @@ -62,21 +103,22 @@ sparsestep.path <- function(x, y, max.depth=10, intercept=TRUE, } cat("Found maximum value of lambda: 2^(", log(lambda.max)/log(2), ")\n") iter <- iter + 1 - if (is.null(last.fit)) { - betas.max <- fit$beta + if (is.null(last.beta)) { + betas.max <- beta } else { - betas.max <- last.fit$beta + betas.max <- last.beta } # Now find the largest lambda for which no coefficients are zero lambda.min <- 2^-12 - fit <- NULL + beta <- NULL while (1) { - last.fit <- fit - fit <- sparsestep(x, y, lambda=lambda.min, normalize=FALSE, - XX=XX, Xy=Xy, ...) + last.beta <- beta + beta <- run.sparsestep(XX, Xy, nvars, lambda.min, gamma0, + gammastep, gammastop, IMsteps, + force.zero, threshold) iter <- iter + 1 - if (all(fit$beta != 0)) { + if (all(beta != 0)) { lambda.min <- lambda.min * 2 } else { lambda.min <- lambda.min / 2 @@ -85,57 +127,67 @@ sparsestep.path <- function(x, y, max.depth=10, intercept=TRUE, } cat("Found minimum value of lambda: 2^(", log(lambda.min)/log(2), ")\n") iter <- iter + 1 - if (is.null(last.fit)) { - betas.min <- fit$beta + if (is.null(last.beta)) { + betas.min <- beta } else { - betas.min <- last.fit$beta + betas.min <- last.beta } # Run binary section search - have.zeros <- as.vector(matrix(FALSE, 1, m+1)) + have.zeros <- as.vector(matrix(FALSE, 1, nvars+1)) have.zeros[1] <- TRUE - have.zeros[m+1] <- TRUE + have.zeros[nvars+1] <- TRUE left <- log(lambda.min)/log(2) right <- log(lambda.max)/log(2) - l <- lambda.search(x, y, 0, max.depth, have.zeros, left, right, 1, - m+1, XX, Xy, ...) + l <- lambda.search(0, max.depth, have.zeros, left, right, 1, + nvars+1, XX, Xy, gamma0, gammastep, gammastop, + IMsteps, force.zero, threshold) have.zeros <- have.zeros | l$have.zeros lambdas <- c(lambda.min, l$lambdas, lambda.max) - betas <- rbind(betas.min, l$betas, betas.max) + betas <- t(cbind(betas.min, l$betas, betas.max)) iter <- iter + l$iter ord <- order(lambdas) lambdas <- lambdas[ord] betas <- betas[ord, ] - betas <- scale(betas, FALSE, normx) - object <- list(call=call, lambdas=lambdas, betas=betas, - iterations=iter) + post <- postprocess(betas, prep$a0, x, prep$normx, nvars, + length(lambdas)) + + object <- list(call = call, lambda = lambdas, gamma0 = gamma0, + gammastop = gammastop, IMsteps = IMsteps, + gammastep = gammastep, intercept = intercept, + force.zero = force.zero, threshold = threshold, + beta = post$beta, a0 = post$a0, normx = prep$normx, + meanx = prep$meanx, XX = if(use.XX) XX else NULL, + Xy = if(use.Xy) Xy else NULL) + class(object) <- "sparsestep" + return(object) } -lambda.search <- function(x, y, depth, max.depth, have.zeros, left, right, - lidx, ridx, XX, Xy, ...) +lambda.search <- function(depth, max.depth, have.zeros, left, right, + lidx, ridx, XX, Xy, gamma0, gammastep, gammastop, + IMsteps, force.zero, threshold) { cat("Running search in interval [", left, ",", right, "] ... \n") - nm <- dim(x) - m <- nm[2] + nvars <- dim(XX)[1] betas <- NULL lambdas <- NULL middle <- left + (right - left)/2 lambda <- 2^middle - fit <- sparsestep(x, y, lambda=lambda, normalize=FALSE, XX=XX, Xy=Xy, - ...) + beta <- run.sparsestep(XX, Xy, nvars, lambda, gamma0, gammastep, + gammastop, IMsteps, force.zero, threshold) iter <- 1 - num.zero <- length(which(fit$beta == 0)) + num.zero <- length(which(beta == 0)) cidx <- num.zero + 1 if (have.zeros[cidx] == FALSE) { have.zeros[cidx] = TRUE - betas <- rbind(betas, as.vector(fit$beta)) + betas <- cbind(betas, beta) lambdas <- c(lambdas, lambda) } @@ -147,12 +199,13 @@ lambda.search <- function(x, y, depth, max.depth, have.zeros, left, right, b1 <- bnd[r, 1] b2 <- bnd[r, 2] if (depth < max.depth && any(have.zeros[i1:i2] == F)) { - ds <- lambda.search(x, y, depth+1, max.depth, - have.zeros, b1, b2, i1, i2, XX, Xy, - ...) + ds <- lambda.search(depth+1, max.depth, have.zeros, + b1, b2, i1, i2, XX, Xy, gamma0, + gammastep, gammastop, IMsteps, + force.zero, threshold) have.zeros <- have.zeros | ds$have.zeros lambdas <- c(lambdas, ds$lambdas) - betas <- rbind(betas, ds$betas) + betas <- cbind(betas, ds$betas) iter <- iter + ds$iter } } diff --git a/R/plot.sparsestep.R b/R/plot.sparsestep.R new file mode 100644 index 0000000..87ab661 --- /dev/null +++ b/R/plot.sparsestep.R @@ -0,0 +1,30 @@ +#' @title Plot the SparseStep path +#' +#' @description Plot the coefficients of the SparseStep path +#' +#' @param obj a \code{sparsestep} object +#' @param type the plot type, default: "s". +#' @param lty line type, default: 1 +#' @param ... further argument to matplot +#' +#' @author +#' Gertjan van den Burg (author and maintainer). +#' +#' @export +#' @aliases plot +#' +#' @examples +#' data(diabetes) +#' attach(diabetes) +#' fit <- sparsestep(x, y) +#' plot(fit) +#' pth <- sparsestep.path(x, y) +#' plot(pth) +plot.sparsestep <- function(obj, type="s", lty=1, ...) +{ + index <- log(obj$lambda) + coefs <- t(as.matrix(obj$beta)) + + matplot(index, coefs, xlab="Log lambda", ylab="Coefficients", + type=type, lty=lty, ...) +} diff --git a/R/postprocess.R b/R/postprocess.R new file mode 100644 index 0000000..f43bb05 --- /dev/null +++ b/R/postprocess.R @@ -0,0 +1,21 @@ +postprocess <- function(betas, a0, x, normx, nvars, nlambdas) { + betas <- scale(betas, FALSE, normx) + + # add names + vnames <- colnames(x) + if (is.null(vnames)) { vnames <- paste("V", seq(nvars), sep="") } + stepnames <- paste("s", seq(nlambdas)-1, sep="") + colnames(betas) <- vnames + rownames(betas) <- stepnames + + a0 <- t(as.matrix(rep(a0, nlambdas))) + colnames(a0) <- stepnames + rownames(a0) <- "Intercept" + + # Make it a sparse matrix + beta <- drop0(Matrix(as.matrix(t(betas)))) + + out <- list(beta = beta, a0 = a0) + + return(out) +} diff --git a/R/predict.sparsestep.R b/R/predict.sparsestep.R index 5f8898e..8a4be1a 100644 --- a/R/predict.sparsestep.R +++ b/R/predict.sparsestep.R @@ -1,4 +1,27 @@ -predict.sparsestep <- function(object, newx) +#' @title Make predictions from a SparseStep model +#' +#' @description Predicts the outcome variable for the SparseStep model for +#' each value of lambda supplied to the model. +#' +#' @param object Fitted "sparsestep" object +#' @param newx Matrix of new values for `x` at which predictions are to be made. +#' +#' @return a matrix of numerical predictions of size nobs x nlambda +#' +#' @export +#' @aliases predict +#' +#' @examples +#' x <- matrix(rnorm(100*20), 100, 20) +#' y <- rnorm(100) +#' fit <- sparsestep(x, y) +#' yhat <- predict(fit) +#' +predict.sparsestep <- function(obj, newx) { - NULL + yhat <- newx %*% as.matrix(obj$beta) + if (obj$intercept) { + yhat <- yhat + rep(1, nrow(yhat)) %*% obj$a0 + } + return(yhat) } diff --git a/R/preprocess.R b/R/preprocess.R new file mode 100644 index 0000000..253394e --- /dev/null +++ b/R/preprocess.R @@ -0,0 +1,38 @@ +preprocess <- function(x, y, normalize, intercept, XX, Xy, use.XX, use.Xy) +{ + nm <- dim(x) + nobs <- nm[1] + nvars <- nm[2] + one <- rep(1, nobs) + + if (intercept) { + meanx <- drop(one %*% x)/nobs + x <- scale(x, meanx, FALSE) + a0 <- mean(y) + y <- drop(y - a0) + } else { + meanx <- rep(0, nvars) + a0 <- 0 + y <- drop(y) + } + + if (normalize) { + normx <- sqrt(drop(one %*% (x^2))) + names(normx) <- NULL + x <- scale(x, FALSE, normx) + cat("Normalizing in sparsestep\n") + } else { + normx <- rep(1, nvars) + } + + if (use.XX & is.null(XX)) { + XX <- t(x) %*% x + } + if (use.Xy & is.null(Xy)) { + Xy <- t(x) %*% y + } + + out <- list(nvars = nvars, normx = normx, a0 = a0, + XX = XX, Xy = Xy, meanx = meanx) + return(out) +} diff --git a/R/print.sparsestep.R b/R/print.sparsestep.R index 1de1370..336d705 100644 --- a/R/print.sparsestep.R +++ b/R/print.sparsestep.R @@ -6,6 +6,8 @@ #' #' @export #' +#' @aliases print +#' #' @examples #' data(diabetes) #' attach(diabetes) @@ -17,8 +19,6 @@ print.sparsestep <- function(obj, ...) { cat("\nCall:\n") dput(obj$call) - cat("R-squared:", format(round(rev(obj$R2)[1], 3)), "\n") - zeros <- length(which(obj$beta == 0))/length(obj$beta)*100.0 - cat("Percentage coeff zero:", format(round(zeros, 2)), "\n") + cat("Lambda:", obj$lambda, "\n") invisible(obj) } diff --git a/R/run.sparsestep.R b/R/run.sparsestep.R new file mode 100644 index 0000000..43017c9 --- /dev/null +++ b/R/run.sparsestep.R @@ -0,0 +1,52 @@ +# Core SparseStep routine. This could be implemented in a low-level language +# in the future, if necessary. +# +run.sparsestep <- function(XX, Xy, nvars, lambda, gamma0, gammastep, gammastop, + IMsteps, force.zero, threshold) { + # Start solving SparseStep + gamma <- gamma0 + beta <- matrix(0.0, nvars, 1) + + it <- 0 + while (gamma > gammastop) + { + for (i in 1:IMsteps) { + alpha <- beta + omega <- gamma^2/(alpha^2 + gamma^2)^2 + Omega <- diag(as.vector(omega), nvars, nvars) + beta <- solve(XX + lambda * Omega, Xy) + } + it <- it + 1 + gamma <- gamma / gammastep + } + + # perform IM until convergence + epsilon <- 1e-14 + loss <- get.loss(x, y, gamma, beta, lambda) + lbar <- (1.0 + 2.0 * epsilon) * loss + while ((lbar - loss)/loss > epsilon) + { + alpha <- beta + omega <- gamma^2/(alpha^2 + gamma^2)^2 + Omega <- diag(as.vector(omega), nvars, nvars) + beta <- solve(XX + lambda * Omega, Xy) + lbar <- loss + loss <- get.loss(x, y, gamma, beta, lambda) + } + + # postprocessing + if (force.zero) { + beta[which(abs(beta) < threshold)] <- 0 + } + return(beta) +} + +get.loss <- function(x, y, gamma, beta, lambda) +{ + Xb <- x %*% beta + diff <- y - Xb + b2 <- beta^2 + binv <- 1/(b2 + gamma^2) + loss <- t(diff) %*% diff + lambda * t(b2) %*% binv + return(loss) +} diff --git a/man/print.sparsestep.Rd b/man/print.sparsestep.Rd index 6680788..ec09981 100644 --- a/man/print.sparsestep.Rd +++ b/man/print.sparsestep.Rd @@ -1,6 +1,7 @@ % Generated by roxygen2 (4.1.1): do not edit by hand % Please edit documentation in R/print.sparsestep.R \name{print.sparsestep} +\alias{print} \alias{print.sparsestep} \title{Print the fitted SparseStep model} \usage{ diff --git a/man/sparsestep.Rd b/man/sparsestep.Rd index 3d4a311..b6e836e 100644 --- a/man/sparsestep.Rd +++ b/man/sparsestep.Rd @@ -6,10 +6,10 @@ \alias{sparsestep-package} \title{Fits the SparseStep model} \usage{ -sparsestep(x, y, lambda = 1, gamma0 = 1e+06, gammastop = 1e-08, - IMsteps = 2, gammastep = 2, normalize = TRUE, intercept = TRUE, - force.zero = TRUE, threshold = 1e-07, XX = NULL, Xy = NULL, - use.XX = TRUE, use.Xy = TRUE) +sparsestep(x, y, lambda = c(0.1, 0.5, 1, 5, 10), gamma0 = 1000, + gammastop = 1e-04, IMsteps = 2, gammastep = 2, normalize = TRUE, + intercept = TRUE, force.zero = TRUE, threshold = 1e-07, XX = NULL, + Xy = NULL, use.XX = TRUE, use.Xy = TRUE) } \arguments{ \item{x}{matrix of predictors} @@ -49,8 +49,26 @@ absolute zero} \item{use.Xy}{whether or not to compute X'y and return it} } \value{ -A "sparsestep" object is returned, for which predict, coef, methods -exist. +A "sparsestep" S3 object is returned, for which predict, coef, and +plot methods exist. It has the following items: +\item{call}{The call that was used to construct the model.} +\item{lambda}{The value(s) of lambda used to construct the model.} +\item{gamma0}{The gamma0 value of the model.} +\item{gammastop}{The gammastop value of the model} +\item{IMsteps}{The IMsteps value of the model} +\item{gammastep}{The gammastep value of the model} +\item{intercept}{Boolean indicating if an intercept was fitted in the +model} +\item{force.zero}{Boolean indicating if a force zero-setting was +performed.} +\item{threshold}{The threshold used for a forced zero-setting} +\item{beta}{The resulting coefficients stored in a sparse matrix format +(dgCMatrix). This matrix has dimensions nvar x nlambda} +\item{a0}{The intercept vector for each value of gamma of length nlambda} +\item{normx}{Vector used to normalize the columns of x} +\item{meanx}{Vector of column means of x} +\item{XX}{The matrix X'X if use.XX was set to TRUE} +\item{Xy}{The matrix X'y if use.Xy was set to TRUE} } \description{ Fits the SparseStep model for a single value of the @@ -59,10 +77,15 @@ regularization parameter. sparsestep. } \examples{ -data(diabetes) -attach(diabetes) -object <- sparsestep(x, y) -plot(object) -detach(diabetes) +x <- matrix(rnorm(100*20), 100, 20) +y <- rnorm(100) +fit <- sparsestep(x, y) +} +\author{ +Gertjan van den Burg (author and maintainer). +} +\seealso{ +\code{\link{coef}}, \code{\link{print}}, \code{\link{predict}}, +\code{\link{plot}}, and \code{\link{sparsestep.path}}. } diff --git a/man/sparsestep.path.Rd b/man/sparsestep.path.Rd index 43625d5..c038dd9 100644 --- a/man/sparsestep.path.Rd +++ b/man/sparsestep.path.Rd @@ -2,10 +2,12 @@ % Please edit documentation in R/path.sparsestep.R \name{sparsestep.path} \alias{sparsestep.path} -\title{Path algorithm for the SparseStep model} +\title{Approximate path algorithm for the SparseStep model} \usage{ -sparsestep.path(x, y, max.depth = 10, intercept = TRUE, normalize = TRUE, - ...) +sparsestep.path(x, y, max.depth = 10, gamma0 = 1000, gammastop = 1e-04, + IMsteps = 2, gammastep = 2, normalize = TRUE, intercept = TRUE, + force.zero = TRUE, threshold = 1e-07, XX = NULL, Xy = NULL, + use.XX = TRUE, use.Xy = TRUE) } \arguments{ \item{x}{matrix of predictors} @@ -14,10 +16,79 @@ sparsestep.path(x, y, max.depth = 10, intercept = TRUE, normalize = TRUE, \item{max.depth}{maximum recursion depth} -\item{...}{further arguments to sparsestep()} +\item{gamma0}{starting value of the gamma parameter} + +\item{gammastop}{stopping value of the gamma parameter} + +\item{IMsteps}{number of steps of the majorization algorithm to perform for +each value of gamma} + +\item{gammastep}{factor to decrease gamma with at each step} + +\item{normalize}{if TRUE, each variable is standardized to have unit L2 +norm, otherwise it is left alone.} + +\item{intercept}{if TRUE, an intercept is included in the model (and not +penalized), otherwise no intercept is included} + +\item{force.zero}{if TRUE, absolute coefficients smaller than the provided +threshold value are set to absolute zero as a post-processing step, +otherwise no thresholding is performed} + +\item{threshold}{threshold value to use for setting coefficients to +absolute zero} + +\item{XX}{The X'X matrix; useful for repeated runs where X'X stays the same} + +\item{Xy}{The X'y matrix; useful for repeated runs where X'y stays the same} + +\item{use.XX}{whether or not to compute X'X and return it} + +\item{use.Xy}{whether or not to compute X'y and return it} +} +\value{ +A "sparsestep" S3 object is returned, for which print, predict, +coef, and plot methods exist. It has the following items: +\item{call}{The call that was used to construct the model.} +\item{lambda}{The value(s) of lambda used to construct the model.} +\item{gamma0}{The gamma0 value of the model.} +\item{gammastop}{The gammastop value of the model} +\item{IMsteps}{The IMsteps value of the model} +\item{gammastep}{The gammastep value of the model} +\item{intercept}{Boolean indicating if an intercept was fitted in the +model} +\item{force.zero}{Boolean indicating if a force zero-setting was +performed.} +\item{threshold}{The threshold used for a forced zero-setting} +\item{beta}{The resulting coefficients stored in a sparse matrix format +(dgCMatrix). This matrix has dimensions nvar x nlambda} +\item{a0}{The intercept vector for each value of gamma of length nlambda} +\item{normx}{Vector used to normalize the columns of x} +\item{meanx}{Vector of column means of x} +\item{XX}{The matrix X'X if use.XX was set to TRUE} +\item{Xy}{The matrix X'y if use.Xy was set to TRUE} } \description{ Fits the entire regularization path for SparseStep using a -Golden Section search. +Golden Section search. Note that this algorithm is approximate, there is no +guarantee that the solutions _between_ induced values of lambdas do not +differ from those calculated. For instance, if solutions are calculated at +\eqn{\lambda_{i}}{\lambda[i]} and \eqn{\lambda_{i+1}}{\lambda[i+1]}, this +algorithm ensures that \eqn{\lambda_{i+1}}{\lambda[i+1]} has one more zero +than the solution at \eqn{\lambda_{i}}{\lambda[i]} (provided the recursion +depth is large enough). There is however no guarantee that there are no +different solutions between \eqn{\lambda_{i}}{\lambda[i]} and +\eqn{\lambda_{i+1}}{\lambda[i+1]}. This is an ongoing research topic. + +Note that this path algorithm is not faster than running the +\code{sparsestep} function with the same \code{\lambda} sequence. +} +\examples{ +x <- matrix(rnorm(100*20), 100, 20) +y <- rnorm(100) +pth <- sparsestep.path(x, y) +} +\author{ +Gertjan van den Burg (author and maintainer). } |
