add documentation, unify implementations of path and fit

author: Gertjan van den Burg <gertjanvandenburg@gmail.com> 2016-02-10 15:27:35 -0500
committer: Gertjan van den Burg <gertjanvandenburg@gmail.com> 2016-02-10 15:27:35 -0500
commit: 904e559845f48f38c7eb928d65876e3a064a17c7 (patch)
tree: 1d4153ad46cfe94bda2bd5dc3105acf67b2dc994
parent: redefine gamma to correspond to theory (diff)
download: sparsestep-904e559845f48f38c7eb928d65876e3a064a17c7.tar.gz
sparsestep-904e559845f48f38c7eb928d65876e3a064a17c7.zip
14 files changed, 485 insertions, 260 deletions
diff --git a/NAMESPACE b/NAMESPACE
index 0e14978..c3f5715 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,5 +1,8 @@
 # Generated by roxygen2 (4.1.1): do not edit by hand
 
+S3method(coef,sparsestep)
+S3method(plot,sparsestep)
+S3method(predict,sparsestep)
 S3method(print,sparsestep)
 export(sparsestep)
 export(sparsestep.cd)
diff --git a/R/cd.sparsestep.R b/R/cd.sparsestep.R
deleted file mode 100644
index 29dbec8..0000000
--- a/R/cd.sparsestep.R
+++ /dev/null
@@ -1,73 +0,0 @@
-#' @title Coordinate descent algorithm for SparseStep
-#'
-#'
-#' @export
-#'
-sparsestep.cd <- function(x, y, lambdas=NULL, epsilon=1e-5, intercept=TRUE,
-			  ...)
-{
-	nm <- dim(X)
-	n <- nm[1]
-	m <- nm[2]
-	one <- rep(1, n)
-
-	if (intercept) {
-		meanx <- drop(one %*% x)/n
-		x <- scale(x, meanx, FALSE)
-		mu <- mean(y)
-		y <- drop(y - mu)
-	} else {
-		meanx <- rep(0, m)
-		mu <- 0
-		y <- drop(y)
-	}
-
-	XX <- t(x) %*% x
-	Xy <- t(x) %*% y
-
-	#gammas <- 2^(seq(log(1e6)/log(2), log(1e-8)/log(2)))
-
-	num.lambdas <- length(lambdas)
-	#num.gammas <- length(gammas)
-	
-	#betas <- array(0, dim=c(num.gammas, num.lambdas, m))
-	betas <- array(0, dim=c(num.lambdas, m))
-	for (l in num.lambdas:1) {
-		lambda <- lambdas[l]
-		# initialize beta
-		if (l == num.lambdas) {
-			beta <- as.vector(matrix(0, 1, m))
-		} else {
-			#beta <- betas[num.gammas, l+1, ]
-			beta <- betas[l+1, ]
-		}
-
-		j <- 1
-		last.beta <- as.vector(matrix(0, 1, m))
-		while (TRUE) {
-			# code
-			b <- -2 * x[, j] %*% (y - x[, -j] %*% last.beta[-j])
-			a <- x[, j] %*% x[, j]
-			if (abs(last.beta[j]) > epsilon) {
-				beta[j] <- b/a
-			} else {
-				beta[j] <- (2*b - lambda*sign(last.beta[j]))/
-					(2*a)
-			}
-			# check convergence
-			if (sum(abs(beta - last.beta)) < 1e-10) {
-				break
-			} else {
-				last.beta <- beta
-			}
-			# continue
-			j <- j %% m + 1
-		}
-		betas[l, ] <- beta
-	}
-
-	return(betas)
-}
-
-
-
diff --git a/R/coef.sparsestep.R b/R/coef.sparsestep.R
new file mode 100644
index 0000000..3a6286c
--- /dev/null
+++ b/R/coef.sparsestep.R
@@ -0,0 +1,30 @@
+#' @title Get the coefficients of a fitted SparseStep model
+#'
+#' @description Returns the coefficients of the SparseStep model.
+#'
+#' @param obj a "sparsestep" object
+#'
+#' @return The coefficients of the SparseStep model (i.e. the betas). If the
+#' model was fitted with an intercept this will be the first value in the
+#' resulting vector.
+#'
+#' @export
+#' @aliases coef
+#'
+#' @examples
+#' x <- matrix(rnorm(100*20), 100, 20)
+#' y <- rnorm(100)
+#' fit <- sparsestep(x, y)
+#' coef(fit)
+#'
+coef.sparsestep <- function(obj, ...)
+{
+  if (obj$intercept) {
+    beta <- rbind(obj$a0, obj$beta)
+  } else {
+    beta <- obj$beta
+  }
+  nbeta <- drop0(Matrix(as.matrix(beta)))
+
+  return(nbeta)
+}
diff --git a/R/fit.sparsestep.R b/R/fit.sparsestep.R
index aabf6f6..020f29b 100644
--- a/R/fit.sparsestep.R
+++ b/R/fit.sparsestep.R
@@ -25,115 +25,68 @@
 #' @param use.XX whether or not to compute X'X and return it
 #' @param use.Xy whether or not to compute X'y and return it
 #'
-#' @return A "sparsestep" object is returned, for which predict, coef, methods 
-#' exist.
+#' @return A "sparsestep" S3 object is returned, for which print, predict, 
+#' coef, and plot methods exist. It has the following items:
+#' \item{call}{The call that was used to construct the model.}
+#' \item{lambda}{The value(s) of lambda used to construct the model.}
+#' \item{gamma0}{The gamma0 value of the model.}
+#' \item{gammastop}{The gammastop value of the model}
+#' \item{IMsteps}{The IMsteps value of the model}
+#' \item{gammastep}{The gammastep value of the model}
+#' \item{intercept}{Boolean indicating if an intercept was fitted in the 
+#' model}
+#' \item{force.zero}{Boolean indicating if a force zero-setting was 
+#' performed.}
+#' \item{threshold}{The threshold used for a forced zero-setting}
+#' \item{beta}{The resulting coefficients stored in a sparse matrix format 
+#' (dgCMatrix). This matrix has dimensions nvar x nlambda}
+#' \item{a0}{The intercept vector for each value of gamma of length nlambda}
+#' \item{normx}{Vector used to normalize the columns of x}
+#' \item{meanx}{Vector of column means of x}
+#' \item{XX}{The matrix X'X if use.XX was set to TRUE}
+#' \item{Xy}{The matrix X'y if use.Xy was set to TRUE}
+#'
+#' @author
+#' Gertjan van den Burg (author and maintainer).
+#'
+#' @seealso
+#' \code{\link{coef}}, \code{\link{print}}, \code{\link{predict}}, 
+#' \code{\link{plot}}, and \code{\link{sparsestep.path}}.
 #'
 #' @export
 #'
 #' @examples
-#' data(diabetes)
-#' attach(diabetes)
-#' object <- sparsestep(x, y)
-#' plot(object)
-#' detach(diabetes)
+#' x <- matrix(rnorm(100*20), 100, 20)
+#' y <- rnorm(100)
+#' fit <- sparsestep(x, y)
 #'
-sparsestep <- function(x, y, lambda=1.0, gamma0=1e6, 
-		       gammastop=1e-8, IMsteps=2, gammastep=2.0, normalize=TRUE,
-		       intercept=TRUE, force.zero=TRUE, threshold=1e-7,
-		       XX=NULL, Xy=NULL, use.XX = TRUE, use.Xy = TRUE)
+sparsestep <- function(x, y, lambda=c(0.1, 0.5, 1.0, 5, 10), gamma0=1e3,
+		       gammastop=1e-4, IMsteps=2, gammastep=2.0,
+		       normalize=TRUE, intercept=TRUE, force.zero=TRUE,
+		       threshold=1e-7, XX=NULL, Xy=NULL, use.XX = TRUE,
+		       use.Xy = TRUE)
 {
 	call <- match.call()
+	prep <- preprocess(x, y, normalize, intercept, XX, Xy, use.XX, use.Xy)
 
-	nm <- dim(x)
-	n <- nm[1]
-	m <- nm[2]
-	one <- rep(1, n)
-
-	if (intercept) {
-		meanx <- drop(one %*% x)/n
-		x <- scale(x, meanx, FALSE)
-		mu <- mean(y)
-		y <- drop(y - mu)
-	} else {
-		meanx <- rep(0, m)
-		mu <- 0
-		y <- drop(y)
+	betas <- NULL
+	for (lmd in lambda) {
+		beta <- run.sparsestep(prep$XX, prep$Xy, prep$nvars, lmd,
+				       gamma0, gammastep, gammastop, IMsteps,
+				       force.zero, threshold)
+		betas <- cbind(beta, betas)
 	}
 
-	if (normalize) {
-		normx <- sqrt(drop(one %*% (x^2)))
-		names(normx) <- NULL
-		x <- scale(x, FALSE, normx)
-		cat("Normalizing in sparsestep\n")
-	} else {
-		normx <- rep(1, m)
-	}
+	post <- postprocess(t(betas), prep$a0, x, prep$normx, prep$nvars,
+			    length(lambda))
 
-	if (use.XX & is.null(XX)) {
-		XX <- t(x) %*% x
-	}
-	if (use.Xy & is.null(Xy)) {
-		Xy <- t(x) %*% y
-	}
-
-	# Start solving SparseStep	
-	gamma <- gamma0
-	beta <- matrix(0.0, m, 1)
-
-	it <- 0
-	while (gamma > gammastop)
-       	{
-		for (i in 1:IMsteps) {
-			alpha <- beta
-			omega <- gamma^2/(alpha^2 + gamma^2)^2
-			Omega <- diag(as.vector(omega), m, m)
-			beta <- solve(XX + lambda * Omega, Xy)
-		}
-		it <- it + 1
-		gamma <- gamma / gammastep
-	}
-
-	# perform IM until convergence
-	epsilon <- 1e-14
-	loss <- get.loss(x, y, gamma, beta, lambda)
-	lbar <- (1.0 + 2.0 * epsilon) * loss
-	while ((lbar - loss)/loss > epsilon)
-	{
-		alpha <- beta
-		omega <- gamma^2/(alpha^2 + gamma^2)^2
-		Omega <- diag(as.vector(omega), m, m)
-		beta <- solve(XX + lambda * Omega, Xy)
-		lbar <- loss
-		loss <- get.loss(x, y, gamma, beta, lambda)
-	}
-
-	# postprocessing
-	if (force.zero) {
-		beta[which(abs(beta) < threshold)] <- 0
-	}
-	residuals <- y - x %*% beta
-	beta <- scale(t(beta), FALSE, normx)
-	RSS <- apply(residuals^2, 2, sum)
-	R2 <- 1 - RSS/RSS[1]
-
-	object <- list(call = call, lambda = lambda, R2 = R2, RSS = RSS,
-		       gamma0 = gamma0, gammastop = gammastop,
-		       IMsteps = IMsteps, gammastep = gammastep,
-		       intercept = intercept, force.zero = force.zero,
-		       threshold = threshold, beta = beta, mu = mu,
-		       normx = normx, meanx = meanx,
-		       XX = if(use.XX) XX else NULL,
-		       Xy = if(use.Xy) Xy else NULL)
+	object <- list(call = call, lambda = lambda, gamma0 = gamma0,
+		       gammastop = gammastop, IMsteps = IMsteps,
+		       gammastep = gammastep, intercept = intercept,
+		       force.zero = force.zero, threshold = threshold,
+		       beta = post$beta, a0 = post$a0, normx = prep$normx,
+		       meanx = prep$meanx, XX = if(use.XX) prep$XX else NULL,
+		       Xy = if(use.Xy) prep$Xy else NULL)
 	class(object) <- "sparsestep"
 	return(object)
 }
-
-get.loss <- function(x, y, gamma, beta, lambda)
-{
-	Xb <- x %*% beta
-	diff <- y - Xb
-	b2 <- beta^2
-	binv <- 1/(b2 + gamma^2)
-	loss <- t(diff) %*% diff + lambda * t(b2) %*% binv
-	return(loss)
-}
diff --git a/R/path.sparsestep.R b/R/path.sparsestep.R
index 40467b8..2081346 100644
--- a/R/path.sparsestep.R
+++ b/R/path.sparsestep.R
@@ -1,59 +1,100 @@
-#' @title Path algorithm for the SparseStep model
+#' @title Approximate path algorithm for the SparseStep model
 #'
 #' @description Fits the entire regularization path for SparseStep using a 
-#' Golden Section search.
+#' Golden Section search. Note that this algorithm is approximate, there is no 
+#' guarantee that the solutions _between_ induced values of lambdas do not 
+#' differ from those calculated. For instance, if solutions are calculated at 
+#' \eqn{\lambda_{i}}{\lambda[i]} and \eqn{\lambda_{i+1}}{\lambda[i+1]}, this 
+#' algorithm ensures that \eqn{\lambda_{i+1}}{\lambda[i+1]} has one more zero 
+#' than the solution at \eqn{\lambda_{i}}{\lambda[i]} (provided the recursion 
+#' depth is large enough). There is however no guarantee that there are no 
+#' different solutions between \eqn{\lambda_{i}}{\lambda[i]} and 
+#' \eqn{\lambda_{i+1}}{\lambda[i+1]}.  This is an ongoing research topic.
+#'
+#' Note that this path algorithm is not faster than running the 
+#' \code{sparsestep} function with the same \eqn{\lambda} sequence.
 #'
 #' @param x matrix of predictors
 #' @param y response
 #' @param max.depth maximum recursion depth
-#' @param ... further arguments to sparsestep()
+#' @param gamma0 starting value of the gamma parameter
+#' @param gammastop stopping value of the gamma parameter
+#' @param IMsteps number of steps of the majorization algorithm to perform for 
+#' each value of gamma
+#' @param gammastep factor to decrease gamma with at each step
+#' @param normalize if TRUE, each variable is standardized to have unit L2 
+#' norm, otherwise it is left alone.
+#' @param intercept if TRUE, an intercept is included in the model (and not 
+#' penalized), otherwise no intercept is included
+#' @param force.zero if TRUE, absolute coefficients smaller than the provided 
+#' threshold value are set to absolute zero as a post-processing step, 
+#' otherwise no thresholding is performed
+#' @param threshold threshold value to use for setting coefficients to 
+#' absolute zero
+#' @param XX The X'X matrix; useful for repeated runs where X'X stays the same
+#' @param Xy The X'y matrix; useful for repeated runs where X'y stays the same
+#' @param use.XX whether or not to compute X'X and return it
+#' @param use.Xy whether or not to compute X'y and return it
+#'
+#' @return A "sparsestep" S3 object is returned, for which print, predict, 
+#' coef, and plot methods exist. It has the following items:
+#' \item{call}{The call that was used to construct the model.}
+#' \item{lambda}{The value(s) of lambda used to construct the model.}
+#' \item{gamma0}{The gamma0 value of the model.}
+#' \item{gammastop}{The gammastop value of the model}
+#' \item{IMsteps}{The IMsteps value of the model}
+#' \item{gammastep}{The gammastep value of the model}
+#' \item{intercept}{Boolean indicating if an intercept was fitted in the 
+#' model}
+#' \item{force.zero}{Boolean indicating if a force zero-setting was 
+#' performed.}
+#' \item{threshold}{The threshold used for a forced zero-setting}
+#' \item{beta}{The resulting coefficients stored in a sparse matrix format 
+#' (dgCMatrix). This matrix has dimensions nvar x nlambda}
+#' \item{a0}{The intercept vector for each value of gamma of length nlambda}
+#' \item{normx}{Vector used to normalize the columns of x}
+#' \item{meanx}{Vector of column means of x}
+#' \item{XX}{The matrix X'X if use.XX was set to TRUE}
+#' \item{Xy}{The matrix X'y if use.Xy was set to TRUE}
+#'
+#' @author
+#' Gertjan van den Burg (author and maintainer).
 #'
 #' @export
 #'
-sparsestep.path <- function(x, y, max.depth=10, intercept=TRUE,
-			    normalize=TRUE, ...)
+#' @seealso
+#' \code{\link{coef}}, \code{\link{print}}, \code{\link{predict}}, 
+#' \code{\link{plot}}, and \code{\link{sparsestep.path}}.
+#'
+#' @examples
+#' x <- matrix(rnorm(100*20), 100, 20)
+#' y <- rnorm(100)
+#' pth <- path.sparsestep(x, y)
+#'
+path.sparsestep <- function(x, y, max.depth=10, gamma0=1e3, gammastop=1e-4,
+			    IMsteps=2, gammastep=2.0, normalize=TRUE,
+			    intercept=TRUE, force.zero=TRUE, threshold=1e-7,
+			    XX=NULL, Xy=NULL, use.XX = TRUE, use.Xy = TRUE)
 {
 	call <- match.call()
 
-	nm <- dim(x)
-	n <- nm[1]
-	m <- nm[2]
-	one <- rep(1, n)
-
-	if (intercept) {
-		meanx <- drop(one %*% x)/n
-		x <- scale(x, meanx, FALSE)
-		mu <- mean(y)
-		y <- drop(y - mu)
-	} else {
-		meanx <- rep(0, m)
-		mu <- 0
-		y <- drop(y)
-	}
-
-	if (normalize) {
-		normx <- sqrt(drop(one %*% (x^2)))
-		names(normx) <- NULL
-		x <- scale(x, FALSE, normx)
-		cat("Normalizing in path.sparsestep\n")
-	} else {
-		normx <- rep(1, m)
-	}
-
-	XX <- t(x) %*% x
-	Xy <- t(x) %*% y
+	prep <- preprocess(x, y, normalize, intercept, XX, Xy, use.XX, use.Xy)
+	nvars <- prep$nvars;
+	XX <- prep$XX;
+	Xy <- prep$Xy;
 
 	iter <- 0
 
 	# First find the smallest lambda for which all coefficients are zero
 	lambda.max <- 2^25
-	fit <- NULL
+	beta <- NULL
 	while (1) {
-		last.fit <- fit
-		fit <- sparsestep(x, y, lambda=lambda.max, normalize=FALSE, 
-				  XX=XX, Xy=Xy, ...)
+		last.beta <- beta
+		beta <- run.sparsestep(XX, Xy, nvars, lambda.max, gamma0,
+				      gammastep, gammastop, IMsteps,
+				      force.zero, threshold)
 		iter <- iter + 1
-		if (all(fit$beta == 0)) {
+		if (all(beta == 0)) {
 			lambda.max <- lambda.max / 2
 		} else {
 			lambda.max <- lambda.max * 2
@@ -62,21 +103,22 @@ sparsestep.path <- function(x, y, max.depth=10, intercept=TRUE,
 	}
 	cat("Found maximum value of lambda: 2^(", log(lambda.max)/log(2), ")\n")
 	iter <- iter + 1
-	if (is.null(last.fit)) {
-		betas.max <- fit$beta
+	if (is.null(last.beta)) {
+		betas.max <- beta
 	} else {
-		betas.max <- last.fit$beta
+		betas.max <- last.beta
 	}
 
 	# Now find the largest lambda for which no coefficients are zero
 	lambda.min <- 2^-12
-	fit <- NULL
+	beta <- NULL
 	while (1) {
-		last.fit <- fit
-		fit <- sparsestep(x, y, lambda=lambda.min, normalize=FALSE, 
-				  XX=XX, Xy=Xy, ...)
+		last.beta <- beta
+		beta <- run.sparsestep(XX, Xy, nvars, lambda.min, gamma0,
+				      gammastep, gammastop, IMsteps,
+				      force.zero, threshold)
 		iter <- iter + 1
-		if (all(fit$beta != 0)) {
+		if (all(beta != 0)) {
 			lambda.min <- lambda.min * 2
 		} else {
 			lambda.min <- lambda.min / 2
@@ -85,57 +127,67 @@ sparsestep.path <- function(x, y, max.depth=10, intercept=TRUE,
 	}
 	cat("Found minimum value of lambda: 2^(", log(lambda.min)/log(2), ")\n")
 	iter <- iter + 1
-	if (is.null(last.fit)) {
-		betas.min <- fit$beta
+	if (is.null(last.beta)) {
+		betas.min <- beta
 	} else {
-		betas.min <- last.fit$beta
+		betas.min <- last.beta
 	}
 
 	# Run binary section search
-	have.zeros <- as.vector(matrix(FALSE, 1, m+1))
+	have.zeros <- as.vector(matrix(FALSE, 1, nvars+1))
 	have.zeros[1] <- TRUE
-	have.zeros[m+1] <- TRUE
+	have.zeros[nvars+1] <- TRUE
 
 	left <- log(lambda.min)/log(2)
 	right <- log(lambda.max)/log(2)
 
-	l <- lambda.search(x, y, 0, max.depth, have.zeros, left, right, 1, 
-			   m+1, XX, Xy, ...)
+	l <- lambda.search(0, max.depth, have.zeros, left, right, 1, 
+			   nvars+1, XX, Xy, gamma0, gammastep, gammastop,
+			   IMsteps, force.zero, threshold)
 	have.zeros <- have.zeros | l$have.zeros
 	lambdas <- c(lambda.min, l$lambdas, lambda.max)
-	betas <- rbind(betas.min, l$betas, betas.max)
+	betas <- t(cbind(betas.min, l$betas, betas.max))
 	iter <- iter + l$iter
 
 	ord <- order(lambdas)
 	lambdas <- lambdas[ord]
 	betas <- betas[ord, ]
-	betas <- scale(betas, FALSE, normx)
 
-	object <- list(call=call, lambdas=lambdas, betas=betas,
-		       iterations=iter)
+	post <- postprocess(betas, prep$a0, x, prep$normx, nvars,
+			    length(lambdas))
+
+	object <- list(call = call, lambda = lambdas, gamma0 = gamma0,
+		       gammastop = gammastop, IMsteps = IMsteps,
+		       gammastep = gammastep, intercept = intercept,
+		       force.zero = force.zero, threshold = threshold,
+		       beta = post$beta, a0 = post$a0, normx = prep$normx,
+		       meanx = prep$meanx, XX = if(use.XX) XX else NULL,
+		       Xy = if(use.Xy) Xy else NULL)
+	class(object) <- "sparsestep"
+	return(object)
 }
 
-lambda.search <- function(x, y, depth, max.depth, have.zeros, left, right,
-			  lidx, ridx, XX, Xy, ...)
+lambda.search <- function(depth, max.depth, have.zeros, left, right,
+			  lidx, ridx, XX, Xy, gamma0, gammastep, gammastop,
+			   IMsteps, force.zero, threshold)
 {
 	cat("Running search in interval [", left, ",", right, "] ... \n")
-	nm <- dim(x)
-	m <- nm[2]
+	nvars <- dim(XX)[1]
 
 	betas <- NULL
 	lambdas <- NULL
 
 	middle <- left + (right - left)/2
 	lambda <- 2^middle
-	fit <- sparsestep(x, y, lambda=lambda, normalize=FALSE, XX=XX, Xy=Xy, 
-			  ...)
+	beta <- run.sparsestep(XX, Xy, nvars, lambda, gamma0, gammastep,
+			       gammastop, IMsteps, force.zero, threshold)
 	iter <- 1
 
-	num.zero <- length(which(fit$beta == 0))
+	num.zero <- length(which(beta == 0))
 	cidx <- num.zero + 1
 	if (have.zeros[cidx] == FALSE) {
 		have.zeros[cidx] = TRUE
-		betas <- rbind(betas, as.vector(fit$beta))
+		betas <- cbind(betas, beta)
 		lambdas <- c(lambdas, lambda)
 	}
 
@@ -147,12 +199,13 @@ lambda.search <- function(x, y, depth, max.depth, have.zeros, left, right,
 		b1 <- bnd[r, 1]
 		b2 <- bnd[r, 2]
 		if (depth < max.depth && any(have.zeros[i1:i2] == F)) {
-			ds <- lambda.search(x, y, depth+1, max.depth,
-					    have.zeros, b1, b2, i1, i2, XX, Xy,
-					    ...)
+			ds <- lambda.search(depth+1, max.depth, have.zeros,
+					    b1, b2, i1, i2, XX, Xy, gamma0,
+					    gammastep, gammastop, IMsteps,
+					    force.zero, threshold)
 			have.zeros <- have.zeros | ds$have.zeros
 			lambdas <- c(lambdas, ds$lambdas)
-			betas <- rbind(betas, ds$betas)
+			betas <- cbind(betas, ds$betas)
 			iter <- iter + ds$iter
 		}
 	}
diff --git a/R/plot.sparsestep.R b/R/plot.sparsestep.R
new file mode 100644
index 0000000..87ab661
--- /dev/null
+++ b/R/plot.sparsestep.R
@@ -0,0 +1,30 @@
+#' @title Plot the SparseStep path
+#'
+#' @description Plot the coefficients of the SparseStep path
+#'
+#' @param obj a \code{sparsestep} object
+#' @param type the plot type, default: "s".
+#' @param lty line type, default: 1
+#' @param ... further argument to matplot
+#'
+#' @author
+#' Gertjan van den Burg (author and maintainer).
+#'
+#' @export
+#' @aliases plot
+#'
+#' @examples
+#' data(diabetes)
+#' attach(diabetes)
+#' fit <- sparsestep(x, y)
+#' plot(fit)
+#' pth <- sparsestep.path(x, y)
+#' plot(pth)
+plot.sparsestep <- function(obj, type="s", lty=1, ...)
+{
+	index <- log(obj$lambda)
+	coefs <- t(as.matrix(obj$beta))
+
+	matplot(index, coefs, xlab="Log lambda", ylab="Coefficients",
+	       	type=type, lty=lty, ...)
+}
diff --git a/R/postprocess.R b/R/postprocess.R
new file mode 100644
index 0000000..f43bb05
--- /dev/null
+++ b/R/postprocess.R
@@ -0,0 +1,21 @@
+postprocess <- function(betas, a0, x, normx, nvars, nlambdas) {
+	betas <- scale(betas, FALSE, normx)
+
+	# add names
+	vnames <- colnames(x)
+	if (is.null(vnames)) { vnames <- paste("V", seq(nvars), sep="") }
+	stepnames <- paste("s", seq(nlambdas)-1, sep="")
+	colnames(betas) <- vnames
+	rownames(betas) <- stepnames
+
+	a0 <- t(as.matrix(rep(a0, nlambdas)))
+	colnames(a0) <- stepnames
+	rownames(a0) <- "Intercept"
+
+	# Make it a sparse matrix
+	beta <- drop0(Matrix(as.matrix(t(betas))))
+
+	out <- list(beta = beta, a0 = a0)
+
+	return(out)
+}
diff --git a/R/predict.sparsestep.R b/R/predict.sparsestep.R
index 5f8898e..8a4be1a 100644
--- a/R/predict.sparsestep.R
+++ b/R/predict.sparsestep.R
@@ -1,4 +1,27 @@
-predict.sparsestep <- function(object, newx)
+#' @title Make predictions from a SparseStep model
+#'
+#' @description Predicts the outcome variable for the SparseStep model for 
+#' each value of lambda supplied to the model.
+#'
+#' @param object Fitted "sparsestep" object
+#' @param newx Matrix of new values for `x` at which predictions are to be made.
+#' 
+#' @return a matrix of numerical predictions of size nobs x nlambda
+#'
+#' @export
+#' @aliases predict
+#'
+#' @examples
+#' x <- matrix(rnorm(100*20), 100, 20)
+#' y <- rnorm(100)
+#' fit <- sparsestep(x, y)
+#' yhat <- predict(fit)
+#'
+predict.sparsestep <- function(obj, newx)
 {
-	NULL
+  yhat <- newx %*% as.matrix(obj$beta)
+  if (obj$intercept) {
+	  yhat <- yhat + rep(1, nrow(yhat)) %*% obj$a0
+  }
+  return(yhat)
 }
diff --git a/R/preprocess.R b/R/preprocess.R
new file mode 100644
index 0000000..253394e
--- /dev/null
+++ b/R/preprocess.R
@@ -0,0 +1,38 @@
+preprocess <- function(x, y, normalize, intercept, XX, Xy, use.XX, use.Xy)
+{
+	nm <- dim(x)
+	nobs <- nm[1]
+	nvars <- nm[2]
+	one <- rep(1, nobs)
+
+	if (intercept) {
+		meanx <- drop(one %*% x)/nobs
+		x <- scale(x, meanx, FALSE)
+		a0 <- mean(y)
+		y <- drop(y - a0)
+	} else {
+		meanx <- rep(0, nvars)
+		a0 <- 0
+		y <- drop(y)
+	}
+
+	if (normalize) {
+		normx <- sqrt(drop(one %*% (x^2)))
+		names(normx) <- NULL
+		x <- scale(x, FALSE, normx)
+		cat("Normalizing in sparsestep\n")
+	} else {
+		normx <- rep(1, nvars)
+	}
+
+	if (use.XX & is.null(XX)) {
+		XX <- t(x) %*% x
+	}
+	if (use.Xy & is.null(Xy)) {
+		Xy <- t(x) %*% y
+	}
+
+	out <- list(nvars = nvars, normx = normx, a0 = a0,
+		    XX = XX, Xy = Xy, meanx = meanx)
+	return(out)
+}
diff --git a/R/print.sparsestep.R b/R/print.sparsestep.R
index 1de1370..336d705 100644
--- a/R/print.sparsestep.R
+++ b/R/print.sparsestep.R
@@ -6,6 +6,8 @@
 #'
 #' @export
 #'
+#' @aliases print
+#'
 #' @examples
 #' data(diabetes)
 #' attach(diabetes)
@@ -17,8 +19,6 @@ print.sparsestep <- function(obj, ...)
 {
 	cat("\nCall:\n")
 	dput(obj$call)
-	cat("R-squared:", format(round(rev(obj$R2)[1], 3)), "\n")
-	zeros <- length(which(obj$beta == 0))/length(obj$beta)*100.0
-	cat("Percentage coeff zero:", format(round(zeros, 2)), "\n")
+	cat("Lambda:", obj$lambda, "\n")
 	invisible(obj)
 }
diff --git a/R/run.sparsestep.R b/R/run.sparsestep.R
new file mode 100644
index 0000000..43017c9
--- /dev/null
+++ b/R/run.sparsestep.R
@@ -0,0 +1,52 @@
+# Core SparseStep routine. This could be implemented in a low-level language 
+# in the future, if necessary.
+#
+run.sparsestep <- function(XX, Xy, nvars, lambda, gamma0, gammastep, gammastop,
+			   IMsteps, force.zero, threshold) {
+	# Start solving SparseStep
+	gamma <- gamma0
+	beta <- matrix(0.0, nvars, 1)
+
+	it <- 0
+	while (gamma > gammastop)
+	{
+		for (i in 1:IMsteps) {
+			alpha <- beta
+			omega <- gamma^2/(alpha^2 + gamma^2)^2
+			Omega <- diag(as.vector(omega), nvars, nvars)
+			beta <- solve(XX + lambda * Omega, Xy)
+		}
+		it <- it + 1
+		gamma <- gamma / gammastep
+	}
+
+	# perform IM until convergence
+	epsilon <- 1e-14
+	loss <- get.loss(x, y, gamma, beta, lambda)
+	lbar <- (1.0 + 2.0 * epsilon) * loss
+	while ((lbar - loss)/loss > epsilon)
+	{
+		alpha <- beta
+		omega <- gamma^2/(alpha^2 + gamma^2)^2
+		Omega <- diag(as.vector(omega), nvars, nvars)
+		beta <- solve(XX + lambda * Omega, Xy)
+		lbar <- loss
+		loss <- get.loss(x, y, gamma, beta, lambda)
+	}
+
+	# postprocessing
+	if (force.zero) {
+		beta[which(abs(beta) < threshold)] <- 0
+	}
+	return(beta)
+}
+
+get.loss <- function(x, y, gamma, beta, lambda)
+{
+	Xb <- x %*% beta
+	diff <- y - Xb
+	b2 <- beta^2
+	binv <- 1/(b2 + gamma^2)
+	loss <- t(diff) %*% diff + lambda * t(b2) %*% binv
+	return(loss)
+}
diff --git a/man/print.sparsestep.Rd b/man/print.sparsestep.Rd
index 6680788..ec09981 100644
--- a/man/print.sparsestep.Rd
+++ b/man/print.sparsestep.Rd
@@ -1,6 +1,7 @@
 % Generated by roxygen2 (4.1.1): do not edit by hand
 % Please edit documentation in R/print.sparsestep.R
 \name{print.sparsestep}
+\alias{print}
 \alias{print.sparsestep}
 \title{Print the fitted SparseStep model}
 \usage{
diff --git a/man/sparsestep.Rd b/man/sparsestep.Rd
index 3d4a311..b6e836e 100644
--- a/man/sparsestep.Rd
+++ b/man/sparsestep.Rd
@@ -6,10 +6,10 @@
 \alias{sparsestep-package}
 \title{Fits the SparseStep model}
 \usage{
-sparsestep(x, y, lambda = 1, gamma0 = 1e+06, gammastop = 1e-08,
-  IMsteps = 2, gammastep = 2, normalize = TRUE, intercept = TRUE,
-  force.zero = TRUE, threshold = 1e-07, XX = NULL, Xy = NULL,
-  use.XX = TRUE, use.Xy = TRUE)
+sparsestep(x, y, lambda = c(0.1, 0.5, 1, 5, 10), gamma0 = 1000,
+  gammastop = 1e-04, IMsteps = 2, gammastep = 2, normalize = TRUE,
+  intercept = TRUE, force.zero = TRUE, threshold = 1e-07, XX = NULL,
+  Xy = NULL, use.XX = TRUE, use.Xy = TRUE)
 }
 \arguments{
 \item{x}{matrix of predictors}
@@ -49,8 +49,26 @@ absolute zero}
 \item{use.Xy}{whether or not to compute X'y and return it}
 }
 \value{
-A "sparsestep" object is returned, for which predict, coef, methods
-exist.
+A "sparsestep" S3 object is returned, for which predict, coef, and
+plot methods exist. It has the following items:
+\item{call}{The call that was used to construct the model.}
+\item{lambda}{The value(s) of lambda used to construct the model.}
+\item{gamma0}{The gamma0 value of the model.}
+\item{gammastop}{The gammastop value of the model}
+\item{IMsteps}{The IMsteps value of the model}
+\item{gammastep}{The gammastep value of the model}
+\item{intercept}{Boolean indicating if an intercept was fitted in the
+model}
+\item{force.zero}{Boolean indicating if a force zero-setting was
+performed.}
+\item{threshold}{The threshold used for a forced zero-setting}
+\item{beta}{The resulting coefficients stored in a sparse matrix format
+(dgCMatrix). This matrix has dimensions nvar x nlambda}
+\item{a0}{The intercept vector for each value of gamma of length nlambda}
+\item{normx}{Vector used to normalize the columns of x}
+\item{meanx}{Vector of column means of x}
+\item{XX}{The matrix X'X if use.XX was set to TRUE}
+\item{Xy}{The matrix X'y if use.Xy was set to TRUE}
 }
 \description{
 Fits the SparseStep model for a single value of the
@@ -59,10 +77,15 @@ regularization parameter.
 sparsestep.
 }
 \examples{
-data(diabetes)
-attach(diabetes)
-object <- sparsestep(x, y)
-plot(object)
-detach(diabetes)
+x <- matrix(rnorm(100*20), 100, 20)
+y <- rnorm(100)
+fit <- sparsestep(x, y)
+}
+\author{
+Gertjan van den Burg (author and maintainer).
+}
+\seealso{
+\code{\link{coef}}, \code{\link{print}}, \code{\link{predict}},
+\code{\link{plot}}, and \code{\link{sparsestep.path}}.
 }
 
diff --git a/man/sparsestep.path.Rd b/man/sparsestep.path.Rd
index 43625d5..c038dd9 100644
--- a/man/sparsestep.path.Rd
+++ b/man/sparsestep.path.Rd
@@ -2,10 +2,12 @@
 % Please edit documentation in R/path.sparsestep.R
 \name{sparsestep.path}
 \alias{sparsestep.path}
-\title{Path algorithm for the SparseStep model}
+\title{Approximate path algorithm for the SparseStep model}
 \usage{
-sparsestep.path(x, y, max.depth = 10, intercept = TRUE, normalize = TRUE,
-  ...)
+sparsestep.path(x, y, max.depth = 10, gamma0 = 1000, gammastop = 1e-04,
+  IMsteps = 2, gammastep = 2, normalize = TRUE, intercept = TRUE,
+  force.zero = TRUE, threshold = 1e-07, XX = NULL, Xy = NULL,
+  use.XX = TRUE, use.Xy = TRUE)
 }
 \arguments{
 \item{x}{matrix of predictors}
@@ -14,10 +16,79 @@ sparsestep.path(x, y, max.depth = 10, intercept = TRUE, normalize = TRUE,
 
 \item{max.depth}{maximum recursion depth}
 
-\item{...}{further arguments to sparsestep()}
+\item{gamma0}{starting value of the gamma parameter}
+
+\item{gammastop}{stopping value of the gamma parameter}
+
+\item{IMsteps}{number of steps of the majorization algorithm to perform for
+each value of gamma}
+
+\item{gammastep}{factor to decrease gamma with at each step}
+
+\item{normalize}{if TRUE, each variable is standardized to have unit L2
+norm, otherwise it is left alone.}
+
+\item{intercept}{if TRUE, an intercept is included in the model (and not
+penalized), otherwise no intercept is included}
+
+\item{force.zero}{if TRUE, absolute coefficients smaller than the provided
+threshold value are set to absolute zero as a post-processing step,
+otherwise no thresholding is performed}
+
+\item{threshold}{threshold value to use for setting coefficients to
+absolute zero}
+
+\item{XX}{The X'X matrix; useful for repeated runs where X'X stays the same}
+
+\item{Xy}{The X'y matrix; useful for repeated runs where X'y stays the same}
+
+\item{use.XX}{whether or not to compute X'X and return it}
+
+\item{use.Xy}{whether or not to compute X'y and return it}
+}
+\value{
+A "sparsestep" S3 object is returned, for which print, predict,
+coef, and plot methods exist. It has the following items:
+\item{call}{The call that was used to construct the model.}
+\item{lambda}{The value(s) of lambda used to construct the model.}
+\item{gamma0}{The gamma0 value of the model.}
+\item{gammastop}{The gammastop value of the model}
+\item{IMsteps}{The IMsteps value of the model}
+\item{gammastep}{The gammastep value of the model}
+\item{intercept}{Boolean indicating if an intercept was fitted in the
+model}
+\item{force.zero}{Boolean indicating if a force zero-setting was
+performed.}
+\item{threshold}{The threshold used for a forced zero-setting}
+\item{beta}{The resulting coefficients stored in a sparse matrix format
+(dgCMatrix). This matrix has dimensions nvar x nlambda}
+\item{a0}{The intercept vector for each value of gamma of length nlambda}
+\item{normx}{Vector used to normalize the columns of x}
+\item{meanx}{Vector of column means of x}
+\item{XX}{The matrix X'X if use.XX was set to TRUE}
+\item{Xy}{The matrix X'y if use.Xy was set to TRUE}
 }
 \description{
 Fits the entire regularization path for SparseStep using a
-Golden Section search.
+Golden Section search. Note that this algorithm is approximate, there is no
+guarantee that the solutions _between_ induced values of lambdas do not
+differ from those calculated. For instance, if solutions are calculated at
+\eqn{\lambda_{i}}{\lambda[i]} and \eqn{\lambda_{i+1}}{\lambda[i+1]}, this
+algorithm ensures that \eqn{\lambda_{i+1}}{\lambda[i+1]} has one more zero
+than the solution at \eqn{\lambda_{i}}{\lambda[i]} (provided the recursion
+depth is large enough). There is however no guarantee that there are no
+different solutions between \eqn{\lambda_{i}}{\lambda[i]} and
+\eqn{\lambda_{i+1}}{\lambda[i+1]}.  This is an ongoing research topic.
+
+Note that this path algorithm is not faster than running the
+\code{sparsestep} function with the same \code{\lambda} sequence.
+}
+\examples{
+x <- matrix(rnorm(100*20), 100, 20)
+y <- rnorm(100)
+pth <- sparsestep.path(x, y)
+}
+\author{
+Gertjan van den Burg (author and maintainer).
 }
author	Gertjan van den Burg <gertjanvandenburg@gmail.com>	2016-02-10 15:27:35 -0500
committer	Gertjan van den Burg <gertjanvandenburg@gmail.com>	2016-02-10 15:27:35 -0500
commit	904e559845f48f38c7eb928d65876e3a064a17c7 (patch)
tree	1d4153ad46cfe94bda2bd5dc3105acf67b2dc994
parent	redefine gamma to correspond to theory (diff)
download	sparsestep-904e559845f48f38c7eb928d65876e3a064a17c7.tar.gz sparsestep-904e559845f48f38c7eb928d65876e3a064a17c7.zip