GenSVM R package

author: Gertjan van den Burg <gertjanvandenburg@gmail.com> 2018-03-27 12:31:28 +0100
committer: Gertjan van den Burg <gertjanvandenburg@gmail.com> 2018-03-27 12:31:28 +0100
commit: 004941896bac692d354c41a3334d20ee1d4627f7 (patch)
tree: 2b11e42d8524843409e2bf8deb4ceb74c8b69347 /R/gensvm.maxabs.scale.R
parent: updates to GenSVM C library (diff)
download: rgensvm-004941896bac692d354c41a3334d20ee1d4627f7.tar.gz
rgensvm-004941896bac692d354c41a3334d20ee1d4627f7.zip
1 files changed, 77 insertions, 0 deletions
diff --git a/R/gensvm.maxabs.scale.R b/R/gensvm.maxabs.scale.R
new file mode 100644
index 0000000..6ac351b
--- /dev/null
+++ b/R/gensvm.maxabs.scale.R
@@ -0,0 +1,77 @@
+#' @title Scale each column of a matrix by its maximum absolute value
+#'
+#' @description Scaling a dataset can greatly decrease the computation time of 
+#' GenSVM. This function scales the data by dividing each column of a matrix by 
+#' the maximum absolute value of that column. This preserves sparsity in the 
+#' data while mapping each column to the interval [-1, 1].
+#'
+#' Optionally a test dataset can be provided as well. In this case, the scaling 
+#' will be computed on the first argument (\code{x}) and applied to the test 
+#' dataset. Note that the return value is a list when this argument is 
+#' supplied.
+#'
+#' @param x a matrix to scale
+#' @param x.test (optional) a test matrix to scale as well.
+#'
+#' @return if x.test=NULL a scaled matrix where the maximum absolute value of 
+#' each nonzero column is 1, so all values lie in [-1, 1]. If x.test 
+#' is supplied, a list with elements \code{x} and \code{x.test} representing 
+#' the scaled datasets.
+#'
+#' @author
+#' Gerrit J.J. van den Burg, Patrick J.F. Groenen \cr
+#' Maintainer: Gerrit J.J. van den Burg <gertjanvandenburg@gmail.com>
+#'
+#' @references
+#' Van den Burg, G.J.J. and Groenen, P.J.F. (2016). \emph{GenSVM: A Generalized 
+#' Multiclass Support Vector Machine}, Journal of Machine Learning Research, 
+#' 17(225):1--42. URL \url{http://jmlr.org/papers/v17/14-526.html}.
+#'
+#' @export
+#'
+#' @examples
+#' x <- iris[, -5]
+#'
+#' # check the min and max of the columns
+#' apply(x, 2, min)
+#' apply(x, 2, max)
+#'
+#' # scale the data
+#' x.scale <- gensvm.maxabs.scale(x)
+#'
+#' # check again (max should be 1.0, min shouldn't be below -1)
+#' apply(x.scale, 2, min)
+#' apply(x.scale, 2, max)
+#'
+#' # with a train and test dataset
+#' x <- iris[, -5]
+#' split <- gensvm.train.test.split(x)
+#' x.train <- split$x.train
+#' x.test <- split$x.test
+#' scaled <- gensvm.maxabs.scale(x.train, x.test)
+#' x.train.scl <- scaled$x
+#' x.test.scl <- scaled$x.test
+#'
+gensvm.maxabs.scale <- function(x, x.test=NULL)
+{
+    xm <- as.matrix(x)
+    # abs() is vectorised, so one apply() also works for single-row input
+    max.abs <- apply(abs(xm), 2, max)
+    # all-zero columns get divisor 1 so they are returned unchanged
+    max.abs[max.abs == 0] <- 1
+
+    # sweep() divides column-wise; unlike `%*% diag(1 / max.abs)` it works
+    # for a single column and keeps NA/Inf from leaking into other columns
+    scaled <- sweep(xm, 2, max.abs, "/")
+    dimnames(scaled) <- list(rownames(x), colnames(x))
+    if (is.null(x.test)) {
+        return(scaled)
+    }
+
+    xtm <- as.matrix(x.test)
+    # x.test is scaled with the column maxima computed on x
+    stopifnot(ncol(xtm) == ncol(xm))
+    scaled.test <- sweep(xtm, 2, max.abs, "/")
+    dimnames(scaled.test) <- list(rownames(x.test), colnames(x.test))
+    return(list(x=scaled, x.test=scaled.test))
+}
author	Gertjan van den Burg <gertjanvandenburg@gmail.com>	2018-03-27 12:31:28 +0100
committer	Gertjan van den Burg <gertjanvandenburg@gmail.com>	2018-03-27 12:31:28 +0100
commit	004941896bac692d354c41a3334d20ee1d4627f7 (patch)
tree	2b11e42d8524843409e2bf8deb4ceb74c8b69347 /R/gensvm.maxabs.scale.R
parent	updates to GenSVM C library (diff)
download	rgensvm-004941896bac692d354c41a3334d20ee1d4627f7.tar.gz rgensvm-004941896bac692d354c41a3334d20ee1d4627f7.zip