Minor fixes

author: Gertjan van den Burg <gertjanvandenburg@gmail.com> 2018-03-30 22:08:12 +0100
committer: Gertjan van den Burg <gertjanvandenburg@gmail.com> 2018-03-30 22:08:12 +0100
commit: b56c98398db6b85411cc262a835ed44224d066f3 (patch)
tree: 97f864ba6c3d0c12693d8af40b1abe18e6a75687
parent: Fixes to get the input data from the call (diff)
download: rgensvm-b56c98398db6b85411cc262a835ed44224d066f3.tar.gz
rgensvm-b56c98398db6b85411cc262a835ed44224d066f3.zip
8 files changed, 72 insertions, 70 deletions
diff --git a/R/gensvm.R b/R/gensvm.R
index acc39ef..8994d3a 100644
--- a/R/gensvm.R
+++ b/R/gensvm.R
@@ -1,40 +1,41 @@
 #' @title Fit the GenSVM model
 #'
-#' @description Fits the Generalized Multiclass Support Vector Machine model 
-#' with the given parameters. See the package documentation 
+#' @description Fits the Generalized Multiclass Support Vector Machine model
+#' with the given parameters. See the package documentation
 #' (\code{\link{gensvm-package}}) for more general information about GenSVM.
 #'
 #' @param x data matrix with the predictors. \cr\cr
-#' Note that for SVMs categorical features should be converted to binary dummy 
-#' features. This can be done with using the \code{\link{model.matrix}} 
+#' Note that for SVMs categorical features should be converted to binary dummy
+#' features. This can be done with using the \code{\link{model.matrix}}
 #' function (i.e. \code{model.matrix( ~ var - 1)}).
 #' @param y class labels
 #' @param p parameter for the L_p norm of the loss function (1.0 <= p <= 2.0)
 #' @param lambda regularization parameter for the loss function (lambda > 0)
-#' @param kappa parameter for the hinge function in the loss function (kappa > 
+#' @param kappa parameter for the hinge function in the loss function (kappa >
 #' -1.0)
-#' @param weights type of instance weights to use. Options are 'unit' for unit 
-#' weights and 'group' for group size correction weight (eq. 4 in the paper).
-#' @param kernel the kernel type to use in the classifier. It must be one of 
-#' 'linear', 'poly', 'rbf', or 'sigmoid'. See the section "Kernels in GenSVM" 
+#' @param weights type or vector of instance weights to use. Options are 'unit'
+#' for unit weights and 'group' for group size correction weights (eq. 4 in the
+#' paper). Alternatively, a vector of weights can be provided.
+#' @param kernel the kernel type to use in the classifier. It must be one of
+#' 'linear', 'poly', 'rbf', or 'sigmoid'. See the section "Kernels in GenSVM"
 #' in \code{\link{gensvm-package}} for more info.
-#' @param gamma kernel parameter for the rbf, polynomial, and sigmoid kernel.  
+#' @param gamma kernel parameter for the rbf, polynomial, and sigmoid kernel.
 #' If gamma is 'auto', then 1/n_features will be used.
 #' @param coef parameter for the polynomial and sigmoid kernel.
 #' @param degree parameter for the polynomial kernel
-#' @param kernel.eigen.cutoff Cutoff point for the reduced eigendecomposition 
-#' used with kernel-GenSVM. Eigenvectors for which the ratio between their 
-#' corresponding eigenvalue and the largest eigenvalue is smaller than this 
+#' @param kernel.eigen.cutoff Cutoff point for the reduced eigendecomposition
+#' used with kernel-GenSVM. Eigenvectors for which the ratio between their
+#' corresponding eigenvalue and the largest eigenvalue is smaller than this
 #' cutoff value will be dropped.
 #' @param verbose Turn on verbose output and fit progress
-#' @param random.seed Seed for the random number generator (useful for 
+#' @param random.seed Seed for the random number generator (useful for
 #' reproducible output)
 #' @param max.iter Maximum number of iterations of the optimization algorithm.
-#' @param seed.V Matrix to warm-start the optimization algorithm. This is 
-#' typically the output of \code{coef(fit)}. Note that this function will 
+#' @param seed.V Matrix to warm-start the optimization algorithm. This is
+#' typically the output of \code{coef(fit)}. Note that this function will
 #' silently drop seed.V if the dimensions don't match the provided data.
 #'
-#' @return A "gensvm" S3 object is returned for which the print, predict, coef, 
+#' @return A "gensvm" S3 object is returned for which the print, predict, coef,
 #' and plot methods are available. It has the following items:
 #' \item{call}{The call that was used to construct the model.}
 #' \item{p}{The value of the lp norm in the loss function}
@@ -46,7 +47,7 @@
 #' \item{gamma}{The value of the gamma parameter of the kernel, if applicable}
 #' \item{coef}{The value of the coef parameter of the kernel, if applicable}
 #' \item{degree}{The degree of the kernel, if applicable}
-#' \item{kernel.eigen.cutoff}{The cutoff value of the reduced 
+#' \item{kernel.eigen.cutoff}{The cutoff value of the reduced
 #' eigendecomposition of the kernel matrix.}
 #' \item{verbose}{Whether or not the model was fitted with progress output}
 #' \item{random.seed}{The random seed used to seed the model.}
@@ -61,7 +62,7 @@
 #' \item{training.time}{Total training time}
 #'
 #' @note
-#' This function returns partial results when the computation is interrupted by 
+#' This function returns partial results when the computation is interrupted by
 #' the user.
 #'
 #' @author
@@ -69,12 +70,12 @@
 #' Maintainer: Gerrit J.J. van den Burg <gertjanvandenburg@gmail.com>
 #'
 #' @references
-#' Van den Burg, G.J.J. and Groenen, P.J.F. (2016). \emph{GenSVM: A Generalized 
-#' Multiclass Support Vector Machine}, Journal of Machine Learning Research, 
+#' Van den Burg, G.J.J. and Groenen, P.J.F. (2016). \emph{GenSVM: A Generalized
+#' Multiclass Support Vector Machine}, Journal of Machine Learning Research,
 #' 17(225):1--42. URL \url{http://jmlr.org/papers/v17/14-526.html}.
 #'
 #' @seealso
-#' \code{\link{coef}}, \code{\link{print}}, \code{\link{predict}}, 
+#' \code{\link{coef}}, \code{\link{print}}, \code{\link{predict}},
 #' \code{\link{plot}}, \code{\link{gensvm.grid}}, \code{\link{gensvm-package}}
 #'
 #' @export
@@ -109,9 +110,9 @@
 #' all.equal(coef(fit), coef(fit2))
 #'
 #'
-gensvm <- function(X, y, p=1.0, lambda=1e-8, kappa=0.0, epsilon=1e-6, 
-                   weights='unit', kernel='linear', gamma='auto', coef=1.0, 
-                   degree=2.0, kernel.eigen.cutoff=1e-8, verbose=FALSE, 
+gensvm <- function(x, y, p=1.0, lambda=1e-8, kappa=0.0, epsilon=1e-6,
+                   weights='unit', kernel='linear', gamma='auto', coef=1.0,
+                   degree=2.0, kernel.eigen.cutoff=1e-8, verbose=FALSE,
                    random.seed=NULL, max.iter=1e8, seed.V=NULL)
 {
     call <- match.call()
@@ -121,13 +122,13 @@ gensvm <- function(X, y, p=1.0, lambda=1e-8, kappa=0.0, epsilon=1e-6,
         return(invisible(NULL))
     }
 
-    # Generate the random.seed value in R if it is NULL. This way users can 
+    # Generate the random.seed value in R if it is NULL. This way users can
     # reproduce the run because it is returned in the output object.
     if (is.null(random.seed))
         random.seed <- runif(1) * (2**31 - 1)
 
-    n.objects <- nrow(X)
-    n.features <- ncol(X)
+    n.objects <- nrow(x)
+    n.features <- ncol(x)
     n.classes <- length(unique(y))
 
     # Convert labels to integers
@@ -154,7 +155,7 @@ gensvm <- function(X, y, p=1.0, lambda=1e-8, kappa=0.0, epsilon=1e-6,
 
     # Call the C train routine
     out <- .Call("R_gensvm_train",
-                  as.matrix(X),
+                  data.matrix(x),
                   as.integer(y.clean),
                   p,
                   lambda,
@@ -177,16 +178,16 @@ gensvm <- function(X, y, p=1.0, lambda=1e-8, kappa=0.0, epsilon=1e-6,
                   as.integer(n.classes))
 
     # build the output object
-    object <- list(call = call, p = p, lambda = lambda, kappa = kappa, 
-                   epsilon = epsilon, weights = weights, kernel = kernel, 
-                   gamma = gamma, coef = coef, degree = degree, 
-                   kernel.eigen.cutoff = kernel.eigen.cutoff, 
-                   verbose = verbose, random.seed = random.seed, 
-                   max.iter = max.iter, n.objects = n.objects, 
-                   n.features = n.features, n.classes = n.classes, 
-                   classes = classes, V = out$V, n.iter = out$n.iter, 
-                   n.support = out$n.support, 
-                   training.time = out$training.time,
+    object <- list(call = call, p = p, lambda = lambda, kappa = kappa,
+                   epsilon = epsilon, weights = weights, kernel = kernel,
+                   gamma = gamma, coef = coef, degree = degree,
+                   kernel.eigen.cutoff = kernel.eigen.cutoff,
+                   verbose = verbose, random.seed = random.seed,
+                   max.iter = max.iter, n.objects = n.objects,
+                   n.features = n.features, n.classes = n.classes,
+                   classes = classes, V = out$V, n.iter = out$n.iter,
+                   n.support = out$n.support,
+                   training.time = out$training.time)
     class(object) <- "gensvm"
 
     return(object)
diff --git a/R/gensvm.grid.R b/R/gensvm.grid.R
index c541ea0..5fa026e 100644
--- a/R/gensvm.grid.R
+++ b/R/gensvm.grid.R
@@ -6,7 +6,7 @@
 #' starts to speed up computation. The function uses the GenSVM C library for 
 #' speed. 
 #'
-#' @param X training data matrix. We denote the size of this matrix by 
+#' @param x training data matrix. We denote the size of this matrix by 
 #' n_samples x n_features.
 #' @param y training vector of class labes of length n_samples. The number of 
 #' unique labels in this vector is denoted by n_classes.
@@ -147,13 +147,13 @@
 #'                   lambda=c(1e-8, 1e-6), max.iter=c(5000))
 #' grid <- gensvm.grid(x, y, param.grid=pg, verbose=2)
 #'
-gensvm.grid <- function(X, y, param.grid='tiny', refit=TRUE, scoring=NULL, cv=3, 
+gensvm.grid <- function(x, y, param.grid='tiny', refit=TRUE, scoring=NULL, cv=3, 
                         verbose=0, return.train.score=TRUE)
 {
     call <- match.call()
 
-    n.objects <- nrow(X)
-    n.features <- ncol(X)
+    n.objects <- nrow(x)
+    n.features <- ncol(x)
     n.classes <- length(unique(y))
 
     if (n.objects != length(y)) {
@@ -195,7 +195,7 @@ gensvm.grid <- function(X, y, param.grid='tiny', refit=TRUE, scoring=NULL, cv=3,
     }
 
     results <- .Call("R_gensvm_grid",
-                 as.matrix(X),
+                 data.matrix(x),
                  as.integer(y.clean),
                  as.matrix(C.param.grid),
                  as.integer(nrow(C.param.grid)),
@@ -225,7 +225,7 @@ gensvm.grid <- function(X, y, param.grid='tiny', refit=TRUE, scoring=NULL, cv=3,
 
     if (refit && !is.na(best.index)) {
         gensvm.args <- as.list(best.params)
-        gensvm.args$X <- X
+        gensvm.args$x <- x
         gensvm.args$y <- y
         gensvm.args$verbose <- if(verbose>1) 1 else 0
         if (verbose > 1)
diff --git a/R/plot.gensvm.grid.R b/R/plot.gensvm.grid.R
index 6f042e9..abb0601 100644
--- a/R/plot.gensvm.grid.R
+++ b/R/plot.gensvm.grid.R
@@ -5,7 +5,6 @@
 #' \code{\link{plot.gensvm}} for more information.
 #'
 #' @param grid A \code{gensvm.grid} object trained with refit=TRUE
-#' @param x the dataset to plot
 #' @param ... further arguments are passed to the plot function
 #'
 #' @return returns the object passed as input
@@ -32,12 +31,12 @@
 #' grid <- gensvm.grid(x, y)
 #' plot(grid, x)
 #'
-plot.gensvm.grid <- function(grid, x, ...)
+plot.gensvm.grid <- function(grid, ...)
 {
     if (is.null(grid$best.estimator)) {
         cat("Error: Can't plot, the best.estimator element is NULL\n")
         return
     }
     fit <- grid$best.estimator
-    return(plot(fit, x, ...))
+    return(plot(fit, ...))
 }
diff --git a/man/gensvm.Rd b/man/gensvm.Rd
index 5aabcaa..e48444f 100644
--- a/man/gensvm.Rd
+++ b/man/gensvm.Rd
@@ -24,8 +24,9 @@ function (i.e. \code{model.matrix( ~ var - 1)}).}
 \item{kappa}{parameter for the hinge function in the loss function (kappa > 
 -1.0)}
 
-\item{weights}{type of instance weights to use. Options are 'unit' for unit 
-weights and 'group' for group size correction weight (eq. 4 in the paper).}
+\item{weights}{type or vector of instance weights to use. Options are 'unit' 
+for unit weights and 'group' for group size correction weights (eq. 4 in the 
+paper). Alternatively, a vector of weights can be provided.}
 
 \item{kernel}{the kernel type to use in the classifier. It must be one of 
 'linear', 'poly', 'rbf', or 'sigmoid'. See the section "Kernels in GenSVM" 
diff --git a/man/gensvm.grid.Rd b/man/gensvm.grid.Rd
index cc44286..b365e10 100644
--- a/man/gensvm.grid.Rd
+++ b/man/gensvm.grid.Rd
@@ -4,11 +4,11 @@
 \alias{gensvm.grid}
 \title{Cross-validated grid search for GenSVM}
 \usage{
-gensvm.grid(X, y, param.grid = "tiny", refit = TRUE, scoring = NULL,
+gensvm.grid(x, y, param.grid = "tiny", refit = TRUE, scoring = NULL,
   cv = 3, verbose = 0, return.train.score = TRUE)
 }
 \arguments{
-\item{X}{training data matrix. We denote the size of this matrix by 
+\item{x}{training data matrix. We denote the size of this matrix by 
 n_samples x n_features.}
 
 \item{y}{training vector of class labes of length n_samples. The number of 
diff --git a/man/plot.gensvm.Rd b/man/plot.gensvm.Rd
index a958c4d..9c15bea 100644
--- a/man/plot.gensvm.Rd
+++ b/man/plot.gensvm.Rd
@@ -4,18 +4,15 @@
 \alias{plot.gensvm}
 \title{Plot the simplex space of the fitted GenSVM model}
 \usage{
-\method{plot}{gensvm}(fit, x, y.true = NULL, with.margins = TRUE,
+\method{plot}{gensvm}(fit, y, x.test = NULL, with.margins = TRUE,
   with.shading = TRUE, with.legend = TRUE, center.plot = TRUE,
   xlim = NULL, ylim = NULL, ...)
 }
 \arguments{
 \item{fit}{A fitted \code{gensvm} object}
 
-\item{x}{the dataset to plot}
-
-\item{y.true}{the true data labels. If provided the objects will be colored 
-using the true labels instead of the predicted labels. This makes it easy to 
-identify misclassified objects.}
+\item{y}{the labels to color points with (if NULL the predicted labels are 
+used)}
 
 \item{with.margins}{plot the margins}
 
@@ -34,6 +31,8 @@ bounds will be used for the vertical axis and the value of center.plot will
 be ignored}
 
 \item{...}{further arguments are passed to the builtin plot() function}
+
+\item{x}{the dataset to plot (if NULL the training data is used)}
 }
 \value{
 returns the object passed as input
@@ -52,16 +51,16 @@ y <- iris[, 5]
 fit <- gensvm(x, y)
 
 # plot the simplex space
-plot(fit, x)
+plot(fit)
 
 # plot and use the true colors (easier to spot misclassified samples)
-plot(fit, x, y.true=y)
+plot(fit, y)
 
 # plot only misclassified samples
-x.mis <- x[predict(fit, x) != y, ]
-y.mis.true <- y[predict(fit, x) != y]
-plot(fit, x.mis)
-plot(fit, x.mis, y.true=y.mis.true)
+x.mis <- x[predict(fit) != y, ]
+y.mis.true <- y[predict(fit) != y]
+plot(fit, x.test=x.mis)
+plot(fit, y.mis.true, x.test=x.mis)
 
 }
 \author{
diff --git a/man/plot.gensvm.grid.Rd b/man/plot.gensvm.grid.Rd
index 0db01ec..3e5ef39 100644
--- a/man/plot.gensvm.grid.Rd
+++ b/man/plot.gensvm.grid.Rd
@@ -4,14 +4,14 @@
 \alias{plot.gensvm.grid}
 \title{Plot the simplex space of the best fitted model in the GenSVMGrid}
 \usage{
-\method{plot}{gensvm.grid}(grid, x, ...)
+\method{plot}{gensvm.grid}(grid, ...)
 }
 \arguments{
 \item{grid}{A \code{gensvm.grid} object trained with refit=TRUE}
 
-\item{x}{the dataset to plot}
-
 \item{...}{further arguments are passed to the plot function}
+
+\item{x}{the dataset to plot}
 }
 \value{
 returns the object passed as input
diff --git a/man/predict.gensvm.Rd b/man/predict.gensvm.Rd
index 2881e26..0b82662 100644
--- a/man/predict.gensvm.Rd
+++ b/man/predict.gensvm.Rd
@@ -5,13 +5,15 @@
 \alias{predict.gensvm}
 \title{Predict class labels with the GenSVM model}
 \usage{
-\method{predict}{gensvm}(fit, x.test, ...)
+\method{predict}{gensvm}(fit, newdata, add.rownames = FALSE, ...)
 }
 \arguments{
 \item{fit}{Fitted \code{gensvm} object}
 
-\item{x.test}{Matrix of new values for \code{x} for which predictions need 
-to be made.}
+\item{newdata}{Matrix of new data for which predictions need to be made.}
+
+\item{add.rownames}{add the rownames from the training data to the 
+predictions}
 
 \item{\dots}{further arguments are ignored}
 }
author	Gertjan van den Burg <gertjanvandenburg@gmail.com>	2018-03-30 22:08:12 +0100
committer	Gertjan van den Burg <gertjanvandenburg@gmail.com>	2018-03-30 22:08:12 +0100
commit	b56c98398db6b85411cc262a835ed44224d066f3 (patch)
tree	97f864ba6c3d0c12693d8af40b1abe18e6a75687
parent	Fixes to get the input data from the call (diff)
download	rgensvm-b56c98398db6b85411cc262a835ed44224d066f3.tar.gz rgensvm-b56c98398db6b85411cc262a835ed44224d066f3.zip