diff options
| author | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2018-03-30 22:08:12 +0100 |
|---|---|---|
| committer | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2018-03-30 22:08:12 +0100 |
| commit | b56c98398db6b85411cc262a835ed44224d066f3 (patch) | |
| tree | 97f864ba6c3d0c12693d8af40b1abe18e6a75687 | |
| parent | Fixes to get the input data from the call (diff) | |
| download | rgensvm-b56c98398db6b85411cc262a835ed44224d066f3.tar.gz rgensvm-b56c98398db6b85411cc262a835ed44224d066f3.zip | |
Minor fixes
| -rw-r--r-- | R/gensvm.R | 79 | ||||
| -rw-r--r-- | R/gensvm.grid.R | 12 | ||||
| -rw-r--r-- | R/plot.gensvm.grid.R | 5 | ||||
| -rw-r--r-- | man/gensvm.Rd | 5 | ||||
| -rw-r--r-- | man/gensvm.grid.Rd | 4 | ||||
| -rw-r--r-- | man/plot.gensvm.Rd | 23 | ||||
| -rw-r--r-- | man/plot.gensvm.grid.Rd | 6 | ||||
| -rw-r--r-- | man/predict.gensvm.Rd | 8 |
8 files changed, 72 insertions, 70 deletions
@@ -1,40 +1,41 @@ #' @title Fit the GenSVM model #' -#' @description Fits the Generalized Multiclass Support Vector Machine model -#' with the given parameters. See the package documentation +#' @description Fits the Generalized Multiclass Support Vector Machine model +#' with the given parameters. See the package documentation #' (\code{\link{gensvm-package}}) for more general information about GenSVM. #' #' @param x data matrix with the predictors. \cr\cr -#' Note that for SVMs categorical features should be converted to binary dummy -#' features. This can be done with using the \code{\link{model.matrix}} +#' Note that for SVMs categorical features should be converted to binary dummy +#' features. This can be done with using the \code{\link{model.matrix}} #' function (i.e. \code{model.matrix( ~ var - 1)}). #' @param y class labels #' @param p parameter for the L_p norm of the loss function (1.0 <= p <= 2.0) #' @param lambda regularization parameter for the loss function (lambda > 0) -#' @param kappa parameter for the hinge function in the loss function (kappa > +#' @param kappa parameter for the hinge function in the loss function (kappa > #' -1.0) -#' @param weights type of instance weights to use. Options are 'unit' for unit -#' weights and 'group' for group size correction weight (eq. 4 in the paper). -#' @param kernel the kernel type to use in the classifier. It must be one of -#' 'linear', 'poly', 'rbf', or 'sigmoid'. See the section "Kernels in GenSVM" +#' @param weights type or vector of instance weights to use. Options are 'unit' +#' for unit weights and 'group' for group size correction weights (eq. 4 in the +#' paper). Alternatively, a vector of weights can be provided. +#' @param kernel the kernel type to use in the classifier. It must be one of +#' 'linear', 'poly', 'rbf', or 'sigmoid'. See the section "Kernels in GenSVM" #' in \code{\link{gensvm-package}} for more info. -#' @param gamma kernel parameter for the rbf, polynomial, and sigmoid kernel. +#' @param gamma kernel parameter for the rbf, polynomial, and sigmoid kernel. #' If gamma is 'auto', then 1/n_features will be used. #' @param coef parameter for the polynomial and sigmoid kernel. #' @param degree parameter for the polynomial kernel -#' @param kernel.eigen.cutoff Cutoff point for the reduced eigendecomposition -#' used with kernel-GenSVM. Eigenvectors for which the ratio between their -#' corresponding eigenvalue and the largest eigenvalue is smaller than this +#' @param kernel.eigen.cutoff Cutoff point for the reduced eigendecomposition +#' used with kernel-GenSVM. Eigenvectors for which the ratio between their +#' corresponding eigenvalue and the largest eigenvalue is smaller than this #' cutoff value will be dropped. #' @param verbose Turn on verbose output and fit progress -#' @param random.seed Seed for the random number generator (useful for +#' @param random.seed Seed for the random number generator (useful for #' reproducible output) #' @param max.iter Maximum number of iterations of the optimization algorithm. -#' @param seed.V Matrix to warm-start the optimization algorithm. This is -#' typically the output of \code{coef(fit)}. Note that this function will +#' @param seed.V Matrix to warm-start the optimization algorithm. This is +#' typically the output of \code{coef(fit)}. Note that this function will #' silently drop seed.V if the dimensions don't match the provided data. #' -#' @return A "gensvm" S3 object is returned for which the print, predict, coef, +#' @return A "gensvm" S3 object is returned for which the print, predict, coef, #' and plot methods are available. It has the following items: #' \item{call}{The call that was used to construct the model.} #' \item{p}{The value of the lp norm in the loss function} @@ -46,7 +47,7 @@ #' \item{gamma}{The value of the gamma parameter of the kernel, if applicable} #' \item{coef}{The value of the coef parameter of the kernel, if applicable} #' \item{degree}{The degree of the kernel, if applicable} -#' \item{kernel.eigen.cutoff}{The cutoff value of the reduced +#' \item{kernel.eigen.cutoff}{The cutoff value of the reduced #' eigendecomposition of the kernel matrix.} #' \item{verbose}{Whether or not the model was fitted with progress output} #' \item{random.seed}{The random seed used to seed the model.} @@ -61,7 +62,7 @@ #' \item{training.time}{Total training time} #' #' @note -#' This function returns partial results when the computation is interrupted by +#' This function returns partial results when the computation is interrupted by #' the user. #' #' @author @@ -69,12 +70,12 @@ #' Maintainer: Gerrit J.J. van den Burg <gertjanvandenburg@gmail.com> #' #' @references -#' Van den Burg, G.J.J. and Groenen, P.J.F. (2016). \emph{GenSVM: A Generalized -#' Multiclass Support Vector Machine}, Journal of Machine Learning Research, +#' Van den Burg, G.J.J. and Groenen, P.J.F. (2016). \emph{GenSVM: A Generalized +#' Multiclass Support Vector Machine}, Journal of Machine Learning Research, #' 17(225):1--42. URL \url{http://jmlr.org/papers/v17/14-526.html}. #' #' @seealso -#' \code{\link{coef}}, \code{\link{print}}, \code{\link{predict}}, +#' \code{\link{coef}}, \code{\link{print}}, \code{\link{predict}}, #' \code{\link{plot}}, \code{\link{gensvm.grid}}, \code{\link{gensvm-package}} #' #' @export @@ -109,9 +110,9 @@ #' all.equal(coef(fit), coef(fit2)) #' #' -gensvm <- function(X, y, p=1.0, lambda=1e-8, kappa=0.0, epsilon=1e-6, - weights='unit', kernel='linear', gamma='auto', coef=1.0, - degree=2.0, kernel.eigen.cutoff=1e-8, verbose=FALSE, +gensvm <- function(x, y, p=1.0, lambda=1e-8, kappa=0.0, epsilon=1e-6, + weights='unit', kernel='linear', gamma='auto', coef=1.0, + degree=2.0, kernel.eigen.cutoff=1e-8, verbose=FALSE, random.seed=NULL, max.iter=1e8, seed.V=NULL) { call <- match.call() @@ -121,13 +122,13 @@ gensvm <- function(X, y, p=1.0, lambda=1e-8, kappa=0.0, epsilon=1e-6, return(invisible(NULL)) } - # Generate the random.seed value in R if it is NULL. This way users can + # Generate the random.seed value in R if it is NULL. This way users can # reproduce the run because it is returned in the output object. if (is.null(random.seed)) random.seed <- runif(1) * (2**31 - 1) - n.objects <- nrow(X) - n.features <- ncol(X) + n.objects <- nrow(x) + n.features <- ncol(x) n.classes <- length(unique(y)) # Convert labels to integers @@ -154,7 +155,7 @@ gensvm <- function(X, y, p=1.0, lambda=1e-8, kappa=0.0, epsilon=1e-6, # Call the C train routine out <- .Call("R_gensvm_train", - as.matrix(X), + data.matrix(x), as.integer(y.clean), p, lambda, @@ -177,16 +178,16 @@ gensvm <- function(X, y, p=1.0, lambda=1e-8, kappa=0.0, epsilon=1e-6, as.integer(n.classes)) # build the output object - object <- list(call = call, p = p, lambda = lambda, kappa = kappa, - epsilon = epsilon, weights = weights, kernel = kernel, - gamma = gamma, coef = coef, degree = degree, - kernel.eigen.cutoff = kernel.eigen.cutoff, - verbose = verbose, random.seed = random.seed, - max.iter = max.iter, n.objects = n.objects, - n.features = n.features, n.classes = n.classes, - classes = classes, V = out$V, n.iter = out$n.iter, - n.support = out$n.support, - training.time = out$training.time, + object <- list(call = call, p = p, lambda = lambda, kappa = kappa, + epsilon = epsilon, weights = weights, kernel = kernel, + gamma = gamma, coef = coef, degree = degree, + kernel.eigen.cutoff = kernel.eigen.cutoff, + verbose = verbose, random.seed = random.seed, + max.iter = max.iter, n.objects = n.objects, + n.features = n.features, n.classes = n.classes, + classes = classes, V = out$V, n.iter = out$n.iter, + n.support = out$n.support, + training.time = out$training.time) class(object) <- "gensvm" return(object) diff --git a/R/gensvm.grid.R b/R/gensvm.grid.R index c541ea0..5fa026e 100644 --- a/R/gensvm.grid.R +++ b/R/gensvm.grid.R @@ -6,7 +6,7 @@ #' starts to speed up computation. The function uses the GenSVM C library for #' speed. #' -#' @param X training data matrix. We denote the size of this matrix by +#' @param x training data matrix. We denote the size of this matrix by #' n_samples x n_features. #' @param y training vector of class labes of length n_samples. The number of #' unique labels in this vector is denoted by n_classes. @@ -147,13 +147,13 @@ #' lambda=c(1e-8, 1e-6), max.iter=c(5000)) #' grid <- gensvm.grid(x, y, param.grid=pg, verbose=2) #' -gensvm.grid <- function(X, y, param.grid='tiny', refit=TRUE, scoring=NULL, cv=3, +gensvm.grid <- function(x, y, param.grid='tiny', refit=TRUE, scoring=NULL, cv=3, verbose=0, return.train.score=TRUE) { call <- match.call() - n.objects <- nrow(X) - n.features <- ncol(X) + n.objects <- nrow(x) + n.features <- ncol(x) n.classes <- length(unique(y)) if (n.objects != length(y)) { @@ -195,7 +195,7 @@ gensvm.grid <- function(X, y, param.grid='tiny', refit=TRUE, scoring=NULL, cv=3, } results <- .Call("R_gensvm_grid", - as.matrix(X), + data.matrix(x), as.integer(y.clean), as.matrix(C.param.grid), as.integer(nrow(C.param.grid)), @@ -225,7 +225,7 @@ gensvm.grid <- function(X, y, param.grid='tiny', refit=TRUE, scoring=NULL, cv=3, if (refit && !is.na(best.index)) { gensvm.args <- as.list(best.params) - gensvm.args$X <- X + gensvm.args$x <- x gensvm.args$y <- y gensvm.args$verbose <- if(verbose>1) 1 else 0 if (verbose > 1) diff --git a/R/plot.gensvm.grid.R b/R/plot.gensvm.grid.R index 6f042e9..abb0601 100644 --- a/R/plot.gensvm.grid.R +++ b/R/plot.gensvm.grid.R @@ -5,7 +5,6 @@ #' \code{\link{plot.gensvm}} for more information. #' #' @param grid A \code{gensvm.grid} object trained with refit=TRUE -#' @param x the dataset to plot #' @param ... further arguments are passed to the plot function #' #' @return returns the object passed as input @@ -32,12 +31,12 @@ #' grid <- gensvm.grid(x, y) #' plot(grid, x) #' -plot.gensvm.grid <- function(grid, x, ...) +plot.gensvm.grid <- function(grid, ...) { if (is.null(grid$best.estimator)) { cat("Error: Can't plot, the best.estimator element is NULL\n") return } fit <- grid$best.estimator - return(plot(fit, x, ...)) + return(plot(fit, ...)) } diff --git a/man/gensvm.Rd b/man/gensvm.Rd index 5aabcaa..e48444f 100644 --- a/man/gensvm.Rd +++ b/man/gensvm.Rd @@ -24,8 +24,9 @@ function (i.e. \code{model.matrix( ~ var - 1)}).} \item{kappa}{parameter for the hinge function in the loss function (kappa > -1.0)} -\item{weights}{type of instance weights to use. Options are 'unit' for unit -weights and 'group' for group size correction weight (eq. 4 in the paper).} +\item{weights}{type or vector of instance weights to use. Options are 'unit' +for unit weights and 'group' for group size correction weights (eq. 4 in the +paper). Alternatively, a vector of weights can be provided.} \item{kernel}{the kernel type to use in the classifier. It must be one of 'linear', 'poly', 'rbf', or 'sigmoid'. See the section "Kernels in GenSVM" diff --git a/man/gensvm.grid.Rd b/man/gensvm.grid.Rd index cc44286..b365e10 100644 --- a/man/gensvm.grid.Rd +++ b/man/gensvm.grid.Rd @@ -4,11 +4,11 @@ \alias{gensvm.grid} \title{Cross-validated grid search for GenSVM} \usage{ -gensvm.grid(X, y, param.grid = "tiny", refit = TRUE, scoring = NULL, +gensvm.grid(x, y, param.grid = "tiny", refit = TRUE, scoring = NULL, cv = 3, verbose = 0, return.train.score = TRUE) } \arguments{ -\item{X}{training data matrix. We denote the size of this matrix by +\item{x}{training data matrix. We denote the size of this matrix by n_samples x n_features.} \item{y}{training vector of class labes of length n_samples. The number of diff --git a/man/plot.gensvm.Rd b/man/plot.gensvm.Rd index a958c4d..9c15bea 100644 --- a/man/plot.gensvm.Rd +++ b/man/plot.gensvm.Rd @@ -4,18 +4,15 @@ \alias{plot.gensvm} \title{Plot the simplex space of the fitted GenSVM model} \usage{ -\method{plot}{gensvm}(fit, x, y.true = NULL, with.margins = TRUE, +\method{plot}{gensvm}(fit, y, x.test = NULL, with.margins = TRUE, with.shading = TRUE, with.legend = TRUE, center.plot = TRUE, xlim = NULL, ylim = NULL, ...) } \arguments{ \item{fit}{A fitted \code{gensvm} object} -\item{x}{the dataset to plot} - -\item{y.true}{the true data labels. If provided the objects will be colored -using the true labels instead of the predicted labels. This makes it easy to -identify misclassified objects.} +\item{y}{the labels to color points with (if NULL the predicted labels are +used)} \item{with.margins}{plot the margins} @@ -34,6 +31,8 @@ bounds will be used for the vertical axis and the value of center.plot will be ignored} \item{...}{further arguments are passed to the builtin plot() function} + +\item{x}{the dataset to plot (if NULL the training data is used)} } \value{ returns the object passed as input @@ -52,16 +51,16 @@ y <- iris[, 5] fit <- gensvm(x, y) # plot the simplex space -plot(fit, x) +plot(fit) # plot and use the true colors (easier to spot misclassified samples) -plot(fit, x, y.true=y) +plot(fit, y) # plot only misclassified samples -x.mis <- x[predict(fit, x) != y, ] -y.mis.true <- y[predict(fit, x) != y] -plot(fit, x.mis) -plot(fit, x.mis, y.true=y.mis.true) +x.mis <- x[predict(fit) != y, ] +y.mis.true <- y[predict(fit) != y] +plot(fit, x.test=x.mis) +plot(fit, y.mis.true, x.test=x.mis) } \author{ diff --git a/man/plot.gensvm.grid.Rd b/man/plot.gensvm.grid.Rd index 0db01ec..3e5ef39 100644 --- a/man/plot.gensvm.grid.Rd +++ b/man/plot.gensvm.grid.Rd @@ -4,14 +4,14 @@ \alias{plot.gensvm.grid} \title{Plot the simplex space of the best fitted model in the GenSVMGrid} \usage{ -\method{plot}{gensvm.grid}(grid, x, ...) +\method{plot}{gensvm.grid}(grid, ...) } \arguments{ \item{grid}{A \code{gensvm.grid} object trained with refit=TRUE} -\item{x}{the dataset to plot} - \item{...}{further arguments are passed to the plot function} + +\item{x}{the dataset to plot} } \value{ returns the object passed as input diff --git a/man/predict.gensvm.Rd b/man/predict.gensvm.Rd index 2881e26..0b82662 100644 --- a/man/predict.gensvm.Rd +++ b/man/predict.gensvm.Rd @@ -5,13 +5,15 @@ \alias{predict.gensvm} \title{Predict class labels with the GenSVM model} \usage{ -\method{predict}{gensvm}(fit, x.test, ...) +\method{predict}{gensvm}(fit, newdata, add.rownames = FALSE, ...) } \arguments{ \item{fit}{Fitted \code{gensvm} object} -\item{x.test}{Matrix of new values for \code{x} for which predictions need -to be made.} +\item{newdata}{Matrix of new data for which predictions need to be made.} + +\item{add.rownames}{add the rownames from the training data to the +predictions} \item{\dots}{further arguments are ignored} } |
