diff options
| author | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2018-03-27 12:31:28 +0100 |
|---|---|---|
| committer | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2018-03-27 12:31:28 +0100 |
| commit | 004941896bac692d354c41a3334d20ee1d4627f7 (patch) | |
| tree | 2b11e42d8524843409e2bf8deb4ceb74c8b69347 /R/gensvm.grid.R | |
| parent | updates to GenSVM C library (diff) | |
| download | rgensvm-004941896bac692d354c41a3334d20ee1d4627f7.tar.gz rgensvm-004941896bac692d354c41a3334d20ee1d4627f7.zip | |
GenSVM R package
Diffstat (limited to 'R/gensvm.grid.R')
| -rw-r--r-- | R/gensvm.grid.R | 626 |
1 files changed, 563 insertions, 63 deletions
diff --git a/R/gensvm.grid.R b/R/gensvm.grid.R index 37e2f7f..5d27fde 100644 --- a/R/gensvm.grid.R +++ b/R/gensvm.grid.R @@ -3,44 +3,17 @@ #' @description This function performs a cross-validated grid search of the #' model parameters to find the best hyperparameter configuration for a given #' dataset. This function takes advantage of GenSVM's ability to use warm -#' starts to speed up computation. The function also uses the GenSVM C library -#' for speed. -#' -#' There are two ways to use this function: either by providing a data frame -#' with the parameter configurations to try or by giving each of the function -#' inputs a vector of values to evaluate. In the latter case all combinations -#' of the provided values will be used (i.e. the product set). +#' starts to speed up computation. The function uses the GenSVM C library for +#' speed. #' #' @param X training data matrix. We denote the size of this matrix by #' n_samples x n_features. #' @param y training vector of class labels of length n_samples. The number of #' unique labels in this vector is denoted by n_classes. -#' @param df Data frame with parameter configurations to evaluate. -#' If this is provided it overrides the other parameter ranges provided. The -#' data frame must provide *all* required columns, as described below. -#' @param p vector of values to try for the \eqn{p} hyperparameter -#' for the \eqn{\ell_p} norm in the loss function. All values should be on the -#' interval [1.0, 2.0]. -#' @param lambda vector of values for the regularization parameter -#' \eqn{\lambda} in the loss function. All values should be larger than 0. -#' @param kappa vector of values for the hinge function parameter in -#' the loss function. All values should be larger than -1. -#' @param weights vector of values for the instance weights. Values -#' should be either 'unit', 'group', or both. -#' @param kernel vector of values for the kernel type. 
Possible -#' values are: 'linear', 'rbf', 'poly', or 'sigmoid', or any combination of -#' these values. See the article \link[=gensvm-kernels]{Kernels in GenSVM} for -#' more information. -#' @param gamma kernel parameter for the 'rbf', 'poly', and 'sigmoid' kernels. -#' If it is 'auto', 1/n_features will be used. See the article -#' \link[=gensvm-kernels]{Kernels in GenSVM} for more information. -#' @param coef kernel parameter for the 'poly' and 'sigmoid' -#' kernels. See the article \link[=gensvm-kernels]{Kernels in GenSVM} for more -#' information. -#' @param degree kernel parameter for the 'poly' kernel. See the -#' article \link[=gensvm-kernels]{Kernels in GenSVM} for more information. -#' @param max.iter maximum number of iterations to run in the -#' optimization algorithm. +#' @param param.grid String (\code{'tiny'}, \code{'small'}, or \code{'full'}) +#' or data frame with parameter configurations to evaluate. Typically this is +#' the output of \code{expand.grid}. For more details, see "Using a Parameter +#' Grid" below. #' @param refit boolean variable. If true, the best model from cross validation #' is fitted again on the entire dataset. #' @param scoring metric to use to evaluate the classifier performance during @@ -49,29 +22,94 @@ #' values are better. If it is NULL, the accuracy score will be used. #' @param cv the number of cross-validation folds to use or a vector with the #' same length as \code{y} where each unique value denotes a test split. -#' @param verbose boolean variable to indicate whether training details should -#' be printed. 
+#' @param verbose integer to indicate the level of verbosity (higher is more +#' verbose) +#' @param return.train.score whether or not to return the scores on the +#' training splits #' #' @return A "gensvm.grid" S3 object with the following items: +#' \item{call}{Call that produced this object} +#' \item{param.grid}{Sorted version of the parameter grid used in training} #' \item{cv.results}{A data frame with the cross validation results} #' \item{best.estimator}{If refit=TRUE, this is the GenSVM model fitted with #' the best hyperparameter configuration, otherwise it is NULL} -#' \item{best.score}{Mean cross-validated score for the model with the best -#' hyperparameter configuration} +#' \item{best.score}{Mean cross-validated test score for the model with the +#' best hyperparameter configuration} #' \item{best.params}{Parameter configuration that provided the highest mean -#' cross-validated score} +#' cross-validated test score} #' \item{best.index}{Row index of the cv.results data frame that corresponds to #' the best hyperparameter configuration} #' \item{n.splits}{The number of cross-validation splits} +#' \item{n.objects}{The number of instances in the data} +#' \item{n.features}{The number of features of the data} +#' \item{n.classes}{The number of classes in the data} +#' \item{classes}{Array with the unique classes in the data} +#' \item{total.time}{Training time for the grid search} +#' \item{cv.idx}{Array with cross validation indices used to split the data} +#' +#' @section Using a Parameter Grid: +#' To evaluate certain paramater configurations, a data frame can be supplied +#' to the \code{param.grid} argument of the function. Such a data frame can +#' easily be generated using the R function \code{expand.grid}, or could be +#' created through other ways to test specific parameter configurations. 
+#' +#' Three parameter grids are predefined: +#' \describe{ +#' \item{\code{'tiny'}}{This parameter grid is generated by the function +#' \code{\link{gensvm.load.tiny.grid}} and is the default parameter grid. It +#' consists of parameter configurations that are likely to perform well on +#' various datasets.} +#' \item{\code{'small'}}{This grid is generated by +#' \code{\link{gensvm.load.small.grid}} and generates a data frame with 90 +#' configurations. It is typically fast to train but contains some +#' configurations that are unlikely to perform well. It is included for +#' educational purposes.} +#' \item{\code{'full'}}{This grid loads the parameter grid as used in the +#' GenSVM paper. It consists of 342 configurations and is generated by the +#' \code{\link{gensvm.load.full.grid}} function. Note that in the GenSVM paper +#' cross validation was done with this parameter grid, but the final training +#' step used \code{epsilon=1e-8}. The \code{\link{gensvm.refit}} function is +#' useful in this scenario.} +#' } #' +#' When you provide your own parameter grid, beware that only certain column +#' names are allowed in the data frame corresponding to parameters for the +#' GenSVM model. These names are: #' +#' \describe{ +#' \item{p}{Parameter for the lp norm. Must be in [1.0, 2.0].} +#' \item{kappa}{Parameter for the Huber hinge function. Must be larger than +#' -1.} +#' \item{lambda}{Parameter for the regularization term. Must be larger than 0.} +#' \item{weight}{Instance weight specification. Allowed values are "unit" for +#' unit weights and "group" for group-size correction weights.} +#' \item{epsilon}{Stopping parameter for the algorithm. Must be larger than 0.} +#' \item{max.iter}{Maximum number of iterations of the algorithm. Must be +#' larger than 0.} +#' \item{kernel}{The kernel to use, allowed values are "linear", "poly", +#' "rbf", and "sigmoid". The default is "linear".} +#' \item{coef}{Parameter for the "poly" and "sigmoid" kernels. 
See the section +#' "Kernels in GenSVM" in the \code{\link{gensvm-package}} page for more info.} +#' \item{degree}{Parameter for the "poly" kernel. See the section "Kernels in +#' GenSVM" in the \code{\link{gensvm-package}} page for more info.} +#' \item{gamma}{Parameter for the "poly", "rbf", and "sigmoid" kernels. See the +#' section "Kernels in GenSVM" in the \code{\link{gensvm-package}} page for more +#' info.} +#' } #' -#' @section Using a DataFrame: -#' ... +#' For variables that are not present in the \code{param.grid} data frame the +#' default parameter values in the \code{\link{gensvm}} function will be used. #' +#' Note that this function reorders the parameter grid to make the warm starts +#' as efficient as possible, which is why the param.grid in the result will not +#' be the same as the param.grid in the input. +#' +#' @note +#' This function returns partial results when the computation is interrupted by +#' the user. #' #' @author -#' Gerrit J.J. van den Burg, Patrick J.F. Groenen +#' Gerrit J.J. van den Burg, Patrick J.F. Groenen \cr #' Maintainer: Gerrit J.J. van den Burg <gertjanvandenburg@gmail.com> #' #' @references @@ -80,37 +118,499 @@ #' 17(225):1--42. URL \url{http://jmlr.org/papers/v17/14-526.html}. #' #' @seealso -#' \code{\link{coef}}, \code{\link{print}}, \code{\link{predict}}, -#' \code{\link{plot}}, and \code{\link{gensvm.grid}}. -#' +#' \code{\link{predict.gensvm.grid}}, \code{\link{print.gensvm.grid}}, and +#' \code{\link{gensvm}}. 
#' #' @export #' #' @examples -#' X <- -#' - -gensvm.grid <- function(X, y, - df=NULL, - p=c(1.0, 1.5, 2.0), - lambda=c(1e-8, 1e-6, 1e-4, 1e-2, 1), - kappa=c(-0.9, 0.5, 5.0), - weights=c('unit', 'group'), - kernel=c('linear'), - gamma=c('auto'), - coef=c(0.0), - degree=c(2.0), - max.iter=c(1e8), - refit=TRUE, - scoring=NULL, - cv=3, - verbose=TRUE) +#' x <- iris[, -5] +#' y <- iris[, 5] +#' +#' # use the default parameter grid +#' grid <- gensvm.grid(x, y) +#' +#' # use a smaller parameter grid +#' pg <- expand.grid(p=c(1.0, 1.5, 2.0), kappa=c(-0.9, 1.0), epsilon=c(1e-3)) +#' grid <- gensvm.grid(x, y, param.grid=pg) +#' +#' # print the result +#' print(grid) +#' +#' # Using a custom scoring function (accuracy as percentage) +#' acc.pct <- function(yt, yp) { return (100 * sum(yt == yp) / length(yt)) } +#' grid <- gensvm.grid(x, y, scoring=acc.pct) +#' +gensvm.grid <- function(X, y, param.grid='tiny', refit=TRUE, scoring=NULL, cv=3, + verbose=0, return.train.score=TRUE) { call <- match.call() + n.objects <- nrow(X) + n.features <- ncol(X) + n.classes <- length(unique(y)) + + if (is.character(param.grid)) { + if (param.grid == 'tiny') { + param.grid <- gensvm.load.tiny.grid() + } else if (param.grid == 'small') { + param.grid <- gensvm.load.small.grid() + } else if (param.grid == 'full') { + param.grid <- gensvm.load.full.grid() + } + } + + # Validate the range of the values for the gridsearch + gensvm.validate.param.grid(param.grid) + + # Sort the parameter grid for efficient warm starts + param.grid <- gensvm.sort.param.grid(param.grid) + + # Expand and convert the parameter grid for use in the C function + C.param.grid <- gensvm.expand.param.grid(param.grid, n.features) + + # Convert labels to integers + classes <- sort(unique(y)) + y.clean <- match(y, classes) + + if (is.vector(cv) && length(cv) == n.objects) { + folds <- sort(unique(cv)) + cv.idx <- match(cv, folds) - 1 + n.splits <- length(folds) + } else { + cv.idx <- gensvm.generate.cv.idx(n.objects, cv[1]) + 
n.splits <- cv + } + + results <- .Call("R_gensvm_grid", + as.matrix(X), + as.integer(y.clean), + as.matrix(C.param.grid), + as.integer(nrow(C.param.grid)), + as.integer(ncol(C.param.grid)), + as.integer(cv.idx), + as.integer(n.splits), + as.integer(verbose), + as.integer(n.objects), + as.integer(n.features), + as.integer(n.classes) + ) + + cv.results <- gensvm.cv.results(results, param.grid, cv.idx, + y.clean, scoring, + return.train.score=return.train.score) + best.index <- which.min(cv.results$rank.test.score)[1] + if (!is.na(best.index)) { # can occur when user interrupts + best.score <- cv.results$mean.test.score[best.index] + best.params <- param.grid[best.index, , drop=F] + # Remove duplicate attributes from best.params + attr(best.params, "out.attrs") <- NULL + } else { + best.score <- NA + best.params <- list() + } - object <- list(...) + if (refit && !is.na(best.index)) { + gensvm.args <- as.list(best.params) + gensvm.args$X <- X + gensvm.args$y <- y + best.estimator <- do.call(gensvm, gensvm.args) + } else { + best.estimator <- NULL + } + + object <- list(call = call, param.grid = param.grid, + cv.results = cv.results, best.estimator = best.estimator, + best.score = best.score, best.params = best.params, + best.index = best.index, n.splits = n.splits, + n.objects = n.objects, n.features = n.features, + n.classes = n.classes, classes = classes, + total.time = results$total.time, cv.idx = cv.idx) class(object) <- "gensvm.grid" return(object) } + +#' @title Load a tiny parameter grid for the GenSVM grid search +#' +#' @description This function returns a parameter grid to use in the GenSVM +#' grid search. This grid was obtained by analyzing the experiments done for +#' the GenSVM paper and selecting the configurations that achieve accuracy +#' within the 95th percentile on over 90% of the datasets. It is a good start +#' for a parameter search with a reasonably high chance of achieving good +#' performance on most datasets. 
+#' +#' Note that this grid is only tested to work well in combination with the +#' linear kernel. +#' +#' @author +#' Gerrit J.J. van den Burg, Patrick J.F. Groenen \cr +#' Maintainer: Gerrit J.J. van den Burg <gertjanvandenburg@gmail.com> +#' +#' @references +#' Van den Burg, G.J.J. and Groenen, P.J.F. (2016). \emph{GenSVM: A Generalized +#' Multiclass Support Vector Machine}, Journal of Machine Learning Research, +#' 17(225):1--42. URL \url{http://jmlr.org/papers/v17/14-526.html}. +#' +#' @export +#' +#' @seealso +#' \code{\link{gensvm.grid}}, \code{\link{gensvm.load.small.grid}}, +#' \code{\link{gensvm.load.full.grid}}. +#' +gensvm.load.tiny.grid <- function() +{ + df <- data.frame( + p=c(2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.5, 2.0, 2.0), + kappa=c(5.0, 5.0, 0.5, 5.0, -0.9, 5.0, 0.5, -0.9, 0.5, 0.5), + lambda = c(2^-16, 2^-18, 2^-18, 2^-18, 2^-18, 2^-14, 2^-18, + 2^-18, 2^-16, 2^-16), + weight = c('unit', 'unit', 'unit', 'group', 'unit', + 'unit', 'group', 'unit', 'unit', 'group') + ) + return(df) +} + +#' @title Load a large parameter grid for the GenSVM grid search +#' +#' @description This loads the parameter grid from the GenSVM paper. It +#' consists of 342 configurations and is constructed from all possible +#' combinations of the following parameter sets: +#' +#' \code{p = c(1.0, 1.5, 2.0)} +#' +#' \code{lambda = 2^seq(-18, 18, 2)} +#' +#' \code{kappa = c(-0.9, 0.5, 5.0)} +#' +#' \code{weight = c('unit', 'group')} +#' +#' @author +#' Gerrit J.J. van den Burg, Patrick J.F. Groenen \cr +#' Maintainer: Gerrit J.J. van den Burg <gertjanvandenburg@gmail.com> +#' +#' @references +#' Van den Burg, G.J.J. and Groenen, P.J.F. (2016). \emph{GenSVM: A Generalized +#' Multiclass Support Vector Machine}, Journal of Machine Learning Research, +#' 17(225):1--42. URL \url{http://jmlr.org/papers/v17/14-526.html}. +#' +#' @export +#' +#' @seealso +#' \code{\link{gensvm.grid}}, \code{\link{gensvm.load.tiny.grid}}, +#' \code{\link{gensvm.load.full.grid}}. 
+#' +gensvm.load.full.grid <- function() +{ + df <- expand.grid(p=c(1.0, 1.5, 2.0), lambda=2^seq(-18, 18, 2), + kappa=c(-0.9, 0.5, 5.0), weight=c('unit', 'group'), + epsilon=c(1e-6)) + return(df) +} + + +#' @title Load the default parameter grid for the GenSVM grid search +#' +#' @description This function loads a default parameter grid to use for the +#' GenSVM gridsearch. It contains all possible combinations of the following +#' parameter sets: +#' +#' \code{p = c(1.0, 1.5, 2.0)} +#' +#' \code{lambda = c(1e-8, 1e-6, 1e-4, 1e-2, 1)} +#' +#' \code{kappa = c(-0.9, 0.5, 5.0)} +#' +#' \code{weight = c('unit', 'group')} +#' +#' @author +#' Gerrit J.J. van den Burg, Patrick J.F. Groenen \cr +#' Maintainer: Gerrit J.J. van den Burg <gertjanvandenburg@gmail.com> +#' +#' @references +#' Van den Burg, G.J.J. and Groenen, P.J.F. (2016). \emph{GenSVM: A Generalized +#' Multiclass Support Vector Machine}, Journal of Machine Learning Research, +#' 17(225):1--42. URL \url{http://jmlr.org/papers/v17/14-526.html}. +#' +#' @export +#' +#' @seealso +#' \code{\link{gensvm.grid}}, \code{\link{gensvm.load.tiny.grid}}, +#' \code{\link{gensvm.load.small.grid}}. +#' +gensvm.load.small.grid <- function() +{ + df <- expand.grid(p=c(1.0, 1.5, 2.0), lambda=c(1e-8, 1e-6, 1e-4, 1e-2, 1), + kappa=c(-0.9, 0.5, 5.0), weight=c('unit', 'group')) + return(df) +} + + +#' Generate a vector of cross-validation indices +#' +#' This function generates a vector of length \code{n} with values from 0 to +#' \code{folds-1} to mark train and test splits. 
+#' +gensvm.generate.cv.idx <- function(n, folds) +{ + cv.idx <- matrix(0, n, 1) + + big.folds <- n %% folds + small.fold.size <- n %/% folds + + j <- 0 + for (i in 0:(small.fold.size * folds)) { + while (TRUE) { + idx <- round(runif(1, 1, n)) + if (cv.idx[idx] == 0) { + cv.idx[idx] <- j + j <- j + 1 + j <- (j %% folds) + break + } + } + } + + j <- 1 + i <- 0 + while (i < big.folds) { + if (cv.idx[j] == 0) { + cv.idx[j] <- i + i <- i + 1 + } + j <- j + 1 + } + + return(cv.idx) +} + +gensvm.validate.param.grid <- function(df) +{ + expected.colnames <- c("kernel", "coef", "degree", "gamma", "weight", + "kappa", "lambda", "p", "epsilon", "max.iter") + for (name in colnames(df)) { + if (!(name %in% expected.colnames)) { + stop("Invalid header name supplied in parameter grid: ", name) + } + } + + conditions <- list( + p=function(x) { x >= 1.0 && x <= 2.0 }, + kappa=function(x) { x > -1.0 }, + lambda=function(x) {x > 0.0 }, + epsilon=function(x) { x > 0.0 }, + gamma=function(x) { x != 0.0 }, + weight=function(x) { x %in% c("unit", "group") }, + kernel=function(x) { x %in% c("linear", "poly", "rbf", "sigmoid") } + ) + + for (idx in 1:nrow(df)) { + for (param in colnames(df)) { + if (!(param %in% names(conditions))) + next + func <- conditions[[param]] + value <- df[[param]][idx] + if (!func(value)) + stop("Invalid value in grid for parameter: ", param) + } + } +} + +gensvm.cv.results <- function(results, param.grid, cv.idx, y.true, + scoring, return.train.score=TRUE) +{ + n.candidates <- nrow(param.grid) + n.splits <- length(unique(cv.idx)) + + score <- if(is.function(scoring)) scoring else gensvm.accuracy + + # Build names and initialize the data.frame + names <- c("mean.fit.time", "mean.score.time", "mean.test.score") + if (return.train.score) + names <- c(names, "mean.train.score") + for (param in names(param.grid)) { + names <- c(names, sprintf("param.%s", param)) + } + names <- c(names, "rank.test.score") + for (idx in sort(unique(cv.idx))) { + names <- c(names, 
sprintf("split%i.test.score", idx)) + if (return.train.score) + names <- c(names, sprintf("split%i.train.score", idx)) + } + names <- c(names, "std.fit.time", "std.score.time", "std.test.score") + if (return.train.score) + names <- c(names, "std.train.score") + + df <- data.frame(matrix(ncol=length(names), nrow=n.candidates)) + colnames(df) <- names + + for (pidx in 1:n.candidates) { + param <- param.grid[pidx, , drop=F] + durations <- results$durations[pidx, ] + predictions <- results$predictions[pidx, ] + + fit.times <- durations + score.times <- c() + test.scores <- c() + train.scores <- c() + + is.missing <- any(is.na(durations)) + + for (test.idx in sort(unique(cv.idx))) { + score.time <- 0 + + if (return.train.score) { + y.train.pred <- predictions[cv.idx != test.idx] + y.train.true <- y.true[cv.idx != test.idx] + + start.time <- proc.time() + train.score <- score(y.train.true, y.train.pred) + stop.time <- proc.time() + score.time <- score.time + (stop.time - start.time)[3] + + train.scores <- c(train.scores, train.score) + } + + y.test.pred <- predictions[cv.idx == test.idx] + y.test.true <- y.true[cv.idx == test.idx] + + start.time <- proc.time() + test.score <- score(y.test.true, y.test.pred) + stop.time <- proc.time() + score.time <- score.time + (stop.time - start.time)[3] + + test.scores <- c(test.scores, test.score) + + score.times <- c(score.times, score.time) + } + + df$mean.fit.time[pidx] <- mean(fit.times) + df$mean.score.time[pidx] <- if(is.missing) NA else mean(score.times) + df$mean.test.score[pidx] <- mean(test.scores) + df$std.fit.time[pidx] <- sd(fit.times) + df$std.score.time[pidx] <- if(is.missing) NA else sd(score.times) + df$std.test.score[pidx] <- sd(test.scores) + if (return.train.score) { + df$mean.train.score[pidx] <- mean(train.scores) + df$std.train.score[pidx] <- sd(train.scores) + } + + for (parname in names(param.grid)) { + df[[sprintf("param.%s", parname)]][pidx] <- param[[parname]] + } + + j <- 1 + for (test.idx in 
sort(unique(cv.idx))) { + lbl <- sprintf("split%i.test.score", test.idx) + df[[lbl]][pidx] <- test.scores[j] + if (return.train.score) { + lbl <- sprintf("split%i.train.score", test.idx) + df[[lbl]][pidx] <- train.scores[j] + } + j <- j + 1 + } + } + + df$rank.test.score <- gensvm.rank.score(df$mean.test.score) + + return(df) +} + +gensvm.sort.param.grid <- function(param.grid) +{ + all.cols <- c("kernel", "coef", "degree", "gamma", "weight", "kappa", + "lambda", "p", "epsilon", "max.iter") + + order.args <- NULL + for (name in all.cols) { + if (name %in% colnames(param.grid)) { + if (name == "epsilon") + order.args <- cbind(order.args, -param.grid[[name]]) + else + order.args <- cbind(order.args, param.grid[[name]]) + } + } + sorted.pg <- param.grid[do.call(order, as.list(as.data.frame(order.args))), ] + + rownames(sorted.pg) <- NULL + + return(sorted.pg) +} + +gensvm.expand.param.grid <- function(pg, n.features) +{ + if ("kernel" %in% colnames(pg)) { + all.kernels <- c("linear", "poly", "rbf", "sigmoid") + pg$kernel <- match(pg$kernel, all.kernels) - 1 + } else { + pg$kernel <- 0 + } + + if ("weight" %in% colnames(pg)) { + all.weights <- c("unit", "group") + pg$weight <- match(pg$weight, all.weights) + } else { + pg$weight <- 1 + } + + if ("gamma" %in% colnames(pg)) { + pg$gamma[pg$gamma == "auto"] <- 1.0/n.features + } else { + pg$gamma <- 1.0/n.features + } + + if (!("degree" %in% colnames(pg))) + pg$degree <- 2.0 + if (!("coef" %in% colnames(pg))) + pg$coef <- 0.0 + if (!("p" %in% colnames(pg))) + pg$p <- 1.0 + if (!("lambda" %in% colnames(pg))) + pg$lambda <- 1e-8 + if (!("kappa" %in% colnames(pg))) + pg$kappa <- 0.0 + if (!("epsilon" %in% colnames(pg))) + pg$epsilon <- 1e-6 + if (!("max.iter" %in% colnames(pg))) + pg$max.iter <- 1e8 + + C.param.grid <- data.frame(kernel=pg$kernel, coef=pg$coef, + degree=pg$degree, gamma=pg$gamma, + weight=pg$weight, kappa=pg$kappa, + lambda=pg$lambda, p=pg$p, epsilon=pg$epsilon, + max.iter=pg$max.iter) + + 
return(C.param.grid) +} + +#' @title Compute the ranks for the numbers in a given vector +#' +#' @details +#' This function computes the ranks for the values in an array. The highest +#' value gets the smallest rank. Ties are broken by assigning the smallest +#' value. +#' +#' @param x array of numeric values +#' +#' @examples +#' x <- c(7, 0.1, 0.5, 0.1, 10, 100, 200) +#' gensvm.rank.score(x) +#' [ 4 6 5 6 3 2 1 ] +#' +gensvm.rank.score <- function(x) +{ + x <- as.array(x) + l <- length(x) + r <- 1 + ranks <- as.vector(matrix(0, l, 1)) + ranks[which(is.na(x))] <- NA + while (!all(mapply(is.na, x))) { + m <- max(x, na.rm=T) + idx <- which(x == m) + ranks[idx] <- r + r <- r + length(idx) + x[idx] <- NA + } + + return(ranks) +} |
