R/gensvm.grid.R


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624

#' @title Cross-validated grid search for GenSVM
#'
#' @description This function performs a cross-validated grid search of the 
#' model parameters to find the best hyperparameter configuration for a given 
#' dataset. This function takes advantage of GenSVM's ability to use warm 
#' starts to speed up computation. The function uses the GenSVM C library for 
#' speed. 
#'
#' @param x training data matrix. We denote the size of this matrix by 
#' n_samples x n_features.
#' @param y training vector of class labels of length n_samples. The number of 
#' unique labels in this vector is denoted by n_classes.
#' @param param.grid String (\code{'tiny'}, \code{'small'}, or \code{'full'}) 
#' or data frame with parameter configurations to evaluate.  Typically this is 
#' the output of \code{expand.grid}. For more details, see "Using a Parameter 
#' Grid" below.
#' @param refit boolean variable. If true, the best model from cross validation 
#' is fitted again on the entire dataset.
#' @param scoring metric to use to evaluate the classifier performance during 
#' cross validation. The metric should be an R function that takes two 
#' arguments: y_true and y_pred and that returns a float such that higher 
#' values are better. If it is NULL, the accuracy score will be used.
#' @param cv the number of cross-validation folds to use or a vector with the 
#' same length as \code{y} where each unique value denotes a test split.
#' @param verbose integer to indicate the level of verbosity (higher is more 
#' verbose)
#' @param return.train.score whether or not to return the scores on the 
#' training splits
#'
#' @return A "gensvm.grid" S3 object with the following items:
#' \item{call}{Call that produced this object}
#' \item{param.grid}{Sorted version of the parameter grid used in training}
#' \item{cv.results}{A data frame with the cross validation results}
#' \item{best.estimator}{If refit=TRUE, this is the GenSVM model fitted with 
#' the best hyperparameter configuration, otherwise it is NULL}
#' \item{best.score}{Mean cross-validated test score for the model with the 
#' best hyperparameter configuration}
#' \item{best.params}{Parameter configuration that provided the highest mean 
#' cross-validated test score}
#' \item{best.index}{Row index of the cv.results data frame that corresponds to 
#' the best hyperparameter configuration}
#' \item{n.splits}{The number of cross-validation splits}
#' \item{n.objects}{The number of instances in the data}
#' \item{n.features}{The number of features of the data}
#' \item{n.classes}{The number of classes in the data}
#' \item{classes}{Array with the unique classes in the data}
#' \item{total.time}{Training time for the grid search}
#' \item{cv.idx}{Array with cross validation indices used to split the data}
#'
#' @section Using a Parameter Grid:
#' To evaluate certain parameter configurations, a data frame can be supplied 
#' to the \code{param.grid} argument of the function. Such a data frame can 
#' easily be generated using the R function \code{expand.grid}, or could be 
#' created through other ways to test specific parameter configurations.
#'
#' Three parameter grids are predefined:
#' \describe{
#' \item{\code{'tiny'}}{This parameter grid is generated by the function 
#' \code{\link{gensvm.load.tiny.grid}} and is the default parameter grid. It 
#' consists of parameter configurations that are likely to perform well on 
#' various datasets.}
#' \item{\code{'small'}}{This grid is generated by 
#' \code{\link{gensvm.load.small.grid}} and generates a data frame with 90 
#' configurations. It is typically fast to train but contains some 
#' configurations that are unlikely to perform well. It is included for 
#' educational purposes.}
#' \item{\code{'full'}}{This grid loads the parameter grid as used in the 
#' GenSVM paper. It consists of 342 configurations and is generated by the 
#' \code{\link{gensvm.load.full.grid}} function. Note that in the GenSVM paper 
#' cross validation was done with this parameter grid, but the final training 
#' step used \code{epsilon=1e-8}. The \code{\link{gensvm.refit}} function is 
#' useful in this scenario.}
#' }
#'
#' When you provide your own parameter grid, beware that only certain column 
#' names are allowed in the data frame corresponding to parameters for the 
#' GenSVM model. These names are:
#'
#' \describe{
#' \item{p}{Parameter for the lp norm. Must be in [1.0, 2.0].}
#' \item{kappa}{Parameter for the Huber hinge function. Must be larger than 
#' -1.}
#' \item{lambda}{Parameter for the regularization term. Must be larger than 0.}
#' \item{weights}{Instance weights specification. Allowed values are "unit" for 
#' unit weights and "group" for group-size correction weights}
#' \item{epsilon}{Stopping parameter for the algorithm. Must be larger than 0.}
#' \item{max.iter}{Maximum number of iterations of the algorithm. Must be 
#' larger than 0.}
#' \item{kernel}{The kernel to used, allowed values are "linear", "poly", 
#' "rbf", and "sigmoid". The default is "linear"}
#' \item{coef}{Parameter for the "poly" and "sigmoid" kernels. See the section 
#' "Kernels in GenSVM" in the code{ink{gensvm-package}} page for more info.}
#' \item{degree}{Parameter for the "poly" kernel. See the section "Kernels in 
#' GenSVM" in the code{ink{gensvm-package}} page for more info.}
#' \item{gamma}{Parameter for the "poly", "rbf", and "sigmoid" kernels. See the 
#' section "Kernels in GenSVM" in the code{ink{gensvm-package}} page for more 
#' info.}
#' }
#'
#' For variables that are not present in the \code{param.grid} data frame the 
#' default parameter values in the \code{\link{gensvm}} function will be used.
#'
#' Note that this function reorders the parameter grid to make the warm starts 
#' as efficient as possible, which is why the param.grid in the result will not 
#' be the same as the param.grid in the input.
#'
#' @note
#' 1. This function returns partial results when the computation is interrupted 
#' by the user.
#' 2. The score.time reported in the results only covers the time needed to 
#' compute the score from the predictions and true class labels. It does not 
#' include the time to compute the predictions themselves.
#'
#' @author
#' Gerrit J.J. van den Burg, Patrick J.F. Groenen \cr
#' Maintainer: Gerrit J.J. van den Burg <gertjanvandenburg@gmail.com>
#'
#' @references
#' Van den Burg, G.J.J. and Groenen, P.J.F. (2016). \emph{GenSVM: A Generalized 
#' Multiclass Support Vector Machine}, Journal of Machine Learning Research, 
#' 17(225):1--42. URL \url{https://jmlr.org/papers/v17/14-526.html}.
#'
#' @seealso
#' \code{\link{predict.gensvm.grid}}, \code{\link{print.gensvm.grid}}, 
#' \code{\link{plot.gensvm.grid}}, \code{\link{gensvm}}, 
#' \code{\link{gensvm-package}}
#'
#' @export
#'
#' @importFrom stats sd
#'
#' @examples
#' x <- iris[, -5]
#' y <- iris[, 5]
#'
#' \donttest{
#' # use the default parameter grid
#' grid <- gensvm.grid(x, y, verbose=TRUE)
#' }
#'
#' # use a smaller parameter grid
#' pg <- expand.grid(p=c(1.0, 1.5, 2.0), kappa=c(-0.9, 1.0), epsilon=c(1e-3))
#' grid <- gensvm.grid(x, y, param.grid=pg)
#'
#' # print the result
#' print(grid)
#'
#' \donttest{
#' # Using a custom scoring function (accuracy as percentage)
#' acc.pct <- function(yt, yp) { return (100 * sum(yt == yp) / length(yt)) }
#' grid <- gensvm.grid(x, y, scoring=acc.pct)
#'
#' # With RBF kernel and very verbose progress printing
#' pg <- expand.grid(kernel=c('rbf'), gamma=c(1e-2, 1e-1, 1, 1e1, 1e2),
#'                   lambda=c(1e-8, 1e-6), max.iter=c(5000))
#' grid <- gensvm.grid(x, y, param.grid=pg, verbose=2)
#' }
#'
gensvm.grid <- function(x, y, param.grid='tiny', refit=TRUE, scoring=NULL, cv=3, 
                        verbose=0, return.train.score=TRUE)
{
    call <- match.call()

    n.objects <- nrow(x)
    n.features <- ncol(x)
    n.classes <- length(unique(y))

    if (n.objects != length(y)) {
        cat("Error: x and y are not the same length.\n")
        return(invisible(NULL))
    }

    if (is.character(param.grid)) {
        if (param.grid == 'tiny') {
            param.grid <- gensvm.load.tiny.grid()
        } else if (param.grid == 'small') {
            param.grid <- gensvm.load.small.grid()
        } else if (param.grid == 'full') {
            param.grid <- gensvm.load.full.grid()
        }
    }

    # Validate the range of the values for the gridsearch
    if (!gensvm.validate.param.grid(param.grid))
        return(invisible(NULL))

    # Sort the parameter grid for efficient warm starts
    param.grid <- gensvm.sort.param.grid(param.grid)

    # Expand and convert the parameter grid for use in the C function
    C.param.grid <- gensvm.expand.param.grid(param.grid, n.features)

    # Convert labels to integers
    classes <- sort(unique(y))
    y.clean <- match(y, classes)

    if (is.vector(cv) && length(cv) == n.objects) {
        folds <- sort(unique(cv))
        cv.idx <- match(cv, folds) - 1
        n.splits <- length(folds)
    } else {
        cv.idx <- gensvm.generate.cv.idx(n.objects, cv[1])
        n.splits <- cv
    }

    results <- .Call("R_gensvm_grid",
                 data.matrix(x),
                 as.integer(y.clean),
                 as.matrix(C.param.grid),
                 as.integer(nrow(C.param.grid)),
                 as.integer(ncol(C.param.grid)),
                 as.integer(cv.idx),
                 as.integer(n.splits),
                 as.integer(verbose),
                 as.integer(n.objects),
                 as.integer(n.features),
                 as.integer(n.classes)
                 )

    cv.results <- gensvm.cv.results(results, param.grid, cv.idx, y.clean, 
                                    scoring, 
                                    return.train.score=return.train.score)

    best.index <- which.min(cv.results$rank.test.score)[1]
    if (!is.na(best.index)) { # can occur when user interrupts
        best.score <- cv.results$mean.test.score[best.index]
        best.params <- param.grid[best.index, , drop=F]
        # Remove duplicate attributes from best.params
        attr(best.params, "out.attrs") <- NULL
    } else {
        best.score <- NA
        best.params <- list()
    }

    if (refit && !is.na(best.index)) {
        gensvm.args <- as.list(best.params)
        # Stupid factors...
        if ("weights" %in% names(gensvm.args))
            gensvm.args$weights <- as.character(gensvm.args$weights)
        if ("kernel" %in% names(gensvm.args))
            gensvm.args$kernel <- as.character(gensvm.args$kernel)
        gensvm.args$x <- x
        gensvm.args$y <- y
        gensvm.args$verbose <- if(verbose>1) 1 else 0
        if (verbose > 1)
            cat("Refitting with best parameters ...\n")
        best.estimator <- do.call(gensvm, gensvm.args)
    } else {
        best.estimator <- NULL
    }

    object <- list(call = call, param.grid = param.grid, 
                   cv.results = cv.results, best.estimator = best.estimator, 
                   best.score = best.score, best.params = best.params, 
                   best.index = best.index, n.splits = n.splits, 
                   n.objects = n.objects, n.features = n.features, 
                   n.classes = n.classes, classes = classes, 
                   total.time = results$total.time, cv.idx = cv.idx)
    class(object) <- "gensvm.grid"
    return(object)
}

#' @title Load a tiny parameter grid for the GenSVM grid search
#'
#' @description This function returns a parameter grid to use in the GenSVM 
#' grid search. This grid was obtained by analyzing the experiments done for 
#' the GenSVM paper and selecting the configurations that achieve accuracy 
#' within the 95th percentile on over 90% of the datasets. It is a good start 
#' for a parameter search with a reasonably high chance of achieving good 
#' performance on most datasets.
#'
#' Note that this grid is only tested to work well in combination with the 
#' linear kernel.
#'
#' @author
#' Gerrit J.J. van den Burg, Patrick J.F. Groenen \cr
#' Maintainer: Gerrit J.J. van den Burg <gertjanvandenburg@gmail.com>
#'
#' @references
#' Van den Burg, G.J.J. and Groenen, P.J.F. (2016). \emph{GenSVM: A Generalized 
#' Multiclass Support Vector Machine}, Journal of Machine Learning Research, 
#' 17(225):1--42. URL \url{https://jmlr.org/papers/v17/14-526.html}.
#'
#' @export
#'
#' @seealso
#' \code{\link{gensvm.grid}}, \code{\link{gensvm.load.small.grid}}, 
#' \code{\link{gensvm.load.full.grid}}.
#'
gensvm.load.tiny.grid <- function()
{
    df <- data.frame(
                     p=c(2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.5, 2.0, 2.0),
                     kappa=c(5.0, 5.0, 0.5, 5.0, -0.9, 5.0, 0.5, -0.9, 0.5, 0.5),
                     lambda=c(2^-16, 2^-18, 2^-18, 2^-18, 2^-18, 2^-14, 2^-18,
                              2^-18, 2^-16, 2^-16),
                     weights=c('unit', 'unit', 'unit', 'group', 'unit',
                               'unit', 'group', 'unit', 'unit', 'group')
                     )
    return(df)
}

#' @title Load a large parameter grid for the GenSVM grid search
#'
#' @description This loads the parameter grid from the GenSVM paper. It 
#' consists of 342 configurations and is constructed from all possible 
#' combinations of the following parameter sets:
#'
#' \code{p = c(1.0, 1.5, 2.0)}
#' \code{lambda = 2^seq(-18, 18, 2)}
#' \code{kappa = c(-0.9, 0.5, 5.0)}
#' \code{weights = c('unit', 'group')}
#'
#' @author
#' Gerrit J.J. van den Burg, Patrick J.F. Groenen \cr
#' Maintainer: Gerrit J.J. van den Burg <gertjanvandenburg@gmail.com>
#'
#' @references
#' Van den Burg, G.J.J. and Groenen, P.J.F. (2016). \emph{GenSVM: A Generalized 
#' Multiclass Support Vector Machine}, Journal of Machine Learning Research, 
#' 17(225):1--42. URL \url{https://jmlr.org/papers/v17/14-526.html}.
#'
#' @export
#'
#' @seealso
#' \code{\link{gensvm.grid}}, \code{\link{gensvm.load.tiny.grid}}, 
#' \code{\link{gensvm.load.full.grid}}.
#'
gensvm.load.full.grid <- function()
{
    df <- expand.grid(p=c(1.0, 1.5, 2.0), lambda=2^seq(-18, 18, 2),
                      kappa=c(-0.9, 0.5, 5.0), weights=c('unit', 'group'),
                      epsilon=c(1e-6))
    return(df)
}


#' @title Load the small parameter grid for the GenSVM grid search
#'
#' @description This function loads a small parameter grid to use for the 
#' GenSVM gridsearch. It contains all possible combinations of the following 
#' parameter sets:
#'
#' \code{p = c(1.0, 1.5, 2.0)}
#' \code{lambda = c(1e-8, 1e-6, 1e-4, 1e-2, 1)}
#' \code{kappa = c(-0.9, 0.5, 5.0)}
#' \code{weights= c('unit', 'group')}
#'
#' @author
#' Gerrit J.J. van den Burg, Patrick J.F. Groenen \cr
#' Maintainer: Gerrit J.J. van den Burg <gertjanvandenburg@gmail.com>
#'
#' @references
#' Van den Burg, G.J.J. and Groenen, P.J.F. (2016). \emph{GenSVM: A Generalized 
#' Multiclass Support Vector Machine}, Journal of Machine Learning Research, 
#' 17(225):1--42. URL \url{https://jmlr.org/papers/v17/14-526.html}.
#'
#' @export
#'
#' @seealso
#' \code{\link{gensvm.grid}}, \code{\link{gensvm.load.tiny.grid}}, 
#' \code{\link{gensvm.load.small.grid}}.
#'
gensvm.load.small.grid <- function()
{
    df <- expand.grid(p=c(1.0, 1.5, 2.0), lambda=c(1e-8, 1e-6, 1e-4, 1e-2, 1),
                      kappa=c(-0.9, 0.5, 5.0), weights=c('unit', 'group'))
    return(df)
}


#' @title Generate a vector of cross-validation indices
#'
#' @description
#' This function generates a vector of length \code{n} with values from 0 to 
#' \code{folds-1} to mark train and test splits.
#'
#' @param n the number of instances
#' @param folds the number of cross validation folds
#'
#' @return an array of length \code{n} with values in the range [0, folds-1] 
#' indicating the test fold of each instance.
#'
#' @author
#' Gerrit J.J. van den Burg, Patrick J.F. Groenen \cr
#' Maintainer: Gerrit J.J. van den Burg <gertjanvandenburg@gmail.com>
#'
#' @references
#' Van den Burg, G.J.J. and Groenen, P.J.F. (2016). \emph{GenSVM: A Generalized 
#' Multiclass Support Vector Machine}, Journal of Machine Learning Research, 
#' 17(225):1--42. URL \url{https://jmlr.org/papers/v17/14-526.html}.
#'
#' @seealso
#' \code{\link{gensvm.grid}}
gensvm.generate.cv.idx <- function(n, folds)
{
    cv.idx <- matrix(0, n, 1)

    big.folds <- n %% folds
    small.fold.size <- n %/% folds

    j <- 0
    for (i in 0:(small.fold.size * folds)) {
        while (TRUE) {
            idx <- round(runif(1, 1, n))
            if (cv.idx[idx] == 0) {
                cv.idx[idx] <- j
                j <- j + 1
                j <- (j %% folds)
                break
            }
        }
    }

    j <- 1
    i <- 0
    while (i < big.folds) {
        if (cv.idx[j] == 0) {
            cv.idx[j] <- i
            i <- i + 1
        }
        j <- j + 1
    }

    return(cv.idx)
}

gensvm.cv.results <- function(results, param.grid, cv.idx, y.true, 
                                     scoring, return.train.score=TRUE)
{
    n.candidates <- nrow(param.grid)
    n.splits <- length(unique(cv.idx))

    score <- if(is.function(scoring)) scoring else gensvm.accuracy

    # Build names and initialize the data.frame
    names <- c("mean.fit.time", "mean.score.time", "mean.test.score")
    if (return.train.score)
        names <- c(names, "mean.train.score")
    for (param in names(param.grid)) {
        names <- c(names, sprintf("param.%s", param))
    }
    names <- c(names, "rank.test.score")
    for (idx in sort(unique(cv.idx))) {
        names <- c(names, sprintf("split%i.test.score", idx))
        if (return.train.score)
            names <- c(names, sprintf("split%i.train.score", idx))
    }
    names <- c(names, "std.fit.time", "std.score.time", "std.test.score")
    if (return.train.score)
        names <- c(names, "std.train.score")

    df <- data.frame(matrix(ncol=length(names), nrow=n.candidates))
    colnames(df) <- names

    for (pidx in 1:n.candidates) {
        param <- param.grid[pidx, , drop=F]
        durations <- results$durations[pidx, ]
        predictions <- results$predictions[pidx, ]

        fit.times <- durations
        score.times <- c()
        test.scores <- c()
        train.scores <- c()

        is.missing <- any(is.na(durations))

        for (test.idx in sort(unique(cv.idx))) {
            score.time <- 0

            if (return.train.score) {
                y.train.pred <- predictions[cv.idx != test.idx]
                y.train.true <- y.true[cv.idx != test.idx]

                start.time <- proc.time()
                train.score <- score(y.train.true, y.train.pred)
                stop.time <- proc.time()
                score.time <- score.time + (stop.time - start.time)[3]

                train.scores <- c(train.scores, train.score)
            }

            y.test.pred <- predictions[cv.idx == test.idx]
            y.test.true <- y.true[cv.idx == test.idx]

            start.time <- proc.time()
            test.score <- score(y.test.true, y.test.pred)
            stop.time <- proc.time()
            score.time <- score.time + (stop.time - start.time)[3]

            test.scores <- c(test.scores, test.score)

            score.times <- c(score.times, score.time)
        }

        df$mean.fit.time[pidx] <- mean(fit.times)
        df$mean.score.time[pidx] <- if(is.missing) NA else mean(score.times)
        df$mean.test.score[pidx] <- mean(test.scores)
        df$std.fit.time[pidx] <- sd(fit.times)
        df$std.score.time[pidx] <- if(is.missing) NA else sd(score.times)
        df$std.test.score[pidx] <- sd(test.scores)
        if (return.train.score) {
            df$mean.train.score[pidx] <- mean(train.scores)
            df$std.train.score[pidx] <- sd(train.scores)
        }

        for (parname in names(param.grid)) {
            header <- sprintf("param.%s", parname)
            val <- param[[parname]]
            if (is.factor(val))
                val <- levels(val)[val]
            df[[header]][pidx] <- val
        }

        j <- 1
        for (test.idx in sort(unique(cv.idx))) {
            lbl <- sprintf("split%i.test.score", test.idx)
            df[[lbl]][pidx] <- test.scores[j]
            if (return.train.score) {
                lbl <- sprintf("split%i.train.score", test.idx)
                df[[lbl]][pidx] <- train.scores[j]
            }
            j <- j + 1
        }
    }

    df$rank.test.score <- gensvm.rank.score(df$mean.test.score)

    return(df)
}

gensvm.sort.param.grid <- function(param.grid)
{
    all.cols <- c("kernel", "coef", "degree", "gamma", "weights", "kappa",
                  "lambda", "p", "epsilon", "max.iter")

    order.args <- NULL
    for (name in all.cols) {
        if (name %in% colnames(param.grid)) {
            if (name == "epsilon")
                order.args <- cbind(order.args, -param.grid[[name]])
            else
                order.args <- cbind(order.args, param.grid[[name]])
        }
    }
    sorted.pg <- param.grid[do.call(order, as.list(as.data.frame(order.args))), ]

    rownames(sorted.pg) <- NULL

    return(sorted.pg)
}

gensvm.expand.param.grid <- function(pg, n.features)
{
    if ("kernel" %in% colnames(pg)) {
        all.kernels <- c("linear", "poly", "rbf", "sigmoid")
        pg$kernel <- match(pg$kernel, all.kernels) - 1
    } else {
        pg$kernel <- 0
    }

    if ("weights" %in% colnames(pg)) {
        all.weights <- c("unit", "group")
        pg$weights <- match(pg$weights, all.weights)
    } else {
        pg$weights <- 1
    }

    if ("gamma" %in% colnames(pg)) {
        pg$gamma[pg$gamma == "auto"] <- 1.0/n.features
    } else {
        pg$gamma <- 1.0/n.features
    }

    if (!("degree" %in% colnames(pg)))
        pg$degree <- 2.0
    if (!("coef" %in% colnames(pg)))
        pg$coef <- 0.0
    if (!("p" %in% colnames(pg)))
        pg$p <- 1.0
    if (!("lambda" %in% colnames(pg)))
        pg$lambda <- 1e-8
    if (!("kappa" %in% colnames(pg)))
        pg$kappa <- 0.0
    if (!("epsilon" %in% colnames(pg)))
        pg$epsilon <- 1e-6
    if (!("max.iter" %in% colnames(pg)))
        pg$max.iter <- 1e8

    C.param.grid <- data.frame(kernel=pg$kernel, coef=pg$coef, 
                               degree=pg$degree, gamma=pg$gamma, 
                               weights=pg$weights, kappa=pg$kappa, 
                               lambda=pg$lambda, p=pg$p, epsilon=pg$epsilon, 
                               max.iter=pg$max.iter)
    return(C.param.grid)
}

#' @title Compute the ranks for the numbers in a given vector
#'
#' @description This function computes the ranks for the values in an array. 
#' The highest value gets the smallest rank. Ties are broken by assigning the 
#' smallest value. The smallest rank is 1.
#'
#' @param x array of numeric values
#'
#' @return array with the ranks of the values in the input array.
#'
gensvm.rank.score <- function(x)
{
    x <- as.array(x)
    l <- length(x)
    r <- 1
    ranks <- as.vector(matrix(0, l, 1))
    ranks[which(is.na(x))] <- NA
    while (!all(mapply(is.na, x))) {
        m <- max(x, na.rm=T)
        idx <- which(x == m)
        ranks[idx] <- r
        r <- r + length(idx)
        x[idx] <- NA
    }

    return(ranks)
}