man/gensvm.train.test.split.Rd


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79

% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/gensvm.train.test.split.R
\name{gensvm.train.test.split}
\alias{gensvm.train.test.split}
\title{Create a train/test split of a dataset}
\usage{
gensvm.train.test.split(
  x,
  y = NULL,
  train.size = NULL,
  test.size = NULL,
  shuffle = TRUE,
  random.state = NULL,
  return.idx = FALSE
)
}
\arguments{
\item{x}{array to split}

\item{y}{another array to split (typically this is a vector)}

\item{train.size}{size of the training dataset. This can be provided as a 
float or as an int. If it's a float, it should be between 0.0 and 1.0 and 
represents the fraction of the dataset that should be placed in the training 
dataset. If it's an int, it represents the exact number of samples in the 
training dataset. If it is NULL, the complement of \code{test.size} will be 
used.}

\item{test.size}{size of the test dataset. As with \code{train.size}, either 
a float or an int can be supplied. If it's NULL, the complement of 
\code{train.size} will be used. If both \code{train.size} and 
\code{test.size} are NULL, a default \code{test.size} of 0.25 will be used.}

\item{shuffle}{whether or not to shuffle the rows}

\item{random.state}{seed for the random number generator (int)}

\item{return.idx}{whether or not to return the indices in the output}
}
\value{
a list with \code{x.train} and \code{x.test} splits of the \code{x} 
array provided. If \code{y} is provided, the list also contains 
\code{y.train} and \code{y.test}. If \code{return.idx} is TRUE, the list 
also contains \code{idx.train} and \code{idx.test}.
}
\description{
Often it is desirable to split a dataset into a training and 
testing sample. This function is included in GenSVM to make it easy to do 
so. The function is inspired by a similar function in Scikit-Learn.
}
\examples{
x <- iris[, -5]
y <- iris[, 5]

# using the default values
split <- gensvm.train.test.split(x, y)

# using the split in a GenSVM model
fit <- gensvm(split$x.train, split$y.train)
gensvm.accuracy(split$y.test, predict(fit, split$x.test))

# using attach makes the results directly available
attach(gensvm.train.test.split(x, y))
fit <- gensvm(x.train, y.train)
gensvm.accuracy(y.test, predict(fit, x.test))

}
\references{
Van den Burg, G.J.J. and Groenen, P.J.F. (2016). \emph{GenSVM: A Generalized 
Multiclass Support Vector Machine}, Journal of Machine Learning Research, 
17(225):1--42. URL \url{https://jmlr.org/papers/v17/14-526.html}.
}
\seealso{
\code{\link{gensvm}}, \code{\link{gensvm-package}}
}
\author{
Gerrit J.J. van den Burg, Patrick J.F. Groenen \cr
Maintainer: Gerrit J.J. van den Burg <gertjanvandenburg@gmail.com>
}