aboutsummaryrefslogtreecommitdiff
path: root/src/crossval.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/crossval.c')
-rw-r--r--src/crossval.c63
1 files changed, 59 insertions, 4 deletions
diff --git a/src/crossval.c b/src/crossval.c
index 9a3c1cc..10e3051 100644
--- a/src/crossval.c
+++ b/src/crossval.c
@@ -1,7 +1,40 @@
+/**
+ * @file crossval.c
+ * @author Gertjan van den Burg
+ * @date January 7, 2014
+ * @brief Functions for cross validation
+ *
+ * @details
+ * This file contains functions for performing cross validation. The function
+ * msvmmaj_make_cv_split() creates a cross validation vector for non-stratified
+ * cross validation. The function msvmmaj_get_tt_split() creates a train and
+ * test dataset from a given dataset and a pre-determined CV partition vector.
+ * See individual function documentation for details.
+ *
+ */
+
#include "crossval.h"
-#include "matrix.h"
-#include "MSVMMaj.h"
+#include "msvmmaj.h"
+#include "msvmmaj_matrix.h"
+/**
+ * @brief Create a cross validation split vector
+ *
+ * @details
+ * A pre-allocated vector of length N is created which can be used to define
+ * cross validation splits. The folds contain between
+ * @f$ \lfloor N / folds \rfloor @f$ and @f$ \lceil N / folds \rceil @f$
+ * instances. An instance is mapped to a partition randomly until all folds
+ * contain @f$ N / folds @f$ instances. The zero fold then contains
+ * @f$ N / folds + N \% folds @f$ instances. These remaining @f$ N \% folds @f$
+ * instances are then distributed over the first @f$ N \% folds @f$ folds.
+ *
+ * @param[in] N number of instances
+ * @param[in] folds number of folds
+ * @param[in,out] cv_idx array of size N which contains the fold index
+ * for each observation on exit
+ *
+ */
void msvmmaj_make_cv_split(long N, long folds, long *cv_idx)
{
long i, j, idx;
@@ -30,6 +63,26 @@ void msvmmaj_make_cv_split(long N, long folds, long *cv_idx)
}
}
+
+/**
+ * @brief Create train and test datasets for a CV split
+ *
+ * @details
+ * Given a MajData structure for the full dataset, a previously created
+ * cross validation split vector and a fold index, a training and test dataset
+ * are created.
+ *
+ * @param[in] full_data a MajData structure for the entire
+ * dataset
+ * @param[in,out] train_data an initialized MajData structure which
+ * on exit contains the training dataset
+ * @param[in,out] test_data an initialized MajData structure which
+ * on exit contains the test dataset
+ * @param[in] cv_idx a vector of cv partitions created by
+ * msvmmaj_make_cv_split()
+ * @param[in] fold_idx index of the fold which becomes the
+ * test dataset
+ */
void msvmmaj_get_tt_split(struct MajData *full_data, struct MajData *train_data,
struct MajData *test_data, long *cv_idx, long fold_idx)
{
@@ -67,13 +120,15 @@ void msvmmaj_get_tt_split(struct MajData *full_data, struct MajData *train_data,
test_data->y[k] = full_data->y[i];
for (j=0; j<m+1; j++)
matrix_set(test_data->Z, m+1, k, j,
- matrix_get(full_data->Z, m+1, i, j));
+ matrix_get(full_data->Z, m+1,
+ i, j));
k++;
} else {
train_data->y[l] = full_data->y[i];
for (j=0; j<m+1; j++)
matrix_set(train_data->Z, m+1, l, j,
- matrix_get(full_data->Z, m+1, i, j));
+ matrix_get(full_data->Z, m+1,
+ i, j));
l++;
}
}