From 044dc5a93c33d7aa4c9c98a626890c16446a56fc Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Mon, 16 May 2016 18:47:09 +0200 Subject: major refactor of the code --- src/gensvm_cv_util.c | 141 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 src/gensvm_cv_util.c (limited to 'src/gensvm_cv_util.c') diff --git a/src/gensvm_cv_util.c b/src/gensvm_cv_util.c new file mode 100644 index 0000000..d9cde09 --- /dev/null +++ b/src/gensvm_cv_util.c @@ -0,0 +1,141 @@ +/** + * @file gensvm_cv_util.c + * @author Gertjan van den Burg + * @date January 7, 2014 + * @brief Functions for cross validation + * + * @details + * This file contains functions for performing cross validation. The function + * gensvm_make_cv_split() creates a cross validation vector for non-stratified + * cross validation. The function gensvm_get_tt_split() creates a train and + * test dataset from a given dataset and a pre-determined CV partition vector. + * See individual function documentation for details. + * + */ + +#include "gensvm_cv_util.h" + +/** + * @brief Create a cross validation split vector + * + * @details + * A pre-allocated vector of length N is created which can be used to define + * cross validation splits. The folds contain between + * @f$ \lfloor N / folds \rfloor @f$ and @f$ \lceil N / folds \rceil @f$ + * instances. An instance is mapped to a partition randomly until all folds + * contain @f$ \lfloor N / folds \rfloor @f$ instances. The zero fold then contains + * @f$ N / folds + N \% folds @f$ instances. These remaining @f$ N \% folds @f$ + * instances are then distributed over the first @f$ N \% folds @f$ folds. 
+ * + * @param[in] N number of instances + * @param[in] folds number of folds + * @param[in,out] cv_idx array of size N which contains the fold index + * for each observation on exit + * + */ +void gensvm_make_cv_split(long N, long folds, long *cv_idx) +{ + long i, j, idx; + + for (i=0; in; + long m = full_data->m; + long K = full_data->K; + + double value; + + test_n = 0; + for (i=0; in = test_n; + train_data->n = train_n; + + train_data->K = K; + test_data->K = K; + + train_data->m = m; + test_data->m = m; + + train_data->y = Calloc(long, train_n); + test_data->y = Calloc(long, test_n); + + train_data->RAW = Calloc(double, train_n*(m+1)); + test_data->RAW = Calloc(double, test_n*(m+1)); + + k = 0; + l = 0; + for (i=0; iy[k] = full_data->y[i]; + for (j=0; jRAW, m+1, i, j); + matrix_set(test_data->RAW, m+1, k, j, value); + } + k++; + } else { + train_data->y[l] = full_data->y[i]; + for (j=0; jRAW, m+1, i, j); + matrix_set(train_data->RAW, m+1, l, j, value); + } + l++; + } + } + + train_data->Z = train_data->RAW; + test_data->Z = test_data->RAW; +} -- cgit v1.2.3