From 993c503ce1b440be6947bc91fbf1fa6098569b51 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Mon, 9 May 2016 20:45:06 +0200 Subject: use gensvm namespace for all crossval/timer/util --- src/gensvm_crossval.c | 143 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 143 insertions(+) create mode 100644 src/gensvm_crossval.c (limited to 'src/gensvm_crossval.c') diff --git a/src/gensvm_crossval.c b/src/gensvm_crossval.c new file mode 100644 index 0000000..864e692 --- /dev/null +++ b/src/gensvm_crossval.c @@ -0,0 +1,143 @@ +/** + * @file gensvm_crossval.c + * @author Gertjan van den Burg + * @date January 7, 2014 + * @brief Functions for cross validation + * + * @details + * This file contains functions for performing cross validation. The function + * gensvm_make_cv_split() creates a cross validation vector for non-stratified + * cross validation. The function gensvm_get_tt_split() creates a train and + * test dataset from a given dataset and a pre-determined CV partition vector. + * See individual function documentation for details. + * + */ + +#include "gensvm.h" +#include "gensvm_crossval.h" +#include "gensvm_matrix.h" + +/** + * @brief Create a cross validation split vector + * + * @details + * A pre-allocated vector of length N is created which can be used to define + * cross validation splits. The folds contain between + * @f$ \lfloor N / folds \rfloor @f$ and @f$ \lceil N / folds \rceil @f$ + * instances. An instance is mapped to a partition randomly until all folds + * contain @f$ N \% folds @f$ instances. The zero fold then contains + * @f$ N / folds + N \% folds @f$ instances. These remaining @f$ N \% folds @f$ + * instances are then distributed over the first @f$ N \% folds @f$ folds. 
+ * + * @param[in] N number of instances + * @param[in] folds number of folds + * @param[in,out] cv_idx array of size N which contains the fold index + * for each observation on exit + * + */ +void gensvm_make_cv_split(long N, long folds, long *cv_idx) +{ + long i, j, idx; + + for (i=0; in; + long m = full_data->m; + long K = full_data->K; + + double value; + + test_n = 0; + for (i=0; in = test_n; + train_data->n = train_n; + + train_data->K = K; + test_data->K = K; + + train_data->m = m; + test_data->m = m; + + train_data->y = Calloc(long, train_n); + test_data->y = Calloc(long, test_n); + + train_data->RAW = Calloc(double, train_n*(m+1)); + test_data->RAW = Calloc(double, test_n*(m+1)); + + k = 0; + l = 0; + for (i=0; iy[k] = full_data->y[i]; + for (j=0; jRAW, m+1, i, j); + matrix_set(test_data->RAW, m+1, k, j, value); + } + k++; + } else { + train_data->y[l] = full_data->y[i]; + for (j=0; jRAW, m+1, i, j); + matrix_set(train_data->RAW, m+1, l, j, value); + } + l++; + } + } + + train_data->Z = train_data->RAW; + test_data->Z = test_data->RAW; +} -- cgit v1.2.3