From ddbd423f54e2fd92659a0d277ee844659eee8ba1 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Wed, 15 Jan 2014 00:35:21 +0100 Subject: added documentation, restart git usage, start implementing kernels --- src/crossval.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 59 insertions(+), 4 deletions(-) (limited to 'src/crossval.c') diff --git a/src/crossval.c b/src/crossval.c index 9a3c1cc..10e3051 100644 --- a/src/crossval.c +++ b/src/crossval.c @@ -1,7 +1,40 @@ +/** + * @file crossval.c + * @author Gertjan van den Burg + * @date January 7, 2014 + * @brief Functions for cross validation + * + * @details + * This file contains functions for performing cross validation. The function + * msvmmaj_make_cv_split() creates a cross validation vector for non-stratified + * cross validation. The function msvmmaj_get_tt_split() creates a train and + * test dataset from a given dataset and a pre-determined CV partition vector. + * See individual function documentation for details. + * + */ + #include "crossval.h" -#include "matrix.h" -#include "MSVMMaj.h" +#include "msvmmaj.h" +#include "msvmmaj_matrix.h" +/** + * @brief Create a cross validation split vector + * + * @details + * A pre-allocated vector of length N is created which can be used to define + * cross validation splits. The folds contain between + * @f$ \lfloor N / folds \rfloor @f$ and @f$ \lceil N / folds \rceil @f$ + * instances. An instance is mapped to a partition randomly until all folds + * contain @f$ N \% folds @f$ instances. The zero fold then contains + * @f$ N / folds + N \% folds @f$ instances. These remaining @f$ N \% folds @f$ + * instances are then distributed over the first @f$ N \% folds @f$ folds. 
+ * + * @param[in] N number of instances + * @param[in] folds number of folds + * @param[in,out] cv_idx array of size N which contains the fold index + * for each observation on exit + * + */ void msvmmaj_make_cv_split(long N, long folds, long *cv_idx) { long i, j, idx; @@ -30,6 +63,26 @@ void msvmmaj_make_cv_split(long N, long folds, long *cv_idx) } } + +/** + * @brief Create train and test datasets for a CV split + * + * @details + * Given a MajData structure for the full dataset, a previously created + * cross validation split vector and a fold index, a training and test dataset + * are created. + * + * @param[in] full_data a MajData structure for the entire + * dataset + * @param[in,out] train_data an initialized MajData structure which + * on exit contains the training dataset + * @param[in,out] test_data an initialized MajData structure which + * on exit contains the test dataset + * @param[in] cv_idx a vector of cv partitions created by + * msvmmaj_make_cv_split() + * @param[in] fold_idx index of the fold which becomes the + * test dataset + */ void msvmmaj_get_tt_split(struct MajData *full_data, struct MajData *train_data, struct MajData *test_data, long *cv_idx, long fold_idx) { @@ -67,13 +120,15 @@ void msvmmaj_get_tt_split(struct MajData *full_data, struct MajData *train_data, test_data->y[k] = full_data->y[i]; for (j=0; j<m+1; j++) matrix_set(test_data->Z, m+1, k, j, - matrix_get(full_data->Z, m+1, i, j)); + matrix_get(full_data->Z, m+1, + i, j)); k++; } else { train_data->y[l] = full_data->y[i]; for (j=0; j<m+1; j++) matrix_set(train_data->Z, m+1, l, j, - matrix_get(full_data->Z, m+1, i, j)); + matrix_get(full_data->Z, m+1, + i, j)); l++; } } -- cgit v1.2.3