diff options
Diffstat (limited to 'src/gensvm_crossval.c')
| -rw-r--r-- | src/gensvm_crossval.c | 144 |
1 files changed, 0 insertions, 144 deletions
diff --git a/src/gensvm_crossval.c b/src/gensvm_crossval.c deleted file mode 100644 index 8f09cb5..0000000 --- a/src/gensvm_crossval.c +++ /dev/null @@ -1,144 +0,0 @@ -/** - * @file crossval.c - * @author Gertjan van den Burg - * @date January 7, 2014 - * @brief Functions for cross validation - * - * @details - * This file contains functions for performing cross validation. The funtion - * gensvm_make_cv_split() creates a cross validation vector for non-stratified - * cross validation. The function gensvm_get_tt_split() creates a train and - * test dataset from a given dataset and a pre-determined CV partition vector. - * See individual function documentation for details. - * - */ - -#include "globals.h" -#include "gensvm.h" -#include "gensvm_crossval.h" -#include "gensvm_matrix.h" - -/** - * @brief Create a cross validation split vector - * - * @details - * A pre-allocated vector of length N is created which can be used to define - * cross validation splits. The folds are contain between - * @f$ \lfloor N / folds \rfloor @f$ and @f$ \lceil N / folds \rceil @f$ - * instances. An instance is mapped to a partition randomly until all folds - * contain @f$ N \% folds @f$ instances. The zero fold then contains - * @f$ N / folds + N \% folds @f$ instances. These remaining @f$ N \% folds @f$ - * instances are then distributed over the first @f$ N \% folds @f$ folds. - * - * @param[in] N number of instances - * @param[in] folds number of folds - * @param[in,out] cv_idx array of size N which contains the fold index - * for each observation on exit - * - */ -void gensvm_make_cv_split(long N, long folds, long *cv_idx) -{ - long i, j, idx; - - for (i=0; i<N; i++) - cv_idx[i] = 0; - - long big_folds = N%folds; - long small_fold_size = N/folds; - - j = 0; - for (i=0; i<small_fold_size*folds; i++) - while (1) { - idx = rand()%N; - if (cv_idx[idx] == 0) { - cv_idx[idx] = j; - j++; - j%=folds; - break; - } - } - j = 0; - i = 0; - while (i < big_folds) { - if (cv_idx[j] == 0) { - cv_idx[j] = i++; - } - j++; - } -} - - -/** - * @brief Create train and test datasets for a CV split - * - * @details - * Given a GenData structure for the full dataset, a previously created - * cross validation split vector and a fold index, a training and test dataset - * are created. - * - * @param[in] full_data a GenData structure for the entire - * dataset - * @param[in,out] train_data an initialized GenData structure which - * on exit contains the training dataset - * @param[in,out] test_data an initialized GenData structure which - * on exit contains the test dataset - * @param[in] cv_idx a vector of cv partitions created by - * gensvm_make_cv_split() - * @param[in] fold_idx index of the fold which becomes the - * test dataset - */ -void gensvm_get_tt_split(struct GenData *full_data, struct GenData *train_data, - struct GenData *test_data, long *cv_idx, long fold_idx) -{ - long i, j, k, l, test_n, train_n; - - long n = full_data->n; - long m = full_data->m; - long K = full_data->K; - - double value; - - test_n = 0; - for (i=0; i<n; i++) - if (cv_idx[i] == fold_idx) - test_n++; - train_n = n - test_n; - - test_data->n = test_n; - train_data->n = train_n; - - train_data->K = K; - test_data->K = K; - - train_data->m = m; - test_data->m = m; - - train_data->y = Calloc(long, train_n); - test_data->y = Calloc(long, test_n); - - train_data->RAW = Calloc(double, train_n*(m+1)); - test_data->RAW = Calloc(double, test_n*(m+1)); - - k = 0; - l = 0; - for (i=0; i<n; i++) { - if (cv_idx[i] == fold_idx) { - test_data->y[k] = full_data->y[i]; - for (j=0; j<m+1; j++) { - value = matrix_get(full_data->RAW, m+1, i, j); - matrix_set(test_data->RAW, m+1, k, j, value); - } - k++; - } else { - train_data->y[l] = full_data->y[i]; - for (j=0; j<m+1; j++) { - value = matrix_get(full_data->RAW, m+1, i, j); - matrix_set(train_data->RAW, m+1, l, j, value); - } - l++; - } - } - - train_data->Z = train_data->RAW; - test_data->Z = test_data->RAW; -} |
