aboutsummaryrefslogtreecommitdiff
path: root/src/gensvm_crossval.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/gensvm_crossval.c')
-rw-r--r--src/gensvm_crossval.c144
1 files changed, 0 insertions, 144 deletions
diff --git a/src/gensvm_crossval.c b/src/gensvm_crossval.c
deleted file mode 100644
index 8f09cb5..0000000
--- a/src/gensvm_crossval.c
+++ /dev/null
@@ -1,144 +0,0 @@
-/**
- * @file crossval.c
- * @author Gertjan van den Burg
- * @date January 7, 2014
- * @brief Functions for cross validation
- *
- * @details
- * This file contains functions for performing cross validation. The funtion
- * gensvm_make_cv_split() creates a cross validation vector for non-stratified
- * cross validation. The function gensvm_get_tt_split() creates a train and
- * test dataset from a given dataset and a pre-determined CV partition vector.
- * See individual function documentation for details.
- *
- */
-
-#include "globals.h"
-#include "gensvm.h"
-#include "gensvm_crossval.h"
-#include "gensvm_matrix.h"
-
-/**
- * @brief Create a cross validation split vector
- *
- * @details
- * A pre-allocated vector of length N is created which can be used to define
- * cross validation splits. The folds are contain between
- * @f$ \lfloor N / folds \rfloor @f$ and @f$ \lceil N / folds \rceil @f$
- * instances. An instance is mapped to a partition randomly until all folds
- * contain @f$ N \% folds @f$ instances. The zero fold then contains
- * @f$ N / folds + N \% folds @f$ instances. These remaining @f$ N \% folds @f$
- * instances are then distributed over the first @f$ N \% folds @f$ folds.
- *
- * @param[in] N number of instances
- * @param[in] folds number of folds
- * @param[in,out] cv_idx array of size N which contains the fold index
- * for each observation on exit
- *
- */
-void gensvm_make_cv_split(long N, long folds, long *cv_idx)
-{
- long i, j, idx;
-
- for (i=0; i<N; i++)
- cv_idx[i] = 0;
-
- long big_folds = N%folds;
- long small_fold_size = N/folds;
-
- j = 0;
- for (i=0; i<small_fold_size*folds; i++)
- while (1) {
- idx = rand()%N;
- if (cv_idx[idx] == 0) {
- cv_idx[idx] = j;
- j++;
- j%=folds;
- break;
- }
- }
- j = 0;
- i = 0;
- while (i < big_folds) {
- if (cv_idx[j] == 0) {
- cv_idx[j] = i++;
- }
- j++;
- }
-}
-
-
-/**
- * @brief Create train and test datasets for a CV split
- *
- * @details
- * Given a GenData structure for the full dataset, a previously created
- * cross validation split vector and a fold index, a training and test dataset
- * are created.
- *
- * @param[in] full_data a GenData structure for the entire
- * dataset
- * @param[in,out] train_data an initialized GenData structure which
- * on exit contains the training dataset
- * @param[in,out] test_data an initialized GenData structure which
- * on exit contains the test dataset
- * @param[in] cv_idx a vector of cv partitions created by
- * gensvm_make_cv_split()
- * @param[in] fold_idx index of the fold which becomes the
- * test dataset
- */
-void gensvm_get_tt_split(struct GenData *full_data, struct GenData *train_data,
- struct GenData *test_data, long *cv_idx, long fold_idx)
-{
- long i, j, k, l, test_n, train_n;
-
- long n = full_data->n;
- long m = full_data->m;
- long K = full_data->K;
-
- double value;
-
- test_n = 0;
- for (i=0; i<n; i++)
- if (cv_idx[i] == fold_idx)
- test_n++;
- train_n = n - test_n;
-
- test_data->n = test_n;
- train_data->n = train_n;
-
- train_data->K = K;
- test_data->K = K;
-
- train_data->m = m;
- test_data->m = m;
-
- train_data->y = Calloc(long, train_n);
- test_data->y = Calloc(long, test_n);
-
- train_data->RAW = Calloc(double, train_n*(m+1));
- test_data->RAW = Calloc(double, test_n*(m+1));
-
- k = 0;
- l = 0;
- for (i=0; i<n; i++) {
- if (cv_idx[i] == fold_idx) {
- test_data->y[k] = full_data->y[i];
- for (j=0; j<m+1; j++) {
- value = matrix_get(full_data->RAW, m+1, i, j);
- matrix_set(test_data->RAW, m+1, k, j, value);
- }
- k++;
- } else {
- train_data->y[l] = full_data->y[i];
- for (j=0; j<m+1; j++) {
- value = matrix_get(full_data->RAW, m+1, i, j);
- matrix_set(train_data->RAW, m+1, l, j, value);
- }
- l++;
- }
- }
-
- train_data->Z = train_data->RAW;
- test_data->Z = test_data->RAW;
-}