1 files changed, 59 insertions, 4 deletions
diff --git a/src/crossval.c b/src/crossval.c
index 9a3c1cc..10e3051 100644
--- a/src/crossval.c
+++ b/src/crossval.c
@@ -1,7 +1,40 @@
+/**
+ * @file crossval.c
+ * @author Gertjan van den Burg 
+ * @date January 7, 2014
+ * @brief Functions for cross validation
+ *
+ * @details
+ * This file contains functions for performing cross validation. The function
+ * msvmmaj_make_cv_split() creates a cross validation vector for non-stratified
+ * cross validation. The function msvmmaj_get_tt_split() creates a train and 
+ * test dataset from a given dataset and a pre-determined CV partition vector.
+ * See individual function documentation for details.
+ *
+ */
+
 #include "crossval.h"
-#include "matrix.h"
-#include "MSVMMaj.h"
+#include "msvmmaj.h"
+#include "msvmmaj_matrix.h"
 
+/**
+ * @brief Create a cross validation split vector
+ *
+ * @details
+ * A pre-allocated vector of length N is filled with fold indices that define
+ * the cross validation splits. Each fold contains between
+ * @f$ \lfloor N / folds \rfloor @f$ and @f$ \lceil N / folds \rceil @f$
+ * instances. Instances are mapped to folds randomly until every fold holds
+ * @f$ N / folds @f$ instances. The zero fold then contains
+ * @f$ N / folds + N \% folds @f$ instances. These remaining @f$ N \% folds @f$
+ * instances are then distributed over the first @f$ N \% folds @f$ folds.
+ *
+ * @param[in] 		N 	number of instances
+ * @param[in] 		folds 	number of folds
+ * @param[in,out] 	cv_idx 	array of size N which contains the fold index
+ * 				for each observation on exit	
+ *
+ */
 void msvmmaj_make_cv_split(long N, long folds, long *cv_idx)
 {
 	long i, j, idx;
@@ -30,6 +63,26 @@ void msvmmaj_make_cv_split(long N, long folds, long *cv_idx)
 	}
 }
 
+
+/**
+ * @brief Create train and test datasets for a CV split
+ *
+ * @details
+ * Given a MajData structure for the full dataset, a previously created
+ * cross validation split vector and a fold index, a training and test dataset
+ * are created. 
+ *
+ * @param[in] 		full_data 	a MajData structure for the entire 
+ * 					dataset
+ * @param[in,out] 	train_data 	an initialized MajData structure which
+ * 					on exit contains the training dataset
+ * @param[in,out] 	test_data 	an initialized MajData structure which
+ * 					on exit contains the test dataset
+ * @param[in] 		cv_idx 		a vector of cv partitions created by
+ * 					msvmmaj_make_cv_split()
+ * @param[in] 		fold_idx 	index of the fold which becomes the 
+ * 					test dataset
+ */
 void msvmmaj_get_tt_split(struct MajData *full_data, struct MajData *train_data,
 		struct MajData *test_data, long *cv_idx, long fold_idx)
 {
@@ -67,13 +120,15 @@ void msvmmaj_get_tt_split(struct MajData *full_data, struct MajData *train_data,
 			test_data->y[k] = full_data->y[i];
 			for (j=0; j<m+1; j++)
 				matrix_set(test_data->Z, m+1, k, j, 
-						matrix_get(full_data->Z, m+1, i, j));
+						matrix_get(full_data->Z, m+1, 
+							i, j));
 			k++;
 		} else {
 			train_data->y[l] = full_data->y[i];
 			for (j=0; j<m+1; j++)
 				matrix_set(train_data->Z, m+1, l, j,
-						matrix_get(full_data->Z, m+1, i, j));
+						matrix_get(full_data->Z, m+1, 
+							i, j));
 			l++;
 		}
 	}