aboutsummaryrefslogtreecommitdiff
path: root/include/msvmmaj_train_dataset.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/msvmmaj_train_dataset.h')
-rw-r--r--include/msvmmaj_train_dataset.h76
1 files changed, 76 insertions, 0 deletions
diff --git a/include/msvmmaj_train_dataset.h b/include/msvmmaj_train_dataset.h
index fdcdb4c..5248b4a 100644
--- a/include/msvmmaj_train_dataset.h
+++ b/include/msvmmaj_train_dataset.h
@@ -1,9 +1,39 @@
+/**
+ * @file msvmmaj_train_dataset.h
+ * @author Gertjan van den Burg
+ * @date August, 2013
+ * @brief Structs and functions necessary for the grid search
+ *
+ * @details
+ * The grid search for the optimal parameters is done through a queue.
+ * This file contains struct definitions for this queue and a single
+ * task in a queue, as well as a structure for the complete training
+ * scheme. Function declarations are also included.
+ *
+ */
+
#ifndef MSVMMAJ_TRAIN_DATASET_H
#define MSVMMAJ_TRAIN_DATASET_H
#include "globals.h"
#include "types.h"
+/**
+ * @brief A structure for a single task in the queue.
+ *
+ * @param folds number of folds in cross validation
+ * @param ID numeric id of the task in the queue
+ * @param weight_idx parameter for the MajModel
+ * @param p parameter for the MajModel
+ * @param kappa parameter for the MajModel
+ * @param lambda parameter for the MajModel
+ * @param epsilon parameter for the MajModel
+ * @param kerneltype parameter for the MajModel
+ * @param *kernel_param parameters for the MajModel
+ * @param *train_data pointer to the training data
+ * @param *test_data pointer to the test data (if any)
+ * @param performance performance after cross validation
+ */
struct Task {
KernelType kerneltype;
int weight_idx;
@@ -19,14 +49,54 @@ struct Task {
double performance;
};
+/**
+ * @brief Simple task queue.
+ *
+ * This struct is basically just an array of pointers to Task instances,
+ * with a length and an index of the current task.
+ *
+ * @param **tasks array of pointers to Task structs
+ * @param N size of task array
+ * @param i index used for keeping track of the queue
+ */
struct Queue {
struct Task **tasks;
long N;
long i;
};
+/**
+ * @brief Structure for describing the entire grid search
+ *
+ * @param traintype type of training to use
+ * @param kerneltype type of kernel to use throughout training
+ * @param repeats number of repeats to be done after the grid
+ * search to find the parameter set with the
+ * most consistent high performance
+ * @param folds number of folds in cross validation
+ * @param Np size of the array of p values
+ * @param Nl size of the array of lambda values
+ * @param Nk size of the array of kappa values
+ * @param Ne size of the array of epsilon values
+ * @param Nw size of the array of weight_idx values
+ * @param Ng size of the array of gamma values
+ * @param Nc size of the array of coef values
+ * @param Nd size of the array of degree values
+ * @param *weight_idxs array of weight_idxs
+ * @param *ps array of p values
+ * @param *lambdas array of lambda values
+ * @param *kappas array of kappa values
+ * @param *epsilons array of epsilon values
+ * @param *gammas array of gamma values
+ * @param *coefs array of coef values
+ * @param *degrees array of degree values
+ * @param *train_data_file filename of train data file
+ * @param *test_data_file filename of test data file
+ *
+ */
struct Training {
TrainType traintype;
+ KernelType kerneltype;
long repeats;
long folds;
long Np;
@@ -34,11 +104,17 @@ struct Training {
long Nk;
long Ne;
long Nw;
+ long Ng;
+ long Nc;
+ long Nd;
int *weight_idxs;
double *ps;
double *lambdas;
double *kappas;
double *epsilons;
+ double *gammas;
+ double *coefs;
+ double *degrees;
char *train_data_file;
char *test_data_file;
};