diff options
| author | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2014-01-15 00:35:21 +0100 |
|---|---|---|
| committer | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2014-01-15 00:35:21 +0100 |
| commit | ddbd423f54e2fd92659a0d277ee844659eee8ba1 (patch) | |
| tree | 316a82d463009364a6cdf07892bc3e28330698db /include | |
| parent | remove note in read_data (diff) | |
| download | gensvm-ddbd423f54e2fd92659a0d277ee844659eee8ba1.tar.gz gensvm-ddbd423f54e2fd92659a0d277ee844659eee8ba1.zip | |
added documentation, restart git usage, start implementing kernels
Diffstat (limited to 'include')
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | include/MSVMMaj.h | 46 |
| -rw-r--r-- | include/crossval.h | 12 |
| -rw-r--r-- | include/globals.h | 22 |
| -rw-r--r-- | include/kernel.h | 11 |
| -rw-r--r-- | include/libMSVMMaj.h | 17 |
| -rw-r--r-- | include/msvmmaj.h | 98 |
| -rw-r--r-- | include/msvmmaj_init.h | 23 |
| -rw-r--r-- | include/msvmmaj_kernel.h | 32 |
| -rw-r--r-- | include/msvmmaj_lapack.h | 23 |
| -rw-r--r-- | include/msvmmaj_matrix.h (renamed from include/matrix.h) | 15 |
| -rw-r--r-- | include/msvmmaj_pred.h | 11 |
| -rw-r--r-- | include/msvmmaj_train.h | 12 |
| -rw-r--r-- | include/msvmmaj_train_dataset.h | 76 |
| -rw-r--r-- | include/mylapack.h | 11 |
| -rw-r--r-- | include/parallel.h | 13 |
| -rw-r--r-- | include/strutil.h | 12 |
| -rw-r--r-- | include/timer.h | 15 |
| -rw-r--r-- | include/types.h | 40 |
| -rw-r--r-- | include/util.h | 15 |
19 files changed, 405 insertions, 99 deletions
diff --git a/include/MSVMMaj.h b/include/MSVMMaj.h deleted file mode 100644 index de99f91..0000000 --- a/include/MSVMMaj.h +++ /dev/null @@ -1,46 +0,0 @@ -#ifndef MSVMMAJ_H -#define MSVMMAJ_H - -#include "globals.h" -#include "types.h" - -/* - Model structure -*/ -struct MajModel { - int weight_idx; - long K; - long n; - long m; - double epsilon; - double p; - double kappa; - double lambda; - double *W; - double *t; - double *V; - double *Vbar; - double *U; - double *UU; - double *Q; - double *H; - double *R; - double *rho; - double training_error; - char *data_file; - KernelType kerneltype; - double *kernelparam; -}; - -/* - Data structure -*/ -struct MajData { - long K; - long n; - long m; - long *y; - double *Z; -}; - -#endif diff --git a/include/crossval.h b/include/crossval.h index 0794622..0dff0b9 100644 --- a/include/crossval.h +++ b/include/crossval.h @@ -1,3 +1,15 @@ +/** + * @file crossval.h + * @author Gertjan van den Burg + * @date January, 2014 + * @brief Header file for crossval.c + * + * @details + * Contains function declarations for functions needed for performing cross + * validation on MajData structures. + * + */ + #ifndef CROSSVAL_H #define CROSSVAL_H diff --git a/include/globals.h b/include/globals.h index 8420f76..55fb6c4 100644 --- a/include/globals.h +++ b/include/globals.h @@ -1,5 +1,23 @@ -#ifndef GLOBALS_H -#define GLOBALS_H +/** + * @file globals.h + * @author Gertjan van den Burg + * @date January, 2014 + * @brief Global definitions + * + * @details + * This header file contains defines and includes which are used in many + * parts of the program. Most notable are the Calloc, Malloc and Memset + * defines, which are commonly used to allocate memory. These functions + * are shorthands for their lowercase counterparts. + * + * Furthermore, a maximum and minimum function are defined here. These + * functions have their own include guards, to ensure potential linked + * libraries don't conflict with these definitions. 
+ * + */ + +#ifndef MSVMMAJ_GLOBALS_H +#define MSVMMAJ_GLOBALS_H #include <stdio.h> #include <stdlib.h> diff --git a/include/kernel.h b/include/kernel.h deleted file mode 100644 index ac5c35d..0000000 --- a/include/kernel.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef KERNEL_H -#define KERNEL_H - -#include "globals.h" -#include "types.h" - -// forward declarations -struct MajData; - -// function declarations - diff --git a/include/libMSVMMaj.h b/include/libMSVMMaj.h index 21efc2f..b7261dc 100644 --- a/include/libMSVMMaj.h +++ b/include/libMSVMMaj.h @@ -1,3 +1,20 @@ +/** + * @file libMSVMMaj.h + * @author Gertjan van den Burg + * @date August, 2013 + * @brief Header file for the core MSVMMaj library libMSVMMaj.c + * + * @details + * The core computational routines for MSVMMaj are defined in libMSVMMaj.c. + * This file contains function declarations for these functions. + * + */ + +/** + * @todo + * rename this file and libMSVMMaj.c to correspond with the lowercase convention. + * Also change the name of the include guard. + */ #ifndef LIBMSVMMAJ_H #define LIBMSVMMAJ_H diff --git a/include/msvmmaj.h b/include/msvmmaj.h new file mode 100644 index 0000000..d67ad8b --- /dev/null +++ b/include/msvmmaj.h @@ -0,0 +1,98 @@ +/** + * @file msvmmaj.h + * @author Gertjan van den Burg + * @date August, 2013 + * @brief Definitions for common structures + * + * @details + * Contains documentation and declarations of MajModel and MajData. + * + */ + +#ifndef MSVMMAJ_H +#define MSVMMAJ_H + +#include "globals.h" +#include "types.h" + +/** + * @brief A structure to represent a single MSVMMaj model. 
+ * + * @param weight_idx which weights to use (1 = unit, 2 = group) + * @param K number of classes in the dataset + * @param n number of instances in the dataset + * @param m number of predictors in the dataset + * @param epsilon stopping criterion + * @param p parameter for the L_p norm + * @param kappa parameter for the Huber hinge + * @param lambda regularization parameter + * @param *W pointer to the weight matrix + * @param *t pointer to the translation vector + * @param *V pointer to the augmented weight matrix + * @param *Vbar pointer to the augmented weight matrix from a + * previous iteration + * @param *U pointer to the simplex matrix + * @param *UU pointer to the 3D simplex difference matrix + * @param *Q pointer to the error matrix + * @param *H pointer to the Huber weighted error matrix + * @param *R pointer to the 0-1 auxiliary matrix + * @param *rho pointer to the instance weight vector + * @param training_error error after training has completed + * @param *data_file pointer to the filename of the data + * @param kerneltype kernel to be used in the model + * @param kernelparam pointer to the vector of kernel parameters + * @param use_cholesky whether the Cholesky decomposition should be + * used + * + */ +struct MajModel { + int weight_idx; + long K; + long n; + long m; + double epsilon; + double p; + double kappa; + double lambda; + double *W; + double *t; + double *V; + double *Vbar; + double *U; + double *UU; + double *Q; + double *H; + double *R; + double *rho; + double training_error; + char *data_file; + KernelType kerneltype; + double *kernelparam; + bool use_cholesky; +}; + +/** + * @brief A structure to represent the data. 
+ * + * @param K number of classes + * @param n number of instances + * @param m number of predictors + * @param *y pointer to vector of class labels + * @param *Z pointer to augmented data matrix + * @param kerneltype kerneltype used in MajData::Z + * @param *kernelparam kernel parameters used in MajData::Z + * @param use_cholesky whether the Cholesky decomposition is used in MajData::Z + * + */ +struct MajData { + long K; + long n; + long m; + long *y; + double *Z; + KernelType kerneltype; + double *kernelparam; + bool use_cholesky; +}; + +#endif diff --git a/include/msvmmaj_init.h b/include/msvmmaj_init.h new file mode 100644 index 0000000..6e2e36f --- /dev/null +++ b/include/msvmmaj_init.h @@ -0,0 +1,23 @@ +/** + * @file msvmmaj_init.h + * @author Gertjan van den Burg + * @date January, 2014 + * @brief Header file for msvmmaj_init.c + * + * @details + * Contains function declarations for the initialization functions for + * MajModel and MajData structures. + */ + +#ifndef MSVMMAJ_INIT_H +#define MSVMMAJ_INIT_H + +// forward declaration +struct MajData; +struct MajModel; + +struct MajModel *msvmmaj_init_model(); + +struct MajData *msvmmaj_init_data(); + +#endif diff --git a/include/msvmmaj_kernel.h b/include/msvmmaj_kernel.h new file mode 100644 index 0000000..69bf267 --- /dev/null +++ b/include/msvmmaj_kernel.h @@ -0,0 +1,32 @@ +/** + * @file msvmmaj_kernel.h + * @author Gertjan van den Burg + * @date January, 2014 + * @brief Header file for kernel functionality + * + * @details + * Contains function declarations for computing the kernel matrix + * in nonlinear MSVMMaj. 
Additional kernel functions should be + * included here and in msvmmaj_kernel.c + * + */ + +#ifndef MSVMMAJ_KERNEL_H +#define MSVMMAJ_KERNEL_H + +#include "globals.h" + +// forward declarations +struct MajData; +struct MajModel; + +// function declarations +void msvmmaj_make_kernel(struct MajModel *model, struct MajData *data); + +double msvmmaj_compute_rbf(double *x1, double *x2, double *kernelparam, + long n); +double msvmmaj_compute_poly(double *x1, double *x2, double *kernelparam, + long n); +double msvmmaj_compute_sigmoid(double *x1, double *x2, double *kernelparam, + long n); +#endif diff --git a/include/msvmmaj_lapack.h b/include/msvmmaj_lapack.h new file mode 100644 index 0000000..766a475 --- /dev/null +++ b/include/msvmmaj_lapack.h @@ -0,0 +1,23 @@ +/** + * @file msvmmaj_lapack.h + * @author Gertjan van den Burg + * @date August, 2013 + * @brief Header file for msvmmaj_lapack.c + * + * @details + * Function declarations for external LAPACK functions + * + */ + +#ifndef MSVMMAJ_LAPACK_H +#define MSVMMAJ_LAPACK_H + +#include "globals.h" + +int dposv(char UPLO, int N, int NRHS, double *A, int LDA, double *B, + int LDB); +int dsysv(char UPLO, int N, int NRHS, double *A, int LDA, int *IPIV, + double *B, int LDB, double *WORK, int LWORK); +int dpotrf(char UPLO, int N, double *A, int LDA); + +#endif diff --git a/include/matrix.h b/include/msvmmaj_matrix.h index 5f0a441..8f5ca59 100644 --- a/include/matrix.h +++ b/include/msvmmaj_matrix.h @@ -1,5 +1,16 @@ -#ifndef MATRIX_H -#define MATRIX_H +/** + * @file msvmmaj_matrix.h + * @author Gertjan van den Burg + * @date August, 2013 + * @brief Header file for msvmmaj_matrix.c + * + * @details + * Contains function declarations for functions useful for dealing with matrices. 
+ * + */ + +#ifndef MSVMMAJ_MATRIX_H +#define MSVMMAJ_MATRIX_H #include "globals.h" diff --git a/include/msvmmaj_pred.h b/include/msvmmaj_pred.h index 952389c..ce22b10 100644 --- a/include/msvmmaj_pred.h +++ b/include/msvmmaj_pred.h @@ -1,3 +1,14 @@ +/** + * @file msvmmaj_pred.h + * @author Gertjan van den Burg + * @date August, 2013 + * @brief Header file for msvmmaj_pred.c + * + * @details + * Contains function declarations for prediction functions. + * + */ + #ifndef MSVMMAJ_PRED_H #define MSVMMAJ_PRED_H diff --git a/include/msvmmaj_train.h b/include/msvmmaj_train.h index 4fb198e..835100f 100644 --- a/include/msvmmaj_train.h +++ b/include/msvmmaj_train.h @@ -1,3 +1,15 @@ +/** + * @file msvmmaj_train.h + * @author Gertjan van den Burg + * @date August, 2013 + * @brief Header file for msvmmaj_train.c + * + * @details + * Contains function declarations for functions used to train a single + * MajModel. + * + */ + #ifndef MSVMMAJ_TRAIN_H #define MSVMMAJ_TRAIN_H diff --git a/include/msvmmaj_train_dataset.h b/include/msvmmaj_train_dataset.h index fdcdb4c..5248b4a 100644 --- a/include/msvmmaj_train_dataset.h +++ b/include/msvmmaj_train_dataset.h @@ -1,9 +1,39 @@ +/** + * @file msvmmaj_train_dataset.h + * @author Gertjan van den Burg + * @date August, 2013 + * @brief Structs and functions necessary for the grid search + * + * @details + * The grid search for the optimal parameters is done through a queue. + * This file contains struct definitions for this queue and a single + * task in a queue, as well as a structure for the complete training + * scheme. Function declarations are also included. + * + */ + #ifndef MSVMMAJ_TRAIN_DATASET_H #define MSVMMAJ_TRAIN_DATASET_H #include "globals.h" #include "types.h" +/** + * @brief A structure for a single task in the queue. 
+ * + * @param folds number of folds in cross validation + * @param ID numeric id of the task in the queue + * @param weight_idx parameter for the MajModel + * @param p parameter for the MajModel + * @param kappa parameter for the MajModel + * @param lambda parameter for the MajModel + * @param epsilon parameter for the MajModel + * @param kerneltype parameter for the MajModel + * @param *kernel_param parameters for the MajModel + * @param *train_data pointer to the training data + * @param *test_data pointer to the test data (if any) + * @param performance performance after cross validation + */ struct Task { KernelType kerneltype; int weight_idx; @@ -19,14 +49,54 @@ struct Task { double performance; }; +/** + * @brief Simple task queue. + * + * This struct is basically just an array of pointers to Task instances, + * with a length and an index of the current task. + * + * @param **tasks array of pointers to Task structs + * @param N size of task array + * @param i index used for keeping track of the queue + */ struct Queue { struct Task **tasks; long N; long i; }; +/** + * @brief Structure for describing the entire grid search + * + * @param traintype type of training to use + * @param kerneltype type of kernel to use throughout training + * @param repeats number of repeats to be done after the grid + * search to find the parameter set with the + * most consistent high performance + * @param folds number of folds in cross validation + * @param Np size of the array of p values + * @param Nl size of the array of lambda values + * @param Nk size of the array of kappa values + * @param Ne size of the array of epsilon values + * @param Nw size of the array of weight_idx values + * @param Ng size of the array of gamma values + * @param Nc size of the array of coef values + * @param Nd size of the array of degree values + * @param *weight_idxs array of weight_idxs + * @param *ps array of p values + * @param *lambdas array of lambda values + * @param *kappas array of 
kappa values + * @param *epsilons array of epsilon values + * @param *gammas array of gamma values + * @param *coefs array of coef values + * @param *degrees array of degree values + * @param *train_data_file filename of train data file + * @param *test_data_file filename of test data file + * + */ struct Training { TrainType traintype; + KernelType kerneltype; long repeats; long folds; long Np; @@ -34,11 +104,17 @@ struct Training { long Nk; long Ne; long Nw; + long Ng; + long Nc; + long Nd; int *weight_idxs; double *ps; double *lambdas; double *kappas; double *epsilons; + double *gammas; + double *coefs; + double *degrees; char *train_data_file; char *test_data_file; }; diff --git a/include/mylapack.h b/include/mylapack.h deleted file mode 100644 index 4c79e0e..0000000 --- a/include/mylapack.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef MYLAPACK_H -#define MYLAPACK_H - -#include "globals.h" - -int dposv(char UPLO, int N, int NRHS, double *A, int LDA, double *B, - int LDB); -int dsysv(char UPLO, int N, int NRHS, double *A, int LDA, int *IPIV, - double *B, int LDB, double *WORK, int LWORK); - -#endif diff --git a/include/parallel.h b/include/parallel.h deleted file mode 100644 index 8747347..0000000 --- a/include/parallel.h +++ /dev/null @@ -1,13 +0,0 @@ - -struct Task { - enum KernelType kernel_type; - int weight_idx; - double epsilon; - double p; - double kappa; - double lambda; - double *kernel_param; - struct MajData **data; -} - - diff --git a/include/strutil.h b/include/strutil.h index 66722ae..740fde1 100644 --- a/include/strutil.h +++ b/include/strutil.h @@ -1,3 +1,15 @@ +/** + * @file strutil.h + * @author Gertjan van den Burg + * @date August, 2013 + * @brief Header file for strutil.c + * + * @details + * Function declarations for useful string functions used in parsing + * input files. 
+ * + */ + #ifndef STRUTIL_H #define STRUTIL_H diff --git a/include/timer.h b/include/timer.h index 8a737e0..d4d4d23 100644 --- a/include/timer.h +++ b/include/timer.h @@ -1,5 +1,16 @@ -#ifndef TIMER_H -#define TIMER_H +/** + * @file timer.h + * @author Gertjan van den Burg + * @date August, 2013 + * @brief Header file for timer.c + * + * @details + * Function declaration for timer function used to measure computation time. + * + */ + +#ifndef MSVMMAJ_TIMER_H +#define MSVMMAJ_TIMER_H #include "globals.h" diff --git a/include/types.h b/include/types.h index b4db8d8..f6d008b 100644 --- a/include/types.h +++ b/include/types.h @@ -1,21 +1,41 @@ -#ifndef TYPES_H -#define TYPES_H +/** + * @file types.h + * @author Gertjan van den Burg + * @date August, 2013 + * @brief Definitions of common types + * + * @details + * Here common types used throughout the program are defined. + * + */ +#ifndef MSVMMAJ_TYPES_H +#define MSVMMAJ_TYPES_H + +/** + * @brief Implementation of true and false + */ typedef enum { - false, - true + false=0, /**< false keyword, corresponding to 0. */ + true=1 /**< true keyword, corresponding to 1. */ } bool; +/** + * @brief type of training used in parameter grid search + */ typedef enum { - CV=0, - TT=1 + CV=0, /**< cross validation */ + TT=1 /**< data with existing train/test split */ } TrainType; +/** + * @brief type of kernel used in training + */ typedef enum { - K_LINEAR=0, - K_POLY=1, - K_RBF=2, - K_SIGMOID=3, + K_LINEAR=0, /**< Linear kernel */ + K_POLY=1, /**< Polynomial kernel */ + K_RBF=2, /**< RBF kernel */ + K_SIGMOID=3, /**< Sigmoid kernel */ } KernelType; #endif diff --git a/include/util.h b/include/util.h index facae79..995a927 100644 --- a/include/util.h +++ b/include/util.h @@ -1,5 +1,16 @@ -#ifndef UTIL_H -#define UTIL_H +/** + * @file util.h + * @author Gertjan van den Burg + * @date August, 2013 + * @brief Header file for util.c + * + * @details + * Function declarations for utility functions of the program. 
+ * + */ + +#ifndef MSVMMAJ_UTIL_H +#define MSVMMAJ_UTIL_H #include "globals.h" |
