diff options
Diffstat (limited to 'include')
| -rw-r--r-- | include/crossval.h | 25 | ||||
| -rw-r--r-- | include/globals.h | 38 | ||||
| -rw-r--r-- | include/libMSVMMaj.h | 40 | ||||
| -rw-r--r-- | include/msvmmaj.h | 98 | ||||
| -rw-r--r-- | include/msvmmaj_init.h | 27 | ||||
| -rw-r--r-- | include/msvmmaj_io.h | 30 | ||||
| -rw-r--r-- | include/msvmmaj_kernel.h | 32 | ||||
| -rw-r--r-- | include/msvmmaj_lapack.h | 23 | ||||
| -rw-r--r-- | include/msvmmaj_matrix.h | 29 | ||||
| -rw-r--r-- | include/msvmmaj_pred.h | 26 | ||||
| -rw-r--r-- | include/msvmmaj_train.h | 31 | ||||
| -rw-r--r-- | include/msvmmaj_train_dataset.h | 137 | ||||
| -rw-r--r-- | include/strutil.h | 31 | ||||
| -rw-r--r-- | include/timer.h | 21 | ||||
| -rw-r--r-- | include/types.h | 41 | ||||
| -rw-r--r-- | include/util.h | 27 |
16 files changed, 656 insertions, 0 deletions
diff --git a/include/crossval.h b/include/crossval.h new file mode 100644 index 0000000..0dff0b9 --- /dev/null +++ b/include/crossval.h @@ -0,0 +1,25 @@ +/** + * @file crossval.h + * @author Gertjan van den Burg + * @date January, 2014 + * @brief Header file for crossval.c + * + * @details + * Contains function declarations for functions needed for performing cross + * validation on MajData structures. + * + */ + +#ifndef CROSSVAL_H +#define CROSSVAL_H + +#include "globals.h" + +// forward declaration +struct MajData; + +void msvmmaj_make_cv_split(long N, long folds, long *cv_idx); +void msvmmaj_get_tt_split(struct MajData *full_data, struct MajData *train_data, + struct MajData *test_data, long *cv_idx, long fold_idx); + +#endif diff --git a/include/globals.h b/include/globals.h new file mode 100644 index 0000000..55fb6c4 --- /dev/null +++ b/include/globals.h @@ -0,0 +1,38 @@ +/** + * @file globals.h + * @author Gertjan van den Burg + * @date January, 2014 + * @brief Global definitions + * + * @details + * This header file contains defines and includes which are used in many + * parts of the program. Most notable are the Calloc, Malloc and Memset + * defines, which are commonly used to allocate memory. These macros + * are shorthands for their lowercase counterparts. + * + * Furthermore, maximum and minimum macros are defined here; each argument + * may be evaluated twice. They have their own include guard, to ensure + * potential linked libraries don't conflict with these definitions. + * + */ + +#ifndef MSVMMAJ_GLOBALS_H +#define MSVMMAJ_GLOBALS_H + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#define MAX_LINE_LENGTH 1024 + +#define Calloc(type, n) ((type *)calloc((n), sizeof(type))) +#define Malloc(type, n) ((type *)malloc((n)*sizeof(type))) +#define Memset(var, type, n) memset((var), 0, (n)*sizeof(type)) + +#ifndef MIN_MAX_DEFINE +#define MIN_MAX_DEFINE +#define maximum(a, b) ((a) > (b) ? (a) : (b)) +#define minimum(a, b) ((a) < (b) ? 
(a) : (b)) +#endif + +#endif diff --git a/include/libMSVMMaj.h b/include/libMSVMMaj.h new file mode 100644 index 0000000..b7261dc --- /dev/null +++ b/include/libMSVMMaj.h @@ -0,0 +1,40 @@ +/** + * @file libMSVMMaj.h + * @author Gertjan van den Burg + * @date August, 2013 + * @brief Header file for the core MSVMMaj library libMSVMMaj.c + * + * @details + * The core computational routines for MSVMMaj are defined in libMSVMMaj.c. + * This file contains function declarations for these functions. + * + */ + +/** + * @todo + * rename this file and libMSVMMaj.c to correspond with the lowercase convention. + * Also change the name of the include guard. + */ +#ifndef LIBMSVMMAJ_H +#define LIBMSVMMAJ_H + +#include "globals.h" + +// forward declarations +struct MajData; +struct MajModel; + +// function declarations +void msvmmaj_simplex_gen(long K, double *U); +void msvmmaj_category_matrix(struct MajModel *model, struct MajData *data); +void msvmmaj_simplex_diff(struct MajModel *model, struct MajData *dataset); + +void msvmmaj_calculate_errors(struct MajModel *model, struct MajData *data, double *ZV); +void msvmmaj_calculate_huber(struct MajModel *model); + +void msvmmaj_step_doubling(struct MajModel *model); + +void msvmmaj_seed_model_V(struct MajModel *from_model, struct MajModel *to_model); +void msvmmaj_initialize_weights(struct MajData *data, struct MajModel *model); + +#endif diff --git a/include/msvmmaj.h b/include/msvmmaj.h new file mode 100644 index 0000000..d67ad8b --- /dev/null +++ b/include/msvmmaj.h @@ -0,0 +1,98 @@ +/** + * @file msvmmaj.h + * @author Gertjan van den Burg + * @date August, 2013 + * @brief Definitions for common structures + * + * @details + * Contains documentation and declarations of MajModel and MajData. + * + */ + +#ifndef MSVMMAJ_H +#define MSVMMAJ_H + +#include "globals.h" +#include "types.h" + +/** + * @brief A structure to represent a single MSVMMaj model. 
+ * + * @param weight_idx which weights to use (1 = unit, 2 = group) + * @param K number of classes in the dataset + * @param n number of instances in the dataset + * @param m number of predictors in the dataset + * @param epsilon stopping criterion + * @param p parameter for the L_p norm + * @param kappa parameter for the Huber hinge + * @param lambda regularization parameter + * @param *W pointer to the weight matrix + * @param *t pointer to the translation vector + * @param *V pointer to the augmented weight matrix + * @param *Vbar pointer to the augmented weight matrix from a + * previous iteration + * @param *U pointer to the simplex matrix + * @param *UU pointer to the 3D simplex difference matrix + * @param *Q pointer to the error matrix + * @param *H pointer to the Huber weighted error matrix + * @param *R pointer to the 0-1 auxiliary matrix + * @param *rho pointer to the instance weight vector + * @param training_error error after training has completed + * @param *data_file pointer to the filename of the data + * @param kerneltype kernel to be used in the model + * @param kernelparam pointer to the vector of kernel parameters + * @param use_cholesky whether the Cholesky decomposition should be + * used + * + */ +struct MajModel { + int weight_idx; + long K; + long n; + long m; + double epsilon; + double p; + double kappa; + double lambda; + double *W; + double *t; + double *V; + double *Vbar; + double *U; + double *UU; + double *Q; + double *H; + double *R; + double *rho; + double training_error; + char *data_file; + KernelType kerneltype; + double *kernelparam; + bool use_cholesky; +}; + +/** + * @brief A structure to represent the data. 
+ * + * @param K number of classes + * @param n number of instances + * @param m number of predictors + * @param *y pointer to vector of class labels + * @param *Z pointer to augmented data matrix + * @param kerneltype kerneltype used in MajData::Z + * @param *kernelparam kernel parameters used in MajData::Z + * @param use_cholesky whether the Cholesky decomposition is used in MajData::Z + * + */ +struct MajData { + long K; + long n; + long m; + long *y; + double *Z; + KernelType kerneltype; + double *kernelparam; + bool use_cholesky; +}; + +#endif diff --git a/include/msvmmaj_init.h b/include/msvmmaj_init.h new file mode 100644 index 0000000..febfb4a --- /dev/null +++ b/include/msvmmaj_init.h @@ -0,0 +1,27 @@ +/** + * @file msvmmaj_init.h + * @author Gertjan van den Burg + * @date January, 2014 + * @brief Header file for msvmmaj_init.c + * + * @details + * Contains function declarations for the initialization functions for + * MajModel and MajData structures. + */ + +#ifndef MSVMMAJ_INIT_H +#define MSVMMAJ_INIT_H + +// forward declarations +struct MajData; +struct MajModel; + +struct MajModel *msvmmaj_init_model(void); + +struct MajData *msvmmaj_init_data(void); + +void msvmmaj_allocate_model(struct MajModel *model); +void msvmmaj_free_model(struct MajModel *model); +void msvmmaj_free_data(struct MajData *data); + +#endif diff --git a/include/msvmmaj_io.h b/include/msvmmaj_io.h new file mode 100644 index 0000000..99fb4dc --- /dev/null +++ b/include/msvmmaj_io.h @@ -0,0 +1,30 @@ +/** + * @file msvmmaj_io.h + * @author Gertjan van den Burg + * @date January, 2014 + * @brief Header file for msvmmaj_io.c + * + * @details + * Function declarations for input/output functions. 
+ * + */ + +#ifndef MSVMMAJ_IO_H +#define MSVMMAJ_IO_H + +#include "globals.h" + +// forward declarations +struct MajData; +struct MajModel; + +// function declarations +void msvmmaj_read_data(struct MajData *dataset, char *data_file); + +void msvmmaj_read_model(struct MajModel *model, char *model_filename); +void msvmmaj_write_model(struct MajModel *model, char *output_filename); + +void msvmmaj_write_predictions(struct MajData *data, long *predy, + char *output_filename); + +#endif diff --git a/include/msvmmaj_kernel.h b/include/msvmmaj_kernel.h new file mode 100644 index 0000000..69bf267 --- /dev/null +++ b/include/msvmmaj_kernel.h @@ -0,0 +1,32 @@ +/** + * @file msvmmaj_kernel.h + * @author Gertjan van den Burg + * @date January, 2014 + * @brief Header file for kernel functionality + * + * @details + * Contains function declarations for computing the kernel matrix + * in nonlinear MSVMMaj. Additional kernel functions should be + * included here and in msvmmaj_kernel.c + * + */ + +#ifndef MSVMMAJ_KERNEL_H +#define MSVMMAJ_KERNEL_H + +#include "globals.h" + +// forward declarations +struct MajData; +struct MajModel; + +// function declarations +void msvmmaj_make_kernel(struct MajModel *model, struct MajData *data); + +double msvmmaj_compute_rbf(double *x1, double *x2, double *kernelparam, + long n); +double msvmmaj_compute_poly(double *x1, double *x2, double *kernelparam, + long n); +double msvmmaj_compute_sigmoid(double *x1, double *x2, double *kernelparam, + long n); +#endif diff --git a/include/msvmmaj_lapack.h b/include/msvmmaj_lapack.h new file mode 100644 index 0000000..766a475 --- /dev/null +++ b/include/msvmmaj_lapack.h @@ -0,0 +1,23 @@ +/** + * @file msvmmaj_lapack.h + * @author Gertjan van den Burg + * @date August, 2013 + * @brief Header file for msvmmaj_lapack.c + * + * @details + * Function declarations for external LAPACK functions + * + */ + +#ifndef MSVMMAJ_LAPACK_H +#define MSVMMAJ_LAPACK_H + +#include "globals.h" + +int dposv(char UPLO, int N, 
int NRHS, double *A, int LDA, double *B, + int LDB); +int dsysv(char UPLO, int N, int NRHS, double *A, int LDA, int *IPIV, + double *B, int LDB, double *WORK, int LWORK); +int dpotrf(char UPLO, int N, double *A, int LDA); + +#endif diff --git a/include/msvmmaj_matrix.h b/include/msvmmaj_matrix.h new file mode 100644 index 0000000..8f5ca59 --- /dev/null +++ b/include/msvmmaj_matrix.h @@ -0,0 +1,29 @@ +/** + * @file msvmmaj_matrix.h + * @author Gertjan van den Burg + * @date August, 2013 + * @brief Header file for msvmmaj_matrix.c + * + * @details + * Contains function declarations for functions useful for dealing with matrices. + * + */ + +#ifndef MSVMMAJ_MATRIX_H +#define MSVMMAJ_MATRIX_H + +#include "globals.h" + +void matrix_set(double *M, long cols, long i, long j, double val); +void matrix_add(double *M, long cols, long i, long j, double val); +void matrix_mul(double *M, long cols, long i, long j, double val); + +double matrix_get(double *M, long cols, long i, long j); + +void matrix3_set(double *M, long N2, long N3, long i, long j, long k, + double val); +double matrix3_get(double *M, long N2, long N3, long i, long j, long k); + +void print_matrix(double *M, long rows, long cols); + +#endif diff --git a/include/msvmmaj_pred.h b/include/msvmmaj_pred.h new file mode 100644 index 0000000..ce22b10 --- /dev/null +++ b/include/msvmmaj_pred.h @@ -0,0 +1,26 @@ +/** + * @file msvmmaj_pred.h + * @author Gertjan van den Burg + * @date August, 2013 + * @brief Header file for msvmmaj_pred.c + * + * @details + * Contains function declarations for prediction functions. 
+ * + */ + +#ifndef MSVMMAJ_PRED_H +#define MSVMMAJ_PRED_H + +#include "globals.h" + +// forward declarations +struct MajData; +struct MajModel; + +// function declarations +void msvmmaj_predict_labels(struct MajData *data, struct MajModel *model, + long *predy); +double msvmmaj_prediction_perf(struct MajData *data, long *predy); + +#endif diff --git a/include/msvmmaj_train.h b/include/msvmmaj_train.h new file mode 100644 index 0000000..835100f --- /dev/null +++ b/include/msvmmaj_train.h @@ -0,0 +1,31 @@ +/** + * @file msvmmaj_train.h + * @author Gertjan van den Burg + * @date August, 2013 + * @brief Header file for msvmmaj_train.c + * + * @details + * Contains function declarations for functions used to train a single + * MajModel. + * + */ + +#ifndef MSVMMAJ_TRAIN_H +#define MSVMMAJ_TRAIN_H + +#include "globals.h" + +// forward declarations +struct MajData; +struct MajModel; + +// function declarations +void msvmmaj_optimize(struct MajModel *model, struct MajData *data); + +double msvmmaj_get_loss(struct MajModel *model, struct MajData *data, + double *ZV); + +void msvmmaj_get_update(struct MajModel *model, struct MajData *data, + double *B, double *ZAZ, double *ZAZV, double *ZAZVT); + +#endif diff --git a/include/msvmmaj_train_dataset.h b/include/msvmmaj_train_dataset.h new file mode 100644 index 0000000..5248b4a --- /dev/null +++ b/include/msvmmaj_train_dataset.h @@ -0,0 +1,137 @@ +/** + * @file msvmmaj_train_dataset.h + * @author Gertjan van den Burg + * @date August, 2013 + * @brief Structs and functions necessary for the grid search + * + * @details + * The grid search for the optimal parameters is done through a queue. + * This file contains struct definitions for this queue and a single + * task in a queue, as well as a structure for the complete training + * scheme. Function declarations are also included. 
+ * + */ + +#ifndef MSVMMAJ_TRAIN_DATASET_H +#define MSVMMAJ_TRAIN_DATASET_H + +#include "globals.h" +#include "types.h" + +/** + * @brief A structure for a single task in the queue. + * + * @param folds number of folds in cross validation + * @param ID numeric id of the task in the queue + * @param weight_idx parameter for the MajModel + * @param p parameter for the MajModel + * @param kappa parameter for the MajModel + * @param lambda parameter for the MajModel + * @param epsilon parameter for the MajModel + * @param kerneltype parameter for the MajModel + * @param *kernel_param parameters for the MajModel + * @param *train_data pointer to the training data + * @param *test_data pointer to the test data (if any) + * @param performance performance after cross validation + */ +struct Task { + KernelType kerneltype; + int weight_idx; + long folds; + long ID; + double p; + double kappa; + double lambda; + double epsilon; + double *kernel_param; + struct MajData *train_data; + struct MajData *test_data; + double performance; +}; + +/** + * @brief Simple task queue. + * + * This struct is basically just an array of pointers to Task instances, + * with a length and an index of the current task. 
+ * + * @param **tasks array of pointers to Task structs + * @param N size of task array + * @param i index used for keeping track of the queue + */ +struct Queue { + struct Task **tasks; + long N; + long i; +}; + +/** + * @brief Structure for describing the entire grid search + * + * @param traintype type of training to use + * @param kerneltype type of kernel to use throughout training + * @param repeats number of repeats to be done after the grid + * search to find the parameter set with the + * most consistent high performance + * @param folds number of folds in cross validation + * @param Np size of the array of p values + * @param Nl size of the array of lambda values + * @param Nk size of the array of kappa values + * @param Ne size of the array of epsilon values + * @param Nw size of the array of weight_idx values + * @param Ng size of the array of gamma values + * @param Nc size of the array of coef values + * @param Nd size of the array of degree values + * @param *weight_idxs array of weight_idxs + * @param *ps array of p values + * @param *lambdas array of lambda values + * @param *kappas array of kappa values + * @param *epsilons array of epsilon values + * @param *gammas array of gamma values + * @param *coefs array of coef values + * @param *degrees array of degree values + * @param *train_data_file filename of train data file + * @param *test_data_file filename of test data file + * + */ +struct Training { + TrainType traintype; + KernelType kerneltype; + long repeats; + long folds; + long Np; + long Nl; + long Nk; + long Ne; + long Nw; + long Ng; + long Nc; + long Nd; + int *weight_idxs; + double *ps; + double *lambdas; + double *kappas; + double *epsilons; + double *gammas; + double *coefs; + double *degrees; + char *train_data_file; + char *test_data_file; +}; + +void make_queue(struct Training *training, struct Queue *queue, + struct MajData *train_data, struct MajData *test_data); + +struct Task *get_next_task(struct Queue *q); +void 
start_training_tt(struct Queue *q); +void start_training_cv(struct Queue *q); +void free_queue(struct Queue *q); + +void consistency_repeats(struct Queue *q, long repeats, TrainType traintype); + +double cross_validation(struct MajModel *model, struct MajModel *seed_model, + struct MajData *data, long folds); + +void make_model_from_task(struct Task *task, struct MajModel *model); +void copy_model(struct MajModel *from, struct MajModel *to); +#endif diff --git a/include/strutil.h b/include/strutil.h new file mode 100644 index 0000000..740fde1 --- /dev/null +++ b/include/strutil.h @@ -0,0 +1,31 @@ +/** + * @file strutil.h + * @author Gertjan van den Burg + * @date August, 2013 + * @brief Header file for strutil.c + * + * @details + * Function declarations for useful string functions used in parsing + * input files. + * + */ + +#ifndef STRUTIL_H +#define STRUTIL_H + +#include "globals.h" +#include "types.h" + +bool str_startswith(const char *str, const char *pre); +bool str_endswith(const char *str, const char *suf); + +void next_line(FILE *fid, char *filename); +void get_line(FILE *fid, char *filename, char *buffer); + +double get_fmt_double(FILE *fid, char *filename, const char *fmt); +long get_fmt_long(FILE *fid, char *filename, const char *fmt); + +long all_doubles_str(char *buffer, long offset, double *all_doubles); +long all_longs_str(char *buffer, long offset, long *all_longs); + +#endif diff --git a/include/timer.h b/include/timer.h new file mode 100644 index 0000000..d4af649 --- /dev/null +++ b/include/timer.h @@ -0,0 +1,21 @@ +/** + * @file timer.h + * @author Gertjan van den Burg + * @date August, 2013 + * @brief Header file for timer.c + * + * @details + * Function declaration for timer function used to measure computation time. 
+ * + */ + +#ifndef MSVMMAJ_TIMER_H +#define MSVMMAJ_TIMER_H + +#include "globals.h" + +double elapsed_time(clock_t s_time, clock_t e_time); + +void get_time_string(char *buffer); + +#endif diff --git a/include/types.h b/include/types.h new file mode 100644 index 0000000..f6d008b --- /dev/null +++ b/include/types.h @@ -0,0 +1,41 @@ +/** + * @file types.h + * @author Gertjan van den Burg + * @date August, 2013 + * @brief Definitions of common types + * + * @details + * Here common types used throughout the program are defined. + * + */ + +#ifndef MSVMMAJ_TYPES_H +#define MSVMMAJ_TYPES_H + +/** + * @brief Implementation of true and false + */ +typedef enum { + false=0, /**< false keyword, corresponding to 0. */ + true=1 /**< true keyword, corresponding to 1. */ +} bool; + +/** + * @brief type of training used in parameter grid search + */ +typedef enum { + CV=0, /**< cross validation */ + TT=1 /**< data with existing train/test split */ +} TrainType; + +/** + * @brief type of kernel used in training + */ +typedef enum { + K_LINEAR=0, /**< Linear kernel */ + K_POLY=1, /**< Polynomial kernel */ + K_RBF=2, /**< RBF kernel */ + K_SIGMOID=3, /**< Sigmoid kernel */ +} KernelType; + +#endif diff --git a/include/util.h b/include/util.h new file mode 100644 index 0000000..375a9c2 --- /dev/null +++ b/include/util.h @@ -0,0 +1,27 @@ +/** + * @file util.h + * @author Gertjan van den Burg + * @date August, 2013 + * @brief Header file for util.c + * + * @details + * Function declarations for utility functions of the program. + * + */ + +#ifndef MSVMMAJ_UTIL_H +#define MSVMMAJ_UTIL_H + +#include "globals.h" + +// forward declarations +struct MajData; +struct MajModel; + +// function declarations +int msvmmaj_check_argv(int argc, char **argv, char *str); +int msvmmaj_check_argv_eq(int argc, char **argv, char *str); + +void note(const char *fmt,...); + +#endif |
