aboutsummaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r--include/crossval.h25
-rw-r--r--include/globals.h38
-rw-r--r--include/libMSVMMaj.h40
-rw-r--r--include/msvmmaj.h98
-rw-r--r--include/msvmmaj_init.h27
-rw-r--r--include/msvmmaj_io.h30
-rw-r--r--include/msvmmaj_kernel.h32
-rw-r--r--include/msvmmaj_lapack.h23
-rw-r--r--include/msvmmaj_matrix.h29
-rw-r--r--include/msvmmaj_pred.h26
-rw-r--r--include/msvmmaj_train.h31
-rw-r--r--include/msvmmaj_train_dataset.h137
-rw-r--r--include/strutil.h31
-rw-r--r--include/timer.h21
-rw-r--r--include/types.h41
-rw-r--r--include/util.h27
16 files changed, 656 insertions, 0 deletions
diff --git a/include/crossval.h b/include/crossval.h
new file mode 100644
index 0000000..0dff0b9
--- /dev/null
+++ b/include/crossval.h
@@ -0,0 +1,25 @@
+/**
+ * @file crossval.h
+ * @author Gertjan van den Burg
+ * @date January, 2014
+ * @brief Header file for crossval.c
+ *
+ * @details
+ * Contains function declarations for functions needed for performing cross
+ * validation on MajData structures.
+ *
+ */
+
+#ifndef CROSSVAL_H
+#define CROSSVAL_H
+
+#include "globals.h"
+
+// forward declaration
+struct MajData;
+
+void msvmmaj_make_cv_split(long N, long folds, long *cv_idx);
+void msvmmaj_get_tt_split(struct MajData *full_data, struct MajData *train_data,
+ struct MajData *test_data, long *cv_idx, long fold_idx);
+
+#endif
diff --git a/include/globals.h b/include/globals.h
new file mode 100644
index 0000000..55fb6c4
--- /dev/null
+++ b/include/globals.h
@@ -0,0 +1,38 @@
+/**
+ * @file globals.h
+ * @author Gertjan van den Burg
+ * @date January, 2014
+ * @brief Global definitions
+ *
+ * @details
+ * This header file contains defines and includes which are used in many
+ * parts of the program. Most notable are the Calloc, Malloc and Memset
+ * defines, which are commonly used to allocate memory. These functions
+ * are shorthands for their lowercase counterparts.
+ *
+ * Furthermore, a maximum and minimum function are defined here. These
+ * functions have their own include guards, to ensure potential linked
+ * libraries don't conflict with these definitions.
+ *
+ */
+
+#ifndef MSVMMAJ_GLOBALS_H
+#define MSVMMAJ_GLOBALS_H
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define MAX_LINE_LENGTH 1024
+
+#define Calloc(type, n) ((type *)calloc((n), sizeof(type))) /* zero-filled array of n `type` via calloc */
+#define Malloc(type, n) ((type *)malloc((n)*sizeof(type))) /* uninitialized array of n `type` via malloc */
+#define Memset(var, type, n) (memset((var), 0, (n)*sizeof(type))) /* zero n elements of `type` at var */
+
+#ifndef MIN_MAX_DEFINE
+#define MIN_MAX_DEFINE
+#define maximum(a, b) ((a) > (b) ? (a) : (b)) /* fully parenthesized; arguments may be evaluated twice */
+#define minimum(a, b) ((a) < (b) ? (a) : (b)) /* fully parenthesized; arguments may be evaluated twice */
+#endif
+
+#endif
diff --git a/include/libMSVMMaj.h b/include/libMSVMMaj.h
new file mode 100644
index 0000000..b7261dc
--- /dev/null
+++ b/include/libMSVMMaj.h
@@ -0,0 +1,40 @@
+/**
+ * @file libMSVMMaj.h
+ * @author Gertjan van den Burg
+ * @date August, 2013
+ * @brief Header file for the core MSVMMaj library libMSVMMaj.c
+ *
+ * @details
+ * The core computational routines for MSVMMaj are defined in libMSVMMaj.c.
+ * This file contains function declarations for these functions.
+ *
+ */
+
+/**
+ * @todo
+ * rename this file and libMSVMMaj.c to correspond with the lowercase convention.
+ * Also change the name of the include guard.
+ */
+#ifndef LIBMSVMMAJ_H
+#define LIBMSVMMAJ_H
+
+#include "globals.h"
+
+// forward declarations
+struct MajData;
+struct MajModel;
+
+// function declarations
+void msvmmaj_simplex_gen(long K, double *U);
+void msvmmaj_category_matrix(struct MajModel *model, struct MajData *data);
+void msvmmaj_simplex_diff(struct MajModel *model, struct MajData *dataset);
+
+void msvmmaj_calculate_errors(struct MajModel *model, struct MajData *data, double *ZV);
+void msvmmaj_calculate_huber(struct MajModel *model);
+
+void msvmmaj_step_doubling(struct MajModel *model);
+
+void msvmmaj_seed_model_V(struct MajModel *from_model, struct MajModel *to_model);
+void msvmmaj_initialize_weights(struct MajData *data, struct MajModel *model);
+
+#endif
diff --git a/include/msvmmaj.h b/include/msvmmaj.h
new file mode 100644
index 0000000..d67ad8b
--- /dev/null
+++ b/include/msvmmaj.h
@@ -0,0 +1,98 @@
+/**
+ * @file msvmmaj.h
+ * @author Gertjan van den Burg
+ * @date August, 2013
+ * @brief Definitions for common structures
+ *
+ * @details
+ * Contains documentation and declarations of MajModel and MajData.
+ *
+ */
+
+#ifndef MSVMMAJ_H
+#define MSVMMAJ_H
+
+#include "globals.h"
+#include "types.h"
+
+/**
+ * @brief A structure to represent a single MSVMMaj model.
+ *
+ * @param weight_idx which weights to use (1 = unit, 2 = group)
+ * @param K number of classes in the dataset
+ * @param n number of instances in the dataset
+ * @param m number of predictors in the dataset
+ * @param epsilon stopping criterion
+ * @param p parameter for the L_p norm
+ * @param kappa parameter for the Huber hinge
+ * @param lambda regularization parameter
+ * @param *W pointer to the weight matrix
+ * @param *t pointer to the translation vector
+ * @param *V pointer to the augmented weight matrix
+ * @param *Vbar pointer to the augmented weight matrix from a
+ * previous iteration
+ * @param *U pointer to the simplex matrix
+ * @param *UU pointer to the 3D simplex difference matrix
+ * @param *Q pointer to the error matrix
+ * @param *H pointer to the Huber weighted error matrix
+ * @param *R pointer to the 0-1 auxiliary matrix
+ * @param *rho pointer to the instance weight vector
+ * @param training_error error after training has completed
+ * @param *data_file pointer to the filename of the data
+ * @param kerneltype kernel to be used in the model
+ * @param kernelparam pointer to the vector of kernel parameters
+ * @param use_cholesky whether the Cholesky decomposition should be
+ * used
+ *
+ */
+struct MajModel {
+ int weight_idx; /**< which weights to use (1 = unit, 2 = group) */
+ long K; /**< number of classes in the dataset */
+ long n; /**< number of instances in the dataset */
+ long m; /**< number of predictors in the dataset */
+ double epsilon; /**< stopping criterion */
+ double p; /**< parameter for the L_p norm */
+ double kappa; /**< parameter for the Huber hinge */
+ double lambda; /**< regularization parameter */
+ double *W; /**< weight matrix */
+ double *t; /**< translation vector */
+ double *V; /**< augmented weight matrix */
+ double *Vbar; /**< augmented weight matrix from a previous iteration */
+ double *U; /**< simplex matrix */
+ double *UU; /**< 3D simplex difference matrix */
+ double *Q; /**< error matrix */
+ double *H; /**< Huber weighted error matrix */
+ double *R; /**< 0-1 auxiliary matrix */
+ double *rho; /**< instance weight vector */
+ double training_error; /**< error after training has completed */
+ char *data_file; /**< filename of the data */
+ KernelType kerneltype; /**< kernel to be used in the model */
+ double *kernelparam; /**< vector of kernel parameters */
+ bool use_cholesky; /**< whether the Cholesky decomposition should be used */
+};
+
+/**
+ * @brief A structure to represent the data.
+ *
+ * @param K number of classes
+ * @param n number of instances
+ * @param m number of predictors
+ * @param *y pointer to vector of class labels
+ * @param *Z pointer to augmented data matrix
+ * @param kerneltype kerneltype used in MajData::Z
+ * @param *kernelparam kernel parameters used in MajData::Z
+ * @param use_cholesky whether the Cholesky decomposition is used in MajData::Z
+ *
+ */
+struct MajData {
+ long K; /**< number of classes */
+ long n; /**< number of instances */
+ long m; /**< number of predictors */
+ long *y; /**< vector of class labels */
+ double *Z; /**< augmented data matrix */
+ KernelType kerneltype; /**< kerneltype used in Z */
+ double *kernelparam; /**< kernel parameters used in Z */
+ bool use_cholesky; /**< whether the Cholesky decomposition is used in Z */
+};
+
+#endif
diff --git a/include/msvmmaj_init.h b/include/msvmmaj_init.h
new file mode 100644
index 0000000..febfb4a
--- /dev/null
+++ b/include/msvmmaj_init.h
@@ -0,0 +1,27 @@
+/**
+ * @file msvmmaj_init.h
+ * @author Gertjan van den Burg
+ * @date January, 2014
+ * @brief Header file for msvmmaj_init.c
+ *
+ * @details
+ * Contains function declarations for the initialization functions for
+ * MajModel and MajData structures.
+ */
+
+#ifndef MSVMMAJ_INIT_H
+#define MSVMMAJ_INIT_H
+
+// forward declaration
+struct MajData;
+struct MajModel;
+
+struct MajModel *msvmmaj_init_model(void); // MajModel initializer; takes no arguments
+
+struct MajData *msvmmaj_init_data(void); // MajData initializer; takes no arguments
+
+void msvmmaj_allocate_model(struct MajModel *model);
+void msvmmaj_free_model(struct MajModel *model);
+void msvmmaj_free_data(struct MajData *data);
+
+#endif
diff --git a/include/msvmmaj_io.h b/include/msvmmaj_io.h
new file mode 100644
index 0000000..99fb4dc
--- /dev/null
+++ b/include/msvmmaj_io.h
@@ -0,0 +1,30 @@
+/**
+ * @file msvmmaj_io.h
+ * @author Gertjan van den Burg
+ * @date January, 2014
+ * @brief Header file for msvmmaj_io.c
+ *
+ * @details
+ * Function declarations for input/output functions.
+ *
+ */
+
+#ifndef MSVMMAJ_IO_H
+#define MSVMMAJ_IO_H
+
+#include "globals.h"
+
+// forward declarations
+struct MajData;
+struct MajModel;
+
+// function declarations
+void msvmmaj_read_data(struct MajData *dataset, char *data_file);
+
+void msvmmaj_read_model(struct MajModel *model, char *model_filename);
+void msvmmaj_write_model(struct MajModel *model, char *output_filename);
+
+void msvmmaj_write_predictions(struct MajData *data, long *predy,
+ char *output_filename);
+
+#endif
diff --git a/include/msvmmaj_kernel.h b/include/msvmmaj_kernel.h
new file mode 100644
index 0000000..69bf267
--- /dev/null
+++ b/include/msvmmaj_kernel.h
@@ -0,0 +1,32 @@
+/**
+ * @file msvmmaj_kernel.h
+ * @author Gertjan van den Burg
+ * @date January, 2014
+ * @brief Header file for kernel functionality
+ *
+ * @details
+ * Contains function declarations for computing the kernel matrix
+ * in nonlinear MSVMMaj. Additional kernel functions should be
+ * included here and in msvmmaj_kernel.c
+ *
+ */
+
+#ifndef MSVMMAJ_KERNEL_H
+#define MSVMMAJ_KERNEL_H
+
+#include "globals.h"
+
+// forward declarations
+struct MajData;
+struct MajModel;
+
+// function declarations
+void msvmmaj_make_kernel(struct MajModel *model, struct MajData *data);
+
+double msvmmaj_compute_rbf(double *x1, double *x2, double *kernelparam,
+ long n);
+double msvmmaj_compute_poly(double *x1, double *x2, double *kernelparam,
+ long n);
+double msvmmaj_compute_sigmoid(double *x1, double *x2, double *kernelparam,
+ long n);
+#endif
diff --git a/include/msvmmaj_lapack.h b/include/msvmmaj_lapack.h
new file mode 100644
index 0000000..766a475
--- /dev/null
+++ b/include/msvmmaj_lapack.h
@@ -0,0 +1,23 @@
+/**
+ * @file msvmmaj_lapack.h
+ * @author Gertjan van den Burg
+ * @date August, 2013
+ * @brief Header file for msvmmaj_lapack.c
+ *
+ * @details
+ * Function declarations for external LAPACK functions
+ *
+ */
+
+#ifndef MSVMMAJ_LAPACK_H
+#define MSVMMAJ_LAPACK_H
+
+#include "globals.h"
+
+int dposv(char UPLO, int N, int NRHS, double *A, int LDA, double *B,
+ int LDB);
+int dsysv(char UPLO, int N, int NRHS, double *A, int LDA, int *IPIV,
+ double *B, int LDB, double *WORK, int LWORK);
+int dpotrf(char UPLO, int N, double *A, int LDA);
+
+#endif
diff --git a/include/msvmmaj_matrix.h b/include/msvmmaj_matrix.h
new file mode 100644
index 0000000..8f5ca59
--- /dev/null
+++ b/include/msvmmaj_matrix.h
@@ -0,0 +1,29 @@
+/**
+ * @file msvmmaj_matrix.h
+ * @author Gertjan van den Burg
+ * @date August, 2013
+ * @brief Header file for msvmmaj_matrix.c
+ *
+ * @details
+ * Contains function declarations for functions useful for dealing with matrices.
+ *
+ */
+
+#ifndef MSVMMAJ_MATRIX_H
+#define MSVMMAJ_MATRIX_H
+
+#include "globals.h"
+
+void matrix_set(double *M, long cols, long i, long j, double val);
+void matrix_add(double *M, long cols, long i, long j, double val);
+void matrix_mul(double *M, long cols, long i, long j, double val);
+
+double matrix_get(double *M, long cols, long i, long j);
+
+void matrix3_set(double *M, long N2, long N3, long i, long j, long k,
+ double val);
+double matrix3_get(double *M, long N2, long N3, long i, long j, long k);
+
+void print_matrix(double *M, long rows, long cols);
+
+#endif
diff --git a/include/msvmmaj_pred.h b/include/msvmmaj_pred.h
new file mode 100644
index 0000000..ce22b10
--- /dev/null
+++ b/include/msvmmaj_pred.h
@@ -0,0 +1,26 @@
+/**
+ * @file msvmmaj_pred.h
+ * @author Gertjan van den Burg
+ * @date August, 2013
+ * @brief Header file for msvmmaj_pred.c
+ *
+ * @details
+ * Contains function declarations for prediction functions.
+ *
+ */
+
+#ifndef MSVMMAJ_PRED_H
+#define MSVMMAJ_PRED_H
+
+#include "globals.h"
+
+// forward declarations
+struct MajData;
+struct MajModel;
+
+// function declarations
+void msvmmaj_predict_labels(struct MajData *data, struct MajModel *model,
+ long *predy); // predy: array of predicted labels (same name as in msvmmaj_write_predictions)
+double msvmmaj_prediction_perf(struct MajData *data, long *predy); // predy: predicted labels to evaluate against data
+
+#endif
diff --git a/include/msvmmaj_train.h b/include/msvmmaj_train.h
new file mode 100644
index 0000000..835100f
--- /dev/null
+++ b/include/msvmmaj_train.h
@@ -0,0 +1,31 @@
+/**
+ * @file msvmmaj_train.h
+ * @author Gertjan van den Burg
+ * @date August, 2013
+ * @brief Header file for msvmmaj_train.c
+ *
+ * @details
+ * Contains function declarations for functions used to train a single
+ * MajModel.
+ *
+ */
+
+#ifndef MSVMMAJ_TRAIN_H
+#define MSVMMAJ_TRAIN_H
+
+#include "globals.h"
+
+//forward declarations
+struct MajData;
+struct MajModel;
+
+// function declarations
+void msvmmaj_optimize(struct MajModel *model, struct MajData *data);
+
+double msvmmaj_get_loss(struct MajModel *model, struct MajData *data,
+ double *ZV);
+
+void msvmmaj_get_update(struct MajModel *model, struct MajData *data,
+ double *B, double *ZAZ, double *ZAZV, double *ZAZVT);
+
+#endif
diff --git a/include/msvmmaj_train_dataset.h b/include/msvmmaj_train_dataset.h
new file mode 100644
index 0000000..5248b4a
--- /dev/null
+++ b/include/msvmmaj_train_dataset.h
@@ -0,0 +1,137 @@
+/**
+ * @file msvmmaj_train_dataset.h
+ * @author Gertjan van den Burg
+ * @date August, 2013
+ * @brief Structs and functions necessary for the grid search
+ *
+ * @details
+ * The grid search for the optimal parameters is done through a queue.
+ * This file contains struct definitions for this queue and a single
+ * task in a queue, as well as a structure for the complete training
+ * scheme. Function declarations are also included.
+ *
+ */
+
+#ifndef MSVMMAJ_TRAIN_DATASET_H
+#define MSVMMAJ_TRAIN_DATASET_H
+
+#include "globals.h"
+#include "types.h"
+
+/**
+ * @brief A structure for a single task in the queue.
+ *
+ * @param folds number of folds in cross validation
+ * @param ID numeric id of the task in the queue
+ * @param weight_idx parameter for the MajModel
+ * @param p parameter for the MajModel
+ * @param kappa parameter for the MajModel
+ * @param lambda parameter for the MajModel
+ * @param epsilon parameter for the MajModel
+ * @param kerneltype parameter for the MajModel
+ * @param *kernel_param parameters for the MajModel
+ * @param *train_data pointer to the training data
+ * @param *test_data pointer to the test data (if any)
+ * @param performance performance after cross validation
+ */
+struct Task {
+ KernelType kerneltype; /**< kernel type parameter for the MajModel */
+ int weight_idx; /**< weight_idx parameter for the MajModel */
+ long folds; /**< number of folds in cross validation */
+ long ID; /**< numeric id of the task in the queue */
+ double p; /**< p parameter for the MajModel */
+ double kappa; /**< kappa parameter for the MajModel */
+ double lambda; /**< lambda parameter for the MajModel */
+ double epsilon; /**< epsilon parameter for the MajModel */
+ double *kernel_param; /**< kernel parameters for the MajModel */
+ struct MajData *train_data; /**< pointer to the training data */
+ struct MajData *test_data; /**< pointer to the test data (if any) */
+ double performance; /**< performance after cross validation */
+};
+
+/**
+ * @brief Simple task queue.
+ *
+ * This struct is basically just an array of pointers to Task instances,
+ * with a length and an index of the current task.
+ *
+ * @param **tasks array of pointers to Task structs
+ * @param N size of task array
+ * @param i index used for keeping track of the queue
+ */
+struct Queue {
+ struct Task **tasks; /**< array of pointers to Task structs */
+ long N; /**< size of task array */
+ long i; /**< index used for keeping track of the queue */
+};
+
+/**
+ * @brief Structure for describing the entire grid search
+ *
+ * @param traintype type of training to use
+ * @param kerneltype type of kernel to use throughout training
+ * @param repeats number of repeats to be done after the grid
+ * search to find the parameter set with the
+ * most consistent high performance
+ * @param folds number of folds in cross validation
+ * @param Np size of the array of p values
+ * @param Nl size of the array of lambda values
+ * @param Nk size of the array of kappa values
+ * @param Ne size of the array of epsilon values
+ * @param Nw size of the array of weight_idx values
+ * @param Ng size of the array of gamma values
+ * @param Nc size of the array of coef values
+ * @param Nd size of the array of degree values
+ * @param *weight_idxs array of weight_idxs
+ * @param *ps array of p values
+ * @param *lambdas array of lambda values
+ * @param *kappas array of kappa values
+ * @param *epsilons array of epsilon values
+ * @param *gammas array of gamma values
+ * @param *coefs array of coef values
+ * @param *degrees array of degree values
+ * @param *train_data_file filename of train data file
+ * @param *test_data_file filename of test data file
+ *
+ */
+struct Training {
+ TrainType traintype; /**< type of training to use */
+ KernelType kerneltype; /**< type of kernel to use throughout training */
+ long repeats; /**< number of repeats done after the grid search */
+ long folds; /**< number of folds in cross validation */
+ long Np; /**< size of the array of p values */
+ long Nl; /**< size of the array of lambda values */
+ long Nk; /**< size of the array of kappa values */
+ long Ne; /**< size of the array of epsilon values */
+ long Nw; /**< size of the array of weight_idx values */
+ long Ng; /**< size of the array of gamma values */
+ long Nc; /**< size of the array of coef values */
+ long Nd; /**< size of the array of degree values */
+ int *weight_idxs; /**< array of weight_idxs */
+ double *ps; /**< array of p values */
+ double *lambdas; /**< array of lambda values */
+ double *kappas; /**< array of kappa values */
+ double *epsilons; /**< array of epsilon values */
+ double *gammas; /**< array of gamma values */
+ double *coefs; /**< array of coef values */
+ double *degrees; /**< array of degree values */
+ char *train_data_file; /**< filename of train data file */
+ char *test_data_file; /**< filename of test data file */
+};
+
+void make_queue(struct Training *training, struct Queue *queue,
+ struct MajData *train_data, struct MajData *test_data);
+
+struct Task *get_next_task(struct Queue *q);
+void start_training_tt(struct Queue *q);
+void start_training_cv(struct Queue *q);
+void free_queue(struct Queue *q);
+
+void consistency_repeats(struct Queue *q, long repeats, TrainType traintype);
+
+double cross_validation(struct MajModel *model, struct MajModel *seed_model,
+ struct MajData *data, long folds);
+
+void make_model_from_task(struct Task *task, struct MajModel *model);
+void copy_model(struct MajModel *from, struct MajModel *to);
+#endif
diff --git a/include/strutil.h b/include/strutil.h
new file mode 100644
index 0000000..740fde1
--- /dev/null
+++ b/include/strutil.h
@@ -0,0 +1,31 @@
+/**
+ * @file strutil.h
+ * @author Gertjan van den Burg
+ * @date August, 2013
+ * @brief Header file for strutil.c
+ *
+ * @details
+ * Function declarations for useful string functions used in parsing
+ * input files.
+ *
+ */
+
+#ifndef STRUTIL_H
+#define STRUTIL_H
+
+#include "globals.h"
+#include "types.h"
+
+bool str_startswith(const char *str, const char *pre);
+bool str_endswith(const char *str, const char *suf);
+
+void next_line(FILE *fid, char *filename);
+void get_line(FILE *fid, char *filename, char *buffer);
+
+double get_fmt_double(FILE *fid, char *filename, const char *fmt);
+long get_fmt_long(FILE *fid, char *filename, const char *fmt);
+
+long all_doubles_str(char *buffer, long offset, double *all_doubles);
+long all_longs_str(char *buffer, long offset, long *all_longs);
+
+#endif
diff --git a/include/timer.h b/include/timer.h
new file mode 100644
index 0000000..d4af649
--- /dev/null
+++ b/include/timer.h
@@ -0,0 +1,21 @@
+/**
+ * @file timer.h
+ * @author Gertjan van den Burg
+ * @date August, 2013
+ * @brief Header file for timer.c
+ *
+ * @details
+ * Function declaration for timer function used to measure computation time.
+ *
+ */
+
+#ifndef MSVMMAJ_TIMER_H
+#define MSVMMAJ_TIMER_H
+#include <time.h>
+#include "globals.h"
+
+double elapsed_time(clock_t s_time, clock_t e_time);
+
+void get_time_string(char *buffer);
+
+#endif
diff --git a/include/types.h b/include/types.h
new file mode 100644
index 0000000..f6d008b
--- /dev/null
+++ b/include/types.h
@@ -0,0 +1,41 @@
+/**
+ * @file types.h
+ * @author Gertjan van den Burg
+ * @date August, 2013
+ * @brief Definitions of common types
+ *
+ * @details
+ * Here common types used throughout the program are defined.
+ *
+ */
+
+#ifndef MSVMMAJ_TYPES_H
+#define MSVMMAJ_TYPES_H
+
+/**
+ * @brief Implementation of true and false. NOTE: clashes with <stdbool.h> macros and C23 keywords; never include both.
+ */
+typedef enum {
+ false=0, /**< false keyword, corresponding to 0. */
+ true=1 /**< true keyword, corresponding to 1. */
+} bool;
+
+/**
+ * @brief type of training used in parameter grid search
+ */
+typedef enum {
+ CV = 0, /**< grid search evaluated by cross validation */
+ TT = 1 /**< grid search evaluated on an existing train/test split */
+} TrainType;
+
+/**
+ * @brief type of kernel used in training
+ */
+typedef enum {
+ K_LINEAR = 0, /**< Linear kernel */
+ K_POLY = 1, /**< Polynomial kernel */
+ K_RBF = 2, /**< RBF kernel */
+ K_SIGMOID = 3 /**< Sigmoid kernel */
+} KernelType;
+
+#endif
diff --git a/include/util.h b/include/util.h
new file mode 100644
index 0000000..375a9c2
--- /dev/null
+++ b/include/util.h
@@ -0,0 +1,27 @@
+/**
+ * @file util.h
+ * @author Gertjan van den Burg
+ * @date August, 2013
+ * @brief Header file for util.c
+ *
+ * @details
+ * Function declarations for utility functions of the program.
+ *
+ */
+
+#ifndef MSVMMAJ_UTIL_H
+#define MSVMMAJ_UTIL_H
+
+#include "globals.h"
+
+// forward declarations
+struct MajData;
+struct MajModel;
+
+// function declarations
+int msvmmaj_check_argv(int argc, char **argv, char *str);
+int msvmmaj_check_argv_eq(int argc, char **argv, char *str);
+
+void note(const char *fmt,...);
+
+#endif